_html.py
13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
###############################################################################
# Name: html.py #
# Purpose: Define HTML syntax for highlighting and other features #
# Author: Cody Precord <cprecord@editra.org> #
# Copyright: (c) 2007 Cody Precord <staff@editra.org> #
# License: wxWindows License #
###############################################################################
"""
FILE: html.py
AUTHOR: Cody Precord
@summary: Lexer configuration module for HTML/DHTML/SGML.
@todo: Add Netscape/Microsoft Tag Extensions (maybe)
@todo: Styling needs lots of tweaking
"""
__author__ = "Cody Precord <cprecord@editra.org>"
__svnid__ = "$Id: _html.py 68798 2011-08-20 17:17:05Z CJP $"
__revision__ = "$Revision: 68798 $"
#-----------------------------------------------------------------------------#
# Imports
import wx.stc as stc
# Local Imports
import synglob
import syndata
import _javascript
import _vbscript
#-----------------------------------------------------------------------------#
#---- Keyword Specifications ----#
# HTML Tags (HTML4)
# Keyword spec stored as a (number, words) pair. The words are one space
# separated string assembled from the adjacent string literals below, so every
# fragment except the last one ends with a blank to keep neighbouring words
# from fusing together. Tag names and attribute names share this single list.
# NOTE(review): the leading 0 is presumably the lexer keyword set index --
# confirm against the code that consumes SyntaxData.GetKeywords().
HTML_TAGS = (0, "address applet area a base basefont big blockquote br caption "
"center cite code dd dfn dir div dl dt font form hr html img "
"input isindex kbd li link map menu meta ol option param pre p "
"samp span select small strike sub sup table td textarea th tr "
"script noscript tt ul var xmp b i u h1 h2 h3 h4 h5 h6 em "
"strong head body title "
# HTML 4.0 Tags
"abbr acronym bdo button col label colgroup del fieldset "
"iframe ins legend object optgroup q s tbody tfoot thead "
# HTML 5 Tags
"article aside audio canvas command datalist details dialog "
"embed figcaption figure footer header hgroup keygen mark "
"meter nav output progress rp rt ruby section source time "
"video "
# Tag Attributes / Arguments
"action align alink alt archive background bgcolor border "
"bordercolor cellpadding cellspacing checked class clear "
"codebase color cols colspan content coords enctype face "
"gutter height hspace id link lowsrc marginheight marginwidth "
"maxlength method name prompt rel rev rows rowspan scrolling "
"selected shape size src start target text type url usemap "
"ismap valign value vlink vspace width wrap href http-equiv "
# HTML 4 Tag Attributes /Arguments
"accept accesskey axis char charoff charset cite classid "
"codetype compact data datetime declare defer dir disabled for "
"frame headers hreflang lang language longdesc multiple nohref "
"nowrap profile readonly rules scheme scope standby style "
"summary tabindex valuetype version "
# HTML 5 Tag Attributes / Arguments
"async autocomplete contenteditable contextmenu date "
"datetime-local draggable email formaction formenctype "
"formmethod formnovalidate formtarget hidden list manifest max "
"media min month novalidate number pattern ping range required "
"reversed role sandbox scoped seamless search sizes spellcheck "
"srcdoc step tel week "
# DHTML Support
"dtml-var dtml-if dtml-unless dtml-in dtml-with dtml-let "
"dtml-call dtml-raise dtml-try dtml-comment dtml-tree")
#---- Extra defs ----#
# ColdFusion Tags
# Unlike the keyword specs in this module this is a bare space separated
# string, not a (number, words) pair: SyntaxData.GetKeywords() appends it to
# the HTML word list when the language is ColdFusion.
CF_TAGS = ("cfabort cfapplet cfapplication cfargument cfassociate cfbreak "
"cfcache cfcalendar cfcase cfcatch cfchart cfchartdata "
"cfchartseries cfcol cfcollection cfcomponent cfcontent cfcookie "
"cfdefaultcase cfdirectory cfdocument cfdocumentitem "
"cfdocumentsection cfdump cfelse cfelseif cferror cfexecute cfexit "
"cffile cfflush cfform cfformgroup cfformitem cfftp cffunction "
"cfgrid cfgridcolumn cfgridrow cfgridupdate cfheader cfhtmlhead "
"cfhttp cfhttpparam cfif cfimport cfinclude cfindex cfinput "
"cfinsert cfinvoke cfinvokeargument cfldap cflocation cflock cflog "
"cflogin cfloginuser cflogout cfloop cfmail cfmailparam cfmailpart "
"cfmodule cfNTauthenticate cfobject cfobjectcache cfoutput cfparam "
"cfpop cfprocessingdirective cfprocparam cfprocresult cfproperty "
"cfquery cfqueryparam cfregistry cfreport cfreportparam cfrethrow "
"cfreturn cfsavecontent cfschedule cfscript cfsearch cfselect cfset "
"cfsetting cfsilent cfslider cfstoredproc cfswitch cftable "
"cftextarea cfthrow cftimer cftrace cftransaction cftree cftreeitem "
"cftry cfupdate cfwddx cfxml")
# The specs below follow the same (number, words) layout as HTML_TAGS.
# NOTE(review): the numbers (1, 2, 3, 5, 7) are presumably lexer keyword set
# indexes -- confirm against the consumer of SyntaxData.GetKeywords().

# JavaScript Keywords (see javascript.py)
# The word list is obtained from the _javascript module at import time.
JS_KEYWORDS = (1, _javascript.KeywordString(synglob.ID_LANG_JS))
# VBScript Keywords (currently unsupported)
VBS_KEYWORDS = (2, _vbscript.VBS_KW)
# Python Keywords (see python.py)
# Left empty; not returned by SyntaxData.GetKeywords().
PY_KEYWORDS = (3, "")
# PHP Keywords (see php.py)
# This module is loaded for files with a .html/.htm extension, so it is assumed
# that there is no PHP in the file. On the other hand, the php module loads
# this module so that it can support embedded HTML. This behavior may be
# changed in the future.
# XML Keywords (see xml.py)
# XML files are handled independently from HTML. Although there is support for
# embedded XML highlighting, it is currently not being used.
# SGML Keywords
SGML_KEYWORDS = (5, "ELEMENT DOCTYPE ATTLIST ENTITY NOTATION")
# Alternative, larger SGML keyword set (disabled):
#SGML_KEYWORDS = (5, "#CURRENT #IMPLIED #REQUIRED ATTLIST CDATA DOCTYPE "
# "ELEMENT ENTITY HTML IDREF INCLUDE IGNORE NMTOKEN NUMBER "
# "RCDATA TEMP")
# SGML Block Keywords
# Left empty; not returned by SyntaxData.GetKeywords().
SGML_BLOCK = (7, "")
#---- Syntax Style Specs ----#
# Each entry pairs a wx.stc HTML lexer style constant with the name of the
# editor style to render it with. SyntaxData.GetSyntaxSpec() returns this list
# concatenated with _javascript.SYNTAX_ITEMS.
# Entries marked "STYLE ME" / "TODO" still use a placeholder style.
SYNTAX_ITEMS = [ (stc.STC_H_DEFAULT, 'default_style'),
(stc.STC_H_ASP, 'array_style'),
(stc.STC_H_ASPAT, 'array_style'),
(stc.STC_H_ATTRIBUTE, 'keyword2_style'),
(stc.STC_H_ATTRIBUTEUNKNOWN, 'error_style'),
(stc.STC_H_CDATA, 'default_style'), # Style ME
(stc.STC_H_COMMENT, 'comment_style'),
(stc.STC_H_DOUBLESTRING, 'string_style'),
(stc.STC_H_ENTITY, 'default_style'), # Style ME
(stc.STC_H_NUMBER, 'number_style'),
(stc.STC_H_OTHER, 'default_style'), # Style ME
(stc.STC_H_QUESTION, 'scalar_style'),
(stc.STC_H_SCRIPT, 'funct_style'), # STYLE ME
(stc.STC_H_SGML_1ST_PARAM, 'keyword2_style'), # STYLE ME
(stc.STC_H_SGML_1ST_PARAM_COMMENT, 'comment_style'),
(stc.STC_H_SGML_BLOCK_DEFAULT, 'default_style'), # STYLE ME
(stc.STC_H_SGML_COMMAND, 'keyword_style'), # STYLE ME
(stc.STC_H_SGML_COMMENT, 'comment_style'),
(stc.STC_H_SGML_DEFAULT, 'array_style'), # STYLE ME
(stc.STC_H_SGML_DOUBLESTRING, 'string_style'),
(stc.STC_H_SGML_ENTITY, 'default_style'), # STYLE ME
(stc.STC_H_SGML_ERROR, 'error_style'),
(stc.STC_H_SGML_SIMPLESTRING, 'string_style'),
(stc.STC_H_SGML_SPECIAL, 'default_style'), # STYLE ME
(stc.STC_H_SINGLESTRING, 'string_style'),
(stc.STC_H_TAG, 'keyword_style'),
(stc.STC_H_TAGEND, 'keyword_style'),
(stc.STC_H_TAGUNKNOWN, 'error_style'),
(stc.STC_H_VALUE, 'number_style'),
(stc.STC_H_XCCOMMENT, 'comment_style'),
(stc.STC_H_XMLEND, 'scalar_style'),
(stc.STC_H_XMLSTART, 'scalar_style'),
# Embedded JavaScript
(stc.STC_HJ_COMMENT, 'comment_style'),
(stc.STC_HJ_COMMENTDOC, 'comment_style'),
(stc.STC_HJ_COMMENTLINE, 'comment_style'),
(stc.STC_HJ_DEFAULT, 'default_style'),
(stc.STC_HJ_DOUBLESTRING, 'default_style'), # STYLE ME
(stc.STC_HJ_KEYWORD, 'default_style'), # STYLE ME
(stc.STC_HJ_NUMBER, 'default_style'), # STYLE ME
(stc.STC_HJ_REGEX, 'default_style'), # STYLE ME
(stc.STC_HJ_SINGLESTRING, 'default_style'), # STYLE ME
(stc.STC_HJ_START, 'default_style'), # STYLE ME
(stc.STC_HJ_STRINGEOL, 'default_style'), # STYLE ME
(stc.STC_HJ_SYMBOLS, 'default_style'), # STYLE ME
(stc.STC_HJ_WORD, 'default_style'), # STYLE ME
# STC_HJA_* styles
# NOTE(review): presumably JavaScript embedded in ASP, going by the Scintilla
# constant naming -- confirm.
(stc.STC_HJA_COMMENT, 'comment_style'),
(stc.STC_HJA_COMMENTDOC, 'comment_style'),
(stc.STC_HJA_COMMENTLINE, 'comment_style'),
(stc.STC_HJA_DEFAULT, 'default_style'),
(stc.STC_HJA_DOUBLESTRING, 'default_style'), # STYLE ME
(stc.STC_HJA_KEYWORD, 'default_style'), # STYLE ME
(stc.STC_HJA_NUMBER, 'default_style'), # STYLE ME
(stc.STC_HJA_REGEX, 'default_style'), # STYLE ME
(stc.STC_HJA_SINGLESTRING, 'default_style'), # STYLE ME
(stc.STC_HJA_START, 'default_style'), # STYLE ME
(stc.STC_HJA_STRINGEOL, 'default_style'), # STYLE ME
(stc.STC_HJA_SYMBOLS, 'default_style'), # STYLE ME
(stc.STC_HJA_WORD, 'default_style'),
# STC_HBA_* styles
# NOTE(review): presumably VBScript embedded in ASP, going by the Scintilla
# constant naming -- confirm.
(stc.STC_HBA_DEFAULT, 'operator_style'), # Styles ( ) ?
(stc.STC_HBA_COMMENTLINE, 'comment_style'),
(stc.STC_HBA_IDENTIFIER, 'default_style'), # TODO
(stc.STC_HBA_NUMBER, 'number_style'),
(stc.STC_HBA_START, 'default_style'), # TODO
(stc.STC_HBA_STRING, 'string_style'),
(stc.STC_HBA_STRINGEOL, 'stringeol_style'),
(stc.STC_HBA_WORD, 'keyword_style') ]
#---- Extra Properties ----#
# (property name, value) pairs; both are returned by
# SyntaxData.GetProperties().
FOLD = ("fold", "1")
FLD_HTML = ("fold.html", "1")
#------------------------------------------------------------------------------#
class SyntaxData(syndata.SyntaxDataBase):
    """Syntax configuration object for HTML and the languages served by it.

    Selects the HTML lexer and registers L{AutoIndenter} as the handler for
    the auto indent feature.
    """
    def __init__(self, langid):
        """Initialize the syntax data
        @param langid: language identifier handed on to the base class
        """
        super(SyntaxData, self).__init__(langid)

        # Setup
        self.SetLexer(stc.STC_LEX_HTML)
        self.RegisterFeature(synglob.FEATURE_AUTOINDENT, AutoIndenter)

    def GetKeywords(self):
        """Get the keyword specifications for the current language
        @return: list of (number, words) keyword specs
        """
        if self.LangId == synglob.ID_LANG_COLDFUSION:
            # ColdFusion extends the HTML word list with its own tags and
            # only adds the JavaScript keywords on top of that.
            cf_words = HTML_TAGS[1] + " " + CF_TAGS
            return [(HTML_TAGS[0], cf_words), JS_KEYWORDS]

        return [HTML_TAGS, JS_KEYWORDS, SGML_KEYWORDS, VBS_KEYWORDS]

    def GetSyntaxSpec(self):
        """Get the style specifications
        @return: the HTML style items followed by the JavaScript ones
        """
        html_items = SYNTAX_ITEMS
        return html_items + _javascript.SYNTAX_ITEMS

    def GetProperties(self):
        """Get the extra lexer properties to set
        @return: list of (property name, value) pairs
        """
        extra_props = [FOLD, FLD_HTML]
        return extra_props

    def GetCommentPattern(self):
        """Get the strings used to comment out a block of code
        @return: list holding the comment opener and the comment closer
        """
        return [u'<!--', u'-->']
#-----------------------------------------------------------------------------#
def AutoIndenter(estc, pos, ichar):
    """Auto indent HTML style markup.

    Adds an end of line sequence plus the indentation for the new line to the
    control through C{estc.AddText}. The new line normally repeats the
    indentation of the current line. When the text in front of the caret ends
    with an opening tag (anything from the last C{<} to a trailing C{>} that
    is not a closing tag, a self closing tag or a C{?>} terminated one), the
    new line is indented one level deeper than the column where that tag
    starts.

    @param estc: EditraStyledTextCtrl
    @param pos: current caret position
    @param ichar: Indentation character(s); inserted once per indent level
    """
    line = estc.GetCurrentLine()
    spos = estc.PositionFromLine(line)
    text = estc.GetTextRange(spos, pos)
    eolch = estc.GetEOLChar()

    # Cursor is in column 0: there is nothing to carry over.
    if not text:
        estc.AddText(eolch)
        return

    # Cursor is in the indent area somewhere: repeat the whitespace as is.
    if text.isspace():
        estc.AddText(eolch + text)
        return

    tab_width = estc.GetTabWidth()
    if ichar == u"\t":
        tabw = tab_width
    else:
        # An indent size of 0 means "same as the tab width" in Scintilla, so
        # fall back to it instead of dividing by zero below.
        tabw = estc.GetIndent() or tab_width

    # Standard indent to match the current line. Floor division keeps the
    # level count an integer regardless of the active division semantics;
    # the remainder is padded with plain spaces.
    indent = estc.GetLineIndentation(line)
    levels, remainder = divmod(indent, tabw)
    rtxt = eolch + (ichar * levels) + (remainder * u" ")

    # Check if we need some 'special' indentation
    tmp = text.rstrip()
    if tmp.endswith(u">"):
        # At a tag: check whether extra indentation is needed
        tagstart = tmp.rfind(u"<")
        if tagstart >= 0:
            tagval = tmp[tagstart:]
            is_closing = tagval.startswith(u"</")
            is_terminated = tagval.endswith((u"/>", u"?>"))
            if not is_closing and not is_terminated:
                # Cursor is after an opening tag so we need to indent more.
                # Align to the column of the tag start (tabs in front of the
                # tag count for their expanded width, not as one character)
                # and add one extra level.
                tagcol = len(tmp[:tagstart].expandtabs(tab_width))
                levels, remainder = divmod(tagcol, tabw)
                rtxt = eolch + (ichar * (levels + 1)) + (remainder * u" ")

    # Put text in the buffer
    estc.AddText(rtxt)
#---- Syntax Modules Internal Functions ----#
def KeywordString(option=0):
    """Get the keyword string of one of the keyword specs of this module
    @keyword option: language id selecting the subset of keywords to get;
                     the SGML id selects the SGML keywords, any other value
                     the HTML word list
    @return: space separated keyword string
    """
    if option == synglob.ID_LANG_SGML:
        keyword_spec = SGML_KEYWORDS
    else:
        keyword_spec = HTML_TAGS
    return keyword_spec[1]
#---- End Syntax Modules Internal Functions ----#