UNPKG

244 kBJavaScriptView Raw
1"use strict";
2module.exports = HTMLParser;
3
4var Document = require('./Document');
5var DocumentType = require('./DocumentType');
6var Node = require('./Node');
7var NAMESPACE = require('./utils').NAMESPACE;
8var html = require('./htmlelts');
9var impl = html.elements;
10
// pushAll(target, items): append every element of the array-like `items`
// to `target` in place, returning whatever Array.prototype.push returns
// (the new length of `target`). `push` is captured once, at definition time.
var pushAll = (function(push) {
  return function(target, items) {
    return push.apply(target, items);
  };
}(Array.prototype.push));
12
13/*
14 * This file contains an implementation of the HTML parsing algorithm.
15 * The algorithm and the implementation are complex because HTML
16 * explicitly defines how the parser should behave for all possible
17 * valid and invalid inputs.
18 *
19 * Usage:
20 *
21 * The file defines a single HTMLParser() function, which dom.js exposes
22 * publicly as document.implementation.mozHTMLParser(). This is a
23 * factory function, not a constructor.
24 *
25 * When you call document.implementation.mozHTMLParser(), it returns
26 * an object that has parse() and document() methods. To parse HTML text,
27 * pass the text (in one or more chunks) to the parse() method. When
28 * you've passed all the text (on the last chunk, or afterward) pass
29 * true as the second argument to parse() to tell the parser that there
30 * is no more coming. Call document() to get the document object that
31 * the parser is parsing into. You can call this at any time, before
32 * or after calling parse().
33 *
34 * The first argument to mozHTMLParser is the absolute URL of the document.
35 *
36 * The second argument is optional and is for internal use only. Pass an
37 * element as the fragmentContext to do innerHTML parsing for the
38 * element. To do innerHTML parsing on a document, pass null. Otherwise,
39 * omit the 2nd argument. See HTMLElement.innerHTML for an example. Note
40 * that if you pass a context element, the end() method will return an
41 * unwrapped document instead of a wrapped one.
42 *
43 * Implementation details:
44 *
45 * This is a long file of almost 7000 lines. It is structured as one
46 * big function nested within another big function. The outer
47 * function defines a bunch of constant data, utility functions
48 * that use that data, and a couple of classes used by the parser.
49 * The outer function also defines and returns the
50 * inner function. This inner function is the HTMLParser factory
51 * function that implements the parser and holds all the parser state
52 * as local variables. The HTMLParser function is quite big because
53 * it defines many nested functions that use those local variables.
54 *
55 * There are three tightly coupled parser stages: a scanner, a
56 * tokenizer and a tree builder. In a (possibly misguided) attempt at
57 * efficiency, the stages are not implemented as separate classes:
58 * everything shares state and is (mostly) implemented in imperative
59 * (rather than OO) style.
60 *
61 * The stages of the parser work like this: When the client code calls
62 * the parser's parse() method, the specified string is passed to
63 * scanChars(). The scanner loops through that string and passes characters
64 * (sometimes one at a time, sometimes in chunks) to the tokenizer stage.
65 * The tokenizer groups the characters into tokens: tags, endtags, runs
66 * of text, comments, doctype declarations, and the end-of-file (EOF)
67 * token. These tokens are then passed to the tree building stage via
68 * the insertToken() function. The tree building stage builds up the
69 * document tree.
70 *
71 * The tokenizer stage is a finite state machine. Each state is
72 * implemented as a function with a name that ends in "_state". The
73 * initial state is data_state(). The current tokenizer state is stored
74 * in the variable 'tokenizer'. Most state functions expect a single
75 * integer argument which represents a single UTF-16 codepoint. Some
76 * states want more characters and set a lookahead property on
77 * themselves. The scanChars() function in the scanner checks for this
78 * lookahead property. If it doesn't exist, then scanChars() just passes
79 * the next input character to the current tokenizer state function.
80 * Otherwise, scanChars() looks ahead (a given # of characters, or for a
81 * matching string, or for a matching regexp) and passes a string of
82 * characters to the current tokenizer state function.
83 *
84 * As a shortcut, certain states of the tokenizer use regular expressions
85 * to look ahead in the scanner's input buffer for runs of text, simple
86 * tags and attributes. For well-formed input, these shortcuts skip a
87 * lot of state transitions and speed things up a bit.
88 *
89 * When a tokenizer state function has consumed a complete token, it
90 * emits that token, by calling insertToken(), or by calling a utility
91 * function that itself calls insertToken(). These tokens are passed to
92 * the tree building stage, which is also a state machine. Like the
93 * tokenizer, the tree building states are implemented as functions, and
94 * these functions have names that end with _mode (because the HTML spec
95 * refers to them as insertion modes). The current insertion mode is held
96 * by the 'parser' variable. Each insertion mode function takes up to 4
97 * arguments. The first is a token type, represented by the constants
98 * TAG, ENDTAG, TEXT, COMMENT, DOCTYPE and EOF. The second argument is
99 * the value of the token: the text or comment data, or tagname or
100 * doctype. For tags, the 3rd argument is an array of attributes. For
101 * DOCTYPES it is the optional public id. For tags, the 4th argument is
102 * true if the tag is self-closing. For doctypes, the 4th argument is the
103 * optional system id.
104 *
105 * Search for "***" to find the major sub-divisions in the code.
106 */
107
108
109/***
110 * Data prolog. Lots of constants declared here, including some
111 * very large objects. They're used throughout the code that follows
112 */
// Token type codes. The tree builder's insertion-mode functions receive
// one of these as their first argument.
var EOF = -1;     // end-of-file token
var TEXT = 1;     // run of text
var TAG = 2;      // start tag
var ENDTAG = 3;   // end tag
var COMMENT = 4;  // comment
var DOCTYPE = 5;  // doctype declaration

// One shared, re-usable empty array (judging by the name, it stands in for
// an empty attribute list; callers presumably must not mutate it -- confirm
// at the use sites).
var NOATTRS = [];
123
// DOCTYPE tables used to pick the document's rendering mode. The lists
// mirror the ones in the HTML spec's "initial" insertion mode.
//
// These DTD public ids put the browser in quirks mode. Matching is
// case-insensitive. The alternatives anchored with both ^ and $ must match
// the whole public id; the rest are prefix matches.
// NOTE: this must stay a single-line regex literal -- a regex literal
// cannot contain a line break.
var quirkyPublicIds = /^HTML$|^-\/\/W3O\/\/DTD W3 HTML Strict 3\.0\/\/EN\/\/$|^-\/W3C\/DTD HTML 4\.0 Transitional\/EN$|^\+\/\/Silmaril\/\/dtd html Pro v0r11 19970101\/\/|^-\/\/AdvaSoft Ltd\/\/DTD HTML 3\.0 asWedit \+ extensions\/\/|^-\/\/AS\/\/DTD HTML 3\.0 asWedit \+ extensions\/\/|^-\/\/IETF\/\/DTD HTML 2\.0 Level 1\/\/|^-\/\/IETF\/\/DTD HTML 2\.0 Level 2\/\/|^-\/\/IETF\/\/DTD HTML 2\.0 Strict Level 1\/\/|^-\/\/IETF\/\/DTD HTML 2\.0 Strict Level 2\/\/|^-\/\/IETF\/\/DTD HTML 2\.0 Strict\/\/|^-\/\/IETF\/\/DTD HTML 2\.0\/\/|^-\/\/IETF\/\/DTD HTML 2\.1E\/\/|^-\/\/IETF\/\/DTD HTML 3\.0\/\/|^-\/\/IETF\/\/DTD HTML 3\.2 Final\/\/|^-\/\/IETF\/\/DTD HTML 3\.2\/\/|^-\/\/IETF\/\/DTD HTML 3\/\/|^-\/\/IETF\/\/DTD HTML Level 0\/\/|^-\/\/IETF\/\/DTD HTML Level 1\/\/|^-\/\/IETF\/\/DTD HTML Level 2\/\/|^-\/\/IETF\/\/DTD HTML Level 3\/\/|^-\/\/IETF\/\/DTD HTML Strict Level 0\/\/|^-\/\/IETF\/\/DTD HTML Strict Level 1\/\/|^-\/\/IETF\/\/DTD HTML Strict Level 2\/\/|^-\/\/IETF\/\/DTD HTML Strict Level 3\/\/|^-\/\/IETF\/\/DTD HTML Strict\/\/|^-\/\/IETF\/\/DTD HTML\/\/|^-\/\/Metrius\/\/DTD Metrius Presentational\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 2\.0 HTML Strict\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 2\.0 HTML\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 2\.0 Tables\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 3\.0 HTML Strict\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 3\.0 HTML\/\/|^-\/\/Microsoft\/\/DTD Internet Explorer 3\.0 Tables\/\/|^-\/\/Netscape Comm\. Corp\.\/\/DTD HTML\/\/|^-\/\/Netscape Comm\. Corp\.\/\/DTD Strict HTML\/\/|^-\/\/O'Reilly and Associates\/\/DTD HTML 2\.0\/\/|^-\/\/O'Reilly and Associates\/\/DTD HTML Extended 1\.0\/\/|^-\/\/O'Reilly and Associates\/\/DTD HTML Extended Relaxed 1\.0\/\/|^-\/\/SoftQuad Software\/\/DTD HoTMetaL PRO 6\.0::19990601::extensions to HTML 4\.0\/\/|^-\/\/SoftQuad\/\/DTD HoTMetaL PRO 4\.0::19971010::extensions to HTML 4\.0\/\/|^-\/\/Spyglass\/\/DTD HTML 2\.0 Extended\/\/|^-\/\/SQ\/\/DTD HTML 2\.0 HoTMetaL \+ extensions\/\/|^-\/\/Sun Microsystems Corp\.\/\/DTD HotJava HTML\/\/|^-\/\/Sun Microsystems Corp\.\/\/DTD HotJava Strict HTML\/\/|^-\/\/W3C\/\/DTD HTML 3 1995-03-24\/\/|^-\/\/W3C\/\/DTD HTML 3\.2 Draft\/\/|^-\/\/W3C\/\/DTD HTML 3\.2 Final\/\/|^-\/\/W3C\/\/DTD HTML 3\.2\/\/|^-\/\/W3C\/\/DTD HTML 3\.2S Draft\/\/|^-\/\/W3C\/\/DTD HTML 4\.0 Frameset\/\/|^-\/\/W3C\/\/DTD HTML 4\.0 Transitional\/\/|^-\/\/W3C\/\/DTD HTML Experimental 19960712\/\/|^-\/\/W3C\/\/DTD HTML Experimental 970421\/\/|^-\/\/W3C\/\/DTD W3 HTML\/\/|^-\/\/W3O\/\/DTD W3 HTML 3\.0\/\/|^-\/\/WebTechs\/\/DTD Mozilla HTML 2\.0\/\/|^-\/\/WebTechs\/\/DTD Mozilla HTML\/\//i;

// The one DTD system id that the spec lists as a quirks-mode trigger.
// NOTE(review): the comparison itself happens outside this section; the
// spec compares it case-insensitively -- confirm at the use site.
var quirkySystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd";

// Public id prefixes whose effect depends on the system id. In the spec
// they mean quirks mode when the system id is missing and limited quirks
// mode otherwise (the check lives outside this section).
var conditionallyQuirkyPublicIds = /^-\/\/W3C\/\/DTD HTML 4\.01 Frameset\/\/|^-\/\/W3C\/\/DTD HTML 4\.01 Transitional\/\//i;

// These DTD public ids put the browser in limited quirks mode
var limitedQuirkyPublicIds = /^-\/\/W3C\/\/DTD XHTML 1\.0 Frameset\/\/|^-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\//i;
133
134
135// Element sets below. See the isA() function for a way to test
136// whether an element is a member of a set
// Element names flagged as "special", keyed first by namespace URI and then
// by tag name. Every level is a prototype-less object, so a lookup of an
// unknown namespace or name yields undefined.
var specialSet = (function() {
  // Turn a list of tag names into a prototype-less { name: true } lookup.
  function toFlags(tagNames) {
    return tagNames.reduce(function(flags, tagName) {
      flags[tagName] = true;
      return flags;
    }, Object.create(null));
  }

  var byNamespace = Object.create(null);

  byNamespace[NAMESPACE.HTML] = toFlags([
    "address", "applet", "area", "article", "aside", "base", "basefont",
    "bgsound", "blockquote", "body", "br", "button", "caption", "center",
    "col", "colgroup", "dd", "details", "dir", "div", "dl", "dt", "embed",
    "fieldset", "figcaption", "figure", "footer", "form", "frame",
    "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header",
    "hgroup", "hr", "html", "iframe", "img", "input", "li", "link",
    "listing", "main", "marquee", "menu", "meta", "nav", "noembed",
    "noframes", "noscript", "object", "ol", "p", "param", "plaintext",
    "pre", "script", "section", "select", "source", "style", "summary",
    "table", "tbody", "td", "template", "textarea", "tfoot", "th", "thead",
    "title", "tr", "track", "ul", "wbr",
    // Note that "xmp" was removed from the "special" set in the latest
    // spec, apparently by accident; see
    // https://github.com/whatwg/html/pull/1919
    "xmp"
  ]);

  byNamespace[NAMESPACE.SVG] = toFlags(["foreignObject", "desc", "title"]);

  byNamespace[NAMESPACE.MATHML] = toFlags([
    "mi", "mo", "mn", "ms", "mtext", "annotation-xml"
  ]);

  return byNamespace;
}());
173
// HTML-namespace set holding exactly the address, div and p tags.
var addressdivpSet = Object.create(null);
addressdivpSet[NAMESPACE.HTML] = {
  __proto__: null,
  address: true,
  div: true,
  p: true
};
180
// HTML-namespace set holding the dd and dt tags.
var dddtSet = Object.create(null);
dddtSet[NAMESPACE.HTML] = {
  __proto__: null,
  dd: true,
  dt: true
};
186
// HTML-namespace set holding table, its three section elements, and tr.
var tablesectionrowSet = Object.create(null);
tablesectionrowSet[NAMESPACE.HTML] = {
  __proto__: null,
  table: true,
  thead: true,
  tbody: true,
  tfoot: true,
  tr: true
};
192
// HTML tags in the "implied end tags" set (by its name, the elements the
// spec's "generate implied end tags" step may close -- confirm at the
// use site).
var impliedEndTagsSet = Object.create(null);
impliedEndTagsSet[NAMESPACE.HTML] = Object.create(null);
[
  "dd", "dt", "li", "menuitem", "optgroup", "option",
  "p", "rb", "rp", "rt", "rtc"
].forEach(function(tagName) {
  impliedEndTagsSet[NAMESPACE.HTML][tagName] = true;
});
199
// HTML tags in the "thorough" implied-end-tags set: a wider list than
// impliedEndTagsSet that also names the table-related elements caption,
// colgroup, tbody, td, tfoot, th, thead and tr.
var thoroughImpliedEndTagsSet = Object.create(null);
thoroughImpliedEndTagsSet[NAMESPACE.HTML] = Object.create(null);
[
  "caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p",
  "rb", "rp", "rt", "rtc", "tbody", "td", "tfoot", "th", "thead", "tr"
].forEach(function(tagName) {
  thoroughImpliedEndTagsSet[NAMESPACE.HTML][tagName] = true;
});
208
// HTML-namespace set: table, template and html (by its name, the elements
// that mark a "table context" -- confirm at the use site).
var tableContextSet = Object.create(null);
tableContextSet[NAMESPACE.HTML] = {
  __proto__: null,
  table: true,
  template: true,
  html: true
};
214
// HTML-namespace set: the three table section elements plus template and
// html (by its name, the "table body context" markers -- confirm at the
// use site).
var tableBodyContextSet = Object.create(null);
tableBodyContextSet[NAMESPACE.HTML] = {
  __proto__: null,
  tbody: true,
  tfoot: true,
  thead: true,
  template: true,
  html: true
};
220
// HTML-namespace set: tr, template and html (by its name, the "table row
// context" markers -- confirm at the use site).
var tableRowContextSet = Object.create(null);
tableRowContextSet[NAMESPACE.HTML] = {
  __proto__: null,
  tr: true,
  template: true,
  html: true
};
226
// Form-associated HTML elements.
// See http://www.w3.org/TR/html5/forms.html#form-associated-element
var formassociatedSet = Object.create(null);
formassociatedSet[NAMESPACE.HTML] = {
  __proto__: null,
  button: true,
  fieldset: true,
  input: true,
  keygen: true,
  object: true,
  output: true,
  select: true,
  textarea: true,
  img: true
};
235
// Base "in scope" element set, keyed by namespace URI and then tag name.
// inListItemScopeSet and inButtonScopeSet are derived from this object
// through prototype inheritance.
var inScopeSet = (function() {
  // Turn a list of tag names into a prototype-less { name: true } lookup.
  function toFlags(tagNames) {
    return tagNames.reduce(function(flags, tagName) {
      flags[tagName] = true;
      return flags;
    }, Object.create(null));
  }

  var byNamespace = Object.create(null);

  byNamespace[NAMESPACE.HTML] = toFlags([
    "applet", "caption", "html", "table", "td", "th", "marquee", "object",
    "template"
  ]);

  byNamespace[NAMESPACE.MATHML] = toFlags([
    "mi", "mo", "mn", "ms", "mtext", "annotation-xml"
  ]);

  byNamespace[NAMESPACE.SVG] = toFlags(["foreignObject", "desc", "title"]);

  return byNamespace;
}());
252
// "List item scope" set: everything in inScopeSet plus the HTML ol and ul
// tags. Both levels inherit from inScopeSet via the prototype chain, so
// the MathML and SVG entries are shared rather than copied.
var inListItemScopeSet = Object.create(inScopeSet);
(function(htmlTags) {
  htmlTags.ol = true;
  htmlTags.ul = true;
  inListItemScopeSet[NAMESPACE.HTML] = htmlTags;
}(Object.create(inScopeSet[NAMESPACE.HTML])));
258
// "Button scope" set: everything in inScopeSet plus the HTML button tag.
// Both levels inherit from inScopeSet via the prototype chain, so the
// MathML and SVG entries are shared rather than copied.
var inButtonScopeSet = Object.create(inScopeSet);
(function(htmlTags) {
  htmlTags.button = true;
  inButtonScopeSet[NAMESPACE.HTML] = htmlTags;
}(Object.create(inScopeSet[NAMESPACE.HTML])));
263
// "Table scope" set: only the HTML html, table and template tags. Unlike
// the list-item and button scope sets, it does not inherit from inScopeSet.
var inTableScopeSet = Object.create(null);
inTableScopeSet[NAMESPACE.HTML] = {
  __proto__: null,
  html: true,
  table: true,
  template: true
};
269
// Select scope is defined by exclusion: the set of elements for select
// scope is everything *except* the tags listed here.
var invertedSelectScopeSet = Object.create(null);
invertedSelectScopeSet[NAMESPACE.HTML] = {
  __proto__: null,
  optgroup: true,
  option: true
};
276
// MathML tags in the "text integration point" set (only a MathML-namespace
// entry is defined).
var mathmlTextIntegrationPointSet = Object.create(null);
mathmlTextIntegrationPointSet[NAMESPACE.MATHML] = {
  __proto__: null,
  "mi": true, "mo": true, "mn": true, "ms": true, "mtext": true
};
286
// SVG tags in the "HTML integration point" set (only an SVG-namespace
// entry is defined).
var htmlIntegrationPointSet = Object.create(null);
htmlIntegrationPointSet[NAMESPACE.SVG] = {
  __proto__: null,
  "foreignObject": true, "desc": true, "title": true
};
294
// Maps a qualified attribute name (as written in the markup) to the
// namespace URI that attribute belongs to. Prototype-less, so a lookup of
// any other attribute name yields undefined.
var foreignAttributes = Object.create(null);

// xlink:* attributes
["actuate", "arcrole", "href", "role", "show", "title", "type"]
  .forEach(function(localName) {
    foreignAttributes["xlink:" + localName] = NAMESPACE.XLINK;
  });

// xml:* attributes
["base", "lang", "space"].forEach(function(localName) {
  foreignAttributes["xml:" + localName] = NAMESPACE.XML;
});

// namespace declarations
foreignAttributes["xmlns"] = NAMESPACE.XMLNS;
foreignAttributes["xmlns:xlink"] = NAMESPACE.XMLNS;
304
305
306// Lowercase to mixed case mapping for SVG attributes and tagnames
// Maps an all-lowercase SVG attribute name to its mixed-case spelling.
// Each key is simply the lowercased form of its value, so the table is
// derived from the list of mixed-case names. Prototype-less: a lookup of
// any other name yields undefined.
var svgAttrAdjustments = [
  "attributeName", "attributeType", "baseFrequency", "baseProfile",
  "calcMode", "clipPathUnits", "diffuseConstant", "edgeMode",
  "filterUnits", "glyphRef", "gradientTransform", "gradientUnits",
  "kernelMatrix", "kernelUnitLength", "keyPoints", "keySplines",
  "keyTimes", "lengthAdjust", "limitingConeAngle", "markerHeight",
  "markerUnits", "markerWidth", "maskContentUnits", "maskUnits",
  "numOctaves", "pathLength", "patternContentUnits", "patternTransform",
  "patternUnits", "pointsAtX", "pointsAtY", "pointsAtZ", "preserveAlpha",
  "preserveAspectRatio", "primitiveUnits", "refX", "refY", "repeatCount",
  "repeatDur", "requiredExtensions", "requiredFeatures",
  "specularConstant", "specularExponent", "spreadMethod", "startOffset",
  "stdDeviation", "stitchTiles", "surfaceScale", "systemLanguage",
  "tableValues", "targetX", "targetY", "textLength", "viewBox",
  "viewTarget", "xChannelSelector", "yChannelSelector", "zoomAndPan"
].reduce(function(adjustments, mixedCaseName) {
  adjustments[mixedCaseName.toLowerCase()] = mixedCaseName;
  return adjustments;
}, Object.create(null));
342
// Maps an all-lowercase SVG tag name to its mixed-case spelling. The
// entries follow the tag-name adjustment table in the HTML spec's rules
// for parsing tokens in foreign content, including fedropshadow ->
// feDropShadow. Prototype-less, so a lookup of any other name yields
// undefined.
var svgTagNameAdjustments = {
  __proto__: null,
  altglyph: "altGlyph", altglyphdef: "altGlyphDef",
  altglyphitem: "altGlyphItem", animatecolor: "animateColor",
  animatemotion: "animateMotion", animatetransform: "animateTransform",
  clippath: "clipPath", feblend: "feBlend",
  fecolormatrix: "feColorMatrix",
  fecomponenttransfer: "feComponentTransfer", fecomposite: "feComposite",
  feconvolvematrix: "feConvolveMatrix",
  fediffuselighting: "feDiffuseLighting",
  fedisplacementmap: "feDisplacementMap",
  fedistantlight: "feDistantLight", fedropshadow: "feDropShadow",
  feflood: "feFlood",
  fefunca: "feFuncA", fefuncb: "feFuncB",
  fefuncg: "feFuncG", fefuncr: "feFuncR",
  fegaussianblur: "feGaussianBlur", feimage: "feImage",
  femerge: "feMerge", femergenode: "feMergeNode",
  femorphology: "feMorphology", feoffset: "feOffset",
  fepointlight: "fePointLight", fespecularlighting: "feSpecularLighting",
  fespotlight: "feSpotLight", fetile: "feTile",
  feturbulence: "feTurbulence", foreignobject: "foreignObject",
  glyphref: "glyphRef", lineargradient: "linearGradient",
  radialgradient: "radialGradient", textpath: "textPath"
};
366
367
368// Data for parsing numeric and named character references
369// These next 3 objects are direct translations of tables
370// in the HTML spec into JavaScript object format
// Replacement table for numeric character references: maps the number
// written in the reference (0x00 and selected values in 0x80-0x9F) to the
// code point that is used in its place. Prototype-less, so a lookup of any
// other number yields undefined.
var numericCharRefReplacements = Object.create(null);
[
  [0x00, 0xFFFD],
  [0x80, 0x20AC], [0x82, 0x201A], [0x83, 0x0192], [0x84, 0x201E],
  [0x85, 0x2026], [0x86, 0x2020], [0x87, 0x2021], [0x88, 0x02C6],
  [0x89, 0x2030], [0x8A, 0x0160], [0x8B, 0x2039], [0x8C, 0x0152],
  [0x8E, 0x017D],
  [0x91, 0x2018], [0x92, 0x2019], [0x93, 0x201C], [0x94, 0x201D],
  [0x95, 0x2022], [0x96, 0x2013], [0x97, 0x2014], [0x98, 0x02DC],
  [0x99, 0x2122], [0x9A, 0x0161], [0x9B, 0x203A], [0x9C, 0x0153],
  [0x9E, 0x017E], [0x9F, 0x0178]
].forEach(function(pair) {
  numericCharRefReplacements[pair[0]] = pair[1];
});
380
381/*
382 * This table is generated with test/tools/update-entities.js
383 */
384var namedCharRefs = {
385 __proto__: null,
386 "AElig":0xc6, "AElig;":0xc6,
387 "AMP":0x26, "AMP;":0x26,
388 "Aacute":0xc1, "Aacute;":0xc1,
389 "Abreve;":0x102, "Acirc":0xc2,
390 "Acirc;":0xc2, "Acy;":0x410,
391 "Afr;":[0xd835,0xdd04], "Agrave":0xc0,
392 "Agrave;":0xc0, "Alpha;":0x391,
393 "Amacr;":0x100, "And;":0x2a53,
394 "Aogon;":0x104, "Aopf;":[0xd835,0xdd38],
395 "ApplyFunction;":0x2061, "Aring":0xc5,
396 "Aring;":0xc5, "Ascr;":[0xd835,0xdc9c],
397 "Assign;":0x2254, "Atilde":0xc3,
398 "Atilde;":0xc3, "Auml":0xc4,
399 "Auml;":0xc4, "Backslash;":0x2216,
400 "Barv;":0x2ae7, "Barwed;":0x2306,
401 "Bcy;":0x411, "Because;":0x2235,
402 "Bernoullis;":0x212c, "Beta;":0x392,
403 "Bfr;":[0xd835,0xdd05], "Bopf;":[0xd835,0xdd39],
404 "Breve;":0x2d8, "Bscr;":0x212c,
405 "Bumpeq;":0x224e, "CHcy;":0x427,
406 "COPY":0xa9, "COPY;":0xa9,
407 "Cacute;":0x106, "Cap;":0x22d2,
408 "CapitalDifferentialD;":0x2145, "Cayleys;":0x212d,
409 "Ccaron;":0x10c, "Ccedil":0xc7,
410 "Ccedil;":0xc7, "Ccirc;":0x108,
411 "Cconint;":0x2230, "Cdot;":0x10a,
412 "Cedilla;":0xb8, "CenterDot;":0xb7,
413 "Cfr;":0x212d, "Chi;":0x3a7,
414 "CircleDot;":0x2299, "CircleMinus;":0x2296,
415 "CirclePlus;":0x2295, "CircleTimes;":0x2297,
416 "ClockwiseContourIntegral;":0x2232, "CloseCurlyDoubleQuote;":0x201d,
417 "CloseCurlyQuote;":0x2019, "Colon;":0x2237,
418 "Colone;":0x2a74, "Congruent;":0x2261,
419 "Conint;":0x222f, "ContourIntegral;":0x222e,
420 "Copf;":0x2102, "Coproduct;":0x2210,
421 "CounterClockwiseContourIntegral;":0x2233, "Cross;":0x2a2f,
422 "Cscr;":[0xd835,0xdc9e], "Cup;":0x22d3,
423 "CupCap;":0x224d, "DD;":0x2145,
424 "DDotrahd;":0x2911, "DJcy;":0x402,
425 "DScy;":0x405, "DZcy;":0x40f,
426 "Dagger;":0x2021, "Darr;":0x21a1,
427 "Dashv;":0x2ae4, "Dcaron;":0x10e,
428 "Dcy;":0x414, "Del;":0x2207,
429 "Delta;":0x394, "Dfr;":[0xd835,0xdd07],
430 "DiacriticalAcute;":0xb4, "DiacriticalDot;":0x2d9,
431 "DiacriticalDoubleAcute;":0x2dd, "DiacriticalGrave;":0x60,
432 "DiacriticalTilde;":0x2dc, "Diamond;":0x22c4,
433 "DifferentialD;":0x2146, "Dopf;":[0xd835,0xdd3b],
434 "Dot;":0xa8, "DotDot;":0x20dc,
435 "DotEqual;":0x2250, "DoubleContourIntegral;":0x222f,
436 "DoubleDot;":0xa8, "DoubleDownArrow;":0x21d3,
437 "DoubleLeftArrow;":0x21d0, "DoubleLeftRightArrow;":0x21d4,
438 "DoubleLeftTee;":0x2ae4, "DoubleLongLeftArrow;":0x27f8,
439 "DoubleLongLeftRightArrow;":0x27fa, "DoubleLongRightArrow;":0x27f9,
440 "DoubleRightArrow;":0x21d2, "DoubleRightTee;":0x22a8,
441 "DoubleUpArrow;":0x21d1, "DoubleUpDownArrow;":0x21d5,
442 "DoubleVerticalBar;":0x2225, "DownArrow;":0x2193,
443 "DownArrowBar;":0x2913, "DownArrowUpArrow;":0x21f5,
444 "DownBreve;":0x311, "DownLeftRightVector;":0x2950,
445 "DownLeftTeeVector;":0x295e, "DownLeftVector;":0x21bd,
446 "DownLeftVectorBar;":0x2956, "DownRightTeeVector;":0x295f,
447 "DownRightVector;":0x21c1, "DownRightVectorBar;":0x2957,
448 "DownTee;":0x22a4, "DownTeeArrow;":0x21a7,
449 "Downarrow;":0x21d3, "Dscr;":[0xd835,0xdc9f],
450 "Dstrok;":0x110, "ENG;":0x14a,
451 "ETH":0xd0, "ETH;":0xd0,
452 "Eacute":0xc9, "Eacute;":0xc9,
453 "Ecaron;":0x11a, "Ecirc":0xca,
454 "Ecirc;":0xca, "Ecy;":0x42d,
455 "Edot;":0x116, "Efr;":[0xd835,0xdd08],
456 "Egrave":0xc8, "Egrave;":0xc8,
457 "Element;":0x2208, "Emacr;":0x112,
458 "EmptySmallSquare;":0x25fb, "EmptyVerySmallSquare;":0x25ab,
459 "Eogon;":0x118, "Eopf;":[0xd835,0xdd3c],
460 "Epsilon;":0x395, "Equal;":0x2a75,
461 "EqualTilde;":0x2242, "Equilibrium;":0x21cc,
462 "Escr;":0x2130, "Esim;":0x2a73,
463 "Eta;":0x397, "Euml":0xcb,
464 "Euml;":0xcb, "Exists;":0x2203,
465 "ExponentialE;":0x2147, "Fcy;":0x424,
466 "Ffr;":[0xd835,0xdd09], "FilledSmallSquare;":0x25fc,
467 "FilledVerySmallSquare;":0x25aa, "Fopf;":[0xd835,0xdd3d],
468 "ForAll;":0x2200, "Fouriertrf;":0x2131,
469 "Fscr;":0x2131, "GJcy;":0x403,
470 "GT":0x3e, "GT;":0x3e,
471 "Gamma;":0x393, "Gammad;":0x3dc,
472 "Gbreve;":0x11e, "Gcedil;":0x122,
473 "Gcirc;":0x11c, "Gcy;":0x413,
474 "Gdot;":0x120, "Gfr;":[0xd835,0xdd0a],
475 "Gg;":0x22d9, "Gopf;":[0xd835,0xdd3e],
476 "GreaterEqual;":0x2265, "GreaterEqualLess;":0x22db,
477 "GreaterFullEqual;":0x2267, "GreaterGreater;":0x2aa2,
478 "GreaterLess;":0x2277, "GreaterSlantEqual;":0x2a7e,
479 "GreaterTilde;":0x2273, "Gscr;":[0xd835,0xdca2],
480 "Gt;":0x226b, "HARDcy;":0x42a,
481 "Hacek;":0x2c7, "Hat;":0x5e,
482 "Hcirc;":0x124, "Hfr;":0x210c,
483 "HilbertSpace;":0x210b, "Hopf;":0x210d,
484 "HorizontalLine;":0x2500, "Hscr;":0x210b,
485 "Hstrok;":0x126, "HumpDownHump;":0x224e,
486 "HumpEqual;":0x224f, "IEcy;":0x415,
487 "IJlig;":0x132, "IOcy;":0x401,
488 "Iacute":0xcd, "Iacute;":0xcd,
489 "Icirc":0xce, "Icirc;":0xce,
490 "Icy;":0x418, "Idot;":0x130,
491 "Ifr;":0x2111, "Igrave":0xcc,
492 "Igrave;":0xcc, "Im;":0x2111,
493 "Imacr;":0x12a, "ImaginaryI;":0x2148,
494 "Implies;":0x21d2, "Int;":0x222c,
495 "Integral;":0x222b, "Intersection;":0x22c2,
496 "InvisibleComma;":0x2063, "InvisibleTimes;":0x2062,
497 "Iogon;":0x12e, "Iopf;":[0xd835,0xdd40],
498 "Iota;":0x399, "Iscr;":0x2110,
499 "Itilde;":0x128, "Iukcy;":0x406,
500 "Iuml":0xcf, "Iuml;":0xcf,
501 "Jcirc;":0x134, "Jcy;":0x419,
502 "Jfr;":[0xd835,0xdd0d], "Jopf;":[0xd835,0xdd41],
503 "Jscr;":[0xd835,0xdca5], "Jsercy;":0x408,
504 "Jukcy;":0x404, "KHcy;":0x425,
505 "KJcy;":0x40c, "Kappa;":0x39a,
506 "Kcedil;":0x136, "Kcy;":0x41a,
507 "Kfr;":[0xd835,0xdd0e], "Kopf;":[0xd835,0xdd42],
508 "Kscr;":[0xd835,0xdca6], "LJcy;":0x409,
509 "LT":0x3c, "LT;":0x3c,
510 "Lacute;":0x139, "Lambda;":0x39b,
511 "Lang;":0x27ea, "Laplacetrf;":0x2112,
512 "Larr;":0x219e, "Lcaron;":0x13d,
513 "Lcedil;":0x13b, "Lcy;":0x41b,
514 "LeftAngleBracket;":0x27e8, "LeftArrow;":0x2190,
515 "LeftArrowBar;":0x21e4, "LeftArrowRightArrow;":0x21c6,
516 "LeftCeiling;":0x2308, "LeftDoubleBracket;":0x27e6,
517 "LeftDownTeeVector;":0x2961, "LeftDownVector;":0x21c3,
518 "LeftDownVectorBar;":0x2959, "LeftFloor;":0x230a,
519 "LeftRightArrow;":0x2194, "LeftRightVector;":0x294e,
520 "LeftTee;":0x22a3, "LeftTeeArrow;":0x21a4,
521 "LeftTeeVector;":0x295a, "LeftTriangle;":0x22b2,
522 "LeftTriangleBar;":0x29cf, "LeftTriangleEqual;":0x22b4,
523 "LeftUpDownVector;":0x2951, "LeftUpTeeVector;":0x2960,
524 "LeftUpVector;":0x21bf, "LeftUpVectorBar;":0x2958,
525 "LeftVector;":0x21bc, "LeftVectorBar;":0x2952,
526 "Leftarrow;":0x21d0, "Leftrightarrow;":0x21d4,
527 "LessEqualGreater;":0x22da, "LessFullEqual;":0x2266,
528 "LessGreater;":0x2276, "LessLess;":0x2aa1,
529 "LessSlantEqual;":0x2a7d, "LessTilde;":0x2272,
530 "Lfr;":[0xd835,0xdd0f], "Ll;":0x22d8,
531 "Lleftarrow;":0x21da, "Lmidot;":0x13f,
532 "LongLeftArrow;":0x27f5, "LongLeftRightArrow;":0x27f7,
533 "LongRightArrow;":0x27f6, "Longleftarrow;":0x27f8,
534 "Longleftrightarrow;":0x27fa, "Longrightarrow;":0x27f9,
535 "Lopf;":[0xd835,0xdd43], "LowerLeftArrow;":0x2199,
536 "LowerRightArrow;":0x2198, "Lscr;":0x2112,
537 "Lsh;":0x21b0, "Lstrok;":0x141,
538 "Lt;":0x226a, "Map;":0x2905,
539 "Mcy;":0x41c, "MediumSpace;":0x205f,
540 "Mellintrf;":0x2133, "Mfr;":[0xd835,0xdd10],
541 "MinusPlus;":0x2213, "Mopf;":[0xd835,0xdd44],
542 "Mscr;":0x2133, "Mu;":0x39c,
543 "NJcy;":0x40a, "Nacute;":0x143,
544 "Ncaron;":0x147, "Ncedil;":0x145,
545 "Ncy;":0x41d, "NegativeMediumSpace;":0x200b,
546 "NegativeThickSpace;":0x200b, "NegativeThinSpace;":0x200b,
547 "NegativeVeryThinSpace;":0x200b, "NestedGreaterGreater;":0x226b,
548 "NestedLessLess;":0x226a, "NewLine;":0xa,
549 "Nfr;":[0xd835,0xdd11], "NoBreak;":0x2060,
550 "NonBreakingSpace;":0xa0, "Nopf;":0x2115,
551 "Not;":0x2aec, "NotCongruent;":0x2262,
552 "NotCupCap;":0x226d, "NotDoubleVerticalBar;":0x2226,
553 "NotElement;":0x2209, "NotEqual;":0x2260,
554 "NotEqualTilde;":[0x2242,0x338], "NotExists;":0x2204,
555 "NotGreater;":0x226f, "NotGreaterEqual;":0x2271,
556 "NotGreaterFullEqual;":[0x2267,0x338], "NotGreaterGreater;":[0x226b,0x338],
557 "NotGreaterLess;":0x2279, "NotGreaterSlantEqual;":[0x2a7e,0x338],
558 "NotGreaterTilde;":0x2275, "NotHumpDownHump;":[0x224e,0x338],
559 "NotHumpEqual;":[0x224f,0x338], "NotLeftTriangle;":0x22ea,
560 "NotLeftTriangleBar;":[0x29cf,0x338], "NotLeftTriangleEqual;":0x22ec,
561 "NotLess;":0x226e, "NotLessEqual;":0x2270,
562 "NotLessGreater;":0x2278, "NotLessLess;":[0x226a,0x338],
563 "NotLessSlantEqual;":[0x2a7d,0x338], "NotLessTilde;":0x2274,
564 "NotNestedGreaterGreater;":[0x2aa2,0x338], "NotNestedLessLess;":[0x2aa1,0x338],
565 "NotPrecedes;":0x2280, "NotPrecedesEqual;":[0x2aaf,0x338],
566 "NotPrecedesSlantEqual;":0x22e0, "NotReverseElement;":0x220c,
567 "NotRightTriangle;":0x22eb, "NotRightTriangleBar;":[0x29d0,0x338],
568 "NotRightTriangleEqual;":0x22ed, "NotSquareSubset;":[0x228f,0x338],
569 "NotSquareSubsetEqual;":0x22e2, "NotSquareSuperset;":[0x2290,0x338],
570 "NotSquareSupersetEqual;":0x22e3, "NotSubset;":[0x2282,0x20d2],
571 "NotSubsetEqual;":0x2288, "NotSucceeds;":0x2281,
572 "NotSucceedsEqual;":[0x2ab0,0x338], "NotSucceedsSlantEqual;":0x22e1,
573 "NotSucceedsTilde;":[0x227f,0x338], "NotSuperset;":[0x2283,0x20d2],
574 "NotSupersetEqual;":0x2289, "NotTilde;":0x2241,
575 "NotTildeEqual;":0x2244, "NotTildeFullEqual;":0x2247,
576 "NotTildeTilde;":0x2249, "NotVerticalBar;":0x2224,
577 "Nscr;":[0xd835,0xdca9], "Ntilde":0xd1,
578 "Ntilde;":0xd1, "Nu;":0x39d,
579 "OElig;":0x152, "Oacute":0xd3,
580 "Oacute;":0xd3, "Ocirc":0xd4,
581 "Ocirc;":0xd4, "Ocy;":0x41e,
582 "Odblac;":0x150, "Ofr;":[0xd835,0xdd12],
583 "Ograve":0xd2, "Ograve;":0xd2,
584 "Omacr;":0x14c, "Omega;":0x3a9,
585 "Omicron;":0x39f, "Oopf;":[0xd835,0xdd46],
586 "OpenCurlyDoubleQuote;":0x201c, "OpenCurlyQuote;":0x2018,
587 "Or;":0x2a54, "Oscr;":[0xd835,0xdcaa],
588 "Oslash":0xd8, "Oslash;":0xd8,
589 "Otilde":0xd5, "Otilde;":0xd5,
590 "Otimes;":0x2a37, "Ouml":0xd6,
591 "Ouml;":0xd6, "OverBar;":0x203e,
592 "OverBrace;":0x23de, "OverBracket;":0x23b4,
593 "OverParenthesis;":0x23dc, "PartialD;":0x2202,
594 "Pcy;":0x41f, "Pfr;":[0xd835,0xdd13],
595 "Phi;":0x3a6, "Pi;":0x3a0,
596 "PlusMinus;":0xb1, "Poincareplane;":0x210c,
597 "Popf;":0x2119, "Pr;":0x2abb,
598 "Precedes;":0x227a, "PrecedesEqual;":0x2aaf,
599 "PrecedesSlantEqual;":0x227c, "PrecedesTilde;":0x227e,
600 "Prime;":0x2033, "Product;":0x220f,
601 "Proportion;":0x2237, "Proportional;":0x221d,
602 "Pscr;":[0xd835,0xdcab], "Psi;":0x3a8,
603 "QUOT":0x22, "QUOT;":0x22,
604 "Qfr;":[0xd835,0xdd14], "Qopf;":0x211a,
605 "Qscr;":[0xd835,0xdcac], "RBarr;":0x2910,
606 "REG":0xae, "REG;":0xae,
607 "Racute;":0x154, "Rang;":0x27eb,
608 "Rarr;":0x21a0, "Rarrtl;":0x2916,
609 "Rcaron;":0x158, "Rcedil;":0x156,
610 "Rcy;":0x420, "Re;":0x211c,
611 "ReverseElement;":0x220b, "ReverseEquilibrium;":0x21cb,
612 "ReverseUpEquilibrium;":0x296f, "Rfr;":0x211c,
613 "Rho;":0x3a1, "RightAngleBracket;":0x27e9,
614 "RightArrow;":0x2192, "RightArrowBar;":0x21e5,
615 "RightArrowLeftArrow;":0x21c4, "RightCeiling;":0x2309,
616 "RightDoubleBracket;":0x27e7, "RightDownTeeVector;":0x295d,
617 "RightDownVector;":0x21c2, "RightDownVectorBar;":0x2955,
618 "RightFloor;":0x230b, "RightTee;":0x22a2,
619 "RightTeeArrow;":0x21a6, "RightTeeVector;":0x295b,
620 "RightTriangle;":0x22b3, "RightTriangleBar;":0x29d0,
621 "RightTriangleEqual;":0x22b5, "RightUpDownVector;":0x294f,
622 "RightUpTeeVector;":0x295c, "RightUpVector;":0x21be,
623 "RightUpVectorBar;":0x2954, "RightVector;":0x21c0,
624 "RightVectorBar;":0x2953, "Rightarrow;":0x21d2,
625 "Ropf;":0x211d, "RoundImplies;":0x2970,
626 "Rrightarrow;":0x21db, "Rscr;":0x211b,
627 "Rsh;":0x21b1, "RuleDelayed;":0x29f4,
628 "SHCHcy;":0x429, "SHcy;":0x428,
629 "SOFTcy;":0x42c, "Sacute;":0x15a,
630 "Sc;":0x2abc, "Scaron;":0x160,
631 "Scedil;":0x15e, "Scirc;":0x15c,
632 "Scy;":0x421, "Sfr;":[0xd835,0xdd16],
633 "ShortDownArrow;":0x2193, "ShortLeftArrow;":0x2190,
634 "ShortRightArrow;":0x2192, "ShortUpArrow;":0x2191,
635 "Sigma;":0x3a3, "SmallCircle;":0x2218,
636 "Sopf;":[0xd835,0xdd4a], "Sqrt;":0x221a,
637 "Square;":0x25a1, "SquareIntersection;":0x2293,
638 "SquareSubset;":0x228f, "SquareSubsetEqual;":0x2291,
639 "SquareSuperset;":0x2290, "SquareSupersetEqual;":0x2292,
640 "SquareUnion;":0x2294, "Sscr;":[0xd835,0xdcae],
641 "Star;":0x22c6, "Sub;":0x22d0,
642 "Subset;":0x22d0, "SubsetEqual;":0x2286,
643 "Succeeds;":0x227b, "SucceedsEqual;":0x2ab0,
644 "SucceedsSlantEqual;":0x227d, "SucceedsTilde;":0x227f,
645 "SuchThat;":0x220b, "Sum;":0x2211,
646 "Sup;":0x22d1, "Superset;":0x2283,
647 "SupersetEqual;":0x2287, "Supset;":0x22d1,
648 "THORN":0xde, "THORN;":0xde,
649 "TRADE;":0x2122, "TSHcy;":0x40b,
650 "TScy;":0x426, "Tab;":0x9,
651 "Tau;":0x3a4, "Tcaron;":0x164,
652 "Tcedil;":0x162, "Tcy;":0x422,
653 "Tfr;":[0xd835,0xdd17], "Therefore;":0x2234,
654 "Theta;":0x398, "ThickSpace;":[0x205f,0x200a],
655 "ThinSpace;":0x2009, "Tilde;":0x223c,
656 "TildeEqual;":0x2243, "TildeFullEqual;":0x2245,
657 "TildeTilde;":0x2248, "Topf;":[0xd835,0xdd4b],
658 "TripleDot;":0x20db, "Tscr;":[0xd835,0xdcaf],
659 "Tstrok;":0x166, "Uacute":0xda,
660 "Uacute;":0xda, "Uarr;":0x219f,
661 "Uarrocir;":0x2949, "Ubrcy;":0x40e,
662 "Ubreve;":0x16c, "Ucirc":0xdb,
663 "Ucirc;":0xdb, "Ucy;":0x423,
664 "Udblac;":0x170, "Ufr;":[0xd835,0xdd18],
665 "Ugrave":0xd9, "Ugrave;":0xd9,
666 "Umacr;":0x16a, "UnderBar;":0x5f,
667 "UnderBrace;":0x23df, "UnderBracket;":0x23b5,
668 "UnderParenthesis;":0x23dd, "Union;":0x22c3,
669 "UnionPlus;":0x228e, "Uogon;":0x172,
670 "Uopf;":[0xd835,0xdd4c], "UpArrow;":0x2191,
671 "UpArrowBar;":0x2912, "UpArrowDownArrow;":0x21c5,
672 "UpDownArrow;":0x2195, "UpEquilibrium;":0x296e,
673 "UpTee;":0x22a5, "UpTeeArrow;":0x21a5,
674 "Uparrow;":0x21d1, "Updownarrow;":0x21d5,
675 "UpperLeftArrow;":0x2196, "UpperRightArrow;":0x2197,
676 "Upsi;":0x3d2, "Upsilon;":0x3a5,
677 "Uring;":0x16e, "Uscr;":[0xd835,0xdcb0],
678 "Utilde;":0x168, "Uuml":0xdc,
679 "Uuml;":0xdc, "VDash;":0x22ab,
680 "Vbar;":0x2aeb, "Vcy;":0x412,
681 "Vdash;":0x22a9, "Vdashl;":0x2ae6,
682 "Vee;":0x22c1, "Verbar;":0x2016,
683 "Vert;":0x2016, "VerticalBar;":0x2223,
684 "VerticalLine;":0x7c, "VerticalSeparator;":0x2758,
685 "VerticalTilde;":0x2240, "VeryThinSpace;":0x200a,
686 "Vfr;":[0xd835,0xdd19], "Vopf;":[0xd835,0xdd4d],
687 "Vscr;":[0xd835,0xdcb1], "Vvdash;":0x22aa,
688 "Wcirc;":0x174, "Wedge;":0x22c0,
689 "Wfr;":[0xd835,0xdd1a], "Wopf;":[0xd835,0xdd4e],
690 "Wscr;":[0xd835,0xdcb2], "Xfr;":[0xd835,0xdd1b],
691 "Xi;":0x39e, "Xopf;":[0xd835,0xdd4f],
692 "Xscr;":[0xd835,0xdcb3], "YAcy;":0x42f,
693 "YIcy;":0x407, "YUcy;":0x42e,
694 "Yacute":0xdd, "Yacute;":0xdd,
695 "Ycirc;":0x176, "Ycy;":0x42b,
696 "Yfr;":[0xd835,0xdd1c], "Yopf;":[0xd835,0xdd50],
697 "Yscr;":[0xd835,0xdcb4], "Yuml;":0x178,
698 "ZHcy;":0x416, "Zacute;":0x179,
699 "Zcaron;":0x17d, "Zcy;":0x417,
700 "Zdot;":0x17b, "ZeroWidthSpace;":0x200b,
701 "Zeta;":0x396, "Zfr;":0x2128,
702 "Zopf;":0x2124, "Zscr;":[0xd835,0xdcb5],
703 "aacute":0xe1, "aacute;":0xe1,
704 "abreve;":0x103, "ac;":0x223e,
705 "acE;":[0x223e,0x333], "acd;":0x223f,
706 "acirc":0xe2, "acirc;":0xe2,
707 "acute":0xb4, "acute;":0xb4,
708 "acy;":0x430, "aelig":0xe6,
709 "aelig;":0xe6, "af;":0x2061,
710 "afr;":[0xd835,0xdd1e], "agrave":0xe0,
711 "agrave;":0xe0, "alefsym;":0x2135,
712 "aleph;":0x2135, "alpha;":0x3b1,
713 "amacr;":0x101, "amalg;":0x2a3f,
714 "amp":0x26, "amp;":0x26,
715 "and;":0x2227, "andand;":0x2a55,
716 "andd;":0x2a5c, "andslope;":0x2a58,
717 "andv;":0x2a5a, "ang;":0x2220,
718 "ange;":0x29a4, "angle;":0x2220,
719 "angmsd;":0x2221, "angmsdaa;":0x29a8,
720 "angmsdab;":0x29a9, "angmsdac;":0x29aa,
721 "angmsdad;":0x29ab, "angmsdae;":0x29ac,
722 "angmsdaf;":0x29ad, "angmsdag;":0x29ae,
723 "angmsdah;":0x29af, "angrt;":0x221f,
724 "angrtvb;":0x22be, "angrtvbd;":0x299d,
725 "angsph;":0x2222, "angst;":0xc5,
726 "angzarr;":0x237c, "aogon;":0x105,
727 "aopf;":[0xd835,0xdd52], "ap;":0x2248,
728 "apE;":0x2a70, "apacir;":0x2a6f,
729 "ape;":0x224a, "apid;":0x224b,
730 "apos;":0x27, "approx;":0x2248,
731 "approxeq;":0x224a, "aring":0xe5,
732 "aring;":0xe5, "ascr;":[0xd835,0xdcb6],
733 "ast;":0x2a, "asymp;":0x2248,
734 "asympeq;":0x224d, "atilde":0xe3,
735 "atilde;":0xe3, "auml":0xe4,
736 "auml;":0xe4, "awconint;":0x2233,
737 "awint;":0x2a11, "bNot;":0x2aed,
738 "backcong;":0x224c, "backepsilon;":0x3f6,
739 "backprime;":0x2035, "backsim;":0x223d,
740 "backsimeq;":0x22cd, "barvee;":0x22bd,
741 "barwed;":0x2305, "barwedge;":0x2305,
742 "bbrk;":0x23b5, "bbrktbrk;":0x23b6,
743 "bcong;":0x224c, "bcy;":0x431,
744 "bdquo;":0x201e, "becaus;":0x2235,
745 "because;":0x2235, "bemptyv;":0x29b0,
746 "bepsi;":0x3f6, "bernou;":0x212c,
747 "beta;":0x3b2, "beth;":0x2136,
748 "between;":0x226c, "bfr;":[0xd835,0xdd1f],
749 "bigcap;":0x22c2, "bigcirc;":0x25ef,
750 "bigcup;":0x22c3, "bigodot;":0x2a00,
751 "bigoplus;":0x2a01, "bigotimes;":0x2a02,
752 "bigsqcup;":0x2a06, "bigstar;":0x2605,
753 "bigtriangledown;":0x25bd, "bigtriangleup;":0x25b3,
754 "biguplus;":0x2a04, "bigvee;":0x22c1,
755 "bigwedge;":0x22c0, "bkarow;":0x290d,
756 "blacklozenge;":0x29eb, "blacksquare;":0x25aa,
757 "blacktriangle;":0x25b4, "blacktriangledown;":0x25be,
758 "blacktriangleleft;":0x25c2, "blacktriangleright;":0x25b8,
759 "blank;":0x2423, "blk12;":0x2592,
760 "blk14;":0x2591, "blk34;":0x2593,
761 "block;":0x2588, "bne;":[0x3d,0x20e5],
762 "bnequiv;":[0x2261,0x20e5], "bnot;":0x2310,
763 "bopf;":[0xd835,0xdd53], "bot;":0x22a5,
764 "bottom;":0x22a5, "bowtie;":0x22c8,
765 "boxDL;":0x2557, "boxDR;":0x2554,
766 "boxDl;":0x2556, "boxDr;":0x2553,
767 "boxH;":0x2550, "boxHD;":0x2566,
768 "boxHU;":0x2569, "boxHd;":0x2564,
769 "boxHu;":0x2567, "boxUL;":0x255d,
770 "boxUR;":0x255a, "boxUl;":0x255c,
771 "boxUr;":0x2559, "boxV;":0x2551,
772 "boxVH;":0x256c, "boxVL;":0x2563,
773 "boxVR;":0x2560, "boxVh;":0x256b,
774 "boxVl;":0x2562, "boxVr;":0x255f,
775 "boxbox;":0x29c9, "boxdL;":0x2555,
776 "boxdR;":0x2552, "boxdl;":0x2510,
777 "boxdr;":0x250c, "boxh;":0x2500,
778 "boxhD;":0x2565, "boxhU;":0x2568,
779 "boxhd;":0x252c, "boxhu;":0x2534,
780 "boxminus;":0x229f, "boxplus;":0x229e,
781 "boxtimes;":0x22a0, "boxuL;":0x255b,
782 "boxuR;":0x2558, "boxul;":0x2518,
783 "boxur;":0x2514, "boxv;":0x2502,
784 "boxvH;":0x256a, "boxvL;":0x2561,
785 "boxvR;":0x255e, "boxvh;":0x253c,
786 "boxvl;":0x2524, "boxvr;":0x251c,
787 "bprime;":0x2035, "breve;":0x2d8,
788 "brvbar":0xa6, "brvbar;":0xa6,
789 "bscr;":[0xd835,0xdcb7], "bsemi;":0x204f,
790 "bsim;":0x223d, "bsime;":0x22cd,
791 "bsol;":0x5c, "bsolb;":0x29c5,
792 "bsolhsub;":0x27c8, "bull;":0x2022,
793 "bullet;":0x2022, "bump;":0x224e,
794 "bumpE;":0x2aae, "bumpe;":0x224f,
795 "bumpeq;":0x224f, "cacute;":0x107,
796 "cap;":0x2229, "capand;":0x2a44,
797 "capbrcup;":0x2a49, "capcap;":0x2a4b,
798 "capcup;":0x2a47, "capdot;":0x2a40,
799 "caps;":[0x2229,0xfe00], "caret;":0x2041,
800 "caron;":0x2c7, "ccaps;":0x2a4d,
801 "ccaron;":0x10d, "ccedil":0xe7,
802 "ccedil;":0xe7, "ccirc;":0x109,
803 "ccups;":0x2a4c, "ccupssm;":0x2a50,
804 "cdot;":0x10b, "cedil":0xb8,
805 "cedil;":0xb8, "cemptyv;":0x29b2,
806 "cent":0xa2, "cent;":0xa2,
807 "centerdot;":0xb7, "cfr;":[0xd835,0xdd20],
808 "chcy;":0x447, "check;":0x2713,
809 "checkmark;":0x2713, "chi;":0x3c7,
810 "cir;":0x25cb, "cirE;":0x29c3,
811 "circ;":0x2c6, "circeq;":0x2257,
812 "circlearrowleft;":0x21ba, "circlearrowright;":0x21bb,
813 "circledR;":0xae, "circledS;":0x24c8,
814 "circledast;":0x229b, "circledcirc;":0x229a,
815 "circleddash;":0x229d, "cire;":0x2257,
816 "cirfnint;":0x2a10, "cirmid;":0x2aef,
817 "cirscir;":0x29c2, "clubs;":0x2663,
818 "clubsuit;":0x2663, "colon;":0x3a,
819 "colone;":0x2254, "coloneq;":0x2254,
820 "comma;":0x2c, "commat;":0x40,
821 "comp;":0x2201, "compfn;":0x2218,
822 "complement;":0x2201, "complexes;":0x2102,
823 "cong;":0x2245, "congdot;":0x2a6d,
824 "conint;":0x222e, "copf;":[0xd835,0xdd54],
825 "coprod;":0x2210, "copy":0xa9,
826 "copy;":0xa9, "copysr;":0x2117,
827 "crarr;":0x21b5, "cross;":0x2717,
828 "cscr;":[0xd835,0xdcb8], "csub;":0x2acf,
829 "csube;":0x2ad1, "csup;":0x2ad0,
830 "csupe;":0x2ad2, "ctdot;":0x22ef,
831 "cudarrl;":0x2938, "cudarrr;":0x2935,
832 "cuepr;":0x22de, "cuesc;":0x22df,
833 "cularr;":0x21b6, "cularrp;":0x293d,
834 "cup;":0x222a, "cupbrcap;":0x2a48,
835 "cupcap;":0x2a46, "cupcup;":0x2a4a,
836 "cupdot;":0x228d, "cupor;":0x2a45,
837 "cups;":[0x222a,0xfe00], "curarr;":0x21b7,
838 "curarrm;":0x293c, "curlyeqprec;":0x22de,
839 "curlyeqsucc;":0x22df, "curlyvee;":0x22ce,
840 "curlywedge;":0x22cf, "curren":0xa4,
841 "curren;":0xa4, "curvearrowleft;":0x21b6,
842 "curvearrowright;":0x21b7, "cuvee;":0x22ce,
843 "cuwed;":0x22cf, "cwconint;":0x2232,
844 "cwint;":0x2231, "cylcty;":0x232d,
845 "dArr;":0x21d3, "dHar;":0x2965,
846 "dagger;":0x2020, "daleth;":0x2138,
847 "darr;":0x2193, "dash;":0x2010,
848 "dashv;":0x22a3, "dbkarow;":0x290f,
849 "dblac;":0x2dd, "dcaron;":0x10f,
850 "dcy;":0x434, "dd;":0x2146,
851 "ddagger;":0x2021, "ddarr;":0x21ca,
852 "ddotseq;":0x2a77, "deg":0xb0,
853 "deg;":0xb0, "delta;":0x3b4,
854 "demptyv;":0x29b1, "dfisht;":0x297f,
855 "dfr;":[0xd835,0xdd21], "dharl;":0x21c3,
856 "dharr;":0x21c2, "diam;":0x22c4,
857 "diamond;":0x22c4, "diamondsuit;":0x2666,
858 "diams;":0x2666, "die;":0xa8,
859 "digamma;":0x3dd, "disin;":0x22f2,
860 "div;":0xf7, "divide":0xf7,
861 "divide;":0xf7, "divideontimes;":0x22c7,
862 "divonx;":0x22c7, "djcy;":0x452,
863 "dlcorn;":0x231e, "dlcrop;":0x230d,
864 "dollar;":0x24, "dopf;":[0xd835,0xdd55],
865 "dot;":0x2d9, "doteq;":0x2250,
866 "doteqdot;":0x2251, "dotminus;":0x2238,
867 "dotplus;":0x2214, "dotsquare;":0x22a1,
868 "doublebarwedge;":0x2306, "downarrow;":0x2193,
869 "downdownarrows;":0x21ca, "downharpoonleft;":0x21c3,
870 "downharpoonright;":0x21c2, "drbkarow;":0x2910,
871 "drcorn;":0x231f, "drcrop;":0x230c,
872 "dscr;":[0xd835,0xdcb9], "dscy;":0x455,
873 "dsol;":0x29f6, "dstrok;":0x111,
874 "dtdot;":0x22f1, "dtri;":0x25bf,
875 "dtrif;":0x25be, "duarr;":0x21f5,
876 "duhar;":0x296f, "dwangle;":0x29a6,
877 "dzcy;":0x45f, "dzigrarr;":0x27ff,
878 "eDDot;":0x2a77, "eDot;":0x2251,
879 "eacute":0xe9, "eacute;":0xe9,
880 "easter;":0x2a6e, "ecaron;":0x11b,
881 "ecir;":0x2256, "ecirc":0xea,
882 "ecirc;":0xea, "ecolon;":0x2255,
883 "ecy;":0x44d, "edot;":0x117,
884 "ee;":0x2147, "efDot;":0x2252,
885 "efr;":[0xd835,0xdd22], "eg;":0x2a9a,
886 "egrave":0xe8, "egrave;":0xe8,
887 "egs;":0x2a96, "egsdot;":0x2a98,
888 "el;":0x2a99, "elinters;":0x23e7,
889 "ell;":0x2113, "els;":0x2a95,
890 "elsdot;":0x2a97, "emacr;":0x113,
891 "empty;":0x2205, "emptyset;":0x2205,
892 "emptyv;":0x2205, "emsp13;":0x2004,
893 "emsp14;":0x2005, "emsp;":0x2003,
894 "eng;":0x14b, "ensp;":0x2002,
895 "eogon;":0x119, "eopf;":[0xd835,0xdd56],
896 "epar;":0x22d5, "eparsl;":0x29e3,
897 "eplus;":0x2a71, "epsi;":0x3b5,
898 "epsilon;":0x3b5, "epsiv;":0x3f5,
899 "eqcirc;":0x2256, "eqcolon;":0x2255,
900 "eqsim;":0x2242, "eqslantgtr;":0x2a96,
901 "eqslantless;":0x2a95, "equals;":0x3d,
902 "equest;":0x225f, "equiv;":0x2261,
903 "equivDD;":0x2a78, "eqvparsl;":0x29e5,
904 "erDot;":0x2253, "erarr;":0x2971,
905 "escr;":0x212f, "esdot;":0x2250,
906 "esim;":0x2242, "eta;":0x3b7,
907 "eth":0xf0, "eth;":0xf0,
908 "euml":0xeb, "euml;":0xeb,
909 "euro;":0x20ac, "excl;":0x21,
910 "exist;":0x2203, "expectation;":0x2130,
911 "exponentiale;":0x2147, "fallingdotseq;":0x2252,
912 "fcy;":0x444, "female;":0x2640,
913 "ffilig;":0xfb03, "fflig;":0xfb00,
914 "ffllig;":0xfb04, "ffr;":[0xd835,0xdd23],
915 "filig;":0xfb01, "fjlig;":[0x66,0x6a],
916 "flat;":0x266d, "fllig;":0xfb02,
917 "fltns;":0x25b1, "fnof;":0x192,
918 "fopf;":[0xd835,0xdd57], "forall;":0x2200,
919 "fork;":0x22d4, "forkv;":0x2ad9,
920 "fpartint;":0x2a0d, "frac12":0xbd,
921 "frac12;":0xbd, "frac13;":0x2153,
922 "frac14":0xbc, "frac14;":0xbc,
923 "frac15;":0x2155, "frac16;":0x2159,
924 "frac18;":0x215b, "frac23;":0x2154,
925 "frac25;":0x2156, "frac34":0xbe,
926 "frac34;":0xbe, "frac35;":0x2157,
927 "frac38;":0x215c, "frac45;":0x2158,
928 "frac56;":0x215a, "frac58;":0x215d,
929 "frac78;":0x215e, "frasl;":0x2044,
930 "frown;":0x2322, "fscr;":[0xd835,0xdcbb],
931 "gE;":0x2267, "gEl;":0x2a8c,
932 "gacute;":0x1f5, "gamma;":0x3b3,
933 "gammad;":0x3dd, "gap;":0x2a86,
934 "gbreve;":0x11f, "gcirc;":0x11d,
935 "gcy;":0x433, "gdot;":0x121,
936 "ge;":0x2265, "gel;":0x22db,
937 "geq;":0x2265, "geqq;":0x2267,
938 "geqslant;":0x2a7e, "ges;":0x2a7e,
939 "gescc;":0x2aa9, "gesdot;":0x2a80,
940 "gesdoto;":0x2a82, "gesdotol;":0x2a84,
941 "gesl;":[0x22db,0xfe00], "gesles;":0x2a94,
942 "gfr;":[0xd835,0xdd24], "gg;":0x226b,
943 "ggg;":0x22d9, "gimel;":0x2137,
944 "gjcy;":0x453, "gl;":0x2277,
945 "glE;":0x2a92, "gla;":0x2aa5,
946 "glj;":0x2aa4, "gnE;":0x2269,
947 "gnap;":0x2a8a, "gnapprox;":0x2a8a,
948 "gne;":0x2a88, "gneq;":0x2a88,
949 "gneqq;":0x2269, "gnsim;":0x22e7,
950 "gopf;":[0xd835,0xdd58], "grave;":0x60,
951 "gscr;":0x210a, "gsim;":0x2273,
952 "gsime;":0x2a8e, "gsiml;":0x2a90,
953 "gt":0x3e, "gt;":0x3e,
954 "gtcc;":0x2aa7, "gtcir;":0x2a7a,
955 "gtdot;":0x22d7, "gtlPar;":0x2995,
956 "gtquest;":0x2a7c, "gtrapprox;":0x2a86,
957 "gtrarr;":0x2978, "gtrdot;":0x22d7,
958 "gtreqless;":0x22db, "gtreqqless;":0x2a8c,
959 "gtrless;":0x2277, "gtrsim;":0x2273,
960 "gvertneqq;":[0x2269,0xfe00], "gvnE;":[0x2269,0xfe00],
961 "hArr;":0x21d4, "hairsp;":0x200a,
962 "half;":0xbd, "hamilt;":0x210b,
963 "hardcy;":0x44a, "harr;":0x2194,
964 "harrcir;":0x2948, "harrw;":0x21ad,
965 "hbar;":0x210f, "hcirc;":0x125,
966 "hearts;":0x2665, "heartsuit;":0x2665,
967 "hellip;":0x2026, "hercon;":0x22b9,
968 "hfr;":[0xd835,0xdd25], "hksearow;":0x2925,
969 "hkswarow;":0x2926, "hoarr;":0x21ff,
970 "homtht;":0x223b, "hookleftarrow;":0x21a9,
971 "hookrightarrow;":0x21aa, "hopf;":[0xd835,0xdd59],
972 "horbar;":0x2015, "hscr;":[0xd835,0xdcbd],
973 "hslash;":0x210f, "hstrok;":0x127,
974 "hybull;":0x2043, "hyphen;":0x2010,
975 "iacute":0xed, "iacute;":0xed,
976 "ic;":0x2063, "icirc":0xee,
977 "icirc;":0xee, "icy;":0x438,
978 "iecy;":0x435, "iexcl":0xa1,
979 "iexcl;":0xa1, "iff;":0x21d4,
980 "ifr;":[0xd835,0xdd26], "igrave":0xec,
981 "igrave;":0xec, "ii;":0x2148,
982 "iiiint;":0x2a0c, "iiint;":0x222d,
983 "iinfin;":0x29dc, "iiota;":0x2129,
984 "ijlig;":0x133, "imacr;":0x12b,
985 "image;":0x2111, "imagline;":0x2110,
986 "imagpart;":0x2111, "imath;":0x131,
987 "imof;":0x22b7, "imped;":0x1b5,
988 "in;":0x2208, "incare;":0x2105,
989 "infin;":0x221e, "infintie;":0x29dd,
990 "inodot;":0x131, "int;":0x222b,
991 "intcal;":0x22ba, "integers;":0x2124,
992 "intercal;":0x22ba, "intlarhk;":0x2a17,
993 "intprod;":0x2a3c, "iocy;":0x451,
994 "iogon;":0x12f, "iopf;":[0xd835,0xdd5a],
995 "iota;":0x3b9, "iprod;":0x2a3c,
996 "iquest":0xbf, "iquest;":0xbf,
997 "iscr;":[0xd835,0xdcbe], "isin;":0x2208,
998 "isinE;":0x22f9, "isindot;":0x22f5,
999 "isins;":0x22f4, "isinsv;":0x22f3,
1000 "isinv;":0x2208, "it;":0x2062,
1001 "itilde;":0x129, "iukcy;":0x456,
1002 "iuml":0xef, "iuml;":0xef,
1003 "jcirc;":0x135, "jcy;":0x439,
1004 "jfr;":[0xd835,0xdd27], "jmath;":0x237,
1005 "jopf;":[0xd835,0xdd5b], "jscr;":[0xd835,0xdcbf],
1006 "jsercy;":0x458, "jukcy;":0x454,
1007 "kappa;":0x3ba, "kappav;":0x3f0,
1008 "kcedil;":0x137, "kcy;":0x43a,
1009 "kfr;":[0xd835,0xdd28], "kgreen;":0x138,
1010 "khcy;":0x445, "kjcy;":0x45c,
1011 "kopf;":[0xd835,0xdd5c], "kscr;":[0xd835,0xdcc0],
1012 "lAarr;":0x21da, "lArr;":0x21d0,
1013 "lAtail;":0x291b, "lBarr;":0x290e,
1014 "lE;":0x2266, "lEg;":0x2a8b,
1015 "lHar;":0x2962, "lacute;":0x13a,
1016 "laemptyv;":0x29b4, "lagran;":0x2112,
1017 "lambda;":0x3bb, "lang;":0x27e8,
1018 "langd;":0x2991, "langle;":0x27e8,
1019 "lap;":0x2a85, "laquo":0xab,
1020 "laquo;":0xab, "larr;":0x2190,
1021 "larrb;":0x21e4, "larrbfs;":0x291f,
1022 "larrfs;":0x291d, "larrhk;":0x21a9,
1023 "larrlp;":0x21ab, "larrpl;":0x2939,
1024 "larrsim;":0x2973, "larrtl;":0x21a2,
1025 "lat;":0x2aab, "latail;":0x2919,
1026 "late;":0x2aad, "lates;":[0x2aad,0xfe00],
1027 "lbarr;":0x290c, "lbbrk;":0x2772,
1028 "lbrace;":0x7b, "lbrack;":0x5b,
1029 "lbrke;":0x298b, "lbrksld;":0x298f,
1030 "lbrkslu;":0x298d, "lcaron;":0x13e,
1031 "lcedil;":0x13c, "lceil;":0x2308,
1032 "lcub;":0x7b, "lcy;":0x43b,
1033 "ldca;":0x2936, "ldquo;":0x201c,
1034 "ldquor;":0x201e, "ldrdhar;":0x2967,
1035 "ldrushar;":0x294b, "ldsh;":0x21b2,
1036 "le;":0x2264, "leftarrow;":0x2190,
1037 "leftarrowtail;":0x21a2, "leftharpoondown;":0x21bd,
1038 "leftharpoonup;":0x21bc, "leftleftarrows;":0x21c7,
1039 "leftrightarrow;":0x2194, "leftrightarrows;":0x21c6,
1040 "leftrightharpoons;":0x21cb, "leftrightsquigarrow;":0x21ad,
1041 "leftthreetimes;":0x22cb, "leg;":0x22da,
1042 "leq;":0x2264, "leqq;":0x2266,
1043 "leqslant;":0x2a7d, "les;":0x2a7d,
1044 "lescc;":0x2aa8, "lesdot;":0x2a7f,
1045 "lesdoto;":0x2a81, "lesdotor;":0x2a83,
1046 "lesg;":[0x22da,0xfe00], "lesges;":0x2a93,
1047 "lessapprox;":0x2a85, "lessdot;":0x22d6,
1048 "lesseqgtr;":0x22da, "lesseqqgtr;":0x2a8b,
1049 "lessgtr;":0x2276, "lesssim;":0x2272,
1050 "lfisht;":0x297c, "lfloor;":0x230a,
1051 "lfr;":[0xd835,0xdd29], "lg;":0x2276,
1052 "lgE;":0x2a91, "lhard;":0x21bd,
1053 "lharu;":0x21bc, "lharul;":0x296a,
1054 "lhblk;":0x2584, "ljcy;":0x459,
1055 "ll;":0x226a, "llarr;":0x21c7,
1056 "llcorner;":0x231e, "llhard;":0x296b,
1057 "lltri;":0x25fa, "lmidot;":0x140,
1058 "lmoust;":0x23b0, "lmoustache;":0x23b0,
1059 "lnE;":0x2268, "lnap;":0x2a89,
1060 "lnapprox;":0x2a89, "lne;":0x2a87,
1061 "lneq;":0x2a87, "lneqq;":0x2268,
1062 "lnsim;":0x22e6, "loang;":0x27ec,
1063 "loarr;":0x21fd, "lobrk;":0x27e6,
1064 "longleftarrow;":0x27f5, "longleftrightarrow;":0x27f7,
1065 "longmapsto;":0x27fc, "longrightarrow;":0x27f6,
1066 "looparrowleft;":0x21ab, "looparrowright;":0x21ac,
1067 "lopar;":0x2985, "lopf;":[0xd835,0xdd5d],
1068 "loplus;":0x2a2d, "lotimes;":0x2a34,
1069 "lowast;":0x2217, "lowbar;":0x5f,
1070 "loz;":0x25ca, "lozenge;":0x25ca,
1071 "lozf;":0x29eb, "lpar;":0x28,
1072 "lparlt;":0x2993, "lrarr;":0x21c6,
1073 "lrcorner;":0x231f, "lrhar;":0x21cb,
1074 "lrhard;":0x296d, "lrm;":0x200e,
1075 "lrtri;":0x22bf, "lsaquo;":0x2039,
1076 "lscr;":[0xd835,0xdcc1], "lsh;":0x21b0,
1077 "lsim;":0x2272, "lsime;":0x2a8d,
1078 "lsimg;":0x2a8f, "lsqb;":0x5b,
1079 "lsquo;":0x2018, "lsquor;":0x201a,
1080 "lstrok;":0x142, "lt":0x3c,
1081 "lt;":0x3c, "ltcc;":0x2aa6,
1082 "ltcir;":0x2a79, "ltdot;":0x22d6,
1083 "lthree;":0x22cb, "ltimes;":0x22c9,
1084 "ltlarr;":0x2976, "ltquest;":0x2a7b,
1085 "ltrPar;":0x2996, "ltri;":0x25c3,
1086 "ltrie;":0x22b4, "ltrif;":0x25c2,
1087 "lurdshar;":0x294a, "luruhar;":0x2966,
1088 "lvertneqq;":[0x2268,0xfe00], "lvnE;":[0x2268,0xfe00],
1089 "mDDot;":0x223a, "macr":0xaf,
1090 "macr;":0xaf, "male;":0x2642,
1091 "malt;":0x2720, "maltese;":0x2720,
1092 "map;":0x21a6, "mapsto;":0x21a6,
1093 "mapstodown;":0x21a7, "mapstoleft;":0x21a4,
1094 "mapstoup;":0x21a5, "marker;":0x25ae,
1095 "mcomma;":0x2a29, "mcy;":0x43c,
1096 "mdash;":0x2014, "measuredangle;":0x2221,
1097 "mfr;":[0xd835,0xdd2a], "mho;":0x2127,
1098 "micro":0xb5, "micro;":0xb5,
1099 "mid;":0x2223, "midast;":0x2a,
1100 "midcir;":0x2af0, "middot":0xb7,
1101 "middot;":0xb7, "minus;":0x2212,
1102 "minusb;":0x229f, "minusd;":0x2238,
1103 "minusdu;":0x2a2a, "mlcp;":0x2adb,
1104 "mldr;":0x2026, "mnplus;":0x2213,
1105 "models;":0x22a7, "mopf;":[0xd835,0xdd5e],
1106 "mp;":0x2213, "mscr;":[0xd835,0xdcc2],
1107 "mstpos;":0x223e, "mu;":0x3bc,
1108 "multimap;":0x22b8, "mumap;":0x22b8,
1109 "nGg;":[0x22d9,0x338], "nGt;":[0x226b,0x20d2],
1110 "nGtv;":[0x226b,0x338], "nLeftarrow;":0x21cd,
1111 "nLeftrightarrow;":0x21ce, "nLl;":[0x22d8,0x338],
1112 "nLt;":[0x226a,0x20d2], "nLtv;":[0x226a,0x338],
1113 "nRightarrow;":0x21cf, "nVDash;":0x22af,
1114 "nVdash;":0x22ae, "nabla;":0x2207,
1115 "nacute;":0x144, "nang;":[0x2220,0x20d2],
1116 "nap;":0x2249, "napE;":[0x2a70,0x338],
1117 "napid;":[0x224b,0x338], "napos;":0x149,
1118 "napprox;":0x2249, "natur;":0x266e,
1119 "natural;":0x266e, "naturals;":0x2115,
1120 "nbsp":0xa0, "nbsp;":0xa0,
1121 "nbump;":[0x224e,0x338], "nbumpe;":[0x224f,0x338],
1122 "ncap;":0x2a43, "ncaron;":0x148,
1123 "ncedil;":0x146, "ncong;":0x2247,
1124 "ncongdot;":[0x2a6d,0x338], "ncup;":0x2a42,
1125 "ncy;":0x43d, "ndash;":0x2013,
1126 "ne;":0x2260, "neArr;":0x21d7,
1127 "nearhk;":0x2924, "nearr;":0x2197,
1128 "nearrow;":0x2197, "nedot;":[0x2250,0x338],
1129 "nequiv;":0x2262, "nesear;":0x2928,
1130 "nesim;":[0x2242,0x338], "nexist;":0x2204,
1131 "nexists;":0x2204, "nfr;":[0xd835,0xdd2b],
1132 "ngE;":[0x2267,0x338], "nge;":0x2271,
1133 "ngeq;":0x2271, "ngeqq;":[0x2267,0x338],
1134 "ngeqslant;":[0x2a7e,0x338], "nges;":[0x2a7e,0x338],
1135 "ngsim;":0x2275, "ngt;":0x226f,
1136 "ngtr;":0x226f, "nhArr;":0x21ce,
1137 "nharr;":0x21ae, "nhpar;":0x2af2,
1138 "ni;":0x220b, "nis;":0x22fc,
1139 "nisd;":0x22fa, "niv;":0x220b,
1140 "njcy;":0x45a, "nlArr;":0x21cd,
1141 "nlE;":[0x2266,0x338], "nlarr;":0x219a,
1142 "nldr;":0x2025, "nle;":0x2270,
1143 "nleftarrow;":0x219a, "nleftrightarrow;":0x21ae,
1144 "nleq;":0x2270, "nleqq;":[0x2266,0x338],
1145 "nleqslant;":[0x2a7d,0x338], "nles;":[0x2a7d,0x338],
1146 "nless;":0x226e, "nlsim;":0x2274,
1147 "nlt;":0x226e, "nltri;":0x22ea,
1148 "nltrie;":0x22ec, "nmid;":0x2224,
1149 "nopf;":[0xd835,0xdd5f], "not":0xac,
1150 "not;":0xac, "notin;":0x2209,
1151 "notinE;":[0x22f9,0x338], "notindot;":[0x22f5,0x338],
1152 "notinva;":0x2209, "notinvb;":0x22f7,
1153 "notinvc;":0x22f6, "notni;":0x220c,
1154 "notniva;":0x220c, "notnivb;":0x22fe,
1155 "notnivc;":0x22fd, "npar;":0x2226,
1156 "nparallel;":0x2226, "nparsl;":[0x2afd,0x20e5],
1157 "npart;":[0x2202,0x338], "npolint;":0x2a14,
1158 "npr;":0x2280, "nprcue;":0x22e0,
1159 "npre;":[0x2aaf,0x338], "nprec;":0x2280,
1160 "npreceq;":[0x2aaf,0x338], "nrArr;":0x21cf,
1161 "nrarr;":0x219b, "nrarrc;":[0x2933,0x338],
1162 "nrarrw;":[0x219d,0x338], "nrightarrow;":0x219b,
1163 "nrtri;":0x22eb, "nrtrie;":0x22ed,
1164 "nsc;":0x2281, "nsccue;":0x22e1,
1165 "nsce;":[0x2ab0,0x338], "nscr;":[0xd835,0xdcc3],
1166 "nshortmid;":0x2224, "nshortparallel;":0x2226,
1167 "nsim;":0x2241, "nsime;":0x2244,
1168 "nsimeq;":0x2244, "nsmid;":0x2224,
1169 "nspar;":0x2226, "nsqsube;":0x22e2,
1170 "nsqsupe;":0x22e3, "nsub;":0x2284,
1171 "nsubE;":[0x2ac5,0x338], "nsube;":0x2288,
1172 "nsubset;":[0x2282,0x20d2], "nsubseteq;":0x2288,
1173 "nsubseteqq;":[0x2ac5,0x338], "nsucc;":0x2281,
1174 "nsucceq;":[0x2ab0,0x338], "nsup;":0x2285,
1175 "nsupE;":[0x2ac6,0x338], "nsupe;":0x2289,
1176 "nsupset;":[0x2283,0x20d2], "nsupseteq;":0x2289,
1177 "nsupseteqq;":[0x2ac6,0x338], "ntgl;":0x2279,
1178 "ntilde":0xf1, "ntilde;":0xf1,
1179 "ntlg;":0x2278, "ntriangleleft;":0x22ea,
1180 "ntrianglelefteq;":0x22ec, "ntriangleright;":0x22eb,
1181 "ntrianglerighteq;":0x22ed, "nu;":0x3bd,
1182 "num;":0x23, "numero;":0x2116,
1183 "numsp;":0x2007, "nvDash;":0x22ad,
1184 "nvHarr;":0x2904, "nvap;":[0x224d,0x20d2],
1185 "nvdash;":0x22ac, "nvge;":[0x2265,0x20d2],
1186 "nvgt;":[0x3e,0x20d2], "nvinfin;":0x29de,
1187 "nvlArr;":0x2902, "nvle;":[0x2264,0x20d2],
1188 "nvlt;":[0x3c,0x20d2], "nvltrie;":[0x22b4,0x20d2],
1189 "nvrArr;":0x2903, "nvrtrie;":[0x22b5,0x20d2],
1190 "nvsim;":[0x223c,0x20d2], "nwArr;":0x21d6,
1191 "nwarhk;":0x2923, "nwarr;":0x2196,
1192 "nwarrow;":0x2196, "nwnear;":0x2927,
1193 "oS;":0x24c8, "oacute":0xf3,
1194 "oacute;":0xf3, "oast;":0x229b,
1195 "ocir;":0x229a, "ocirc":0xf4,
1196 "ocirc;":0xf4, "ocy;":0x43e,
1197 "odash;":0x229d, "odblac;":0x151,
1198 "odiv;":0x2a38, "odot;":0x2299,
1199 "odsold;":0x29bc, "oelig;":0x153,
1200 "ofcir;":0x29bf, "ofr;":[0xd835,0xdd2c],
1201 "ogon;":0x2db, "ograve":0xf2,
1202 "ograve;":0xf2, "ogt;":0x29c1,
1203 "ohbar;":0x29b5, "ohm;":0x3a9,
1204 "oint;":0x222e, "olarr;":0x21ba,
1205 "olcir;":0x29be, "olcross;":0x29bb,
1206 "oline;":0x203e, "olt;":0x29c0,
1207 "omacr;":0x14d, "omega;":0x3c9,
1208 "omicron;":0x3bf, "omid;":0x29b6,
1209 "ominus;":0x2296, "oopf;":[0xd835,0xdd60],
1210 "opar;":0x29b7, "operp;":0x29b9,
1211 "oplus;":0x2295, "or;":0x2228,
1212 "orarr;":0x21bb, "ord;":0x2a5d,
1213 "order;":0x2134, "orderof;":0x2134,
1214 "ordf":0xaa, "ordf;":0xaa,
1215 "ordm":0xba, "ordm;":0xba,
1216 "origof;":0x22b6, "oror;":0x2a56,
1217 "orslope;":0x2a57, "orv;":0x2a5b,
1218 "oscr;":0x2134, "oslash":0xf8,
1219 "oslash;":0xf8, "osol;":0x2298,
1220 "otilde":0xf5, "otilde;":0xf5,
1221 "otimes;":0x2297, "otimesas;":0x2a36,
1222 "ouml":0xf6, "ouml;":0xf6,
1223 "ovbar;":0x233d, "par;":0x2225,
1224 "para":0xb6, "para;":0xb6,
1225 "parallel;":0x2225, "parsim;":0x2af3,
1226 "parsl;":0x2afd, "part;":0x2202,
1227 "pcy;":0x43f, "percnt;":0x25,
1228 "period;":0x2e, "permil;":0x2030,
1229 "perp;":0x22a5, "pertenk;":0x2031,
1230 "pfr;":[0xd835,0xdd2d], "phi;":0x3c6,
1231 "phiv;":0x3d5, "phmmat;":0x2133,
1232 "phone;":0x260e, "pi;":0x3c0,
1233 "pitchfork;":0x22d4, "piv;":0x3d6,
1234 "planck;":0x210f, "planckh;":0x210e,
1235 "plankv;":0x210f, "plus;":0x2b,
1236 "plusacir;":0x2a23, "plusb;":0x229e,
1237 "pluscir;":0x2a22, "plusdo;":0x2214,
1238 "plusdu;":0x2a25, "pluse;":0x2a72,
1239 "plusmn":0xb1, "plusmn;":0xb1,
1240 "plussim;":0x2a26, "plustwo;":0x2a27,
1241 "pm;":0xb1, "pointint;":0x2a15,
1242 "popf;":[0xd835,0xdd61], "pound":0xa3,
1243 "pound;":0xa3, "pr;":0x227a,
1244 "prE;":0x2ab3, "prap;":0x2ab7,
1245 "prcue;":0x227c, "pre;":0x2aaf,
1246 "prec;":0x227a, "precapprox;":0x2ab7,
1247 "preccurlyeq;":0x227c, "preceq;":0x2aaf,
1248 "precnapprox;":0x2ab9, "precneqq;":0x2ab5,
1249 "precnsim;":0x22e8, "precsim;":0x227e,
1250 "prime;":0x2032, "primes;":0x2119,
1251 "prnE;":0x2ab5, "prnap;":0x2ab9,
1252 "prnsim;":0x22e8, "prod;":0x220f,
1253 "profalar;":0x232e, "profline;":0x2312,
1254 "profsurf;":0x2313, "prop;":0x221d,
1255 "propto;":0x221d, "prsim;":0x227e,
1256 "prurel;":0x22b0, "pscr;":[0xd835,0xdcc5],
1257 "psi;":0x3c8, "puncsp;":0x2008,
1258 "qfr;":[0xd835,0xdd2e], "qint;":0x2a0c,
1259 "qopf;":[0xd835,0xdd62], "qprime;":0x2057,
1260 "qscr;":[0xd835,0xdcc6], "quaternions;":0x210d,
1261 "quatint;":0x2a16, "quest;":0x3f,
1262 "questeq;":0x225f, "quot":0x22,
1263 "quot;":0x22, "rAarr;":0x21db,
1264 "rArr;":0x21d2, "rAtail;":0x291c,
1265 "rBarr;":0x290f, "rHar;":0x2964,
1266 "race;":[0x223d,0x331], "racute;":0x155,
1267 "radic;":0x221a, "raemptyv;":0x29b3,
1268 "rang;":0x27e9, "rangd;":0x2992,
1269 "range;":0x29a5, "rangle;":0x27e9,
1270 "raquo":0xbb, "raquo;":0xbb,
1271 "rarr;":0x2192, "rarrap;":0x2975,
1272 "rarrb;":0x21e5, "rarrbfs;":0x2920,
1273 "rarrc;":0x2933, "rarrfs;":0x291e,
1274 "rarrhk;":0x21aa, "rarrlp;":0x21ac,
1275 "rarrpl;":0x2945, "rarrsim;":0x2974,
1276 "rarrtl;":0x21a3, "rarrw;":0x219d,
1277 "ratail;":0x291a, "ratio;":0x2236,
1278 "rationals;":0x211a, "rbarr;":0x290d,
1279 "rbbrk;":0x2773, "rbrace;":0x7d,
1280 "rbrack;":0x5d, "rbrke;":0x298c,
1281 "rbrksld;":0x298e, "rbrkslu;":0x2990,
1282 "rcaron;":0x159, "rcedil;":0x157,
1283 "rceil;":0x2309, "rcub;":0x7d,
1284 "rcy;":0x440, "rdca;":0x2937,
1285 "rdldhar;":0x2969, "rdquo;":0x201d,
1286 "rdquor;":0x201d, "rdsh;":0x21b3,
1287 "real;":0x211c, "realine;":0x211b,
1288 "realpart;":0x211c, "reals;":0x211d,
1289 "rect;":0x25ad, "reg":0xae,
1290 "reg;":0xae, "rfisht;":0x297d,
1291 "rfloor;":0x230b, "rfr;":[0xd835,0xdd2f],
1292 "rhard;":0x21c1, "rharu;":0x21c0,
1293 "rharul;":0x296c, "rho;":0x3c1,
1294 "rhov;":0x3f1, "rightarrow;":0x2192,
1295 "rightarrowtail;":0x21a3, "rightharpoondown;":0x21c1,
1296 "rightharpoonup;":0x21c0, "rightleftarrows;":0x21c4,
1297 "rightleftharpoons;":0x21cc, "rightrightarrows;":0x21c9,
1298 "rightsquigarrow;":0x219d, "rightthreetimes;":0x22cc,
1299 "ring;":0x2da, "risingdotseq;":0x2253,
1300 "rlarr;":0x21c4, "rlhar;":0x21cc,
1301 "rlm;":0x200f, "rmoust;":0x23b1,
1302 "rmoustache;":0x23b1, "rnmid;":0x2aee,
1303 "roang;":0x27ed, "roarr;":0x21fe,
1304 "robrk;":0x27e7, "ropar;":0x2986,
1305 "ropf;":[0xd835,0xdd63], "roplus;":0x2a2e,
1306 "rotimes;":0x2a35, "rpar;":0x29,
1307 "rpargt;":0x2994, "rppolint;":0x2a12,
1308 "rrarr;":0x21c9, "rsaquo;":0x203a,
1309 "rscr;":[0xd835,0xdcc7], "rsh;":0x21b1,
1310 "rsqb;":0x5d, "rsquo;":0x2019,
1311 "rsquor;":0x2019, "rthree;":0x22cc,
1312 "rtimes;":0x22ca, "rtri;":0x25b9,
1313 "rtrie;":0x22b5, "rtrif;":0x25b8,
1314 "rtriltri;":0x29ce, "ruluhar;":0x2968,
1315 "rx;":0x211e, "sacute;":0x15b,
1316 "sbquo;":0x201a, "sc;":0x227b,
1317 "scE;":0x2ab4, "scap;":0x2ab8,
1318 "scaron;":0x161, "sccue;":0x227d,
1319 "sce;":0x2ab0, "scedil;":0x15f,
1320 "scirc;":0x15d, "scnE;":0x2ab6,
1321 "scnap;":0x2aba, "scnsim;":0x22e9,
1322 "scpolint;":0x2a13, "scsim;":0x227f,
1323 "scy;":0x441, "sdot;":0x22c5,
1324 "sdotb;":0x22a1, "sdote;":0x2a66,
1325 "seArr;":0x21d8, "searhk;":0x2925,
1326 "searr;":0x2198, "searrow;":0x2198,
1327 "sect":0xa7, "sect;":0xa7,
1328 "semi;":0x3b, "seswar;":0x2929,
1329 "setminus;":0x2216, "setmn;":0x2216,
1330 "sext;":0x2736, "sfr;":[0xd835,0xdd30],
1331 "sfrown;":0x2322, "sharp;":0x266f,
1332 "shchcy;":0x449, "shcy;":0x448,
1333 "shortmid;":0x2223, "shortparallel;":0x2225,
1334 "shy":0xad, "shy;":0xad,
1335 "sigma;":0x3c3, "sigmaf;":0x3c2,
1336 "sigmav;":0x3c2, "sim;":0x223c,
1337 "simdot;":0x2a6a, "sime;":0x2243,
1338 "simeq;":0x2243, "simg;":0x2a9e,
1339 "simgE;":0x2aa0, "siml;":0x2a9d,
1340 "simlE;":0x2a9f, "simne;":0x2246,
1341 "simplus;":0x2a24, "simrarr;":0x2972,
1342 "slarr;":0x2190, "smallsetminus;":0x2216,
1343 "smashp;":0x2a33, "smeparsl;":0x29e4,
1344 "smid;":0x2223, "smile;":0x2323,
1345 "smt;":0x2aaa, "smte;":0x2aac,
1346 "smtes;":[0x2aac,0xfe00], "softcy;":0x44c,
1347 "sol;":0x2f, "solb;":0x29c4,
1348 "solbar;":0x233f, "sopf;":[0xd835,0xdd64],
1349 "spades;":0x2660, "spadesuit;":0x2660,
1350 "spar;":0x2225, "sqcap;":0x2293,
1351 "sqcaps;":[0x2293,0xfe00], "sqcup;":0x2294,
1352 "sqcups;":[0x2294,0xfe00], "sqsub;":0x228f,
1353 "sqsube;":0x2291, "sqsubset;":0x228f,
1354 "sqsubseteq;":0x2291, "sqsup;":0x2290,
1355 "sqsupe;":0x2292, "sqsupset;":0x2290,
1356 "sqsupseteq;":0x2292, "squ;":0x25a1,
1357 "square;":0x25a1, "squarf;":0x25aa,
1358 "squf;":0x25aa, "srarr;":0x2192,
1359 "sscr;":[0xd835,0xdcc8], "ssetmn;":0x2216,
1360 "ssmile;":0x2323, "sstarf;":0x22c6,
1361 "star;":0x2606, "starf;":0x2605,
1362 "straightepsilon;":0x3f5, "straightphi;":0x3d5,
1363 "strns;":0xaf, "sub;":0x2282,
1364 "subE;":0x2ac5, "subdot;":0x2abd,
1365 "sube;":0x2286, "subedot;":0x2ac3,
1366 "submult;":0x2ac1, "subnE;":0x2acb,
1367 "subne;":0x228a, "subplus;":0x2abf,
1368 "subrarr;":0x2979, "subset;":0x2282,
1369 "subseteq;":0x2286, "subseteqq;":0x2ac5,
1370 "subsetneq;":0x228a, "subsetneqq;":0x2acb,
1371 "subsim;":0x2ac7, "subsub;":0x2ad5,
1372 "subsup;":0x2ad3, "succ;":0x227b,
1373 "succapprox;":0x2ab8, "succcurlyeq;":0x227d,
1374 "succeq;":0x2ab0, "succnapprox;":0x2aba,
1375 "succneqq;":0x2ab6, "succnsim;":0x22e9,
1376 "succsim;":0x227f, "sum;":0x2211,
1377 "sung;":0x266a, "sup1":0xb9,
1378 "sup1;":0xb9, "sup2":0xb2,
1379 "sup2;":0xb2, "sup3":0xb3,
1380 "sup3;":0xb3, "sup;":0x2283,
1381 "supE;":0x2ac6, "supdot;":0x2abe,
1382 "supdsub;":0x2ad8, "supe;":0x2287,
1383 "supedot;":0x2ac4, "suphsol;":0x27c9,
1384 "suphsub;":0x2ad7, "suplarr;":0x297b,
1385 "supmult;":0x2ac2, "supnE;":0x2acc,
1386 "supne;":0x228b, "supplus;":0x2ac0,
1387 "supset;":0x2283, "supseteq;":0x2287,
1388 "supseteqq;":0x2ac6, "supsetneq;":0x228b,
1389 "supsetneqq;":0x2acc, "supsim;":0x2ac8,
1390 "supsub;":0x2ad4, "supsup;":0x2ad6,
1391 "swArr;":0x21d9, "swarhk;":0x2926,
1392 "swarr;":0x2199, "swarrow;":0x2199,
1393 "swnwar;":0x292a, "szlig":0xdf,
1394 "szlig;":0xdf, "target;":0x2316,
1395 "tau;":0x3c4, "tbrk;":0x23b4,
1396 "tcaron;":0x165, "tcedil;":0x163,
1397 "tcy;":0x442, "tdot;":0x20db,
1398 "telrec;":0x2315, "tfr;":[0xd835,0xdd31],
1399 "there4;":0x2234, "therefore;":0x2234,
1400 "theta;":0x3b8, "thetasym;":0x3d1,
1401 "thetav;":0x3d1, "thickapprox;":0x2248,
1402 "thicksim;":0x223c, "thinsp;":0x2009,
1403 "thkap;":0x2248, "thksim;":0x223c,
1404 "thorn":0xfe, "thorn;":0xfe,
1405 "tilde;":0x2dc, "times":0xd7,
1406 "times;":0xd7, "timesb;":0x22a0,
1407 "timesbar;":0x2a31, "timesd;":0x2a30,
1408 "tint;":0x222d, "toea;":0x2928,
1409 "top;":0x22a4, "topbot;":0x2336,
1410 "topcir;":0x2af1, "topf;":[0xd835,0xdd65],
1411 "topfork;":0x2ada, "tosa;":0x2929,
1412 "tprime;":0x2034, "trade;":0x2122,
1413 "triangle;":0x25b5, "triangledown;":0x25bf,
1414 "triangleleft;":0x25c3, "trianglelefteq;":0x22b4,
1415 "triangleq;":0x225c, "triangleright;":0x25b9,
1416 "trianglerighteq;":0x22b5, "tridot;":0x25ec,
1417 "trie;":0x225c, "triminus;":0x2a3a,
1418 "triplus;":0x2a39, "trisb;":0x29cd,
1419 "tritime;":0x2a3b, "trpezium;":0x23e2,
1420 "tscr;":[0xd835,0xdcc9], "tscy;":0x446,
1421 "tshcy;":0x45b, "tstrok;":0x167,
1422 "twixt;":0x226c, "twoheadleftarrow;":0x219e,
1423 "twoheadrightarrow;":0x21a0, "uArr;":0x21d1,
1424 "uHar;":0x2963, "uacute":0xfa,
1425 "uacute;":0xfa, "uarr;":0x2191,
1426 "ubrcy;":0x45e, "ubreve;":0x16d,
1427 "ucirc":0xfb, "ucirc;":0xfb,
1428 "ucy;":0x443, "udarr;":0x21c5,
1429 "udblac;":0x171, "udhar;":0x296e,
1430 "ufisht;":0x297e, "ufr;":[0xd835,0xdd32],
1431 "ugrave":0xf9, "ugrave;":0xf9,
1432 "uharl;":0x21bf, "uharr;":0x21be,
1433 "uhblk;":0x2580, "ulcorn;":0x231c,
1434 "ulcorner;":0x231c, "ulcrop;":0x230f,
1435 "ultri;":0x25f8, "umacr;":0x16b,
1436 "uml":0xa8, "uml;":0xa8,
1437 "uogon;":0x173, "uopf;":[0xd835,0xdd66],
1438 "uparrow;":0x2191, "updownarrow;":0x2195,
1439 "upharpoonleft;":0x21bf, "upharpoonright;":0x21be,
1440 "uplus;":0x228e, "upsi;":0x3c5,
1441 "upsih;":0x3d2, "upsilon;":0x3c5,
1442 "upuparrows;":0x21c8, "urcorn;":0x231d,
1443 "urcorner;":0x231d, "urcrop;":0x230e,
1444 "uring;":0x16f, "urtri;":0x25f9,
1445 "uscr;":[0xd835,0xdcca], "utdot;":0x22f0,
1446 "utilde;":0x169, "utri;":0x25b5,
1447 "utrif;":0x25b4, "uuarr;":0x21c8,
1448 "uuml":0xfc, "uuml;":0xfc,
1449 "uwangle;":0x29a7, "vArr;":0x21d5,
1450 "vBar;":0x2ae8, "vBarv;":0x2ae9,
1451 "vDash;":0x22a8, "vangrt;":0x299c,
1452 "varepsilon;":0x3f5, "varkappa;":0x3f0,
1453 "varnothing;":0x2205, "varphi;":0x3d5,
1454 "varpi;":0x3d6, "varpropto;":0x221d,
1455 "varr;":0x2195, "varrho;":0x3f1,
1456 "varsigma;":0x3c2, "varsubsetneq;":[0x228a,0xfe00],
1457 "varsubsetneqq;":[0x2acb,0xfe00], "varsupsetneq;":[0x228b,0xfe00],
1458 "varsupsetneqq;":[0x2acc,0xfe00], "vartheta;":0x3d1,
1459 "vartriangleleft;":0x22b2, "vartriangleright;":0x22b3,
1460 "vcy;":0x432, "vdash;":0x22a2,
1461 "vee;":0x2228, "veebar;":0x22bb,
1462 "veeeq;":0x225a, "vellip;":0x22ee,
1463 "verbar;":0x7c, "vert;":0x7c,
1464 "vfr;":[0xd835,0xdd33], "vltri;":0x22b2,
1465 "vnsub;":[0x2282,0x20d2], "vnsup;":[0x2283,0x20d2],
1466 "vopf;":[0xd835,0xdd67], "vprop;":0x221d,
1467 "vrtri;":0x22b3, "vscr;":[0xd835,0xdccb],
1468 "vsubnE;":[0x2acb,0xfe00], "vsubne;":[0x228a,0xfe00],
1469 "vsupnE;":[0x2acc,0xfe00], "vsupne;":[0x228b,0xfe00],
1470 "vzigzag;":0x299a, "wcirc;":0x175,
1471 "wedbar;":0x2a5f, "wedge;":0x2227,
1472 "wedgeq;":0x2259, "weierp;":0x2118,
1473 "wfr;":[0xd835,0xdd34], "wopf;":[0xd835,0xdd68],
1474 "wp;":0x2118, "wr;":0x2240,
1475 "wreath;":0x2240, "wscr;":[0xd835,0xdccc],
1476 "xcap;":0x22c2, "xcirc;":0x25ef,
1477 "xcup;":0x22c3, "xdtri;":0x25bd,
1478 "xfr;":[0xd835,0xdd35], "xhArr;":0x27fa,
1479 "xharr;":0x27f7, "xi;":0x3be,
1480 "xlArr;":0x27f8, "xlarr;":0x27f5,
1481 "xmap;":0x27fc, "xnis;":0x22fb,
1482 "xodot;":0x2a00, "xopf;":[0xd835,0xdd69],
1483 "xoplus;":0x2a01, "xotime;":0x2a02,
1484 "xrArr;":0x27f9, "xrarr;":0x27f6,
1485 "xscr;":[0xd835,0xdccd], "xsqcup;":0x2a06,
1486 "xuplus;":0x2a04, "xutri;":0x25b3,
1487 "xvee;":0x22c1, "xwedge;":0x22c0,
1488 "yacute":0xfd, "yacute;":0xfd,
1489 "yacy;":0x44f, "ycirc;":0x177,
1490 "ycy;":0x44b, "yen":0xa5,
1491 "yen;":0xa5, "yfr;":[0xd835,0xdd36],
1492 "yicy;":0x457, "yopf;":[0xd835,0xdd6a],
1493 "yscr;":[0xd835,0xdcce], "yucy;":0x44e,
1494 "yuml":0xff, "yuml;":0xff,
1495 "zacute;":0x17a, "zcaron;":0x17e,
1496 "zcy;":0x437, "zdot;":0x17c,
1497 "zeetrf;":0x2128, "zeta;":0x3b6,
1498 "zfr;":[0xd835,0xdd37], "zhcy;":0x436,
1499 "zigrarr;":0x21dd, "zopf;":[0xd835,0xdd6b],
1500 "zscr;":[0xd835,0xdccf], "zwj;":0x200d,
1501 "zwnj;":0x200c,
1502};
1503/*
1504 * This regexp is generated with test/tools/update-entities.js
1505 * It will always match at least one character -- but note that there
1506 * are no entities whose names are a single character long.
1507 */
1508var NAMEDCHARREF = /(A(?:Elig;?|MP;?|acute;?|breve;|c(?:irc;?|y;)|fr;|grave;?|lpha;|macr;|nd;|o(?:gon;|pf;)|pplyFunction;|ring;?|s(?:cr;|sign;)|tilde;?|uml;?)|B(?:a(?:ckslash;|r(?:v;|wed;))|cy;|e(?:cause;|rnoullis;|ta;)|fr;|opf;|reve;|scr;|umpeq;)|C(?:Hcy;|OPY;?|a(?:cute;|p(?:;|italDifferentialD;)|yleys;)|c(?:aron;|edil;?|irc;|onint;)|dot;|e(?:dilla;|nterDot;)|fr;|hi;|ircle(?:Dot;|Minus;|Plus;|Times;)|lo(?:ckwiseContourIntegral;|seCurly(?:DoubleQuote;|Quote;))|o(?:lon(?:;|e;)|n(?:gruent;|int;|tourIntegral;)|p(?:f;|roduct;)|unterClockwiseContourIntegral;)|ross;|scr;|up(?:;|Cap;))|D(?:D(?:;|otrahd;)|Jcy;|Scy;|Zcy;|a(?:gger;|rr;|shv;)|c(?:aron;|y;)|el(?:;|ta;)|fr;|i(?:a(?:critical(?:Acute;|Do(?:t;|ubleAcute;)|Grave;|Tilde;)|mond;)|fferentialD;)|o(?:pf;|t(?:;|Dot;|Equal;)|uble(?:ContourIntegral;|Do(?:t;|wnArrow;)|L(?:eft(?:Arrow;|RightArrow;|Tee;)|ong(?:Left(?:Arrow;|RightArrow;)|RightArrow;))|Right(?:Arrow;|Tee;)|Up(?:Arrow;|DownArrow;)|VerticalBar;)|wn(?:Arrow(?:;|Bar;|UpArrow;)|Breve;|Left(?:RightVector;|TeeVector;|Vector(?:;|Bar;))|Right(?:TeeVector;|Vector(?:;|Bar;))|Tee(?:;|Arrow;)|arrow;))|s(?:cr;|trok;))|E(?:NG;|TH;?|acute;?|c(?:aron;|irc;?|y;)|dot;|fr;|grave;?|lement;|m(?:acr;|pty(?:SmallSquare;|VerySmallSquare;))|o(?:gon;|pf;)|psilon;|qu(?:al(?:;|Tilde;)|ilibrium;)|s(?:cr;|im;)|ta;|uml;?|x(?:ists;|ponentialE;))|F(?:cy;|fr;|illed(?:SmallSquare;|VerySmallSquare;)|o(?:pf;|rAll;|uriertrf;)|scr;)|G(?:Jcy;|T;?|amma(?:;|d;)|breve;|c(?:edil;|irc;|y;)|dot;|fr;|g;|opf;|reater(?:Equal(?:;|Less;)|FullEqual;|Greater;|Less;|SlantEqual;|Tilde;)|scr;|t;)|H(?:ARDcy;|a(?:cek;|t;)|circ;|fr;|ilbertSpace;|o(?:pf;|rizontalLine;)|s(?:cr;|trok;)|ump(?:DownHump;|Equal;))|I(?:Ecy;|Jlig;|Ocy;|acute;?|c(?:irc;?|y;)|dot;|fr;|grave;?|m(?:;|a(?:cr;|ginaryI;)|plies;)|n(?:t(?:;|e(?:gral;|rsection;))|visible(?:Comma;|Times;))|o(?:gon;|pf;|ta;)|scr;|tilde;|u(?:kcy;|ml;?))|J(?:c(?:irc;|y;)|fr;|opf;|s(?:cr;|ercy;)|ukcy;)|K(?:Hcy;|Jcy;|appa;|c(?:edil;|y;)|fr;|opf;|scr;)|L(?:Jcy;|T;?|a(?:cute;|
mbda;|ng;|placetrf;|rr;)|c(?:aron;|edil;|y;)|e(?:ft(?:A(?:ngleBracket;|rrow(?:;|Bar;|RightArrow;))|Ceiling;|Do(?:ubleBracket;|wn(?:TeeVector;|Vector(?:;|Bar;)))|Floor;|Right(?:Arrow;|Vector;)|T(?:ee(?:;|Arrow;|Vector;)|riangle(?:;|Bar;|Equal;))|Up(?:DownVector;|TeeVector;|Vector(?:;|Bar;))|Vector(?:;|Bar;)|arrow;|rightarrow;)|ss(?:EqualGreater;|FullEqual;|Greater;|Less;|SlantEqual;|Tilde;))|fr;|l(?:;|eftarrow;)|midot;|o(?:ng(?:Left(?:Arrow;|RightArrow;)|RightArrow;|left(?:arrow;|rightarrow;)|rightarrow;)|pf;|wer(?:LeftArrow;|RightArrow;))|s(?:cr;|h;|trok;)|t;)|M(?:ap;|cy;|e(?:diumSpace;|llintrf;)|fr;|inusPlus;|opf;|scr;|u;)|N(?:Jcy;|acute;|c(?:aron;|edil;|y;)|e(?:gative(?:MediumSpace;|Thi(?:ckSpace;|nSpace;)|VeryThinSpace;)|sted(?:GreaterGreater;|LessLess;)|wLine;)|fr;|o(?:Break;|nBreakingSpace;|pf;|t(?:;|C(?:ongruent;|upCap;)|DoubleVerticalBar;|E(?:lement;|qual(?:;|Tilde;)|xists;)|Greater(?:;|Equal;|FullEqual;|Greater;|Less;|SlantEqual;|Tilde;)|Hump(?:DownHump;|Equal;)|Le(?:ftTriangle(?:;|Bar;|Equal;)|ss(?:;|Equal;|Greater;|Less;|SlantEqual;|Tilde;))|Nested(?:GreaterGreater;|LessLess;)|Precedes(?:;|Equal;|SlantEqual;)|R(?:everseElement;|ightTriangle(?:;|Bar;|Equal;))|S(?:quareSu(?:bset(?:;|Equal;)|perset(?:;|Equal;))|u(?:bset(?:;|Equal;)|cceeds(?:;|Equal;|SlantEqual;|Tilde;)|perset(?:;|Equal;)))|Tilde(?:;|Equal;|FullEqual;|Tilde;)|VerticalBar;))|scr;|tilde;?|u;)|O(?:Elig;|acute;?|c(?:irc;?|y;)|dblac;|fr;|grave;?|m(?:acr;|ega;|icron;)|opf;|penCurly(?:DoubleQuote;|Quote;)|r;|s(?:cr;|lash;?)|ti(?:lde;?|mes;)|uml;?|ver(?:B(?:ar;|rac(?:e;|ket;))|Parenthesis;))|P(?:artialD;|cy;|fr;|hi;|i;|lusMinus;|o(?:incareplane;|pf;)|r(?:;|ecedes(?:;|Equal;|SlantEqual;|Tilde;)|ime;|o(?:duct;|portion(?:;|al;)))|s(?:cr;|i;))|Q(?:UOT;?|fr;|opf;|scr;)|R(?:Barr;|EG;?|a(?:cute;|ng;|rr(?:;|tl;))|c(?:aron;|edil;|y;)|e(?:;|verse(?:E(?:lement;|quilibrium;)|UpEquilibrium;))|fr;|ho;|ight(?:A(?:ngleBracket;|rrow(?:;|Bar;|LeftArrow;))|Ceiling;|Do(?:ubleBracket;|wn(?:TeeVector;|Vector(?:;|Bar;)))|Fl
oor;|T(?:ee(?:;|Arrow;|Vector;)|riangle(?:;|Bar;|Equal;))|Up(?:DownVector;|TeeVector;|Vector(?:;|Bar;))|Vector(?:;|Bar;)|arrow;)|o(?:pf;|undImplies;)|rightarrow;|s(?:cr;|h;)|uleDelayed;)|S(?:H(?:CHcy;|cy;)|OFTcy;|acute;|c(?:;|aron;|edil;|irc;|y;)|fr;|hort(?:DownArrow;|LeftArrow;|RightArrow;|UpArrow;)|igma;|mallCircle;|opf;|q(?:rt;|uare(?:;|Intersection;|Su(?:bset(?:;|Equal;)|perset(?:;|Equal;))|Union;))|scr;|tar;|u(?:b(?:;|set(?:;|Equal;))|c(?:ceeds(?:;|Equal;|SlantEqual;|Tilde;)|hThat;)|m;|p(?:;|erset(?:;|Equal;)|set;)))|T(?:HORN;?|RADE;|S(?:Hcy;|cy;)|a(?:b;|u;)|c(?:aron;|edil;|y;)|fr;|h(?:e(?:refore;|ta;)|i(?:ckSpace;|nSpace;))|ilde(?:;|Equal;|FullEqual;|Tilde;)|opf;|ripleDot;|s(?:cr;|trok;))|U(?:a(?:cute;?|rr(?:;|ocir;))|br(?:cy;|eve;)|c(?:irc;?|y;)|dblac;|fr;|grave;?|macr;|n(?:der(?:B(?:ar;|rac(?:e;|ket;))|Parenthesis;)|ion(?:;|Plus;))|o(?:gon;|pf;)|p(?:Arrow(?:;|Bar;|DownArrow;)|DownArrow;|Equilibrium;|Tee(?:;|Arrow;)|arrow;|downarrow;|per(?:LeftArrow;|RightArrow;)|si(?:;|lon;))|ring;|scr;|tilde;|uml;?)|V(?:Dash;|bar;|cy;|dash(?:;|l;)|e(?:e;|r(?:bar;|t(?:;|ical(?:Bar;|Line;|Separator;|Tilde;))|yThinSpace;))|fr;|opf;|scr;|vdash;)|W(?:circ;|edge;|fr;|opf;|scr;)|X(?:fr;|i;|opf;|scr;)|Y(?:Acy;|Icy;|Ucy;|acute;?|c(?:irc;|y;)|fr;|opf;|scr;|uml;)|Z(?:Hcy;|acute;|c(?:aron;|y;)|dot;|e(?:roWidthSpace;|ta;)|fr;|opf;|scr;)|a(?:acute;?|breve;|c(?:;|E;|d;|irc;?|ute;?|y;)|elig;?|f(?:;|r;)|grave;?|l(?:e(?:fsym;|ph;)|pha;)|m(?:a(?:cr;|lg;)|p;?)|n(?:d(?:;|and;|d;|slope;|v;)|g(?:;|e;|le;|msd(?:;|a(?:a;|b;|c;|d;|e;|f;|g;|h;))|rt(?:;|vb(?:;|d;))|s(?:ph;|t;)|zarr;))|o(?:gon;|pf;)|p(?:;|E;|acir;|e;|id;|os;|prox(?:;|eq;))|ring;?|s(?:cr;|t;|ymp(?:;|eq;))|tilde;?|uml;?|w(?:conint;|int;))|b(?:Not;|a(?:ck(?:cong;|epsilon;|prime;|sim(?:;|eq;))|r(?:vee;|wed(?:;|ge;)))|brk(?:;|tbrk;)|c(?:ong;|y;)|dquo;|e(?:caus(?:;|e;)|mptyv;|psi;|rnou;|t(?:a;|h;|ween;))|fr;|ig(?:c(?:ap;|irc;|up;)|o(?:dot;|plus;|times;)|s(?:qcup;|tar;)|triangle(?:down;|up;)|uplus;|vee;|wedge;)|karow;|l(?:a(?:ck(?:lozenge;|sq
uare;|triangle(?:;|down;|left;|right;))|nk;)|k(?:1(?:2;|4;)|34;)|ock;)|n(?:e(?:;|quiv;)|ot;)|o(?:pf;|t(?:;|tom;)|wtie;|x(?:D(?:L;|R;|l;|r;)|H(?:;|D;|U;|d;|u;)|U(?:L;|R;|l;|r;)|V(?:;|H;|L;|R;|h;|l;|r;)|box;|d(?:L;|R;|l;|r;)|h(?:;|D;|U;|d;|u;)|minus;|plus;|times;|u(?:L;|R;|l;|r;)|v(?:;|H;|L;|R;|h;|l;|r;)))|prime;|r(?:eve;|vbar;?)|s(?:cr;|emi;|im(?:;|e;)|ol(?:;|b;|hsub;))|u(?:ll(?:;|et;)|mp(?:;|E;|e(?:;|q;))))|c(?:a(?:cute;|p(?:;|and;|brcup;|c(?:ap;|up;)|dot;|s;)|r(?:et;|on;))|c(?:a(?:ps;|ron;)|edil;?|irc;|ups(?:;|sm;))|dot;|e(?:dil;?|mptyv;|nt(?:;|erdot;|))|fr;|h(?:cy;|eck(?:;|mark;)|i;)|ir(?:;|E;|c(?:;|eq;|le(?:arrow(?:left;|right;)|d(?:R;|S;|ast;|circ;|dash;)))|e;|fnint;|mid;|scir;)|lubs(?:;|uit;)|o(?:lon(?:;|e(?:;|q;))|m(?:ma(?:;|t;)|p(?:;|fn;|le(?:ment;|xes;)))|n(?:g(?:;|dot;)|int;)|p(?:f;|rod;|y(?:;|sr;|)))|r(?:arr;|oss;)|s(?:cr;|u(?:b(?:;|e;)|p(?:;|e;)))|tdot;|u(?:darr(?:l;|r;)|e(?:pr;|sc;)|larr(?:;|p;)|p(?:;|brcap;|c(?:ap;|up;)|dot;|or;|s;)|r(?:arr(?:;|m;)|ly(?:eq(?:prec;|succ;)|vee;|wedge;)|ren;?|vearrow(?:left;|right;))|vee;|wed;)|w(?:conint;|int;)|ylcty;)|d(?:Arr;|Har;|a(?:gger;|leth;|rr;|sh(?:;|v;))|b(?:karow;|lac;)|c(?:aron;|y;)|d(?:;|a(?:gger;|rr;)|otseq;)|e(?:g;?|lta;|mptyv;)|f(?:isht;|r;)|har(?:l;|r;)|i(?:am(?:;|ond(?:;|suit;)|s;)|e;|gamma;|sin;|v(?:;|ide(?:;|ontimes;|)|onx;))|jcy;|lc(?:orn;|rop;)|o(?:llar;|pf;|t(?:;|eq(?:;|dot;)|minus;|plus;|square;)|ublebarwedge;|wn(?:arrow;|downarrows;|harpoon(?:left;|right;)))|r(?:bkarow;|c(?:orn;|rop;))|s(?:c(?:r;|y;)|ol;|trok;)|t(?:dot;|ri(?:;|f;))|u(?:arr;|har;)|wangle;|z(?:cy;|igrarr;))|e(?:D(?:Dot;|ot;)|a(?:cute;?|ster;)|c(?:aron;|ir(?:;|c;?)|olon;|y;)|dot;|e;|f(?:Dot;|r;)|g(?:;|rave;?|s(?:;|dot;))|l(?:;|inters;|l;|s(?:;|dot;))|m(?:acr;|pty(?:;|set;|v;)|sp(?:1(?:3;|4;)|;))|n(?:g;|sp;)|o(?:gon;|pf;)|p(?:ar(?:;|sl;)|lus;|si(?:;|lon;|v;))|q(?:c(?:irc;|olon;)|s(?:im;|lant(?:gtr;|less;))|u(?:als;|est;|iv(?:;|DD;))|vparsl;)|r(?:Dot;|arr;)|s(?:cr;|dot;|im;)|t(?:a;|h;?)|u(?:ml;?|ro;)|x(?:cl;|ist;|p(?:ectation;|onential
e;)))|f(?:allingdotseq;|cy;|emale;|f(?:ilig;|l(?:ig;|lig;)|r;)|ilig;|jlig;|l(?:at;|lig;|tns;)|nof;|o(?:pf;|r(?:all;|k(?:;|v;)))|partint;|r(?:a(?:c(?:1(?:2;?|3;|4;?|5;|6;|8;)|2(?:3;|5;)|3(?:4;?|5;|8;)|45;|5(?:6;|8;)|78;)|sl;)|own;)|scr;)|g(?:E(?:;|l;)|a(?:cute;|mma(?:;|d;)|p;)|breve;|c(?:irc;|y;)|dot;|e(?:;|l;|q(?:;|q;|slant;)|s(?:;|cc;|dot(?:;|o(?:;|l;))|l(?:;|es;)))|fr;|g(?:;|g;)|imel;|jcy;|l(?:;|E;|a;|j;)|n(?:E;|ap(?:;|prox;)|e(?:;|q(?:;|q;))|sim;)|opf;|rave;|s(?:cr;|im(?:;|e;|l;))|t(?:;|c(?:c;|ir;)|dot;|lPar;|quest;|r(?:a(?:pprox;|rr;)|dot;|eq(?:less;|qless;)|less;|sim;)|)|v(?:ertneqq;|nE;))|h(?:Arr;|a(?:irsp;|lf;|milt;|r(?:dcy;|r(?:;|cir;|w;)))|bar;|circ;|e(?:arts(?:;|uit;)|llip;|rcon;)|fr;|ks(?:earow;|warow;)|o(?:arr;|mtht;|ok(?:leftarrow;|rightarrow;)|pf;|rbar;)|s(?:cr;|lash;|trok;)|y(?:bull;|phen;))|i(?:acute;?|c(?:;|irc;?|y;)|e(?:cy;|xcl;?)|f(?:f;|r;)|grave;?|i(?:;|i(?:int;|nt;)|nfin;|ota;)|jlig;|m(?:a(?:cr;|g(?:e;|line;|part;)|th;)|of;|ped;)|n(?:;|care;|fin(?:;|tie;)|odot;|t(?:;|cal;|e(?:gers;|rcal;)|larhk;|prod;))|o(?:cy;|gon;|pf;|ta;)|prod;|quest;?|s(?:cr;|in(?:;|E;|dot;|s(?:;|v;)|v;))|t(?:;|ilde;)|u(?:kcy;|ml;?))|j(?:c(?:irc;|y;)|fr;|math;|opf;|s(?:cr;|ercy;)|ukcy;)|k(?:appa(?:;|v;)|c(?:edil;|y;)|fr;|green;|hcy;|jcy;|opf;|scr;)|l(?:A(?:arr;|rr;|tail;)|Barr;|E(?:;|g;)|Har;|a(?:cute;|emptyv;|gran;|mbda;|ng(?:;|d;|le;)|p;|quo;?|rr(?:;|b(?:;|fs;)|fs;|hk;|lp;|pl;|sim;|tl;)|t(?:;|ail;|e(?:;|s;)))|b(?:arr;|brk;|r(?:ac(?:e;|k;)|k(?:e;|sl(?:d;|u;))))|c(?:aron;|e(?:dil;|il;)|ub;|y;)|d(?:ca;|quo(?:;|r;)|r(?:dhar;|ushar;)|sh;)|e(?:;|ft(?:arrow(?:;|tail;)|harpoon(?:down;|up;)|leftarrows;|right(?:arrow(?:;|s;)|harpoons;|squigarrow;)|threetimes;)|g;|q(?:;|q;|slant;)|s(?:;|cc;|dot(?:;|o(?:;|r;))|g(?:;|es;)|s(?:approx;|dot;|eq(?:gtr;|qgtr;)|gtr;|sim;)))|f(?:isht;|loor;|r;)|g(?:;|E;)|h(?:ar(?:d;|u(?:;|l;))|blk;)|jcy;|l(?:;|arr;|corner;|hard;|tri;)|m(?:idot;|oust(?:;|ache;))|n(?:E;|ap(?:;|prox;)|e(?:;|q(?:;|q;))|sim;)|o(?:a(?:ng;|rr;)|brk;|ng(?:left(?:arrow;|rightarrow;)|m
apsto;|rightarrow;)|oparrow(?:left;|right;)|p(?:ar;|f;|lus;)|times;|w(?:ast;|bar;)|z(?:;|enge;|f;))|par(?:;|lt;)|r(?:arr;|corner;|har(?:;|d;)|m;|tri;)|s(?:aquo;|cr;|h;|im(?:;|e;|g;)|q(?:b;|uo(?:;|r;))|trok;)|t(?:;|c(?:c;|ir;)|dot;|hree;|imes;|larr;|quest;|r(?:Par;|i(?:;|e;|f;))|)|ur(?:dshar;|uhar;)|v(?:ertneqq;|nE;))|m(?:DDot;|a(?:cr;?|l(?:e;|t(?:;|ese;))|p(?:;|sto(?:;|down;|left;|up;))|rker;)|c(?:omma;|y;)|dash;|easuredangle;|fr;|ho;|i(?:cro;?|d(?:;|ast;|cir;|dot;?)|nus(?:;|b;|d(?:;|u;)))|l(?:cp;|dr;)|nplus;|o(?:dels;|pf;)|p;|s(?:cr;|tpos;)|u(?:;|ltimap;|map;))|n(?:G(?:g;|t(?:;|v;))|L(?:eft(?:arrow;|rightarrow;)|l;|t(?:;|v;))|Rightarrow;|V(?:Dash;|dash;)|a(?:bla;|cute;|ng;|p(?:;|E;|id;|os;|prox;)|tur(?:;|al(?:;|s;)))|b(?:sp;?|ump(?:;|e;))|c(?:a(?:p;|ron;)|edil;|ong(?:;|dot;)|up;|y;)|dash;|e(?:;|Arr;|ar(?:hk;|r(?:;|ow;))|dot;|quiv;|s(?:ear;|im;)|xist(?:;|s;))|fr;|g(?:E;|e(?:;|q(?:;|q;|slant;)|s;)|sim;|t(?:;|r;))|h(?:Arr;|arr;|par;)|i(?:;|s(?:;|d;)|v;)|jcy;|l(?:Arr;|E;|arr;|dr;|e(?:;|ft(?:arrow;|rightarrow;)|q(?:;|q;|slant;)|s(?:;|s;))|sim;|t(?:;|ri(?:;|e;)))|mid;|o(?:pf;|t(?:;|in(?:;|E;|dot;|v(?:a;|b;|c;))|ni(?:;|v(?:a;|b;|c;))|))|p(?:ar(?:;|allel;|sl;|t;)|olint;|r(?:;|cue;|e(?:;|c(?:;|eq;))))|r(?:Arr;|arr(?:;|c;|w;)|ightarrow;|tri(?:;|e;))|s(?:c(?:;|cue;|e;|r;)|hort(?:mid;|parallel;)|im(?:;|e(?:;|q;))|mid;|par;|qsu(?:be;|pe;)|u(?:b(?:;|E;|e;|set(?:;|eq(?:;|q;)))|cc(?:;|eq;)|p(?:;|E;|e;|set(?:;|eq(?:;|q;)))))|t(?:gl;|ilde;?|lg;|riangle(?:left(?:;|eq;)|right(?:;|eq;)))|u(?:;|m(?:;|ero;|sp;))|v(?:Dash;|Harr;|ap;|dash;|g(?:e;|t;)|infin;|l(?:Arr;|e;|t(?:;|rie;))|r(?:Arr;|trie;)|sim;)|w(?:Arr;|ar(?:hk;|r(?:;|ow;))|near;))|o(?:S;|a(?:cute;?|st;)|c(?:ir(?:;|c;?)|y;)|d(?:ash;|blac;|iv;|ot;|sold;)|elig;|f(?:cir;|r;)|g(?:on;|rave;?|t;)|h(?:bar;|m;)|int;|l(?:arr;|c(?:ir;|ross;)|ine;|t;)|m(?:acr;|ega;|i(?:cron;|d;|nus;))|opf;|p(?:ar;|erp;|lus;)|r(?:;|arr;|d(?:;|er(?:;|of;)|f;?|m;?)|igof;|or;|slope;|v;)|s(?:cr;|lash;?|ol;)|ti(?:lde;?|mes(?:;|as;))|uml;?|vbar;)|p(?:ar(?:;|a(?:;|l
lel;|)|s(?:im;|l;)|t;)|cy;|er(?:cnt;|iod;|mil;|p;|tenk;)|fr;|h(?:i(?:;|v;)|mmat;|one;)|i(?:;|tchfork;|v;)|l(?:an(?:ck(?:;|h;)|kv;)|us(?:;|acir;|b;|cir;|d(?:o;|u;)|e;|mn;?|sim;|two;))|m;|o(?:intint;|pf;|und;?)|r(?:;|E;|ap;|cue;|e(?:;|c(?:;|approx;|curlyeq;|eq;|n(?:approx;|eqq;|sim;)|sim;))|ime(?:;|s;)|n(?:E;|ap;|sim;)|o(?:d;|f(?:alar;|line;|surf;)|p(?:;|to;))|sim;|urel;)|s(?:cr;|i;)|uncsp;)|q(?:fr;|int;|opf;|prime;|scr;|u(?:at(?:ernions;|int;)|est(?:;|eq;)|ot;?))|r(?:A(?:arr;|rr;|tail;)|Barr;|Har;|a(?:c(?:e;|ute;)|dic;|emptyv;|ng(?:;|d;|e;|le;)|quo;?|rr(?:;|ap;|b(?:;|fs;)|c;|fs;|hk;|lp;|pl;|sim;|tl;|w;)|t(?:ail;|io(?:;|nals;)))|b(?:arr;|brk;|r(?:ac(?:e;|k;)|k(?:e;|sl(?:d;|u;))))|c(?:aron;|e(?:dil;|il;)|ub;|y;)|d(?:ca;|ldhar;|quo(?:;|r;)|sh;)|e(?:al(?:;|ine;|part;|s;)|ct;|g;?)|f(?:isht;|loor;|r;)|h(?:ar(?:d;|u(?:;|l;))|o(?:;|v;))|i(?:ght(?:arrow(?:;|tail;)|harpoon(?:down;|up;)|left(?:arrows;|harpoons;)|rightarrows;|squigarrow;|threetimes;)|ng;|singdotseq;)|l(?:arr;|har;|m;)|moust(?:;|ache;)|nmid;|o(?:a(?:ng;|rr;)|brk;|p(?:ar;|f;|lus;)|times;)|p(?:ar(?:;|gt;)|polint;)|rarr;|s(?:aquo;|cr;|h;|q(?:b;|uo(?:;|r;)))|t(?:hree;|imes;|ri(?:;|e;|f;|ltri;))|uluhar;|x;)|s(?:acute;|bquo;|c(?:;|E;|a(?:p;|ron;)|cue;|e(?:;|dil;)|irc;|n(?:E;|ap;|sim;)|polint;|sim;|y;)|dot(?:;|b;|e;)|e(?:Arr;|ar(?:hk;|r(?:;|ow;))|ct;?|mi;|swar;|tm(?:inus;|n;)|xt;)|fr(?:;|own;)|h(?:arp;|c(?:hcy;|y;)|ort(?:mid;|parallel;)|y;?)|i(?:gma(?:;|f;|v;)|m(?:;|dot;|e(?:;|q;)|g(?:;|E;)|l(?:;|E;)|ne;|plus;|rarr;))|larr;|m(?:a(?:llsetminus;|shp;)|eparsl;|i(?:d;|le;)|t(?:;|e(?:;|s;)))|o(?:ftcy;|l(?:;|b(?:;|ar;))|pf;)|pa(?:des(?:;|uit;)|r;)|q(?:c(?:ap(?:;|s;)|up(?:;|s;))|su(?:b(?:;|e;|set(?:;|eq;))|p(?:;|e;|set(?:;|eq;)))|u(?:;|ar(?:e;|f;)|f;))|rarr;|s(?:cr;|etmn;|mile;|tarf;)|t(?:ar(?:;|f;)|r(?:aight(?:epsilon;|phi;)|ns;))|u(?:b(?:;|E;|dot;|e(?:;|dot;)|mult;|n(?:E;|e;)|plus;|rarr;|s(?:et(?:;|eq(?:;|q;)|neq(?:;|q;))|im;|u(?:b;|p;)))|cc(?:;|approx;|curlyeq;|eq;|n(?:approx;|eqq;|sim;)|sim;)|m;|ng;|p(?:1;?|2;?|3;?|;|E;|d(
?:ot;|sub;)|e(?:;|dot;)|hs(?:ol;|ub;)|larr;|mult;|n(?:E;|e;)|plus;|s(?:et(?:;|eq(?:;|q;)|neq(?:;|q;))|im;|u(?:b;|p;))))|w(?:Arr;|ar(?:hk;|r(?:;|ow;))|nwar;)|zlig;?)|t(?:a(?:rget;|u;)|brk;|c(?:aron;|edil;|y;)|dot;|elrec;|fr;|h(?:e(?:re(?:4;|fore;)|ta(?:;|sym;|v;))|i(?:ck(?:approx;|sim;)|nsp;)|k(?:ap;|sim;)|orn;?)|i(?:lde;|mes(?:;|b(?:;|ar;)|d;|)|nt;)|o(?:ea;|p(?:;|bot;|cir;|f(?:;|ork;))|sa;)|prime;|r(?:ade;|i(?:angle(?:;|down;|left(?:;|eq;)|q;|right(?:;|eq;))|dot;|e;|minus;|plus;|sb;|time;)|pezium;)|s(?:c(?:r;|y;)|hcy;|trok;)|w(?:ixt;|ohead(?:leftarrow;|rightarrow;)))|u(?:Arr;|Har;|a(?:cute;?|rr;)|br(?:cy;|eve;)|c(?:irc;?|y;)|d(?:arr;|blac;|har;)|f(?:isht;|r;)|grave;?|h(?:ar(?:l;|r;)|blk;)|l(?:c(?:orn(?:;|er;)|rop;)|tri;)|m(?:acr;|l;?)|o(?:gon;|pf;)|p(?:arrow;|downarrow;|harpoon(?:left;|right;)|lus;|si(?:;|h;|lon;)|uparrows;)|r(?:c(?:orn(?:;|er;)|rop;)|ing;|tri;)|scr;|t(?:dot;|ilde;|ri(?:;|f;))|u(?:arr;|ml;?)|wangle;)|v(?:Arr;|Bar(?:;|v;)|Dash;|a(?:ngrt;|r(?:epsilon;|kappa;|nothing;|p(?:hi;|i;|ropto;)|r(?:;|ho;)|s(?:igma;|u(?:bsetneq(?:;|q;)|psetneq(?:;|q;)))|t(?:heta;|riangle(?:left;|right;))))|cy;|dash;|e(?:e(?:;|bar;|eq;)|llip;|r(?:bar;|t;))|fr;|ltri;|nsu(?:b;|p;)|opf;|prop;|rtri;|s(?:cr;|u(?:bn(?:E;|e;)|pn(?:E;|e;)))|zigzag;)|w(?:circ;|e(?:d(?:bar;|ge(?:;|q;))|ierp;)|fr;|opf;|p;|r(?:;|eath;)|scr;)|x(?:c(?:ap;|irc;|up;)|dtri;|fr;|h(?:Arr;|arr;)|i;|l(?:Arr;|arr;)|map;|nis;|o(?:dot;|p(?:f;|lus;)|time;)|r(?:Arr;|arr;)|s(?:cr;|qcup;)|u(?:plus;|tri;)|vee;|wedge;)|y(?:ac(?:ute;?|y;)|c(?:irc;|y;)|en;?|fr;|icy;|opf;|scr;|u(?:cy;|ml;?))|z(?:acute;|c(?:aron;|y;)|dot;|e(?:etrf;|ta;)|fr;|hcy;|igrarr;|opf;|scr;|w(?:j;|nj;)))|[\s\S]/g;
1509
// Upper bound on the length of a named character reference. The longest
// alternatives in NAMEDCHARREF above (for example
// "CounterClockwiseContourIntegral;") are 32 characters long.
var NAMEDCHARREF_MAXLEN = 32;

// Regular expression constants used by the tokenizer and parser

// Note that \r is listed in the negated character class of all of these
// regexps, so a match always stops in front of a CR: it will need
// to be converted to LF by the scanChars() function.

// Runs of attribute-value / tag-name / attribute-name characters. Each
// one stops at the delimiters of its context, at NUL, and (for the two
// name regexps) at uppercase ASCII letters.
var DBLQUOTEATTRVAL = /[^\r"&\u0000]+/g;
var SINGLEQUOTEATTRVAL = /[^\r'&\u0000]+/g;
var UNQUOTEDATTRVAL = /[^\r\t\n\f &>\u0000]+/g;
var TAGNAME = /[^\r\t\n\f \/>A-Z\u0000]+/g;
var ATTRNAME = /[^\r\t\n\f \/=>A-Z\u0000]+/g;

// Possibly empty runs of text characters. Each one stops at CR, NUL and
// U+FFFF, plus the character(s) that are significant in its context
// ("]" for CDATA, "&" and "<" for data, "<" for raw text).
var CDATATEXT = /[^\]\r\u0000\uffff]*/g;
var DATATEXT = /[^&<\r\u0000\uffff]*/g;
var RAWTEXT = /[^<\r\u0000\uffff]*/g;
var PLAINTEXT = /[^\r\u0000\uffff]*/g;
// Since we don't use the sticky ('y') flag, '|[\s\S]' is added to the end
// of SIMPLETAG and SIMPLEATTR so that they always match at the position
// given by lastIndex. This prevents us from scanning past the lastIndex
// set. (Alternation is tried left to right, so the fallback [\s\S] is only
// used when the desired pattern fails at that position. A fallback match
// has no capture groups set and is exactly 1 char long, while the desired
// matches are always longer than 1 char.)
var SIMPLETAG = /(?:(\/)?([a-z]+)>)|[\s\S]/g;
var SIMPLEATTR = /(?:([-a-z]+)[ \t\n\f]*=[ \t\n\f]*('[^'&\r\u0000]*'|"[^"&\r\u0000]*"|[^\t\n\r\f "&'\u0000>][^&> \t\n\r\f\u0000]*[ \t\n\f]))|[\s\S]/g;

// Whitespace here means TAB, LF, FF, CR or SPACE.
var NONWS = /[^\x09\x0A\x0C\x0D\x20]/; // a single non-whitespace character
var ALLNONWS = /[^\x09\x0A\x0C\x0D\x20]/g; // like above, with g flag
var NONWSNONNUL = /[^\x00\x09\x0A\x0C\x0D\x20]/; // don't allow NUL either
var LEADINGWS = /^[\x09\x0A\x0C\x0D\x20]+/; // whitespace run at the start
var NULCHARS = /\x00/g; // every NUL character
1539
1540/***
1541 * These are utility functions that don't use any of the parser's
1542 * internal state.
1543 */
/**
 * Convert an array of UTF-16 code units into a string.
 *
 * Short buffers are converted with a single String.fromCharCode call.
 * Longer ones are converted slice by slice so that the number of
 * arguments passed to apply() stays bounded and the stack is not busted.
 *
 * @param {number[]} buf - code units to convert
 * @returns {string} the corresponding string
 */
function buf2str(buf) {
  var MAX_ARGS = 16384;
  var total = buf.length;

  if (total >= MAX_ARGS) {
    var pieces = [];
    for (var start = 0; start < total; start += MAX_ARGS) {
      var slice = buf.slice(start, start + MAX_ARGS);
      pieces.push(String.fromCharCode.apply(String, slice));
    }
    return pieces.join('');
  }

  return String.fromCharCode.apply(String, buf);
}
1556
/**
 * Convert a string into an array of its UTF-16 code units.
 *
 * @param {string} s - the string to convert
 * @returns {number[]} one charCodeAt() value per index of s
 */
function str2buf(s) {
  var units = [];
  var pos = 0;
  while (pos < s.length) {
    units.push(s.charCodeAt(pos));
    pos++;
  }
  return units;
}
1564
/**
 * Determine whether the element is a member of the set.
 *
 * The set is normally an object that maps namespace URIs to objects; those
 * objects in turn map local tag names to true for every tag that belongs
 * to the set. As a convenience, the set may also be a plain string, which
 * tests for the HTML element with that local name.
 *
 * @param {Object} elt - object with namespaceURI and localName properties
 * @param {(Object|string)} set - namespace map, or an HTML tag name
 * @returns {*} truthy if elt is in the set. For an object set this is the
 *     stored value (or a falsy lookup result); for a string it is a boolean.
 */
function isA(elt, set) {
  if (typeof set !== 'string') {
    var byLocalName = set[elt.namespaceURI];
    return byLocalName && byLocalName[elt.localName];
  }

  // convenience case for testing a particular HTML element
  var inHtmlNamespace = elt.namespaceURI === NAMESPACE.HTML;
  return inHtmlNamespace && elt.localName === set;
}
1577
/**
 * Test whether a node belongs to mathmlTextIntegrationPointSet.
 *
 * @param {Object} n - node with namespaceURI and localName properties
 * @returns {*} the result of isA() for that set (truthy when n is a member)
 */
function isMathmlTextIntegrationPoint(n) {
  var integrationPoints = mathmlTextIntegrationPointSet;
  return isA(n, integrationPoints);
}
1581
/**
 * Test whether a node is an HTML integration point.
 *
 * A node qualifies if it is in htmlIntegrationPointSet, or if it is a
 * MathML <annotation-xml> element whose "encoding" attribute is, ignoring
 * case, "text/html" or "application/xhtml+xml".
 *
 * @param {Object} n - element to test
 * @returns {boolean} true if n is an HTML integration point
 */
function isHTMLIntegrationPoint(n) {
  if (isA(n, htmlIntegrationPointSet)) {
    return true;
  }

  var isAnnotationXml = n.namespaceURI === NAMESPACE.MATHML &&
                        n.localName === "annotation-xml";
  if (!isAnnotationXml) {
    return false;
  }

  // Only lowercase a non-empty attribute value; null/"" never match below.
  var rawEncoding = n.getAttribute("encoding");
  var normalized = rawEncoding ? rawEncoding.toLowerCase() : rawEncoding;

  switch (normalized) {
  case "text/html":
  case "application/xhtml+xml":
    return true;
  default:
    return false;
  }
}
1594
/**
 * Map a tag name through the svgTagNameAdjustments table.
 *
 * @param {string} name - tag name to look up
 * @returns {*} the table entry for name if there is one, otherwise name
 */
function adjustSVGTagName(name) {
  return (name in svgTagNameAdjustments) ? svgTagNameAdjustments[name] : name;
}
1601
/**
 * Rewrite, in place, every attribute name that has an entry in the
 * svgAttrAdjustments table.
 *
 * @param {Array[]} attrs - attribute entries; element 0 of each entry is
 *     the attribute name and is overwritten when the table has an entry
 */
function adjustSVGAttributes(attrs) {
  var count = attrs.length;
  for (var idx = 0; idx < count; idx++) {
    var entry = attrs[idx];
    var currentName = entry[0];
    if (currentName in svgAttrAdjustments) {
      entry[0] = svgAttrAdjustments[currentName];
    }
  }
}
1609
/**
 * Rename, in place, the first attribute called "definitionurl" to
 * "definitionURL". Later entries are left alone.
 *
 * @param {Array[]} attrs - attribute entries; element 0 of each entry is
 *     the attribute name
 */
function adjustMathMLAttributes(attrs) {
  var count = attrs.length;
  var pos = 0;
  while (pos < count) {
    var entry = attrs[pos];
    if (entry[0] === "definitionurl") {
      entry[0] = "definitionURL";
      return; // only the first occurrence is adjusted
    }
    pos += 1;
  }
}
1618
/**
 * Tag, in place, every attribute whose name appears in the
 * foreignAttributes table by appending the table's value to the entry.
 *
 * Attributes with namespaces therefore get a 3rd element:
 * [Qname, value, namespace]
 *
 * @param {Array[]} attrs - attribute entries; element 0 of each entry is
 *     the attribute name
 */
function adjustForeignAttributes(attrs) {
  var count = attrs.length;
  for (var idx = 0; idx < count; idx++) {
    var entry = attrs[idx];
    var qname = entry[0];
    if (!(qname in foreignAttributes)) {
      continue;
    }
    entry.push(foreignAttributes[qname]);
  }
}
1628
/**
 * For each attribute in attrs, if elt doesn't already have an attribute
 * by that name, add the attribute to elt. Existing attributes on elt are
 * never overwritten.
 *
 * XXX: namespaces are ignored for now
 *
 * @param {Array[]} attrs - attribute entries of the form [name, value, ...]
 * @param {Object} elt - element providing hasAttribute() and _setAttribute()
 */
function transferAttributes(attrs, elt) {
  var count = attrs.length;
  for (var idx = 0; idx < count; idx++) {
    var entry = attrs[idx];
    var attrName = entry[0];
    var attrValue = entry[1];
    if (!elt.hasAttribute(attrName)) {
      elt._setAttribute(attrName, attrValue);
    }
  }
}
1639
/***
 * The ElementStack class
 *
 * A stack of open elements. `elements` holds the entries from oldest
 * (index 0) to newest, and `top` mirrors the newest entry.
 */
HTMLParser.ElementStack = function ElementStack() {
  this.elements = [];
  this.top = null; // stack.top is the "current node" in the spec
};

/*
// Debugging aid only
HTMLParser.ElementStack.prototype.toString = function() {
  var names = this.elements.map(function(elt) { return elt.localName; });
  return "STACK: " + names.join("-");
}
*/

// Push e and make it the current node.
HTMLParser.ElementStack.prototype.push = function(e) {
  this.top = e;
  this.elements.push(e);
};

// Pop the current node; top becomes the new last entry
// (undefined once the stack is empty). The argument is ignored.
HTMLParser.ElementStack.prototype.pop = function(e) {
  var open = this.elements;
  open.pop();
  this.top = open[open.length - 1];
};

// Pop elements off the stack up to and including the first
// element with the specified (HTML) tagname.
// The search never tests index 0: if nothing above it matches,
// the whole stack is emptied.
HTMLParser.ElementStack.prototype.popTag = function(tag) {
  var open = this.elements;
  var cut = open.length - 1;
  while (cut > 0 && !isA(open[cut], tag)) {
    cut--;
  }
  open.length = cut;
  this.top = open[cut - 1];
};

// Pop elements off the stack up to and including the first
// element that is an instance of the specified type.
// Same index-0 behavior as popTag().
HTMLParser.ElementStack.prototype.popElementType = function(type) {
  var open = this.elements;
  var cut = open.length - 1;
  while (cut > 0 && !(open[cut] instanceof type)) {
    cut--;
  }
  open.length = cut;
  this.top = open[cut - 1];
};

// Pop elements off the stack up to and including the element e.
// Note that this is very different from removeElement()
// This requires that e is on the stack.
HTMLParser.ElementStack.prototype.popElement = function(e) {
  var open = this.elements;
  var cut = open.length - 1;
  while (cut > 0 && open[cut] !== e) {
    cut--;
  }
  open.length = cut;
  this.top = open[cut - 1];
};

// Remove a specific element from the stack, leaving the others in place.
// Do nothing if the element is not on the stack
HTMLParser.ElementStack.prototype.removeElement = function(e) {
  if (this.top === e) {
    this.pop();
    return;
  }
  var at = this.elements.lastIndexOf(e);
  if (at >= 0) {
    this.elements.splice(at, 1);
  }
};

// Pop elements until the current node is a member of the set.
HTMLParser.ElementStack.prototype.clearToContext = function(set) {
  var open = this.elements;
  var keep = open.length - 1;
  // Note that we don't loop to 0. Never pop the <html> elt off.
  while (keep > 0 && !isA(open[keep], set)) {
    keep--;
  }
  open.length = keep + 1;
  this.top = open[keep];
};

// True if an element with the given tag is anywhere on the stack:
// a scope test with an empty boundary set.
HTMLParser.ElementStack.prototype.contains = function(tag) {
  var noBoundaries = Object.create(null);
  return this.inSpecificScope(tag, noBoundaries);
};

// Walk from the top of the stack downwards. Return true on reaching an
// element matching tag, false on reaching a member of the boundary set
// first (or on running out of elements).
HTMLParser.ElementStack.prototype.inSpecificScope = function(tag, set) {
  var open = this.elements;
  var depth = open.length;
  while (depth-- > 0) {
    var candidate = open[depth];
    if (isA(candidate, tag)) return true;
    if (isA(candidate, set)) return false;
  }
  return false;
};

// Like the above, but for a specific element, not a tagname
HTMLParser.ElementStack.prototype.elementInSpecificScope = function(target, set) {
  var open = this.elements;
  var depth = open.length;
  while (depth-- > 0) {
    var candidate = open[depth];
    if (candidate === target) return true;
    if (isA(candidate, set)) return false;
  }
  return false;
};

// Like the above, but for an element interface, not a tagname
HTMLParser.ElementStack.prototype.elementTypeInSpecificScope = function(target, set) {
  var open = this.elements;
  var depth = open.length;
  while (depth-- > 0) {
    var candidate = open[depth];
    if (candidate instanceof target) return true;
    if (isA(candidate, set)) return false;
  }
  return false;
};

// The remaining scope tests bind one of the predefined boundary sets.

HTMLParser.ElementStack.prototype.inScope = function(tag) {
  var boundaries = inScopeSet;
  return this.inSpecificScope(tag, boundaries);
};

HTMLParser.ElementStack.prototype.elementInScope = function(e) {
  var boundaries = inScopeSet;
  return this.elementInSpecificScope(e, boundaries);
};

HTMLParser.ElementStack.prototype.elementTypeInScope = function(type) {
  var boundaries = inScopeSet;
  return this.elementTypeInSpecificScope(type, boundaries);
};

HTMLParser.ElementStack.prototype.inButtonScope = function(tag) {
  var boundaries = inButtonScopeSet;
  return this.inSpecificScope(tag, boundaries);
};

HTMLParser.ElementStack.prototype.inListItemScope = function(tag) {
  var boundaries = inListItemScopeSet;
  return this.inSpecificScope(tag, boundaries);
};

HTMLParser.ElementStack.prototype.inTableScope = function(tag) {
  var boundaries = inTableScopeSet;
  return this.inSpecificScope(tag, boundaries);
};

HTMLParser.ElementStack.prototype.inSelectScope = function(tag) {
  // Can't implement this one with inSpecificScope, since it involves
  // a set defined by inverting another set. So implement manually:
  // everything except HTML <optgroup> and <option> is a boundary.
  var open = this.elements;
  for (var depth = open.length - 1; depth >= 0; depth--) {
    var node = open[depth];
    if (node.namespaceURI !== NAMESPACE.HTML) return false;
    var name = node.localName;
    if (name === tag) return true;
    if (!(name === "optgroup" || name === "option")) return false;
  }
  return false;
};

// Pop elements from the top of the stack for as long as they are members
// of the implied-end-tags set (the "thorough" variant of the set when
// thorough is truthy). If butnot is given, stop at the first element
// that matches it.
HTMLParser.ElementStack.prototype.generateImpliedEndTags = function(butnot, thorough) {
  var closable = thorough ? thoroughImpliedEndTagsSet : impliedEndTagsSet;
  var open = this.elements;
  var last = open.length - 1;
  while (last >= 0) {
    var node = open[last];
    if (butnot && isA(node, butnot)) break;
    if (!isA(node, closable)) break;
    last--;
  }

  open.length = last + 1;
  this.top = open[last];
};
1800
1801/***
1802 * The ActiveFormattingElements class
1803 */
1804HTMLParser.ActiveFormattingElements = function AFE() {
1805 this.list = []; // elements
1806 this.attrs = []; // attribute tokens for cloning
1807};
1808
1809HTMLParser.ActiveFormattingElements.prototype.MARKER = { localName: "|" };
1810
1811/*
1812// For debugging
1813HTMLParser.ActiveFormattingElements.prototype.toString = function() {
1814 return "AFE: " +
1815 this.list.map(function(e) { return e.localName; }).join("-");
1816}
1817*/
1818
1819HTMLParser.ActiveFormattingElements.prototype.insertMarker = function() {
1820 this.list.push(this.MARKER);
1821 this.attrs.push(this.MARKER);
1822};
1823
1824HTMLParser.ActiveFormattingElements.prototype.push = function(elt, attrs) {
1825 // Scan backwards: if there are already 3 copies of this element
1826 // before we encounter a marker, then drop the last one
1827 var count = 0;
1828 for(var i = this.list.length-1; i >= 0; i--) {
1829 if (this.list[i] === this.MARKER) break;
1830 // equal() is defined below
1831 if (equal(elt, this.list[i], this.attrs[i])) {
1832 count++;
1833 if (count === 3) {
1834 this.list.splice(i, 1);
1835 this.attrs.splice(i, 1);
1836 break;
1837 }
1838 }
1839 }
1840
1841
1842 // Now push the element onto the list
1843 this.list.push(elt);
1844
1845 // Copy the attributes and push those on, too
1846 var attrcopy = [];
1847 for(var ii = 0; ii < attrs.length; ii++) {
1848 attrcopy[ii] = attrs[ii];
1849 }
1850
1851 this.attrs.push(attrcopy);
1852
1853 // This function defines equality of two elements for the purposes
1854 // of the AFE list. Note that it compares the new elements
1855 // attributes to the saved array of attributes associated with
1856 // the old element because a script could have changed the
1857 // old element's set of attributes
1858 function equal(newelt, oldelt, oldattrs) {
1859 if (newelt.localName !== oldelt.localName) return false;
1860 if (newelt._numattrs !== oldattrs.length) return false;
1861 for(var i = 0, n = oldattrs.length; i < n; i++) {
1862 var oldname = oldattrs[i][0];
1863 var oldval = oldattrs[i][1];
1864 if (!newelt.hasAttribute(oldname)) return false;
1865 if (newelt.getAttribute(oldname) !== oldval) return false;
1866 }
1867 return true;
1868 }
1869};
1870
1871HTMLParser.ActiveFormattingElements.prototype.clearToMarker = function() {
1872 for(var i = this.list.length-1; i >= 0; i--) {
1873 if (this.list[i] === this.MARKER) break;
1874 }
1875 if (i < 0) i = 0;
1876 this.list.length = i;
1877 this.attrs.length = i;
1878};
1879
1880// Find and return the last element with the specified tag between the
1881// end of the list and the last marker on the list.
1882// Used when parsing <a> in_body_mode()
1883HTMLParser.ActiveFormattingElements.prototype.findElementByTag = function(tag) {
1884 for(var i = this.list.length-1; i >= 0; i--) {
1885 var elt = this.list[i];
1886 if (elt === this.MARKER) break;
1887 if (elt.localName === tag) return elt;
1888 }
1889 return null;
1890};
1891
1892HTMLParser.ActiveFormattingElements.prototype.indexOf = function(e) {
1893 return this.list.lastIndexOf(e);
1894};
1895
1896// Find the element e in the list and remove it
1897// Used when parsing <a> in_body()
1898HTMLParser.ActiveFormattingElements.prototype.remove = function(e) {
1899 var idx = this.list.lastIndexOf(e);
1900 if (idx !== -1) {
1901 this.list.splice(idx, 1);
1902 this.attrs.splice(idx, 1);
1903 }
1904};
1905
// Swap the last occurrence of element a in the list for element b, and
// store attrs as the saved attribute array for that slot. Does nothing
// if a is not on the list.
// XXX: Do I need to handle attributes here?
HTMLParser.ActiveFormattingElements.prototype.replace = function(a, b, attrs) {
  var slot = this.list.lastIndexOf(a);
  if (slot === -1) return;
  this.list[slot] = b;
  this.attrs[slot] = attrs;
};
1915
// Find the last occurrence of a in the list and insert b immediately
// after it (i.e. at the next higher index). Does nothing if a is not
// on the list.
// This is only used to insert a bookmark object, so the value stored in
// the parallel attrs array doesn't really matter; b is stored there just
// to keep the two arrays the same length.
HTMLParser.ActiveFormattingElements.prototype.insertAfter = function(a,b) {
  var idx = this.list.lastIndexOf(a);
  if (idx !== -1) {
    // splice(idx, 0, b) would place b *before* a. The adoption agency
    // algorithm needs the bookmark after the element, so use idx + 1.
    this.list.splice(idx + 1, 0, b);
    this.attrs.splice(idx + 1, 0, b);
  }
};
1926
1927
1928
1929
1930/***
1931 * This is the parser factory function. It is the return value of
1932 * the outer closure that it is defined within. Most of the parser
1933 * implementation details are inside this function.
1934 */
1935function HTMLParser(address, fragmentContext, options) {
1936 /***
1937 * These are the parser's state variables
1938 */
1939 // Scanner state
1940 var chars = null;
1941 var numchars = 0; // Length of chars
1942 var nextchar = 0; // Index of next char
1943 var input_complete = false; // Becomes true when end() called.
1944 var scanner_skip_newline = false; // If previous char was CR
1945 var reentrant_invocations = 0;
1946 var saved_scanner_state = [];
1947 var leftovers = "";
1948 var first_batch = true;
1949 var paused = 0; // Becomes non-zero while loading scripts
1950
1951
1952 // Tokenizer state
1953 var tokenizer = data_state; // Current tokenizer state
1954 var return_state;
1955 var character_reference_code;
1956 var tagnamebuf = "";
1957 var lasttagname = ""; // holds the target end tag for text states
1958 var tempbuf = [];
1959 var attrnamebuf = "";
1960 var attrvaluebuf = "";
1961 var commentbuf = [];
1962 var doctypenamebuf = [];
1963 var doctypepublicbuf = [];
1964 var doctypesystembuf = [];
1965 var attributes = [];
1966 var is_end_tag = false;
1967
1968 // Tree builder state
1969 var parser = initial_mode; // Current insertion mode
1970 var originalInsertionMode = null; // A saved insertion mode
1971 var templateInsertionModes = []; // Stack of template insertion modes.
1972 var stack = new HTMLParser.ElementStack(); // Stack of open elements
1973 var afe = new HTMLParser.ActiveFormattingElements(); // mis-nested tags
1974 var fragment = (fragmentContext!==undefined); // For innerHTML, etc.
1975 var head_element_pointer = null;
1976 var form_element_pointer = null;
1977 var scripting_enabled = true;
1978 if (fragmentContext) {
1979 scripting_enabled = fragmentContext.ownerDocument._scripting_enabled;
1980 }
1981 if (options && options.scripting_enabled === false)
1982 scripting_enabled = false;
1983 var frameset_ok = true;
1984 var force_quirks = false;
1985 var pending_table_text;
1986 var text_integration_mode; // XXX a spec bug workaround?
1987
1988 // A single run of characters, buffered up to be sent to
1989 // the parser as a single string.
1990 var textrun = [];
1991 var textIncludesNUL = false;
1992 var ignore_linefeed = false;
1993
/***
 * This is the public parser object that the factory function returns.
 * Do not confuse it with the variable "parser", which holds the current
 * state (insertion mode) of the tree builder's state machine.
 */
var htmlparser = {
  // Return the document that this parser is building.
  document: function() {
    return doc;
  },

  // Internal function used from HTMLScriptElement to pause the
  // parser while a script is being loaded from the network.
  // Calls nest: each pause() must be balanced by a resume().
  pause: function() {
    paused++;
  },

  // Called when a script finishes loading
  resume: function() {
    paused--;
    // XXX: added this to force a resumption.
    // Is this the right thing to do?
    this.parse("");
  },

  // Parse the HTML text s.
  // The second argument should be true if there is no more
  // text to be parsed, and should be false or omitted otherwise.
  // The second argument must not be set for recursive invocations
  // from document.write()
  parse: function(s, end) {
    // While paused, just remember the text; it is picked up by the
    // next top-level invocation.
    if (paused > 0) {
      leftovers += s;
      return;
    }

    if (reentrant_invocations !== 0) {
      // The re-entrant case: scan the new text with a temporary
      // scanner state, then put the interrupted state back.
      reentrant_invocations++;

      // Save current scanner state
      saved_scanner_state.push(chars, numchars, nextchar);

      // Set new scanner state
      chars = s;
      numchars = s.length;
      nextchar = 0;

      // Now scan as many of these new chars as we can
      scanChars();

      leftovers = chars.substring(nextchar, numchars);

      // Restore old scanner state (popped in reverse push order)
      nextchar = saved_scanner_state.pop();
      numchars = saved_scanner_state.pop();
      chars = saved_scanner_state.pop();

      // If there were leftover chars from this invocation
      // insert them into the pending invocation's buffer
      // and trim already processed chars at the same time
      if (leftovers) {
        chars = leftovers + chars.substring(nextchar);
        numchars = chars.length;
        nextchar = 0;
        leftovers = "";
      }

      reentrant_invocations--;
      return;
    }

    // A normal, top-level invocation: first prepend anything that
    // was left unscanned last time.
    if (leftovers) {
      s = leftovers + s;
      leftovers = "";
    }

    // Add a special marker character to the end of the buffer. If
    // the scanner is at the end of the buffer and input_complete is
    // set, then this character will transform into an EOF token.
    // Having an actual character that represents EOF in the
    // character buffer makes lookahead regexp matching work more
    // easily, and this is important for character references.
    if (end) {
      s += "\uFFFF";
      input_complete = true; // Makes scanChars() send EOF
    }

    chars = s;
    numchars = s.length;
    nextchar = 0;

    if (first_batch) {
      // We skip a leading Byte Order Mark (\uFEFF)
      // on first batch of text we're given
      first_batch = false;
      if (chars.charCodeAt(0) === 0xFEFF) nextchar = 1;
    }

    reentrant_invocations++;
    scanChars();
    // Whatever the scanner could not consume waits for the next call
    leftovers = chars.substring(nextchar, numchars);
    reentrant_invocations--;
  }
};
2111
2112
2113 // This is the document we'll be building up
2114 var doc = new Document(true, address);
2115
2116 // The document needs to know about the parser, for document.write().
2117 // This _parser property will be deleted when we're done parsing.
2118 doc._parser = htmlparser;
2119
2120 // XXX I think that any document we use this parser on should support
2121 // scripts. But I may need to configure that through a parser parameter
2122 // Only documents with windows ("browsing contexts" to be precise)
2123 // allow scripting.
2124 doc._scripting_enabled = scripting_enabled;
2125
2126
2127 /***
2128 * The actual code of the HTMLParser() factory function begins here.
2129 */
2130
// Fragment case (innerHTML parsing): seed the parser state from the
// context element before any text is parsed.
if (fragmentContext) {
  // Carry the quirks flags over from the context element's document
  if (fragmentContext.ownerDocument._quirks) {
    doc._quirks = true;
  }
  if (fragmentContext.ownerDocument._limitedQuirks) {
    doc._limitedQuirks = true;
  }

  // Set the initial tokenizer state from the context element's name
  // (HTML-namespace context elements only)
  if (fragmentContext.namespaceURI === NAMESPACE.HTML) {
    switch(fragmentContext.localName) {
    case "textarea":
    case "title":
      tokenizer = rcdata_state;
      break;
    case "noscript":
      // Only treated as raw text when scripting is enabled
      if (scripting_enabled) {
        tokenizer = plaintext_state;
      }
      break;
    case "iframe":
    case "noembed":
    case "noframes":
    case "plaintext":
    case "script":
    case "style":
    case "xmp":
      tokenizer = plaintext_state;
      break;
    }
  }

  // Create the root html element, make it the only open element, and
  // derive the insertion mode from the context element.
  var root = doc.createElement("html");
  doc._appendChild(root);
  stack.push(root);
  if (fragmentContext instanceof impl.HTMLTemplateElement) {
    templateInsertionModes.push(in_template_mode);
  }
  resetInsertionMode();

  // The form element pointer is the nearest form element, starting at
  // the context element itself and walking up through parentElement.
  var e = fragmentContext;
  while (e !== null) {
    if (e instanceof impl.HTMLFormElement) {
      form_element_pointer = e;
      break;
    }
    e = e.parentElement;
  }
}
2174
2175 /***
2176 * Scanner functions
2177 */
// Loop through the characters in chars, and pass them one at a time
// to the tokenizer FSM. Return when no more characters can be processed
// (This may leave 1 or more characters in the buffer: like a CR
// waiting to see if the next char is LF, or for states that require
// lookahead...)
//
// How a character is delivered depends on the `lookahead` property of
// the current tokenizer state function:
//   undefined - the state gets one code point at a time; CR and CRLF
//               are normalized to a single LF here.
//   number    - the state wants |n| characters of lookahead; it is
//               called as tokenizer(codepoint, s, eof), where s is the
//               lookahead string (null when n is negative).
//   string    - the state wants everything up to and including the next
//               occurrence of that string.
// In the two lookahead modes nextchar is not advanced here; the state
// function is expected to consume what it uses.
function scanChars() {
  var codepoint, s, pattern, eof, n, needsString, pos;

  while(nextchar < numchars) {

    // If we just tokenized a </script> tag, then the paused flag
    // may have been set to tell us to stop tokenizing while
    // the script is loading
    if (paused > 0) {
      return;
    }

    switch(typeof tokenizer.lookahead) {
    case 'undefined':
      codepoint = chars.charCodeAt(nextchar++);
      if (scanner_skip_newline) {
        // The previous buffer ended with a CR, for which a LF has
        // already been emitted. If this character is the LF of that
        // CRLF pair, drop it.
        scanner_skip_newline = false;
        if (codepoint === 0x000A) {
          // The LF was already consumed by the nextchar++ above, so
          // nextchar must NOT be advanced again here: doing so would
          // silently swallow the character after the LF (possibly the
          // EOF sentinel).
          continue;
        }
      }
      switch(codepoint) {
      case 0x000D:
        // CR always turns into LF, but if the next character
        // is LF, then that second LF is skipped.
        if (nextchar < numchars) {
          if (chars.charCodeAt(nextchar) === 0x000A)
            nextchar++;
        }
        else {
          // We don't know the next char right now, so we
          // can't check if it is a LF. So set a flag
          scanner_skip_newline = true;
        }

        // In either case, emit a LF
        tokenizer(0x000A);

        break;
      case 0xFFFF:
        // \uFFFF is the EOF sentinel only when it is the very last
        // character of the final buffer; otherwise it is ordinary input.
        if (input_complete && nextchar === numchars) {
          tokenizer(EOF); // codepoint will be 0xFFFF here
          break;
        }
        /* falls through */
      default:
        tokenizer(codepoint);
        break;
      }
      break;

    case 'number':
      codepoint = chars.charCodeAt(nextchar);

      // The only tokenizer states that require fixed lookahead
      // only consume alphanum characters, so we don't have
      // to worry about CR and LF in this case

      // tokenizer wants n chars of lookahead; a negative count means
      // the state does not need the lookahead string itself
      n = tokenizer.lookahead;
      needsString = true;
      if (n < 0) {
        needsString = false;
        n = -n;
      }

      if (n < numchars - nextchar) {
        // If we can look ahead that far
        s = needsString ? chars.substring(nextchar, nextchar+n) : null;
        eof = false;
      }
      else { // if we don't have that many characters
        if (input_complete) { // If no more are coming
          // Just return what we have
          s = needsString ? chars.substring(nextchar, numchars) : null;
          eof = true;
          if (codepoint === 0xFFFF && nextchar === numchars-1)
            codepoint = EOF;
        }
        else {
          // Return now and wait for more chars later
          return;
        }
      }
      tokenizer(codepoint, s, eof);
      break;

    case 'string':
      codepoint = chars.charCodeAt(nextchar);

      // tokenizer wants characters up to a matching string
      pattern = tokenizer.lookahead;
      pos = chars.indexOf(pattern, nextchar);
      if (pos !== -1) {
        s = chars.substring(nextchar, pos + pattern.length);
        eof = false;
      }
      else { // No match
        // If more characters coming, wait for them
        if (!input_complete) return;

        // Otherwise, we've got to return what we've got
        s = chars.substring(nextchar, numchars);
        if (codepoint === 0xFFFF && nextchar === numchars-1)
          codepoint = EOF;
        eof = true;
      }

      // The tokenizer states that require this kind of
      // lookahead have to be careful to handle CR characters
      // correctly
      tokenizer(codepoint, s, eof);
      break;
    }
  }
}
2300
2301
2302 /***
2303 * Tokenizer utility functions
2304 */
// Append an attribute to the pending `attributes` list as [name, value],
// or as [name] when value is undefined. If an attribute with the same
// name is already on the list, this one is ignored.
function addAttribute(name,value) {
  var duplicate = attributes.some(function(attr) {
    return attr[0] === name;
  });
  if (duplicate) return;

  attributes.push(value !== undefined ? [name, value] : [name]);
}
2319
// Shortcut for simple attributes: run SIMPLEATTR against the input
// starting at the current character (index nextchar-1).
// Returns false, consuming nothing, if the regexp captured no attribute
// name. Otherwise consumes the matched text, selects the next tokenizer
// state, records the attribute (unless one with that name is already
// present) and returns true.
// Throws if the regexp does not match at all.
function handleSimpleAttribute() {
  SIMPLEATTR.lastIndex = nextchar-1;
  var m = SIMPLEATTR.exec(chars);
  if (!m) throw new Error("should never happen");

  var name = m[1];
  if (!name) return false;

  var raw = m[2];
  var rawLength = raw.length;
  var opener = raw[0];
  var value;
  if (opener === '"' || opener === "'") {
    // Quoted value: strip the first and last characters (the quotes)
    value = raw.substring(1, rawLength-1);
    tokenizer = after_attribute_value_quoted_state;
  }
  else {
    // Unquoted value: the last captured character is dropped
    // (presumably the terminating character matched by SIMPLEATTR;
    // confirm against the regexp definition)
    value = raw.substring(0, rawLength-1);
    tokenizer = before_attribute_name_state;
  }
  nextchar += (m[0].length-1);

  // Make sure there isn't already an attribute with this name
  // If there is, ignore this one.
  var known = attributes.some(function(attr) {
    return attr[0] === name;
  });
  if (!known) attributes.push([name, value]);
  return true;
}
2352
// Start accumulating a new start tag: empty the tag-name buffer and the
// pending attribute list, and note that this is not an end tag.
function beginTagName() {
  tagnamebuf = "";
  attributes.length = 0;
  is_end_tag = false;
}
// Start accumulating a new end tag: empty the tag-name buffer and the
// pending attribute list, and note that this is an end tag.
function beginEndTagName() {
  tagnamebuf = "";
  attributes.length = 0;
  is_end_tag = true;
}
2363
// Empty the temporary buffer (in place).
function beginTempBuf() {
  tempbuf.length = 0;
}

// Reset the attribute-name accumulator.
function beginAttrName() {
  attrnamebuf = "";
}

// Reset the attribute-value accumulator.
function beginAttrValue() {
  attrvaluebuf = "";
}

// Empty the comment buffer (in place).
function beginComment() {
  commentbuf.length = 0;
}
// Start a new doctype token: empty the name buffer and mark the public
// and system identifiers as absent (null) until one is actually seen.
function beginDoctype() {
  doctypepublicbuf = null;
  doctypesystembuf = null;
  doctypenamebuf.length = 0;
}
// A public identifier is present: give it a fresh, empty buffer.
function beginDoctypePublicId() {
  doctypepublicbuf = [];
}

// A system identifier is present: give it a fresh, empty buffer.
function beginDoctypeSystemId() {
  doctypesystembuf = [];
}

// Turn on the force_quirks flag.
function forcequirks() {
  force_quirks = true;
}
// Truthy only when there is a current node and it is not in the XHTML
// namespace. When there is no current node, the falsy value of
// stack.top itself is returned.
function cdataAllowed() {
  var current = stack.top;
  if (!current) return current;
  return current.namespaceURI !== "http://www.w3.org/1999/xhtml";
}
2380
// Return true if buf is strictly equal to lasttagname, the name
// recorded for the most recently emitted start tag.
function appropriateEndTag(buf) {
  return buf === lasttagname;
}
2386
// Send any buffered run of text to the tree builder as a single TEXT
// token and empty the buffer. If ignore_linefeed is set, one leading
// "\n" is dropped first, and nothing is emitted if that leaves the text
// empty. ignore_linefeed is always false when this function returns.
function flushText() {
  if (textrun.length === 0) {
    ignore_linefeed = false;
    return;
  }

  var text = buf2str(textrun);
  textrun.length = 0;

  if (ignore_linefeed) {
    ignore_linefeed = false;
    if (text[0] === "\n") text = text.substring(1);
    if (text.length === 0) return;
  }

  insertToken(TEXT, text);
  textIncludesNUL = false;
  ignore_linefeed = false;
}
2403
// Consume chars matched by the pattern and return them as a string. Starts
// matching at the current position, so users should drop the current char
// otherwise.
// Throws if the pattern does not match exactly at the current character
// (index nextchar-1).
function getMatchingChars(pattern) {
  var start = nextchar - 1;
  pattern.lastIndex = start;
  var result = pattern.exec(chars);
  if (!result || result.index !== start) {
    throw new Error("should never happen");
  }

  var text = result[0];
  nextchar += text.length - 1;
  /* Careful! Make sure we haven't matched the EOF character! */
  if (input_complete && nextchar === numchars) {
    // Oops, backup one.
    text = text.slice(0, -1);
    nextchar--;
  }
  return text;
}
2424
// Emit, as one string, the run of chars starting at the current
// character (index nextchar-1) that matches the regexp, and advance
// nextchar past that run.
// Returns true if a non-empty run was emitted. Returns false, emitting
// and consuming nothing, if the pattern does not match, matches the
// empty string, or only matches somewhere after the current character.
// The pattern must honor lastIndex (i.e. have the g or y flag).
function emitCharsWhile(pattern) {
  var start = nextchar - 1;
  pattern.lastIndex = start;
  var found = pattern.exec(chars);
  // exec() returns null on no match, and a non-sticky regexp may match
  // further along the buffer; neither is a run at the current position.
  if (!found || found.index !== start || !found[0]) return false;
  var match = found[0];
  emitCharString(match);
  nextchar += match.length - 1;
  return true;
}
2435
// Emit the string s as a TEXT token, flushing any buffered text run
// first. If ignore_linefeed is set, it is cleared and one leading "\n"
// is dropped; nothing is emitted if that leaves the string empty.
// This is used by CDATA sections
function emitCharString(s) {
  if (textrun.length > 0) flushText();

  var text = s;
  if (ignore_linefeed) {
    ignore_linefeed = false;
    if (text[0] === "\n") text = text.substring(1);
    if (text.length === 0) return;
  }

  insertToken(TEXT, text);
}
2448
// Emit the tag accumulated in tagnamebuf. End tags are emitted as they
// are. For a start tag, the name buffer is cleared and the name is
// recorded in lasttagname before the token is inserted along with the
// pending attributes.
function emitTag() {
  if (is_end_tag) {
    insertToken(ENDTAG, tagnamebuf);
    return;
  }

  // Remember the last open tag we emitted
  var name = tagnamebuf;
  tagnamebuf = "";
  lasttagname = name;
  insertToken(TAG, name, attributes);
}
2459
2460
// A shortcut: look ahead and if this is a open or close tag
// in lowercase with no spaces and no attributes, just emit it now.
// Returns true if a tag was emitted (and consumed), false if SIMPLETAG
// captured no tag name. Throws if SIMPLETAG does not match at all.
function emitSimpleTag() {
  SIMPLETAG.lastIndex = nextchar;
  var m = SIMPLETAG.exec(chars);
  if (!m) throw new Error("should never happen");

  var name = m[2];
  if (!name) return false;

  if (m[1]) {
    // End tag: skip the name plus two more characters
    nextchar += (name.length+2);
    insertToken(ENDTAG, name);
    return true;
  }

  // Start tag: skip the name plus one more character
  nextchar += (name.length+1);
  lasttagname = name;
  insertToken(TAG, name, NOATTRS);
  return true;
}
2481
// Emit the tag accumulated in tagnamebuf with the self-closing flag
// (4th argument to insertToken) set.
// A self-closing start tag is still a start tag, so its name is
// recorded in lasttagname just as emitTag() does. Without this,
// appropriateEndTag() would compare against a stale name and the end
// tag following e.g. <title/> or <textarea/> would not be recognized.
function emitSelfClosingTag() {
  if (is_end_tag) insertToken(ENDTAG, tagnamebuf, null, true);
  else {
    // Remember the last open tag we emitted
    lasttagname = tagnamebuf;
    insertToken(TAG, tagnamebuf, attributes, true);
  }
}
2488
// Emit a DOCTYPE token built from the doctype buffers. The public and
// system identifiers are passed as undefined when their buffers are
// absent (null).
function emitDoctype() {
  var name = buf2str(doctypenamebuf);
  var publicId = doctypepublicbuf ? buf2str(doctypepublicbuf) : undefined;
  var systemId = doctypesystembuf ? buf2str(doctypesystembuf) : undefined;
  insertToken(DOCTYPE, name, publicId, systemId);
}
2495
// Signal end of input: flush pending text, hand EOF directly to the
// current insertion mode, then set the document's modclock to 1.
function emitEOF() {
  flushText();
  // EOF never goes to insertForeignContent()
  parser(EOF);
  // Start tracking modifications
  doc.modclock = 1;
}
2501
// Insert a token, either using the current parser insertion mode
// (for HTML stuff) or using the insertForeignToken() method.
// Any buffered text is flushed first. Also exposed as
// htmlparser.insertToken.
var insertToken = htmlparser.insertToken = function insertToken(t, value, arg3, arg4) {
  flushText();
  var current = stack.top;

  // This is the common case: no current node, or an HTML current node
  if (!current || current.namespaceURI === NAMESPACE.HTML) {
    parser(t, value, arg3, arg4);
    return;
  }

  // Anything other than a start tag or text is always foreign here
  if (t !== TAG && t !== TEXT) {
    insertForeignToken(t, value, arg3, arg4);
    return;
  }

  // But in some cases a start tag or text is treated as regular content
  var treatAsHTML =
    (isMathmlTextIntegrationPoint(current) &&
     (t === TEXT ||
      (t === TAG &&
       value !== "mglyph" && value !== "malignmark"))) ||
    (t === TAG &&
     value === "svg" &&
     current.namespaceURI === NAMESPACE.MATHML &&
     current.localName === "annotation-xml") ||
    isHTMLIntegrationPoint(current);

  if (!treatAsHTML) {
    // Otherwise it is foreign content
    insertForeignToken(t, value, arg3, arg4);
    return;
  }

  // XXX: the text_integration_mode stuff is an
  // attempted bug workaround of mine
  text_integration_mode = true;
  parser(t, value, arg3, arg4);
  text_integration_mode = false;
};
2542
2543
2544 /***
2545 * Tree building utility functions
2546 */
// Insert a comment node holding data at the current insertion location:
// foster parented when foster_parent_mode is on and the current node is
// in tablesectionrowSet, otherwise appended to the current node.
function insertComment(data) {
  var target = stack.top;
  if (foster_parent_mode && isA(target, tablesectionrowSet)) {
    fosterParent(function(doc) { return doc.createComment(data); });
    return;
  }

  // "If the adjusted insertion location is inside a template element,
  // let it instead be inside the template element's template contents"
  if (target instanceof impl.HTMLTemplateElement) {
    target = target.content;
  }
  target._appendChild(target.ownerDocument.createComment(data));
}
2560
// Insert the text s at the current insertion location: foster parented
// when foster_parent_mode is on and the current node is in
// tablesectionrowSet, otherwise merged into a trailing Text node of the
// current node or appended as a new Text node.
function insertText(s) {
  var target = stack.top;
  if (foster_parent_mode && isA(target, tablesectionrowSet)) {
    fosterParent(function(doc) { return doc.createTextNode(s); });
    return;
  }

  // "If the adjusted insertion location is inside a template element,
  // let it instead be inside the template element's template contents"
  if (target instanceof impl.HTMLTemplateElement) {
    target = target.content;
  }

  // "If there is a Text node immediately before the adjusted insertion
  // location, then append data to that Text node's data."
  var tail = target.lastChild;
  if (tail && tail.nodeType === Node.TEXT_NODE) {
    tail.appendData(s);
    return;
  }
  target._appendChild(target.ownerDocument.createTextNode(s));
}
2581
// Create and return an HTML element called name in document doc, and
// copy onto it the attributes in attrs (an array of [name, value]
// entries; may be falsy for none). The element is not inserted anywhere.
function createHTMLElt(doc, name, attrs) {
  // Create the element this way, rather than with
  // doc.createElement because createElement() does error
  // checking on the element name that we need to avoid here.
  var elt = html.createElement(doc, name, null);

  if (attrs) {
    var count = attrs.length;
    var k = 0;
    while (k < count) {
      var pair = attrs[k];
      // Use the _ version to avoid testing the validity
      // of the attribute name
      elt._setAttribute(pair[0], pair[1]);
      k++;
    }
  }
  // XXX
  // If the element is a resettable form element,
  // run its reset algorithm now
  // XXX
  // handle case where form-element-pointer is not null
  return elt;
}
2602
// The in_table insertion mode turns on this flag, which makes the
// insertion functions (insertElement, insertText, insertComment) use
// the foster parenting algorithm for content inside a table
2606 var foster_parent_mode = false;
2607
// Create an HTML element with the given name and attributes, insert it
// at the current insertion location (which also pushes it onto the
// stack of open elements), and return it.
function insertHTMLElement(name, attrs) {
  function build(doc) {
    return createHTMLElt(doc, name, attrs);
  }
  var elt = insertElement(build);

  // XXX
  // If this is a form element, set its form attribute property here
  if (isA(elt, formassociatedSet)) {
    elt._form = form_element_pointer;
  }

  return elt;
}
2621
// Insert the element into the open element or foster parent it.
// eltFunc is called with the document the element should belong to and
// must return the new element. The element is pushed onto the stack of
// open elements and returned.
function insertElement(eltFunc) {
  var top = stack.top;
  var elt;

  if (foster_parent_mode && isA(top, tablesectionrowSet)) {
    elt = fosterParent(eltFunc);
  }
  else {
    // "If the adjusted insertion location is inside a template element,
    // let it instead be inside the template element's template contents"
    var container = (top instanceof impl.HTMLTemplateElement) ? top.content : top;
    elt = eltFunc(container.ownerDocument);
    container._appendChild(elt);
  }

  stack.push(elt);
  return elt;
}
2641
// Create an element called name in namespace ns, set its attributes,
// insert it at the current insertion location and return it.
// Each entry of attrs is either [name, value] or, for a namespaced
// attribute, [name, value, namespace].
function insertForeignElement(name, attrs, ns) {
  function build(doc) {
    var elt = doc.createElementNS(ns, name);
    if (!attrs) return elt;

    var count = attrs.length;
    for (var k = 0; k < count; k++) {
      var entry = attrs[k];
      if (entry.length === 2) {
        elt._setAttribute(entry[0], entry[1]);
      }
      else {
        elt._setAttributeNS(entry[2], entry[0], entry[1]);
      }
    }
    return elt;
  }
  return insertElement(build);
}
2658
// Return the index, in the stack of open elements, of the element
// nearest the top of the stack that is an instance of type, or -1 if
// there is none.
function lastElementOfType(type) {
  var open = stack.elements;
  var pos = open.length;
  while (pos-- > 0) {
    if (open[pos] instanceof type) return pos;
  }
  return -1;
}
2667
// Foster parent a node. eltFunc is called with the document the node
// should belong to and must return the node to insert.
// The foster parent is chosen as follows:
//  - the last template on the stack, if there is one and it is above the
//    last table (or there is no table);
//  - otherwise the parent of the last table, with the node going right
//    before the table; or, if that table has no parent, the element just
//    below the table on the stack;
//  - otherwise the first element on the stack (the html element).
// A text node that would end up right after another text node is merged
// into it rather than inserted. The node is returned in every case.
function fosterParent(eltFunc) {
  var tableIndex = lastElementOfType(impl.HTMLTableElement);
  var templateIndex = lastElementOfType(impl.HTMLTemplateElement);
  var parent, before;

  if (templateIndex >= 0 && (tableIndex < 0 || templateIndex > tableIndex)) {
    parent = stack.elements[templateIndex];
  }
  else if (tableIndex >= 0) {
    var table = stack.elements[tableIndex];
    parent = table.parentNode;
    if (parent) {
      before = table;
    }
    else {
      parent = stack.elements[tableIndex - 1];
    }
  }
  if (!parent) parent = stack.elements[0]; // the `html` element.

  // "If the adjusted insertion location is inside a template element,
  // let it instead be inside the template element's template contents"
  if (parent instanceof impl.HTMLTemplateElement) {
    parent = parent.content;
  }

  // Create element in the appropriate document.
  var elt = eltFunc(parent.ownerDocument);

  if (elt.nodeType === Node.TEXT_NODE) {
    // Merge with an adjacent text node if there is one
    var prev = before ? before.previousSibling : parent.lastChild;
    if (prev && prev.nodeType === Node.TEXT_NODE) {
      prev.appendData(elt.data);
      return elt;
    }
  }

  if (before) {
    parent.insertBefore(elt, before);
  }
  else {
    parent._appendChild(elt);
  }
  return elt;
}
2709
2710
// Reset the insertion mode appropriately: walk the stack of open
// elements from the top down and set `parser` from the first HTML
// element that determines a mode. When the bottom of the stack is
// reached it counts as the "last" node and, in the fragment case, the
// context element is examined in its place. If nothing matches, the
// mode becomes in_body_mode.
function resetInsertionMode() {
  var open = stack.elements;

  for (var i = open.length - 1; i >= 0; i--) {
    var last = (i === 0);
    var node = (last && fragment) ? fragmentContext : open[i];

    if (node.namespaceURI === NAMESPACE.HTML) {
      switch(node.localName) {
      case "select":
        // A select is "in table" if a table is found below it on the
        // stack before any template is.
        parser = in_select_mode;
        for (var j = i - 1; j >= 0; j--) {
          var ancestor = open[j];
          if (ancestor instanceof impl.HTMLTemplateElement) break;
          if (ancestor instanceof impl.HTMLTableElement) {
            parser = in_select_in_table_mode;
            break;
          }
        }
        return;
      case "td":
      case "th":
        // Cells only count when they are not the last node
        if (!last) {
          parser = in_cell_mode;
          return;
        }
        break;
      case "tr":
        parser = in_row_mode;
        return;
      case "tbody":
      case "thead":
      case "tfoot":
        parser = in_table_body_mode;
        return;
      case "caption":
        parser = in_caption_mode;
        return;
      case "colgroup":
        parser = in_column_group_mode;
        return;
      case "table":
        parser = in_table_mode;
        return;
      case "template":
        parser = templateInsertionModes[templateInsertionModes.length-1];
        return;
      case "head":
        // head only counts when it is not the last node
        if (!last) {
          parser = in_head_mode;
          return;
        }
        break;
      case "body":
        parser = in_body_mode;
        return;
      case "frameset":
        parser = in_frameset_mode;
        return;
      case "html":
        parser = (head_element_pointer === null) ?
          before_head_mode : after_head_mode;
        return;
      }
    }

    if (last) {
      parser = in_body_mode;
      return;
    }
  }
}
2788
2789
// Insert an HTML element for the token, then switch the tokenizer to
// rawtext_state and the tree builder to text_mode, remembering the
// current insertion mode in originalInsertionMode.
function parseRawText(name, attrs) {
  insertHTMLElement(name, attrs);
  originalInsertionMode = parser;
  parser = text_mode;
  tokenizer = rawtext_state;
}
2796
// Insert an HTML element for the token, then switch the tokenizer to
// rcdata_state and the tree builder to text_mode, remembering the
// current insertion mode in originalInsertionMode.
function parseRCDATA(name, attrs) {
  insertHTMLElement(name, attrs);
  originalInsertionMode = parser;
  parser = text_mode;
  tokenizer = rcdata_state;
}
2803
// Make a copy of element i on the list of active formatting
// elements, using its original attributes, not current
// attributes (which may have been modified by a script).
// Returns {elt, attrs}: the new element, created in document doc, and
// the saved attribute array it was built from.
function afeclone(doc, i) {
  var savedAttrs = afe.attrs[i];
  var copy = createHTMLElt(doc, afe.list[i].localName, savedAttrs);
  return { elt: copy, attrs: savedAttrs };
}
2813
2814
// Reconstruct the active formatting elements: re-create, and insert at
// the current insertion location, every entry at the end of the list
// that comes after the last marker or the last entry that is still an
// open element. Each re-created element replaces its entry on the list.
function afereconstruct() {
  var list = afe.list;
  if (list.length === 0) return;

  // Nothing to do if the last entry is a marker or is an open element
  var entry = list[list.length-1];
  if (entry === afe.MARKER) return;
  if (stack.elements.lastIndexOf(entry) !== -1) return;

  // Walk backward until we find a marker or an open element (or run
  // off the start of the list)
  var i = list.length - 2;
  while (i >= 0) {
    entry = list[i];
    if (entry === afe.MARKER) break;
    if (stack.elements.lastIndexOf(entry) !== -1) break;
    i--;
  }

  // Now walk forward from the entry after that one, cloning each
  // formatting element, pushing the clone onto the stack of open
  // elements and storing it back in the list
  for (i = i + 1; i < list.length; i++) {
    list[i] = insertElement(function(doc) {
      return afeclone(doc, i).elt;
    });
  }
}
2839
2840 // Used by the adoptionAgency() function
2841 var BOOKMARK = {localName:"BM"};
2842
2843 function adoptionAgency(tag) {
2844 // If the current node is an HTML element whose tag name is subject,
2845 // and the current node is not in the list of active formatting
2846 // elements, then pop the current node off the stack of open
2847 // elements and abort these steps.
2848 if (isA(stack.top, tag) && afe.indexOf(stack.top) === -1) {
2849 stack.pop();
2850 return true; // no more handling required
2851 }
2852
2853 // Let outer loop counter be zero.
2854 var outer = 0;
2855
2856 // Outer loop: If outer loop counter is greater than or
2857 // equal to eight, then abort these steps.
2858 while(outer < 8) {
2859 // Increment outer loop counter by one.
2860 outer++;
2861
2862 // Let the formatting element be the last element in the list
2863 // of active formatting elements that: is between the end of
2864 // the list and the last scope marker in the list, if any, or
2865 // the start of the list otherwise, and has the same tag name
2866 // as the token.
2867 var fmtelt = afe.findElementByTag(tag);
2868
2869 // If there is no such node, then abort these steps and instead
2870 // act as described in the "any other end tag" entry below.
2871 if (!fmtelt) {
2872 return false; // false means handle by the default case
2873 }
2874
2875 // Otherwise, if there is such a node, but that node is not in
2876 // the stack of open elements, then this is a parse error;
2877 // remove the element from the list, and abort these steps.
2878 var index = stack.elements.lastIndexOf(fmtelt);
2879 if (index === -1) {
2880 afe.remove(fmtelt);
2881 return true; // true means no more handling required
2882 }
2883
2884 // Otherwise, if there is such a node, and that node is also in
2885 // the stack of open elements, but the element is not in scope,
2886 // then this is a parse error; ignore the token, and abort
2887 // these steps.
2888 if (!stack.elementInScope(fmtelt)) {
2889 return true;
2890 }
2891
2892 // Let the furthest block be the topmost node in the stack of
2893 // open elements that is lower in the stack than the formatting
2894 // element, and is an element in the special category. There
2895 // might not be one.
2896 var furthestblock = null, furthestblockindex;
2897 for(var i = index+1; i < stack.elements.length; i++) {
2898 if (isA(stack.elements[i], specialSet)) {
2899 furthestblock = stack.elements[i];
2900 furthestblockindex = i;
2901 break;
2902 }
2903 }
2904
2905 // If there is no furthest block, then the UA must skip the
2906 // subsequent steps and instead just pop all the nodes from the
2907 // bottom of the stack of open elements, from the current node
2908 // up to and including the formatting element, and remove the
2909 // formatting element from the list of active formatting
2910 // elements.
2911 if (!furthestblock) {
2912 stack.popElement(fmtelt);
2913 afe.remove(fmtelt);
2914 return true;
2915 }
2916 else {
2917 // Let the common ancestor be the element immediately above
2918 // the formatting element in the stack of open elements.
2919 var ancestor = stack.elements[index-1];
2920
2921 // Let a bookmark note the position of the formatting
2922 // element in the list of active formatting elements
2923 // relative to the elements on either side of it in the
2924 // list.
2925 afe.insertAfter(fmtelt, BOOKMARK);
2926
2927 // Let node and last node be the furthest block.
2928 var node = furthestblock;
2929 var lastnode = furthestblock;
2930 var nodeindex = furthestblockindex;
2931 var nodeafeindex;
2932
2933 // Let inner loop counter be zero.
2934 var inner = 0;
2935
2936 while (true) {
2937
2938 // Increment inner loop counter by one.
2939 inner++;
2940
2941 // Let node be the element immediately above node in
2942 // the stack of open elements, or if node is no longer
2943 // in the stack of open elements (e.g. because it got
2944 // removed by this algorithm), the element that was
2945 // immediately above node in the stack of open elements
2946 // before node was removed.
2947 node = stack.elements[--nodeindex];
2948
2949 // If node is the formatting element, then go
2950 // to the next step in the overall algorithm.
2951 if (node === fmtelt) break;
2952
2953 // If the inner loop counter is greater than three and node
2954 // is in the list of active formatting elements, then remove
2955 // node from the list of active formatting elements.
2956 nodeafeindex = afe.indexOf(node);
2957 if (inner > 3 && nodeafeindex !== -1) {
2958 afe.remove(node);
2959 nodeafeindex = -1;
2960 }
2961
2962 // If node is not in the list of active formatting
2963 // elements, then remove node from the stack of open
2964 // elements and then go back to the step labeled inner
2965 // loop.
2966 if (nodeafeindex === -1) {
2967 stack.removeElement(node);
2968 continue;
2969 }
2970
2971 // Create an element for the token for which the
2972 // element node was created with common ancestor as
2973 // the intended parent, replace the entry for node
2974 // in the list of active formatting elements with an
2975 // entry for the new element, replace the entry for
2976 // node in the stack of open elements with an entry for
2977 // the new element, and let node be the new element.
2978 var newelt = afeclone(ancestor.ownerDocument, nodeafeindex);
2979 afe.replace(node, newelt.elt, newelt.attrs);
2980 stack.elements[nodeindex] = newelt.elt;
2981 node = newelt.elt;
2982
2983 // If last node is the furthest block, then move the
2984 // aforementioned bookmark to be immediately after the
2985 // new node in the list of active formatting elements.
2986 if (lastnode === furthestblock) {
2987 afe.remove(BOOKMARK);
2988 afe.insertAfter(newelt.elt, BOOKMARK);
2989 }
2990
2991 // Insert last node into node, first removing it from
2992 // its previous parent node if any.
2993 node._appendChild(lastnode);
2994
2995 // Let last node be node.
2996 lastnode = node;
2997 }
2998
2999 // If the common ancestor node is a table, tbody, tfoot,
3000 // thead, or tr element, then, foster parent whatever last
3001 // node ended up being in the previous step, first removing
3002 // it from its previous parent node if any.
3003 if (foster_parent_mode && isA(ancestor, tablesectionrowSet)) {
3004 fosterParent(function() { return lastnode; });
3005 }
3006 // Otherwise, append whatever last node ended up being in
3007 // the previous step to the common ancestor node, first
3008 // removing it from its previous parent node if any.
3009 else if (ancestor instanceof impl.HTMLTemplateElement) {
3010 ancestor.content._appendChild(lastnode);
3011 } else {
3012 ancestor._appendChild(lastnode);
3013 }
3014
3015 // Create an element for the token for which the
3016 // formatting element was created, with furthest block
3017 // as the intended parent.
3018 var newelt2 = afeclone(furthestblock.ownerDocument, afe.indexOf(fmtelt));
3019
3020 // Take all of the child nodes of the furthest block and
3021 // append them to the element created in the last step.
3022 while(furthestblock.hasChildNodes()) {
3023 newelt2.elt._appendChild(furthestblock.firstChild);
3024 }
3025
3026 // Append that new element to the furthest block.
3027 furthestblock._appendChild(newelt2.elt);
3028
3029 // Remove the formatting element from the list of active
3030 // formatting elements, and insert the new element into the
3031 // list of active formatting elements at the position of
3032 // the aforementioned bookmark.
3033 afe.remove(fmtelt);
3034 afe.replace(BOOKMARK, newelt2.elt, newelt2.attrs);
3035
3036 // Remove the formatting element from the stack of open
3037 // elements, and insert the new element into the stack of
3038 // open elements immediately below the position of the
3039 // furthest block in that stack.
3040 stack.removeElement(fmtelt);
3041 var pos = stack.elements.lastIndexOf(furthestblock);
3042 stack.elements.splice(pos+1, 0, newelt2.elt);
3043 }
3044 }
3045
3046 return true;
3047 }
3048
3049 // We do this when we get /script in in_text_mode
/*
 * Stub handler for a script end tag: pops the current node off the
 * stack of open elements and switches the insertion mode back to the
 * original insertion mode.
 *
 * XXX: This does not run scripts. Getting this method right involves
 * the event loop, URL resolution, script fetching, etc. For now the
 * goal is only to be able to parse documents and test the parser.
 * TODO: capture the script element (stack.top) before popping and call
 * script._prepare() afterwards.
 *
 * What this method is supposed to do:
 *
 *  - Provide a stable state.
 *  - Let script be the current node (which will be a script element).
 *  - Pop the current node off the stack of open elements.
 *  - Switch the insertion mode to the original insertion mode.
 *  - Let the old insertion point have the same value as the current
 *    insertion point. Let the insertion point be just before the next
 *    input character.
 *  - Increment the parser's script nesting level by one.
 *  - Prepare the script. This might cause some script to execute,
 *    which might cause new characters to be inserted into the
 *    tokenizer, and might cause the tokenizer to output more tokens,
 *    resulting in a reentrant invocation of the parser.
 *  - Decrement the parser's script nesting level by one. If the
 *    parser's script nesting level is zero, then set the parser pause
 *    flag to false.
 *  - Let the insertion point have the value of the old insertion
 *    point. (In other words, restore the insertion point to its
 *    previous value. This value might be the "undefined" value.)
 *  - At this stage, if there is a pending parsing-blocking script,
 *    then:
 *
 *    If the script nesting level is not zero:
 *      Set the parser pause flag to true, and abort the processing of
 *      any nested invocations of the tokenizer, yielding control back
 *      to the caller. (Tokenization will resume when the caller
 *      returns to the "outer" tree construction stage.)
 *      The tree construction stage of this particular parser is being
 *      called reentrantly, say from a call to document.write().
 *
 *    Otherwise, run these steps:
 *      1. Let the script be the pending parsing-blocking script.
 *         There is no longer a pending parsing-blocking script.
 *      2. Block the tokenizer for this instance of the HTML parser,
 *         such that the event loop will not run tasks that invoke the
 *         tokenizer.
 *      3. If the parser's Document has a style sheet that is blocking
 *         scripts or the script's "ready to be parser-executed" flag
 *         is not set: spin the event loop until the parser's Document
 *         has no style sheet that is blocking scripts and the
 *         script's "ready to be parser-executed" flag is set.
 *      4. Unblock the tokenizer for this instance of the HTML parser,
 *         such that tasks that invoke the tokenizer can again be run.
 *      5. Let the insertion point be just before the next input
 *         character.
 *      6. Increment the parser's script nesting level by one (it
 *         should be zero before this step, so this sets it to one).
 *      7. Execute the script.
 *      8. Decrement the parser's script nesting level by one. If the
 *         parser's script nesting level is zero (which it always
 *         should be at this point), then set the parser pause flag to
 *         false.
 *      9. Let the insertion point be undefined again.
 *     10. If there is once again a pending parsing-blocking script,
 *         then repeat these steps from step 1.
 */
function handleScriptEnd() {
  // Only the two bookkeeping steps are implemented so far.
  stack.pop();
  parser = originalInsertionMode;
}
3151
/*
 * Temporary "stop parsing" implementation, just enough to get the
 * parser working. XXX: a full implementation involves scripts, events
 * and the event loop.
 */
function stopParsing() {
  // Cut the link from the document back to this parser. This stands in
  // for "set the insertion point to undefined": once the link is gone,
  // document.write() can no longer write into the document.
  delete doc._parser;

  // Pop everything off the stack of open elements in one go.
  stack.elements.length = 0;

  // Without an associated window object there is nothing to notify.
  if (!doc.defaultView) {
    return;
  }

  // Trigger a load event on the window.
  doc.defaultView.dispatchEvent(new impl.Event("load",{}));
}
3170
3171 /****
3172 * Tokenizer states
3173 */
3174
3175 /**
3176 * This file was partially mechanically generated from
3177 * http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html
3178 *
3179 * After mechanical conversion, it was further converted from
3180 * prose to JS by hand, but the intent is that it is a very
3181 * faithful rendering of the HTML tokenization spec in
3182 * JavaScript.
3183 *
3184 * It is not a goal of this tokenizer to detect or report
3185 * parse errors.
3186 *
3187 * XXX The tokenizer is supposed to work with straight UTF32
3188 * codepoints. But I don't think it has any dependencies on
3189 * any character outside of the BMP so I think it is safe to
3190 * pass it UTF16 characters. I don't think it will ever change
3191 * state in the middle of a surrogate pair.
3192 */
3193
3194 /*
3195 * Each state is represented by a function. For most states, the
3196 * scanner simply passes the next character (as an integer
3197 * codepoint) to the current state function and automatically
3198 * consumes the character. If the state function can't process
3199 * the character it can call pushback() to push it back to the
3200 * scanner.
3201 *
3202 * Some states require lookahead, though. If a state function has
3203 * a lookahead property, then it is invoked differently. In this
3204 * case, the scanner invokes the function with 3 arguments: 1) the
3205 * next codepoint 2) a string of lookahead text 3) a boolean that
3206 * is true if the lookahead goes all the way to the EOF. (XXX
3207 * actually maybe this third is not necessary... the lookahead
3208 * could just include \uFFFF?)
3209 *
3210 * If the lookahead property of a state function is an integer, it
3211 * specifies the number of characters required. If it is a string,
3212 * then the scanner will scan for that string and return all
3213 * characters up to and including that sequence, or up to EOF. If
3214 * the lookahead property is a regexp, then the scanner will match
3215 * the regexp at the current point and return the matching string.
3216 *
3217 * States that require lookahead are responsible for explicitly
3218 * consuming the characters they process. They do this by
3219 * incrementing nextchar by the number of processed characters.
3220 */
/**
 * Switch the tokenizer to another state and push the current character
 * back, so that the new state is handed the same character again.
 *
 * @param {number} c - the current code point (not used by this function)
 * @param {Function} new_state - the state function to continue in
 */
function reconsume(c, new_state) {
  nextchar -= 1; // pushback
  tokenizer = new_state;
}
3225
/**
 * Tokenizer "data" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function data_state(c) {
  if (c === 0x0026) { // AMPERSAND
    return_state = data_state;
    tokenizer = character_reference_state;
    return;
  }

  if (c === 0x003C) { // LESS-THAN SIGN
    // emitSimpleTag() is a shortcut for <p>, <dl>, </div> etc.; only
    // fall back to the tag open state when it did not handle the tag.
    if (!emitSimpleTag()) {
      tokenizer = tag_open_state;
    }
    return;
  }

  if (c === 0x0000) { // NULL
    // NUL characters emitted by the tokenizer are usually ignored by
    // the tree builder, but are sometimes converted to \uFFFD. Rather
    // than searching every emitted string for NULs, raise a flag
    // whenever one has been emitted.
    textrun.push(c);
    textIncludesNUL = true;
    return;
  }

  if (c === -1) { // EOF
    emitEOF();
    return;
  }

  // Instead of pushing a single character and coming straight back
  // here, look ahead and emit everything we can at once; buffer just
  // this character when that emits nothing.
  if (!emitCharsWhile(DATATEXT)) {
    textrun.push(c);
  }
}
3258
/**
 * Tokenizer "RCDATA" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rcdata_state(c) {
  if (c === 0x0026) { // AMPERSAND
    return_state = rcdata_state;
    tokenizer = character_reference_state;
  }
  else if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = rcdata_less_than_sign_state;
  }
  else if (c === 0x0000) { // NULL
    textrun.push(0xFFFD); // REPLACEMENT CHARACTER
    textIncludesNUL = true;
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else {
    textrun.push(c);
  }
}
3281
/**
 * Tokenizer "RAWTEXT" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rawtext_state(c) {
  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = rawtext_less_than_sign_state;
    return;
  }
  if (c === 0x0000) { // NULL
    textrun.push(0xFFFD); // REPLACEMENT CHARACTER
    return;
  }
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  // Try to emit a whole run of characters at once; buffer just this
  // character when that emits nothing.
  if (!emitCharsWhile(RAWTEXT)) {
    textrun.push(c);
  }
}
3299
/**
 * Tokenizer "script data" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_state(c) {
  if (c === -1) { // EOF
    emitEOF();
  }
  else if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_less_than_sign_state;
  }
  else if (c === 0x0000) { // NULL
    textrun.push(0xFFFD); // REPLACEMENT CHARACTER
  }
  else if (!emitCharsWhile(RAWTEXT)) {
    // Nothing could be emitted in bulk (this state uses the RAWTEXT
    // pattern), so buffer the single character.
    textrun.push(c);
  }
}
3317
/**
 * Tokenizer "PLAINTEXT" state. No character switches to another state
 * from here.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function plaintext_state(c) {
  if (c === 0x0000) { // NULL
    textrun.push(0xFFFD); // REPLACEMENT CHARACTER
    return;
  }
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  // Try to emit a whole run of characters at once; buffer just this
  // character when that emits nothing.
  if (!emitCharsWhile(PLAINTEXT)) {
    textrun.push(c);
  }
}
3332
/**
 * Tokenizer "tag open" state (reached after a "<").
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function tag_open_state(c) {
  // [A-Z] or [a-z]
  var startsTagName = (c >= 0x0041 && c <= 0x005A) ||
                      (c >= 0x0061 && c <= 0x007A);

  if (startsTagName) {
    beginTagName();
    reconsume(c, tag_name_state);
  }
  else if (c === 0x0021) { // EXCLAMATION MARK
    tokenizer = markup_declaration_open_state;
  }
  else if (c === 0x002F) { // SOLIDUS
    tokenizer = end_tag_open_state;
  }
  else if (c === 0x003F) { // QUESTION MARK
    reconsume(c, bogus_comment_state);
  }
  else {
    // Not markup after all: the "<" is plain text, and the current
    // character gets reprocessed in the data state.
    textrun.push(0x003C); // LESS-THAN SIGN
    reconsume(c, data_state);
  }
}
3365
/**
 * Tokenizer "end tag open" state (reached after "</").
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function end_tag_open_state(c) {
  // [A-Z] or [a-z]
  if ((c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A)) {
    beginEndTagName();
    reconsume(c, tag_name_state);
    return;
  }

  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    return;
  }

  if (c === -1) { // EOF
    // The pending "</" is flushed as text before the EOF.
    textrun.push(0x003C); // LESS-THAN SIGN
    textrun.push(0x002F); // SOLIDUS
    emitEOF();
    return;
  }

  reconsume(c, bogus_comment_state);
}
3396
/**
 * Tokenizer "tag name" state: accumulates the tag name in tagnamebuf.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function tag_name_state(c) {
  // CHARACTER TABULATION (tab), LINE FEED (LF), FORM FEED (FF), SPACE
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    tokenizer = before_attribute_name_state;
  }
  else if (c === 0x002F) { // SOLIDUS
    tokenizer = self_closing_start_tag_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    // The state is switched before the tag is emitted.
    tokenizer = data_state;
    emitTag();
  }
  else if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    // Add 0x20 to store the lowercase form.
    tagnamebuf += String.fromCharCode(c + 0x0020);
  }
  else if (c === 0x0000) { // NULL
    tagnamebuf += String.fromCharCode(0xFFFD /* REPLACEMENT CHARACTER */);
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else {
    tagnamebuf += getMatchingChars(TAGNAME);
  }
}
3431
/**
 * Tokenizer "RCDATA less-than sign" state. Identical to the RAWTEXT
 * less-than sign state, except s/RAWTEXT/RCDATA/g.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rcdata_less_than_sign_state(c) {
  if (c !== 0x002F) { // anything but SOLIDUS
    textrun.push(0x003C); // LESS-THAN SIGN
    reconsume(c, rcdata_state);
    return;
  }
  beginTempBuf();
  tokenizer = rcdata_end_tag_open_state;
}
3443
/**
 * Tokenizer "RCDATA end tag open" state. Identical to the RAWTEXT (and
 * script data) end tag open state, except s/RAWTEXT/RCDATA/g.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rcdata_end_tag_open_state(c) {
  // [A-Z] or [a-z]
  var isLetter = (c >= 0x0041 && c <= 0x005A) ||
                 (c >= 0x0061 && c <= 0x007A);

  if (isLetter) {
    beginEndTagName();
    reconsume(c, rcdata_end_tag_name_state);
    return;
  }

  // Not an end tag: "</" is just text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  reconsume(c, rcdata_state);
}
3469
/**
 * Tokenizer "RCDATA end tag name" state. Identical to the RAWTEXT (and
 * script data) end tag name state, except s/RAWTEXT/RCDATA/g.
 *
 * Letters are collected twice: lowercased into tagnamebuf, and exactly
 * as read into tempbuf so they can be replayed as text.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rcdata_end_tag_name_state(c) {
  var isUpper = c >= 0x0041 && c <= 0x005A; // [A-Z]
  var isLower = c >= 0x0061 && c <= 0x007A; // [a-z]

  if (isUpper || isLower) {
    tagnamebuf += String.fromCharCode(isUpper ? c + 0x0020 : c);
    tempbuf.push(c);
    return;
  }

  // CHARACTER TABULATION (tab), LINE FEED (LF), FORM FEED (FF), SPACE
  var isSpace = c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020;
  var endsName = isSpace ||
                 c === 0x002F || // SOLIDUS
                 c === 0x003E;   // GREATER-THAN SIGN

  if (endsName && appropriateEndTag(tagnamebuf)) {
    if (isSpace) {
      tokenizer = before_attribute_name_state;
    }
    else if (c === 0x002F) {
      tokenizer = self_closing_start_tag_state;
    }
    else {
      tokenizer = data_state;
      emitTag();
    }
    return;
  }

  // Anything else means this was not an appropriately matching close
  // tag, so back out by emitting all the characters as text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  pushAll(textrun, tempbuf);
  reconsume(c, rcdata_state);
}
3527
/**
 * Tokenizer "RAWTEXT less-than sign" state. Identical to the RCDATA
 * less-than sign state, except s/RCDATA/RAWTEXT/g.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rawtext_less_than_sign_state(c) {
  if (c !== 0x002F) { // anything but SOLIDUS
    textrun.push(0x003C); // LESS-THAN SIGN
    reconsume(c, rawtext_state);
    return;
  }
  beginTempBuf();
  tokenizer = rawtext_end_tag_open_state;
}
3540
/**
 * Tokenizer "RAWTEXT end tag open" state. Identical to the RCDATA (and
 * script data) end tag open state, except s/RCDATA/RAWTEXT/g.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rawtext_end_tag_open_state(c) {
  // [A-Z] or [a-z]
  var isLetter = (c >= 0x0041 && c <= 0x005A) ||
                 (c >= 0x0061 && c <= 0x007A);

  if (isLetter) {
    beginEndTagName();
    reconsume(c, rawtext_end_tag_name_state);
    return;
  }

  // Not an end tag: "</" is just text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  reconsume(c, rawtext_state);
}
3566
/**
 * Tokenizer "RAWTEXT end tag name" state. Identical to the RCDATA (and
 * script data) end tag name state, except s/RCDATA/RAWTEXT/g.
 *
 * Letters are collected twice: lowercased into tagnamebuf, and exactly
 * as read into tempbuf so they can be replayed as text.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function rawtext_end_tag_name_state(c) {
  var isUpper = c >= 0x0041 && c <= 0x005A; // [A-Z]
  var isLower = c >= 0x0061 && c <= 0x007A; // [a-z]

  if (isUpper || isLower) {
    tagnamebuf += String.fromCharCode(isUpper ? c + 0x0020 : c);
    tempbuf.push(c);
    return;
  }

  // CHARACTER TABULATION (tab), LINE FEED (LF), FORM FEED (FF), SPACE
  var isSpace = c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020;
  var endsName = isSpace ||
                 c === 0x002F || // SOLIDUS
                 c === 0x003E;   // GREATER-THAN SIGN

  if (endsName && appropriateEndTag(tagnamebuf)) {
    if (isSpace) {
      tokenizer = before_attribute_name_state;
    }
    else if (c === 0x002F) {
      tokenizer = self_closing_start_tag_state;
    }
    else {
      tokenizer = data_state;
      emitTag();
    }
    return;
  }

  // Anything else means this was not an appropriately matching close
  // tag, so back out by emitting all the characters as text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  pushAll(textrun, tempbuf);
  reconsume(c, rawtext_state);
}
3622
/**
 * Tokenizer "script data less-than sign" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_less_than_sign_state(c) {
  if (c === 0x002F) { // SOLIDUS
    beginTempBuf();
    tokenizer = script_data_end_tag_open_state;
  }
  else if (c === 0x0021) { // EXCLAMATION MARK
    tokenizer = script_data_escape_start_state;
    textrun.push(0x003C, 0x0021); // LESS-THAN SIGN, EXCLAMATION MARK
  }
  else {
    textrun.push(0x003C); // LESS-THAN SIGN
    reconsume(c, script_data_state);
  }
}
3640
/**
 * Tokenizer "script data end tag open" state. Identical to the RCDATA
 * (and RAWTEXT) end tag open state, except s/RCDATA/Script data/g.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_end_tag_open_state(c) {
  // [A-Z] or [a-z]
  var isLetter = (c >= 0x0041 && c <= 0x005A) ||
                 (c >= 0x0061 && c <= 0x007A);

  if (isLetter) {
    beginEndTagName();
    reconsume(c, script_data_end_tag_name_state);
    return;
  }

  // Not an end tag: "</" is just text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  reconsume(c, script_data_state);
}
3666
/**
 * Tokenizer "script data end tag name" state. Identical to the RCDATA
 * (and RAWTEXT) end tag name state, except s/RCDATA/Script data/g.
 *
 * Letters are collected twice: lowercased into tagnamebuf, and exactly
 * as read into tempbuf so they can be replayed as text.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_end_tag_name_state(c) {
  var isUpper = c >= 0x0041 && c <= 0x005A; // [A-Z]
  var isLower = c >= 0x0061 && c <= 0x007A; // [a-z]

  if (isUpper || isLower) {
    tagnamebuf += String.fromCharCode(isUpper ? c + 0x0020 : c);
    tempbuf.push(c);
    return;
  }

  // CHARACTER TABULATION (tab), LINE FEED (LF), FORM FEED (FF), SPACE
  var isSpace = c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020;
  var endsName = isSpace ||
                 c === 0x002F || // SOLIDUS
                 c === 0x003E;   // GREATER-THAN SIGN

  if (endsName && appropriateEndTag(tagnamebuf)) {
    if (isSpace) {
      tokenizer = before_attribute_name_state;
    }
    else if (c === 0x002F) {
      tokenizer = self_closing_start_tag_state;
    }
    else {
      tokenizer = data_state;
      emitTag();
    }
    return;
  }

  // Anything else means this was not an appropriately matching close
  // tag, so back out by emitting all the characters as text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  pushAll(textrun, tempbuf);
  reconsume(c, script_data_state);
}
3724
/**
 * Tokenizer "script data escape start" state (reached after "<!").
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escape_start_state(c) {
  if (c !== 0x002D) { // anything but HYPHEN-MINUS
    reconsume(c, script_data_state);
    return;
  }
  tokenizer = script_data_escape_start_dash_state;
  textrun.push(0x002D); // HYPHEN-MINUS
}
3734
/**
 * Tokenizer "script data escape start dash" state (reached after "<!-").
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escape_start_dash_state(c) {
  if (c !== 0x002D) { // anything but HYPHEN-MINUS
    reconsume(c, script_data_state);
    return;
  }
  tokenizer = script_data_escaped_dash_dash_state;
  textrun.push(0x002D); // HYPHEN-MINUS
}
3744
/**
 * Tokenizer "script data escaped" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_escaped_less_than_sign_state;
    return;
  }
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = script_data_escaped_dash_state;
  }
  // Every remaining character is buffered as text, with NULL swapped
  // for the REPLACEMENT CHARACTER.
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
3765
/**
 * Tokenizer "script data escaped dash" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_dash_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_escaped_less_than_sign_state;
    return;
  }
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = script_data_escaped_dash_dash_state;
    textrun.push(0x002D); // HYPHEN-MINUS
    return;
  }
  // Everything else returns to the escaped state; NULL is buffered as
  // the REPLACEMENT CHARACTER, other characters as themselves.
  tokenizer = script_data_escaped_state;
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
3788
/**
 * Tokenizer "script data escaped dash dash" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_dash_dash_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_escaped_less_than_sign_state;
    return;
  }
  if (c === 0x002D) { // HYPHEN-MINUS
    // Further dashes are buffered without leaving this state.
    textrun.push(0x002D);
    return;
  }
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = script_data_state;
    textrun.push(0x003E);
    return;
  }
  // Everything else returns to the escaped state; NULL is buffered as
  // the REPLACEMENT CHARACTER, other characters as themselves.
  tokenizer = script_data_escaped_state;
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
3814
/**
 * Tokenizer "script data escaped less-than sign" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_less_than_sign_state(c) {
  if (c === 0x002F) { // SOLIDUS
    beginTempBuf();
    tokenizer = script_data_escaped_end_tag_open_state;
    return;
  }

  // [A-Z] or [a-z]
  var isLetter = (c >= 0x0041 && c <= 0x005A) ||
                 (c >= 0x0061 && c <= 0x007A);

  // A letter starts a fresh temp buffer and is reprocessed in the
  // double escape start state; anything else goes back to the escaped
  // state. Either way the "<" is buffered as text.
  if (isLetter) {
    beginTempBuf();
  }
  textrun.push(0x003C); // LESS-THAN SIGN
  reconsume(c, isLetter ? script_data_double_escape_start_state
                        : script_data_escaped_state);
}
3843
/**
 * Tokenizer "script data escaped end tag open" state.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_end_tag_open_state(c) {
  // [A-Z] or [a-z]
  var isLetter = (c >= 0x0041 && c <= 0x005A) ||
                 (c >= 0x0061 && c <= 0x007A);

  if (isLetter) {
    beginEndTagName();
    reconsume(c, script_data_escaped_end_tag_name_state);
    return;
  }

  // Not an end tag: "</" is just text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  reconsume(c, script_data_escaped_state);
}
3868
/**
 * Tokenizer "script data escaped end tag name" state.
 *
 * Letters are collected twice: lowercased into tagnamebuf, and exactly
 * as read into tempbuf so they can be replayed as text.
 *
 * @param {number} c - next code point, or -1 for EOF
 */
function script_data_escaped_end_tag_name_state(c) {
  var isUpper = c >= 0x0041 && c <= 0x005A; // [A-Z]
  var isLower = c >= 0x0061 && c <= 0x007A; // [a-z]

  if (isUpper || isLower) {
    tagnamebuf += String.fromCharCode(isUpper ? c + 0x0020 : c);
    tempbuf.push(c);
    return;
  }

  // CHARACTER TABULATION (tab), LINE FEED (LF), FORM FEED (FF), SPACE
  var isSpace = c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020;
  var endsName = isSpace ||
                 c === 0x002F || // SOLIDUS
                 c === 0x003E;   // GREATER-THAN SIGN

  if (endsName && appropriateEndTag(tagnamebuf)) {
    if (isSpace) {
      tokenizer = before_attribute_name_state;
    }
    else if (c === 0x002F) {
      tokenizer = self_closing_start_tag_state;
    }
    else {
      tokenizer = data_state;
      emitTag();
    }
    return;
  }

  // We get here for any other character, and when the closing tagname
  // is not an appropriate tagname: emit everything seen as text.
  textrun.push(0x003C, 0x002F); // LESS-THAN SIGN, SOLIDUS
  pushAll(textrun, tempbuf);
  reconsume(c, script_data_escaped_state);
}
3922
// Tokenizer state that collects letters into tempbuf and, on a
// delimiter, decides whether they spelled "script".
//   c: the current input code point (-1 for EOF).
function script_data_double_escape_start_state(c) {
  if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    // Lowercased copy goes to tempbuf; the text run keeps the original.
    tempbuf.push(c + 0x0020);
    textrun.push(c);
  }
  else if (c >= 0x0061 && c <= 0x007A) { // [a-z]
    tempbuf.push(c);
    textrun.push(c);
  }
  else if (c === 0x0009 || c === 0x000A || c === 0x000C ||
           c === 0x0020 || c === 0x002F || c === 0x003E) {
    // tab, LF, FF, space, "/" or ">" ends the name.
    tokenizer = (buf2str(tempbuf) === "script") ?
      script_data_double_escaped_state :
      script_data_escaped_state;
    textrun.push(c);
  }
  else {
    reconsume(c, script_data_escaped_state);
  }
}
3962
// Tokenizer state for ordinary characters in double-escaped script data.
//   c: the current input code point (-1 for EOF).
function script_data_double_escaped_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }

  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = script_data_double_escaped_dash_state;
  }
  else if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_double_escaped_less_than_sign_state;
  }

  // Every non-EOF character is emitted as text; NULL is replaced by
  // U+FFFD REPLACEMENT CHARACTER.
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
3984
// Tokenizer state after a single "-" in double-escaped script data.
//   c: the current input code point (-1 for EOF).
function script_data_double_escaped_dash_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }

  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = script_data_double_escaped_dash_dash_state;
  }
  else if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_double_escaped_less_than_sign_state;
  }
  else {
    // NULL and every other character go back to the plain state.
    tokenizer = script_data_double_escaped_state;
  }

  // Emit the character as text, substituting U+FFFD for NULL.
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
4008
// Tokenizer state after "--" in double-escaped script data.
//   c: the current input code point (-1 for EOF).
function script_data_double_escaped_dash_dash_state(c) {
  if (c === -1) { // EOF
    emitEOF();
    return;
  }

  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = script_data_double_escaped_less_than_sign_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = script_data_state;
  }
  else if (c !== 0x002D) {
    // Anything but another HYPHEN-MINUS (which stays in this state)
    // returns to the plain double-escaped state.
    tokenizer = script_data_double_escaped_state;
  }

  // Emit the character as text, substituting U+FFFD for NULL.
  textrun.push(c === 0x0000 ? 0xFFFD : c);
}
4035
// Tokenizer state after "<" in double-escaped script data.
//   c: the current input code point (-1 for EOF).
function script_data_double_escaped_less_than_sign_state(c) {
  if (c !== 0x002F) {
    // Not a SOLIDUS: let the double-escaped state handle it.
    reconsume(c, script_data_double_escaped_state);
    return;
  }

  // "</": reset the temporary buffer and emit the "/" as text.
  beginTempBuf();
  tokenizer = script_data_double_escape_end_state;
  textrun.push(0x002F); // SOLIDUS
}
4046
// Tokenizer state that collects letters into tempbuf and, on a
// delimiter, leaves double-escaped mode if they spelled "script".
//   c: the current input code point (-1 for EOF).
function script_data_double_escape_end_state(c) {
  if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    // Lowercased copy goes to tempbuf; the text run keeps the original.
    tempbuf.push(c + 0x0020);
    textrun.push(c);
  }
  else if (c >= 0x0061 && c <= 0x007A) { // [a-z]
    tempbuf.push(c);
    textrun.push(c);
  }
  else if (c === 0x0009 || c === 0x000A || c === 0x000C ||
           c === 0x0020 || c === 0x002F || c === 0x003E) {
    // tab, LF, FF, space, "/" or ">" ends the name.
    tokenizer = (buf2str(tempbuf) === "script") ?
      script_data_escaped_state :
      script_data_double_escaped_state;
    textrun.push(c);
  }
  else {
    reconsume(c, script_data_double_escaped_state);
  }
}
4086
// Tokenizer state between attributes, before any attribute name has
// been started.
//   c: the current input code point (-1 for EOF).
function before_attribute_name_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  // For SOLIDUS, GREATER-THAN SIGN, and EOF, spec says "reconsume in
  // the after attribute name state", but in our implementation that
  // state always has an active attribute in attrnamebuf. So the rules
  // are duplicated here, minus the addAttribute step.
  if (c === 0x002F) { // SOLIDUS
    tokenizer = self_closing_start_tag_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitTag();
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else if (c === 0x003D) { // EQUALS SIGN
    // A leading "=" becomes the first character of the attribute name.
    beginAttrName();
    attrnamebuf += "=";
    tokenizer = attribute_name_state;
  }
  else if (!handleSimpleAttribute()) {
    // handleSimpleAttribute() returned false, so take the
    // character-by-character path.
    beginAttrName();
    reconsume(c, attribute_name_state);
  }
}
4121
4122 // beginAttrName() must have been called before this point
4123 // There is an active attribute in attrnamebuf (but not attrvaluebuf)
// Tokenizer state that appends characters to attrnamebuf until the
// attribute name ends.
//   c: the current input code point (-1 for EOF).
function attribute_name_state(c) {
  var endsName = c === 0x0009 || c === 0x000A || c === 0x000C ||
                 c === 0x0020 || // tab, LF, FF, space
                 c === 0x002F || // SOLIDUS
                 c === 0x003E || // GREATER-THAN SIGN
                 c === -1;       // EOF

  if (endsName) {
    reconsume(c, after_attribute_name_state);
  }
  else if (c === 0x003D) { // EQUALS SIGN
    tokenizer = before_attribute_value_state;
  }
  else if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    // Attribute names are stored lowercased.
    attrnamebuf += String.fromCharCode(c + 0x0020);
  }
  else if (c === 0x0000) { // NULL
    attrnamebuf += "\uFFFD"; // REPLACEMENT CHARACTER
  }
  else {
    // Everything else, including QUOTATION MARK, APOSTROPHE and
    // LESS-THAN SIGN, is taken via getMatchingChars(ATTRNAME).
    attrnamebuf += getMatchingChars(ATTRNAME);
  }
}
4158
4159 // There is an active attribute in attrnamebuf, but not yet in attrvaluebuf.
// Tokenizer state after an attribute name, before it is known whether
// a value follows.
//   c: the current input code point (-1 for EOF).
function after_attribute_name_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  if (c === 0x003D) { // EQUALS SIGN
    tokenizer = before_attribute_value_state;
    return;
  }

  // Every remaining branch records the pending attribute name with no
  // value. The first three must be kept in sync with
  // before_attribute_name_state.
  if (c === 0x002F) { // SOLIDUS
    addAttribute(attrnamebuf);
    tokenizer = self_closing_start_tag_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    addAttribute(attrnamebuf);
    emitTag();
  }
  else if (c === -1) { // EOF
    addAttribute(attrnamebuf);
    emitEOF();
  }
  else {
    // Start of the next attribute name.
    addAttribute(attrnamebuf);
    beginAttrName();
    reconsume(c, attribute_name_state);
  }
}
4194
// Tokenizer state after "=" in an attribute, before its value starts.
//   c: the current input code point (-1 for EOF).
function before_attribute_value_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  // Every other character starts a value.
  beginAttrValue();

  if (c === 0x0022) { // QUOTATION MARK
    tokenizer = attribute_value_double_quoted_state;
  }
  else if (c === 0x0027) { // APOSTROPHE
    tokenizer = attribute_value_single_quoted_state;
  }
  else {
    // Includes GREATER-THAN SIGN: the unquoted state deals with it.
    reconsume(c, attribute_value_unquoted_state);
  }
}
4219
// Tokenizer state inside a double-quoted attribute value.
//   c: the current input code point (-1 for EOF).
function attribute_value_double_quoted_state(c) {
  if (c === 0x0022) { // QUOTATION MARK: closing quote
    addAttribute(attrnamebuf, attrvaluebuf);
    tokenizer = after_attribute_value_quoted_state;
  }
  else if (c === 0x0026) { // AMPERSAND
    // Record this state as return_state before switching to the
    // character reference state.
    return_state = attribute_value_double_quoted_state;
    tokenizer = character_reference_state;
  }
  else if (c === 0x0000) { // NULL
    attrvaluebuf += "\uFFFD"; // REPLACEMENT CHARACTER
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else if (c === 0x000A) { // LF
    // this could be a converted \r, so don't use getMatchingChars
    attrvaluebuf += String.fromCharCode(c);
  }
  else {
    attrvaluebuf += getMatchingChars(DBLQUOTEATTRVAL);
  }
}
4245
// Tokenizer state inside a single-quoted attribute value.
//   c: the current input code point (-1 for EOF).
function attribute_value_single_quoted_state(c) {
  if (c === 0x0027) { // APOSTROPHE: closing quote
    addAttribute(attrnamebuf, attrvaluebuf);
    tokenizer = after_attribute_value_quoted_state;
  }
  else if (c === 0x0026) { // AMPERSAND
    // Record this state as return_state before switching to the
    // character reference state.
    return_state = attribute_value_single_quoted_state;
    tokenizer = character_reference_state;
  }
  else if (c === 0x0000) { // NULL
    attrvaluebuf += "\uFFFD"; // REPLACEMENT CHARACTER
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else if (c === 0x000A) { // LF
    // this could be a converted \r, so don't use getMatchingChars
    attrvaluebuf += String.fromCharCode(c);
  }
  else {
    attrvaluebuf += getMatchingChars(SINGLEQUOTEATTRVAL);
  }
}
4271
// Tokenizer state inside an unquoted attribute value.
//   c: the current input code point (-1 for EOF).
function attribute_value_unquoted_state(c) {
  var isSpace = c === 0x0009 || c === 0x000A ||
                c === 0x000C || c === 0x0020; // tab, LF, FF, space

  if (isSpace || c === 0x003E) {
    // Whitespace or GREATER-THAN SIGN ends the value.
    addAttribute(attrnamebuf, attrvaluebuf);
    if (isSpace) {
      tokenizer = before_attribute_name_state;
    }
    else {
      tokenizer = data_state;
      emitTag();
    }
  }
  else if (c === 0x0026) { // AMPERSAND
    return_state = attribute_value_unquoted_state;
    tokenizer = character_reference_state;
  }
  else if (c === 0x0000) { // NULL
    attrvaluebuf += "\uFFFD"; // REPLACEMENT CHARACTER
  }
  else if (c === -1) { // EOF
    // Push the EOF back and switch to the data state.
    nextchar--;
    tokenizer = data_state;
  }
  else {
    // Everything else, including QUOTATION MARK, APOSTROPHE,
    // LESS-THAN SIGN, EQUALS SIGN and GRAVE ACCENT, is taken via
    // getMatchingChars(UNQUOTEDATTRVAL).
    attrvaluebuf += getMatchingChars(UNQUOTEDATTRVAL);
  }
}
4308
// Tokenizer state right after the closing quote of an attribute value.
//   c: the current input code point (-1 for EOF).
function after_attribute_value_quoted_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    tokenizer = before_attribute_name_state; // tab, LF, FF, space
  }
  else if (c === 0x002F) { // SOLIDUS
    tokenizer = self_closing_start_tag_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitTag();
  }
  else if (c === -1) { // EOF
    emitEOF();
  }
  else {
    // No separator: treat the character as the start of whatever
    // before_attribute_name_state would accept.
    reconsume(c, before_attribute_name_state);
  }
}
4332
// Tokenizer state after a "/" inside a tag.
//   c: the current input code point (-1 for EOF).
function self_closing_start_tag_state(c) {
  if (c === 0x003E) { // GREATER-THAN SIGN
    // "/>": emit the current tag with the self-closing flag set.
    tokenizer = data_state;
    emitSelfClosingTag(true);
    return;
  }

  if (c === -1) { // EOF
    emitEOF();
    return;
  }

  // The "/" was stray; carry on with attribute parsing.
  reconsume(c, before_attribute_name_state);
}
4348
// Tokenizer state that turns the whole lookahead string into one
// comment token.
//   c:         the current input code point (unused here).
//   lookahead: text made available through the ">" lookahead
//              declaration below; its last character is dropped from
//              the comment.
//   eof:       truthy when the lookahead ended at end of input.
function bogus_comment_state(c, lookahead, eof) {
  var len = lookahead.length;

  // Advance past the lookahead, but leave the EOF unconsumed.
  nextchar += eof ? len-1 : len;

  // Drop the final character, then normalize NULs and line endings.
  var comment = lookahead.substring(0, len-1)
    .replace(/\u0000/g,"\uFFFD")
    .replace(/\u000D\u000A/g,"\u000A")
    .replace(/\u000D/g,"\u000A");

  insertToken(COMMENT, comment);
  tokenizer = data_state;
}
bogus_comment_state.lookahead = ">";
4369
// Tokenizer state that uses the lookahead to pick between a comment,
// a DOCTYPE, a CDATA section, or a bogus comment.
//   c:         the current input code point (unused here).
//   lookahead: up to 7 characters of upcoming input (see the
//              .lookahead declaration below).
//   eof:       unused here.
function markup_declaration_open_state(c, lookahead, eof) {
  var startsWithDashes = lookahead[0] === "-" && lookahead[1] === "-";

  if (startsWithDashes) {
    nextchar += 2;
    beginComment();
    tokenizer = comment_start_state;
  }
  else if (lookahead.toUpperCase() === "DOCTYPE") {
    // Case-insensitive match.
    nextchar += 7;
    tokenizer = doctype_state;
  }
  else if (lookahead === "[CDATA[" && cdataAllowed()) {
    // Exact-case match, and only where cdataAllowed() permits it.
    nextchar += 7;
    tokenizer = cdata_section_state;
  }
  else {
    tokenizer = bogus_comment_state;
  }
}
markup_declaration_open_state.lookahead = 7;
4391
// Tokenizer state for the first character of a comment.
//   c: the current input code point (-1 for EOF).
function comment_start_state(c) {
  beginComment();

  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_start_dash_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    // The comment closes immediately; emit what commentbuf holds.
    tokenizer = data_state;
    insertToken(COMMENT, buf2str(commentbuf));
  }
  else {
    reconsume(c, comment_state);
  }
}
4407
// Tokenizer state after a "-" at the very start of a comment.
//   c: the current input code point (-1 for EOF).
function comment_start_dash_state(c) {
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_end_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    insertToken(COMMENT, buf2str(commentbuf));
  }
  else if (c === -1) { // EOF
    // The unterminated comment is still emitted as a comment token.
    insertToken(COMMENT, buf2str(commentbuf));
    emitEOF();
  }
  else {
    // The earlier "-" was comment text after all.
    commentbuf.push(0x002D /* HYPHEN-MINUS */);
    reconsume(c, comment_state);
  }
}
4427
// Tokenizer state for the body of a comment.
//   c: the current input code point (-1 for EOF).
function comment_state(c) {
  if (c === 0x002D) { // HYPHEN-MINUS
    // Possible start of "-->"; nothing is buffered yet.
    tokenizer = comment_end_dash_state;
    return;
  }

  if (c === -1) { // EOF
    // The unterminated comment is still emitted as a comment token.
    insertToken(COMMENT, buf2str(commentbuf));
    emitEOF();
    return;
  }

  // Everything else is comment text; NULL becomes U+FFFD.
  commentbuf.push(c === 0x0000 ? 0xFFFD : c);
  if (c === 0x003C) { // LESS-THAN SIGN
    tokenizer = comment_less_than_sign_state;
  }
}
4449
// Tokenizer state after a "<" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_less_than_sign_state(c) {
  if (c === 0x0021) { // EXCLAMATION MARK
    commentbuf.push(c);
    tokenizer = comment_less_than_sign_bang_state;
  }
  else if (c === 0x003C) { // LESS-THAN SIGN
    // Another "<": buffer it and stay in this state.
    commentbuf.push(c);
  }
  else {
    reconsume(c, comment_state);
  }
}
4464
// Tokenizer state after "<!" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_less_than_sign_bang_state(c) {
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_less_than_sign_bang_dash_state;
    return;
  }
  reconsume(c, comment_state);
}
4475
// Tokenizer state after "<!-" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_less_than_sign_bang_dash_state(c) {
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_less_than_sign_bang_dash_dash_state;
    return;
  }
  reconsume(c, comment_end_dash_state);
}
4486
// Tokenizer state after "<!--" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_less_than_sign_bang_dash_dash_state(c) {
  // GREATER-THAN SIGN and EOF are the expected inputs; any other
  // character is a parse error. This implementation takes the same
  // action either way: reconsume in the comment end state.
  reconsume(c, comment_end_state);
}
4499
// Tokenizer state after a single "-" inside a comment body.
//   c: the current input code point (-1 for EOF).
function comment_end_dash_state(c) {
  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_end_state;
    return;
  }

  if (c === -1) { // EOF
    // The unterminated comment is still emitted as a comment token.
    insertToken(COMMENT, buf2str(commentbuf));
    emitEOF();
    return;
  }

  // The earlier "-" was ordinary comment text.
  commentbuf.push(0x002D /* HYPHEN-MINUS */);
  reconsume(c, comment_state);
}
4515
// Tokenizer state after "--" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_end_state(c) {
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    insertToken(COMMENT, buf2str(commentbuf));
  }
  else if (c === 0x0021) { // EXCLAMATION MARK
    tokenizer = comment_end_bang_state;
  }
  else if (c === 0x002D) { // HYPHEN-MINUS
    // A third (or later) dash: one dash becomes comment text and the
    // tokenizer stays in this state.
    commentbuf.push(0x002D);
  }
  else if (c === -1) { // EOF
    // Emit the comment even though it was never closed. For security
    // reasons: otherwise, a hostile user could put a script in a
    // comment (e.g. in a blog comment) and then DOS the server so that
    // the end tag isn't read, and the commented script tag would be
    // treated as live code.
    insertToken(COMMENT, buf2str(commentbuf));
    emitEOF();
  }
  else {
    // The "--" was ordinary comment text.
    commentbuf.push(0x002D);
    commentbuf.push(0x002D);
    reconsume(c, comment_state);
  }
}
4539
// Tokenizer state after "--!" inside a comment.
//   c: the current input code point (-1 for EOF).
function comment_end_bang_state(c) {
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    insertToken(COMMENT, buf2str(commentbuf));
    return;
  }

  if (c === -1) { // EOF
    // The unterminated comment is still emitted as a comment token.
    insertToken(COMMENT, buf2str(commentbuf));
    emitEOF();
    return;
  }

  // Otherwise the "--!" was comment text.
  commentbuf.push(0x002D); // HYPHEN-MINUS
  commentbuf.push(0x002D); // HYPHEN-MINUS
  commentbuf.push(0x0021); // EXCLAMATION MARK

  if (c === 0x002D) { // HYPHEN-MINUS
    tokenizer = comment_end_dash_state;
  }
  else {
    reconsume(c, comment_state);
  }
}
4564
// Tokenizer state right after the "DOCTYPE" keyword.
//   c: the current input code point (-1 for EOF).
function doctype_state(c) {
  if (c === -1) { // EOF
    // Emit an empty, force-quirks doctype before the EOF.
    beginDoctype();
    forcequirks();
    emitDoctype();
    emitEOF();
    return;
  }

  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    // tab, LF, FF, space: consumed as the separator.
    tokenizer = before_doctype_name_state;
  }
  else {
    // No separator: hand the character to the next state unchanged.
    reconsume(c, before_doctype_name_state);
  }
}
4584
// Tokenizer state before the first character of the doctype name.
//   c: the current input code point (-1 for EOF).
function before_doctype_name_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  // Every other character begins a doctype token.
  beginDoctype();

  if (c === 0x003E) { // GREATER-THAN SIGN
    forcequirks();
    tokenizer = data_state;
    emitDoctype();
    return;
  }

  if (c === -1) { // EOF
    forcequirks();
    emitDoctype();
    emitEOF();
    return;
  }

  // First character of the name: uppercase ASCII is lowercased and
  // NULL is replaced by U+FFFD.
  var first = c;
  if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    first = c + 0x0020;
  }
  else if (c === 0x0000) { // NULL
    first = 0xFFFD;
  }
  doctypenamebuf.push(first);
  tokenizer = doctype_name_state;
}
4627
// Tokenizer state that accumulates the doctype name in doctypenamebuf.
//   c: the current input code point (-1 for EOF).
function doctype_name_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    tokenizer = after_doctype_name_state; // tab, LF, FF, space
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    forcequirks();
    emitDoctype();
    emitEOF();
  }
  else if (c >= 0x0041 && c <= 0x005A) { // [A-Z]
    doctypenamebuf.push(c + 0x0020); // stored lowercased
  }
  else if (c === 0x0000) { // NULL
    doctypenamebuf.push(0xFFFD /* REPLACEMENT CHARACTER */);
  }
  else {
    doctypenamebuf.push(c);
  }
}
4661
// Tokenizer state after the doctype name. This state declares a
// lookahead (below), and its non-EOF branches advance nextchar
// themselves.
//   c:         the current input code point (-1 for EOF).
//   lookahead: up to 6 characters of upcoming input.
//   eof:       unused here.
function after_doctype_name_state(c, lookahead, eof) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    // tab, LF, FF, space: ignore the character, but step over it.
    nextchar += 1;
    return;
  }

  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    nextchar += 1;
    emitDoctype();
    return;
  }

  if (c === -1) { // EOF
    forcequirks();
    emitDoctype();
    emitEOF();
    return;
  }

  // Look for a case-insensitive PUBLIC or SYSTEM keyword.
  var keyword = lookahead.toUpperCase();
  if (keyword === "PUBLIC") {
    nextchar += 6;
    tokenizer = after_doctype_public_keyword_state;
  }
  else if (keyword === "SYSTEM") {
    nextchar += 6;
    tokenizer = after_doctype_system_keyword_state;
  }
  else {
    forcequirks();
    tokenizer = bogus_doctype_state;
  }
}
after_doctype_name_state.lookahead = 6;
4699
// Tokenizer state right after the PUBLIC keyword of a doctype.
//   c: the current input code point (-1 for EOF).
function after_doctype_public_keyword_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    tokenizer = before_doctype_public_identifier_state; // whitespace
    return;
  }

  if (c === 0x0022 || c === 0x0027) {
    // An opening quote starts the public identifier.
    beginDoctypePublicId();
    tokenizer = (c === 0x0022) ?
      doctype_public_identifier_double_quoted_state :
      doctype_public_identifier_single_quoted_state;
    return;
  }

  // Every remaining input forces quirks mode.
  forcequirks();
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  else {
    tokenizer = bogus_doctype_state;
  }
}
4732
// Tokenizer state before the opening quote of a doctype public
// identifier.
//   c: the current input code point (-1 for EOF).
function before_doctype_public_identifier_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  if (c === 0x0022 || c === 0x0027) {
    // An opening quote starts the public identifier.
    beginDoctypePublicId();
    tokenizer = (c === 0x0022) ?
      doctype_public_identifier_double_quoted_state :
      doctype_public_identifier_single_quoted_state;
    return;
  }

  // Every remaining input forces quirks mode.
  forcequirks();
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  else {
    tokenizer = bogus_doctype_state;
  }
}
4765
// Tokenizer state inside a double-quoted doctype public identifier.
//   c: the current input code point (-1 for EOF).
function doctype_public_identifier_double_quoted_state(c) {
  if (c === 0x0022) { // QUOTATION MARK: closing quote
    tokenizer = after_doctype_public_identifier_state;
    return;
  }

  if (c === 0x003E || c === -1) {
    // ">" or EOF before the closing quote: force quirks and emit.
    forcequirks();
    if (c === 0x003E) { // GREATER-THAN SIGN
      tokenizer = data_state;
      emitDoctype();
    }
    else { // EOF
      emitDoctype();
      emitEOF();
    }
    return;
  }

  // Identifier text; NULL becomes U+FFFD REPLACEMENT CHARACTER.
  doctypepublicbuf.push(c === 0x0000 ? 0xFFFD : c);
}
4789
// Tokenizer state inside a single-quoted doctype public identifier.
//   c: the current input code point (-1 for EOF).
function doctype_public_identifier_single_quoted_state(c) {
  if (c === 0x0027) { // APOSTROPHE: closing quote
    tokenizer = after_doctype_public_identifier_state;
    return;
  }

  if (c === 0x003E || c === -1) {
    // ">" or EOF before the closing quote: force quirks and emit.
    forcequirks();
    if (c === 0x003E) { // GREATER-THAN SIGN
      tokenizer = data_state;
      emitDoctype();
    }
    else { // EOF
      emitDoctype();
      emitEOF();
    }
    return;
  }

  // Identifier text; NULL becomes U+FFFD REPLACEMENT CHARACTER.
  doctypepublicbuf.push(c === 0x0000 ? 0xFFFD : c);
}
4813
// Tokenizer state after the closing quote of a doctype public
// identifier.
//   c: the current input code point (-1 for EOF).
function after_doctype_public_identifier_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    // tab, LF, FF, space
    tokenizer = between_doctype_public_and_system_identifiers_state;
  }
  else if (c === 0x003E) { // GREATER-THAN SIGN
    // Complete doctype without a system identifier (no force-quirks).
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === 0x0022 || c === 0x0027) {
    // A quote with no separating whitespace starts the system identifier.
    beginDoctypeSystemId();
    tokenizer = (c === 0x0022) ?
      doctype_system_identifier_double_quoted_state :
      doctype_system_identifier_single_quoted_state;
  }
  else {
    forcequirks();
    if (c === -1) { // EOF
      emitDoctype();
      emitEOF();
    }
    else {
      tokenizer = bogus_doctype_state;
    }
  }
}
4845
// Tokenizer state in the whitespace between a doctype's public and
// system identifiers.
//   c: the current input code point (-1 for EOF).
function between_doctype_public_and_system_identifiers_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  if (c === 0x003E) { // GREATER-THAN SIGN
    // Complete doctype without a system identifier (no force-quirks).
    tokenizer = data_state;
    emitDoctype();
    return;
  }

  if (c === 0x0022 || c === 0x0027) {
    // An opening quote starts the system identifier.
    beginDoctypeSystemId();
    tokenizer = (c === 0x0022) ?
      doctype_system_identifier_double_quoted_state :
      doctype_system_identifier_single_quoted_state;
    return;
  }

  forcequirks();
  if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  else {
    tokenizer = bogus_doctype_state;
  }
}
4876
// Tokenizer state right after the SYSTEM keyword of a doctype.
//   c: the current input code point (-1 for EOF).
function after_doctype_system_keyword_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    tokenizer = before_doctype_system_identifier_state; // whitespace
    return;
  }

  if (c === 0x0022 || c === 0x0027) {
    // An opening quote starts the system identifier.
    beginDoctypeSystemId();
    tokenizer = (c === 0x0022) ?
      doctype_system_identifier_double_quoted_state :
      doctype_system_identifier_single_quoted_state;
    return;
  }

  // Every remaining input forces quirks mode.
  forcequirks();
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  else {
    tokenizer = bogus_doctype_state;
  }
}
4909
// Tokenizer state before the opening quote of a doctype system
// identifier.
//   c: the current input code point (-1 for EOF).
function before_doctype_system_identifier_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  if (c === 0x0022 || c === 0x0027) {
    // An opening quote starts the system identifier.
    beginDoctypeSystemId();
    tokenizer = (c === 0x0022) ?
      doctype_system_identifier_double_quoted_state :
      doctype_system_identifier_single_quoted_state;
    return;
  }

  // Every remaining input forces quirks mode.
  forcequirks();
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  else {
    tokenizer = bogus_doctype_state;
  }
}
4941
// Tokenizer state inside a double-quoted doctype system identifier.
//   c: the current input code point (-1 for EOF).
function doctype_system_identifier_double_quoted_state(c) {
  if (c === 0x0022) { // QUOTATION MARK: closing quote
    tokenizer = after_doctype_system_identifier_state;
    return;
  }

  if (c === 0x003E || c === -1) {
    // ">" or EOF before the closing quote: force quirks and emit.
    forcequirks();
    if (c === 0x003E) { // GREATER-THAN SIGN
      tokenizer = data_state;
      emitDoctype();
    }
    else { // EOF
      emitDoctype();
      emitEOF();
    }
    return;
  }

  // Identifier text; NULL becomes U+FFFD REPLACEMENT CHARACTER.
  doctypesystembuf.push(c === 0x0000 ? 0xFFFD : c);
}
4965
// Tokenizer state inside a single-quoted doctype system identifier.
//   c: the current input code point (-1 for EOF).
function doctype_system_identifier_single_quoted_state(c) {
  if (c === 0x0027) { // APOSTROPHE: closing quote
    tokenizer = after_doctype_system_identifier_state;
    return;
  }

  if (c === 0x003E || c === -1) {
    // ">" or EOF before the closing quote: force quirks and emit.
    forcequirks();
    if (c === 0x003E) { // GREATER-THAN SIGN
      tokenizer = data_state;
      emitDoctype();
    }
    else { // EOF
      emitDoctype();
      emitEOF();
    }
    return;
  }

  // Identifier text; NULL becomes U+FFFD REPLACEMENT CHARACTER.
  doctypesystembuf.push(c === 0x0000 ? 0xFFFD : c);
}
4989
// Tokenizer state after the closing quote of a doctype system
// identifier.
//   c: the current input code point (-1 for EOF).
function after_doctype_system_identifier_state(c) {
  if (c === 0x0009 || c === 0x000A || c === 0x000C || c === 0x0020) {
    return; // tab, LF, FF, space: ignore the character
  }

  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
  }
  else if (c === -1) { // EOF
    forcequirks();
    emitDoctype();
    emitEOF();
  }
  else {
    // Trailing junk. This does *not* set the DOCTYPE token's
    // force-quirks flag.
    tokenizer = bogus_doctype_state;
  }
}
5013
/*
 * Tokenizer state: skipping the remainder of a malformed DOCTYPE.
 * Every character is dropped until '>' or end of input (-1) is seen.
 */
function bogus_doctype_state(c) {
  if (c === 0x003E) { // GREATER-THAN SIGN
    tokenizer = data_state;
    emitDoctype();
    return;
  }
  if (c === -1) { // EOF
    emitDoctype();
    emitEOF();
  }
  // Any other character is ignored.
}
5029
/*
 * Tokenizer state: inside a CDATA section.
 * `c` is the current input code point; -1 signals end of input.
 */
function cdata_section_state(c) {
  if (c === 0x005D) { // RIGHT SQUARE BRACKET: possible start of "]]>"
    tokenizer = cdata_section_bracket_state;
    return;
  }
  if (c === -1) { // EOF
    emitEOF();
    return;
  }
  if (c === 0x0000) { // NULL: note it, then treat it like any other text
    textIncludesNUL = true;
  }
  // Rather than pushing one character and coming straight back here,
  // look ahead and emit as long a run as possible; only when that emits
  // nothing is the single character pushed.
  if (!emitCharsWhile(CDATATEXT)) {
    textrun.push(c);
  }
}
5050
/*
 * Tokenizer state: one ']' has been seen inside a CDATA section.
 */
function cdata_section_bracket_state(c) {
  if (c === 0x005D) { // RIGHT SQUARE BRACKET: "]]" seen so far
    tokenizer = cdata_section_end_state;
    return;
  }
  // The pending ']' was ordinary text after all; emit it and reprocess c.
  textrun.push(0x005D);
  reconsume(c, cdata_section_state);
}
5062
/*
 * Tokenizer state: "]]" has been seen inside a CDATA section.
 */
function cdata_section_end_state(c) {
  if (c === 0x003E) { // GREATER-THAN SIGN: "]]>" closes the section
    flushText();
    tokenizer = data_state;
  }
  else if (c === 0x005D) { // RIGHT SQUARE BRACKET
    // A third ']' in a row: the oldest one becomes text and we stay
    // in this state with two brackets still pending.
    textrun.push(0x005D);
  }
  else {
    // Both pending brackets were ordinary text; emit them and reprocess c.
    textrun.push(0x005D);
    textrun.push(0x005D);
    reconsume(c, cdata_section_state);
  }
}
5079
/*
 * Tokenizer state: an '&' has just been consumed.
 * Restarts the temporary buffer with that '&' and then decides, from
 * the next character `c`, which kind of reference (if any) follows.
 */
function character_reference_state(c) {
  beginTempBuf();
  tempbuf.push(0x0026);

  if (c === 0x0023) { // NUMBER SIGN: numeric reference
    tempbuf.push(c);
    tokenizer = numeric_character_reference_state;
    return;
  }

  var cannotStartReference =
      c === 0x0009 || // TAB
      c === 0x000A || // LINE FEED
      c === 0x000C || // FORM FEED
      c === 0x0020 || // SPACE
      c === 0x003C || // LESS-THAN SIGN
      c === 0x0026 || // AMPERSAND
      c === -1;       // EOF

  if (cannotStartReference) {
    reconsume(c, character_reference_end_state);
  }
  else {
    reconsume(c, named_character_reference_state);
  }
}
5102
/*
 * Tokenizer state: try to match a named character reference.
 *
 * This is a lookahead state: nothing has been consumed yet, so the
 * match starts at `nextchar` in `chars`. On a match the name is
 * consumed and the temporary buffer ends up holding the decoded
 * character(s); otherwise the buffer is left as it was (or, for the
 * attribute special case below, holding the literal text) and the
 * character reference end state flushes it.
 *
 * The parameter `c` is not used; the state reads from `chars` directly.
 *
 * Throws an Error if NAMEDCHARREF fails to match at all.
 */
function named_character_reference_state(c) {
  NAMEDCHARREF.lastIndex = nextchar; // w/ lookahead no char has been consumed
  var matched = NAMEDCHARREF.exec(chars);
  if (!matched) throw new Error("should never happen");
  var name = matched[1];
  if (!name) {
    // If no match can be made, switch to the character reference end state
    tokenizer = character_reference_end_state;
    return;
  }

  // Consume the matched characters and append them to temporary buffer
  nextchar += name.length;
  pushAll(tempbuf, str2buf(name));

  switch(return_state) {
  case attribute_value_double_quoted_state:
  case attribute_value_single_quoted_state:
  case attribute_value_unquoted_state:
    // If the character reference was consumed as part of an attribute...
    if (name[name.length-1] !== ';') { // ...and the last char is not ;
      // ...and the next character is '=' or alphanumeric, the text is
      // kept literally. The bounds check matters: past the end of the
      // input chars[nextchar] is undefined, which RegExp test() would
      // coerce to the string "undefined" and wrongly match.
      if (nextchar < chars.length &&
          /[=A-Za-z0-9]/.test(chars[nextchar])) {
        tokenizer = character_reference_end_state;
        return;
      }
    }
    break;
  default:
    break;
  }

  // Replace the literal "&name" in the buffer with the decoded value,
  // which the table stores either as one code unit or as an array.
  beginTempBuf();
  var rv = namedCharRefs[name];
  if (typeof rv === 'number') {
    tempbuf.push(rv);
  } else {
    pushAll(tempbuf, rv);
  }
  tokenizer = character_reference_end_state;
}
// We might need to pause tokenization until we have enough characters
// in the buffer for longest possible character reference.
named_character_reference_state.lookahead = -NAMEDCHARREF_MAXLEN;
5146
/*
 * Tokenizer state: "&#" has been consumed.
 * Resets the accumulated code and picks hexadecimal or decimal parsing.
 */
function numeric_character_reference_state(c) {
  character_reference_code = 0;

  var isHexMarker = (c === 0x0078 /* x */ || c === 0x0058 /* X */);
  if (isHexMarker) {
    tempbuf.push(c);
    tokenizer = hexadecimal_character_reference_start_state;
  }
  else {
    reconsume(c, decimal_character_reference_start_state);
  }
}
5160
/*
 * Tokenizer state: "&#x" has been consumed; at least one hex digit
 * must follow for this to be a numeric reference.
 */
function hexadecimal_character_reference_start_state(c) {
  var isHexDigit =
      (c >= 0x0030 && c <= 0x0039) || // [0-9]
      (c >= 0x0041 && c <= 0x0046) || // [A-F]
      (c >= 0x0061 && c <= 0x0066);   // [a-f]

  if (isHexDigit) {
    reconsume(c, hexadecimal_character_reference_state);
  }
  else {
    reconsume(c, character_reference_end_state);
  }
}
5176
/*
 * Tokenizer state: "&#" has been consumed; at least one decimal digit
 * must follow for this to be a numeric reference.
 */
function decimal_character_reference_start_state(c) {
  var isDigit = (c >= 0x0030 && c <= 0x0039); // [0-9]

  if (isDigit) {
    reconsume(c, decimal_character_reference_state);
  }
  else {
    reconsume(c, character_reference_end_state);
  }
}
5188
/*
 * Tokenizer state: accumulating the digits of a hexadecimal reference
 * into character_reference_code.
 */
function hexadecimal_character_reference_state(c) {
  var digit;

  if (c >= 0x0030 && c <= 0x0039) {      // [0-9]
    digit = c - 0x0030;
  }
  else if (c >= 0x0041 && c <= 0x0046) { // [A-F]
    digit = c - 0x0037;
  }
  else if (c >= 0x0061 && c <= 0x0066) { // [a-f]
    digit = c - 0x0057;
  }
  else {
    // Not a digit: the number is complete.
    if (c === 0x003B) { // SEMICOLON is consumed
      tokenizer = numeric_character_reference_end_state;
    }
    else {              // anything else is reprocessed
      reconsume(c, numeric_character_reference_end_state);
    }
    return;
  }

  character_reference_code = character_reference_code * 16 + digit;
}
5214
/*
 * Tokenizer state: accumulating the digits of a decimal reference
 * into character_reference_code.
 */
function decimal_character_reference_state(c) {
  if (c >= 0x0030 && c <= 0x0039) { // [0-9]
    character_reference_code = character_reference_code * 10 + (c - 0x0030);
    return;
  }

  // Not a digit: the number is complete.
  if (c === 0x003B) { // SEMICOLON is consumed
    tokenizer = numeric_character_reference_end_state;
  }
  else {              // anything else is reprocessed
    reconsume(c, numeric_character_reference_end_state);
  }
}
5230
/*
 * Tokenizer state: a numeric reference has been fully read.
 * Normalizes character_reference_code, stores the result in the
 * temporary buffer as one or two UTF-16 code units, and hands `c`
 * to the character reference end state.
 */
function numeric_character_reference_end_state(c) {
  if (character_reference_code in numericCharRefReplacements) {
    // Codes listed in the replacement table are remapped.
    character_reference_code = numericCharRefReplacements[character_reference_code];
  }
  else {
    var beyondUnicode = character_reference_code > 0x10FFFF;
    var isSurrogate = character_reference_code >= 0xD800 &&
                      character_reference_code < 0xE000;
    if (beyondUnicode || isSurrogate) {
      character_reference_code = 0xFFFD;
    }
  }

  beginTempBuf();
  if (character_reference_code <= 0xFFFF) {
    tempbuf.push(character_reference_code);
  }
  else {
    // Above the BMP: encode as a surrogate pair.
    character_reference_code -= 0x10000;
    /* jshint bitwise: false */
    var highSurrogate = 0xD800 + (character_reference_code >> 10);
    var lowSurrogate = 0xDC00 + (character_reference_code & 0x03FF);
    tempbuf.push(highSurrogate);
    tempbuf.push(lowSurrogate);
  }
  reconsume(c, character_reference_end_state);
}
5249
/*
 * Tokenizer state: flush the temporary buffer to wherever the
 * reference started (attribute value or text), then resume there.
 */
function character_reference_end_state(c) {
  var cameFromAttribute =
      return_state === attribute_value_double_quoted_state ||
      return_state === attribute_value_single_quoted_state ||
      return_state === attribute_value_unquoted_state;

  if (cameFromAttribute) {
    // append each character to the current attribute's value
    attrvaluebuf += buf2str(tempbuf);
  }
  else {
    pushAll(textrun, tempbuf);
  }
  reconsume(c, return_state);
}
5264
5265 /***
5266 * The tree builder insertion modes
5267 */
5268
5269 // 11.2.5.4.1 The "initial" insertion mode
/*
 * Tree builder: the "initial" insertion mode.
 *
 * t     - token type (1 TEXT, 4 COMMENT, 5 DOCTYPE; others fall through)
 * value - text, comment data, doctype name or tag name
 * arg3  - for a DOCTYPE, the public identifier
 * arg4  - for a DOCTYPE, the system identifier
 */
function initial_mode(t, value, arg3, arg4) {
  if (t === 1) { // TEXT
    value = value.replace(LEADINGWS, ""); // Ignore spaces
    if (value.length === 0) return;       // Nothing but spaces
    // Non-space text is handled at the bottom
  }
  else if (t === 4) { // COMMENT
    doc._appendChild(doc.createComment(value));
    return;
  }
  else if (t === 5) { // DOCTYPE
    var name = value;
    var publicid = arg3;
    var systemid = arg4;

    // Use the constructor directly instead of
    // implementation.createDocumentType because the create
    // function throws errors on invalid characters, and
    // we don't want the parser to throw them.
    doc.appendChild(new DocumentType(name,publicid, systemid));

    // Note that there is no public API for setting quirks mode. We can
    // do this here because we have access to implementation details.
    var fullQuirks =
        force_quirks ||
        name.toLowerCase() !== "html" ||
        quirkyPublicIds.test(publicid) ||
        (systemid && systemid.toLowerCase() === quirkySystemId) ||
        (systemid === undefined &&
         conditionallyQuirkyPublicIds.test(publicid));

    if (fullQuirks) {
      doc._quirks = true;
    }
    else if (limitedQuirkyPublicIds.test(publicid) ||
             (systemid !== undefined &&
              conditionallyQuirkyPublicIds.test(publicid))) {
      doc._limitedQuirks = true;
    }
    parser = before_html_mode;
    return;
  }

  // Tags or non-whitespace text with no DOCTYPE seen: quirks mode,
  // then let the next mode process this same token.
  doc._quirks = true;
  parser = before_html_mode;
  parser(t,value,arg3,arg4);
}
5311
5312 // 11.2.5.4.2 The "before html" insertion mode
/*
 * Tree builder: the "before html" insertion mode.
 * Creates the root <html> element, either from an explicit start tag
 * or implicitly when any other significant token arrives.
 */
function before_html_mode(t,value,arg3,arg4) {
  // Create the root element with the given attributes, put it on the
  // stack and in the document, and move on to "before head".
  function openRoot(attrs) {
    var root = createHTMLElt(doc, "html", attrs);
    stack.push(root);
    doc.appendChild(root);
    // XXX: handle application cache here
    parser = before_head_mode;
  }

  if (t === 1) { // TEXT
    value = value.replace(LEADINGWS, ""); // Ignore spaces
    if (value.length === 0) return;       // Nothing but spaces
  }
  else if (t === 5) { // DOCTYPE
    return; /* ignore the token */
  }
  else if (t === 4) { // COMMENT
    doc._appendChild(doc.createComment(value));
    return;
  }
  else if (t === 2) { // TAG
    if (value === "html") {
      openRoot(arg3);
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    // Only these four end tags imply a root element; ignore the rest.
    if (value !== "html" && value !== "head" &&
        value !== "body" && value !== "br") {
      return;
    }
  }

  // Anything that didn't get handled above: make an attribute-less
  // root element and reprocess the token in the next mode.
  openRoot(null);
  parser(t,value,arg3,arg4);
}
5356
5357 // 11.2.5.4.3 The "before head" insertion mode
/*
 * Tree builder: the "before head" insertion mode.
 * Opens the <head> element, explicitly or implicitly.
 */
function before_head_mode(t,value,arg3,arg4) {
  if (t === 1) { // TEXT
    value = value.replace(LEADINGWS, ""); // Ignore spaces
    if (value.length === 0) return;       // Nothing but spaces
  }
  else if (t === 5) { // DOCTYPE
    return; /* ignore the token */
  }
  else if (t === 4) { // COMMENT
    insertComment(value);
    return;
  }
  else if (t === 2) { // TAG
    if (value === "html") {
      in_body_mode(t,value,arg3,arg4);
      return;
    }
    if (value === "head") {
      head_element_pointer = insertHTMLElement(value, arg3);
      parser = in_head_mode;
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    // Only these four end tags imply a head element; ignore the rest.
    if (value !== "html" && value !== "head" &&
        value !== "body" && value !== "br") {
      return;
    }
  }

  // If not handled explicitly above
  before_head_mode(TAG, "head", null);  // create a head tag
  parser(t, value, arg3, arg4);         // then try again with this token
}
5398
/*
 * Tree builder: the "in head" insertion mode.
 *
 * t     - token type (1 TEXT, 2 TAG, 3 ENDTAG, 4 COMMENT, 5 DOCTYPE)
 * value - text, comment data or tag name
 * arg3  - for a start tag, its attributes
 * arg4  - passed through untouched when the token is reprocessed
 */
function in_head_mode(t, value, arg3, arg4) {
  var leading;

  if (t === 1) { // TEXT
    // Leading whitespace belongs to the head; the rest is handled below.
    leading = value.match(LEADINGWS);
    if (leading) {
      insertText(leading[0]);
      value = value.substring(leading[0].length);
    }
    if (value.length === 0) return;
  }
  else if (t === 4) { // COMMENT
    insertComment(value);
    return;
  }
  else if (t === 5) { // DOCTYPE
    return;
  }
  else if (t === 2) { // TAG
    switch(value) {
    case "html":
      in_body_mode(t, value, arg3, arg4);
      return;

    case "meta":
      // XXX:
      // May need to change the encoding based on this tag
    case "base":
    case "basefont":
    case "bgsound":
    case "link":
      // Insert and immediately pop: these elements have no content.
      insertHTMLElement(value, arg3);
      stack.pop();
      return;

    case "title":
      parseRCDATA(value, arg3);
      return;

    case "noscript":
      if (scripting_enabled) {
        // With scripting on, the content is raw text.
        parseRawText(value,arg3);
        return;
      }
      insertHTMLElement(value, arg3);
      parser = in_head_noscript_mode;
      return;

    case "noframes":
    case "style":
      parseRawText(value,arg3);
      return;

    case "script":
      insertElement(function(doc) {
        var script = createHTMLElt(doc, value, arg3);
        script._parser_inserted = true;
        script._force_async = false;
        if (fragment) script._already_started = true;
        flushText();
        return script;
      });
      tokenizer = script_data_state;
      originalInsertionMode = parser;
      parser = text_mode;
      return;

    case "template":
      insertHTMLElement(value, arg3);
      afe.insertMarker();
      frameset_ok = false;
      parser = in_template_mode;
      templateInsertionModes.push(in_template_mode);
      return;

    case "head":
      return; // ignore it
    }
    // Any other start tag is handled at the bottom.
  }
  else if (t === 3) { // ENDTAG
    switch(value) {
    case "head":
      stack.pop();
      parser = after_head_mode;
      return;

    case "body":
    case "html":
    case "br":
      break; // handle these at the bottom of the function

    case "template":
      if (stack.contains("template")) {
        stack.generateImpliedEndTags(null, "thorough");
        stack.popTag("template");
        afe.clearToMarker();
        templateInsertionModes.pop();
        resetInsertionMode();
      }
      return;

    default:
      return; // ignore any other end tag
    }
  }

  // If not handled above
  in_head_mode(ENDTAG, "head", null); // synthetic </head>
  parser(t, value, arg3, arg4);       // Then redo this one
}
5500
5501 // 13.2.5.4.5 The "in head noscript" insertion mode
/*
 * Tree builder: the "in head noscript" insertion mode.
 * Most tokens are delegated to in_head_mode; anything unexpected
 * closes the <noscript> with a synthetic end tag and is reprocessed.
 */
function in_head_noscript_mode(t, value, arg3, arg4) {
  var leading;

  if (t === 5) { // DOCTYPE
    return;
  }
  if (t === 4) { // COMMENT
    in_head_mode(t, value);
    return;
  }

  if (t === 1) { // TEXT
    leading = value.match(LEADINGWS);
    if (leading) {
      in_head_mode(t, leading[0]);
      value = value.substring(leading[0].length);
    }
    if (value.length === 0) return; // no more text
  }
  else if (t === 2) { // TAG
    switch(value) {
    case "html":
      in_body_mode(t, value, arg3, arg4);
      return;
    case "basefont":
    case "bgsound":
    case "link":
    case "meta":
    case "noframes":
    case "style":
      in_head_mode(t, value, arg3);
      return;
    case "head":
    case "noscript":
      return; // ignored
    }
  }
  else if (t === 3) { // ENDTAG
    if (value === "noscript") {
      stack.pop();
      parser = in_head_mode;
      return;
    }
    if (value !== "br") {
      return; // ignore other end tags
    }
    // </br> is handled below
  }

  // If not handled above
  in_head_noscript_mode(ENDTAG, "noscript", null);
  parser(t, value, arg3, arg4);
}
5553
/*
 * Tree builder: the "after head" insertion mode.
 * Waits for <body> or <frameset>; anything else significant opens an
 * implicit <body> and is reprocessed.
 */
function after_head_mode(t, value, arg3, arg4) {
  var leading;

  if (t === 1) { // TEXT
    leading = value.match(LEADINGWS);
    if (leading) {
      insertText(leading[0]);
      value = value.substring(leading[0].length);
    }
    if (value.length === 0) return;
    // Non-whitespace is handled at the bottom
  }
  else if (t === 4) { // COMMENT
    insertComment(value);
    return;
  }
  else if (t === 5) { // DOCTYPE
    return;
  }
  else if (t === 2) { // TAG
    switch(value) {
    case "html":
      in_body_mode(t, value, arg3, arg4);
      return;
    case "body":
      insertHTMLElement(value, arg3);
      frameset_ok = false;
      parser = in_body_mode;
      return;
    case "frameset":
      insertHTMLElement(value, arg3);
      parser = in_frameset_mode;
      return;
    case "base":
    case "basefont":
    case "bgsound":
    case "link":
    case "meta":
    case "noframes":
    case "script":
    case "style":
    case "template":
    case "title":
      // Head content after </head>: temporarily put the head element
      // back on the stack so in_head_mode inserts into it.
      stack.push(head_element_pointer);
      in_head_mode(TAG, value, arg3);
      stack.removeElement(head_element_pointer);
      return;
    case "head":
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    if (value === "template") {
      return in_head_mode(t, value, arg3, arg4);
    }
    if (value !== "body" && value !== "html" && value !== "br") {
      return; // ignore any other end tag
    }
  }

  // Implicit <body>. The synthetic start tag clears frameset_ok, so it
  // is switched back on before the real token is reprocessed.
  after_head_mode(TAG, "body", null);
  frameset_ok = true;
  parser(t, value, arg3, arg4);
}
5619
5620 // 13.2.5.4.7 The "in body" insertion mode
/*
 * Tree builder: the "in body" insertion mode.
 *
 * t     - token type (1 TEXT, 2 TAG, 3 ENDTAG, 4 COMMENT, 5 DOCTYPE, -1 EOF)
 * value - text, comment data or tag name
 * arg3  - for a start tag, its attributes
 * arg4  - for a start tag, the self-closing flag
 *
 * Start and end tags are dispatched to the private helpers below;
 * other token types are handled here. Any other token type is ignored.
 */
function in_body_mode(t,value,arg3,arg4) {
  switch(t) {
  case 1: // TEXT
    if (textIncludesNUL) {
      value = value.replace(NULCHARS, "");
      if (value.length === 0) return;
    }
    // Any non-space character rules out a later <frameset>
    if (frameset_ok && NONWS.test(value))
      frameset_ok = false;
    afereconstruct();
    insertText(value);
    return;

  case 5: // DOCTYPE
    return;

  case 4: // COMMENT
    insertComment(value);
    return;

  case -1: // EOF
    if (templateInsertionModes.length) {
      return in_template_mode(t);
    }
    stopParsing();
    return;

  case 2: // TAG
    in_body_mode_start_tag(t, value, arg3, arg4);
    return;

  case 3: // ENDTAG
    in_body_mode_end_tag(t, value, arg3, arg4);
    return;
  }
}

// Private to in_body_mode: if a <p> is open in button scope, close it
// by processing a synthetic </p>.
function in_body_mode_close_p() {
  if (stack.inButtonScope("p")) in_body_mode(ENDTAG, "p");
}

// Private to in_body_mode: handle a start tag token. The parameters
// are the ones in_body_mode received.
function in_body_mode_start_tag(t, value, arg3, arg4) {
  var body, idx, node, elt;

  switch(value) {
  case "html":
    // Merge attributes into the existing root, unless inside a template
    if (!stack.contains("template")) {
      transferAttributes(arg3, stack.elements[0]);
    }
    return;

  case "base":
  case "basefont":
  case "bgsound":
  case "link":
  case "meta":
  case "noframes":
  case "script":
  case "style":
  case "template":
  case "title":
    in_head_mode(TAG, value, arg3);
    return;

  case "body":
    body = stack.elements[1];
    if (body && (body instanceof impl.HTMLBodyElement) &&
        !stack.contains("template")) {
      frameset_ok = false;
      transferAttributes(arg3, body);
    }
    return;

  case "frameset":
    if (!frameset_ok) return;
    body = stack.elements[1];
    if (!body || !(body instanceof impl.HTMLBodyElement))
      return;
    // Replace the body with the frameset: detach the body and unwind
    // the stack back to the root element.
    if (body.parentNode) body.parentNode.removeChild(body);
    while(!(stack.top instanceof impl.HTMLHtmlElement))
      stack.pop();
    insertHTMLElement(value, arg3);
    parser = in_frameset_mode;
    return;

  case "address":
  case "article":
  case "aside":
  case "blockquote":
  case "center":
  case "details":
  case "dialog":
  case "dir":
  case "div":
  case "dl":
  case "fieldset":
  case "figcaption":
  case "figure":
  case "footer":
  case "header":
  case "hgroup":
  case "main":
  case "nav":
  case "ol":
  case "p":
  case "section":
  case "summary":
  case "ul":
    in_body_mode_close_p();
    insertHTMLElement(value, arg3);
    return;

  case "menu":
    in_body_mode_close_p();
    if (isA(stack.top, 'menuitem')) {
      stack.pop();
    }
    insertHTMLElement(value, arg3);
    return;

  case "h1":
  case "h2":
  case "h3":
  case "h4":
  case "h5":
  case "h6":
    in_body_mode_close_p();
    // Headings do not nest: drop an open heading first
    if (stack.top instanceof impl.HTMLHeadingElement)
      stack.pop();
    insertHTMLElement(value, arg3);
    return;

  case "pre":
  case "listing":
    in_body_mode_close_p();
    insertHTMLElement(value, arg3);
    ignore_linefeed = true;
    frameset_ok = false;
    return;

  case "form":
    if (form_element_pointer && !stack.contains("template")) return;
    in_body_mode_close_p();
    elt = insertHTMLElement(value, arg3);
    if (!stack.contains("template"))
      form_element_pointer = elt;
    return;

  case "li":
    frameset_ok = false;
    // Walk down the stack: close an open <li>, but stop at the first
    // special element other than address, div or p.
    for(idx = stack.elements.length-1; idx >= 0; idx--) {
      node = stack.elements[idx];
      if (node instanceof impl.HTMLLIElement) {
        in_body_mode(ENDTAG, "li");
        break;
      }
      if (isA(node, specialSet) && !isA(node, addressdivpSet))
        break;
    }
    in_body_mode_close_p();
    insertHTMLElement(value, arg3);
    return;

  case "dd":
  case "dt":
    frameset_ok = false;
    // Same walk as for <li>, closing an open <dd> or <dt>.
    for(idx = stack.elements.length-1; idx >= 0; idx--) {
      node = stack.elements[idx];
      if (isA(node, dddtSet)) {
        in_body_mode(ENDTAG, node.localName);
        break;
      }
      if (isA(node, specialSet) && !isA(node, addressdivpSet))
        break;
    }
    in_body_mode_close_p();
    insertHTMLElement(value, arg3);
    return;

  case "plaintext":
    in_body_mode_close_p();
    insertHTMLElement(value, arg3);
    tokenizer = plaintext_state;
    return;

  case "button":
    if (stack.inScope("button")) {
      // Close the open button, then reprocess this start tag
      in_body_mode(ENDTAG, "button");
      parser(t, value, arg3, arg4);
    }
    else {
      afereconstruct();
      insertHTMLElement(value, arg3);
      frameset_ok = false;
    }
    return;

  case "a":
    var activeElement = afe.findElementByTag("a");
    if (activeElement) {
      // An <a> is already active: end it and make sure it is gone
      // from both lists before opening the new one.
      in_body_mode(ENDTAG, value);
      afe.remove(activeElement);
      stack.removeElement(activeElement);
    }
    /* falls through */
  case "b":
  case "big":
  case "code":
  case "em":
  case "font":
  case "i":
  case "s":
  case "small":
  case "strike":
  case "strong":
  case "tt":
  case "u":
    afereconstruct();
    afe.push(insertHTMLElement(value,arg3), arg3);
    return;

  case "nobr":
    afereconstruct();
    if (stack.inScope(value)) {
      in_body_mode(ENDTAG, value);
      afereconstruct();
    }
    afe.push(insertHTMLElement(value,arg3), arg3);
    return;

  case "applet":
  case "marquee":
  case "object":
    afereconstruct();
    insertHTMLElement(value,arg3);
    afe.insertMarker();
    frameset_ok = false;
    return;

  case "table":
    // In quirks mode a table does not close an open <p>
    if (!doc._quirks && stack.inButtonScope("p")) {
      in_body_mode(ENDTAG, "p");
    }
    insertHTMLElement(value,arg3);
    frameset_ok = false;
    parser = in_table_mode;
    return;

  case "area":
  case "br":
  case "embed":
  case "img":
  case "keygen":
  case "wbr":
    afereconstruct();
    insertHTMLElement(value,arg3);
    stack.pop();
    frameset_ok = false;
    return;

  case "input":
    afereconstruct();
    elt = insertHTMLElement(value,arg3);
    stack.pop();
    // Only a hidden input leaves frameset_ok untouched
    var type = elt.getAttribute("type");
    if (!type || type.toLowerCase() !== "hidden")
      frameset_ok = false;
    return;

  case "param":
  case "source":
  case "track":
    insertHTMLElement(value,arg3);
    stack.pop();
    return;

  case "hr":
    in_body_mode_close_p();
    if (isA(stack.top, 'menuitem')) {
      stack.pop();
    }
    insertHTMLElement(value,arg3);
    stack.pop();
    frameset_ok = false;
    return;

  case "image":
    // <image> is processed as if it were <img>
    in_body_mode(TAG, "img", arg3, arg4);
    return;

  case "textarea":
    insertHTMLElement(value,arg3);
    ignore_linefeed = true;
    frameset_ok = false;
    tokenizer = rcdata_state;
    originalInsertionMode = parser;
    parser = text_mode;
    return;

  case "xmp":
    in_body_mode_close_p();
    afereconstruct();
    frameset_ok = false;
    parseRawText(value, arg3);
    return;

  case "iframe":
    frameset_ok = false;
    parseRawText(value, arg3);
    return;

  case "noembed":
    parseRawText(value,arg3);
    return;

  case "noscript":
    if (scripting_enabled) {
      parseRawText(value,arg3);
      return;
    }
    break; // XXX Otherwise treat it as any other open tag?

  case "select":
    afereconstruct();
    insertHTMLElement(value,arg3);
    frameset_ok = false;
    // The follow-up mode depends on whether we got here from a table mode
    if (parser === in_table_mode ||
        parser === in_caption_mode ||
        parser === in_table_body_mode ||
        parser === in_row_mode ||
        parser === in_cell_mode)
      parser = in_select_in_table_mode;
    else
      parser = in_select_mode;
    return;

  case "optgroup":
  case "option":
    if (stack.top instanceof impl.HTMLOptionElement) {
      in_body_mode(ENDTAG, "option");
    }
    afereconstruct();
    insertHTMLElement(value,arg3);
    return;

  case "menuitem":
    if (isA(stack.top, 'menuitem')) {
      stack.pop();
    }
    afereconstruct();
    insertHTMLElement(value, arg3);
    return;

  case "rb":
  case "rtc":
    if (stack.inScope("ruby")) {
      stack.generateImpliedEndTags();
    }
    insertHTMLElement(value,arg3);
    return;

  case "rp":
  case "rt":
    if (stack.inScope("ruby")) {
      stack.generateImpliedEndTags("rtc");
    }
    insertHTMLElement(value,arg3);
    return;

  case "math":
    afereconstruct();
    adjustMathMLAttributes(arg3);
    adjustForeignAttributes(arg3);
    insertForeignElement(value, arg3, NAMESPACE.MATHML);
    if (arg4) // self-closing flag
      stack.pop();
    return;

  case "svg":
    afereconstruct();
    adjustSVGAttributes(arg3);
    adjustForeignAttributes(arg3);
    insertForeignElement(value, arg3, NAMESPACE.SVG);
    if (arg4) // self-closing flag
      stack.pop();
    return;

  case "caption":
  case "col":
  case "colgroup":
  case "frame":
  case "head":
  case "tbody":
  case "td":
  case "tfoot":
  case "th":
  case "thead":
  case "tr":
    // Ignore table tags if we're not in_table mode
    return;
  }

  // Handle any other start tag here
  // (and also noscript tags when scripting is disabled)
  afereconstruct();
  insertHTMLElement(value,arg3);
}

// Private to in_body_mode: handle an end tag token. The parameters
// are the ones in_body_mode received.
function in_body_mode_end_tag(t, value, arg3, arg4) {
  var idx, node;

  switch(value) {
  case "template":
    in_head_mode(ENDTAG, value, arg3);
    return;

  case "body":
    if (stack.inScope("body")) {
      parser = after_body_mode;
    }
    return;

  case "html":
    if (!stack.inScope("body")) return;
    // Switch modes, then let the new mode see this end tag
    parser = after_body_mode;
    parser(t, value, arg3);
    return;

  case "address":
  case "article":
  case "aside":
  case "blockquote":
  case "button":
  case "center":
  case "details":
  case "dialog":
  case "dir":
  case "div":
  case "dl":
  case "fieldset":
  case "figcaption":
  case "figure":
  case "footer":
  case "header":
  case "hgroup":
  case "listing":
  case "main":
  case "menu":
  case "nav":
  case "ol":
  case "pre":
  case "section":
  case "summary":
  case "ul":
    // Ignore if there is not a matching open tag
    if (!stack.inScope(value)) return;
    stack.generateImpliedEndTags();
    stack.popTag(value);
    return;

  case "form":
    if (stack.contains("template")) {
      if (!stack.inScope("form")) return;
      stack.generateImpliedEndTags();
      stack.popTag("form");
    }
    else {
      // The form pointer is cleared even when the end tag is ignored
      var openform = form_element_pointer;
      form_element_pointer = null;
      if (!openform || !stack.elementInScope(openform)) return;
      stack.generateImpliedEndTags();
      stack.removeElement(openform);
    }
    return;

  case "p":
    if (stack.inButtonScope(value)) {
      stack.generateImpliedEndTags(value);
      stack.popTag(value);
    }
    else {
      // No open <p>: synthesize one, then reprocess this end tag
      in_body_mode(TAG, value, null);
      parser(t, value, arg3, arg4);
    }
    return;

  case "li":
    if (!stack.inListItemScope(value)) return;
    stack.generateImpliedEndTags(value);
    stack.popTag(value);
    return;

  case "dd":
  case "dt":
    if (!stack.inScope(value)) return;
    stack.generateImpliedEndTags(value);
    stack.popTag(value);
    return;

  case "h1":
  case "h2":
  case "h3":
  case "h4":
  case "h5":
  case "h6":
    // Any open heading is closed, whatever its level
    if (!stack.elementTypeInScope(impl.HTMLHeadingElement)) return;
    stack.generateImpliedEndTags();
    stack.popElementType(impl.HTMLHeadingElement);
    return;

  case "sarcasm":
    // Take a deep breath, and then:
    break;

  case "a":
  case "b":
  case "big":
  case "code":
  case "em":
  case "font":
  case "i":
  case "nobr":
  case "s":
  case "small":
  case "strike":
  case "strong":
  case "tt":
  case "u":
    var result = adoptionAgency(value);
    if (result) return; // If we did something we're done
    break;              // Go to the "any other end tag" case

  case "applet":
  case "marquee":
  case "object":
    if (!stack.inScope(value)) return;
    stack.generateImpliedEndTags();
    stack.popTag(value);
    afe.clearToMarker();
    return;

  case "br":
    in_body_mode(TAG, value, null);  // Turn </br> into <br>
    return;
  }

  // Any other end tag goes here: walk down the stack looking for a
  // matching element, giving up at the first special element.
  for(idx = stack.elements.length-1; idx >= 0; idx--) {
    node = stack.elements[idx];
    if (isA(node, value)) {
      stack.generateImpliedEndTags(value);
      stack.popElement(node);
      break;
    }
    else if (isA(node, specialSet)) {
      return;
    }
  }
}
6158
/*
 * Tree builder: the "text" insertion mode, used while the content of
 * an element is being read as plain text. Only TEXT, EOF and ENDTAG
 * tokens do anything; any other token type is ignored.
 */
function text_mode(t, value, arg3, arg4) {
  if (t === 1) { // TEXT
    insertText(value);
    return;
  }

  if (t === -1) { // EOF
    if (stack.top instanceof impl.HTMLScriptElement)
      stack.top._already_started = true;
    stack.pop();
    // Let the mode we came from see the EOF as well
    parser = originalInsertionMode;
    parser(t);
    return;
  }

  if (t === 3) { // ENDTAG
    if (value === "script") {
      handleScriptEnd();
    }
    else {
      stack.pop();
      parser = originalInsertionMode;
    }
  }
  // We should never get any other token types
}
6185
/*
 * Tree builder: the "in table" insertion mode.
 *
 * Table-structure tokens are handled here. Everything else is passed
 * to in_body_mode with foster_parent_mode switched on for the
 * duration of the call.
 */
function in_table_mode(t, value, arg3, arg4) {
  // Return the lowercased value of the "type" attribute from a list of
  // [name, value] pairs, or null if there is none.
  function getTypeAttr(attrs) {
    var count = attrs.length;
    for(var k = 0; k < count; k++) {
      var attr = attrs[k];
      if (attr[0] === "type")
        return attr[1].toLowerCase();
    }
    return null;
  }

  if (t === 1) { // TEXT
    // XXX the text_integration_mode stuff is
    // just a hack I made up
    if (text_integration_mode) {
      in_body_mode(t, value, arg3, arg4);
      return;
    }
    if (isA(stack.top, tablesectionrowSet)) {
      // Collect the text in in_table_text_mode and decide what to do
      // with it once a non-text token arrives.
      pending_table_text = [];
      originalInsertionMode = parser;
      parser = in_table_text_mode;
      parser(t, value, arg3, arg4);
      return;
    }
  }
  else if (t === 4) { // COMMENT
    insertComment(value);
    return;
  }
  else if (t === 5) { // DOCTYPE
    return;
  }
  else if (t === -1) { // EOF
    in_body_mode(t, value, arg3, arg4);
    return;
  }
  else if (t === 2) { // TAG
    switch(value) {
    case "caption":
      stack.clearToContext(tableContextSet);
      afe.insertMarker();
      insertHTMLElement(value,arg3);
      parser = in_caption_mode;
      return;

    case "colgroup":
      stack.clearToContext(tableContextSet);
      insertHTMLElement(value,arg3);
      parser = in_column_group_mode;
      return;

    case "col":
      // Synthesize the missing <colgroup>, then reprocess
      in_table_mode(TAG, "colgroup", null);
      parser(t, value, arg3, arg4);
      return;

    case "tbody":
    case "tfoot":
    case "thead":
      stack.clearToContext(tableContextSet);
      insertHTMLElement(value,arg3);
      parser = in_table_body_mode;
      return;

    case "td":
    case "th":
    case "tr":
      // Synthesize the missing <tbody>, then reprocess
      in_table_mode(TAG, "tbody", null);
      parser(t, value, arg3, arg4);
      return;

    case "table":
      if (!stack.inTableScope(value)) {
        return; // Ignore the token
      }
      // Close the current table, then reprocess the start tag
      in_table_mode(ENDTAG, value);
      parser(t, value, arg3, arg4);
      return;

    case "style":
    case "script":
    case "template":
      in_head_mode(t, value, arg3, arg4);
      return;

    case "input":
      // Only hidden inputs may stay inside the table
      var type = getTypeAttr(arg3);
      if (type !== "hidden") break; // to the anything else case
      insertHTMLElement(value,arg3);
      stack.pop();
      return;

    case "form":
      if (form_element_pointer || stack.contains("template")) return;
      form_element_pointer = insertHTMLElement(value, arg3);
      stack.popElement(form_element_pointer);
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    switch(value) {
    case "table":
      if (!stack.inTableScope(value)) return;
      stack.popTag(value);
      resetInsertionMode();
      return;

    case "body":
    case "caption":
    case "col":
    case "colgroup":
    case "html":
    case "tbody":
    case "td":
    case "tfoot":
    case "th":
    case "thead":
    case "tr":
      return; // ignored

    case "template":
      in_head_mode(t, value, arg3, arg4);
      return;
    }
  }

  // This is the anything else case
  foster_parent_mode = true;
  in_body_mode(t, value, arg3, arg4);
  foster_parent_mode = false;
}
6310
// Token handler for the "in table text" insertion mode.
//
// Text tokens are accumulated in pending_table_text. The first token of
// any other type flushes the accumulated text, restores the insertion
// mode that was current before, and is then reprocessed in that mode.
function in_table_text_mode(t, value, arg3, arg4) {
  if (t === TEXT) {
    var chunk = value;
    if (textIncludesNUL) {
      // NUL characters are dropped; nothing is buffered if that
      // leaves the chunk empty.
      chunk = chunk.replace(NULCHARS, "");
      if (chunk.length === 0) return;
    }
    pending_table_text.push(chunk);
    return;
  }

  var buffered = pending_table_text.join("");
  pending_table_text.length = 0;

  if (!NONWS.test(buffered)) {
    // Whitespace only: insert it directly.
    insertText(buffered);
  }
  else {
    // This must be the same code as the "anything else"
    // case of the in_table mode.
    foster_parent_mode = true;
    in_body_mode(TEXT, buffered);
    foster_parent_mode = false;
  }

  parser = originalInsertionMode;
  parser(t, value, arg3, arg4);
}
6336
6337
// Token handler for the "in caption" insertion mode.
function in_caption_mode(t, value, arg3, arg4) {
  // If a caption is in table scope, close it and switch to the
  // "in table" mode. Returns true when a caption was closed and false
  // when the request was ignored.
  function closeCaption() {
    if (stack.inTableScope("caption")) {
      stack.generateImpliedEndTags();
      stack.popTag("caption");
      afe.clearToMarker();
      parser = in_table_mode;
      return true;
    }
    return false;
  }

  if (t === 2) { // TAG
    switch(value) {
    case "caption":
    case "col":
    case "colgroup":
    case "tbody":
    case "td":
    case "tfoot":
    case "th":
    case "thead":
    case "tr":
      // Table structure tags close the caption and are then
      // reprocessed in the new mode.
      if (closeCaption()) parser(t, value, arg3, arg4);
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    switch(value) {
    case "caption":
      closeCaption();
      return;
    case "table":
      if (closeCaption()) parser(t, value, arg3, arg4);
      return;
    case "body":
    case "col":
    case "colgroup":
    case "html":
    case "tbody":
    case "td":
    case "tfoot":
    case "th":
    case "thead":
    case "tr":
      // Ignored
      return;
    }
  }

  // The Anything Else case
  in_body_mode(t, value, arg3, arg4);
}
6390
// Token handler for the "in column group" insertion mode.
function in_column_group_mode(t, value, arg3, arg4) {
  if (t === 1) { // TEXT
    // Leading whitespace is inserted as-is; any remainder is handled
    // by the "anything else" code at the bottom.
    var leading = value.match(LEADINGWS);
    if (leading) {
      insertText(leading[0]);
      value = value.substring(leading[0].length);
    }
    if (value.length === 0) return;
  }
  else if (t === 4) { // COMMENT
    insertComment(value);
    return;
  }
  else if (t === 5) { // DOCTYPE
    return;
  }
  else if (t === -1) { // EOF
    in_body_mode(t, value, arg3, arg4);
    return;
  }
  else if (t === 2) { // TAG
    if (value === "html") {
      in_body_mode(t, value, arg3, arg4);
      return;
    }
    if (value === "col") {
      insertHTMLElement(value, arg3);
      stack.pop();
      return;
    }
    if (value === "template") {
      in_head_mode(t, value, arg3, arg4);
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    if (value === "colgroup") {
      // Ignore the token unless a colgroup is the current node.
      if (isA(stack.top, 'colgroup')) {
        stack.pop();
        parser = in_table_mode;
      }
      return;
    }
    if (value === "col") {
      return;
    }
    if (value === "template") {
      in_head_mode(t, value, arg3, arg4);
      return;
    }
  }

  // Anything else: close the colgroup (if it is the current node) and
  // reprocess the token; otherwise ignore the token.
  if (isA(stack.top, 'colgroup')) {
    in_column_group_mode(ENDTAG, "colgroup");
    parser(t, value, arg3, arg4);
  }
}
6449
// Token handler for the "in table body" insertion mode.
function in_table_body_mode(t, value, arg3, arg4) {
  // If a tbody, thead or tfoot is in table scope, close the current
  // section and reprocess the current token; otherwise do nothing.
  function closeSectionAndReprocess() {
    var sectionOpen = stack.inTableScope("tbody") ||
      stack.inTableScope("thead") ||
      stack.inTableScope("tfoot");
    if (!sectionOpen) return;

    stack.clearToContext(tableBodyContextSet);
    // Act as if an end tag for the current node had been seen.
    in_table_body_mode(ENDTAG, stack.top.localName, null);
    parser(t, value, arg3, arg4);
  }

  if (t === 2) { // TAG
    switch(value) {
    case "tr":
      stack.clearToContext(tableBodyContextSet);
      insertHTMLElement(value, arg3);
      parser = in_row_mode;
      return;
    case "th":
    case "td":
      // Act as if a <tr> start tag had been seen, then reprocess.
      in_table_body_mode(TAG, "tr", null);
      parser(t, value, arg3, arg4);
      return;
    case "caption":
    case "col":
    case "colgroup":
    case "tbody":
    case "tfoot":
    case "thead":
      closeSectionAndReprocess();
      return;
    }
  }
  else if (t === 3) { // ENDTAG
    switch(value) {
    case "table":
      closeSectionAndReprocess();
      return;
    case "tbody":
    case "tfoot":
    case "thead":
      // Ignored unless the named section is in table scope.
      if (stack.inTableScope(value)) {
        stack.clearToContext(tableBodyContextSet);
        stack.pop();
        parser = in_table_mode;
      }
      return;
    case "body":
    case "caption":
    case "col":
    case "colgroup":
    case "html":
    case "td":
    case "th":
    case "tr":
      // Ignored
      return;
    }
  }

  // Anything else:
  in_table_mode(t, value, arg3, arg4);
}
6515 function in_row_mode(t, value, arg3, arg4) {
6516 function endrow() {
6517 if (!stack.inTableScope("tr")) return false;
6518 stack.clearToContext(tableRowContextSet);
6519 stack.pop();
6520 parser = in_table_body_mode;
6521 return true;
6522 }
6523
6524 switch(t) {
6525 case 2: // TAG
6526 switch(value) {
6527 case "th":
6528 case "td":
6529 stack.clearToContext(tableRowContextSet);
6530 insertHTMLElement(value, arg3);
6531 parser = in_cell_mode;
6532 afe.insertMarker();
6533 return;
6534 case "caption":
6535 case "col":
6536 case "colgroup":
6537 case "tbody":
6538 case "tfoot":
6539 case "thead":
6540 case "tr":
6541 if (endrow()) parser(t, value, arg3, arg4);
6542 return;
6543 }
6544 break;
6545 case 3: // ENDTAG
6546 switch(value) {
6547 case "tr":
6548 endrow();
6549 return;
6550 case "table":
6551 if (endrow()) parser(t, value, arg3, arg4);
6552 return;
6553 case "tbody":
6554 case "tfoot":
6555 case "thead":
6556 if (stack.inTableScope(value)) {
6557 if (endrow()) parser(t, value, arg3, arg4);
6558 }
6559 return;
6560 case "body":
6561 case "caption":
6562 case "col":
6563 case "colgroup":
6564 case "html":
6565 case "td":
6566 case "th":
6567 return;
6568 }
6569 break;
6570 }
6571
6572 // anything else
6573 in_table_mode(t, value, arg3, arg4);
6574 }
6575
6576 function in_cell_mode(t, value, arg3, arg4) {
6577 switch(t) {
6578 case 2: // TAG
6579 switch(value) {
6580 case "caption":
6581 case "col":
6582 case "colgroup":
6583 case "tbody":
6584 case "td":
6585 case "tfoot":
6586 case "th":
6587 case "thead":
6588 case "tr":
6589 if (stack.inTableScope("td")) {
6590 in_cell_mode(ENDTAG, "td");
6591 parser(t, value, arg3, arg4);
6592 }
6593 else if (stack.inTableScope("th")) {
6594 in_cell_mode(ENDTAG, "th");
6595 parser(t, value, arg3, arg4);
6596 }
6597 return;
6598 }
6599 break;
6600 case 3: // ENDTAG
6601 switch(value) {
6602 case "td":
6603 case "th":
6604 if (!stack.inTableScope(value)) return;
6605 stack.generateImpliedEndTags();
6606 stack.popTag(value);
6607 afe.clearToMarker();
6608 parser = in_row_mode;
6609 return;
6610
6611 case "body":
6612 case "caption":
6613 case "col":
6614 case "colgroup":
6615 case "html":
6616 return;
6617
6618 case "table":
6619 case "tbody":
6620 case "tfoot":
6621 case "thead":
6622 case "tr":
6623 if (!stack.inTableScope(value)) return;
6624 in_cell_mode(ENDTAG, stack.inTableScope("td") ? "td" : "th");
6625 parser(t, value, arg3, arg4);
6626 return;
6627 }
6628 break;
6629 }
6630
6631 // anything else
6632 in_body_mode(t, value, arg3, arg4);
6633 }
6634
6635 function in_select_mode(t, value, arg3, arg4) {
6636 switch(t) {
6637 case 1: // TEXT
6638 if (textIncludesNUL) {
6639 value = value.replace(NULCHARS, "");
6640 if (value.length === 0) return;
6641 }
6642 insertText(value);
6643 return;
6644 case 4: // COMMENT
6645 insertComment(value);
6646 return;
6647 case 5: // DOCTYPE
6648 return;
6649 case -1: // EOF
6650 in_body_mode(t, value, arg3, arg4);
6651 return;
6652 case 2: // TAG
6653 switch(value) {
6654 case "html":
6655 in_body_mode(t, value, arg3, arg4);
6656 return;
6657 case "option":
6658 if (stack.top instanceof impl.HTMLOptionElement)
6659 in_select_mode(ENDTAG, value);
6660 insertHTMLElement(value, arg3);
6661 return;
6662 case "optgroup":
6663 if (stack.top instanceof impl.HTMLOptionElement)
6664 in_select_mode(ENDTAG, "option");
6665 if (stack.top instanceof impl.HTMLOptGroupElement)
6666 in_select_mode(ENDTAG, value);
6667 insertHTMLElement(value, arg3);
6668 return;
6669 case "select":
6670 in_select_mode(ENDTAG, value); // treat it as a close tag
6671 return;
6672
6673 case "input":
6674 case "keygen":
6675 case "textarea":
6676 if (!stack.inSelectScope("select")) return;
6677 in_select_mode(ENDTAG, "select");
6678 parser(t, value, arg3, arg4);
6679 return;
6680
6681 case "script":
6682 case "template":
6683 in_head_mode(t, value, arg3, arg4);
6684 return;
6685 }
6686 break;
6687 case 3: // ENDTAG
6688 switch(value) {
6689 case "optgroup":
6690 if (stack.top instanceof impl.HTMLOptionElement &&
6691 stack.elements[stack.elements.length-2] instanceof
6692 impl.HTMLOptGroupElement) {
6693 in_select_mode(ENDTAG, "option");
6694 }
6695 if (stack.top instanceof impl.HTMLOptGroupElement)
6696 stack.pop();
6697
6698 return;
6699
6700 case "option":
6701 if (stack.top instanceof impl.HTMLOptionElement)
6702 stack.pop();
6703 return;
6704
6705 case "select":
6706 if (!stack.inSelectScope(value)) return;
6707 stack.popTag(value);
6708 resetInsertionMode();
6709 return;
6710
6711 case "template":
6712 in_head_mode(t, value, arg3, arg4);
6713 return;
6714 }
6715
6716 break;
6717 }
6718
6719 // anything else: just ignore the token
6720 }
6721
6722 function in_select_in_table_mode(t, value, arg3, arg4) {
6723 switch(value) {
6724 case "caption":
6725 case "table":
6726 case "tbody":
6727 case "tfoot":
6728 case "thead":
6729 case "tr":
6730 case "td":
6731 case "th":
6732 switch(t) {
6733 case 2: // TAG
6734 in_select_in_table_mode(ENDTAG, "select");
6735 parser(t, value, arg3, arg4);
6736 return;
6737 case 3: // ENDTAG
6738 if (stack.inTableScope(value)) {
6739 in_select_in_table_mode(ENDTAG, "select");
6740 parser(t, value, arg3, arg4);
6741 }
6742 return;
6743 }
6744 }
6745
6746 // anything else
6747 in_select_mode(t, value, arg3, arg4);
6748 }
6749
6750 function in_template_mode(t, value, arg3, arg4) {
6751 function switchModeAndReprocess(mode) {
6752 parser = mode;
6753 templateInsertionModes[templateInsertionModes.length-1] = parser;
6754 parser(t, value, arg3, arg4);
6755 }
6756 switch(t) {
6757 case 1: // TEXT
6758 case 4: // COMMENT
6759 case 5: // DOCTYPE
6760 in_body_mode(t, value, arg3, arg4);
6761 return;
6762 case -1: // EOF
6763 if (!stack.contains("template")) {
6764 stopParsing();
6765 } else {
6766 stack.popTag("template");
6767 afe.clearToMarker();
6768 templateInsertionModes.pop();
6769 resetInsertionMode();
6770 parser(t, value, arg3, arg4);
6771 }
6772 return;
6773 case 2: // TAG
6774 switch(value) {
6775 case "base":
6776 case "basefont":
6777 case "bgsound":
6778 case "link":
6779 case "meta":
6780 case "noframes":
6781 case "script":
6782 case "style":
6783 case "template":
6784 case "title":
6785 in_head_mode(t, value, arg3, arg4);
6786 return;
6787 case "caption":
6788 case "colgroup":
6789 case "tbody":
6790 case "tfoot":
6791 case "thead":
6792 switchModeAndReprocess(in_table_mode);
6793 return;
6794 case "col":
6795 switchModeAndReprocess(in_column_group_mode);
6796 return;
6797 case "tr":
6798 switchModeAndReprocess(in_table_body_mode);
6799 return;
6800 case "td":
6801 case "th":
6802 switchModeAndReprocess(in_row_mode);
6803 return;
6804 }
6805 switchModeAndReprocess(in_body_mode);
6806 return;
6807 case 3: // ENDTAG
6808 switch(value) {
6809 case "template":
6810 in_head_mode(t, value, arg3, arg4);
6811 return;
6812 default:
6813 return;
6814 }
6815 }
6816 }
6817
6818 function after_body_mode(t, value, arg3, arg4) {
6819 switch(t) {
6820 case 1: // TEXT
6821 // If any non-space chars, handle below
6822 if (NONWS.test(value)) break;
6823 in_body_mode(t, value);
6824 return;
6825 case 4: // COMMENT
6826 // Append it to the <html> element
6827 stack.elements[0]._appendChild(doc.createComment(value));
6828 return;
6829 case 5: // DOCTYPE
6830 return;
6831 case -1: // EOF
6832 stopParsing();
6833 return;
6834 case 2: // TAG
6835 if (value === "html") {
6836 in_body_mode(t, value, arg3, arg4);
6837 return;
6838 }
6839 break; // for any other tags
6840 case 3: // ENDTAG
6841 if (value === "html") {
6842 if (fragment) return;
6843 parser = after_after_body_mode;
6844 return;
6845 }
6846 break; // for any other tags
6847 }
6848
6849 // anything else
6850 parser = in_body_mode;
6851 parser(t, value, arg3, arg4);
6852 }
6853
6854 function in_frameset_mode(t, value, arg3, arg4) {
6855 switch(t) {
6856 case 1: // TEXT
6857 // Ignore any non-space characters
6858 value = value.replace(ALLNONWS, "");
6859 if (value.length > 0) insertText(value);
6860 return;
6861 case 4: // COMMENT
6862 insertComment(value);
6863 return;
6864 case 5: // DOCTYPE
6865 return;
6866 case -1: // EOF
6867 stopParsing();
6868 return;
6869 case 2: // TAG
6870 switch(value) {
6871 case "html":
6872 in_body_mode(t, value, arg3, arg4);
6873 return;
6874 case "frameset":
6875 insertHTMLElement(value, arg3);
6876 return;
6877 case "frame":
6878 insertHTMLElement(value, arg3);
6879 stack.pop();
6880 return;
6881 case "noframes":
6882 in_head_mode(t, value, arg3, arg4);
6883 return;
6884 }
6885 break;
6886 case 3: // ENDTAG
6887 if (value === "frameset") {
6888 if (fragment && stack.top instanceof impl.HTMLHtmlElement)
6889 return;
6890 stack.pop();
6891 if (!fragment &&
6892 !(stack.top instanceof impl.HTMLFrameSetElement))
6893 parser = after_frameset_mode;
6894 return;
6895 }
6896 break;
6897 }
6898
6899 // ignore anything else
6900 }
6901
6902 function after_frameset_mode(t, value, arg3, arg4) {
6903 switch(t) {
6904 case 1: // TEXT
6905 // Ignore any non-space characters
6906 value = value.replace(ALLNONWS, "");
6907 if (value.length > 0) insertText(value);
6908 return;
6909 case 4: // COMMENT
6910 insertComment(value);
6911 return;
6912 case 5: // DOCTYPE
6913 return;
6914 case -1: // EOF
6915 stopParsing();
6916 return;
6917 case 2: // TAG
6918 switch(value) {
6919 case "html":
6920 in_body_mode(t, value, arg3, arg4);
6921 return;
6922 case "noframes":
6923 in_head_mode(t, value, arg3, arg4);
6924 return;
6925 }
6926 break;
6927 case 3: // ENDTAG
6928 if (value === "html") {
6929 parser = after_after_frameset_mode;
6930 return;
6931 }
6932 break;
6933 }
6934
6935 // ignore anything else
6936 }
6937
6938 function after_after_body_mode(t, value, arg3, arg4) {
6939 switch(t) {
6940 case 1: // TEXT
6941 // If any non-space chars, handle below
6942 if (NONWS.test(value)) break;
6943 in_body_mode(t, value, arg3, arg4);
6944 return;
6945 case 4: // COMMENT
6946 doc._appendChild(doc.createComment(value));
6947 return;
6948 case 5: // DOCTYPE
6949 in_body_mode(t, value, arg3, arg4);
6950 return;
6951 case -1: // EOF
6952 stopParsing();
6953 return;
6954 case 2: // TAG
6955 if (value === "html") {
6956 in_body_mode(t, value, arg3, arg4);
6957 return;
6958 }
6959 break;
6960 }
6961
6962 // anything else
6963 parser = in_body_mode;
6964 parser(t, value, arg3, arg4);
6965 }
6966
6967 function after_after_frameset_mode(t, value, arg3, arg4) {
6968 switch(t) {
6969 case 1: // TEXT
6970 // Ignore any non-space characters
6971 value = value.replace(ALLNONWS, "");
6972 if (value.length > 0)
6973 in_body_mode(t, value, arg3, arg4);
6974 return;
6975 case 4: // COMMENT
6976 doc._appendChild(doc.createComment(value));
6977 return;
6978 case 5: // DOCTYPE
6979 in_body_mode(t, value, arg3, arg4);
6980 return;
6981 case -1: // EOF
6982 stopParsing();
6983 return;
6984 case 2: // TAG
6985 switch(value) {
6986 case "html":
6987 in_body_mode(t, value, arg3, arg4);
6988 return;
6989 case "noframes":
6990 in_head_mode(t, value, arg3, arg4);
6991 return;
6992 }
6993 break;
6994 }
6995
6996 // ignore anything else
6997 }
6998
6999
7000 // 13.2.5.5 The rules for parsing tokens in foreign content
7001 //
7002 // This is like one of the insertion modes above, but is
7003 // invoked somewhat differently when the current token is not HTML.
7004 // See the insertToken() function.
// Token handler for foreign (non-HTML) content.
//
// t is the numeric token type (1 TEXT, 2 TAG, 3 ENDTAG, 4 COMMENT,
// 5 DOCTYPE) and value is the token's text or tag name. For start tags,
// arg3 is the attribute list and arg4 is the self-closing flag.
function insertForeignToken(t, value, arg3, arg4) {
  // A <font> tag is an HTML font tag if it has a color, face, or size
  // attribute. Otherwise we assume it is foreign content
  function isHTMLFont(attrs) {
    for(var i = 0, n = attrs.length; i < n; i++) {
      switch(attrs[i][0]) {
      case "color":
      case "face":
      case "size":
        return true;
      }
    }
    return false;
  }

  var current;

  switch(t) {
  case 1: // TEXT
    // If any non-space, non-nul characters
    if (frameset_ok && NONWSNONNUL.test(value))
      frameset_ok = false;
    if (textIncludesNUL) {
      // NUL characters become U+FFFD replacement characters.
      value = value.replace(NULCHARS, "\uFFFD");
    }
    insertText(value);
    return;
  case 4: // COMMENT
    insertComment(value);
    return;
  case 5: // DOCTYPE
    // ignore it
    return;
  case 2: // TAG
    switch(value) {
    case "font":
      if (!isHTMLFont(arg3)) break;
      /* falls through */
    case "b":
    case "big":
    case "blockquote":
    case "body":
    case "br":
    case "center":
    case "code":
    case "dd":
    case "div":
    case "dl":
    case "dt":
    case "em":
    case "embed":
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6":
    case "head":
    case "hr":
    case "i":
    case "img":
    case "li":
    case "listing":
    case "menu":
    case "meta":
    case "nobr":
    case "ol":
    case "p":
    case "pre":
    case "ruby":
    case "s":
    case "small":
    case "span":
    case "strong":
    case "strike":
    case "sub":
    case "sup":
    case "table":
    case "tt":
    case "u":
    case "ul":
    case "var":
      if (fragment) {
        // When parsing a fragment, treat the tag like any other
        // start tag (handled below).
        break;
      }
      // Pop foreign elements until the current node is an HTML
      // element or an integration point.
      do {
        stack.pop();
        current = stack.top;
      } while(current.namespaceURI !== NAMESPACE.HTML &&
              !isMathmlTextIntegrationPoint(current) &&
              !isHTMLIntegrationPoint(current));

      insertToken(t, value, arg3, arg4); // reprocess
      return;
    }

    // Any other start tag case goes here
    current = (stack.elements.length===1 && fragment) ? fragmentContext :
      stack.top;
    if (current.namespaceURI === NAMESPACE.MATHML) {
      adjustMathMLAttributes(arg3);
    }
    else if (current.namespaceURI === NAMESPACE.SVG) {
      value = adjustSVGTagName(value);
      adjustSVGAttributes(arg3);
    }
    adjustForeignAttributes(arg3);

    insertForeignElement(value, arg3, current.namespaceURI);
    if (arg4) { // the self-closing flag
      if (value === 'script' && current.namespaceURI === NAMESPACE.SVG) {
        // XXX deal with SVG scripts here
      }
      stack.pop();
    }
    return;

  case 3: // ENDTAG
    current = stack.top;
    if (value === "script" &&
        current.namespaceURI === NAMESPACE.SVG &&
        current.localName === "script") {

      stack.pop();

      // XXX
      // Deal with SVG scripts here
    }
    else {
      // The any other end tag case: walk down the stack looking for
      // an element whose lowercased name matches the end tag.
      var i = stack.elements.length-1;
      var node = stack.elements[i];
      for(;;) {
        if (node.localName.toLowerCase() === value) {
          stack.popElement(node);
          break;
        }
        // The bottom of the stack was reached without a match:
        // ignore the token instead of reading past the start of
        // the stack.
        if (i <= 0) break;
        node = stack.elements[--i];
        // If non-html, keep looping
        if (node.namespaceURI !== NAMESPACE.HTML)
          continue;
        // Otherwise process the end tag as html
        parser(t, value, arg3, arg4);
        break;
      }
    }
    return;
  }
}
7155 /***
7156 * Finally, this is the end of the HTMLParser() factory function.
 * It returns the htmlparser object with the parse() and document() methods.
7158 */
7159
7160 // Sneak another method into the htmlparser object to allow us to run
7161 // tokenizer tests. This can be commented out in production code.
7162 // This is a hook for testing the tokenizer. It has to be here
7163 // because the tokenizer details are all hidden away within the closure.
7164 // It should return an array of tokens generated while parsing the
7165 // input string.
// Run the tokenizer over `input` and return the list of tokens it
// produced. initialState optionally names the tokenizer state to start
// in, lastStartTag optionally sets the last start tag name, and
// charbychar feeds the input to parse() one character at a time.
// Note that this replaces insertToken for the rest of the parser's life.
htmlparser.testTokenizer = function(input, initialState, lastStartTag, charbychar) {
  var tokens = [];

  // Unrecognized state names leave the tokenizer state untouched.
  if (initialState === "PCDATA state") {
    tokenizer = data_state;
  }
  else if (initialState === "RCDATA state") {
    tokenizer = rcdata_state;
  }
  else if (initialState === "RAWTEXT state") {
    tokenizer = rawtext_state;
  }
  else if (initialState === "PLAINTEXT state") {
    tokenizer = plaintext_state;
  }

  if (lastStartTag) {
    lasttagname = lastStartTag;
  }

  // Record tokens instead of building a tree.
  insertToken = function(t, value, arg3, arg4) {
    flushText();

    if (t === 1) { // TEXT
      // Adjacent character tokens are merged into one.
      var last = tokens.length > 0 ? tokens[tokens.length-1] : null;
      if (last !== null && last[0] === "Character") {
        last[1] += value;
      }
      else {
        tokens.push(["Character", value]);
      }
    }
    else if (t === 4) { // COMMENT
      tokens.push(["Comment", value]);
    }
    else if (t === 5) { // DOCTYPE
      tokens.push(["DOCTYPE", value,
                   arg3 === undefined ? null : arg3,
                   arg4 === undefined ? null : arg4,
                   !force_quirks]);
    }
    else if (t === 2) { // TAG
      var attrs = Object.create(null);
      // XXX: does attribute order matter?
      for(var k = 0; k < arg3.length; k++) {
        var entry = arg3[k];
        // A one-element entry is an attribute without a value.
        attrs[entry[0]] = (entry.length === 1) ? "" : entry[1];
      }
      var token = ["StartTag", value, attrs];
      if (arg4) token.push(true);
      tokens.push(token);
    }
    else if (t === 3) { // ENDTAG
      tokens.push(["EndTag", value]);
    }
    // EOF (-1) produces no token
  };

  if (charbychar) {
    for(var i = 0; i < input.length; i++) {
      this.parse(input[i]);
    }
    this.parse("", true);
  }
  else {
    this.parse(input, true);
  }
  return tokens;
};
7243 // Return the parser object from the HTMLParser() factory function
7244 return htmlparser;
7245}