UNPKG

7.58 kBJavaScriptView Raw
1var Tree = require("./htmltree");
2var Fatal = require("./fatal");
3
// Void elements do not have any children and take no closing tag.
var VOID_ELEMENTS = ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"];

// Every pattern below is anchored with `^`: the parser applies it to the
// remainder of the input starting at the current scanning position.

// Character reference: decimal (&#38;), hexadecimal (&#x26;) or named (&amp;, &frac12;).
var rxEntity = /^&(#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/;
// Start of an opening tag; group 1 is the tag name.
var rxStartTag = /^<([a-zA-Z_$][a-zA-Z0-9$_:\.-]*)/;
// Attribute name (group 1), optionally followed by `=` (group 2 is set when a value is expected).
var rxAttrib = /^[ \t\n\r]*([a-zA-Z_$][a-zA-Z0-9$_:-]*)([ \t\n\r]*=[ \t\n\r]*)?/;
// End of an opening tag or of a doctype declaration.
var rxEndTag = /^[ \t\n\r]*>/;
// End of a self-closing tag.
var rxAutoCloseTag = /^[ \t\n\r]*\/>/;
// Closing tag; group 1 is the tag name. It accepts the same name characters
// as rxStartTag (including `.`) so that every tag that can be opened can also
// be closed, and tolerates whitespace before `>`.
var rxCloseTag = /^<\/([a-zA-Z_$][a-zA-Z0-9$_:\.-]*)[ \t\n\r]*>/;
// Comment; group 1 is the body. The match is lazy so it stops at the first
// `-->`, and `[\s\S]` lets the body be empty or span several lines.
var rxComment = /^<!--([\s\S]*?)-->/;
// Start of a `<!name ...>` declaration such as a doctype; group 1 is the name.
var rxDocType = /^<!([a-zA-Z_$][a-zA-Z0-9$_:-]*)/;
// Start of a processing instruction `<?name ...`; group 1 is the name.
var rxStartProcessing = /^<\?([a-zA-Z_$][a-zA-Z0-9$_:-]*)/;
// End of a processing instruction.
var rxEndProcessing = /^\?>/;
17
18
/**
 * Parse HTML-like markup into a tree of plain-object nodes.
 *
 * Node shapes produced (the `type` values come from the `Tree` module):
 *   - Tree.TEXT:       {type, text}
 *   - Tree.ENTITY:     {type, text, pos}
 *   - Tree.COMMENT:    {type, text, pos}
 *   - Tree.TAG:        {type, name, attribs, children, pos}, plus
 *                      `autoclose: true` for `<x/>` or `void: true` for a
 *                      void element such as `<br>`.
 *   - Tree.DOCTYPE / Tree.PROCESSING: {type, name, attribs, pos}
 *
 * Attribute values are read only when wrapped in single or double quotes
 * (a backslash escapes the following character). An attribute without a
 * value, or whose `=` is not followed by a quote, is stored as `null`.
 *
 * Markup that looks like the start of a construct but is never completed
 * (for instance `<b c` with no `>`) is kept as plain text.
 *
 * Errors: an unexpected or mismatched closing tag is reported through
 * `Fatal.fire` together with a code excerpt; any other exception is handed to
 * `Fatal.bubble`. If those calls return, the tree built so far is returned.
 *
 * @param {string} content Markup to parse.
 * @returns {{children: Array}} Synthetic root whose `children` are the
 *   top-level nodes.
 */
function parse(content) {
    var cursor = 0,                 // start of the text run not flushed yet
        index = 0,                  // current scanning position
        node,                       // tag/doctype/instruction being built
        root = {children: []},      // node currently receiving children
        stack = [root],             // open elements; stack[0] is the result
        rules;

    // Add a child to the current parent, merging adjacent text nodes.
    function append(child) {
        var siblings = root.children,
            last = siblings[siblings.length - 1];
        if (child.type === Tree.TEXT && last && last.type === Tree.TEXT) {
            last.text += child.text;
        } else {
            siblings.push(child);
        }
    }

    // Emit the pending text in [cursor, index) as a text node.
    function flushText() {
        if (index > cursor) {
            append({type: Tree.TEXT, text: content.slice(cursor, index)});
        }
        cursor = index;
    }

    // Mark the text matched by `m` as consumed.
    function consume(m) {
        index += m[0].length;
        cursor = index;
    }

    /**
     * Try each rule in order at the current position.
     *
     * @param {array...} rules Each rule is an array with two elements:
     * a regular expression and a function to call if that regexp
     * matches. The function returns true to accept the match.
     * @returns {boolean} true as soon as one rule accepts; otherwise false,
     *   with `index` restored to where it was on entry.
     */
    function match() {
        var start = index,
            // Every rule is tried at the same position, so slice only once.
            rest = content.slice(start),
            i, rule, m;
        for (i = 0; i < arguments.length; i++) {
            rule = arguments[i];
            m = rule[0].exec(rest);
            if (m) {
                if (rule[1](m)) {
                    return true;
                }
                // The handler may have advanced before giving up.
                index = start;
            }
        }
        return false;
    }

    // Read a quoted value; `index` is on the opening quote on entry and just
    // past the closing quote (or at end of input) on exit.
    function readQuoted(quote) {
        var value = "",
            c;
        index++;
        while (index < content.length) {
            c = content.charAt(index);
            if (c === quote) {
                index++;
                break;
            }
            if (c === '\\') {
                // A backslash keeps the next character verbatim.
                index++;
                if (index >= content.length) break;
                c = content.charAt(index);
            }
            value += c;
            index++;
        }
        return value;
    }

    // Consume every attribute at the current position into target.attribs.
    function parseAttribs(target) {
        function onAttrib(m) {
            var value = null,
                quote;
            index += m[0].length;
            if (m[2]) {
                // An `=` was seen: a quoted value may follow.
                quote = content.charAt(index);
                if (quote === '"' || quote === "'") {
                    value = readQuoted(quote);
                }
            }
            target.attribs[m[1]] = value;
            return true;
        }
        while (match([rxAttrib, onAttrib])) {
            // Keep going until no further attribute matches.
        }
    }

    // `<name ... />`
    function onAutoClose(m) {
        node.autoclose = true;
        root.children.push(node);
        consume(m);
        return true;
    }

    // `<name ... >`
    function onTagEnd(m) {
        root.children.push(node);
        if (VOID_ELEMENTS.indexOf(node.name.toLowerCase()) > -1) {
            // Void elements have no children and do not need any closing syntax.
            node.void = true;
        } else {
            // The new element becomes the parent of what follows.
            stack.push(node);
            root = node;
        }
        consume(m);
        return true;
    }

    function onStartTag(m) {
        flushText();
        node = {type: Tree.TAG, name: m[1], attribs: {}, children: [], pos: index};
        index += m[0].length;
        parseAttribs(node);
        return match([rxAutoCloseTag, onAutoClose], [rxEndTag, onTagEnd]);
    }

    function onCloseTag(m) {
        if (stack.length == 1) {
            throw {msg: "Unexpected closing tag " + m[0] + "!", pos: index};
        }
        if (root.name != m[1]) {
            throw {msg: "Invalid closing tag " + m[0] + ", expected </"
                   + root.name + ">!", pos: index};
        }
        flushText();
        stack.pop();
        root = stack[stack.length - 1];
        consume(m);
        return true;
    }

    function onEntity(m) {
        flushText();
        append({type: Tree.ENTITY, text: m[0], pos: index});
        consume(m);
        return true;
    }

    function onComment(m) {
        flushText();
        append({type: Tree.COMMENT, text: m[1], pos: index});
        consume(m);
        return true;
    }

    // Shared by doctype and processing-instruction terminators.
    function onDeclarationEnd(m) {
        append(node);
        consume(m);
        return true;
    }

    function onDocType(m) {
        flushText();
        node = {type: Tree.DOCTYPE, name: m[1], attribs: {}, pos: index};
        index += m[0].length;
        parseAttribs(node);
        return match([rxEndTag, onDeclarationEnd]);
    }

    function onProcessing(m) {
        flushText();
        node = {type: Tree.PROCESSING, name: m[1], attribs: {}, pos: index};
        index += m[0].length;
        parseAttribs(node);
        return match([rxEndProcessing, onDeclarationEnd]);
    }

    // Rules are tried in this order at every position.
    rules = [
        [rxStartTag, onStartTag],
        [rxCloseTag, onCloseTag],
        [rxEntity, onEntity],
        [rxComment, onComment],
        [rxDocType, onDocType],
        [rxStartProcessing, onProcessing]
    ];

    try {
        while (index < content.length) {
            if (!match.apply(null, rules)) {
                // Nothing recognized here: the character is part of a text run.
                index++;
            }
        }
        flushText();
    }
    catch (ex) {
        if (typeof ex.pos !== 'undefined') {
            Fatal.fire(ex.msg + "\n\n" + Fatal.extractCodeAtPos(content, ex.pos));
        } else {
            Fatal.bubble(ex);
        }
    }
    return stack[0];
}
206
207
208exports.parse = parse;