1 | var Tree = require("./htmltree");
|
2 | var Fatal = require("./fatal");
|
3 |
|
4 |
|
// Elements that never have content or a closing tag; `parse` compares the
// lower-cased tag name against this list.
var VOID_ELEMENTS = ["area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"];

// All token patterns are anchored with `^`: they are meant to be run against
// the remaining input starting at the current read position.

// Character reference: `&#123;` or `&name;`.
var rxEntity = /^&(#[0-9]+|[a-zA-Z]+);/;
// Opening `<name` (group 1 = name). Attributes and the closing `>` are
// matched separately.
var rxStartTag = /^<([a-zA-Z_$][a-zA-Z0-9$_:\.-]*)/;
// Attribute name (group 1) with an optional `=` (group 2). The value itself
// is not part of the match.
var rxAttrib = /^[ \t\n\r]*([a-zA-Z_$][a-zA-Z0-9$_:-]*)([ \t\n\r]*=[ \t\n\r]*)?/;
// `>` ending a start tag or a `<!...>` declaration.
var rxEndTag = /^[ \t\n\r]*>/;
// `/>` ending a self-closed tag.
var rxAutoCloseTag = /^[ \t\n\r]*\/>/;
// Closing tag `</name>` (group 1 = name). The name accepts the same
// characters as rxStartTag (including `.`) so that every tag that can be
// opened can also be closed.
var rxCloseTag = /^<\/([a-zA-Z_$][a-zA-Z0-9$_:\.-]*)>/;
// Comment `<!-- ... -->` (group 1 = body). The body is matched lazily so the
// comment stops at the FIRST `-->`, and `[\s\S]` lets it span several lines
// or be empty.
var rxComment = /^<!--([\s\S]*?)-->/;
// Declaration opener such as `<!DOCTYPE` (group 1 = name).
var rxDocType = /^<\!([a-zA-Z_$][a-zA-Z0-9$_:-]*)/;
// Processing instruction opener `<?name` (group 1 = name).
var rxStartProcessing = /^<\?([a-zA-Z_$][a-zA-Z0-9$_:-]*)/;
// Processing instruction terminator `?>`.
var rxEndProcessing = /^\?>/;
|
17 |
|
18 |
|
/**
 * Parse a markup string into a tree of plain node objects.
 *
 * Node shapes produced (type values come from the `Tree` module):
 *   - TEXT:                {type, text}
 *   - ENTITY / COMMENT:    {type, text, pos}
 *   - TAG:                 {type, name, attribs, children, pos}
 *                          plus `autoclose: true` for `<x/>` or `void: true`
 *                          for names listed in VOID_ELEMENTS
 *   - DOCTYPE / PROCESSING: {type, name, attribs, pos}
 *
 * Input that matches no token rule is kept as text. Adjacent text runs are
 * merged into a single TEXT node.
 *
 * A closing tag with no open element, or one that does not match the
 * innermost open element, is reported through `Fatal.fire` together with a
 * code excerpt; any other exception is handed to `Fatal.bubble`.
 *
 * @param {string} content - Markup to parse.
 * @returns {{children: Array}} The synthetic root node holding the top-level
 *     nodes in `children`.
 */
function parse(content) {
    var cursor = 0,             // start of the text run not yet flushed
        index = 0,              // current read position in `content`
        node,                   // tag/doctype/processing node being built
        root = {children: []},  // node currently receiving children
        stack = [root];         // open elements; stack[0] is the synthetic root

    /**
     * Add `node` to the current parent, merging it into the previous sibling
     * when both are text.
     */
    function append(node) {
        var siblings = root.children,
            last = siblings[siblings.length - 1];
        if (node.type === Tree.TEXT && last && last.type === Tree.TEXT) {
            last.text += node.text;
        } else {
            siblings.push(node);
        }
    }

    /** Emit the pending text between `cursor` and `index`, if any. */
    function flushText() {
        var text = content.slice(cursor, index);
        if (text.length > 0) {
            append({type: Tree.TEXT, text: text});
        }
        cursor = index;
    }

    /**
     * Try each rule in order against the input starting at `index`.
     * A rule is `[regex, handler]`; the handler receives the match and
     * returns true to accept it. When a handler rejects, `index` is restored
     * to its value on entry (other side effects of the handler are kept) and
     * the next rule is tried.
     *
     * @returns {boolean} true as soon as one handler accepts, else false.
     */
    function match() {
        var i, rule, m, lastIndex = index;
        for (i = 0; i < arguments.length; i++) {
            rule = arguments[i];
            m = rule[0].exec(content.slice(index));
            if (m) {
                if (rule[1](m)) {
                    return true;
                }
                index = lastIndex;
            }
        }
        return false;
    }

    /**
     * Consume consecutive attributes into `node.attribs`.
     * A value is read only when it is quoted with `"` or `'`; inside the
     * quotes a backslash makes the following character literal. An attribute
     * without a quoted value is stored as null.
     */
    function parseAttribs(node) {
        while (match([rxAttrib, function (m) {
            var name = m[1],
                value = null,
                quote,
                c;
            index += m[0].length;
            if (m[2]) {
                quote = content.charAt(index);
                if (quote === '"' || quote === "'") {
                    value = "";
                    index++;
                    while (index < content.length) {
                        c = content.charAt(index);
                        if (c === quote) {
                            index++;
                            break;
                        }
                        if (c === '\\') {
                            index++;
                            c = content.charAt(index);
                            if (index >= content.length) break;
                        }
                        value += c;
                        index++;
                    }
                }
            }
            node.attribs[name] = value;
            return true;
        }]));
    }

    try {
        while (index < content.length) {
            if (!match(
                [rxStartTag, function (m) {
                    flushText();
                    node = {type: Tree.TAG, name: m[1], attribs: {}, children: [], pos: index};
                    index += m[0].length;
                    parseAttribs(node);
                    return match(
                        [rxAutoCloseTag, function (m) {
                            node.autoclose = true;
                            root.children.push(node);
                            index += m[0].length;
                            cursor = index;
                            return true;
                        }],
                        [rxEndTag, function (m) {
                            root.children.push(node);
                            if (VOID_ELEMENTS.indexOf(node.name.toLowerCase()) > -1) {
                                // Void elements never get children or a closing tag.
                                node.void = true;
                            } else {
                                // Descend: following nodes become children of this element.
                                stack.push(node);
                                root = node;
                            }
                            index += m[0].length;
                            cursor = index;
                            return true;
                        }]
                    );
                }],
                [rxCloseTag, function (m) {
                    if (stack.length === 1) {
                        throw {msg: "Unexpected closing tag " + m[0] + "!", pos: index};
                    }
                    if (root.name !== m[1]) {
                        throw {msg: "Invalid closing tag " + m[0] + ", expected </"
                               + root.name + ">!", pos: index};
                    }
                    flushText();
                    stack.pop();
                    root = stack[stack.length - 1];
                    index += m[0].length;
                    cursor = index;
                    return true;
                }],
                [rxEntity, function (m) {
                    flushText();
                    append({type: Tree.ENTITY, text: m[0], pos: index});
                    cursor = index = index + m[0].length;
                    return true;
                }],
                [rxComment, function (m) {
                    flushText();
                    append({type: Tree.COMMENT, text: m[1], pos: index});
                    index += m[0].length;
                    cursor = index;
                    return true;
                }],
                [rxDocType, function (m) {
                    flushText();
                    node = {type: Tree.DOCTYPE, name: m[1], attribs: {}, pos: index};
                    index += m[0].length;
                    parseAttribs(node);
                    return match([rxEndTag, function (m) {
                        append(node);
                        index += m[0].length;
                        cursor = index;
                        return true;
                    }]);
                }],
                [rxStartProcessing, function (m) {
                    flushText();
                    node = {type: Tree.PROCESSING, name: m[1], attribs: {}, pos: index};
                    index += m[0].length;
                    parseAttribs(node);
                    return match([rxEndProcessing, function (m) {
                        append(node);
                        index += m[0].length;
                        cursor = index;
                        return true;
                    }]);
                }]
            ))
            {
                // No token starts here: keep the character as pending text.
                index++;
            }
        }
        flushText();
    }
    catch (ex) {
        if (typeof ex.pos !== 'undefined') {
            // Syntax error raised above: report it with the offending excerpt.
            Fatal.fire(ex.msg + "\n\n" + Fatal.extractCodeAtPos(content, ex.pos));
        } else {
            Fatal.bubble(ex);
        }
    }
    return stack[0];
}
|
206 |
|
207 |
|
// Public API of this module: expose the parser as `parse`.
exports.parse = parse;
|