1 | var htmlparser = require('htmlparser2');
|
2 | var Errors = require('./errors');
|
// Line terminator matcher: CRLF is tried first so a Windows line ending
// counts as one break rather than two.
var rLineSplit = /\r\n|\r|\n/;

// Matches as soon as a string contains one non-whitespace character.
var rHasNonWhitespace = /\S/;
|
5 |
|
/**
 * Minimal file object describing an HTML document (needed for errors output).
 * It keeps the file name and the document text split into lines.
 *
 * @name HtmlFile
 * @param {Object} params
 * @param {String} params.filename Name of the HTML file.
 * @param {String} params.source Complete text of the HTML file.
 */
function HtmlFile(params) {
    var name = params.filename;
    var text = params.source;

    this._filename = name;
    // Split once up front; CRLF, lone CR and lone LF all end a line.
    this._lines = text.split(rLineSplit);
}

HtmlFile.prototype = {
    /**
     * Name of the file this object was created for.
     *
     * @returns {String}
     */
    getFilename: function() {
        var filename = this._filename;

        return filename;
    },

    /**
     * Lines of the file, in document order, without their line terminators.
     *
     * @returns {String[]}
     */
    getLines: function() {
        var sourceLines = this._lines;

        return sourceLines;
    }
};
|
38 |
|
/**
 * Parse html and collect the bodies of inline JavaScript <script> elements.
 *
 * A <script> is collected only when it has no `src` attribute and its `type`
 * attribute is either missing/empty or equal to `text/javascript`
 * (compared case-insensitively). Bodies made only of whitespace are dropped.
 *
 * @param {String} html
 * @returns {Object[]} one `{source, start, end}` object per collected script;
 *     `source` equals `html.substring(start, end)`
 */
function getScripts(html) {
    var found = [];
    // 0 doubles as "not inside a collectable script"
    var bodyStart = 0;
    var parser;

    parser = new htmlparser.Parser({
        onopentag: function(name, attrs) {
            var isInlineJs = name === 'script' &&
                // scripts loaded through src have no inline body to check
                !attrs.src &&
                // no type at all, or explicitly text/javascript
                (!attrs.type || attrs.type.toLowerCase() === 'text/javascript');

            if (isInlineJs) {
                // body begins right after the position the parser reports
                // as the end of the opening tag
                bodyStart = parser.endIndex + 1;
            }
        },

        onclosetag: function() {
            var bodyEnd;
            var body;

            if (bodyStart) {
                // body stops where the parser reports the closing tag starts
                bodyEnd = parser.startIndex;
                body = html.substring(bodyStart, bodyEnd);

                // whitespace-only bodies are not worth reporting
                if (rHasNonWhitespace.test(body)) {
                    found.push({
                        source: body,
                        start: bodyStart,
                        end: bodyEnd
                    });
                }

                // leave "inside script" state
                bodyStart = 0;
            }
        }
    });

    parser.parseComplete(html);

    return found;
}
|
93 |
|
/**
 * JavaScript in HTML is usually indented relative to the surrounding markup.
 * This function trims the script body and removes the indentation shared by
 * all of its lines, reporting how many characters were removed so the
 * original column can be restored later.
 *
 * The common indentation is left untouched (offset 0) when:
 *  - any line's indentation mixes tabs with other whitespace, in any order;
 *  - some lines are indented with tabs only and others with spaces only;
 *  - a line other than the first or last consists solely of whitespace.
 *
 * @param {String} source raw text of a script body
 * @returns {{source: String, offset: Number, lineCount: Number}}
 *     `source` - normalized text, lines joined with "\n";
 *     `offset` - number of leading characters removed from every line;
 *     `lineCount` - number of lines in the input before any trimming
 */
function normalizeSource(source) {
    var lines = source.split(rLineSplit);
    // counted before trimming: callers need the size of the original fragment
    var lineCount = lines.length;
    var tabOnlyOffset = false;
    var spaceOnlyOffset = false;
    var offset = Infinity;
    var lineOffset;
    var i;

    /**
     * Indentation width of one line: Infinity for an empty line (ignored by
     * the minimum), 0 when the indentation must not be stripped.
     * Remembers across calls whether tab-only / space-only indents were seen.
     */
    function measureOffset(line) {
        var offsetStr;
        var hasTab;

        // skip empty lines
        if (!line) {
            return Infinity;
        }

        // whitespace at the line beginning
        offsetStr = line.match(/^\s*/)[0];

        // whitespace-only line: stripping would alter it, so keep everything
        if (offsetStr.length === line.length) {
            return 0;
        }

        // Look for a tab anywhere in the indent, not just at its start,
        // so "  \t" is recognised as mixed just like "\t  ".
        hasTab = offsetStr.indexOf('\t') !== -1;

        // mixed tabs and other whitespace in one offset -> don't remove offsets
        if (hasTab && /[^\t]/.test(offsetStr)) {
            return 0;
        }

        if (hasTab) {
            // tabs here, but an earlier offset had only spaces -> mixed
            if (spaceOnlyOffset) {
                return 0;
            }
            tabOnlyOffset = true;
        } else {
            // no tabs here, but an earlier offset had only tabs -> mixed
            if (tabOnlyOffset) {
                return 0;
            }
            spaceOnlyOffset = true;
        }

        return offsetStr.length;
    }

    // remove first line if it has no content
    // usually <script> is followed by a new line
    if (!rHasNonWhitespace.test(lines[0])) {
        lines.shift();
    }

    // replace last line by empty string if it contains only whitespace
    // it helps avoid disallowTrailingWhitespace errors on last line
    if (lines.length && !rHasNonWhitespace.test(lines[lines.length - 1])) {
        lines[lines.length - 1] = '';
    }

    // Minimum offset over all lines. A plain loop is used instead of
    // Math.min.apply so very long scripts cannot exceed the engine's
    // argument-count limit; 0 is the smallest possible value, so stop there.
    for (i = 0; i < lines.length && offset !== 0; i++) {
        lineOffset = measureOffset(lines[i]);

        if (lineOffset < offset) {
            offset = lineOffset;
        }
    }

    // no line carried any content -> nothing to strip, report a usable number
    if (offset === Infinity) {
        offset = 0;
    }

    // remove common offset if possible
    if (offset) {
        lines = lines.map(function(line) {
            return line.slice(offset);
        });
    }

    return {
        source: lines.join('\n'),
        offset: offset,
        lineCount: lineCount
    };
}
|
176 |
|
/**
 * Find inline <script> sources in an HTML document and normalize each of them
 * by line offset. Every returned fragment records the offset that was removed
 * from its lines and the line counter value at the point where the fragment
 * begins, so error positions can later be mapped back onto the HTML file.
 *
 * @param {String} filename
 * @param {String} data HTML text
 * @returns {Object} object with:
 *     `sources` - array of `{source, offset, line}` fragments in document order;
 *     `errors` - Errors instance created for the HTML file;
 *     `addError` - function that appends `{filename, rule, message, line, column}`
 *         built from the given error to the error list of `errors`
 */
function extractJs(filename, data) {
    var htmlFile = new HtmlFile({
        filename: filename,
        source: data
    });
    var errors = new Errors(htmlFile);
    var fragments = getScripts(data);
    var sources = [];
    // running line counter, starts at the first line of the document
    var currentLine = 1;
    // position in `data` where the previously handled fragment ended
    var htmlCursor = 0;
    var fragment;
    var normalized;
    var htmlBefore;
    var i;

    // Copies only the known fields of an error and stamps it with the file name.
    function addError(error) {
        errors._errorList.push({
            filename: filename,
            rule: error.rule,
            message: error.message,
            line: error.line,
            column: error.column
        });
    }

    for (i = 0; i < fragments.length; i++) {
        fragment = fragments[i];
        normalized = normalizeSource(fragment.source);

        // advance by the line breaks between the previous fragment and this one
        htmlBefore = data.substring(htmlCursor, fragment.start);
        currentLine += htmlBefore.split(rLineSplit).length - 1;

        sources.push({
            source: normalized.source,
            offset: normalized.offset,
            line: currentLine
        });

        // advance past the line breaks inside the fragment itself
        currentLine += normalized.lineCount - 1;
        htmlCursor = fragment.end;
    }

    return {
        sources: sources,
        errors: errors,
        addError: addError
    };
}
|
229 |
|
230 | module.exports = extractJs;
|