UNPKG

6.25 kBJavaScriptView Raw
1var htmlparser = require('htmlparser2');
2var Errors = require('./errors');
3var rLineSplit = /\r\n|\r|\n/;
4var rHasNonWhitespace = /\S/;
5
/**
 * Minimal stand-in for a source file: remembers the file name and the
 * text split into lines. It is handed to the errors collection, which
 * needs both to print error output.
 *
 * @name HtmlFile
 * @param {Object} params
 * @param {String} params.filename name reported by getFilename()
 * @param {String} params.source full file text; split on \r\n, \r or \n
 */
var HtmlFile = function(params) {
    var name = params.filename;
    var text = params.source;

    this._filename = name;
    this._lines = text.split(rLineSplit);
};

// The prototype is replaced wholesale with the two accessors below.
HtmlFile.prototype = {
    /**
     * File name given at construction time.
     *
     * @returns {String}
     */
    getFilename: function() {
        var name = this._filename;

        return name;
    },

    /**
     * Lines of the file, in order, without their line terminators.
     * The stored array itself is returned, not a copy.
     *
     * @returns {String[]}
     */
    getLines: function() {
        var sourceLines = this._lines;

        return sourceLines;
    }
};
38
/**
 * Walk the HTML with htmlparser2 and collect the text of every inline
 * JavaScript <script> element.
 *
 * A <script> is collected when it has no src attribute and either no type
 * attribute or a type equal (case-insensitively) to text/javascript.
 * Scripts whose content is only whitespace are dropped.
 *
 * @param {String} html
 * @returns {Object[]} one entry per collected script, in document order:
 *   `source` is the raw script text, `start` and `end` are the positions
 *   in `html` such that html.substring(start, end) === source
 */
function getScripts(html) {
    var found = [];
    // position where the current script's content begins;
    // 0 means "not inside a script we care about"
    var contentStart = 0;
    var parser;

    // true for an inline <script> that holds JavaScript
    function isInlineJs(name, attrs) {
        if (name !== 'script') {
            return false;
        }

        // external scripts have nothing to extract
        if (attrs.src) {
            return false;
        }

        // an explicit type must be text/javascript
        if (attrs.type && attrs.type.toLowerCase() !== 'text/javascript') {
            return false;
        }

        return true;
    }

    parser = new htmlparser.Parser({
        onopentag: function(name, attrs) {
            if (isInlineJs(name, attrs)) {
                // content starts one position past parser.endIndex
                // NOTE(review): assumes endIndex points at the tag's closing
                // '>' - confirm against the htmlparser2 version in use
                contentStart = parser.endIndex + 1;
            }
        },

        onclosetag: function() {
            var contentEnd;
            var content;

            if (contentStart) {
                contentEnd = parser.startIndex;
                content = html.substring(contentStart, contentEnd);

                // keep the fragment only when it has something besides whitespace
                if (rHasNonWhitespace.test(content)) {
                    found.push({
                        source: content,
                        start: contentStart,
                        end: contentEnd
                    });
                }

                // leave "inside script" state
                contentStart = 0;
            }
        }
    });

    parser.parseComplete(html);

    return found;
}
93
/**
 * JavaScript inside HTML is usually indented along with the markup. This
 * function trims a raw <script> body and removes the indentation shared by
 * all of its lines; the removed width is reported as `offset`.
 *
 * The common indentation is left in place (offset 0) when the lines mix tabs
 * and spaces - inside one line's indentation or between lines - or when a
 * whitespace-only line sits among the code.
 *
 * @param {String} source raw script text
 * @returns {{source: String, offset: Number, lineCount: Number}}
 *   `source` is the normalized text joined with \n, `offset` is the number of
 *   leading characters removed from every line (always a finite number),
 *   `lineCount` is the number of lines in the original, untrimmed text
 */
function normalizeSource(source) {
    var lines = source.split(rLineSplit);
    // taken before any line is dropped, so it describes the original fragment
    var lineCount = lines.length;
    var tabOnlyOffset = false;
    var spaceOnlyOffset = false;
    var offset;

    // remove first line if it's blank
    // usually <script> starts with new line
    if (!rHasNonWhitespace.test(lines[0])) {
        lines.shift();
    }

    // replace last line by empty string if it contains only whitespaces
    // it helps avoid disallowTrailingWhitespace errors on last line
    // (the length guard avoids touching lines[-1] when nothing is left)
    if (lines.length && !rHasNonWhitespace.test(lines[lines.length - 1])) {
        lines[lines.length - 1] = '';
    }

    // calculate min line offset
    offset = Math.min.apply(null, lines.map(function(line) {
        var offsetStr;
        var hasTabs;
        var tabsOnly;

        // empty lines don't constrain the common offset
        if (!line) {
            return Infinity;
        }

        // fetch whitespaces at the line beginning
        offsetStr = line.match(/^\s*/)[0];

        // whitespace-only line -> don't remove offsets
        if (offsetStr.length === line.length) {
            return 0;
        }

        hasTabs = offsetStr.indexOf('\t') !== -1;
        tabsOnly = hasTabs && !/[^\t]/.test(offsetStr);

        // mixed spaces and tabs in one offset -> don't remove offsets
        // (a tab anywhere in the offset counts, not only a leading one)
        if (hasTabs && !tabsOnly) {
            return 0;
        }

        if (tabsOnly) {
            if (spaceOnlyOffset) {
                // tabs here, but a previous offset had no tabs -> mixed spaces and tabs
                return 0;
            }

            // remember offset contains only tabs
            tabOnlyOffset = true;
        } else {
            if (tabOnlyOffset) {
                // no tabs here, but a previous offset had only tabs -> mixed spaces and tabs
                return 0;
            }

            // remember offset contains no tabs
            spaceOnlyOffset = true;
        }

        return offsetStr.length;
    }));

    // Math.min yields Infinity when every line was empty (or none are left);
    // there is no offset to report in that case
    if (offset === Infinity) {
        offset = 0;
    }

    // remove common offsets if possible
    if (offset) {
        lines = lines.map(function(line) {
            return line.slice(offset);
        });
    }

    return {
        source: lines.join('\n'),
        offset: offset,
        lineCount: lineCount
    };
}
176
/**
 * Pull every inline script out of an HTML document and normalize each one
 * by its common line offset. Every resulting fragment records the offset
 * that was stripped from its lines and the line of the HTML file at which
 * the script content begins, so error positions can be mapped back to the
 * HTML file.
 *
 * @param {String} filename
 * @param {String} data full HTML text
 * @returns {Object} object with three members:
 *   `sources` - array of {source, offset, line} fragments in document order,
 *   `errors` - Errors instance created for this file,
 *   `addError` - function(error) that appends a copy of the error's rule,
 *   message, line and column, tagged with `filename`, to the errors list
 */
function extractJs(filename, data) {
    var htmlFile = new HtmlFile({
        filename: filename,
        source: data
    });
    var errors = new Errors(htmlFile);
    var scripts = getScripts(data);
    var sources = [];
    // running line number in the HTML file
    var line = 1;
    // position just past the previously processed script content
    var lastHtmlPos = 0;
    var scriptInfo;
    var normalized;
    var htmlBefore;
    var i;

    for (i = 0; i < scripts.length; i++) {
        scriptInfo = scripts[i];

        // fetch script source and normalize it
        normalized = normalizeSource(scriptInfo.source);

        // advance by the line breaks between the previous script and this one
        htmlBefore = data.substring(lastHtmlPos, scriptInfo.start);
        line += htmlBefore.split(rLineSplit).length - 1;

        sources.push({
            source: normalized.source,
            offset: normalized.offset,
            line: line
        });

        // advance past the script itself before looking at the next fragment
        line += normalized.lineCount - 1;
        lastHtmlPos = scriptInfo.end;
    }

    // records an error against this file; only the listed fields are copied
    function addError(error) {
        errors._errorList.push({
            filename: filename,
            rule: error.rule,
            message: error.message,
            line: error.line,
            column: error.column
        });
    }

    return {
        sources: sources,
        errors: errors,
        addError: addError
    };
}
229
230module.exports = extractJs;