UNPKG

jscs/lib/extract-js.js

Version:

6.25 kB · JavaScript · View Raw

// HTML tokenizer used to locate <script> tags and their positions.
var htmlparser = require('htmlparser2');
// Project-local error collector; instantiated once per extracted file.
var Errors = require('./errors');
// Matches one line break. `\r\n` is listed first so a CRLF pair is consumed
// as a single break instead of two.
var rLineSplit = /\r\n|\r|\n/;
// Matches when a string contains at least one non-whitespace character.
var rHasNonWhitespace = /\S/;
5
/**
 * Html file representation (needed for errors output).
 *
 * Stores the file name and the source split into lines, and exposes them
 * through `getFilename()` / `getLines()`. An instance of this type is what
 * this module passes to the `Errors` constructor.
 *
 * @name HtmlFile
 * @param {Object} params
 * @param {String} params.filename
 * @param {String} params.source
 */
var HtmlFile = function(params) {
    this._filename = params.filename;
    this._lines = params.source.split(rLineSplit);
};

// Methods are attached one by one (instead of replacing `prototype` with a
// fresh object literal) so that `HtmlFile.prototype.constructor` keeps
// pointing at `HtmlFile`.

/**
 * Returns source filename for this object representation.
 *
 * @returns {String}
 */
HtmlFile.prototype.getFilename = function() {
    return this._filename;
};

/**
 * Returns array of source lines for the file.
 *
 * @returns {String[]}
 */
HtmlFile.prototype.getLines = function() {
    return this._lines;
};
38
/**
 * Parse html and retrieve inline script sources.
 *
 * A `<script>` element is collected only when it has no `src` attribute and
 * its `type` attribute is either absent or equal to `text/javascript`
 * (compared case-insensitively). Scripts whose content is whitespace-only
 * are skipped.
 *
 * @param {String} html
 * @returns {Object[]} one `{source, start, end}` entry per collected script;
 *     `start` and `end` are the positions in `html` passed to `substring`
 *     to obtain `source`
 */
function getScripts(html) {
    var scripts = [];
    // Start position of the script content currently being read.
    // 0 doubles as "not inside a script we care about".
    var scriptStartPos = 0;
    var parser;

    function onopen(name, attrs) {
        // tag should be a <script>
        if (name !== 'script') {
            return;
        }

        // ignore scripts with src attribute
        if (attrs.src) {
            return;
        }

        // script tag should have no type attribute, or it should be text/javascript
        if (attrs.type && attrs.type.toLowerCase() !== 'text/javascript') {
            return;
        }

        // content begins one position past what the parser reports as the
        // end of the open tag
        scriptStartPos = parser.endIndex + 1;
    }

    function onclose() {
        if (!scriptStartPos) {
            return;
        }

        // content ends where the parser reports the closing tag starts
        var scriptEndPos = parser.startIndex;
        var source = html.substring(scriptStartPos, scriptEndPos);

        // store script content only if it contains non-whitespace characters
        if (rHasNonWhitespace.test(source)) {
            scripts.push({
                source: source,
                start: scriptStartPos,
                end: scriptEndPos
            });
        }

        // reset script start position
        scriptStartPos = 0;
    }

    parser = new htmlparser.Parser({
        onopentag: onopen,
        onclosetag: onclose
    });

    parser.parseComplete(html);

    return scripts;
}
93
/**
 * JavaScript in HTML is usually indented relative to the surrounding markup.
 * This function trims the script source and strips the indentation shared by
 * all of its lines, reporting how many characters were removed so the
 * original column can be restored later.
 *
 * No indentation is removed (offset 0) when:
 *  - some line has content but no indentation,
 *  - some line (other than the blanked last one) is whitespace-only,
 *  - tabs and other whitespace are mixed, within one line or across lines.
 *
 * @param {String} source
 * @returns {Object} `{source, offset, lineCount}`: the normalized source
 *     joined with `\n`, the number of characters removed from the start of
 *     every line, and the number of lines in the original `source`
 */
function normalizeSource(source) {
    var lines = source.split(rLineSplit);
    // counted before any trimming: callers use it to advance through the html
    var lineCount = lines.length;
    var offset = Infinity;
    // 'tab' or 'space' once the first indented/content line has been seen
    var indentKind = null;
    var i;
    var line;
    var indent;
    var hasTab;
    var kind;

    // remove first line if it's blank
    // usually <script> starts with new line
    if (!rHasNonWhitespace.test(lines[0])) {
        lines.shift();
    }

    // replace last line by empty string if it contains only whitespaces
    // it helps avoid disallowTrailingWhitespace errors on last line
    if (lines.length && !rHasNonWhitespace.test(lines[lines.length - 1])) {
        lines[lines.length - 1] = '';
    }

    // calculate min line offset
    for (i = 0; i < lines.length; i++) {
        line = lines[i];

        // skip empty lines
        if (!line) {
            continue;
        }

        // fetch whitespaces at the line beginning
        indent = line.match(/^\s*/)[0];

        // whitespace-only line -> don't remove offsets
        if (indent.length === line.length) {
            offset = 0;
            break;
        }

        // A tab anywhere in the indent counts, not only a leading one, so
        // that "  \t" is recognised as mixed as well as "\t  ".
        hasTab = indent.indexOf('\t') !== -1;

        // mixed tabs and other whitespace in one offset -> don't remove offsets
        if (hasTab && /[^\t]/.test(indent)) {
            offset = 0;
            break;
        }

        // an unindented line is classified as 'space', same as before
        kind = hasTab ? 'tab' : 'space';

        // indentation style differs from a previous line -> mixed spaces and tabs
        if (indentKind && indentKind !== kind) {
            offset = 0;
            break;
        }

        indentKind = kind;
        offset = Math.min(offset, indent.length);
    }

    // no line with content at all: there is nothing to strip
    if (!isFinite(offset)) {
        offset = 0;
    }

    // remove common offsets if possible
    if (offset) {
        lines = lines.map(function(text) {
            return text.slice(offset);
        });
    }

    return {
        source: lines.join('\n'),
        offset: offset,
        lineCount: lineCount
    };
}
176
/**
 * Parse HTML and search for <script> sources. Each script source is also
 * normalized by line offset. The result contains the script sources together
 * with the column offset that was removed from every line and the line of the
 * html file at which the script content starts. This information helps
 * restore absolute positions in the html file for errors.
 *
 * @param {String} filename
 * @param {String} data html file content
 * @returns {Object} `{sources, errors, addError}`:
 *     `sources` is a list of `{source, offset, line}` fragments,
 *     `errors` is the `Errors` instance created for the html file,
 *     `addError(error)` appends `{filename, rule, message, line, column}`
 *     (taken from `error`) to that instance's error list
 */
function extractJs(filename, data) {
    var htmlFile = new HtmlFile({
        filename: filename,
        source: data
    });
    var errors = new Errors(htmlFile);
    var sources = [];
    // 1-based line of the html file at the current position
    var line = 1;
    // position in `data` up to which line breaks were already counted
    var lastHtmlPos = 0;

    getScripts(data).forEach(function(scriptInfo) {
        // fetch script source and normalize it
        var normalized = normalizeSource(scriptInfo.source);
        var htmlBeforeScript = data.substring(lastHtmlPos, scriptInfo.start);

        // add line offset before script
        line += htmlBeforeScript.split(rLineSplit).length - 1;

        sources.push({
            source: normalized.source,
            offset: normalized.offset,
            line: line
        });

        // save offsets for next fragment: skip the line breaks inside the
        // script itself and continue counting from its end
        line += normalized.lineCount - 1;
        lastHtmlPos = scriptInfo.end;
    });

    return {
        sources: sources,
        errors: errors,
        addError: function(error) {
            // NOTE(review): writes to the collector's underscore-prefixed
            // list directly; confirm against ./errors before changing.
            errors._errorList.push({
                filename: filename,
                rule: error.rule,
                message: error.message,
                line: error.line,
                column: error.column
            });
        }
    };
}
229
// Public entry point of this module.
module.exports = extractJs;

1	`var htmlparser = require('htmlparser2');`
2	`var Errors = require('./errors');`
3	`var rLineSplit = /\r\n|\r|\n/;`
4	`var rHasNonWhitespace = /\S/;`
5
6	`/**`
7	`* Html file representation (needed for errors output).`
8	`*`
9	`* @name HtmlFile`
10	`* @param {Object} params`
11	`* @param {String} params.filename`
12	`* @param {String} params.source`
13	`*/`
14	`var HtmlFile = function(params) {`
15	`this._filename = params.filename;`
16	`this._lines = params.source.split(rLineSplit);`
17	`};`
18
19	`HtmlFile.prototype = {`
20	`/**`
21	`* Returns source filename for this object representation.`
22	`*`
23	`* @returns {String}`
24	`*/`
25	`getFilename: function() {`
26	`return this._filename;`
27	`},`
28
29	`/**`
30	`* Returns array of source lines for the file.`
31	`*`
32	`* @returns {String[]}`
33	`*/`
34	`getLines: function() {`
35	`return this._lines;`
36	`}`
37	`};`
38
39	`/**`
40	`* Parse html and retrieve script sources.`
41	`*`
42	`* @param {String} html`
43	`* @returns {Object[]}`
44	`*/`
45	`function getScripts(html) {`
46	`function onopen(name, attrs) {`
47	`// tag should be a <script>`
48	`if (name !== 'script' ||`
49	`// ignore scripts with src attribute`
50	`attrs.src ||`
51	`// script tag should have no type attribute, or the attribute should be equal to text/javascript`
52	`(attrs.type && attrs.type.toLowerCase() !== 'text/javascript')) {`
53	`return;`
54	`}`
55
56	`// store script content start pos`
57	`scriptStartPos = parser.endIndex + 1;`
58	`}`
59
60	`function onclose() {`
61	`if (!scriptStartPos) {`
62	`return;`
63	`}`
64
65	`// get script content`
66	`var scriptEndPos = parser.startIndex;`
67	`var source = html.substring(scriptStartPos, scriptEndPos);`
68
69	`// store script content only if it contains non-whitespace characters`
70	`if (rHasNonWhitespace.test(source)) {`
71	`scripts.push({`
72	`source: source,`
73	`start: scriptStartPos,`
74	`end: scriptEndPos`
75	`});`
76	`}`
77
78	`// reset script start position`
79	`scriptStartPos = 0;`
80	`}`
81
82	`var scriptStartPos = 0;`
83	`var scripts = [];`
84	`var parser = new htmlparser.Parser({`
85	`onopentag: onopen,`
86	`onclosetag: onclose`
87	`});`
88
89	`parser.parseComplete(html);`
90
91	`return scripts;`
92	`}`
93
94	`/**`
95	`* JavaScript in HTML usually shifted based on first JS line. For example`
96	`* if first line of fragment is offset by 4 spaces, each line in this`
97	`* fragment will have offset 4 to restore the original column.`
98	`* This function trim script source and normalize lines offset.`
99	`*`
100	`* @param {String} source`
101	`* @returns {Object}`
102	`*/`
103	`function normalizeSource(source) {`
104	`var lines = source.split(rLineSplit);`
105	`var lineCount = lines.length;`
106	`var tabOnlyOffset = false;`
107	`var spaceOnlyOffset = false;`
108	`var offset;`
109
110	`// remove first line if it's blank`
111	`// usually <script> starts with new line`
112	`if (!rHasNonWhitespace.test(lines[0])) {`
113	`lines.shift();`
114	`}`
115
116	`// replace last line by empty string if it contains only whitespaces`
117	`// it helps avoid disallowTrailingWhitespace errors on last line`
118	`if (!rHasNonWhitespace.test(lines[lines.length - 1])) {`
119	`lines[lines.length - 1] = '';`
120	`}`
121
122	`// calculate min line offset`
123	`offset = Math.min.apply(null, lines.map(function(line) {`
124	`// skip empty lines`
125	`if (!line) {`
126	`return Infinity;`
127	`}`
128
129	`// fetch whitespaces at the line beginning`
130	`var offsetStr = line.match(/^\s*/)[0];`
131	`var tabCount = offsetStr.match(/\t*/)[0].length;`
132
133	`if (offsetStr.length === line.length) {`
134	`return 0;`
135	`}`
136
137	`// mixed spaces and tabs in one offset -> don't remove offsets`
138	`if (tabCount && tabCount !== offsetStr.length) {`
139	`return 0;`
140	`}`
141
142	`if (tabCount) {`
143	`if (spaceOnlyOffset) {`
144	`// no spaces, but previous offset has only spaces -> mixed spaces and tabs`
145	`return 0;`
146	`} else {`
147	`// remember offset contains only tabs`
148	`tabOnlyOffset = true;`
149	`}`
150	`} else {`
151	`if (tabOnlyOffset) {`
152	`// no tabs, but previous offset has only tabs -> mixed spaces and tabs`
153	`return 0;`
154	`} else {`
155	`// remember offset contains only spaces`
156	`spaceOnlyOffset = true;`
157	`}`
158	`}`
159
160	`return offsetStr.length;`
161	`}));`
162
163	`// remove common offsets if possible`
164	`if (offset) {`
165	`lines = lines.map(function(line) {`
166	`return line.substr(offset);`
167	`});`
168	`}`
169
170	`return {`
171	`source: lines.join('\n'),`
172	`offset: offset,`
173	`lineCount: lineCount`
174	`};`
175	`}`
176
177	`/**`
178	`* Parse HTML and search for <script> sources. Each script source also normalize`
179	`* by line offset. Result contains script sources with information about line`
180	`* offset (that was removed for each line) and lines count before script source.`
181	`* This information helps restore absolute positions in html file for errors.`
182	`*`
183	`* @param {String} filename`
184	`* @param {String} data`
185	`* @returns {Object}`
186	`*/`
187	`function extractJs(filename, data) {`
188	`var errors = new Errors(new HtmlFile({`
189	`filename: filename,`
190	`source: data`
191	`}));`
192	`var scripts = getScripts(data);`
193	`var sources = [];`
194	`var line = 1;`
195	`var lastHtmlPos = 0;`
196
197	`scripts.forEach(function(scriptInfo) {`
198	`// fetch script source and normalize it`
199	`var normalized = normalizeSource(scriptInfo.source);`
200
201	`// add line offset before script`
202	`line += data.substring(lastHtmlPos, scriptInfo.start).split(rLineSplit).length - 1;`
203
204	`sources.push({`
205	`source: normalized.source,`
206	`offset: normalized.offset,`
207	`line: line`
208	`});`
209
210	`// save offsets for next fragment`
211	`line += normalized.lineCount - 1;`
212	`lastHtmlPos = scriptInfo.end;`
213	`});`
214
215	`return {`
216	`sources: sources,`
217	`errors: errors,`
218	`addError: function(error) {`
219	`errors._errorList.push({`
220	`filename: filename,`
221	`rule: error.rule,`
222	`message: error.message,`
223	`line: error.line,`
224	`column: error.column`
225	`});`
226	`}`
227	`};`
228	`}`
229
230	`module.exports = extractJs;`