UNPKG

5.6 kBJavaScriptView Raw
1"use strict"
2
3const htmlparser = require("htmlparser2")
4const TransformableString = require("./TransformableString")
5
/**
 * Walks `code` with the HTML parser and reports it to `onChunk` as a sequence
 * of alternating "html" and "script" chunks.
 *
 * A `<script>` element counts as a script when its `type` attribute (if any)
 * is accepted by `options.isJavaScriptMIMEType` and it has no `src` attribute.
 * Adjacent chunks of the same type are merged before being reported.
 *
 * @param {string} code - Source text to scan. Nothing is reported when falsy.
 * @param {{xmlMode?: boolean, isJavaScriptMIMEType?: function(string): boolean}} options
 *   - `xmlMode` is forwarded to the parser only when strictly `true`;
 *     `isJavaScriptMIMEType` defaults to accepting every type.
 * @param {function({type: string, start: number, end: number, cdata: Array<{start: number, end: number}>}): void} onChunk
 *   - Called once per merged chunk, in source order.
 * @returns {void}
 */
function iterateScripts(code, options, onChunk) {
  if (!code) return

  const useXmlMode = options.xmlMode === true
  const acceptsMIMEType = options.isJavaScriptMIMEType || (() => true)

  const pieces = []
  let cursor = 0
  let insideScript = false
  let pendingCdata = []

  // Closes the piece running from the current cursor up to `end`, handing it
  // the CDATA markers collected so far.
  const closePiece = (type, end) => {
    pieces.push({ type, start: cursor, end, cdata: pendingCdata })
    pendingCdata = []
    cursor = end
  }

  const handlers = {
    onopentag(name, attrs) {
      // Only inline <script> tags holding an accepted type are of interest.
      if (name !== "script") return
      if (attrs.type && !acceptsMIMEType(attrs.type)) return
      if (attrs.src) return

      insideScript = true
      closePiece("html", parser.endIndex + 1)
    },

    oncdatastart() {
      // Two marker ranges per CDATA section: the first is 9 characters long
      // (the length of "<![CDATA["), the second covers the last 3 characters
      // of the section (presumably "]]>").
      pendingCdata.push(
        {
          start: parser.startIndex,
          end: parser.startIndex + 9,
        },
        {
          start: parser.endIndex - 2,
          end: parser.endIndex + 1,
        }
      )
    },

    onclosetag(name) {
      if (name !== "script" || !insideScript) return

      insideScript = false

      const previous = pieces[pieces.length - 1]
      if (parser.startIndex < previous.end) {
        // The parser index did not advance past the previously emitted piece.
        // This happens on self-closing tags (xml mode): ignore this script.
        return
      }

      closePiece("script", parser.startIndex)
    },

    ontext() {
      if (insideScript) {
        closePiece("script", parser.endIndex + 1)
      }
    },
  }

  const parser = new htmlparser.Parser(handlers, { xmlMode: useXmlMode })

  parser.parseComplete(code)

  // Whatever follows the last script is HTML.
  closePiece("html", parser.endIndex + 1)

  // Merge runs of same-typed pieces and report each run as a single chunk.
  let group = null
  for (const piece of pieces) {
    if (group !== null && group.type === piece.type) {
      group.end = piece.end
      group.cdata.push(...piece.cdata)
      continue
    }
    if (group !== null) onChunk(group)
    group = {
      type: piece.type,
      start: piece.start,
      end: piece.end,
      cdata: [...piece.cdata],
    }
  }
  onChunk(group)
}
112
/**
 * Determines the indentation string that script lines are expected to carry.
 *
 * - Without a descriptor, the indentation is whatever spaces/tabs follow the
 *   first run of line breaks in `slice` (empty string when there is none).
 * - With a relative descriptor, it is the text preceding the last `<` of
 *   `previousHTML` on that tag's line, followed by `descriptor.spaces`.
 * - Otherwise it is `descriptor.spaces` alone.
 *
 * @param {{relative?: boolean, spaces: string}|null|undefined} descriptor
 * @param {string} previousHTML - HTML text that precedes the script.
 * @param {string} slice - Script text used to auto-detect the indentation.
 * @returns {string} The expected indentation.
 */
function computeIndent(descriptor, previousHTML, slice) {
  if (!descriptor) {
    const indentMatch = /[\n\r]+([ \t]*)/.exec(slice)
    return indentMatch ? indentMatch[1] : ""
  }

  if (descriptor.relative) {
    // The pattern needs a "<" in previousHTML; without one there is no tag to
    // be relative to, so use an empty base instead of throwing on a null match.
    const tagLineMatch = /([^\n\r]*)<[^<]*$/.exec(previousHTML)
    const baseIndent = tagLineMatch ? tagLineMatch[1] : ""
    return baseIndent + descriptor.spaces
  }

  return descriptor.spaces
}
125
/**
 * Scans `slice` line by line and yields one action per line break describing
 * how that line relates to the expected `indent`:
 *
 * - `{ type: "dedent", from, to }`: the range (offsets into `slice`) holding
 *   the indentation to strip. When the slice starts with a line break, the
 *   first range starts at 0 so that the line break is covered too.
 * - `{ type: "empty" }`: a line without text whose indentation does not match.
 * - `{ type: "bad-indent" }`: a line with text whose indentation does not match.
 *
 * After the last line, one extra "dedent" action is yielded when the slice
 * still ends with spaces or tabs beyond the last stripped range.
 *
 * @param {string} indent - Expected indentation of each line.
 * @param {string} slice - Text to scan.
 * @yields {{type: string, from?: number, to?: number}}
 */
function* dedent(indent, slice) {
  const linePattern = /(\r\n|\n|\r)([ \t]*)(.*)/g
  let seenText = false
  let strippedUpTo = 0
  let found

  while ((found = linePattern.exec(slice)) !== null) {
    const [, lineBreak, leading, text] = found
    const hasText = text.length > 0

    // The first line holding text must be indented by exactly `indent`;
    // every other line only has to start with it.
    const indentOk =
      hasText && !seenText ? leading === indent : leading.startsWith(indent)

    if (indentOk) {
      const lineStart = found.index + lineBreak.length
      strippedUpTo = lineStart + indent.length
      yield {
        type: "dedent",
        // A leading line break is removed along with the indentation.
        from: found.index === 0 ? 0 : lineStart,
        to: strippedUpTo,
      }
    } else {
      yield {
        type: hasText ? "bad-indent" : "empty",
      }
    }

    if (hasText) {
      seenText = true
    }
  }

  // Trailing spaces/tabs that no previous range covered.
  const tail = slice.slice(strippedUpTo)
  const trailingLength = tail.match(/[ \t]*$/)[0].length
  if (trailingLength) {
    yield {
      type: "dedent",
      from: slice.length - trailingLength,
      to: slice.length,
    }
  }
}
181
/**
 * Extracts the inline scripts of an HTML document.
 *
 * Each script becomes a `TransformableString` built over the whole `code`, in
 * which everything outside the script, the CDATA markers and the expected
 * indentation are replaced by empty strings.
 *
 * @param {string} code - Full source text of the document.
 * @param {{relative?: boolean, spaces: string}|null|undefined} indentDescriptor
 *   - Expected indentation, forwarded to `computeIndent`.
 * @param {boolean} xmlMode - Forwarded to `iterateScripts`.
 * @param {function(string): boolean} [isJavaScriptMIMEType]
 *   - Forwarded to `iterateScripts`.
 * @returns {{code: Array, badIndentationLines: number[], hasBOM: boolean}}
 *   `code` holds one TransformableString per script; `badIndentationLines`
 *   holds the 1-based document lines whose indentation is wrong; `hasBOM`
 *   tells whether `code` starts with U+FEFF.
 */
function extract(code, indentDescriptor, xmlMode, isJavaScriptMIMEType) {
  const badIndentationLines = []
  const codeParts = []
  let lineNumber = 1
  let previousHTML = ""

  iterateScripts(code, { xmlMode, isJavaScriptMIMEType }, (chunk) => {
    const slice = code.slice(chunk.start, chunk.end)
    if (chunk.type === "html") {
      const match = slice.match(/\r\n|\n|\r/g)
      if (match) lineNumber += match.length
      previousHTML = slice
    } else if (chunk.type === "script") {
      const transformedCode = new TransformableString(code)
      let indentSlice = slice
      for (const cdata of chunk.cdata) {
        transformedCode.replace(cdata.start, cdata.end, "")
        if (cdata.end === chunk.end) {
          // The script ends with a CDATA marker: leave it out of the
          // indentation analysis.
          indentSlice = code.slice(chunk.start, cdata.start)
        }
      }
      transformedCode.replace(0, chunk.start, "")
      transformedCode.replace(chunk.end, code.length, "")

      // dedent() yields one action per line break, possibly followed by a
      // final "dedent" that only strips trailing whitespace. That last action
      // does not start a new line, so it must not advance the line counter,
      // otherwise every later script reports lines that are off by one.
      const lineBreaks = indentSlice.match(/\r\n|\n|\r/g)
      let remainingLines = lineBreaks ? lineBreaks.length : 0

      for (const action of dedent(
        computeIndent(indentDescriptor, previousHTML, indentSlice),
        indentSlice
      )) {
        if (remainingLines > 0) {
          lineNumber += 1
          remainingLines -= 1
        }
        if (action.type === "dedent") {
          transformedCode.replace(
            chunk.start + action.from,
            chunk.start + action.to,
            ""
          )
        } else if (action.type === "bad-indent") {
          badIndentationLines.push(lineNumber)
        }
      }
      codeParts.push(transformedCode)
    }
  })

  return {
    code: codeParts,
    badIndentationLines,
    hasBOM: code.startsWith("\uFEFF"),
  }
}
230
// The module's sole export is the `extract` function.
module.exports = extract