1 | "use strict"
|
2 |
|
3 | const htmlparser = require("htmlparser2")
|
4 | const TransformableString = require("./TransformableString")
|
5 |
|
/**
 * Parses `code` as HTML/XML and reports it to `onChunk` as a sequence of
 * alternating "html" and "script" ranges.
 *
 * A `<script>` element is treated as script content only when it has no `src`
 * attribute and its `type` attribute (if any) is accepted by
 * `options.isJavaScriptMIMEType`.
 *
 * @param {string} code - Document source. Nothing is reported when it is falsy.
 * @param {object} options
 * @param {boolean} [options.xmlMode] - Only a strict `true` enables the
 *   parser's XML mode.
 * @param {function(string): boolean} [options.isJavaScriptMIMEType] - Decides
 *   whether a `type` attribute value denotes JavaScript. Defaults to accepting
 *   everything.
 * @param {function(object): void} onChunk - Called once per merged range with
 *   `{ type, start, end, cdata }`, where `cdata` lists the `{ start, end }`
 *   ranges of the CDATA markers seen inside that range.
 */
function iterateScripts(code, options, onChunk) {
  if (!code) return

  const { xmlMode } = options
  const isJavaScriptMIMEType = options.isJavaScriptMIMEType || (() => true)

  const rawChunks = []
  let cursor = 0
  let insideScript = false
  let pendingCdata = []

  // Closes the range [cursor, end) as a chunk of the given type and attaches
  // the CDATA markers collected since the previous chunk.
  const closeChunk = (type, end) => {
    rawChunks.push({ type, start: cursor, end, cdata: pendingCdata })
    pendingCdata = []
    cursor = end
  }

  const handlers = {
    onopentag(name, attrs) {
      // Only inline <script> tags holding JavaScript are of interest.
      const isInlineJavaScript =
        name === "script" &&
        !(attrs.type && !isJavaScriptMIMEType(attrs.type)) &&
        !attrs.src
      if (!isInlineJavaScript) {
        return
      }

      insideScript = true
      // Everything up to and including the opening tag is HTML.
      closeChunk("html", parser.endIndex + 1)
    },

    oncdatastart() {
      // Two marker ranges: 9 characters from the start (the length of
      // "<![CDATA[") and the last 3 characters (the length of "]]>").
      // NOTE(review): assumes startIndex/endIndex delimit the whole CDATA
      // section - confirm against the htmlparser2 documentation.
      const openingMarker = {
        start: parser.startIndex,
        end: parser.startIndex + 9,
      }
      const closingMarker = {
        start: parser.endIndex - 2,
        end: parser.endIndex + 1,
      }
      pendingCdata.push(openingMarker, closingMarker)
    },

    onclosetag(name) {
      if (!insideScript || name !== "script") {
        return
      }

      insideScript = false

      const previousChunk = rawChunks[rawChunks.length - 1]
      if (parser.startIndex >= previousChunk.end) {
        closeChunk("script", parser.startIndex)
      }
      // Otherwise the closing tag starts before the end of the chunk that was
      // already recorded (presumably a self-closing <script/> in XML mode),
      // so there is no script content to report.
    },

    ontext() {
      if (insideScript) {
        closeChunk("script", parser.endIndex + 1)
      }
    },
  }

  const parser = new htmlparser.Parser(handlers, { xmlMode: xmlMode === true })

  parser.parseComplete(code)

  // Whatever follows the last recorded chunk is HTML.
  closeChunk("html", parser.endIndex + 1)

  // Reports rawChunks[first .. afterLast - 1] as one merged chunk.
  const emitGroup = (first, afterLast) => {
    const mergedCdata = []
    for (const raw of rawChunks.slice(first, afterLast)) {
      mergedCdata.push(...raw.cdata)
    }
    onChunk({
      type: rawChunks[first].type,
      start: rawChunks[first].start,
      end: rawChunks[afterLast - 1].end,
      cdata: mergedCdata,
    })
  }

  // Merge runs of consecutive chunks sharing the same type.
  let groupStart = 0
  for (let position = 1; position <= rawChunks.length; position += 1) {
    const continuesGroup =
      position < rawChunks.length &&
      rawChunks[position].type === rawChunks[groupStart].type
    if (!continuesGroup) {
      emitGroup(groupStart, position)
      groupStart = position
    }
  }
}
|
112 |
|
/**
 * Computes the indentation prefix that the lines of a script are expected to
 * start with.
 *
 * @param {{relative: boolean, spaces: string}|null|undefined} descriptor -
 *   Explicit indentation setting. When falsy, the indentation is detected from
 *   `slice` itself.
 * @param {string} previousHTML - HTML text preceding the script. Only used
 *   when `descriptor.relative` is truthy.
 * @param {string} slice - Script text used for auto-detection.
 * @returns {string} The expected indentation.
 */
function computeIndent(descriptor, previousHTML, slice) {
  if (!descriptor) {
    // Auto-detect: the whitespace following the first run of line breaks.
    const indentMatch = /[\n\r]+([ \t]*)/.exec(slice)
    return indentMatch ? indentMatch[1] : ""
  }

  if (!descriptor.relative) {
    return descriptor.spaces
  }

  // Relative mode: prepend whatever precedes the last "<" of the previous HTML
  // on its own line. When that HTML contains no "<" at all (e.g. it is empty),
  // fall back to an empty prefix instead of throwing on a null match.
  const tagLineMatch = /([^\n\r]*)<[^<]*$/.exec(previousHTML)
  const tagLinePrefix = tagLineMatch ? tagLineMatch[1] : ""
  return tagLinePrefix + descriptor.spaces
}
|
125 |
|
/**
 * Generator that inspects every line of `slice` that starts after a line
 * break and yields one action per such line:
 *
 *  - `{ type: "dedent", from, to }`: the range `[from, to)` of `slice` holds
 *    the expected indentation (plus the leading line break when the slice
 *    starts with one) and can be removed;
 *  - `{ type: "empty" }`: the line has no text and does not start with
 *    `indent`;
 *  - `{ type: "bad-indent" }`: the line has text but is not indented as
 *    expected.
 *
 * After all lines, one extra `"dedent"` action is yielded for the spaces/tabs
 * at the very end of `slice` that lie beyond the last removed indentation,
 * if there are any. That final action does not correspond to a new line.
 *
 * @param {string} indent - Expected indentation prefix.
 * @param {string} slice - Script text to inspect.
 * @yields {{type: string, from?: number, to?: number}}
 */
function* dedent(indent, slice) {
  const lineRe = /(\r\n|\n|\r)([ \t]*)(.*)/g
  let hadNonEmptyLine = false
  let lastIndex = 0
  let match

  // Every match consumes at least a line break, so the loop always advances.
  while ((match = lineRe.exec(slice)) !== null) {
    const [, newLine, lineIndent, lineText] = match

    const isEmptyLine = !lineText
    const isFirstNonEmptyLine = !isEmptyLine && !hadNonEmptyLine

    // The first non-empty line must match the indentation exactly; later
    // lines only need to start with it.
    const hasGoodIndentation = isFirstNonEmptyLine
      ? lineIndent === indent
      : lineIndent.startsWith(indent)

    if (hasGoodIndentation) {
      lastIndex = match.index + newLine.length + indent.length
      // When the slice begins with a line break, remove that break as well.
      const fromIndex = match.index === 0 ? 0 : match.index + newLine.length
      yield {
        type: "dedent",
        from: fromIndex,
        to: lastIndex,
      }
    } else if (isEmptyLine) {
      yield {
        type: "empty",
      }
    } else {
      yield {
        type: "bad-indent",
      }
    }

    if (!isEmptyLine) {
      hadNonEmptyLine = true
    }
  }

  // Trailing spaces/tabs at the end of the slice, past the last dedent.
  const endSpaces = slice.slice(lastIndex).match(/[ \t]*$/)[0].length
  if (endSpaces) {
    yield {
      type: "dedent",
      from: slice.length - endSpaces,
      to: slice.length,
    }
  }
}
|
181 |
|
/**
 * Extracts the content of the inline JavaScript `<script>` tags of `code`.
 *
 * For every script chunk reported by `iterateScripts`, a `TransformableString`
 * is built over the whole document in which everything outside the chunk, the
 * CDATA markers and the expected indentation are replaced with an empty
 * string.
 *
 * @param {string} code - Document source.
 * @param {{relative: boolean, spaces: string}|null|undefined} indentDescriptor -
 *   Indentation setting, passed to `computeIndent`.
 * @param {boolean} xmlMode - Passed to `iterateScripts`.
 * @param {function(string): boolean} [isJavaScriptMIMEType] - Passed to
 *   `iterateScripts`.
 * @returns {{code: Array, badIndentationLines: number[], hasBOM: boolean}}
 *   `code` holds one `TransformableString` per script chunk,
 *   `badIndentationLines` the 1-based document lines reported as
 *   "bad-indent" by `dedent`, and `hasBOM` tells whether the document starts
 *   with a byte order mark.
 */
function extract(code, indentDescriptor, xmlMode, isJavaScriptMIMEType) {
  const badIndentationLines = []
  const codeParts = []
  let lineNumber = 1
  let previousHTML = ""

  iterateScripts(code, { xmlMode, isJavaScriptMIMEType }, (chunk) => {
    const slice = code.slice(chunk.start, chunk.end)
    const lineBreaks = slice.match(/\r\n|\n|\r/g)
    const lineBreakCount = lineBreaks ? lineBreaks.length : 0

    if (chunk.type === "html") {
      lineNumber += lineBreakCount
      previousHTML = slice
    } else if (chunk.type === "script") {
      const transformedCode = new TransformableString(code)
      let indentSlice = slice
      for (const cdata of chunk.cdata) {
        transformedCode.replace(cdata.start, cdata.end, "")
        if (cdata.end === chunk.end) {
          // The chunk ends with a CDATA marker: keep it out of the
          // indentation analysis.
          indentSlice = code.slice(chunk.start, cdata.start)
        }
      }
      transformedCode.replace(0, chunk.start, "")
      transformedCode.replace(chunk.end, code.length, "")

      const lineBeforeScript = lineNumber
      for (const action of dedent(
        computeIndent(indentDescriptor, previousHTML, indentSlice),
        indentSlice
      )) {
        lineNumber += 1
        if (action.type === "dedent") {
          transformedCode.replace(
            chunk.start + action.from,
            chunk.start + action.to,
            ""
          )
        } else if (action.type === "bad-indent") {
          badIndentationLines.push(lineNumber)
        }
      }

      // `dedent` may yield one last "dedent" action for trailing whitespace;
      // that action does not start a new line, so counting actions can
      // overshoot by one and shift the lines reported for later scripts.
      // Resynchronise with the actual number of line breaks in the chunk.
      lineNumber = lineBeforeScript + lineBreakCount

      codeParts.push(transformedCode)
    }
  })

  return {
    code: codeParts,
    badIndentationLines,
    hasBOM: code.startsWith("\uFEFF"),
  }
}
|
230 |
|
231 | module.exports = extract
|