1 | "use strict"
2 |
3 | const htmlparser = require("htmlparser2")
4 | const TransformableString = require("./TransformableString")
5 |
/**
 * Parse `code` with htmlparser2 and report it to `onChunk` as a sequence of
 * "html" and "script" regions.
 *
 * While parsing, a region is closed each time an accepted <script> tag opens,
 * each time text is seen inside it, and when it closes. Once parsing is done,
 * neighbouring regions of the same type are merged, so `onChunk` receives one
 * call per run of same-type regions, in document order.
 *
 * @param {string} code - Text to scan. When falsy nothing is parsed or reported.
 * @param {object} options
 * @param {boolean} [options.xmlMode] - Forwarded to the parser; only a strict
 *   `true` switches it on.
 * @param {function(string): boolean} [options.isJavaScriptMIMEType] - Called
 *   with the `type` attribute of a <script> tag when that attribute is truthy;
 *   a falsy result makes the tag be skipped. Defaults to accepting every type.
 * @param {function(object): void} onChunk - Receives
 *   `{ type, start, end, cdata }`, where `start`/`end` are offsets into `code`
 *   and `cdata` lists the `{ start, end }` spans recorded by `oncdatastart`.
 */
function iterateScripts(code, options, onChunk) {
  if (!code) {
    return
  }

  const useXmlMode = options.xmlMode === true
  const acceptsType = options.isJavaScriptMIMEType || (() => true)

  const regions = []
  let cursor = 0
  let insideScript = false
  let pendingCdata = []

  // Close the region running from the cursor up to `end`, hand it the CDATA
  // spans collected so far, and restart both from there.
  const closeRegion = (type, end) => {
    regions.push({ type, start: cursor, end, cdata: pendingCdata })
    pendingCdata = []
    cursor = end
  }

  const handlers = {
    onopentag(name, attrs) {
      // Only <script> tags with an accepted (or absent) type and no `src`
      // attribute start a script region.
      const isInlineScript =
        name === "script" &&
        (!attrs.type || acceptsType(attrs.type)) &&
        !attrs.src

      if (!isInlineScript) {
        return
      }

      insideScript = true
      closeRegion("html", parser.endIndex + 1)
    },

    oncdatastart() {
      const { startIndex, endIndex } = parser
      // Two spans: 9 characters from the start and the last 3 characters.
      // NOTE(review): presumably the `<![CDATA[` opener and the `]]>` closer.
      pendingCdata.push(
        { start: startIndex, end: startIndex + 9 },
        { start: endIndex - 2, end: endIndex + 1 }
      )
    },

    onclosetag(name) {
      if (name !== "script" || !insideScript) {
        return
      }

      insideScript = false

      const previous = regions[regions.length - 1]
      if (parser.startIndex < previous.end) {
        // The closing tag starts before the end of the last recorded region,
        // so there is no script text to report; drop this script.
        // NOTE(review): presumably a self-closing tag - confirm.
        return
      }

      closeRegion("script", parser.startIndex)
    },

    ontext() {
      if (insideScript) {
        closeRegion("script", parser.endIndex + 1)
      }
    },
  }

  const parser = new htmlparser.Parser(handlers, { xmlMode: useXmlMode })
  parser.parseComplete(code)

  // Whatever follows the last recorded region is HTML.
  closeRegion("html", parser.endIndex + 1)

  // Merge each run of same-type regions and report it as one chunk.
  let runStart = 0
  for (let position = 1; position <= regions.length; position += 1) {
    const runContinues =
      position < regions.length &&
      regions[position].type === regions[runStart].type
    if (runContinues) {
      continue
    }

    const run = regions.slice(runStart, position)
    const cdata = []
    for (const region of run) {
      cdata.push(...region.cdata)
    }

    onChunk({
      type: run[0].type,
      start: run[0].start,
      end: run[run.length - 1].end,
      cdata,
    })
    runStart = position
  }
}
112 |
/**
 * Work out the indentation string expected in front of each script line.
 *
 * - No descriptor: the spaces/tabs that follow the first run of line breaks
 *   in `slice`, or "" when `slice` contains no line break.
 * - `descriptor.relative` truthy: whatever precedes, on the same line, the
 *   last `<` of `previousHTML`, followed by `descriptor.spaces`. Throws a
 *   TypeError when `previousHTML` contains no `<`.
 * - Otherwise: `descriptor.spaces` as is.
 *
 * @param {{relative: *, spaces: string}|null|undefined} descriptor
 * @param {string} previousHTML - HTML text located before the script.
 * @param {string} slice - Script text used to auto-detect the indentation.
 * @returns {string}
 */
function computeIndent(descriptor, previousHTML, slice) {
  if (descriptor) {
    if (!descriptor.relative) {
      return descriptor.spaces
    }

    const tagLine = previousHTML.match(/([^\n\r]*)<[^<]*$/)
    return tagLine[1] + descriptor.spaces
  }

  const firstLine = /[\n\r]+([ \t]*)/.exec(slice)
  return firstLine === null ? "" : firstLine[1]
}
125 |
/**
 * Generator describing, line by line, how `indent` can be stripped from
 * `slice`.
 *
 * One action is yielded for every line break found in `slice`:
 * - `{ type: "dedent", from, to }` when the line's indentation is acceptable;
 *   `from`..`to` is the range of `slice` to remove (it also swallows the line
 *   break itself when that line break is the very first character);
 * - `{ type: "empty" }` when the indentation is not acceptable but the line
 *   has no text after its leading spaces/tabs;
 * - `{ type: "bad-indent" }` otherwise.
 *
 * The first line that has text must be indented with exactly `indent`; every
 * other line only has to start with it.
 *
 * After the last line, one more "dedent" action is yielded for the spaces and
 * tabs ending `slice` that lie after the last removed range, if there are any.
 *
 * @param {string} indent - Indentation expected on every line.
 * @param {string} slice - Text to inspect.
 * @yields {{type: string, from: (number|undefined), to: (number|undefined)}}
 */
function* dedent(indent, slice) {
  const linePattern = /(\r\n|\n|\r)([ \t]*)(.*)/g
  let sawText = false
  let removedUpTo = 0

  for (
    let line = linePattern.exec(slice);
    line !== null;
    line = linePattern.exec(slice)
  ) {
    const [, lineBreak, leading, text] = line
    const blank = !text

    // Exact match is required only on the first line carrying text.
    const wellIndented =
      blank || sawText ? leading.startsWith(indent) : leading === indent

    if (wellIndented) {
      const lineStart = line.index + lineBreak.length
      removedUpTo = lineStart + indent.length
      yield {
        type: "dedent",
        // A line break opening the slice is removed along with the indent.
        from: line.index === 0 ? 0 : lineStart,
        to: removedUpTo,
      }
    } else {
      yield { type: blank ? "empty" : "bad-indent" }
    }

    sawText = sawText || !blank
  }

  const trailing = /[ \t]*$/.exec(slice.slice(removedUpTo))[0].length
  if (trailing) {
    yield {
      type: "dedent",
      from: slice.length - trailing,
      to: slice.length,
    }
  }
}
181 |
/**
 * Extract the content of the inline <script> regions of `code`.
 *
 * Every "script" chunk reported by `iterateScripts` yields one
 * `TransformableString` built from the whole of `code`, in which the text
 * outside the chunk, the CDATA marker spans and the ranges designated by
 * `dedent` are replaced with "".
 *
 * @param {string} code - Full source text.
 * @param {object} [indentDescriptor] - Passed to `computeIndent`; when falsy
 *   the indentation is detected from the script itself.
 * @param {boolean} [xmlMode] - Forwarded to `iterateScripts`.
 * @param {function(string): boolean} [isJavaScriptMIMEType] - Forwarded to
 *   `iterateScripts`.
 * @returns {{code: Array, badIndentationLines: number[], hasBOM: boolean}}
 *   `code` holds one transformed string per script chunk,
 *   `badIndentationLines` the 1-based numbers of the lines `dedent` flagged
 *   as "bad-indent", and `hasBOM` tells whether `code` starts with U+FEFF.
 */
function extract(code, indentDescriptor, xmlMode, isJavaScriptMIMEType) {
  const badIndentationLines = []
  const codeParts = []
  let lineNumber = 1
  let previousHTML = ""

  const countLineBreaks = (text) => {
    const lineBreaks = text.match(/\r\n|\n|\r/g)
    return lineBreaks ? lineBreaks.length : 0
  }

  iterateScripts(code, { xmlMode, isJavaScriptMIMEType }, (chunk) => {
    const slice = code.slice(chunk.start, chunk.end)

    if (chunk.type === "html") {
      lineNumber += countLineBreaks(slice)
      previousHTML = slice
      return
    }

    if (chunk.type !== "script") {
      return
    }

    const transformedCode = new TransformableString(code)
    let indentSlice = slice
    for (const cdata of chunk.cdata) {
      transformedCode.replace(cdata.start, cdata.end, "")
      if (cdata.end === chunk.end) {
        // A CDATA span closing the chunk is left out of the indentation
        // analysis.
        indentSlice = code.slice(chunk.start, cdata.start)
      }
    }
    transformedCode.replace(0, chunk.start, "")
    transformedCode.replace(chunk.end, code.length, "")

    const indent = computeIndent(indentDescriptor, previousHTML, indentSlice)

    // `dedent` yields one action per line break, possibly followed by an
    // extra trailing-whitespace "dedent" action that crosses no line break.
    // A local counter numbers the per-line actions, while the running line
    // number advances by the line breaks actually present in the chunk, so
    // that extra action cannot shift the numbering of later scripts.
    let actionLine = lineNumber
    for (const action of dedent(indent, indentSlice)) {
      actionLine += 1
      if (action.type === "dedent") {
        transformedCode.replace(
          chunk.start + action.from,
          chunk.start + action.to,
          ""
        )
      } else if (action.type === "bad-indent") {
        badIndentationLines.push(actionLine)
      }
    }

    lineNumber += countLineBreaks(slice)
    codeParts.push(transformedCode)
  })

  return {
    code: codeParts,
    badIndentationLines,
    hasBOM: code.startsWith("\uFEFF"),
  }
}
230 |
231 | module.exports = extract