1 | "use strict";
|
2 |
|
3 |
|
4 |
|
5 |
|
6 |
|
7 |
|
8 |
|
9 |
|
10 |
|
11 |
|
12 |
|
13 |
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
/**
 * TypeScript decorator helper. Reuses an already-installed `this.__decorate`
 * when one exists, otherwise defines the helper locally.
 *
 * The number of arguments decides the decoration mode:
 *  - fewer than 3: class decoration, each decorator receives the current result;
 *  - exactly 3: decorators are called with `(target, key)`;
 *  - more than 3: decorators are called with `(target, key, descriptor)` and a
 *    truthy final result is installed via `Object.defineProperty`.
 *
 * Decorators are applied from last to first. A falsy decorator return value
 * keeps the previous result.
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    var argCount = arguments.length;
    var result;
    if (argCount < 3) {
        result = target;
    }
    else if (desc === null) {
        // An explicit `null` descriptor means "look it up on the target".
        desc = Object.getOwnPropertyDescriptor(target, key);
        result = desc;
    }
    else {
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        // Defer to a metadata-aware implementation when the runtime provides one.
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (var index = decorators.length - 1; index >= 0; index--) {
            var decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            var applied;
            if (argCount < 3) {
                applied = decorator(result);
            }
            else if (argCount > 3) {
                applied = decorator(target, key, result);
            }
            else {
                applied = decorator(target, key);
            }
            result = applied || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
|
// Mark the CommonJS exports object with the `__esModule` flag.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; the class itself is assigned to it at the
// end of this file, after it has been defined and decorated.
exports.EncodingService = void 0;
|
30 |
|
31 | const iconv = require("iconv-lite");
|
32 | const safer_buffer_1 = require("safer-buffer");
|
33 | const inversify_1 = require("inversify");
|
34 | const buffer_1 = require("./buffer");
|
35 | const encodings_1 = require("./encodings");
|
36 | const stream_1 = require("./stream");
|
/** Upper bound on how many leading bytes are scanned for zero bytes (binary / UTF-16 heuristic). */
const ZERO_BYTE_DETECTION_BUFFER_MAX_LEN = 512;
/** Bytes to buffer before detecting the encoding when encoding guessing is disabled. */
const NO_ENCODING_GUESS_MIN_BYTES = 512;
/** Bytes to buffer before detecting the encoding when encoding guessing is enabled (4 KiB). */
const AUTO_ENCODING_GUESS_MIN_BYTES = 8 * 512;
/** Maximum number of bytes handed to the encoding guesser (64 KiB). */
const AUTO_ENCODING_GUESS_MAX_BYTES = 128 * 512;
/**
 * Lower-cased encoding names that are discarded when returned by the encoding
 * guesser; a guess matching one of these is treated as "no guess".
 */
const IGNORE_ENCODINGS = [
    'ascii',
    'utf-16',
    'utf-32'
];
|
/**
 * Service for converting between strings and binary buffers in arbitrary
 * encodings, and for detecting the encoding of binary content (by BOM, by a
 * zero-byte UTF-16 heuristic, and optionally by statistical guessing).
 */
let EncodingService = class EncodingService {
    /**
     * Encodes a string into a binary buffer.
     *
     * @param value the text to encode.
     * @param options optional `{ encoding, hasBOM }`. A missing encoding is
     * treated as UTF-8; `hasBOM` requests a byte order mark in the output.
     * @returns the value produced by `BinaryBuffer.fromString` (plain UTF-8
     * without BOM) or `BinaryBuffer.wrap` (everything else).
     */
    encode(value, options) {
        const requestedEncoding = options == null ? undefined : options.encoding;
        const addBOM = options == null ? undefined : options.hasBOM;
        const encoding = this.toIconvEncoding(requestedEncoding);
        if (encoding === encodings_1.UTF8 && !addBOM) {
            // Plain UTF-8 without a BOM does not need iconv at all.
            return buffer_1.BinaryBuffer.fromString(value);
        }
        return buffer_1.BinaryBuffer.wrap(iconv.encode(value, encoding, { addBOM }));
    }
    /**
     * Decodes a binary buffer into a string.
     *
     * @param value object exposing the bytes to decode as `value.buffer`.
     * @param encoding encoding name; falsy values mean UTF-8.
     * @returns the decoded string.
     */
    decode(value, encoding) {
        const bytes = safer_buffer_1.Buffer.from(value.buffer);
        return iconv.decode(bytes, this.toIconvEncoding(encoding));
    }
    /**
     * @param encoding encoding name; falsy values mean UTF-8.
     * @returns whether iconv reports the (normalized) encoding as existing.
     */
    exists(encoding) {
        return iconv.encodingExists(this.toIconvEncoding(encoding));
    }
    /**
     * Normalizes an encoding name before it is passed to iconv: a falsy value
     * and the "UTF-8 with BOM" alias both map to plain UTF-8, anything else is
     * returned unchanged.
     */
    toIconvEncoding(encoding) {
        if (!encoding || encoding === encodings_1.UTF8_with_bom) {
            return encodings_1.UTF8;
        }
        return encoding;
    }
    /**
     * Resolves the encoding and BOM flag to use when writing a resource.
     *
     * @param encoding the encoding chosen for the resource.
     * @param options `{ overwriteEncoding, read }`; `read(length)` must return
     * (a promise of) the first `length` bytes of the existing resource.
     * @returns a promise of `{ encoding, hasBOM }`.
     */
    async toResourceEncoding(encoding, options) {
        // These encodings are always written with a BOM.
        const alwaysWithBOM = encoding === encodings_1.UTF16be
            || encoding === encodings_1.UTF16le
            || encoding === encodings_1.UTF8_with_bom;
        if (alwaysWithBOM) {
            return { encoding, hasBOM: true };
        }
        // For plain UTF-8 that is not being overwritten, keep a BOM that is
        // already present in the existing content.
        const overwriteEncoding = options == null ? undefined : options.overwriteEncoding;
        if (!overwriteEncoding && encoding === encodings_1.UTF8) {
            try {
                const head = await options.read(encodings_1.UTF8_BOM.length);
                const detected = this.detectEncodingByBOMFromBuffer(safer_buffer_1.Buffer.from(head), head.byteLength);
                if (detected === encodings_1.UTF8_with_bom) {
                    return { encoding, hasBOM: true };
                }
            }
            catch (error) {
                // Deliberate best effort: any failure while probing the existing
                // content (including missing `options`) falls through to "no BOM".
            }
        }
        return { encoding, hasBOM: false };
    }
    /**
     * Detects the encoding of binary content.
     *
     * A BOM wins. Otherwise up to `ZERO_BYTE_DETECTION_BUFFER_MAX_LEN` leading
     * bytes are scanned for zero bytes: a strict alternation of zero and
     * non-zero bytes is reported as UTF-16 LE/BE, any other zero byte marks the
     * content as binary. If nothing was found and `autoGuessEncoding` is set,
     * the encoding is guessed via `guessEncodingByBuffer`.
     *
     * @param data object exposing `buffer` (the bytes) and `byteLength`.
     * @param autoGuessEncoding whether to fall back to encoding guessing.
     * @returns a promise of `{ seemsBinary, encoding }`; `encoding` may be undefined.
     */
    async detectEncoding(data, autoGuessEncoding) {
        const buffer = safer_buffer_1.Buffer.from(data.buffer);
        const bytesRead = data.byteLength;
        // A BOM is the most reliable signal, so check it first.
        let encoding = this.detectEncodingByBOMFromBuffer(buffer, bytesRead);
        let seemsBinary = false;
        if (encoding !== encodings_1.UTF16be && encoding !== encodings_1.UTF16le && buffer) {
            let couldBeUTF16LE = true;
            let couldBeUTF16BE = true;
            let containsZeroByte = false;
            for (let i = 0; i < bytesRead && i < ZERO_BYTE_DETECTION_BUFFER_MAX_LEN; i++) {
                const isOddIndex = (i % 2 === 1);
                const isZeroByte = (buffer.readUInt8(i) === 0);
                if (isZeroByte) {
                    containsZeroByte = true;
                }
                // UTF-16 LE candidate: zero bytes only at odd indexes, never at even ones.
                if (couldBeUTF16LE && (isOddIndex && !isZeroByte || !isOddIndex && isZeroByte)) {
                    couldBeUTF16LE = false;
                }
                // UTF-16 BE candidate: zero bytes only at even indexes, never at odd ones.
                if (couldBeUTF16BE && (isOddIndex && isZeroByte || !isOddIndex && !isZeroByte)) {
                    couldBeUTF16BE = false;
                }
                // A zero byte that fits neither pattern settles the outcome; stop scanning.
                if (isZeroByte && !couldBeUTF16LE && !couldBeUTF16BE) {
                    break;
                }
            }
            if (containsZeroByte) {
                if (couldBeUTF16LE) {
                    encoding = encodings_1.UTF16le;
                }
                else if (couldBeUTF16BE) {
                    encoding = encodings_1.UTF16be;
                }
                else {
                    seemsBinary = true;
                }
            }
        }
        // Only guess when nothing else produced an answer.
        if (autoGuessEncoding && !seemsBinary && !encoding && buffer) {
            const guessedEncoding = await this.guessEncodingByBuffer(buffer.slice(0, bytesRead));
            return {
                seemsBinary: false,
                encoding: guessedEncoding
            };
        }
        return { seemsBinary, encoding };
    }
    /**
     * Detects UTF-16 BE, UTF-16 LE or UTF-8 from a leading byte order mark.
     *
     * @param buffer object with `readUInt8(index)`; may be falsy.
     * @param bytesRead number of valid bytes in `buffer`.
     * @returns the matching encoding constant, or undefined when no BOM matches
     * or too few bytes are available.
     */
    detectEncodingByBOMFromBuffer(buffer, bytesRead) {
        if (!buffer || bytesRead < encodings_1.UTF16be_BOM.length) {
            return undefined;
        }
        const b0 = buffer.readUInt8(0);
        const b1 = buffer.readUInt8(1);
        if (b0 === encodings_1.UTF16be_BOM[0] && b1 === encodings_1.UTF16be_BOM[1]) {
            return encodings_1.UTF16be;
        }
        if (b0 === encodings_1.UTF16le_BOM[0] && b1 === encodings_1.UTF16le_BOM[1]) {
            return encodings_1.UTF16le;
        }
        // The UTF-8 BOM is one byte longer than the UTF-16 ones.
        if (bytesRead < encodings_1.UTF8_BOM.length) {
            return undefined;
        }
        const b2 = buffer.readUInt8(2);
        if (b0 === encodings_1.UTF8_BOM[0] && b1 === encodings_1.UTF8_BOM[1] && b2 === encodings_1.UTF8_BOM[2]) {
            return encodings_1.UTF8_with_bom;
        }
        return undefined;
    }
    /**
     * Guesses the encoding of a buffer with `jschardet`, looking at no more
     * than `AUTO_ENCODING_GUESS_MAX_BYTES` bytes.
     *
     * @param buffer the bytes to inspect.
     * @returns a promise of the guessed encoding, or undefined when there is no
     * guess or the guess is listed in `IGNORE_ENCODINGS`.
     */
    async guessEncodingByBuffer(buffer) {
        // Loaded lazily so the detector is only required when guessing is used.
        const jschardet = await Promise.resolve().then(() => require('jschardet'));
        const guessed = jschardet.detect(buffer.slice(0, AUTO_ENCODING_GUESS_MAX_BYTES));
        if (!guessed || !guessed.encoding) {
            return undefined;
        }
        if (IGNORE_ENCODINGS.includes(guessed.encoding.toLowerCase())) {
            return undefined;
        }
        return this.toIconvEncoding(guessed.encoding);
    }
    /**
     * Decodes a binary stream into a string stream, detecting the encoding from
     * the first buffered bytes.
     *
     * @param source stream with `on('data' | 'error' | 'end')`, `pause()` and
     * `resume()`; data chunks expose `buffer` and `byteLength`.
     * @param options `{ guessEncoding, minBytesRequiredForDetection, overwriteEncoding }`.
     * `overwriteEncoding(detected)` is awaited and its result replaces the
     * detected encoding. When `minBytesRequiredForDetection` is omitted the
     * threshold is `AUTO_ENCODING_GUESS_MIN_BYTES` with `guessEncoding` and
     * `NO_ENCODING_GUESS_MIN_BYTES` without.
     * @returns a promise of `{ stream, detected }`. It rejects when detection or
     * decoder creation fails, or when the source errors before the stream was
     * handed out.
     */
    decodeStream(source, options) {
        // An explicit threshold takes precedence; the guess/no-guess default
        // applies only when none was supplied.
        const explicitMinBytes = options.minBytesRequiredForDetection;
        const minBytesRequiredForDetection = explicitMinBytes != null
            ? explicitMinBytes
            : (options.guessEncoding ? AUTO_ENCODING_GUESS_MIN_BYTES : NO_ENCODING_GUESS_MIN_BYTES);
        return new Promise((resolve, reject) => {
            const target = (0, stream_1.newWriteableStream)(strings => strings.join(''));
            const bufferedChunks = [];
            let bytesBuffered = 0;
            let decoder = undefined;
            const createDecoder = async () => {
                try {
                    // Detect the encoding from everything buffered so far...
                    const detected = await this.detectEncoding(buffer_1.BinaryBuffer.concat(bufferedChunks), options.guessEncoding);
                    // ...and let the caller have the final say.
                    detected.encoding = await options.overwriteEncoding(detected.encoding);
                    decoder = iconv.getDecoder(this.toIconvEncoding(detected.encoding));
                    // Concatenate again rather than reusing the earlier buffer:
                    // chunks may have been appended while awaiting above.
                    const decoded = decoder.write(safer_buffer_1.Buffer.from(buffer_1.BinaryBuffer.concat(bufferedChunks).buffer));
                    target.write(decoded);
                    bufferedChunks.length = 0;
                    bytesBuffered = 0;
                    // Hand out the stream only now that the encoding is known.
                    resolve({
                        stream: target,
                        detected
                    });
                }
                catch (error) {
                    reject(error);
                }
            };
            source.on('error', error => {
                target.error(error);
                // Without this the promise would never settle when the source
                // fails before the decoder exists. No-op once already resolved.
                reject(error);
            });
            source.on('data', async (chunk) => {
                if (decoder) {
                    // Encoding already known: decode and forward directly.
                    target.write(decoder.write(safer_buffer_1.Buffer.from(chunk.buffer)));
                    return;
                }
                bufferedChunks.push(chunk);
                bytesBuffered += chunk.byteLength;
                if (bytesBuffered >= minBytesRequiredForDetection) {
                    // Hold the source while the decoder is created asynchronously.
                    source.pause();
                    await createDecoder();
                    // Resume on a later tick so the receiver of the resolved
                    // stream can attach its listeners first.
                    setTimeout(() => source.resume());
                }
            });
            source.on('end', async () => {
                // The source ended before enough bytes arrived for detection:
                // detect from whatever was buffered.
                if (!decoder) {
                    await createDecoder();
                }
                target.end(decoder === null || decoder === undefined ? undefined : decoder.end());
            });
        });
    }
    /**
     * Encodes a string or a readable of strings.
     *
     * @param value a string, an object with `read()` returning strings (a
     * non-string result signals the end), or undefined.
     * @param options optional `{ encoding, hasBOM }`, as for `encode`.
     * @returns a promise. For plain UTF-8 without BOM: undefined for an
     * undefined value, otherwise the result of `BinaryBuffer.fromString` /
     * `BinaryBufferReadable.fromReadable`. For everything else: an object whose
     * `read()` yields encoded buffers and finally `null`.
     */
    async encodeStream(value, options) {
        const requestedEncoding = options == null ? undefined : options.encoding;
        const addBOM = options == null ? undefined : options.hasBOM;
        const encoding = this.toIconvEncoding(requestedEncoding);
        if (encoding === encodings_1.UTF8 && !addBOM) {
            if (value === undefined) {
                return undefined;
            }
            return typeof value === 'string'
                ? buffer_1.BinaryBuffer.fromString(value)
                : buffer_1.BinaryBufferReadable.fromReadable(value);
        }
        const input = value || '';
        const readable = typeof input === 'string' ? stream_1.Readable.fromString(input) : input;
        const encoder = iconv.getEncoder(encoding, { addBOM });
        let bytesWritten = false;
        let done = false;
        return {
            read() {
                if (done) {
                    return null;
                }
                const chunk = readable.read();
                if (typeof chunk === 'string') {
                    bytesWritten = true;
                    return buffer_1.BinaryBuffer.wrap(encoder.write(chunk));
                }
                done = true;
                // Nothing was ever written, so the encoder never emitted a BOM:
                // produce the BOM by hand so that empty content still carries it.
                if (!bytesWritten && addBOM) {
                    switch (encoding) {
                        case encodings_1.UTF8:
                        case encodings_1.UTF8_with_bom:
                            return buffer_1.BinaryBuffer.wrap(Uint8Array.from(encodings_1.UTF8_BOM));
                        case encodings_1.UTF16be:
                            return buffer_1.BinaryBuffer.wrap(Uint8Array.from(encodings_1.UTF16be_BOM));
                        case encodings_1.UTF16le:
                            return buffer_1.BinaryBuffer.wrap(Uint8Array.from(encodings_1.UTF16le_BOM));
                    }
                }
                // Flush whatever the encoder still holds.
                const leftovers = encoder.end();
                if (leftovers && leftovers.length > 0) {
                    bytesWritten = true;
                    return buffer_1.BinaryBuffer.wrap(leftovers);
                }
                return null;
            }
        };
    }
};
|
// Apply the `injectable()` class decorator from inversify; the binding is
// reassigned because `__decorate` returns the (possibly replaced) class.
EncodingService = __decorate([
    (0, inversify_1.injectable)()
], EncodingService);
// Publish the decorated class as this module's `EncodingService` export.
exports.EncodingService = EncodingService;
|
308 |
|
\ | No newline at end of file |