1 | /**
|
2 | * Efficient schema-less binary encoding with support for variable length encoding.
|
3 | *
|
4 | * Use [lib0/encoding] with [lib0/decoding]. Every encoding function has a corresponding decoding function.
|
5 | *
|
6 | * Encodes numbers in little-endian order (least to most significant byte order)
|
7 | * and is compatible with Golang's binary encoding (https://golang.org/pkg/encoding/binary/)
|
8 | * which is also used in Protocol Buffers.
|
9 | *
|
10 | * ```js
|
11 | * // encoding step
|
12 | * const encoder = encoding.createEncoder()
|
13 | * encoding.writeVarUint(encoder, 256)
|
14 | * encoding.writeVarString(encoder, 'Hello world!')
|
15 | * const buf = encoding.toUint8Array(encoder)
|
16 | * ```
|
17 | *
|
18 | * ```js
|
19 | * // decoding step
|
20 | * const decoder = decoding.createDecoder(buf)
|
21 | * decoding.readVarUint(decoder) // => 256
|
22 | * decoding.readVarString(decoder) // => 'Hello world!'
|
23 | * decoding.hasContent(decoder) // => false - all data is read
|
24 | * ```
|
25 | *
|
26 | * @module encoding
|
27 | */
|
28 |
|
29 | import * as buffer from './buffer.js'
|
30 | import * as math from './math.js'
|
31 | import * as number from './number.js'
|
32 | import * as binary from './binary.js'
|
33 | import * as string from './string.js'
|
34 | import * as array from './array.js'
|
35 |
|
36 | /**
|
37 | * A BinaryEncoder handles the encoding to an Uint8Array.
|
38 | */
|
39 | export class Encoder {
|
40 | constructor () {
|
41 | this.cpos = 0
|
42 | this.cbuf = new Uint8Array(100)
|
43 | /**
|
44 | * @type {Array<Uint8Array>}
|
45 | */
|
46 | this.bufs = []
|
47 | }
|
48 | }
|
49 |
|
50 | /**
|
51 | * @function
|
52 | * @return {Encoder}
|
53 | */
|
54 | export const createEncoder = () => new Encoder()
|
55 |
|
56 | /**
|
57 | * @param {function(Encoder):void} f
|
58 | */
|
59 | export const encode = (f) => {
|
60 | const encoder = createEncoder()
|
61 | f(encoder)
|
62 | return toUint8Array(encoder)
|
63 | }
|
64 |
|
65 | /**
|
66 | * The current length of the encoded data.
|
67 | *
|
68 | * @function
|
69 | * @param {Encoder} encoder
|
70 | * @return {number}
|
71 | */
|
72 | export const length = encoder => {
|
73 | let len = encoder.cpos
|
74 | for (let i = 0; i < encoder.bufs.length; i++) {
|
75 | len += encoder.bufs[i].length
|
76 | }
|
77 | return len
|
78 | }
|
79 |
|
80 | /**
|
81 | * Check whether encoder is empty.
|
82 | *
|
83 | * @function
|
84 | * @param {Encoder} encoder
|
85 | * @return {boolean}
|
86 | */
|
87 | export const hasContent = encoder => encoder.cpos > 0 || encoder.bufs.length > 0
|
88 |
|
89 | /**
|
90 | * Transform to Uint8Array.
|
91 | *
|
92 | * @function
|
93 | * @param {Encoder} encoder
|
94 | * @return {Uint8Array} The created ArrayBuffer.
|
95 | */
|
96 | export const toUint8Array = encoder => {
|
97 | const uint8arr = new Uint8Array(length(encoder))
|
98 | let curPos = 0
|
99 | for (let i = 0; i < encoder.bufs.length; i++) {
|
100 | const d = encoder.bufs[i]
|
101 | uint8arr.set(d, curPos)
|
102 | curPos += d.length
|
103 | }
|
104 | uint8arr.set(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos), curPos)
|
105 | return uint8arr
|
106 | }
|
107 |
|
108 | /**
|
109 | * Verify that it is possible to write `len` bytes wtihout checking. If
|
110 | * necessary, a new Buffer with the required length is attached.
|
111 | *
|
112 | * @param {Encoder} encoder
|
113 | * @param {number} len
|
114 | */
|
115 | export const verifyLen = (encoder, len) => {
|
116 | const bufferLen = encoder.cbuf.length
|
117 | if (bufferLen - encoder.cpos < len) {
|
118 | encoder.bufs.push(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos))
|
119 | encoder.cbuf = new Uint8Array(math.max(bufferLen, len) * 2)
|
120 | encoder.cpos = 0
|
121 | }
|
122 | }
|
123 |
|
124 | /**
|
125 | * Write one byte to the encoder.
|
126 | *
|
127 | * @function
|
128 | * @param {Encoder} encoder
|
129 | * @param {number} num The byte that is to be encoded.
|
130 | */
|
131 | export const write = (encoder, num) => {
|
132 | const bufferLen = encoder.cbuf.length
|
133 | if (encoder.cpos === bufferLen) {
|
134 | encoder.bufs.push(encoder.cbuf)
|
135 | encoder.cbuf = new Uint8Array(bufferLen * 2)
|
136 | encoder.cpos = 0
|
137 | }
|
138 | encoder.cbuf[encoder.cpos++] = num
|
139 | }
|
140 |
|
141 | /**
|
142 | * Write one byte at a specific position.
|
143 | * Position must already be written (i.e. encoder.length > pos)
|
144 | *
|
145 | * @function
|
146 | * @param {Encoder} encoder
|
147 | * @param {number} pos Position to which to write data
|
148 | * @param {number} num Unsigned 8-bit integer
|
149 | */
|
150 | export const set = (encoder, pos, num) => {
|
151 | let buffer = null
|
152 | // iterate all buffers and adjust position
|
153 | for (let i = 0; i < encoder.bufs.length && buffer === null; i++) {
|
154 | const b = encoder.bufs[i]
|
155 | if (pos < b.length) {
|
156 | buffer = b // found buffer
|
157 | } else {
|
158 | pos -= b.length
|
159 | }
|
160 | }
|
161 | if (buffer === null) {
|
162 | // use current buffer
|
163 | buffer = encoder.cbuf
|
164 | }
|
165 | buffer[pos] = num
|
166 | }
|
167 |
|
168 | /**
|
169 | * Write one byte as an unsigned integer.
|
170 | *
|
171 | * @function
|
172 | * @param {Encoder} encoder
|
173 | * @param {number} num The number that is to be encoded.
|
174 | */
|
175 | export const writeUint8 = write
|
176 |
|
177 | /**
|
178 | * Write one byte as an unsigned Integer at a specific location.
|
179 | *
|
180 | * @function
|
181 | * @param {Encoder} encoder
|
182 | * @param {number} pos The location where the data will be written.
|
183 | * @param {number} num The number that is to be encoded.
|
184 | */
|
185 | export const setUint8 = set
|
186 |
|
187 | /**
|
188 | * Write two bytes as an unsigned integer.
|
189 | *
|
190 | * @function
|
191 | * @param {Encoder} encoder
|
192 | * @param {number} num The number that is to be encoded.
|
193 | */
|
194 | export const writeUint16 = (encoder, num) => {
|
195 | write(encoder, num & binary.BITS8)
|
196 | write(encoder, (num >>> 8) & binary.BITS8)
|
197 | }
|
198 | /**
|
199 | * Write two bytes as an unsigned integer at a specific location.
|
200 | *
|
201 | * @function
|
202 | * @param {Encoder} encoder
|
203 | * @param {number} pos The location where the data will be written.
|
204 | * @param {number} num The number that is to be encoded.
|
205 | */
|
206 | export const setUint16 = (encoder, pos, num) => {
|
207 | set(encoder, pos, num & binary.BITS8)
|
208 | set(encoder, pos + 1, (num >>> 8) & binary.BITS8)
|
209 | }
|
210 |
|
211 | /**
|
212 | * Write two bytes as an unsigned integer
|
213 | *
|
214 | * @function
|
215 | * @param {Encoder} encoder
|
216 | * @param {number} num The number that is to be encoded.
|
217 | */
|
218 | export const writeUint32 = (encoder, num) => {
|
219 | for (let i = 0; i < 4; i++) {
|
220 | write(encoder, num & binary.BITS8)
|
221 | num >>>= 8
|
222 | }
|
223 | }
|
224 |
|
225 | /**
|
226 | * Write two bytes as an unsigned integer in big endian order.
|
227 | * (most significant byte first)
|
228 | *
|
229 | * @function
|
230 | * @param {Encoder} encoder
|
231 | * @param {number} num The number that is to be encoded.
|
232 | */
|
233 | export const writeUint32BigEndian = (encoder, num) => {
|
234 | for (let i = 3; i >= 0; i--) {
|
235 | write(encoder, (num >>> (8 * i)) & binary.BITS8)
|
236 | }
|
237 | }
|
238 |
|
239 | /**
|
240 | * Write two bytes as an unsigned integer at a specific location.
|
241 | *
|
242 | * @function
|
243 | * @param {Encoder} encoder
|
244 | * @param {number} pos The location where the data will be written.
|
245 | * @param {number} num The number that is to be encoded.
|
246 | */
|
247 | export const setUint32 = (encoder, pos, num) => {
|
248 | for (let i = 0; i < 4; i++) {
|
249 | set(encoder, pos + i, num & binary.BITS8)
|
250 | num >>>= 8
|
251 | }
|
252 | }
|
253 |
|
254 | /**
|
255 | * Write a variable length unsigned integer. Max encodable integer is 2^53.
|
256 | *
|
257 | * @function
|
258 | * @param {Encoder} encoder
|
259 | * @param {number} num The number that is to be encoded.
|
260 | */
|
261 | export const writeVarUint = (encoder, num) => {
|
262 | while (num > binary.BITS7) {
|
263 | write(encoder, binary.BIT8 | (binary.BITS7 & num))
|
264 | num = math.floor(num / 128) // shift >>> 7
|
265 | }
|
266 | write(encoder, binary.BITS7 & num)
|
267 | }
|
268 |
|
269 | /**
|
270 | * Write a variable length integer.
|
271 | *
|
272 | * We use the 7th bit instead for signaling that this is a negative number.
|
273 | *
|
274 | * @function
|
275 | * @param {Encoder} encoder
|
276 | * @param {number} num The number that is to be encoded.
|
277 | */
|
278 | export const writeVarInt = (encoder, num) => {
|
279 | const isNegative = math.isNegativeZero(num)
|
280 | if (isNegative) {
|
281 | num = -num
|
282 | }
|
283 | // |- whether to continue reading |- whether is negative |- number
|
284 | write(encoder, (num > binary.BITS6 ? binary.BIT8 : 0) | (isNegative ? binary.BIT7 : 0) | (binary.BITS6 & num))
|
285 | num = math.floor(num / 64) // shift >>> 6
|
286 | // We don't need to consider the case of num === 0 so we can use a different
|
287 | // pattern here than above.
|
288 | while (num > 0) {
|
289 | write(encoder, (num > binary.BITS7 ? binary.BIT8 : 0) | (binary.BITS7 & num))
|
290 | num = math.floor(num / 128) // shift >>> 7
|
291 | }
|
292 | }
|
293 |
|
294 | /**
|
295 | * A cache to store strings temporarily
|
296 | */
|
297 | const _strBuffer = new Uint8Array(30000)
|
298 | const _maxStrBSize = _strBuffer.length / 3
|
299 |
|
300 | /**
|
301 | * Write a variable length string.
|
302 | *
|
303 | * @function
|
304 | * @param {Encoder} encoder
|
305 | * @param {String} str The string that is to be encoded.
|
306 | */
|
307 | export const _writeVarStringNative = (encoder, str) => {
|
308 | if (str.length < _maxStrBSize) {
|
309 | // We can encode the string into the existing buffer
|
310 | /* c8 ignore next */
|
311 | const written = string.utf8TextEncoder.encodeInto(str, _strBuffer).written || 0
|
312 | writeVarUint(encoder, written)
|
313 | for (let i = 0; i < written; i++) {
|
314 | write(encoder, _strBuffer[i])
|
315 | }
|
316 | } else {
|
317 | writeVarUint8Array(encoder, string.encodeUtf8(str))
|
318 | }
|
319 | }
|
320 |
|
321 | /**
|
322 | * Write a variable length string.
|
323 | *
|
324 | * @function
|
325 | * @param {Encoder} encoder
|
326 | * @param {String} str The string that is to be encoded.
|
327 | */
|
328 | export const _writeVarStringPolyfill = (encoder, str) => {
|
329 | const encodedString = unescape(encodeURIComponent(str))
|
330 | const len = encodedString.length
|
331 | writeVarUint(encoder, len)
|
332 | for (let i = 0; i < len; i++) {
|
333 | write(encoder, /** @type {number} */ (encodedString.codePointAt(i)))
|
334 | }
|
335 | }
|
336 |
|
337 | /**
|
338 | * Write a variable length string.
|
339 | *
|
340 | * @function
|
341 | * @param {Encoder} encoder
|
342 | * @param {String} str The string that is to be encoded.
|
343 | */
|
344 | /* c8 ignore next */
|
345 | export const writeVarString = (string.utf8TextEncoder && /** @type {any} */ (string.utf8TextEncoder).encodeInto) ? _writeVarStringNative : _writeVarStringPolyfill
|
346 |
|
347 | /**
|
348 | * Write a string terminated by a special byte sequence. This is not very performant and is
|
349 | * generally discouraged. However, the resulting byte arrays are lexiographically ordered which
|
350 | * makes this a nice feature for databases.
|
351 | *
|
352 | * The string will be encoded using utf8 and then terminated and escaped using writeTerminatingUint8Array.
|
353 | *
|
354 | * @function
|
355 | * @param {Encoder} encoder
|
356 | * @param {String} str The string that is to be encoded.
|
357 | */
|
358 | export const writeTerminatedString = (encoder, str) =>
|
359 | writeTerminatedUint8Array(encoder, string.encodeUtf8(str))
|
360 |
|
361 | /**
|
362 | * Write a terminating Uint8Array. Note that this is not performant and is generally
|
363 | * discouraged. There are few situations when this is needed.
|
364 | *
|
365 | * We use 0x0 as a terminating character. 0x1 serves as an escape character for 0x0 and 0x1.
|
366 | *
|
367 | * Example: [0,1,2] is encoded to [1,0,1,1,2,0]. 0x0, and 0x1 needed to be escaped using 0x1. Then
|
368 | * the result is terminated using the 0x0 character.
|
369 | *
|
370 | * This is basically how many systems implement null terminated strings. However, we use an escape
|
371 | * character 0x1 to avoid issues and potenial attacks on our database (if this is used as a key
|
372 | * encoder for NoSql databases).
|
373 | *
|
374 | * @function
|
375 | * @param {Encoder} encoder
|
376 | * @param {Uint8Array} buf The string that is to be encoded.
|
377 | */
|
378 | export const writeTerminatedUint8Array = (encoder, buf) => {
|
379 | for (let i = 0; i < buf.length; i++) {
|
380 | const b = buf[i]
|
381 | if (b === 0 || b === 1) {
|
382 | write(encoder, 1)
|
383 | }
|
384 | write(encoder, buf[i])
|
385 | }
|
386 | write(encoder, 0)
|
387 | }
|
388 |
|
389 | /**
|
390 | * Write the content of another Encoder.
|
391 | *
|
392 | * @TODO: can be improved!
|
393 | * - Note: Should consider that when appending a lot of small Encoders, we should rather clone than referencing the old structure.
|
394 | * Encoders start with a rather big initial buffer.
|
395 | *
|
396 | * @function
|
397 | * @param {Encoder} encoder The enUint8Arr
|
398 | * @param {Encoder} append The BinaryEncoder to be written.
|
399 | */
|
400 | export const writeBinaryEncoder = (encoder, append) => writeUint8Array(encoder, toUint8Array(append))
|
401 |
|
402 | /**
|
403 | * Append fixed-length Uint8Array to the encoder.
|
404 | *
|
405 | * @function
|
406 | * @param {Encoder} encoder
|
407 | * @param {Uint8Array} uint8Array
|
408 | */
|
409 | export const writeUint8Array = (encoder, uint8Array) => {
|
410 | const bufferLen = encoder.cbuf.length
|
411 | const cpos = encoder.cpos
|
412 | const leftCopyLen = math.min(bufferLen - cpos, uint8Array.length)
|
413 | const rightCopyLen = uint8Array.length - leftCopyLen
|
414 | encoder.cbuf.set(uint8Array.subarray(0, leftCopyLen), cpos)
|
415 | encoder.cpos += leftCopyLen
|
416 | if (rightCopyLen > 0) {
|
417 | // Still something to write, write right half..
|
418 | // Append new buffer
|
419 | encoder.bufs.push(encoder.cbuf)
|
420 | // must have at least size of remaining buffer
|
421 | encoder.cbuf = new Uint8Array(math.max(bufferLen * 2, rightCopyLen))
|
422 | // copy array
|
423 | encoder.cbuf.set(uint8Array.subarray(leftCopyLen))
|
424 | encoder.cpos = rightCopyLen
|
425 | }
|
426 | }
|
427 |
|
428 | /**
|
429 | * Append an Uint8Array to Encoder.
|
430 | *
|
431 | * @function
|
432 | * @param {Encoder} encoder
|
433 | * @param {Uint8Array} uint8Array
|
434 | */
|
435 | export const writeVarUint8Array = (encoder, uint8Array) => {
|
436 | writeVarUint(encoder, uint8Array.byteLength)
|
437 | writeUint8Array(encoder, uint8Array)
|
438 | }
|
439 |
|
440 | /**
|
441 | * Create an DataView of the next `len` bytes. Use it to write data after
|
442 | * calling this function.
|
443 | *
|
444 | * ```js
|
445 | * // write float32 using DataView
|
446 | * const dv = writeOnDataView(encoder, 4)
|
447 | * dv.setFloat32(0, 1.1)
|
448 | * // read float32 using DataView
|
449 | * const dv = readFromDataView(encoder, 4)
|
450 | * dv.getFloat32(0) // => 1.100000023841858 (leaving it to the reader to find out why this is the correct result)
|
451 | * ```
|
452 | *
|
453 | * @param {Encoder} encoder
|
454 | * @param {number} len
|
455 | * @return {DataView}
|
456 | */
|
457 | export const writeOnDataView = (encoder, len) => {
|
458 | verifyLen(encoder, len)
|
459 | const dview = new DataView(encoder.cbuf.buffer, encoder.cpos, len)
|
460 | encoder.cpos += len
|
461 | return dview
|
462 | }
|
463 |
|
464 | /**
|
465 | * @param {Encoder} encoder
|
466 | * @param {number} num
|
467 | */
|
468 | export const writeFloat32 = (encoder, num) => writeOnDataView(encoder, 4).setFloat32(0, num, false)
|
469 |
|
470 | /**
|
471 | * @param {Encoder} encoder
|
472 | * @param {number} num
|
473 | */
|
474 | export const writeFloat64 = (encoder, num) => writeOnDataView(encoder, 8).setFloat64(0, num, false)
|
475 |
|
476 | /**
|
477 | * @param {Encoder} encoder
|
478 | * @param {bigint} num
|
479 | */
|
480 | export const writeBigInt64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigInt64(0, num, false)
|
481 |
|
482 | /**
|
483 | * @param {Encoder} encoder
|
484 | * @param {bigint} num
|
485 | */
|
486 | export const writeBigUint64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigUint64(0, num, false)
|
487 |
|
488 | const floatTestBed = new DataView(new ArrayBuffer(4))
|
489 | /**
|
490 | * Check if a number can be encoded as a 32 bit float.
|
491 | *
|
492 | * @param {number} num
|
493 | * @return {boolean}
|
494 | */
|
495 | const isFloat32 = num => {
|
496 | floatTestBed.setFloat32(0, num)
|
497 | return floatTestBed.getFloat32(0) === num
|
498 | }
|
499 |
|
500 | /**
|
501 | * Encode data with efficient binary format.
|
502 | *
|
503 | * Differences to JSON:
|
504 | * • Transforms data to a binary format (not to a string)
|
505 | * • Encodes undefined, NaN, and ArrayBuffer (these can't be represented in JSON)
|
506 | * • Numbers are efficiently encoded either as a variable length integer, as a
|
507 | * 32 bit float, as a 64 bit float, or as a 64 bit bigint.
|
508 | *
|
509 | * Encoding table:
|
510 | *
|
511 | * | Data Type | Prefix | Encoding Method | Comment |
|
512 | * | ------------------- | -------- | ------------------ | ------- |
|
513 | * | undefined | 127 | | Functions, symbol, and everything that cannot be identified is encoded as undefined |
|
514 | * | null | 126 | | |
|
515 | * | integer | 125 | writeVarInt | Only encodes 32 bit signed integers |
|
516 | * | float32 | 124 | writeFloat32 | |
|
517 | * | float64 | 123 | writeFloat64 | |
|
518 | * | bigint | 122 | writeBigInt64 | |
|
519 | * | boolean (false) | 121 | | True and false are different data types so we save the following byte |
|
520 | * | boolean (true) | 120 | | - 0b01111000 so the last bit determines whether true or false |
|
521 | * | string | 119 | writeVarString | |
|
522 | * | object<string,any> | 118 | custom | Writes {length} then {length} key-value pairs |
|
523 | * | array<any> | 117 | custom | Writes {length} then {length} json values |
|
524 | * | Uint8Array | 116 | writeVarUint8Array | We use Uint8Array for any kind of binary data |
|
525 | *
|
526 | * Reasons for the decreasing prefix:
|
527 | * We need the first bit for extendability (later we may want to encode the
|
528 | * prefix with writeVarUint). The remaining 7 bits are divided as follows:
|
529 | * [0-30] the beginning of the data range is used for custom purposes
|
530 | * (defined by the function that uses this library)
|
531 | * [31-127] the end of the data range is used for data encoding by
|
532 | * lib0/encoding.js
|
533 | *
|
534 | * @param {Encoder} encoder
|
535 | * @param {undefined|null|number|bigint|boolean|string|Object<string,any>|Array<any>|Uint8Array} data
|
536 | */
|
537 | export const writeAny = (encoder, data) => {
|
538 | switch (typeof data) {
|
539 | case 'string':
|
540 | // TYPE 119: STRING
|
541 | write(encoder, 119)
|
542 | writeVarString(encoder, data)
|
543 | break
|
544 | case 'number':
|
545 | if (number.isInteger(data) && math.abs(data) <= binary.BITS31) {
|
546 | // TYPE 125: INTEGER
|
547 | write(encoder, 125)
|
548 | writeVarInt(encoder, data)
|
549 | } else if (isFloat32(data)) {
|
550 | // TYPE 124: FLOAT32
|
551 | write(encoder, 124)
|
552 | writeFloat32(encoder, data)
|
553 | } else {
|
554 | // TYPE 123: FLOAT64
|
555 | write(encoder, 123)
|
556 | writeFloat64(encoder, data)
|
557 | }
|
558 | break
|
559 | case 'bigint':
|
560 | // TYPE 122: BigInt
|
561 | write(encoder, 122)
|
562 | writeBigInt64(encoder, data)
|
563 | break
|
564 | case 'object':
|
565 | if (data === null) {
|
566 | // TYPE 126: null
|
567 | write(encoder, 126)
|
568 | } else if (array.isArray(data)) {
|
569 | // TYPE 117: Array
|
570 | write(encoder, 117)
|
571 | writeVarUint(encoder, data.length)
|
572 | for (let i = 0; i < data.length; i++) {
|
573 | writeAny(encoder, data[i])
|
574 | }
|
575 | } else if (data instanceof Uint8Array) {
|
576 | // TYPE 116: ArrayBuffer
|
577 | write(encoder, 116)
|
578 | writeVarUint8Array(encoder, data)
|
579 | } else {
|
580 | // TYPE 118: Object
|
581 | write(encoder, 118)
|
582 | const keys = Object.keys(data)
|
583 | writeVarUint(encoder, keys.length)
|
584 | for (let i = 0; i < keys.length; i++) {
|
585 | const key = keys[i]
|
586 | writeVarString(encoder, key)
|
587 | writeAny(encoder, data[key])
|
588 | }
|
589 | }
|
590 | break
|
591 | case 'boolean':
|
592 | // TYPE 120/121: boolean (true/false)
|
593 | write(encoder, data ? 120 : 121)
|
594 | break
|
595 | default:
|
596 | // TYPE 127: undefined
|
597 | write(encoder, 127)
|
598 | }
|
599 | }
|
600 |
|
601 | /**
|
602 | * Now come a few stateful encoder that have their own classes.
|
603 | */
|
604 |
|
605 | /**
|
606 | * Basic Run Length Encoder - a basic compression implementation.
|
607 | *
|
608 | * Encodes [1,1,1,7] to [1,3,7,1] (3 times 1, 1 time 7). This encoder might do more harm than good if there are a lot of values that are not repeated.
|
609 | *
|
610 | * It was originally used for image compression. Cool .. article http://csbruce.com/cbm/transactor/pdfs/trans_v7_i06.pdf
|
611 | *
|
612 | * @note T must not be null!
|
613 | *
|
614 | * @template T
|
615 | */
|
616 | export class RleEncoder extends Encoder {
|
617 | /**
|
618 | * @param {function(Encoder, T):void} writer
|
619 | */
|
620 | constructor (writer) {
|
621 | super()
|
622 | /**
|
623 | * The writer
|
624 | */
|
625 | this.w = writer
|
626 | /**
|
627 | * Current state
|
628 | * @type {T|null}
|
629 | */
|
630 | this.s = null
|
631 | this.count = 0
|
632 | }
|
633 |
|
634 | /**
|
635 | * @param {T} v
|
636 | */
|
637 | write (v) {
|
638 | if (this.s === v) {
|
639 | this.count++
|
640 | } else {
|
641 | if (this.count > 0) {
|
642 | // flush counter, unless this is the first value (count = 0)
|
643 | writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
|
644 | }
|
645 | this.count = 1
|
646 | // write first value
|
647 | this.w(this, v)
|
648 | this.s = v
|
649 | }
|
650 | }
|
651 | }
|
652 |
|
653 | /**
|
654 | * Basic diff decoder using variable length encoding.
|
655 | *
|
656 | * Encodes the values [3, 1100, 1101, 1050, 0] to [3, 1097, 1, -51, -1050] using writeVarInt.
|
657 | */
|
658 | export class IntDiffEncoder extends Encoder {
|
659 | /**
|
660 | * @param {number} start
|
661 | */
|
662 | constructor (start) {
|
663 | super()
|
664 | /**
|
665 | * Current state
|
666 | * @type {number}
|
667 | */
|
668 | this.s = start
|
669 | }
|
670 |
|
671 | /**
|
672 | * @param {number} v
|
673 | */
|
674 | write (v) {
|
675 | writeVarInt(this, v - this.s)
|
676 | this.s = v
|
677 | }
|
678 | }
|
679 |
|
680 | /**
|
681 | * A combination of IntDiffEncoder and RleEncoder.
|
682 | *
|
683 | * Basically first writes the IntDiffEncoder and then counts duplicate diffs using RleEncoding.
|
684 | *
|
685 | * Encodes the values [1,1,1,2,3,4,5,6] as [1,1,0,2,1,5] (RLE([1,0,0,1,1,1,1,1]) ⇒ RleIntDiff[1,1,0,2,1,5])
|
686 | */
|
687 | export class RleIntDiffEncoder extends Encoder {
|
688 | /**
|
689 | * @param {number} start
|
690 | */
|
691 | constructor (start) {
|
692 | super()
|
693 | /**
|
694 | * Current state
|
695 | * @type {number}
|
696 | */
|
697 | this.s = start
|
698 | this.count = 0
|
699 | }
|
700 |
|
701 | /**
|
702 | * @param {number} v
|
703 | */
|
704 | write (v) {
|
705 | if (this.s === v && this.count > 0) {
|
706 | this.count++
|
707 | } else {
|
708 | if (this.count > 0) {
|
709 | // flush counter, unless this is the first value (count = 0)
|
710 | writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
|
711 | }
|
712 | this.count = 1
|
713 | // write first value
|
714 | writeVarInt(this, v - this.s)
|
715 | this.s = v
|
716 | }
|
717 | }
|
718 | }
|
719 |
|
720 | /**
|
721 | * @param {UintOptRleEncoder} encoder
|
722 | */
|
723 | const flushUintOptRleEncoder = encoder => {
|
724 | if (encoder.count > 0) {
|
725 | // flush counter, unless this is the first value (count = 0)
|
726 | // case 1: just a single value. set sign to positive
|
727 | // case 2: write several values. set sign to negative to indicate that there is a length coming
|
728 | writeVarInt(encoder.encoder, encoder.count === 1 ? encoder.s : -encoder.s)
|
729 | if (encoder.count > 1) {
|
730 | writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
|
731 | }
|
732 | }
|
733 | }
|
734 |
|
735 | /**
|
736 | * Optimized Rle encoder that does not suffer from the mentioned problem of the basic Rle encoder.
|
737 | *
|
738 | * Internally uses VarInt encoder to write unsigned integers. If the input occurs multiple times, we write
|
739 | * write it as a negative number. The UintOptRleDecoder then understands that it needs to read a count.
|
740 | *
|
741 | * Encodes [1,2,3,3,3] as [1,2,-3,3] (once 1, once 2, three times 3)
|
742 | */
|
743 | export class UintOptRleEncoder {
|
744 | constructor () {
|
745 | this.encoder = new Encoder()
|
746 | /**
|
747 | * @type {number}
|
748 | */
|
749 | this.s = 0
|
750 | this.count = 0
|
751 | }
|
752 |
|
753 | /**
|
754 | * @param {number} v
|
755 | */
|
756 | write (v) {
|
757 | if (this.s === v) {
|
758 | this.count++
|
759 | } else {
|
760 | flushUintOptRleEncoder(this)
|
761 | this.count = 1
|
762 | this.s = v
|
763 | }
|
764 | }
|
765 |
|
766 | toUint8Array () {
|
767 | flushUintOptRleEncoder(this)
|
768 | return toUint8Array(this.encoder)
|
769 | }
|
770 | }
|
771 |
|
772 | /**
|
773 | * Increasing Uint Optimized RLE Encoder
|
774 | *
|
775 | * The RLE encoder counts the number of same occurences of the same value.
|
776 | * The IncUintOptRle encoder counts if the value increases.
|
777 | * I.e. 7, 8, 9, 10 will be encoded as [-7, 4]. 1, 3, 5 will be encoded
|
778 | * as [1, 3, 5].
|
779 | */
|
780 | export class IncUintOptRleEncoder {
|
781 | constructor () {
|
782 | this.encoder = new Encoder()
|
783 | /**
|
784 | * @type {number}
|
785 | */
|
786 | this.s = 0
|
787 | this.count = 0
|
788 | }
|
789 |
|
790 | /**
|
791 | * @param {number} v
|
792 | */
|
793 | write (v) {
|
794 | if (this.s + this.count === v) {
|
795 | this.count++
|
796 | } else {
|
797 | flushUintOptRleEncoder(this)
|
798 | this.count = 1
|
799 | this.s = v
|
800 | }
|
801 | }
|
802 |
|
803 | toUint8Array () {
|
804 | flushUintOptRleEncoder(this)
|
805 | return toUint8Array(this.encoder)
|
806 | }
|
807 | }
|
808 |
|
809 | /**
|
810 | * @param {IntDiffOptRleEncoder} encoder
|
811 | */
|
812 | const flushIntDiffOptRleEncoder = encoder => {
|
813 | if (encoder.count > 0) {
|
814 | // 31 bit making up the diff | wether to write the counter
|
815 | // const encodedDiff = encoder.diff << 1 | (encoder.count === 1 ? 0 : 1)
|
816 | const encodedDiff = encoder.diff * 2 + (encoder.count === 1 ? 0 : 1)
|
817 | // flush counter, unless this is the first value (count = 0)
|
818 | // case 1: just a single value. set first bit to positive
|
819 | // case 2: write several values. set first bit to negative to indicate that there is a length coming
|
820 | writeVarInt(encoder.encoder, encodedDiff)
|
821 | if (encoder.count > 1) {
|
822 | writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
|
823 | }
|
824 | }
|
825 | }
|
826 |
|
827 | /**
|
828 | * A combination of the IntDiffEncoder and the UintOptRleEncoder.
|
829 | *
|
830 | * The count approach is similar to the UintDiffOptRleEncoder, but instead of using the negative bitflag, it encodes
|
831 | * in the LSB whether a count is to be read. Therefore this Encoder only supports 31 bit integers!
|
832 | *
|
833 | * Encodes [1, 2, 3, 2] as [3, 1, 6, -1] (more specifically [(1 << 1) | 1, (3 << 0) | 0, -1])
|
834 | *
|
835 | * Internally uses variable length encoding. Contrary to normal UintVar encoding, the first byte contains:
|
836 | * * 1 bit that denotes whether the next value is a count (LSB)
|
837 | * * 1 bit that denotes whether this value is negative (MSB - 1)
|
838 | * * 1 bit that denotes whether to continue reading the variable length integer (MSB)
|
839 | *
|
840 | * Therefore, only five bits remain to encode diff ranges.
|
841 | *
|
842 | * Use this Encoder only when appropriate. In most cases, this is probably a bad idea.
|
843 | */
|
844 | export class IntDiffOptRleEncoder {
|
845 | constructor () {
|
846 | this.encoder = new Encoder()
|
847 | /**
|
848 | * @type {number}
|
849 | */
|
850 | this.s = 0
|
851 | this.count = 0
|
852 | this.diff = 0
|
853 | }
|
854 |
|
855 | /**
|
856 | * @param {number} v
|
857 | */
|
858 | write (v) {
|
859 | if (this.diff === v - this.s) {
|
860 | this.s = v
|
861 | this.count++
|
862 | } else {
|
863 | flushIntDiffOptRleEncoder(this)
|
864 | this.count = 1
|
865 | this.diff = v - this.s
|
866 | this.s = v
|
867 | }
|
868 | }
|
869 |
|
870 | toUint8Array () {
|
871 | flushIntDiffOptRleEncoder(this)
|
872 | return toUint8Array(this.encoder)
|
873 | }
|
874 | }
|
875 |
|
876 | /**
|
877 | * Optimized String Encoder.
|
878 | *
|
879 | * Encoding many small strings in a simple Encoder is not very efficient. The function call to decode a string takes some time and creates references that must be eventually deleted.
|
880 | * In practice, when decoding several million small strings, the GC will kick in more and more often to collect orphaned string objects (or maybe there is another reason?).
|
881 | *
|
882 | * This string encoder solves the above problem. All strings are concatenated and written as a single string using a single encoding call.
|
883 | *
|
884 | * The lengths are encoded using a UintOptRleEncoder.
|
885 | */
|
886 | export class StringEncoder {
|
887 | constructor () {
|
888 | /**
|
889 | * @type {Array<string>}
|
890 | */
|
891 | this.sarr = []
|
892 | this.s = ''
|
893 | this.lensE = new UintOptRleEncoder()
|
894 | }
|
895 |
|
896 | /**
|
897 | * @param {string} string
|
898 | */
|
899 | write (string) {
|
900 | this.s += string
|
901 | if (this.s.length > 19) {
|
902 | this.sarr.push(this.s)
|
903 | this.s = ''
|
904 | }
|
905 | this.lensE.write(string.length)
|
906 | }
|
907 |
|
908 | toUint8Array () {
|
909 | const encoder = new Encoder()
|
910 | this.sarr.push(this.s)
|
911 | this.s = ''
|
912 | writeVarString(encoder, this.sarr.join(''))
|
913 | writeUint8Array(encoder, this.lensE.toUint8Array())
|
914 | return toUint8Array(encoder)
|
915 | }
|
916 | }
|