UNPKG

26.1 kBJavaScriptView Raw
1/**
2 * Efficient schema-less binary encoding with support for variable length encoding.
3 *
4 * Use [lib0/encoding] with [lib0/decoding]. Every encoding function has a corresponding decoding function.
5 *
6 * Encodes numbers in little-endian order (least to most significant byte order)
7 * and is compatible with Golang's binary encoding (https://golang.org/pkg/encoding/binary/)
8 * which is also used in Protocol Buffers.
9 *
10 * ```js
11 * // encoding step
12 * const encoder = encoding.createEncoder()
13 * encoding.writeVarUint(encoder, 256)
14 * encoding.writeVarString(encoder, 'Hello world!')
15 * const buf = encoding.toUint8Array(encoder)
16 * ```
17 *
18 * ```js
19 * // decoding step
20 * const decoder = decoding.createDecoder(buf)
21 * decoding.readVarUint(decoder) // => 256
22 * decoding.readVarString(decoder) // => 'Hello world!'
23 * decoding.hasContent(decoder) // => false - all data is read
24 * ```
25 *
26 * @module encoding
27 */
28
29import * as buffer from './buffer.js'
30import * as math from './math.js'
31import * as number from './number.js'
32import * as binary from './binary.js'
33import * as string from './string.js'
34import * as array from './array.js'
35
36/**
37 * A BinaryEncoder handles the encoding to an Uint8Array.
38 */
39export class Encoder {
40 constructor () {
41 this.cpos = 0
42 this.cbuf = new Uint8Array(100)
43 /**
44 * @type {Array<Uint8Array>}
45 */
46 this.bufs = []
47 }
48}
49
50/**
51 * @function
52 * @return {Encoder}
53 */
54export const createEncoder = () => new Encoder()
55
56/**
57 * @param {function(Encoder):void} f
58 */
59export const encode = (f) => {
60 const encoder = createEncoder()
61 f(encoder)
62 return toUint8Array(encoder)
63}
64
65/**
66 * The current length of the encoded data.
67 *
68 * @function
69 * @param {Encoder} encoder
70 * @return {number}
71 */
72export const length = encoder => {
73 let len = encoder.cpos
74 for (let i = 0; i < encoder.bufs.length; i++) {
75 len += encoder.bufs[i].length
76 }
77 return len
78}
79
80/**
81 * Check whether encoder is empty.
82 *
83 * @function
84 * @param {Encoder} encoder
85 * @return {boolean}
86 */
87export const hasContent = encoder => encoder.cpos > 0 || encoder.bufs.length > 0
88
89/**
90 * Transform to Uint8Array.
91 *
92 * @function
93 * @param {Encoder} encoder
94 * @return {Uint8Array} The created ArrayBuffer.
95 */
96export const toUint8Array = encoder => {
97 const uint8arr = new Uint8Array(length(encoder))
98 let curPos = 0
99 for (let i = 0; i < encoder.bufs.length; i++) {
100 const d = encoder.bufs[i]
101 uint8arr.set(d, curPos)
102 curPos += d.length
103 }
104 uint8arr.set(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos), curPos)
105 return uint8arr
106}
107
108/**
109 * Verify that it is possible to write `len` bytes wtihout checking. If
110 * necessary, a new Buffer with the required length is attached.
111 *
112 * @param {Encoder} encoder
113 * @param {number} len
114 */
115export const verifyLen = (encoder, len) => {
116 const bufferLen = encoder.cbuf.length
117 if (bufferLen - encoder.cpos < len) {
118 encoder.bufs.push(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos))
119 encoder.cbuf = new Uint8Array(math.max(bufferLen, len) * 2)
120 encoder.cpos = 0
121 }
122}
123
124/**
125 * Write one byte to the encoder.
126 *
127 * @function
128 * @param {Encoder} encoder
129 * @param {number} num The byte that is to be encoded.
130 */
131export const write = (encoder, num) => {
132 const bufferLen = encoder.cbuf.length
133 if (encoder.cpos === bufferLen) {
134 encoder.bufs.push(encoder.cbuf)
135 encoder.cbuf = new Uint8Array(bufferLen * 2)
136 encoder.cpos = 0
137 }
138 encoder.cbuf[encoder.cpos++] = num
139}
140
141/**
142 * Write one byte at a specific position.
143 * Position must already be written (i.e. encoder.length > pos)
144 *
145 * @function
146 * @param {Encoder} encoder
147 * @param {number} pos Position to which to write data
148 * @param {number} num Unsigned 8-bit integer
149 */
150export const set = (encoder, pos, num) => {
151 let buffer = null
152 // iterate all buffers and adjust position
153 for (let i = 0; i < encoder.bufs.length && buffer === null; i++) {
154 const b = encoder.bufs[i]
155 if (pos < b.length) {
156 buffer = b // found buffer
157 } else {
158 pos -= b.length
159 }
160 }
161 if (buffer === null) {
162 // use current buffer
163 buffer = encoder.cbuf
164 }
165 buffer[pos] = num
166}
167
168/**
169 * Write one byte as an unsigned integer.
170 *
171 * @function
172 * @param {Encoder} encoder
173 * @param {number} num The number that is to be encoded.
174 */
175export const writeUint8 = write
176
177/**
178 * Write one byte as an unsigned Integer at a specific location.
179 *
180 * @function
181 * @param {Encoder} encoder
182 * @param {number} pos The location where the data will be written.
183 * @param {number} num The number that is to be encoded.
184 */
185export const setUint8 = set
186
187/**
188 * Write two bytes as an unsigned integer.
189 *
190 * @function
191 * @param {Encoder} encoder
192 * @param {number} num The number that is to be encoded.
193 */
194export const writeUint16 = (encoder, num) => {
195 write(encoder, num & binary.BITS8)
196 write(encoder, (num >>> 8) & binary.BITS8)
197}
198/**
199 * Write two bytes as an unsigned integer at a specific location.
200 *
201 * @function
202 * @param {Encoder} encoder
203 * @param {number} pos The location where the data will be written.
204 * @param {number} num The number that is to be encoded.
205 */
206export const setUint16 = (encoder, pos, num) => {
207 set(encoder, pos, num & binary.BITS8)
208 set(encoder, pos + 1, (num >>> 8) & binary.BITS8)
209}
210
211/**
212 * Write two bytes as an unsigned integer
213 *
214 * @function
215 * @param {Encoder} encoder
216 * @param {number} num The number that is to be encoded.
217 */
218export const writeUint32 = (encoder, num) => {
219 for (let i = 0; i < 4; i++) {
220 write(encoder, num & binary.BITS8)
221 num >>>= 8
222 }
223}
224
225/**
226 * Write two bytes as an unsigned integer in big endian order.
227 * (most significant byte first)
228 *
229 * @function
230 * @param {Encoder} encoder
231 * @param {number} num The number that is to be encoded.
232 */
233export const writeUint32BigEndian = (encoder, num) => {
234 for (let i = 3; i >= 0; i--) {
235 write(encoder, (num >>> (8 * i)) & binary.BITS8)
236 }
237}
238
239/**
240 * Write two bytes as an unsigned integer at a specific location.
241 *
242 * @function
243 * @param {Encoder} encoder
244 * @param {number} pos The location where the data will be written.
245 * @param {number} num The number that is to be encoded.
246 */
247export const setUint32 = (encoder, pos, num) => {
248 for (let i = 0; i < 4; i++) {
249 set(encoder, pos + i, num & binary.BITS8)
250 num >>>= 8
251 }
252}
253
254/**
255 * Write a variable length unsigned integer. Max encodable integer is 2^53.
256 *
257 * @function
258 * @param {Encoder} encoder
259 * @param {number} num The number that is to be encoded.
260 */
261export const writeVarUint = (encoder, num) => {
262 while (num > binary.BITS7) {
263 write(encoder, binary.BIT8 | (binary.BITS7 & num))
264 num = math.floor(num / 128) // shift >>> 7
265 }
266 write(encoder, binary.BITS7 & num)
267}
268
269/**
270 * Write a variable length integer.
271 *
272 * We use the 7th bit instead for signaling that this is a negative number.
273 *
274 * @function
275 * @param {Encoder} encoder
276 * @param {number} num The number that is to be encoded.
277 */
278export const writeVarInt = (encoder, num) => {
279 const isNegative = math.isNegativeZero(num)
280 if (isNegative) {
281 num = -num
282 }
283 // |- whether to continue reading |- whether is negative |- number
284 write(encoder, (num > binary.BITS6 ? binary.BIT8 : 0) | (isNegative ? binary.BIT7 : 0) | (binary.BITS6 & num))
285 num = math.floor(num / 64) // shift >>> 6
286 // We don't need to consider the case of num === 0 so we can use a different
287 // pattern here than above.
288 while (num > 0) {
289 write(encoder, (num > binary.BITS7 ? binary.BIT8 : 0) | (binary.BITS7 & num))
290 num = math.floor(num / 128) // shift >>> 7
291 }
292}
293
294/**
295 * A cache to store strings temporarily
296 */
297const _strBuffer = new Uint8Array(30000)
298const _maxStrBSize = _strBuffer.length / 3
299
300/**
301 * Write a variable length string.
302 *
303 * @function
304 * @param {Encoder} encoder
305 * @param {String} str The string that is to be encoded.
306 */
307export const _writeVarStringNative = (encoder, str) => {
308 if (str.length < _maxStrBSize) {
309 // We can encode the string into the existing buffer
310 /* c8 ignore next */
311 const written = string.utf8TextEncoder.encodeInto(str, _strBuffer).written || 0
312 writeVarUint(encoder, written)
313 for (let i = 0; i < written; i++) {
314 write(encoder, _strBuffer[i])
315 }
316 } else {
317 writeVarUint8Array(encoder, string.encodeUtf8(str))
318 }
319}
320
321/**
322 * Write a variable length string.
323 *
324 * @function
325 * @param {Encoder} encoder
326 * @param {String} str The string that is to be encoded.
327 */
328export const _writeVarStringPolyfill = (encoder, str) => {
329 const encodedString = unescape(encodeURIComponent(str))
330 const len = encodedString.length
331 writeVarUint(encoder, len)
332 for (let i = 0; i < len; i++) {
333 write(encoder, /** @type {number} */ (encodedString.codePointAt(i)))
334 }
335}
336
337/**
338 * Write a variable length string.
339 *
340 * @function
341 * @param {Encoder} encoder
342 * @param {String} str The string that is to be encoded.
343 */
344/* c8 ignore next */
345export const writeVarString = (string.utf8TextEncoder && /** @type {any} */ (string.utf8TextEncoder).encodeInto) ? _writeVarStringNative : _writeVarStringPolyfill
346
347/**
348 * Write a string terminated by a special byte sequence. This is not very performant and is
349 * generally discouraged. However, the resulting byte arrays are lexiographically ordered which
350 * makes this a nice feature for databases.
351 *
352 * The string will be encoded using utf8 and then terminated and escaped using writeTerminatingUint8Array.
353 *
354 * @function
355 * @param {Encoder} encoder
356 * @param {String} str The string that is to be encoded.
357 */
358export const writeTerminatedString = (encoder, str) =>
359 writeTerminatedUint8Array(encoder, string.encodeUtf8(str))
360
361/**
362 * Write a terminating Uint8Array. Note that this is not performant and is generally
363 * discouraged. There are few situations when this is needed.
364 *
365 * We use 0x0 as a terminating character. 0x1 serves as an escape character for 0x0 and 0x1.
366 *
367 * Example: [0,1,2] is encoded to [1,0,1,1,2,0]. 0x0, and 0x1 needed to be escaped using 0x1. Then
368 * the result is terminated using the 0x0 character.
369 *
370 * This is basically how many systems implement null terminated strings. However, we use an escape
371 * character 0x1 to avoid issues and potenial attacks on our database (if this is used as a key
372 * encoder for NoSql databases).
373 *
374 * @function
375 * @param {Encoder} encoder
376 * @param {Uint8Array} buf The string that is to be encoded.
377 */
378export const writeTerminatedUint8Array = (encoder, buf) => {
379 for (let i = 0; i < buf.length; i++) {
380 const b = buf[i]
381 if (b === 0 || b === 1) {
382 write(encoder, 1)
383 }
384 write(encoder, buf[i])
385 }
386 write(encoder, 0)
387}
388
389/**
390 * Write the content of another Encoder.
391 *
392 * @TODO: can be improved!
393 * - Note: Should consider that when appending a lot of small Encoders, we should rather clone than referencing the old structure.
394 * Encoders start with a rather big initial buffer.
395 *
396 * @function
397 * @param {Encoder} encoder The enUint8Arr
398 * @param {Encoder} append The BinaryEncoder to be written.
399 */
400export const writeBinaryEncoder = (encoder, append) => writeUint8Array(encoder, toUint8Array(append))
401
402/**
403 * Append fixed-length Uint8Array to the encoder.
404 *
405 * @function
406 * @param {Encoder} encoder
407 * @param {Uint8Array} uint8Array
408 */
409export const writeUint8Array = (encoder, uint8Array) => {
410 const bufferLen = encoder.cbuf.length
411 const cpos = encoder.cpos
412 const leftCopyLen = math.min(bufferLen - cpos, uint8Array.length)
413 const rightCopyLen = uint8Array.length - leftCopyLen
414 encoder.cbuf.set(uint8Array.subarray(0, leftCopyLen), cpos)
415 encoder.cpos += leftCopyLen
416 if (rightCopyLen > 0) {
417 // Still something to write, write right half..
418 // Append new buffer
419 encoder.bufs.push(encoder.cbuf)
420 // must have at least size of remaining buffer
421 encoder.cbuf = new Uint8Array(math.max(bufferLen * 2, rightCopyLen))
422 // copy array
423 encoder.cbuf.set(uint8Array.subarray(leftCopyLen))
424 encoder.cpos = rightCopyLen
425 }
426}
427
428/**
429 * Append an Uint8Array to Encoder.
430 *
431 * @function
432 * @param {Encoder} encoder
433 * @param {Uint8Array} uint8Array
434 */
435export const writeVarUint8Array = (encoder, uint8Array) => {
436 writeVarUint(encoder, uint8Array.byteLength)
437 writeUint8Array(encoder, uint8Array)
438}
439
440/**
441 * Create an DataView of the next `len` bytes. Use it to write data after
442 * calling this function.
443 *
444 * ```js
445 * // write float32 using DataView
446 * const dv = writeOnDataView(encoder, 4)
447 * dv.setFloat32(0, 1.1)
448 * // read float32 using DataView
449 * const dv = readFromDataView(encoder, 4)
450 * dv.getFloat32(0) // => 1.100000023841858 (leaving it to the reader to find out why this is the correct result)
451 * ```
452 *
453 * @param {Encoder} encoder
454 * @param {number} len
455 * @return {DataView}
456 */
457export const writeOnDataView = (encoder, len) => {
458 verifyLen(encoder, len)
459 const dview = new DataView(encoder.cbuf.buffer, encoder.cpos, len)
460 encoder.cpos += len
461 return dview
462}
463
464/**
465 * @param {Encoder} encoder
466 * @param {number} num
467 */
468export const writeFloat32 = (encoder, num) => writeOnDataView(encoder, 4).setFloat32(0, num, false)
469
470/**
471 * @param {Encoder} encoder
472 * @param {number} num
473 */
474export const writeFloat64 = (encoder, num) => writeOnDataView(encoder, 8).setFloat64(0, num, false)
475
476/**
477 * @param {Encoder} encoder
478 * @param {bigint} num
479 */
480export const writeBigInt64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigInt64(0, num, false)
481
482/**
483 * @param {Encoder} encoder
484 * @param {bigint} num
485 */
486export const writeBigUint64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigUint64(0, num, false)
487
488const floatTestBed = new DataView(new ArrayBuffer(4))
489/**
490 * Check if a number can be encoded as a 32 bit float.
491 *
492 * @param {number} num
493 * @return {boolean}
494 */
495const isFloat32 = num => {
496 floatTestBed.setFloat32(0, num)
497 return floatTestBed.getFloat32(0) === num
498}
499
500/**
501 * Encode data with efficient binary format.
502 *
503 * Differences to JSON:
504 * • Transforms data to a binary format (not to a string)
505 * • Encodes undefined, NaN, and ArrayBuffer (these can't be represented in JSON)
506 * • Numbers are efficiently encoded either as a variable length integer, as a
507 * 32 bit float, as a 64 bit float, or as a 64 bit bigint.
508 *
509 * Encoding table:
510 *
511 * | Data Type | Prefix | Encoding Method | Comment |
512 * | ------------------- | -------- | ------------------ | ------- |
513 * | undefined | 127 | | Functions, symbol, and everything that cannot be identified is encoded as undefined |
514 * | null | 126 | | |
515 * | integer | 125 | writeVarInt | Only encodes 32 bit signed integers |
516 * | float32 | 124 | writeFloat32 | |
517 * | float64 | 123 | writeFloat64 | |
518 * | bigint | 122 | writeBigInt64 | |
519 * | boolean (false) | 121 | | True and false are different data types so we save the following byte |
520 * | boolean (true) | 120 | | - 0b01111000 so the last bit determines whether true or false |
521 * | string | 119 | writeVarString | |
522 * | object<string,any> | 118 | custom | Writes {length} then {length} key-value pairs |
523 * | array<any> | 117 | custom | Writes {length} then {length} json values |
524 * | Uint8Array | 116 | writeVarUint8Array | We use Uint8Array for any kind of binary data |
525 *
526 * Reasons for the decreasing prefix:
527 * We need the first bit for extendability (later we may want to encode the
528 * prefix with writeVarUint). The remaining 7 bits are divided as follows:
529 * [0-30] the beginning of the data range is used for custom purposes
530 * (defined by the function that uses this library)
531 * [31-127] the end of the data range is used for data encoding by
532 * lib0/encoding.js
533 *
534 * @param {Encoder} encoder
535 * @param {undefined|null|number|bigint|boolean|string|Object<string,any>|Array<any>|Uint8Array} data
536 */
537export const writeAny = (encoder, data) => {
538 switch (typeof data) {
539 case 'string':
540 // TYPE 119: STRING
541 write(encoder, 119)
542 writeVarString(encoder, data)
543 break
544 case 'number':
545 if (number.isInteger(data) && math.abs(data) <= binary.BITS31) {
546 // TYPE 125: INTEGER
547 write(encoder, 125)
548 writeVarInt(encoder, data)
549 } else if (isFloat32(data)) {
550 // TYPE 124: FLOAT32
551 write(encoder, 124)
552 writeFloat32(encoder, data)
553 } else {
554 // TYPE 123: FLOAT64
555 write(encoder, 123)
556 writeFloat64(encoder, data)
557 }
558 break
559 case 'bigint':
560 // TYPE 122: BigInt
561 write(encoder, 122)
562 writeBigInt64(encoder, data)
563 break
564 case 'object':
565 if (data === null) {
566 // TYPE 126: null
567 write(encoder, 126)
568 } else if (array.isArray(data)) {
569 // TYPE 117: Array
570 write(encoder, 117)
571 writeVarUint(encoder, data.length)
572 for (let i = 0; i < data.length; i++) {
573 writeAny(encoder, data[i])
574 }
575 } else if (data instanceof Uint8Array) {
576 // TYPE 116: ArrayBuffer
577 write(encoder, 116)
578 writeVarUint8Array(encoder, data)
579 } else {
580 // TYPE 118: Object
581 write(encoder, 118)
582 const keys = Object.keys(data)
583 writeVarUint(encoder, keys.length)
584 for (let i = 0; i < keys.length; i++) {
585 const key = keys[i]
586 writeVarString(encoder, key)
587 writeAny(encoder, data[key])
588 }
589 }
590 break
591 case 'boolean':
592 // TYPE 120/121: boolean (true/false)
593 write(encoder, data ? 120 : 121)
594 break
595 default:
596 // TYPE 127: undefined
597 write(encoder, 127)
598 }
599}
600
601/**
602 * Now come a few stateful encoder that have their own classes.
603 */
604
605/**
606 * Basic Run Length Encoder - a basic compression implementation.
607 *
608 * Encodes [1,1,1,7] to [1,3,7,1] (3 times 1, 1 time 7). This encoder might do more harm than good if there are a lot of values that are not repeated.
609 *
610 * It was originally used for image compression. Cool .. article http://csbruce.com/cbm/transactor/pdfs/trans_v7_i06.pdf
611 *
612 * @note T must not be null!
613 *
614 * @template T
615 */
616export class RleEncoder extends Encoder {
617 /**
618 * @param {function(Encoder, T):void} writer
619 */
620 constructor (writer) {
621 super()
622 /**
623 * The writer
624 */
625 this.w = writer
626 /**
627 * Current state
628 * @type {T|null}
629 */
630 this.s = null
631 this.count = 0
632 }
633
634 /**
635 * @param {T} v
636 */
637 write (v) {
638 if (this.s === v) {
639 this.count++
640 } else {
641 if (this.count > 0) {
642 // flush counter, unless this is the first value (count = 0)
643 writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
644 }
645 this.count = 1
646 // write first value
647 this.w(this, v)
648 this.s = v
649 }
650 }
651}
652
653/**
654 * Basic diff decoder using variable length encoding.
655 *
656 * Encodes the values [3, 1100, 1101, 1050, 0] to [3, 1097, 1, -51, -1050] using writeVarInt.
657 */
658export class IntDiffEncoder extends Encoder {
659 /**
660 * @param {number} start
661 */
662 constructor (start) {
663 super()
664 /**
665 * Current state
666 * @type {number}
667 */
668 this.s = start
669 }
670
671 /**
672 * @param {number} v
673 */
674 write (v) {
675 writeVarInt(this, v - this.s)
676 this.s = v
677 }
678}
679
680/**
681 * A combination of IntDiffEncoder and RleEncoder.
682 *
683 * Basically first writes the IntDiffEncoder and then counts duplicate diffs using RleEncoding.
684 *
685 * Encodes the values [1,1,1,2,3,4,5,6] as [1,1,0,2,1,5] (RLE([1,0,0,1,1,1,1,1]) ⇒ RleIntDiff[1,1,0,2,1,5])
686 */
687export class RleIntDiffEncoder extends Encoder {
688 /**
689 * @param {number} start
690 */
691 constructor (start) {
692 super()
693 /**
694 * Current state
695 * @type {number}
696 */
697 this.s = start
698 this.count = 0
699 }
700
701 /**
702 * @param {number} v
703 */
704 write (v) {
705 if (this.s === v && this.count > 0) {
706 this.count++
707 } else {
708 if (this.count > 0) {
709 // flush counter, unless this is the first value (count = 0)
710 writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
711 }
712 this.count = 1
713 // write first value
714 writeVarInt(this, v - this.s)
715 this.s = v
716 }
717 }
718}
719
720/**
721 * @param {UintOptRleEncoder} encoder
722 */
723const flushUintOptRleEncoder = encoder => {
724 if (encoder.count > 0) {
725 // flush counter, unless this is the first value (count = 0)
726 // case 1: just a single value. set sign to positive
727 // case 2: write several values. set sign to negative to indicate that there is a length coming
728 writeVarInt(encoder.encoder, encoder.count === 1 ? encoder.s : -encoder.s)
729 if (encoder.count > 1) {
730 writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
731 }
732 }
733}
734
735/**
736 * Optimized Rle encoder that does not suffer from the mentioned problem of the basic Rle encoder.
737 *
738 * Internally uses VarInt encoder to write unsigned integers. If the input occurs multiple times, we write
739 * write it as a negative number. The UintOptRleDecoder then understands that it needs to read a count.
740 *
741 * Encodes [1,2,3,3,3] as [1,2,-3,3] (once 1, once 2, three times 3)
742 */
743export class UintOptRleEncoder {
744 constructor () {
745 this.encoder = new Encoder()
746 /**
747 * @type {number}
748 */
749 this.s = 0
750 this.count = 0
751 }
752
753 /**
754 * @param {number} v
755 */
756 write (v) {
757 if (this.s === v) {
758 this.count++
759 } else {
760 flushUintOptRleEncoder(this)
761 this.count = 1
762 this.s = v
763 }
764 }
765
766 toUint8Array () {
767 flushUintOptRleEncoder(this)
768 return toUint8Array(this.encoder)
769 }
770}
771
772/**
773 * Increasing Uint Optimized RLE Encoder
774 *
775 * The RLE encoder counts the number of same occurences of the same value.
776 * The IncUintOptRle encoder counts if the value increases.
777 * I.e. 7, 8, 9, 10 will be encoded as [-7, 4]. 1, 3, 5 will be encoded
778 * as [1, 3, 5].
779 */
780export class IncUintOptRleEncoder {
781 constructor () {
782 this.encoder = new Encoder()
783 /**
784 * @type {number}
785 */
786 this.s = 0
787 this.count = 0
788 }
789
790 /**
791 * @param {number} v
792 */
793 write (v) {
794 if (this.s + this.count === v) {
795 this.count++
796 } else {
797 flushUintOptRleEncoder(this)
798 this.count = 1
799 this.s = v
800 }
801 }
802
803 toUint8Array () {
804 flushUintOptRleEncoder(this)
805 return toUint8Array(this.encoder)
806 }
807}
808
809/**
810 * @param {IntDiffOptRleEncoder} encoder
811 */
812const flushIntDiffOptRleEncoder = encoder => {
813 if (encoder.count > 0) {
814 // 31 bit making up the diff | wether to write the counter
815 // const encodedDiff = encoder.diff << 1 | (encoder.count === 1 ? 0 : 1)
816 const encodedDiff = encoder.diff * 2 + (encoder.count === 1 ? 0 : 1)
817 // flush counter, unless this is the first value (count = 0)
818 // case 1: just a single value. set first bit to positive
819 // case 2: write several values. set first bit to negative to indicate that there is a length coming
820 writeVarInt(encoder.encoder, encodedDiff)
821 if (encoder.count > 1) {
822 writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
823 }
824 }
825}
826
827/**
828 * A combination of the IntDiffEncoder and the UintOptRleEncoder.
829 *
830 * The count approach is similar to the UintDiffOptRleEncoder, but instead of using the negative bitflag, it encodes
831 * in the LSB whether a count is to be read. Therefore this Encoder only supports 31 bit integers!
832 *
833 * Encodes [1, 2, 3, 2] as [3, 1, 6, -1] (more specifically [(1 << 1) | 1, (3 << 0) | 0, -1])
834 *
835 * Internally uses variable length encoding. Contrary to normal UintVar encoding, the first byte contains:
836 * * 1 bit that denotes whether the next value is a count (LSB)
837 * * 1 bit that denotes whether this value is negative (MSB - 1)
838 * * 1 bit that denotes whether to continue reading the variable length integer (MSB)
839 *
840 * Therefore, only five bits remain to encode diff ranges.
841 *
842 * Use this Encoder only when appropriate. In most cases, this is probably a bad idea.
843 */
844export class IntDiffOptRleEncoder {
845 constructor () {
846 this.encoder = new Encoder()
847 /**
848 * @type {number}
849 */
850 this.s = 0
851 this.count = 0
852 this.diff = 0
853 }
854
855 /**
856 * @param {number} v
857 */
858 write (v) {
859 if (this.diff === v - this.s) {
860 this.s = v
861 this.count++
862 } else {
863 flushIntDiffOptRleEncoder(this)
864 this.count = 1
865 this.diff = v - this.s
866 this.s = v
867 }
868 }
869
870 toUint8Array () {
871 flushIntDiffOptRleEncoder(this)
872 return toUint8Array(this.encoder)
873 }
874}
875
876/**
877 * Optimized String Encoder.
878 *
879 * Encoding many small strings in a simple Encoder is not very efficient. The function call to decode a string takes some time and creates references that must be eventually deleted.
880 * In practice, when decoding several million small strings, the GC will kick in more and more often to collect orphaned string objects (or maybe there is another reason?).
881 *
882 * This string encoder solves the above problem. All strings are concatenated and written as a single string using a single encoding call.
883 *
884 * The lengths are encoded using a UintOptRleEncoder.
885 */
886export class StringEncoder {
887 constructor () {
888 /**
889 * @type {Array<string>}
890 */
891 this.sarr = []
892 this.s = ''
893 this.lensE = new UintOptRleEncoder()
894 }
895
896 /**
897 * @param {string} string
898 */
899 write (string) {
900 this.s += string
901 if (this.s.length > 19) {
902 this.sarr.push(this.s)
903 this.s = ''
904 }
905 this.lensE.write(string.length)
906 }
907
908 toUint8Array () {
909 const encoder = new Encoder()
910 this.sarr.push(this.s)
911 this.s = ''
912 writeVarString(encoder, this.sarr.join(''))
913 writeUint8Array(encoder, this.lensE.toUint8Array())
914 return toUint8Array(encoder)
915 }
916}