1 | /**
|
2 | * Efficient schema-less binary encoding with support for variable length encoding.
|
3 | *
|
4 | * Use [lib0/encoding] with [lib0/decoding]. Every encoding function has a corresponding decoding function.
|
5 | *
|
6 | * Encodes numbers in little-endian order (least to most significant byte order)
|
7 | * and is compatible with Golang's binary encoding (https://golang.org/pkg/encoding/binary/)
|
8 | * which is also used in Protocol Buffers.
|
9 | *
|
10 | * ```js
|
11 | * // encoding step
|
12 | * const encoder = new encoding.createEncoder()
|
13 | * encoding.writeVarUint(encoder, 256)
|
14 | * encoding.writeVarString(encoder, 'Hello world!')
|
15 | * const buf = encoding.toUint8Array(encoder)
|
16 | * ```
|
17 | *
|
18 | * ```js
|
19 | * // decoding step
|
20 | * const decoder = new decoding.createDecoder(buf)
|
21 | * decoding.readVarUint(decoder) // => 256
|
22 | * decoding.readVarString(decoder) // => 'Hello world!'
|
23 | * decoding.hasContent(decoder) // => false - all data is read
|
24 | * ```
|
25 | *
|
26 | * @module encoding
|
27 | */
|
28 |
|
29 | import * as buffer from './buffer.js'
|
30 | import * as math from './math.js'
|
31 | import * as number from './number.js'
|
32 | import * as binary from './binary.js'
|
33 | import * as string from './string.js'
|
34 |
|
35 | /**
|
36 | * A BinaryEncoder handles the encoding to an Uint8Array.
|
37 | */
|
38 | export class Encoder {
|
39 | constructor () {
|
40 | this.cpos = 0
|
41 | this.cbuf = new Uint8Array(100)
|
42 | /**
|
43 | * @type {Array<Uint8Array>}
|
44 | */
|
45 | this.bufs = []
|
46 | }
|
47 | }
|
48 |
|
49 | /**
|
50 | * @function
|
51 | * @return {Encoder}
|
52 | */
|
53 | export const createEncoder = () => new Encoder()
|
54 |
|
55 | /**
|
56 | * The current length of the encoded data.
|
57 | *
|
58 | * @function
|
59 | * @param {Encoder} encoder
|
60 | * @return {number}
|
61 | */
|
62 | export const length = encoder => {
|
63 | let len = encoder.cpos
|
64 | for (let i = 0; i < encoder.bufs.length; i++) {
|
65 | len += encoder.bufs[i].length
|
66 | }
|
67 | return len
|
68 | }
|
69 |
|
70 | /**
|
71 | * Transform to Uint8Array.
|
72 | *
|
73 | * @function
|
74 | * @param {Encoder} encoder
|
75 | * @return {Uint8Array} The created ArrayBuffer.
|
76 | */
|
77 | export const toUint8Array = encoder => {
|
78 | const uint8arr = new Uint8Array(length(encoder))
|
79 | let curPos = 0
|
80 | for (let i = 0; i < encoder.bufs.length; i++) {
|
81 | const d = encoder.bufs[i]
|
82 | uint8arr.set(d, curPos)
|
83 | curPos += d.length
|
84 | }
|
85 | uint8arr.set(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos), curPos)
|
86 | return uint8arr
|
87 | }
|
88 |
|
89 | /**
|
90 | * Verify that it is possible to write `len` bytes wtihout checking. If
|
91 | * necessary, a new Buffer with the required length is attached.
|
92 | *
|
93 | * @param {Encoder} encoder
|
94 | * @param {number} len
|
95 | */
|
96 | export const verifyLen = (encoder, len) => {
|
97 | const bufferLen = encoder.cbuf.length
|
98 | if (bufferLen - encoder.cpos < len) {
|
99 | encoder.bufs.push(buffer.createUint8ArrayViewFromArrayBuffer(encoder.cbuf.buffer, 0, encoder.cpos))
|
100 | encoder.cbuf = new Uint8Array(math.max(bufferLen, len) * 2)
|
101 | encoder.cpos = 0
|
102 | }
|
103 | }
|
104 |
|
105 | /**
|
106 | * Write one byte to the encoder.
|
107 | *
|
108 | * @function
|
109 | * @param {Encoder} encoder
|
110 | * @param {number} num The byte that is to be encoded.
|
111 | */
|
112 | export const write = (encoder, num) => {
|
113 | const bufferLen = encoder.cbuf.length
|
114 | if (encoder.cpos === bufferLen) {
|
115 | encoder.bufs.push(encoder.cbuf)
|
116 | encoder.cbuf = new Uint8Array(bufferLen * 2)
|
117 | encoder.cpos = 0
|
118 | }
|
119 | encoder.cbuf[encoder.cpos++] = num
|
120 | }
|
121 |
|
122 | /**
|
123 | * Write one byte at a specific position.
|
124 | * Position must already be written (i.e. encoder.length > pos)
|
125 | *
|
126 | * @function
|
127 | * @param {Encoder} encoder
|
128 | * @param {number} pos Position to which to write data
|
129 | * @param {number} num Unsigned 8-bit integer
|
130 | */
|
131 | export const set = (encoder, pos, num) => {
|
132 | let buffer = null
|
133 | // iterate all buffers and adjust position
|
134 | for (let i = 0; i < encoder.bufs.length && buffer === null; i++) {
|
135 | const b = encoder.bufs[i]
|
136 | if (pos < b.length) {
|
137 | buffer = b // found buffer
|
138 | } else {
|
139 | pos -= b.length
|
140 | }
|
141 | }
|
142 | if (buffer === null) {
|
143 | // use current buffer
|
144 | buffer = encoder.cbuf
|
145 | }
|
146 | buffer[pos] = num
|
147 | }
|
148 |
|
149 | /**
|
150 | * Write one byte as an unsigned integer.
|
151 | *
|
152 | * @function
|
153 | * @param {Encoder} encoder
|
154 | * @param {number} num The number that is to be encoded.
|
155 | */
|
156 | export const writeUint8 = write
|
157 |
|
158 | /**
|
159 | * Write one byte as an unsigned Integer at a specific location.
|
160 | *
|
161 | * @function
|
162 | * @param {Encoder} encoder
|
163 | * @param {number} pos The location where the data will be written.
|
164 | * @param {number} num The number that is to be encoded.
|
165 | */
|
166 | export const setUint8 = set
|
167 |
|
168 | /**
|
169 | * Write two bytes as an unsigned integer.
|
170 | *
|
171 | * @function
|
172 | * @param {Encoder} encoder
|
173 | * @param {number} num The number that is to be encoded.
|
174 | */
|
175 | export const writeUint16 = (encoder, num) => {
|
176 | write(encoder, num & binary.BITS8)
|
177 | write(encoder, (num >>> 8) & binary.BITS8)
|
178 | }
|
179 | /**
|
180 | * Write two bytes as an unsigned integer at a specific location.
|
181 | *
|
182 | * @function
|
183 | * @param {Encoder} encoder
|
184 | * @param {number} pos The location where the data will be written.
|
185 | * @param {number} num The number that is to be encoded.
|
186 | */
|
187 | export const setUint16 = (encoder, pos, num) => {
|
188 | set(encoder, pos, num & binary.BITS8)
|
189 | set(encoder, pos + 1, (num >>> 8) & binary.BITS8)
|
190 | }
|
191 |
|
192 | /**
|
193 | * Write two bytes as an unsigned integer
|
194 | *
|
195 | * @function
|
196 | * @param {Encoder} encoder
|
197 | * @param {number} num The number that is to be encoded.
|
198 | */
|
199 | export const writeUint32 = (encoder, num) => {
|
200 | for (let i = 0; i < 4; i++) {
|
201 | write(encoder, num & binary.BITS8)
|
202 | num >>>= 8
|
203 | }
|
204 | }
|
205 |
|
206 | /**
|
207 | * Write two bytes as an unsigned integer in big endian order.
|
208 | * (most significant byte first)
|
209 | *
|
210 | * @function
|
211 | * @param {Encoder} encoder
|
212 | * @param {number} num The number that is to be encoded.
|
213 | */
|
214 | export const writeUint32BigEndian = (encoder, num) => {
|
215 | for (let i = 3; i >= 0; i--) {
|
216 | write(encoder, (num >>> (8 * i)) & binary.BITS8)
|
217 | }
|
218 | }
|
219 |
|
220 | /**
|
221 | * Write two bytes as an unsigned integer at a specific location.
|
222 | *
|
223 | * @function
|
224 | * @param {Encoder} encoder
|
225 | * @param {number} pos The location where the data will be written.
|
226 | * @param {number} num The number that is to be encoded.
|
227 | */
|
228 | export const setUint32 = (encoder, pos, num) => {
|
229 | for (let i = 0; i < 4; i++) {
|
230 | set(encoder, pos + i, num & binary.BITS8)
|
231 | num >>>= 8
|
232 | }
|
233 | }
|
234 |
|
235 | /**
|
236 | * Write a variable length unsigned integer. Max encodable integer is 2^53.
|
237 | *
|
238 | * @function
|
239 | * @param {Encoder} encoder
|
240 | * @param {number} num The number that is to be encoded.
|
241 | */
|
242 | export const writeVarUint = (encoder, num) => {
|
243 | while (num > binary.BITS7) {
|
244 | write(encoder, binary.BIT8 | (binary.BITS7 & num))
|
245 | num = math.floor(num / 128) // shift >>> 7
|
246 | }
|
247 | write(encoder, binary.BITS7 & num)
|
248 | }
|
249 |
|
250 | /**
|
251 | * Write a variable length integer.
|
252 | *
|
253 | * We use the 7th bit instead for signaling that this is a negative number.
|
254 | *
|
255 | * @function
|
256 | * @param {Encoder} encoder
|
257 | * @param {number} num The number that is to be encoded.
|
258 | */
|
259 | export const writeVarInt = (encoder, num) => {
|
260 | const isNegative = math.isNegativeZero(num)
|
261 | if (isNegative) {
|
262 | num = -num
|
263 | }
|
264 | // |- whether to continue reading |- whether is negative |- number
|
265 | write(encoder, (num > binary.BITS6 ? binary.BIT8 : 0) | (isNegative ? binary.BIT7 : 0) | (binary.BITS6 & num))
|
266 | num = math.floor(num / 64) // shift >>> 6
|
267 | // We don't need to consider the case of num === 0 so we can use a different
|
268 | // pattern here than above.
|
269 | while (num > 0) {
|
270 | write(encoder, (num > binary.BITS7 ? binary.BIT8 : 0) | (binary.BITS7 & num))
|
271 | num = math.floor(num / 128) // shift >>> 7
|
272 | }
|
273 | }
|
274 |
|
275 | /**
|
276 | * A cache to store strings temporarily
|
277 | */
|
278 | const _strBuffer = new Uint8Array(30000)
|
279 | const _maxStrBSize = _strBuffer.length / 3
|
280 |
|
281 | /**
|
282 | * Write a variable length string.
|
283 | *
|
284 | * @function
|
285 | * @param {Encoder} encoder
|
286 | * @param {String} str The string that is to be encoded.
|
287 | */
|
288 | export const _writeVarStringNative = (encoder, str) => {
|
289 | if (str.length < _maxStrBSize) {
|
290 | // We can encode the string into the existing buffer
|
291 | /* istanbul ignore else */
|
292 | const written = string.utf8TextEncoder.encodeInto(str, _strBuffer).written || 0
|
293 | writeVarUint(encoder, written)
|
294 | for (let i = 0; i < written; i++) {
|
295 | write(encoder, _strBuffer[i])
|
296 | }
|
297 | } else {
|
298 | writeVarUint8Array(encoder, string.encodeUtf8(str))
|
299 | }
|
300 | }
|
301 |
|
302 | /**
|
303 | * Write a variable length string.
|
304 | *
|
305 | * @function
|
306 | * @param {Encoder} encoder
|
307 | * @param {String} str The string that is to be encoded.
|
308 | */
|
309 | export const _writeVarStringPolyfill = (encoder, str) => {
|
310 | const encodedString = unescape(encodeURIComponent(str))
|
311 | const len = encodedString.length
|
312 | writeVarUint(encoder, len)
|
313 | for (let i = 0; i < len; i++) {
|
314 | write(encoder, /** @type {number} */ (encodedString.codePointAt(i)))
|
315 | }
|
316 | }
|
317 |
|
318 | /**
|
319 | * Write a variable length string.
|
320 | *
|
321 | * @function
|
322 | * @param {Encoder} encoder
|
323 | * @param {String} str The string that is to be encoded.
|
324 | */
|
325 | /* istanbul ignore next */
|
326 | export const writeVarString = string.utf8TextEncoder ? _writeVarStringNative : _writeVarStringPolyfill
|
327 |
|
328 | /**
|
329 | * Write the content of another Encoder.
|
330 | *
|
331 | * @TODO: can be improved!
|
332 | * - Note: Should consider that when appending a lot of small Encoders, we should rather clone than referencing the old structure.
|
333 | * Encoders start with a rather big initial buffer.
|
334 | *
|
335 | * @function
|
336 | * @param {Encoder} encoder The enUint8Arr
|
337 | * @param {Encoder} append The BinaryEncoder to be written.
|
338 | */
|
339 | export const writeBinaryEncoder = (encoder, append) => writeUint8Array(encoder, toUint8Array(append))
|
340 |
|
341 | /**
|
342 | * Append fixed-length Uint8Array to the encoder.
|
343 | *
|
344 | * @function
|
345 | * @param {Encoder} encoder
|
346 | * @param {Uint8Array} uint8Array
|
347 | */
|
348 | export const writeUint8Array = (encoder, uint8Array) => {
|
349 | const bufferLen = encoder.cbuf.length
|
350 | const cpos = encoder.cpos
|
351 | const leftCopyLen = math.min(bufferLen - cpos, uint8Array.length)
|
352 | const rightCopyLen = uint8Array.length - leftCopyLen
|
353 | encoder.cbuf.set(uint8Array.subarray(0, leftCopyLen), cpos)
|
354 | encoder.cpos += leftCopyLen
|
355 | if (rightCopyLen > 0) {
|
356 | // Still something to write, write right half..
|
357 | // Append new buffer
|
358 | encoder.bufs.push(encoder.cbuf)
|
359 | // must have at least size of remaining buffer
|
360 | encoder.cbuf = new Uint8Array(math.max(bufferLen * 2, rightCopyLen))
|
361 | // copy array
|
362 | encoder.cbuf.set(uint8Array.subarray(leftCopyLen))
|
363 | encoder.cpos = rightCopyLen
|
364 | }
|
365 | }
|
366 |
|
367 | /**
|
368 | * Append an Uint8Array to Encoder.
|
369 | *
|
370 | * @function
|
371 | * @param {Encoder} encoder
|
372 | * @param {Uint8Array} uint8Array
|
373 | */
|
374 | export const writeVarUint8Array = (encoder, uint8Array) => {
|
375 | writeVarUint(encoder, uint8Array.byteLength)
|
376 | writeUint8Array(encoder, uint8Array)
|
377 | }
|
378 |
|
379 | /**
|
380 | * Create an DataView of the next `len` bytes. Use it to write data after
|
381 | * calling this function.
|
382 | *
|
383 | * ```js
|
384 | * // write float32 using DataView
|
385 | * const dv = writeOnDataView(encoder, 4)
|
386 | * dv.setFloat32(0, 1.1)
|
387 | * // read float32 using DataView
|
388 | * const dv = readFromDataView(encoder, 4)
|
389 | * dv.getFloat32(0) // => 1.100000023841858 (leaving it to the reader to find out why this is the correct result)
|
390 | * ```
|
391 | *
|
392 | * @param {Encoder} encoder
|
393 | * @param {number} len
|
394 | * @return {DataView}
|
395 | */
|
396 | export const writeOnDataView = (encoder, len) => {
|
397 | verifyLen(encoder, len)
|
398 | const dview = new DataView(encoder.cbuf.buffer, encoder.cpos, len)
|
399 | encoder.cpos += len
|
400 | return dview
|
401 | }
|
402 |
|
403 | /**
|
404 | * @param {Encoder} encoder
|
405 | * @param {number} num
|
406 | */
|
407 | export const writeFloat32 = (encoder, num) => writeOnDataView(encoder, 4).setFloat32(0, num, false)
|
408 |
|
409 | /**
|
410 | * @param {Encoder} encoder
|
411 | * @param {number} num
|
412 | */
|
413 | export const writeFloat64 = (encoder, num) => writeOnDataView(encoder, 8).setFloat64(0, num, false)
|
414 |
|
415 | /**
|
416 | * @param {Encoder} encoder
|
417 | * @param {bigint} num
|
418 | */
|
419 | export const writeBigInt64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigInt64(0, num, false)
|
420 |
|
421 | /**
|
422 | * @param {Encoder} encoder
|
423 | * @param {bigint} num
|
424 | */
|
425 | export const writeBigUint64 = (encoder, num) => /** @type {any} */ (writeOnDataView(encoder, 8)).setBigUint64(0, num, false)
|
426 |
|
427 | const floatTestBed = new DataView(new ArrayBuffer(4))
|
428 | /**
|
429 | * Check if a number can be encoded as a 32 bit float.
|
430 | *
|
431 | * @param {number} num
|
432 | * @return {boolean}
|
433 | */
|
434 | const isFloat32 = num => {
|
435 | floatTestBed.setFloat32(0, num)
|
436 | return floatTestBed.getFloat32(0) === num
|
437 | }
|
438 |
|
439 | /**
|
440 | * Encode data with efficient binary format.
|
441 | *
|
442 | * Differences to JSON:
|
443 | * • Transforms data to a binary format (not to a string)
|
444 | * • Encodes undefined, NaN, and ArrayBuffer (these can't be represented in JSON)
|
445 | * • Numbers are efficiently encoded either as a variable length integer, as a
|
446 | * 32 bit float, as a 64 bit float, or as a 64 bit bigint.
|
447 | *
|
448 | * Encoding table:
|
449 | *
|
450 | * | Data Type | Prefix | Encoding Method | Comment |
|
451 | * | ------------------- | -------- | ------------------ | ------- |
|
452 | * | undefined | 127 | | Functions, symbol, and everything that cannot be identified is encoded as undefined |
|
453 | * | null | 126 | | |
|
454 | * | integer | 125 | writeVarInt | Only encodes 32 bit signed integers |
|
455 | * | float32 | 124 | writeFloat32 | |
|
456 | * | float64 | 123 | writeFloat64 | |
|
457 | * | bigint | 122 | writeBigInt64 | |
|
458 | * | boolean (false) | 121 | | True and false are different data types so we save the following byte |
|
459 | * | boolean (true) | 120 | | - 0b01111000 so the last bit determines whether true or false |
|
460 | * | string | 119 | writeVarString | |
|
461 | * | object<string,any> | 118 | custom | Writes {length} then {length} key-value pairs |
|
462 | * | array<any> | 117 | custom | Writes {length} then {length} json values |
|
463 | * | Uint8Array | 116 | writeVarUint8Array | We use Uint8Array for any kind of binary data |
|
464 | *
|
465 | * Reasons for the decreasing prefix:
|
466 | * We need the first bit for extendability (later we may want to encode the
|
467 | * prefix with writeVarUint). The remaining 7 bits are divided as follows:
|
468 | * [0-30] the beginning of the data range is used for custom purposes
|
469 | * (defined by the function that uses this library)
|
470 | * [31-127] the end of the data range is used for data encoding by
|
471 | * lib0/encoding.js
|
472 | *
|
473 | * @param {Encoder} encoder
|
474 | * @param {undefined|null|number|bigint|boolean|string|Object<string,any>|Array<any>|Uint8Array} data
|
475 | */
|
476 | export const writeAny = (encoder, data) => {
|
477 | switch (typeof data) {
|
478 | case 'string':
|
479 | // TYPE 119: STRING
|
480 | write(encoder, 119)
|
481 | writeVarString(encoder, data)
|
482 | break
|
483 | case 'number':
|
484 | if (number.isInteger(data) && math.abs(data) <= binary.BITS31) {
|
485 | // TYPE 125: INTEGER
|
486 | write(encoder, 125)
|
487 | writeVarInt(encoder, data)
|
488 | } else if (isFloat32(data)) {
|
489 | // TYPE 124: FLOAT32
|
490 | write(encoder, 124)
|
491 | writeFloat32(encoder, data)
|
492 | } else {
|
493 | // TYPE 123: FLOAT64
|
494 | write(encoder, 123)
|
495 | writeFloat64(encoder, data)
|
496 | }
|
497 | break
|
498 | case 'bigint':
|
499 | // TYPE 122: BigInt
|
500 | write(encoder, 122)
|
501 | writeBigInt64(encoder, data)
|
502 | break
|
503 | case 'object':
|
504 | if (data === null) {
|
505 | // TYPE 126: null
|
506 | write(encoder, 126)
|
507 | } else if (data instanceof Array) {
|
508 | // TYPE 117: Array
|
509 | write(encoder, 117)
|
510 | writeVarUint(encoder, data.length)
|
511 | for (let i = 0; i < data.length; i++) {
|
512 | writeAny(encoder, data[i])
|
513 | }
|
514 | } else if (data instanceof Uint8Array) {
|
515 | // TYPE 116: ArrayBuffer
|
516 | write(encoder, 116)
|
517 | writeVarUint8Array(encoder, data)
|
518 | } else {
|
519 | // TYPE 118: Object
|
520 | write(encoder, 118)
|
521 | const keys = Object.keys(data)
|
522 | writeVarUint(encoder, keys.length)
|
523 | for (let i = 0; i < keys.length; i++) {
|
524 | const key = keys[i]
|
525 | writeVarString(encoder, key)
|
526 | writeAny(encoder, data[key])
|
527 | }
|
528 | }
|
529 | break
|
530 | case 'boolean':
|
531 | // TYPE 120/121: boolean (true/false)
|
532 | write(encoder, data ? 120 : 121)
|
533 | break
|
534 | default:
|
535 | // TYPE 127: undefined
|
536 | write(encoder, 127)
|
537 | }
|
538 | }
|
539 |
|
540 | /**
|
541 | * Now come a few stateful encoder that have their own classes.
|
542 | */
|
543 |
|
544 | /**
|
545 | * Basic Run Length Encoder - a basic compression implementation.
|
546 | *
|
547 | * Encodes [1,1,1,7] to [1,3,7,1] (3 times 1, 1 time 7). This encoder might do more harm than good if there are a lot of values that are not repeated.
|
548 | *
|
549 | * It was originally used for image compression. Cool .. article http://csbruce.com/cbm/transactor/pdfs/trans_v7_i06.pdf
|
550 | *
|
551 | * @note T must not be null!
|
552 | *
|
553 | * @template T
|
554 | */
|
555 | export class RleEncoder extends Encoder {
|
556 | /**
|
557 | * @param {function(Encoder, T):void} writer
|
558 | */
|
559 | constructor (writer) {
|
560 | super()
|
561 | /**
|
562 | * The writer
|
563 | */
|
564 | this.w = writer
|
565 | /**
|
566 | * Current state
|
567 | * @type {T|null}
|
568 | */
|
569 | this.s = null
|
570 | this.count = 0
|
571 | }
|
572 |
|
573 | /**
|
574 | * @param {T} v
|
575 | */
|
576 | write (v) {
|
577 | if (this.s === v) {
|
578 | this.count++
|
579 | } else {
|
580 | if (this.count > 0) {
|
581 | // flush counter, unless this is the first value (count = 0)
|
582 | writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
|
583 | }
|
584 | this.count = 1
|
585 | // write first value
|
586 | this.w(this, v)
|
587 | this.s = v
|
588 | }
|
589 | }
|
590 | }
|
591 |
|
592 | /**
|
593 | * Basic diff decoder using variable length encoding.
|
594 | *
|
595 | * Encodes the values [3, 1100, 1101, 1050, 0] to [3, 1097, 1, -51, -1050] using writeVarInt.
|
596 | */
|
597 | export class IntDiffEncoder extends Encoder {
|
598 | /**
|
599 | * @param {number} start
|
600 | */
|
601 | constructor (start) {
|
602 | super()
|
603 | /**
|
604 | * Current state
|
605 | * @type {number}
|
606 | */
|
607 | this.s = start
|
608 | }
|
609 |
|
610 | /**
|
611 | * @param {number} v
|
612 | */
|
613 | write (v) {
|
614 | writeVarInt(this, v - this.s)
|
615 | this.s = v
|
616 | }
|
617 | }
|
618 |
|
619 | /**
|
620 | * A combination of IntDiffEncoder and RleEncoder.
|
621 | *
|
622 | * Basically first writes the IntDiffEncoder and then counts duplicate diffs using RleEncoding.
|
623 | *
|
624 | * Encodes the values [1,1,1,2,3,4,5,6] as [1,1,0,2,1,5] (RLE([1,0,0,1,1,1,1,1]) ⇒ RleIntDiff[1,1,0,2,1,5])
|
625 | */
|
626 | export class RleIntDiffEncoder extends Encoder {
|
627 | /**
|
628 | * @param {number} start
|
629 | */
|
630 | constructor (start) {
|
631 | super()
|
632 | /**
|
633 | * Current state
|
634 | * @type {number}
|
635 | */
|
636 | this.s = start
|
637 | this.count = 0
|
638 | }
|
639 |
|
640 | /**
|
641 | * @param {number} v
|
642 | */
|
643 | write (v) {
|
644 | if (this.s === v && this.count > 0) {
|
645 | this.count++
|
646 | } else {
|
647 | if (this.count > 0) {
|
648 | // flush counter, unless this is the first value (count = 0)
|
649 | writeVarUint(this, this.count - 1) // since count is always > 0, we can decrement by one. non-standard encoding ftw
|
650 | }
|
651 | this.count = 1
|
652 | // write first value
|
653 | writeVarInt(this, v - this.s)
|
654 | this.s = v
|
655 | }
|
656 | }
|
657 | }
|
658 |
|
659 | /**
|
660 | * @param {UintOptRleEncoder} encoder
|
661 | */
|
662 | const flushUintOptRleEncoder = encoder => {
|
663 | /* istanbul ignore else */
|
664 | if (encoder.count > 0) {
|
665 | // flush counter, unless this is the first value (count = 0)
|
666 | // case 1: just a single value. set sign to positive
|
667 | // case 2: write several values. set sign to negative to indicate that there is a length coming
|
668 | writeVarInt(encoder.encoder, encoder.count === 1 ? encoder.s : -encoder.s)
|
669 | if (encoder.count > 1) {
|
670 | writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
|
671 | }
|
672 | }
|
673 | }
|
674 |
|
675 | /**
|
676 | * Optimized Rle encoder that does not suffer from the mentioned problem of the basic Rle encoder.
|
677 | *
|
678 | * Internally uses VarInt encoder to write unsigned integers. If the input occurs multiple times, we write
|
679 | * write it as a negative number. The UintOptRleDecoder then understands that it needs to read a count.
|
680 | *
|
681 | * Encodes [1,2,3,3,3] as [1,2,-3,3] (once 1, once 2, three times 3)
|
682 | */
|
683 | export class UintOptRleEncoder {
|
684 | constructor () {
|
685 | this.encoder = new Encoder()
|
686 | /**
|
687 | * @type {number}
|
688 | */
|
689 | this.s = 0
|
690 | this.count = 0
|
691 | }
|
692 |
|
693 | /**
|
694 | * @param {number} v
|
695 | */
|
696 | write (v) {
|
697 | if (this.s === v) {
|
698 | this.count++
|
699 | } else {
|
700 | flushUintOptRleEncoder(this)
|
701 | this.count = 1
|
702 | this.s = v
|
703 | }
|
704 | }
|
705 |
|
706 | toUint8Array () {
|
707 | flushUintOptRleEncoder(this)
|
708 | return toUint8Array(this.encoder)
|
709 | }
|
710 | }
|
711 |
|
712 | /**
|
713 | * Increasing Uint Optimized RLE Encoder
|
714 | *
|
715 | * The RLE encoder counts the number of same occurences of the same value.
|
716 | * The IncUintOptRle encoder counts if the value increases.
|
717 | * I.e. 7, 8, 9, 10 will be encoded as [-7, 4]. 1, 3, 5 will be encoded
|
718 | * as [1, 3, 5].
|
719 | */
|
720 | export class IncUintOptRleEncoder {
|
721 | constructor () {
|
722 | this.encoder = new Encoder()
|
723 | /**
|
724 | * @type {number}
|
725 | */
|
726 | this.s = 0
|
727 | this.count = 0
|
728 | }
|
729 |
|
730 | /**
|
731 | * @param {number} v
|
732 | */
|
733 | write (v) {
|
734 | if (this.s + this.count === v) {
|
735 | this.count++
|
736 | } else {
|
737 | flushUintOptRleEncoder(this)
|
738 | this.count = 1
|
739 | this.s = v
|
740 | }
|
741 | }
|
742 |
|
743 | toUint8Array () {
|
744 | flushUintOptRleEncoder(this)
|
745 | return toUint8Array(this.encoder)
|
746 | }
|
747 | }
|
748 |
|
749 | /**
|
750 | * @param {IntDiffOptRleEncoder} encoder
|
751 | */
|
752 | const flushIntDiffOptRleEncoder = encoder => {
|
753 | if (encoder.count > 0) {
|
754 | // 31 bit making up the diff | wether to write the counter
|
755 | // const encodedDiff = encoder.diff << 1 | (encoder.count === 1 ? 0 : 1)
|
756 | const encodedDiff = encoder.diff * 2 + (encoder.count === 1 ? 0 : 1)
|
757 | // flush counter, unless this is the first value (count = 0)
|
758 | // case 1: just a single value. set first bit to positive
|
759 | // case 2: write several values. set first bit to negative to indicate that there is a length coming
|
760 | writeVarInt(encoder.encoder, encodedDiff)
|
761 | if (encoder.count > 1) {
|
762 | writeVarUint(encoder.encoder, encoder.count - 2) // since count is always > 1, we can decrement by one. non-standard encoding ftw
|
763 | }
|
764 | }
|
765 | }
|
766 |
|
767 | /**
|
768 | * A combination of the IntDiffEncoder and the UintOptRleEncoder.
|
769 | *
|
770 | * The count approach is similar to the UintDiffOptRleEncoder, but instead of using the negative bitflag, it encodes
|
771 | * in the LSB whether a count is to be read. Therefore this Encoder only supports 31 bit integers!
|
772 | *
|
773 | * Encodes [1, 2, 3, 2] as [3, 1, 6, -1] (more specifically [(1 << 1) | 1, (3 << 0) | 0, -1])
|
774 | *
|
775 | * Internally uses variable length encoding. Contrary to normal UintVar encoding, the first byte contains:
|
776 | * * 1 bit that denotes whether the next value is a count (LSB)
|
777 | * * 1 bit that denotes whether this value is negative (MSB - 1)
|
778 | * * 1 bit that denotes whether to continue reading the variable length integer (MSB)
|
779 | *
|
780 | * Therefore, only five bits remain to encode diff ranges.
|
781 | *
|
782 | * Use this Encoder only when appropriate. In most cases, this is probably a bad idea.
|
783 | */
|
784 | export class IntDiffOptRleEncoder {
|
785 | constructor () {
|
786 | this.encoder = new Encoder()
|
787 | /**
|
788 | * @type {number}
|
789 | */
|
790 | this.s = 0
|
791 | this.count = 0
|
792 | this.diff = 0
|
793 | }
|
794 |
|
795 | /**
|
796 | * @param {number} v
|
797 | */
|
798 | write (v) {
|
799 | if (this.diff === v - this.s) {
|
800 | this.s = v
|
801 | this.count++
|
802 | } else {
|
803 | flushIntDiffOptRleEncoder(this)
|
804 | this.count = 1
|
805 | this.diff = v - this.s
|
806 | this.s = v
|
807 | }
|
808 | }
|
809 |
|
810 | toUint8Array () {
|
811 | flushIntDiffOptRleEncoder(this)
|
812 | return toUint8Array(this.encoder)
|
813 | }
|
814 | }
|
815 |
|
816 | /**
|
817 | * Optimized String Encoder.
|
818 | *
|
819 | * Encoding many small strings in a simple Encoder is not very efficient. The function call to decode a string takes some time and creates references that must be eventually deleted.
|
820 | * In practice, when decoding several million small strings, the GC will kick in more and more often to collect orphaned string objects (or maybe there is another reason?).
|
821 | *
|
822 | * This string encoder solves the above problem. All strings are concatenated and written as a single string using a single encoding call.
|
823 | *
|
824 | * The lengths are encoded using a UintOptRleEncoder.
|
825 | */
|
826 | export class StringEncoder {
|
827 | constructor () {
|
828 | /**
|
829 | * @type {Array<string>}
|
830 | */
|
831 | this.sarr = []
|
832 | this.s = ''
|
833 | this.lensE = new UintOptRleEncoder()
|
834 | }
|
835 |
|
836 | /**
|
837 | * @param {string} string
|
838 | */
|
839 | write (string) {
|
840 | this.s += string
|
841 | if (this.s.length > 19) {
|
842 | this.sarr.push(this.s)
|
843 | this.s = ''
|
844 | }
|
845 | this.lensE.write(string.length)
|
846 | }
|
847 |
|
848 | toUint8Array () {
|
849 | const encoder = new Encoder()
|
850 | this.sarr.push(this.s)
|
851 | this.s = ''
|
852 | writeVarString(encoder, this.sarr.join(''))
|
853 | writeUint8Array(encoder, this.lensE.toUint8Array())
|
854 | return toUint8Array(encoder)
|
855 | }
|
856 | }
|