1 |
|
2 |
|
3 |
|
4 |
|
5 | CND = require 'cnd'
|
6 | rpr = CND.rpr
|
7 | badge = 'HOLLERITH/CODEC'
|
8 | debug = CND.get_logger 'debug', badge
|
9 | warn = CND.get_logger 'warn', badge
|
10 |
|
11 | CND.shim()
|
12 |
|
13 |
|
14 |
|
# Registry of the one-byte type markers that prefix every encoded value. Each marker is exported on
# `@typemarkers` and also kept in a module-local `tm_*` variable for use by the readers and writers.
@typemarkers  = {}

# `lo` doubles as the terminator byte of texts and lists; `hi` is the highest possible byte.
tm_lo         = @typemarkers.lo         = 0x00
tm_null       = @typemarkers[ 'null'  ] = 'B'.codePointAt 0
tm_false      = @typemarkers[ 'false' ] = 'C'.codePointAt 0
tm_true       = @typemarkers[ 'true'  ] = 'D'.codePointAt 0
tm_list       = @typemarkers.list       = 'E'.codePointAt 0
tm_date       = @typemarkers.date       = 'G'.codePointAt 0
tm_ninfinity  = @typemarkers.ninfinity  = 'J'.codePointAt 0
tm_nnumber    = @typemarkers.nnumber    = 'K'.codePointAt 0
tm_pnumber    = @typemarkers.pnumber    = 'L'.codePointAt 0
tm_pinfinity  = @typemarkers.pinfinity  = 'M'.codePointAt 0
tm_text       = @typemarkers.text       = 'T'.codePointAt 0
tm_private    = @typemarkers.private    = 'Z'.codePointAt 0
tm_hi         = @typemarkers.hi         = 0xff
|
30 |
|
31 |
|
# Fixed encoded sizes in bytes, exported on `@bytecounts` and mirrored in module-local variables.
@bytecounts           = {}
# A singular value (null / false / true / an infinity) is just its type marker.
bytecount_singular    = @bytecounts.singular    = 1
bytecount_typemarker  = @bytecounts.typemarker  = 1
# A number is one type marker followed by the 8 bytes of a big-endian double.
bytecount_number      = @bytecounts.number      = 9
# A date is one date marker followed by an encoded number.
bytecount_date        = @bytecounts.date        = bytecount_number + 1
|
37 |
|
38 |
|
# Exported sentinel values: dates built from the time values -8640000000000000 and +8640000000000000
# (milliseconds relative to the epoch).
@sentinels            = {}
@sentinels.firstdate  = new Date( -8640000000000000 )
@sentinels.lastdate   = new Date( +8640000000000000 )
|
44 |
|
45 |
|
# Exported one-byte buffers holding the lowest (`lo`) and the highest (`hi`) type marker.
@keys     = {}
@keys.lo  = new Buffer [ @typemarkers.lo ]
@keys.hi  = new Buffer [ @typemarkers.hi ]
|
50 |
|
51 |
|
# Registry of exported symbols. `fallback` is handed to custom encoders / decoders as their third
# argument; returning it tells the codec to use its default treatment of a private value.
@[ 'symbols' ] = {}
# The symbol is registered under `@symbols.fallback` like the entries of the other registries; the
# historical top-level `@fallback` export is kept as an alias so existing callers keep working.
symbol_fallback = @[ 'symbols' ][ 'fallback' ] = @[ 'fallback' ] = Symbol 'fallback'
|
54 |
|
55 |
|
56 |
|
57 |
|
58 |
|
# Module-wide result buffer that all `write_*` functions write into. It starts out at
# `rbuffer_min_size` bytes, is enlarged by `grow_rbuffer`, and is cut back to `rbuffer_max_size`
# bytes by `release_extraneous_rbuffer_bytes` once it has grown beyond that size.
rbuffer_min_size  =  1 * 1024
rbuffer_max_size  = 64 * 1024
rbuffer           = new Buffer( rbuffer_min_size )
|
62 |
|
63 |
|
# Replace the shared `rbuffer` with a buffer of twice its length, carrying over the bytes written so
# far. Always returns `null`.
grow_rbuffer = ->
  growth_factor = 2
  enlarged      = new Buffer Math.floor( rbuffer.length * growth_factor + 0.5 )
  rbuffer.copy enlarged
  rbuffer       = enlarged
  return null
|
72 |
|
73 |
|
# Swap the shared `rbuffer` for a fresh buffer of `rbuffer_max_size` bytes when it has grown beyond
# that size; the old contents are not carried over. Always returns `null`.
release_extraneous_rbuffer_bytes = ->
  rbuffer = new Buffer rbuffer_max_size unless rbuffer.length <= rbuffer_max_size
  return null
|
79 |
|
80 |
|
81 |
|
82 |
|
83 |
|
# Write the one-byte encoding of `null`, `false` or `true` into `rbuffer` at `idx`.
#
# Returns the index following the written byte; throws for any other value.
write_singular = ( idx, value ) ->
  grow_rbuffer() until rbuffer.length >= idx + bytecount_singular
  typemarker = switch value
    when null   then tm_null
    when false  then tm_false
    when true   then tm_true
    else throw new Error "unable to encode value of type #{CND.type_of value}"
  rbuffer[ idx ] = typemarker
  return idx + bytecount_singular
|
92 |
|
93 |
|
# Decode the one-byte value (`null`, `false` or `true`) found in `buffer` at `idx`.
#
# Returns `[ next_idx, value, ]`; throws when the byte is none of the three singular markers.
read_singular = ( buffer, idx ) ->
  typemarker = buffer[ idx ]
  if      typemarker is tm_null   then value = null
  else if typemarker is tm_false  then value = false
  else if typemarker is tm_true   then value = true
  else
    throw new Error "unable to decode 0x#{typemarker.toString 16} at index #{idx} (#{rpr buffer})"
  return [ idx + bytecount_singular, value, ]
|
101 |
|
102 |
|
103 |
|
104 |
|
105 |
|
# Write a 'private' (application-defined) value into `rbuffer` at `idx`.
#
# `value` is an object whose `type` entry (default `'private'`) names the private type and whose
# `value` entry holds the payload. When `encoder` is given it is called as
# `encoder type, payload, symbol_fallback`; whatever it returns replaces the payload unless it
# returns `symbol_fallback`. The bytes written are: private marker, list marker, the encoded pair
# `[ type, payload, ]`, and a terminating `tm_lo`.
#
# Returns the index following the last written byte.
write_private = ( idx, value, encoder ) ->
  grow_rbuffer() until rbuffer.length >= idx + 3 * bytecount_typemarker
  #.........................................................................................................
  rbuffer[ idx ]  = tm_private
  idx            += bytecount_typemarker
  #.........................................................................................................
  rbuffer[ idx ]  = tm_list
  idx            += bytecount_typemarker
  #.........................................................................................................
  type            = value[ 'type' ] ? 'private'
  value           = value[ 'value' ]
  #.........................................................................................................
  if encoder?
    encoded_value = encoder type, value, symbol_fallback
    value         = encoded_value unless encoded_value is symbol_fallback
  #.........................................................................................................
  wrapped_value   = [ type, value, ]
  idx             = _encode wrapped_value, idx
  #.........................................................................................................
  # The capacity check at the top was made before the payload was written; the payload may have
  # filled `rbuffer` right up to its end. Assigning to an index past the end of a Buffer is silently
  # ignored, so make room again before writing the terminator.
  grow_rbuffer() until rbuffer.length >= idx + bytecount_typemarker
  rbuffer[ idx ]  = tm_lo
  idx            += bytecount_typemarker
  #.........................................................................................................
  return idx
|
129 |
|
130 |
|
# Decode a private value from `buffer`; `idx` is the position of its leading marker byte, which is
# skipped so that the `[ type, value, ]` list following it can be read.
#
# Without a `decoder`, or when `decoder type, value, symbol_fallback` returns `symbol_fallback`, the
# result is the object `{ type, value, }`; otherwise it is whatever the decoder returned. A decoder
# returning `undefined` is an error.
#
# Returns `[ next_idx, result, ]`.
read_private = ( buffer, idx, decoder ) ->
  [ idx, pair, ]    = read_list buffer, idx + bytecount_typemarker
  [ type, value, ]  = pair
  return [ idx, { type, value, }, ] unless decoder?
  R = decoder type, value, symbol_fallback
  throw new Error "encountered illegal value `undefined` when reading private type" if R is undefined
  R = { type, value, } if R is symbol_fallback
  return [ idx, R, ]
|
140 |
|
141 |
|
142 |
|
143 |
|
144 |
|
# Write `number` into `rbuffer` at `idx` as a type marker followed by a big-endian double.
#
# Numbers below zero get the negative-number marker; their absolute value is written and the eight
# payload bytes are then bit-inverted (`read_nnumber` undoes this).
#
# Returns the index following the encoded number.
write_number = ( idx, number ) ->
  grow_rbuffer() until rbuffer.length >= idx + bytecount_number
  is_negative     = number < 0
  rbuffer[ idx ]  = if is_negative then tm_nnumber else tm_pnumber
  rbuffer.writeDoubleBE ( if is_negative then -number else number ), idx + 1
  _invert_buffer rbuffer, idx if is_negative
  return idx + bytecount_number
|
156 |
|
157 |
|
# Write the one-byte marker for an infinity into `rbuffer` at `idx`: the negative-infinity marker
# for `-Infinity`, the positive-infinity marker for anything else.
#
# Returns the index following the written byte.
write_infinity = ( idx, number ) ->
  grow_rbuffer() until rbuffer.length >= idx + bytecount_singular
  if number is -Infinity then marker = tm_ninfinity else marker = tm_pinfinity
  rbuffer[ idx ] = marker
  return idx + bytecount_singular
|
162 |
|
163 |
|
# Decode a negative number from `buffer` at `idx`. Works on a copy of the nine encoded bytes so the
# bit inversion applied by `write_number` can be undone without touching `buffer`.
#
# Returns `[ next_idx, number, ]`; throws when `idx` does not point at a negative-number marker.
read_nnumber = ( buffer, idx ) ->
  unless buffer[ idx ] is tm_nnumber
    throw new Error "not a negative number at index #{idx}"
  end_idx = idx + bytecount_number
  copy    = new Buffer buffer.slice idx, end_idx
  _invert_buffer copy, 0
  return [ end_idx, -( copy.readDoubleBE 1 ), ]
|
168 |
|
169 |
|
# Decode a non-negative number from `buffer` at `idx`: a marker byte followed by a big-endian
# double.
#
# Returns `[ next_idx, number, ]`; throws when `idx` does not point at a positive-number marker.
read_pnumber = ( buffer, idx ) ->
  unless buffer[ idx ] is tm_pnumber
    throw new Error "not a positive number at index #{idx}"
  number = buffer.readDoubleBE idx + 1
  return [ idx + bytecount_number, number, ]
|
173 |
|
174 |
|
# Bit-invert, in place, the eight bytes of `buffer` at positions `idx + 1` through `idx + 8`; the
# byte at `idx` itself is left alone. Returns `buffer`.
_invert_buffer = ( buffer, idx ) ->
  position      = idx + 1
  last_position = idx + 8
  while position <= last_position
    buffer[ position ]  = ~buffer[ position ]
    position           += 1
  return buffer
|
178 |
|
179 |
|
180 |
|
181 |
|
182 |
|
# Write `date` into `rbuffer` at `idx` as a date marker followed by the encoding of its numeric
# value (`+date`).
#
# Returns the index following the encoded date.
write_date = ( idx, date ) ->
  grow_rbuffer() until rbuffer.length >= idx + bytecount_date
  timestamp       = +date
  rbuffer[ idx ]  = tm_date
  next_idx        = write_number idx + 1, timestamp
  return next_idx
|
188 |
|
189 |
|
# Decode a date from `buffer` at `idx`: a date marker followed by an encoded negative or positive
# number, which is passed to `new Date`.
#
# Returns `[ next_idx, date, ]`; throws when `idx` does not point at a date marker or when the byte
# after it is not a number marker.
read_date = ( buffer, idx ) ->
  unless buffer[ idx ] is tm_date
    throw new Error "not a date at index #{idx}"
  type = buffer[ idx + 1 ]
  if type is tm_nnumber
    [ idx, value, ] = read_nnumber buffer, idx + 1
  else if type is tm_pnumber
    [ idx, value, ] = read_pnumber buffer, idx + 1
  else
    throw new Error "unknown date type marker 0x#{type.toString 16} at index #{idx}"
  return [ idx, ( new Date value ), ]
|
197 |
|
198 |
|
199 |
|
200 |
|
201 |
|
# Write `text` into `rbuffer` at `idx` as: text marker, the escaped text in UTF-8, terminating
# `tm_lo` (0x00) byte.
#
# Because 0x00 terminates the text, U+0001 is first rewritten to `\x01\x02` and U+0000 is then
# rewritten to `\x01\x01`; `read_text` reverses both steps.
#
# Returns the index following the terminator.
write_text = ( idx, text ) ->
  escaped = ( text.replace /\x01/g, '\x01\x02' ).replace /\x00/g, '\x01\x01'
  # marker + payload + terminator
  end_idx = idx + ( ( Buffer.byteLength escaped, 'utf-8' ) + 2 )
  grow_rbuffer() until rbuffer.length >= end_idx
  rbuffer[ idx ]          = tm_text
  rbuffer.write escaped, idx + 1
  rbuffer[ end_idx - 1 ]  = tm_lo
  return end_idx
|
211 |
|
212 |
|
# Decode a text from `buffer` at `idx`: scan forward to the terminating `tm_lo` byte, decode the
# bytes in between as UTF-8 and undo the escaping applied by `write_text`.
#
# Returns `[ next_idx, text, ]`; throws when `idx` does not point at a text marker or when the
# buffer ends before a terminator is found.
read_text = ( buffer, idx ) ->
  unless buffer[ idx ] is tm_text
    throw new Error "not a text at index #{idx}"
  stop_idx = idx + 1
  until ( byte = buffer[ stop_idx ] ) is tm_lo
    # reading past the end of a buffer yields `undefined`
    throw new Error "runaway string at index #{idx}" unless byte?
    stop_idx += 1
  escaped   = buffer.toString 'utf-8', idx + 1, stop_idx
  unescaped = ( escaped.replace /\x01\x01/g, '\x00' ).replace /\x01\x02/g, '\x01'
  return [ stop_idx + 1, unescaped, ]
|
225 |
|
226 |
|
227 |
|
228 |
|
229 |
|
# Decode a list from `buffer` at `idx`: a list marker, any number of encoded elements, and a
# terminating `tm_lo` byte. Elements are decoded one at a time through `_decode` in single-value
# mode.
#
# Returns `[ next_idx, elements, ]`; throws when `idx` does not point at a list marker or when the
# buffer ends before the terminator.
read_list = ( buffer, idx ) ->
  unless buffer[ idx ] is tm_list
    throw new Error "not a list at index #{idx}"
  elements  = []
  idx      += 1
  until ( byte = buffer[ idx ] ) is tm_lo
    [ idx, value, ] = _decode buffer, idx, true
    elements.push value[ 0 ]
    # `byte` is `undefined` when `idx` has run past the end of the buffer
    throw new Error "runaway list at index #{idx}" unless byte?
  return [ idx + 1, elements, ]
|
240 |
|
241 |
|
242 |
|
243 |
|
244 |
|
# Write a single non-list `value` into `rbuffer` at `idx`, choosing the writer by the value's type
# as reported by `CND.type_of`. Values of other types are treated as private values when
# `CND.isa_pod` accepts them and as singular values (`null`, `false`, `true`) otherwise.
#
# Returns the index following the encoded value.
write = ( idx, value, encoder ) ->
  type = CND.type_of value
  return write_text     idx, value if type is 'text'
  return write_number   idx, value if type is 'number'
  return write_infinity idx, value if type is 'jsinfinity'
  return write_date     idx, value if type is 'jsdate'
  #.........................................................................................................
  if CND.isa_pod value
    return write_private idx, value, encoder
  return write_singular idx, value
|
254 |
|
255 |
|
256 |
|
257 |
|
258 |
|
# Encode the list `key` into a new buffer.
#
# `encoder` is optional and is forwarded to the writers of private values. Throws when `key` is not
# a list. The shared `rbuffer` is zero-filled before encoding and cut back to its maximum size
# afterwards.
@encode = ( key, encoder ) ->
  rbuffer.fill 0x00
  type = CND.type_of key
  throw new Error "expected a list, got a #{type}" unless type is 'list'
  end_idx = _encode key, 0, encoder
  # hand out a copy so the shared buffer can be re-used by the next call
  R = new Buffer end_idx
  rbuffer.copy R, 0, 0, end_idx
  release_extraneous_rbuffer_bytes()
  return R
|
268 |
|
269 |
|
# Like `@encode`, but with a single `tm_hi` (0xff) byte appended after the encoded elements.
#
# `encoder` is optional and is forwarded to the writers of private values. Throws when `key` is not
# a list.
@encode_plus_hi = ( key, encoder ) ->
  rbuffer.fill 0x00
  type = CND.type_of key
  throw new Error "expected a list, got a #{type}" unless type is 'list'
  hi_idx = _encode key, 0, encoder
  grow_rbuffer() until rbuffer.length >= hi_idx + 1
  rbuffer[ hi_idx ] = tm_hi
  size = hi_idx + 1
  # hand out a copy so the shared buffer can be re-used by the next call
  R = new Buffer size
  rbuffer.copy R, 0, 0, size
  release_extraneous_rbuffer_bytes()
  return R
|
283 |
|
284 |
|
# Encode the elements of the list `key` one after the other into `rbuffer`, starting at `idx`.
#
# An element that is itself a list is written as list marker, its encoded elements, `tm_lo`
# terminator; all other elements go through `write`. `encoder` is forwarded for private values.
# When encoding fails, a warning showing the key is logged and the error is re-thrown.
#
# Returns the index following the last written byte.
_encode = ( key, idx, encoder ) ->
  for element in key
    try
      if CND.isa_list element
        # Assigning to an index past the end of a Buffer is silently ignored, so make sure there is
        # room before writing either of the two list delimiters.
        grow_rbuffer() until rbuffer.length >= idx + 1
        rbuffer[ idx ]  = tm_list
        idx            += +1
        for sub_element in element
          idx = _encode [ sub_element, ], idx, encoder
        grow_rbuffer() until rbuffer.length >= idx + 1
        rbuffer[ idx ]  = tm_lo
        idx            += +1
      else
        idx = write idx, element, encoder
    catch error
      # Build the diagnostic with `rpr` only: `this` is not the module object inside this function,
      # so calling a method via `@` here would raise a TypeError that hides the original error.
      key_rpr = ( rpr member for member in key )
      warn "detected problem with key [ #{rpr key_rpr.join ', '} ]"
      throw error
  #.........................................................................................................
  return idx
|
309 |
|
310 |
|
# Decode all values found in `buffer` and return them as a list. `decoder` is optional and is
# forwarded to `_decode`.
@decode = ( buffer, decoder ) ->
  [ end_idx, values, ] = _decode buffer, 0, false, decoder
  return values
|
313 |
|
314 |
|
# Decode values from `buffer` starting at `idx`, dispatching on the type marker found at the current
# position; bytes that match none of the known markers are handed to `read_singular`.
#
# With a truthy `single`, decoding stops after the first value; otherwise it continues to the end
# of the buffer. `decoder` is passed on to `read_private`.
#
# Returns `[ next_idx, values, ]`.
_decode = ( buffer, idx, single, decoder ) ->
  R         = []
  last_idx  = buffer.length - 1
  until idx > last_idx
    type    = buffer[ idx ]
    outcome = switch type
      when tm_list      then read_list    buffer, idx
      when tm_text      then read_text    buffer, idx
      when tm_nnumber   then read_nnumber buffer, idx
      when tm_ninfinity then [ idx + 1, -Infinity, ]
      when tm_pnumber   then read_pnumber buffer, idx
      when tm_pinfinity then [ idx + 1, +Infinity, ]
      when tm_date      then read_date    buffer, idx
      when tm_private   then read_private buffer, idx, decoder
      else                   read_singular buffer, idx
    [ idx, value, ] = outcome
    R.push value
    break if single
  #.........................................................................................................
  return [ idx, R ]
|
334 |
|
335 |
|
336 |
|
337 |
|
338 |
|
339 |
|
340 |
|
341 |
|
# Character tables used to render buffers as text. Each table must contain exactly 256 characters
# (one per byte value) once line breaks are removed; `@_compile_encodings` checks this and replaces
# each string with a list of characters.
#
# NB a literal backslash has to be written as `\\` inside these strings; a lone `\` before `]` would
# be consumed as an escape and leave the table one character short.
@encodings =

  #.........................................................................................................
  dbcs2: """
    ⓪①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳㉑㉒㉓㉔㉕㉖㉗㉘㉙㉚㉛
    ㉜!"#$%&'()*+,-./0123456789:;<=>?
    @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_
    `abcdefghijklmnopqrstuvwxyz{|}~㉠
    ㉝㉞㉟㊱㊲㊳㊴㊵㊶㊷㊸㊹㊺㊻㊼㊽㊾㊿㋐㋑㋒㋓㋔㋕㋖㋗㋘㋙㋚㋛㋜㋝
    ㋞㋟㋠㋡㋢㋣㋤㋥㋦㋧㋨㋩㋪㋫㋬㋭㋮㋯㋰㋱㋲㋳㋴㋵㋶㋷㋸㋹㋺㋻㋼㋽
    ㋾㊊㊋㊌㊍㊎㊏㊐㊑㊒㊓㊔㊕㊖㊗㊘㊙㊚㊛㊜㊝㊞㊟㊠㊡㊢㊣㊤㊥㊦㊧㊨
    ㊩㊪㊫㊬㊭㊮㊯㊰㊀㊁㊂㊃㊄㊅㊆㊇㊈㊉㉈㉉㉊㉋㉌㉍㉎㉏⓵⓶⓷⓸⓹〓
    """

  #.........................................................................................................
  aleph: """
    БДИЛЦЧШЭЮƆƋƏƐƔƥƧƸψŐőŒœŊŁłЯɔɘɐɕəɞ
    ␣!"#$%&'()*+,-./0123456789:;<=>?
    @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_
    `abcdefghijklmnopqrstuvwxyz{|}~ω
    ΓΔΘΛΞΠΣΦΨΩαβγδεζηθικλμνξπρςστυφχ
    Ж¡¢£¤¥¦§¨©ª«¬Я®¯°±²³´µ¶·¸¹º»¼½¾¿
    ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
    àáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
    """

  #.........................................................................................................
  rdctn: """
    ∇≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡≡
    ␣!"#$%&'()*+,-./0123456789:;<=>?
    @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_
    `abcdefghijklmnopqrstuvwxyz{|}~≡
    ∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃∃
    ∃∃¢£¤¥¦§¨©ª«¬Я®¯°±²³´µ¶·¸¹º»¼½¾¿
    ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
    àáâãäåæçèéêëìíîïðñò≢≢≢≢≢≢≢≢≢≢≢≢Δ
    """
|
377 |
|
378 |
|
379 |
|
# Return a printable representation of `buffer`: its `rpr` rendering, a space, and the buffer's
# bytes rendered with the given `encoding` (see `@_encode_buffer`).
@rpr_of_buffer = ( buffer, encoding ) ->
  return "#{rpr buffer} #{@_encode_buffer buffer, encoding}"
|
382 |
|
383 |
|
# Render each byte of `buffer` as the character found at that position of a 256-entry table.
#
# `encoding` is either such a table (a list) or the name of an entry of `@encodings`; it defaults to
# `'rdctn'`. Returns the resulting string.
@_encode_buffer = ( buffer, encoding = 'rdctn' ) ->
  table = if CND.isa_list encoding then encoding else @encodings[ encoding ]
  chrs  = []
  chrs.push table[ byte ] for byte in buffer
  return chrs.join ''
|
388 |
|
389 |
|
# Turn every entry of `@encodings` from a multi-line string into a list of its characters (line
# breaks removed, a high surrogate kept together with the code unit following it). Throws when a
# table does not have exactly 256 characters. Run once, right after its definition.
@_compile_encodings = ->
  #.........................................................................................................
  split_into_chrs = ( text ) ->
    # splitting on a capturing group keeps the matches; the empty strings between them are dropped
    parts = text.split /([\ud800-\udbff].|.)/
    return parts.filter ( part ) -> part isnt ''
  #.........................................................................................................
  for name, definition of @encodings
    chrs  = split_into_chrs definition.replace /\n+/g, ''
    count = chrs.length
    throw new Error "expected 256 characters, found #{count} in encoding #{rpr name}" unless count is 256
    @encodings[ name ] = chrs
  return null
@_compile_encodings()
|