1 |
|
2 | (function() {
|
// Regex source fragments; they are assigned further down in this file where
// the matchers are built, and are declared here so they stay local to the module.
var name, nameO, nameOG, hex, hexG, dec, decG;

// Shared helper library: value representation, loggers and type checks.
var CND = require('cnd');
var rpr = CND.rpr.bind(CND);

// Every logger created below is tagged with this badge.
var badge = 'coffeenode-chr';
var log = CND.get_logger('plain', badge);
var info = CND.get_logger('info', badge);
var alert = CND.get_logger('alert', badge);
var debug = CND.get_logger('debug', badge);
var warn = CND.get_logger('warn', badge);
var urge = CND.get_logger('urge', badge);
var whisper = CND.get_logger('whisper', badge);
var help = CND.get_logger('help', badge);
var echo = CND.echo.bind(CND);

// Lookup tables for character sets (CSG) and ranges (RSG), exposed on the module object.
var character_sets_and_ranges = require('./character-sets-and-ranges');
this._names_and_ranges_by_csg = character_sets_and_ranges['names-and-ranges-by-csg'];
this._ranges_by_rsg = character_sets_and_ranges['ranges-by-rsg'];

// Interval lookup used by `_as_rsg` and `_as_range_name`.
var binary_interval_search = require('./binary-interval-search');
|
36 |
|
/**
 * Split `text` into an array of character-sized pieces.
 *
 * `options['input']` selects the splitter: 'plain' (default), 'ncr' or 'xncr'.
 * Empty pieces produced by the split are dropped.
 *
 * @param {string} text
 * @param {Object} [options]
 * @returns {string[]} empty array for empty text
 * @throws {Error} on an unknown input mode
 */
this.chrs_from_text = function(text, options) {
  var input_mode, splitter, pieces, chrs, i;
  if (text.length === 0) {
    return [];
  }
  input_mode = options != null ? options['input'] : undefined;
  if (input_mode == null) {
    input_mode = 'plain';
  }
  if (input_mode === 'plain') {
    splitter = this._plain_splitter;
  } else if (input_mode === 'ncr') {
    splitter = this._ncr_splitter;
  } else if (input_mode === 'xncr') {
    splitter = this._xncr_splitter;
  } else {
    throw new Error("unknown input mode: " + (rpr(input_mode)));
  }
  // The splitters capture their match, so `split` returns the matched
  // characters interleaved with (mostly empty) in-between strings.
  pieces = text.split(splitter);
  chrs = [];
  for (i = 0; i < pieces.length; i++) {
    if (pieces[i].length !== 0) {
      chrs.push(pieces[i]);
    }
  }
  return chrs;
};
|
59 |
|
/**
 * Build a 'CHR/chunk' record from a CSG, an RSG and a list of characters.
 * The characters are concatenated into the record's `text`.
 */
this._new_chunk = function(csg, rsg, chrs) {
  var text = chrs.join('');
  return {
    '~isa': 'CHR/chunk',
    'csg': csg,
    'rsg': rsg,
    'text': text
  };
};
|
70 |
|
/**
 * Split `text` into chunks of consecutive characters that share the same RSG.
 *
 * Each character is analyzed with `this.analyze`; characters whose CSG is 'u'
 * contribute their `chr`, all others contribute their `ncr` representation.
 * A new chunk starts whenever the RSG differs from the previous character's.
 *
 * `options['output']` selects the output mode:
 *   - 'plain' (default): characters are copied unchanged;
 *   - 'html': the characters `&`, `<` and `>` are replaced by the entities
 *     `&amp;`, `&lt;` and `&gt;` so the chunk text can be embedded in HTML.
 *
 * `options` is also forwarded to `chrs_from_text` and `analyze`.
 *
 * @param {string} text
 * @param {Object} [options]
 * @returns {Object[]} list of 'CHR/chunk' records; empty for empty text
 * @throws {Error} on an unknown output mode
 */
this.chunks_from_text = function(text, options) {
  var R, chr, chrs, csg, description, i, last_csg, last_rsg, output_mode, rsg, singles, transform_output;
  R = [];
  if (text.length === 0) {
    return R;
  }
  last_csg = 'u';
  last_rsg = null;
  chrs = [];
  output_mode = options != null ? options['output'] : undefined;
  if (output_mode == null) {
    output_mode = 'plain';
  }
  switch (output_mode) {
    case 'plain':
      transform_output = function(chr) {
        return chr;
      };
      break;
    case 'html':
      // Only single literal characters are escaped; NCR strings such as
      // `&#x4e00;` are longer than one character and pass through unchanged.
      transform_output = function(chr) {
        switch (chr) {
          case '&':
            return '&amp;';
          case '<':
            return '&lt;';
          case '>':
            return '&gt;';
          default:
            return chr;
        }
      };
      break;
    default:
      throw new Error("unknown output mode: " + (rpr(output_mode)));
  }
  singles = this.chrs_from_text(text, options);
  for (i = 0; i < singles.length; i++) {
    description = this.analyze(singles[i], options);
    csg = description.csg;
    rsg = description.rsg;
    chr = description[csg === 'u' ? 'chr' : 'ncr'];
    if (rsg !== last_rsg) {
      // Range changed: flush what has been collected so far.
      if (chrs.length > 0) {
        R.push(this._new_chunk(last_csg, last_rsg, chrs));
      }
      last_csg = csg;
      last_rsg = rsg;
      chrs = [];
    }
    chrs.push(transform_output(chr));
  }
  if (chrs.length > 0) {
    R.push(this._new_chunk(last_csg, last_rsg, chrs));
  }
  return R;
};
|
129 |
|
/**
 * Render `text` as a sequence of `<span>` elements, one per chunk returned by
 * `chunks_from_text` in 'html' output mode. Each span's class is the chunk's
 * RSG. Only `options['input']` (default 'plain') is forwarded.
 *
 * @param {string} text
 * @param {Object} [options]
 * @returns {string} concatenated span markup ('' when there are no chunks)
 */
this.html_from_text = function(text, options) {
  var input_mode, chunks, html, i, chunk;
  input_mode = options != null ? options['input'] : undefined;
  if (input_mode == null) {
    input_mode = 'plain';
  }
  chunks = this.chunks_from_text(text, {
    input: input_mode,
    output: 'html'
  });
  html = '';
  for (i = 0; i < chunks.length; i++) {
    chunk = chunks[i];
    html += "<span class=\"" + chunk['rsg'] + "\">" + chunk['text'] + "</span>";
  }
  return html;
};
|
144 |
|
/**
 * Return the CID of the first character of `chr`.
 * `options['input']` selects the input mode (default 'plain').
 */
this.cid_from_chr = function(chr, options) {
  var input_mode, triple;
  input_mode = options != null ? options['input'] : undefined;
  if (input_mode == null) {
    input_mode = 'plain';
  }
  // triple is [first_chr, csg, cid]
  triple = this._chr_csg_cid_from_chr(chr, input_mode);
  return triple[2];
};
|
150 |
|
/**
 * Return `[csg, cid]` for the first character of `chr`.
 * `options['input']` selects the input mode (default 'plain').
 */
this.csg_cid_from_chr = function(chr, options) {
  var input_mode, triple;
  input_mode = options != null ? options['input'] : undefined;
  if (input_mode == null) {
    input_mode = 'plain';
  }
  // Drop the leading first_chr element of [first_chr, csg, cid].
  triple = this._chr_csg_cid_from_chr(chr, input_mode);
  return triple.slice(1);
};
|
156 |
|
/**
 * Identify the first character of `chr` and return `[first_chr, csg, cid]`.
 *
 * The match at the start of `chr` is classified by its length:
 *   - 1 code unit: a single character, CSG 'u', CID is its char code;
 *   - 2 code units: a surrogate pair, CSG 'u', CID decoded from the pair;
 *   - longer: an (X)NCR whose CSG and hex/decimal CID come from the matcher's
 *     capture groups; an empty CSG is reported as 'u'.
 *
 * @param {string} chr
 * @param {string} [input_mode] 'plain' (default), 'ncr' or 'xncr'
 * @returns {Array} [first_chr, csg, cid]
 * @throws {Error} on empty input, unknown input mode, or when nothing matches
 */
this._chr_csg_cid_from_chr = function(chr, input_mode) {
  var matcher, found, head, lead, trail, csg, hex_digits, dec_digits, cid;
  if (chr.length === 0) {
    throw new Error("unable to obtain CID from empty string");
  }
  if (input_mode == null) {
    input_mode = 'plain';
  }
  if (input_mode === 'plain') {
    matcher = this._first_chr_matcher_plain;
  } else if (input_mode === 'ncr') {
    matcher = this._first_chr_matcher_ncr;
  } else if (input_mode === 'xncr') {
    matcher = this._first_chr_matcher_xncr;
  } else {
    throw new Error("unknown input mode: " + (rpr(input_mode)));
  }
  found = chr.match(matcher);
  if (found == null) {
    throw new Error("illegal character sequence in " + (rpr(chr)));
  }
  head = found[0];
  if (head.length === 1) {
    return [head, 'u', head.charCodeAt(0)];
  }
  if (head.length === 2) {
    // UTF-16 surrogate pair -> code point.
    lead = head.charCodeAt(0);
    trail = head.charCodeAt(1);
    return [head, 'u', (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000];
  }
  // Anything longer matched the NCR / XNCR alternative of the matcher.
  csg = found[1];
  hex_digits = found[2];
  dec_digits = found[3];
  cid = hex_digits != null ? parseInt(hex_digits, 16) : parseInt(dec_digits, 10);
  return [head, csg.length === 0 ? 'u' : csg, cid];
};
|
206 |
|
/** Resolve `cid_hint` / `O` via `_csg_cid_from_hint` and return the CSG. */
this.as_csg = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return csg_and_cid[0];
};
|
210 |
|
/** Resolve `cid_hint` / `O` via `_csg_cid_from_hint` and return the CID. */
this.as_cid = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return csg_and_cid[1];
};
|
214 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_chr`. */
this.as_chr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_chr(csg_and_cid[0], csg_and_cid[1]);
};
|
218 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_uchr`. */
this.as_uchr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_uchr(csg_and_cid[0], csg_and_cid[1]);
};
|
222 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_fncr`. */
this.as_fncr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_fncr(csg_and_cid[0], csg_and_cid[1]);
};
|
226 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_sfncr`. */
this.as_sfncr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_sfncr(csg_and_cid[0], csg_and_cid[1]);
};
|
230 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_xncr`. */
this.as_xncr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_xncr(csg_and_cid[0], csg_and_cid[1]);
};
|
234 |
|
/**
 * Resolve `cid_hint` / `O` to `[csg, cid]` and return the plain NCR
 * (`&#x<hex cid>;`) for the CID.
 *
 * The CSG is still resolved and validated, but it is deliberately not part of
 * the result: `_analyze` defines `ncr` as `_as_xncr('u', cid)`, and this
 * accessor returns the same value. Previously it returned the XNCR (with the
 * CSG prefix) for non-'u' character sets, making it identical to `as_xncr`.
 */
this.as_ncr = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_xncr('u', csg_and_cid[1]);
};
|
238 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_rsg`. */
this.as_rsg = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_rsg(csg_and_cid[0], csg_and_cid[1]);
};
|
242 |
|
/** Resolve `cid_hint` / `O` to `[csg, cid]` and return the result of `_as_range_name`. */
this.as_range_name = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._as_range_name(csg_and_cid[0], csg_and_cid[1]);
};
|
246 |
|
/**
 * Resolve `cid_hint` / `O` to `[csg, cid]` and return the 'CHR/info' record
 * built by `_analyze`.
 */
this.analyze = function(cid_hint, O) {
  var csg_and_cid = this._csg_cid_from_hint(cid_hint, O);
  return this._analyze(csg_and_cid[0], csg_and_cid[1]);
};
|
250 |
|
/**
 * Build a 'CHR/info' record describing the character `cid` in set `csg`.
 *
 * For CSG 'u', `chr` is the Unicode character itself; for any other CSG,
 * `chr` is the XNCR string. `ncr` is always the NCR built with CSG 'u',
 * while `xncr` carries the actual CSG.
 */
this._analyze = function(csg, cid) {
  var xncr = this._as_xncr(csg, cid);
  var chr = csg === 'u' ? this._unicode_chr_from_cid(cid) : xncr;
  return {
    '~isa': 'CHR/info',
    'chr': chr,
    'uchr': this._unicode_chr_from_cid(cid),
    'csg': csg,
    'cid': cid,
    'fncr': this._as_fncr(csg, cid),
    'sfncr': this._as_sfncr(csg, cid),
    'ncr': this._as_xncr('u', cid),
    'xncr': xncr,
    'rsg': this._as_rsg(csg, cid)
  };
};
|
275 |
|
/**
 * Return the character for `cid` in `csg`: the Unicode character when the
 * CSG is 'u', otherwise the `chr` field of the `_analyze` record.
 */
this._as_chr = function(csg, cid) {
  return csg === 'u'
    ? this._unicode_chr_from_cid(cid)
    : this._analyze(csg, cid)['chr'];
};
|
282 |
|
/** Return the Unicode character for `cid`; the `csg` argument is not consulted. */
this._as_uchr = function(csg, cid) {
  var uchr = this._unicode_chr_from_cid(cid);
  return uchr;
};
|
286 |
|
/**
 * Convert a code point to a JavaScript string.
 * Values up to 0xffff map to a single code unit; larger values are encoded
 * as a high/low surrogate pair.
 */
this._unicode_chr_from_cid = function(cid) {
  var offset, lead, trail;
  if (!(cid <= 0xffff)) {
    offset = cid - 0x10000;
    lead = Math.floor(offset / 0x400) + 0xD800;
    trail = offset % 0x400 + 0xDC00;
    return String.fromCharCode(lead) + String.fromCharCode(trail);
  }
  return String.fromCharCode(cid);
};
|
298 |
|
/**
 * Return `<rsg>-<hex cid>`, falling back to the CSG as prefix when
 * `_as_rsg` yields null or undefined.
 */
this._as_fncr = function(csg, cid) {
  var prefix = this._as_rsg(csg, cid);
  if (prefix == null) {
    prefix = csg;
  }
  return "" + prefix + "-" + (cid.toString(16));
};
|
304 |
|
/** Return `<csg>-<hex cid>`. */
this._as_sfncr = function(csg, cid) {
  var parts = ["" + csg, "-", cid.toString(16)];
  return parts[0] + parts[1] + parts[2];
};
|
308 |
|
/**
 * Return `&<csg>#x<hex cid>;`. The CSG prefix is left out when `csg` is 'u',
 * null or undefined, which yields a plain hexadecimal NCR.
 */
this._as_xncr = function(csg, cid) {
  var prefix = (csg == null || csg === 'u') ? '' : csg;
  return "&" + prefix + "#x" + (cid.toString(16)) + ";";
};
|
315 |
|
/**
 * Look up `cid` in the range table of `csg` with `binary_interval_search`,
 * asking for the 'rsg' field of the entry bounded by 'first-cid' / 'last-cid'.
 */
this._as_rsg = function(csg, cid) {
  var ranges = this._names_and_ranges_by_csg[csg];
  return binary_interval_search(ranges, 'first-cid', 'last-cid', 'rsg', cid);
};
|
319 |
|
/**
 * Look up `cid` in the range table of `csg` with `binary_interval_search`,
 * asking for the 'range-name' field of the entry bounded by 'first-cid' / 'last-cid'.
 */
this._as_range_name = function(csg, cid) {
  var ranges = this._names_and_ranges_by_csg[csg];
  return binary_interval_search(ranges, 'first-cid', 'last-cid', 'range-name', cid);
};
|
323 |
|
/**
 * Normalize a CID hint plus options into a validated `[csg, cid]` pair.
 *
 * `cid_hint` is either a number (used as the CID directly) or a text whose
 * first character is decoded with `csg_cid_from_chr`, using `options['input']`
 * as input mode. `options` must be null, undefined or a POD.
 *
 * CSG precedence: `options['csg']`, then the CSG found in the text hint,
 * then 'u'.
 *
 * @returns {Array} [csg, cid]
 * @throws {Error} on wrong argument types, or when CSG / CID fail validation
 */
this._csg_cid_from_hint = function(cid_hint, options) {
  var options_type, hint_type, csg_of_options, csg_of_cid_hint, input_mode, decoded, cid, csg;

  options_type = CND.type_of(options);
  if (options_type === 'null' || options_type === 'jsundefined') {
    csg_of_options = null;
    input_mode = null;
  } else if (options_type === 'pod') {
    csg_of_options = options['csg'];
    input_mode = options['input'];
  } else {
    throw new Error("expected a POD as second argument, got a " + options_type);
  }

  hint_type = CND.type_of(cid_hint);
  if (hint_type === 'number') {
    csg_of_cid_hint = null;
    cid = cid_hint;
  } else if (hint_type === 'text') {
    decoded = this.csg_cid_from_chr(cid_hint, {
      input: input_mode
    });
    csg_of_cid_hint = decoded[0];
    cid = decoded[1];
  } else {
    throw new Error("expected a text or a number as first argument, got a " + hint_type);
  }

  // An explicit CSG in the options wins over one embedded in the hint.
  csg = 'u';
  if (csg_of_options != null) {
    csg = csg_of_options;
  } else if (csg_of_cid_hint != null) {
    csg = csg_of_cid_hint;
  }

  this.validate_is_csg(csg);
  this.validate_is_cid(cid);
  return [csg, cid];
};
|
388 |
|
// ---- Pattern fragments (regex source strings) --------------------------------
// `G` variants capture their content; `O` variants also accept the empty string.
name = '(?:[a-z][a-z0-9]*)';
nameO = '(?:' + name + '|)';
nameOG = '(' + name + '|)';
hex = '(?:x[a-fA-F0-9]+)';
hexG = '(?:x([a-fA-F0-9]+))';
dec = '(?:[0-9]+)';
decG = '(?:([0-9]+))';

// ---- CSG and (X)NCR matchers --------------------------------------------------
this._csg_matcher = new RegExp('^' + name + '$');
this._ncr_matcher = new RegExp('(?:&\\#(?:' + hex + '|' + dec + ');)');
this._xncr_matcher = new RegExp('(?:&' + nameO + '\\#(?:' + hex + '|' + dec + ');)');
// Capturing variants: group 1 = CSG (always empty for NCRs), group 2 = hex
// digits, group 3 = decimal digits.
this._ncr_csg_cid_matcher = new RegExp('(?:&()\\#(?:' + hexG + '|' + decG + ');)');
this._xncr_csg_cid_matcher = new RegExp('(?:&' + nameOG + '\\#(?:' + hexG + '|' + decG + ');)');

// ---- UTF-16 building blocks ---------------------------------------------------
this._surrogate_matcher = /(?:[\ud800-\udbff][\udc00-\udfff])/;
this._nonsurrogate_matcher = /[^\ud800-\udbff\udc00-\udfff]/;

// ---- First-character matchers, one per input mode ------------------------------
this._first_chr_matcher_plain = new RegExp([
  '^(?:', this._surrogate_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));
this._first_chr_matcher_ncr = new RegExp([
  '^(?:', this._surrogate_matcher.source,
  '|', this._ncr_csg_cid_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));
this._first_chr_matcher_xncr = new RegExp([
  '^(?:', this._surrogate_matcher.source,
  '|', this._xncr_csg_cid_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));

// ---- Splitters, one per input mode ---------------------------------------------
// The whole pattern is captured so that `String::split` keeps the matches.
this._plain_splitter = new RegExp([
  '(', this._surrogate_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));
this._ncr_splitter = new RegExp([
  '(', this._ncr_matcher.source,
  '|', this._surrogate_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));
this._xncr_splitter = new RegExp([
  '(', this._xncr_matcher.source,
  '|', this._surrogate_matcher.source,
  '|', this._nonsurrogate_matcher.source, ')'
].join(''));
|
434 |
|
/**
 * Return the entry stored for `rsg` in `this._ranges_by_rsg`.
 *
 * @throws {Error} when no entry exists for `rsg`
 */
this.cid_range_from_rsg = function(rsg) {
  var range = this._ranges_by_rsg[rsg];
  if (range == null) {
    throw new Error("unknown RSG: " + (rpr(rsg)));
  }
  return range;
};
|
442 |
|
/**
 * Validate that `x` is a known CSG.
 *
 * `x` must be a text (checked by `CND.validate_isa_text`), must match
 * `this._csg_matcher`, and must be an own key of
 * `this._names_and_ranges_by_csg` with a non-null value.
 *
 * The own-key check matters because names inherited from `Object.prototype`
 * (e.g. 'constructor') match the CSG pattern and would otherwise be accepted
 * as known character sets.
 *
 * @returns {null}
 * @throws {Error} when `x` is not a well-formed or not a known CSG
 */
this.validate_is_csg = function(x) {
  var known_csgs;
  CND.validate_isa_text(x);
  if (!this._csg_matcher.test(x)) {
    throw new Error("not a valid CSG: " + (rpr(x)));
  }
  known_csgs = this._names_and_ranges_by_csg;
  if (!Object.prototype.hasOwnProperty.call(known_csgs, x) || known_csgs[x] == null) {
    throw new Error("unknown CSG: " + (rpr(x)));
  }
  return null;
};
|
453 |
|
/**
 * Validate that `x` is an integer CID between 0x0 and 0x10ffff (inclusive).
 *
 * The upper bound now matches the error message and the largest value
 * `_unicode_chr_from_cid` can encode as a surrogate pair; the previous check
 * let values up to 0xffffffff through. The integer test uses `Math.floor`
 * rather than `parseInt`, which stringifies its argument first.
 *
 * @returns {null}
 * @throws {Error} when `x` is not a number, not an integer, or out of range
 */
this.validate_is_cid = function(x) {
  CND.validate_isa_number(x);
  // NaN fails the integer test because NaN !== NaN.
  if (x < 0 || x > 0x10ffff || Math.floor(x) !== x) {
    throw new Error("expected an integer between 0x0 and 0x10ffff, got 0x" + (x.toString(16)));
  }
  return null;
};
|
461 |
|
462 | }).call(this);
|