UNPKG

1.82 kBJavaScriptView Raw
/**
 * Character-range fragments. Each value is a `from-to` range intended to be
 * spliced between `[` and `]` when building a RegExp character class.
 *
 * All non-ASCII ranges are written as `\u` escapes so the values cannot be
 * altered by encoding conversion or Unicode normalization of this file.
 */
const HALF_CHARS = '\u0020-\u007F'; // half letters + half puncs

const GEN_PUNCS = '\u2000-\u206f'; // Unicode "General Punctuation" block
const CJK_PUNCS = '\u3000-\u303f'; // Unicode "CJK Symbols and Punctuation" block
// NOTE(review): the CJK Unified Ideographs block runs to U+9FFF; this range
// stops at U+9FBF. Left unchanged to keep the exported value stable - confirm.
const CJK_LETTERS = '\u4e00-\u9fbf';
const FULL_CHARS = '\uff00-\uffef'; // full letters + full puncs

const HALF_NUM = '0-9';
const HALF_UPPER = 'A-Z';
const HALF_LOWER = 'a-z';

// Full-width forms. These must NOT be the ASCII ranges: ASCII digits and
// letters are already covered by HALF_NUM / HALF_UPPER / HALF_LOWER.
const FULL_NUM = '\uff10-\uff19'; // 0xff10 - 0xff19

const FULL_UPPER = '\uff21-\uff3a'; // 0xff21 - 0xff3a

const FULL_LOWER = '\uff41-\uff5a'; // 0xff41 - 0xff5a
16
// First alternative of the ANSI pattern: an optional run of parameters
// (alphanumerics, then `;`-separated URL-like fields) that MUST be terminated
// by BEL (U+0007). Without the terminator this alternative can match the empty
// string or swallow ordinary text following an escape introducer.
const ANSI_ALPHA = /(?:(?:[a-zA-Z\d]*(?:;[-a-zA-Z\d\/#&.:=?%@~_]*)*)?\u0007)/;
// Second alternative: optional `;`-separated numeric parameters followed by a
// single final byte (e.g. the `m` in `ESC[31m`).
const ANSI_BETA = /(?:(?:\d{1,4}(?:;\d{0,4})*)?[\dA-PR-TZcf-ntqry=><~])/;
// An escape sequence starts with ESC (U+001B) or the single-character CSI
// (U+009B), followed by any number of introducer characters, then one of the
// two alternatives above. The lead characters are written as escapes so they
// survive encoding conversion of this file.
const ANSI = new RegExp(`[\\u001B\\u009B][[\\]()#;?]*(?:${ANSI_ALPHA.source}|${ANSI_BETA.source})`);
// A UTF-16 surrogate pair: high surrogate followed by low surrogate.
const ASTRAL = /[\uD800-\uDBFF][\uDC00-\uDFFF]/; // 1024 * 1024
21
// HAN ideographs: a single character drawn from the CJK punctuation, CJK
// letter, or full-width ranges, assembled into one character class.
const HAN = new RegExp(['[', CJK_PUNCS, CJK_LETTERS, FULL_CHARS, ']'].join(''));
//
// Reference table of related Unicode blocks (the pattern above contains only
// the three ranges it is built from, not every block listed here).
//
// Block Range Comment
// CJK Unified Ideographs 4E00-9FFF Common
// CJK Unified Ideographs Extension A 3400-4DBF Rare
// CJK Unified Ideographs Extension B 20000-2A6DF Rare, historic
// CJK Unified Ideographs Extension C 2A700–2B73F Rare, historic
// CJK Unified Ideographs Extension D 2B740–2B81F Uncommon, some in current use
// CJK Unified Ideographs Extension E 2B820–2CEAF Rare, historic
// CJK Compatibility Ideographs F900-FAFF Duplicates, unifiable variants, corporate characters
// CJK Compatibility Ideographs Supplement 2F800-2FA1F Unifiable variants
33
// Global-flag copies of the single-match patterns, for replace/matchAll-style
// use. Being global, each one carries `lastIndex` state between `test`/`exec`
// calls.
const ANSI_G = new RegExp(ANSI.source, 'g');
const ASTRAL_G = new RegExp(ASTRAL.source, 'g');
const HAN_G = new RegExp(HAN.source, 'g');
37
38export { ANSI, ANSI_G, ASTRAL, ASTRAL_G, CJK_LETTERS, CJK_PUNCS, FULL_CHARS, FULL_LOWER, FULL_NUM, FULL_UPPER, GEN_PUNCS, HALF_CHARS, HALF_LOWER, HALF_NUM, HALF_UPPER, HAN, HAN_G };