UNPKG

75.8 kBPlain TextView Raw
1#
2# -*- coding: utf-8 -*-
3
4
5############################################################################################################
6from _xxrange import xxrange as _xxrange
7from _xxrange import urange as _urange
8
9
10
11
12############################################################################################################
13class UMX_data_000:
14
15 #---------------------------------------------------------------------------------------------------------
# Maps each named character reference, spelled out in full with its leading
# ``&`` and trailing ``;``, to the single character it stands for.
#
# The keys are the reference *names*, not the characters they decode to. In
# particular ``&apos;`` has to stay spelled out: a bare apostrophe between
# single quotes is not a valid string literal.
chr_by_namedcref = {
  '&quot;': '\u0022', # quotation mark (= APL quote)
  '&amp;': '\u0026', # ampersand
  '&apos;': '\u0027', # apostrophe (= apostrophe-quote); see below
  '&lt;': '\u003c', # less-than sign
  '&gt;': '\u003e', # greater-than sign
  '&nbsp;': '\u00a0', # no-break space (= non-breaking space)
  '&iexcl;': '\u00a1', # inverted exclamation mark
  '&cent;': '\u00a2', # cent sign
  '&pound;': '\u00a3', # pound sign
  '&curren;': '\u00a4', # currency sign
  '&yen;': '\u00a5', # yen sign (= yuan sign)
  '&brvbar;': '\u00a6', # broken bar (= broken vertical bar)
  '&sect;': '\u00a7', # section sign
  '&uml;': '\u00a8', # diaeresis (= spacing diaeresis); see German umlaut
  '&copy;': '\u00a9', # copyright sign
  '&ordf;': '\u00aa', # feminine ordinal indicator
  '&laquo;': '\u00ab', # left-pointing double angle quotation mark (= left pointing guillemet)
  '&not;': '\u00ac', # not sign
  '&shy;': '\u00ad', # soft hyphen (= discretionary hyphen)
  '&reg;': '\u00ae', # registered sign ( = registered trade mark sign)
  '&macr;': '\u00af', # macron (= spacing macron = overline = APL overbar)
  '&deg;': '\u00b0', # degree sign
  '&plusmn;': '\u00b1', # plus-minus sign (= plus-or-minus sign)
  '&sup2;': '\u00b2', # superscript two (= superscript digit two = squared)
  '&sup3;': '\u00b3', # superscript three (= superscript digit three = cubed)
  '&acute;': '\u00b4', # acute accent (= spacing acute)
  '&micro;': '\u00b5', # micro sign
  '&para;': '\u00b6', # pilcrow sign ( = paragraph sign)
  '&middot;': '\u00b7', # middle dot (= Georgian comma = Greek middle dot)
  '&cedil;': '\u00b8', # cedilla (= spacing cedilla)
  '&sup1;': '\u00b9', # superscript one (= superscript digit one)
  '&ordm;': '\u00ba', # masculine ordinal indicator
  '&raquo;': '\u00bb', # right-pointing double angle quotation mark (= right pointing guillemet)
  '&frac14;': '\u00bc', # vulgar fraction one quarter (= fraction one quarter)
  '&frac12;': '\u00bd', # vulgar fraction one half (= fraction one half)
  '&frac34;': '\u00be', # vulgar fraction three quarters (= fraction three quarters)
  '&iquest;': '\u00bf', # inverted question mark (= turned question mark)
  '&Agrave;': '\u00c0', # Latin capital letter A with grave (= Latin capital letter A grave)
  '&Aacute;': '\u00c1', # Latin capital letter A with acute
  '&Acirc;': '\u00c2', # Latin capital letter A with circumflex
  '&Atilde;': '\u00c3', # Latin capital letter A with tilde
  '&Auml;': '\u00c4', # Latin capital letter A with diaeresis
  '&Aring;': '\u00c5', # Latin capital letter A with ring above (= Latin capital letter A ring)
  '&AElig;': '\u00c6', # Latin capital letter AE (= Latin capital ligature AE)
  '&Ccedil;': '\u00c7', # Latin capital letter C with cedilla
  '&Egrave;': '\u00c8', # Latin capital letter E with grave
  '&Eacute;': '\u00c9', # Latin capital letter E with acute
  '&Ecirc;': '\u00ca', # Latin capital letter E with circumflex
  '&Euml;': '\u00cb', # Latin capital letter E with diaeresis
  '&Igrave;': '\u00cc', # Latin capital letter I with grave
  '&Iacute;': '\u00cd', # Latin capital letter I with acute
  '&Icirc;': '\u00ce', # Latin capital letter I with circumflex
  '&Iuml;': '\u00cf', # Latin capital letter I with diaeresis
  '&ETH;': '\u00d0', # Latin capital letter ETH
  '&Ntilde;': '\u00d1', # Latin capital letter N with tilde
  '&Ograve;': '\u00d2', # Latin capital letter O with grave
  '&Oacute;': '\u00d3', # Latin capital letter O with acute
  '&Ocirc;': '\u00d4', # Latin capital letter O with circumflex
  '&Otilde;': '\u00d5', # Latin capital letter O with tilde
  '&Ouml;': '\u00d6', # Latin capital letter O with diaeresis
  '&times;': '\u00d7', # multiplication sign
  '&Oslash;': '\u00d8', # Latin capital letter O with stroke (= Latin capital letter O slash)
  '&Ugrave;': '\u00d9', # Latin capital letter U with grave
  '&Uacute;': '\u00da', # Latin capital letter U with acute
  '&Ucirc;': '\u00db', # Latin capital letter U with circumflex
  '&Uuml;': '\u00dc', # Latin capital letter U with diaeresis
  '&Yacute;': '\u00dd', # Latin capital letter Y with acute
  '&THORN;': '\u00de', # Latin capital letter THORN
  '&szlig;': '\u00df', # Latin small letter sharp s (= ess-zed); see German Eszett
  '&agrave;': '\u00e0', # Latin small letter a with grave
  '&aacute;': '\u00e1', # Latin small letter a with acute
  '&acirc;': '\u00e2', # Latin small letter a with circumflex
  '&atilde;': '\u00e3', # Latin small letter a with tilde
  '&auml;': '\u00e4', # Latin small letter a with diaeresis
  '&aring;': '\u00e5', # Latin small letter a with ring above
  '&aelig;': '\u00e6', # Latin small letter ae (= Latin small ligature ae)
  '&ccedil;': '\u00e7', # Latin small letter c with cedilla
  '&egrave;': '\u00e8', # Latin small letter e with grave
  '&eacute;': '\u00e9', # Latin small letter e with acute
  '&ecirc;': '\u00ea', # Latin small letter e with circumflex
  '&euml;': '\u00eb', # Latin small letter e with diaeresis
  '&igrave;': '\u00ec', # Latin small letter i with grave
  '&iacute;': '\u00ed', # Latin small letter i with acute
  '&icirc;': '\u00ee', # Latin small letter i with circumflex
  '&iuml;': '\u00ef', # Latin small letter i with diaeresis
  '&eth;': '\u00f0', # Latin small letter eth
  '&ntilde;': '\u00f1', # Latin small letter n with tilde
  '&ograve;': '\u00f2', # Latin small letter o with grave
  '&oacute;': '\u00f3', # Latin small letter o with acute
  '&ocirc;': '\u00f4', # Latin small letter o with circumflex
  '&otilde;': '\u00f5', # Latin small letter o with tilde
  '&ouml;': '\u00f6', # Latin small letter o with diaeresis
  '&divide;': '\u00f7', # division sign
  '&oslash;': '\u00f8', # Latin small letter o with stroke (= Latin small letter o slash)
  '&ugrave;': '\u00f9', # Latin small letter u with grave
  '&uacute;': '\u00fa', # Latin small letter u with acute
  '&ucirc;': '\u00fb', # Latin small letter u with circumflex
  '&uuml;': '\u00fc', # Latin small letter u with diaeresis
  '&yacute;': '\u00fd', # Latin small letter y with acute
  '&thorn;': '\u00fe', # Latin small letter thorn
  '&yuml;': '\u00ff', # Latin small letter y with diaeresis
  '&OElig;': '\u0152', # Latin capital ligature oe
  '&oelig;': '\u0153', # Latin small ligature oe
  '&Scaron;': '\u0160', # Latin capital letter s with caron
  '&scaron;': '\u0161', # Latin small letter s with caron
  '&Yuml;': '\u0178', # Latin capital letter y with diaeresis
  '&fnof;': '\u0192', # Latin small letter f with hook (= function = florin)
  '&circ;': '\u02c6', # modifier letter circumflex accent
  '&tilde;': '\u02dc', # small tilde
  '&Alpha;': '\u0391', # Greek capital letter Alpha
  '&Beta;': '\u0392', # Greek capital letter Beta
  '&Gamma;': '\u0393', # Greek capital letter Gamma
  '&Delta;': '\u0394', # Greek capital letter Delta
  '&Epsilon;': '\u0395', # Greek capital letter Epsilon
  '&Zeta;': '\u0396', # Greek capital letter Zeta
  '&Eta;': '\u0397', # Greek capital letter Eta
  '&Theta;': '\u0398', # Greek capital letter Theta
  '&Iota;': '\u0399', # Greek capital letter Iota
  '&Kappa;': '\u039a', # Greek capital letter Kappa
  '&Lambda;': '\u039b', # Greek capital letter Lambda
  '&Mu;': '\u039c', # Greek capital letter Mu
  '&Nu;': '\u039d', # Greek capital letter Nu
  '&Xi;': '\u039e', # Greek capital letter Xi
  '&Omicron;': '\u039f', # Greek capital letter Omicron
  '&Pi;': '\u03a0', # Greek capital letter Pi
  '&Rho;': '\u03a1', # Greek capital letter Rho
  '&Sigma;': '\u03a3', # Greek capital letter Sigma
  '&Tau;': '\u03a4', # Greek capital letter Tau
  '&Upsilon;': '\u03a5', # Greek capital letter Upsilon
  '&Phi;': '\u03a6', # Greek capital letter Phi
  '&Chi;': '\u03a7', # Greek capital letter Chi
  '&Psi;': '\u03a8', # Greek capital letter Psi
  '&Omega;': '\u03a9', # Greek capital letter Omega
  '&alpha;': '\u03b1', # Greek small letter alpha
  '&beta;': '\u03b2', # Greek small letter beta
  '&gamma;': '\u03b3', # Greek small letter gamma
  '&delta;': '\u03b4', # Greek small letter delta
  '&epsilon;': '\u03b5', # Greek small letter epsilon
  '&zeta;': '\u03b6', # Greek small letter zeta
  '&eta;': '\u03b7', # Greek small letter eta
  '&theta;': '\u03b8', # Greek small letter theta
  '&iota;': '\u03b9', # Greek small letter iota
  '&kappa;': '\u03ba', # Greek small letter kappa
  '&lambda;': '\u03bb', # Greek small letter lambda
  '&mu;': '\u03bc', # Greek small letter mu
  '&nu;': '\u03bd', # Greek small letter nu
  '&xi;': '\u03be', # Greek small letter xi
  '&omicron;': '\u03bf', # Greek small letter omicron
  '&pi;': '\u03c0', # Greek small letter pi
  '&rho;': '\u03c1', # Greek small letter rho
  '&sigmaf;': '\u03c2', # Greek small letter final sigma
  '&sigma;': '\u03c3', # Greek small letter sigma
  '&tau;': '\u03c4', # Greek small letter tau
  '&upsilon;': '\u03c5', # Greek small letter upsilon
  '&phi;': '\u03c6', # Greek small letter phi
  '&chi;': '\u03c7', # Greek small letter chi
  '&psi;': '\u03c8', # Greek small letter psi
  '&omega;': '\u03c9', # Greek small letter omega
  '&thetasym;': '\u03d1', # Greek theta symbol
  '&upsih;': '\u03d2', # Greek Upsilon with hook symbol
  '&piv;': '\u03d6', # Greek pi symbol
  '&ensp;': '\u2002', # en space
  '&emsp;': '\u2003', # em space
  '&thinsp;': '\u2009', # thin space
  '&zwnj;': '\u200c', # zero-width non-joiner
  '&zwj;': '\u200d', # zero-width joiner
  '&lrm;': '\u200e', # left-to-right mark
  '&rlm;': '\u200f', # right-to-left mark
  '&ndash;': '\u2013', # en dash
  '&mdash;': '\u2014', # em dash
  '&lsquo;': '\u2018', # left single quotation mark
  '&rsquo;': '\u2019', # right single quotation mark
  '&sbquo;': '\u201a', # single low-9 quotation mark
  '&ldquo;': '\u201c', # left double quotation mark
  '&rdquo;': '\u201d', # right double quotation mark
  '&bdquo;': '\u201e', # double low-9 quotation mark
  '&dagger;': '\u2020', # dagger
  '&Dagger;': '\u2021', # double dagger
  '&bull;': '\u2022', # bullet (= black small circle)
  '&hellip;': '\u2026', # horizontal ellipsis (= three dot leader)
  '&permil;': '\u2030', # per mille sign
  '&prime;': '\u2032', # prime (= minutes = feet)
  '&Prime;': '\u2033', # double prime (= seconds = inches)
  '&lsaquo;': '\u2039', # single left-pointing angle quotation mark
  '&rsaquo;': '\u203a', # single right-pointing angle quotation mark
  '&oline;': '\u203e', # overline (= spacing overscore)
  '&frasl;': '\u2044', # fraction slash (= solidus)
  '&euro;': '\u20ac', # euro sign
  '&image;': '\u2111', # black-letter capital I (= imaginary part)
  '&weierp;': '\u2118', # script capital P (= power set = Weierstrass p)
  '&real;': '\u211c', # black-letter capital R (= real part symbol)
  '&trade;': '\u2122', # trademark sign
  '&alefsym;': '\u2135', # alef symbol (= first transfinite cardinal)
  '&larr;': '\u2190', # leftwards arrow
  '&uarr;': '\u2191', # upwards arrow
  '&rarr;': '\u2192', # rightwards arrow
  '&darr;': '\u2193', # downwards arrow
  '&harr;': '\u2194', # left right arrow
  '&crarr;': '\u21b5', # downwards arrow with corner leftwards (= carriage return)
  '&lArr;': '\u21d0', # leftwards double arrow
  '&uArr;': '\u21d1', # upwards double arrow
  '&rArr;': '\u21d2', # rightwards double arrow
  '&dArr;': '\u21d3', # downwards double arrow
  '&hArr;': '\u21d4', # left right double arrow
  '&forall;': '\u2200', # for all
  '&part;': '\u2202', # partial differential
  '&exist;': '\u2203', # there exists
  '&empty;': '\u2205', # empty set (= null set = diameter)
  '&nabla;': '\u2207', # nabla (= backward difference)
  '&isin;': '\u2208', # element of
  '&notin;': '\u2209', # not an element of
  '&ni;': '\u220b', # contains as member
  '&prod;': '\u220f', # n-ary product (= product sign)
  '&sum;': '\u2211', # n-ary summation
  '&minus;': '\u2212', # minus sign
  '&lowast;': '\u2217', # asterisk operator
  '&radic;': '\u221a', # square root (= radical sign)
  '&prop;': '\u221d', # proportional to
  '&infin;': '\u221e', # infinity
  '&ang;': '\u2220', # angle
  '&and;': '\u2227', # logical and (= wedge)
  '&or;': '\u2228', # logical or (= vee)
  '&cap;': '\u2229', # intersection (= cap)
  '&cup;': '\u222a', # union (= cup)
  '&int;': '\u222b', # integral
  '&there4;': '\u2234', # therefore
  '&sim;': '\u223c', # tilde operator (= varies with = similar to)
  '&cong;': '\u2245', # congruent to
  '&asymp;': '\u2248', # almost equal to (= asymptotic to)
  '&ne;': '\u2260', # not equal to
  '&equiv;': '\u2261', # identical to; sometimes used for 'equivalent to'
  '&le;': '\u2264', # less-than or equal to
  '&ge;': '\u2265', # greater-than or equal to
  '&sub;': '\u2282', # subset of
  '&sup;': '\u2283', # superset of
  '&nsub;': '\u2284', # not a subset of
  '&sube;': '\u2286', # subset of or equal to
  '&supe;': '\u2287', # superset of or equal to
  '&oplus;': '\u2295', # circled plus (= direct sum)
  '&otimes;': '\u2297', # circled times (= vector product)
  '&perp;': '\u22a5', # up tack (= orthogonal to = perpendicular)
  '&sdot;': '\u22c5', # dot operator
  '&lceil;': '\u2308', # left ceiling (= APL upstile)
  '&rceil;': '\u2309', # right ceiling
  '&lfloor;': '\u230a', # left floor (= APL downstile)
  '&rfloor;': '\u230b', # right floor
  '&lang;': '\u2329', # left-pointing angle bracket (= bra)
  '&rang;': '\u232a', # right-pointing angle bracket (= ket)
  '&loz;': '\u25ca', # lozenge
  '&spades;': '\u2660', # black spade suit
  '&clubs;': '\u2663', # black club suit (= shamrock)
  '&hearts;': '\u2665', # black heart suit (= valentine)
  '&diams;': '\u2666', # black diamond suit
  }
271
272 #---------------------------------------------------------------------------------------------------------
# Reverse lookup of `chr_by_namedcref`: character -> named character
# reference. Every ( name, character ) pair is swapped; should two names
# carry the same character, the pair visited last is the one that is kept.
namedcref_by_chr = { character: name for name, character in chr_by_namedcref.items() }
275
276
277 #---------------------------------------------------------------------------------------------------------
278 # UPDATE: this list has been manually updated to accommodate changes for CJK characters in Unicode 6.0,
279 # but not for other scripts; a revised and improved version is pending.
280
281 # This list has been generated with JIZURA/_extra/#populate-jizura-db.py/defined_unicode_cscid_ranges
282 # from the Unicode 5.1.0 `http://www.unicode.org/Public/UNIDATA/UnicodeData.txt`. It contains all
283 # the start and endpoints of contiguous subranges within the Unicode CID range, `0x0...0x10fffe`. This
284 # list is used by `~.is_defined_unicode_cid()`.
285 #
286 # ###OBS### this list could probably be computed more efficiently using this snippet from
287 # http://docs.python.org/library/itertools.html#examples ::
288 #
289 # >>> # Find runs of consecutive numbers using groupby. The key to the solution
290 # >>> # is differencing with a range so that consecutive numbers all appear in
291 # >>> # same group.
292 # >>> data = [ 1, 4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
293 # >>> for k, g in groupby(enumerate(data), lambda (i,x):i-x):
294 # ... print map(itemgetter(1), g)
295 # ...
296 # [1]
297 # [4, 5, 6]
298 # [10]
299 # [15, 16, 17, 18]
300 # [22]
301 # [25, 26, 27, 28]
302 #
303 # and combining it with pickling.
304 #
# Tuple of builtin `range` objects, one per ( start, stop ) pair listed
# below; as with any `range`, `stop` itself is not part of the span.
###OBS### these should be rewritten using the more efficient and versatile urange object
ranges_of_defined_unicode_cids = tuple(
  range( start, stop ) for start, stop in (
    ( 0x00000000, 0x00000378 ), ( 0x0000037a, 0x0000037f ), ( 0x00000384, 0x0000038b ),
    ( 0x0000038c, 0x0000038d ), ( 0x0000038e, 0x000003a2 ), ( 0x000003a3, 0x00000524 ),
    ( 0x00000531, 0x00000557 ), ( 0x00000559, 0x00000560 ), ( 0x00000561, 0x00000588 ),
    ( 0x00000589, 0x0000058b ), ( 0x00000591, 0x000005c8 ), ( 0x000005d0, 0x000005eb ),
    ( 0x000005f0, 0x000005f5 ), ( 0x00000600, 0x00000604 ), ( 0x00000606, 0x0000061c ),
    ( 0x0000061e, 0x00000620 ), ( 0x00000621, 0x0000065f ), ( 0x00000660, 0x0000070e ),
    ( 0x0000070f, 0x0000074b ), ( 0x0000074d, 0x000007b2 ), ( 0x000007c0, 0x000007fb ),
    ( 0x00000901, 0x0000093a ), ( 0x0000093c, 0x0000094e ), ( 0x00000950, 0x00000955 ),
    ( 0x00000958, 0x00000973 ), ( 0x0000097b, 0x00000980 ), ( 0x00000981, 0x00000984 ),
    ( 0x00000985, 0x0000098d ), ( 0x0000098f, 0x00000991 ), ( 0x00000993, 0x000009a9 ),
    ( 0x000009aa, 0x000009b1 ), ( 0x000009b2, 0x000009b3 ), ( 0x000009b6, 0x000009ba ),
    ( 0x000009bc, 0x000009c5 ), ( 0x000009c7, 0x000009c9 ), ( 0x000009cb, 0x000009cf ),
    ( 0x000009d7, 0x000009d8 ), ( 0x000009dc, 0x000009de ), ( 0x000009df, 0x000009e4 ),
    ( 0x000009e6, 0x000009fb ), ( 0x00000a01, 0x00000a04 ), ( 0x00000a05, 0x00000a0b ),
    ( 0x00000a0f, 0x00000a11 ), ( 0x00000a13, 0x00000a29 ), ( 0x00000a2a, 0x00000a31 ),
    ( 0x00000a32, 0x00000a34 ), ( 0x00000a35, 0x00000a37 ), ( 0x00000a38, 0x00000a3a ),
    ( 0x00000a3c, 0x00000a3d ), ( 0x00000a3e, 0x00000a43 ), ( 0x00000a47, 0x00000a49 ),
    ( 0x00000a4b, 0x00000a4e ), ( 0x00000a51, 0x00000a52 ), ( 0x00000a59, 0x00000a5d ),
    ( 0x00000a5e, 0x00000a5f ), ( 0x00000a66, 0x00000a76 ), ( 0x00000a81, 0x00000a84 ),
    ( 0x00000a85, 0x00000a8e ), ( 0x00000a8f, 0x00000a92 ), ( 0x00000a93, 0x00000aa9 ),
    ( 0x00000aaa, 0x00000ab1 ), ( 0x00000ab2, 0x00000ab4 ), ( 0x00000ab5, 0x00000aba ),
    ( 0x00000abc, 0x00000ac6 ), ( 0x00000ac7, 0x00000aca ), ( 0x00000acb, 0x00000ace ),
    ( 0x00000ad0, 0x00000ad1 ), ( 0x00000ae0, 0x00000ae4 ), ( 0x00000ae6, 0x00000af0 ),
    ( 0x00000af1, 0x00000af2 ), ( 0x00000b01, 0x00000b04 ), ( 0x00000b05, 0x00000b0d ),
    ( 0x00000b0f, 0x00000b11 ), ( 0x00000b13, 0x00000b29 ), ( 0x00000b2a, 0x00000b31 ),
    ( 0x00000b32, 0x00000b34 ), ( 0x00000b35, 0x00000b3a ), ( 0x00000b3c, 0x00000b45 ),
    ( 0x00000b47, 0x00000b49 ), ( 0x00000b4b, 0x00000b4e ), ( 0x00000b56, 0x00000b58 ),
    ( 0x00000b5c, 0x00000b5e ), ( 0x00000b5f, 0x00000b64 ), ( 0x00000b66, 0x00000b72 ),
    ( 0x00000b82, 0x00000b84 ), ( 0x00000b85, 0x00000b8b ), ( 0x00000b8e, 0x00000b91 ),
    ( 0x00000b92, 0x00000b96 ), ( 0x00000b99, 0x00000b9b ), ( 0x00000b9c, 0x00000b9d ),
    ( 0x00000b9e, 0x00000ba0 ), ( 0x00000ba3, 0x00000ba5 ), ( 0x00000ba8, 0x00000bab ),
    ( 0x00000bae, 0x00000bba ), ( 0x00000bbe, 0x00000bc3 ), ( 0x00000bc6, 0x00000bc9 ),
    ( 0x00000bca, 0x00000bce ), ( 0x00000bd0, 0x00000bd1 ), ( 0x00000bd7, 0x00000bd8 ),
    ( 0x00000be6, 0x00000bfb ), ( 0x00000c01, 0x00000c04 ), ( 0x00000c05, 0x00000c0d ),
    ( 0x00000c0e, 0x00000c11 ), ( 0x00000c12, 0x00000c29 ), ( 0x00000c2a, 0x00000c34 ),
    ( 0x00000c35, 0x00000c3a ), ( 0x00000c3d, 0x00000c45 ), ( 0x00000c46, 0x00000c49 ),
    ( 0x00000c4a, 0x00000c4e ), ( 0x00000c55, 0x00000c57 ), ( 0x00000c58, 0x00000c5a ),
    ( 0x00000c60, 0x00000c64 ), ( 0x00000c66, 0x00000c70 ), ( 0x00000c78, 0x00000c80 ),
    ( 0x00000c82, 0x00000c84 ), ( 0x00000c85, 0x00000c8d ), ( 0x00000c8e, 0x00000c91 ),
    ( 0x00000c92, 0x00000ca9 ), ( 0x00000caa, 0x00000cb4 ), ( 0x00000cb5, 0x00000cba ),
    ( 0x00000cbc, 0x00000cc5 ), ( 0x00000cc6, 0x00000cc9 ), ( 0x00000cca, 0x00000cce ),
    ( 0x00000cd5, 0x00000cd7 ), ( 0x00000cde, 0x00000cdf ), ( 0x00000ce0, 0x00000ce4 ),
    ( 0x00000ce6, 0x00000cf0 ), ( 0x00000cf1, 0x00000cf3 ), ( 0x00000d02, 0x00000d04 ),
    ( 0x00000d05, 0x00000d0d ), ( 0x00000d0e, 0x00000d11 ), ( 0x00000d12, 0x00000d29 ),
    ( 0x00000d2a, 0x00000d3a ), ( 0x00000d3d, 0x00000d45 ), ( 0x00000d46, 0x00000d49 ),
    ( 0x00000d4a, 0x00000d4e ), ( 0x00000d57, 0x00000d58 ), ( 0x00000d60, 0x00000d64 ),
    ( 0x00000d66, 0x00000d76 ), ( 0x00000d79, 0x00000d80 ), ( 0x00000d82, 0x00000d84 ),
    ( 0x00000d85, 0x00000d97 ), ( 0x00000d9a, 0x00000db2 ), ( 0x00000db3, 0x00000dbc ),
    ( 0x00000dbd, 0x00000dbe ), ( 0x00000dc0, 0x00000dc7 ), ( 0x00000dca, 0x00000dcb ),
    ( 0x00000dcf, 0x00000dd5 ), ( 0x00000dd6, 0x00000dd7 ), ( 0x00000dd8, 0x00000de0 ),
    ( 0x00000df2, 0x00000df5 ), ( 0x00000e01, 0x00000e3b ), ( 0x00000e3f, 0x00000e5c ),
    ( 0x00000e81, 0x00000e83 ), ( 0x00000e84, 0x00000e85 ), ( 0x00000e87, 0x00000e89 ),
    ( 0x00000e8a, 0x00000e8b ), ( 0x00000e8d, 0x00000e8e ), ( 0x00000e94, 0x00000e98 ),
    ( 0x00000e99, 0x00000ea0 ), ( 0x00000ea1, 0x00000ea4 ), ( 0x00000ea5, 0x00000ea6 ),
    ( 0x00000ea7, 0x00000ea8 ), ( 0x00000eaa, 0x00000eac ), ( 0x00000ead, 0x00000eba ),
    ( 0x00000ebb, 0x00000ebe ), ( 0x00000ec0, 0x00000ec5 ), ( 0x00000ec6, 0x00000ec7 ),
    ( 0x00000ec8, 0x00000ece ), ( 0x00000ed0, 0x00000eda ), ( 0x00000edc, 0x00000ede ),
    ( 0x00000f00, 0x00000f48 ), ( 0x00000f49, 0x00000f6d ), ( 0x00000f71, 0x00000f8c ),
    ( 0x00000f90, 0x00000f98 ), ( 0x00000f99, 0x00000fbd ), ( 0x00000fbe, 0x00000fcd ),
    ( 0x00000fce, 0x00000fd5 ), ( 0x00001000, 0x0000109a ), ( 0x0000109e, 0x000010c6 ),
    ( 0x000010d0, 0x000010fd ), ( 0x00001100, 0x0000115a ), ( 0x0000115f, 0x000011a3 ),
    ( 0x000011a8, 0x000011fa ), ( 0x00001200, 0x00001249 ), ( 0x0000124a, 0x0000124e ),
    ( 0x00001250, 0x00001257 ), ( 0x00001258, 0x00001259 ), ( 0x0000125a, 0x0000125e ),
    ( 0x00001260, 0x00001289 ), ( 0x0000128a, 0x0000128e ), ( 0x00001290, 0x000012b1 ),
    ( 0x000012b2, 0x000012b6 ), ( 0x000012b8, 0x000012bf ), ( 0x000012c0, 0x000012c1 ),
    ( 0x000012c2, 0x000012c6 ), ( 0x000012c8, 0x000012d7 ), ( 0x000012d8, 0x00001311 ),
    ( 0x00001312, 0x00001316 ), ( 0x00001318, 0x0000135b ), ( 0x0000135f, 0x0000137d ),
    ( 0x00001380, 0x0000139a ), ( 0x000013a0, 0x000013f5 ), ( 0x00001401, 0x00001677 ),
    ( 0x00001680, 0x0000169d ), ( 0x000016a0, 0x000016f1 ), ( 0x00001700, 0x0000170d ),
    ( 0x0000170e, 0x00001715 ), ( 0x00001720, 0x00001737 ), ( 0x00001740, 0x00001754 ),
    ( 0x00001760, 0x0000176d ), ( 0x0000176e, 0x00001771 ), ( 0x00001772, 0x00001774 ),
    ( 0x00001780, 0x000017de ), ( 0x000017e0, 0x000017ea ), ( 0x000017f0, 0x000017fa ),
    ( 0x00001800, 0x0000180f ), ( 0x00001810, 0x0000181a ), ( 0x00001820, 0x00001878 ),
    ( 0x00001880, 0x000018ab ), ( 0x00001900, 0x0000191d ), ( 0x00001920, 0x0000192c ),
    ( 0x00001930, 0x0000193c ), ( 0x00001940, 0x00001941 ), ( 0x00001944, 0x0000196e ),
    ( 0x00001970, 0x00001975 ), ( 0x00001980, 0x000019aa ), ( 0x000019b0, 0x000019ca ),
    ( 0x000019d0, 0x000019da ), ( 0x000019de, 0x00001a1c ), ( 0x00001a1e, 0x00001a20 ),
    ( 0x00001b00, 0x00001b4c ), ( 0x00001b50, 0x00001b7d ), ( 0x00001b80, 0x00001bab ),
    ( 0x00001bae, 0x00001bba ), ( 0x00001c00, 0x00001c38 ), ( 0x00001c3b, 0x00001c4a ),
    ( 0x00001c4d, 0x00001c80 ), ( 0x00001d00, 0x00001de7 ), ( 0x00001dfe, 0x00001f16 ),
    ( 0x00001f18, 0x00001f1e ), ( 0x00001f20, 0x00001f46 ), ( 0x00001f48, 0x00001f4e ),
    ( 0x00001f50, 0x00001f58 ), ( 0x00001f59, 0x00001f5a ), ( 0x00001f5b, 0x00001f5c ),
    ( 0x00001f5d, 0x00001f5e ), ( 0x00001f5f, 0x00001f7e ), ( 0x00001f80, 0x00001fb5 ),
    ( 0x00001fb6, 0x00001fc5 ), ( 0x00001fc6, 0x00001fd4 ), ( 0x00001fd6, 0x00001fdc ),
    ( 0x00001fdd, 0x00001ff0 ), ( 0x00001ff2, 0x00001ff5 ), ( 0x00001ff6, 0x00001fff ),
    ( 0x00002000, 0x00002065 ), ( 0x0000206a, 0x00002072 ), ( 0x00002074, 0x0000208f ),
    ( 0x00002090, 0x00002095 ), ( 0x000020a0, 0x000020b6 ), ( 0x000020d0, 0x000020f1 ),
    ( 0x00002100, 0x00002150 ), ( 0x00002153, 0x00002189 ), ( 0x00002190, 0x000023e8 ),
    ( 0x00002400, 0x00002427 ), ( 0x00002440, 0x0000244b ), ( 0x00002460, 0x0000269e ),
    ( 0x000026a0, 0x000026bd ), ( 0x000026c0, 0x000026c4 ), ( 0x00002701, 0x00002705 ),
    ( 0x00002706, 0x0000270a ), ( 0x0000270c, 0x00002728 ), ( 0x00002729, 0x0000274c ),
    ( 0x0000274d, 0x0000274e ), ( 0x0000274f, 0x00002753 ), ( 0x00002756, 0x00002757 ),
    ( 0x00002758, 0x0000275f ), ( 0x00002761, 0x00002795 ), ( 0x00002798, 0x000027b0 ),
    ( 0x000027b1, 0x000027bf ), ( 0x000027c0, 0x000027cb ), ( 0x000027cc, 0x000027cd ),
    ( 0x000027d0, 0x00002b4d ), ( 0x00002b50, 0x00002b55 ), ( 0x00002c00, 0x00002c2f ),
    ( 0x00002c30, 0x00002c5f ), ( 0x00002c60, 0x00002c70 ), ( 0x00002c71, 0x00002c7e ),
    ( 0x00002c80, 0x00002ceb ), ( 0x00002cf9, 0x00002d26 ), ( 0x00002d30, 0x00002d66 ),
    ( 0x00002d6f, 0x00002d70 ), ( 0x00002d80, 0x00002d97 ), ( 0x00002da0, 0x00002da7 ),
    ( 0x00002da8, 0x00002daf ), ( 0x00002db0, 0x00002db7 ), ( 0x00002db8, 0x00002dbf ),
    ( 0x00002dc0, 0x00002dc7 ), ( 0x00002dc8, 0x00002dcf ), ( 0x00002dd0, 0x00002dd7 ),
    ( 0x00002dd8, 0x00002ddf ), ( 0x00002de0, 0x00002e31 ), ( 0x00002e80, 0x00002e9a ),
    ( 0x00002e9b, 0x00002ef4 ), ( 0x00002f00, 0x00002fd6 ), ( 0x00002ff0, 0x00002ffc ),
    ( 0x00003000, 0x00003040 ), ( 0x00003041, 0x00003097 ), ( 0x00003099, 0x00003100 ),
    ( 0x00003105, 0x0000312e ), ( 0x00003131, 0x0000318f ), ( 0x00003190, 0x000031b8 ),
    ( 0x000031c0, 0x000031e4 ), ( 0x000031f0, 0x0000321f ), ( 0x00003220, 0x00003244 ),
    ( 0x00003250, 0x000032ff ), ( 0x00003300, 0x00003400 ), ( 0x00003400, 0x00004db6 ),

    # The span starting at 0x4e00 stops at 0x9fcc; an earlier revision of
    # this row stopped it at 0x9fc4.
    ( 0x00004dc0, 0x00004e00 ), ( 0x00004e00, 0x00009fcc ), ( 0x0000a000, 0x0000a48d ),

    ( 0x0000a490, 0x0000a4c7 ), ( 0x0000a500, 0x0000a62c ), ( 0x0000a640, 0x0000a660 ),
    ( 0x0000a662, 0x0000a674 ), ( 0x0000a67c, 0x0000a698 ), ( 0x0000a700, 0x0000a78d ),
    ( 0x0000a7fb, 0x0000a82c ), ( 0x0000a840, 0x0000a878 ), ( 0x0000a880, 0x0000a8c5 ),
    ( 0x0000a8ce, 0x0000a8da ), ( 0x0000a900, 0x0000a954 ), ( 0x0000a95f, 0x0000a960 ),
    ( 0x0000aa00, 0x0000aa37 ), ( 0x0000aa40, 0x0000aa4e ), ( 0x0000aa50, 0x0000aa5a ),
    ( 0x0000aa5c, 0x0000aa60 ), ( 0x0000ac00, 0x0000d7a4 ), ( 0x0000d800, 0x0000db80 ),
    ( 0x0000db80, 0x0000dc00 ), ( 0x0000dc00, 0x0000e000 ), ( 0x0000e000, 0x0000f900 ),
    ( 0x0000f900, 0x0000fa2e ), ( 0x0000fa30, 0x0000fa6b ), ( 0x0000fa70, 0x0000fada ),
    ( 0x0000fb00, 0x0000fb07 ), ( 0x0000fb13, 0x0000fb18 ), ( 0x0000fb1d, 0x0000fb37 ),
    ( 0x0000fb38, 0x0000fb3d ), ( 0x0000fb3e, 0x0000fb3f ), ( 0x0000fb40, 0x0000fb42 ),
    ( 0x0000fb43, 0x0000fb45 ), ( 0x0000fb46, 0x0000fbb2 ), ( 0x0000fbd3, 0x0000fd40 ),
    ( 0x0000fd50, 0x0000fd90 ), ( 0x0000fd92, 0x0000fdc8 ), ( 0x0000fdf0, 0x0000fdfe ),
    ( 0x0000fe00, 0x0000fe1a ), ( 0x0000fe20, 0x0000fe27 ), ( 0x0000fe30, 0x0000fe53 ),
    ( 0x0000fe54, 0x0000fe67 ), ( 0x0000fe68, 0x0000fe6c ), ( 0x0000fe70, 0x0000fe75 ),
    ( 0x0000fe76, 0x0000fefd ), ( 0x0000feff, 0x0000ff00 ), ( 0x0000ff01, 0x0000ffbf ),
    ( 0x0000ffc2, 0x0000ffc8 ), ( 0x0000ffca, 0x0000ffd0 ), ( 0x0000ffd2, 0x0000ffd8 ),
    ( 0x0000ffda, 0x0000ffdd ), ( 0x0000ffe0, 0x0000ffe7 ), ( 0x0000ffe8, 0x0000ffef ),
    ( 0x0000fff9, 0x0000fffe ), ( 0x00010000, 0x0001000c ), ( 0x0001000d, 0x00010027 ),
    ( 0x00010028, 0x0001003b ), ( 0x0001003c, 0x0001003e ), ( 0x0001003f, 0x0001004e ),
    ( 0x00010050, 0x0001005e ), ( 0x00010080, 0x000100fb ), ( 0x00010100, 0x00010103 ),
    ( 0x00010107, 0x00010134 ), ( 0x00010137, 0x0001018b ), ( 0x00010190, 0x0001019c ),
    ( 0x000101d0, 0x000101fe ), ( 0x00010280, 0x0001029d ), ( 0x000102a0, 0x000102d1 ),
    ( 0x00010300, 0x0001031f ), ( 0x00010320, 0x00010324 ), ( 0x00010330, 0x0001034b ),
    ( 0x00010380, 0x0001039e ), ( 0x0001039f, 0x000103c4 ), ( 0x000103c8, 0x000103d6 ),
    ( 0x00010400, 0x0001049e ), ( 0x000104a0, 0x000104aa ), ( 0x00010800, 0x00010806 ),
    ( 0x00010808, 0x00010809 ), ( 0x0001080a, 0x00010836 ), ( 0x00010837, 0x00010839 ),
    ( 0x0001083c, 0x0001083d ), ( 0x0001083f, 0x00010840 ), ( 0x00010900, 0x0001091a ),
    ( 0x0001091f, 0x0001093a ), ( 0x0001093f, 0x00010940 ), ( 0x00010a00, 0x00010a04 ),
    ( 0x00010a05, 0x00010a07 ), ( 0x00010a0c, 0x00010a14 ), ( 0x00010a15, 0x00010a18 ),
    ( 0x00010a19, 0x00010a34 ), ( 0x00010a38, 0x00010a3b ), ( 0x00010a3f, 0x00010a48 ),
    ( 0x00010a50, 0x00010a59 ), ( 0x00012000, 0x0001236f ), ( 0x00012400, 0x00012463 ),
    ( 0x00012470, 0x00012474 ), ( 0x0001d000, 0x0001d0f6 ), ( 0x0001d100, 0x0001d127 ),
    ( 0x0001d129, 0x0001d1de ), ( 0x0001d200, 0x0001d246 ), ( 0x0001d300, 0x0001d357 ),
    ( 0x0001d360, 0x0001d372 ), ( 0x0001d400, 0x0001d455 ), ( 0x0001d456, 0x0001d49d ),
    ( 0x0001d49e, 0x0001d4a0 ), ( 0x0001d4a2, 0x0001d4a3 ), ( 0x0001d4a5, 0x0001d4a7 ),
    ( 0x0001d4a9, 0x0001d4ad ), ( 0x0001d4ae, 0x0001d4ba ), ( 0x0001d4bb, 0x0001d4bc ),
    ( 0x0001d4bd, 0x0001d4c4 ), ( 0x0001d4c5, 0x0001d506 ), ( 0x0001d507, 0x0001d50b ),
    ( 0x0001d50d, 0x0001d515 ), ( 0x0001d516, 0x0001d51d ), ( 0x0001d51e, 0x0001d53a ),
    ( 0x0001d53b, 0x0001d53f ), ( 0x0001d540, 0x0001d545 ), ( 0x0001d546, 0x0001d547 ),
    ( 0x0001d54a, 0x0001d551 ), ( 0x0001d552, 0x0001d6a6 ), ( 0x0001d6a8, 0x0001d7cc ),
    ( 0x0001d7ce, 0x0001d800 ), ( 0x0001f000, 0x0001f02c ), ( 0x0001f030, 0x0001f094 ),

    # The u-cjk-extc and u-cjk-extd rows sit between two spans that an
    # earlier revision of this table listed back to back.
    ( 0x00020000, 0x0002a6d7 ),
    ( 0x0002a700, 0x0002b735 ), # u-cjk-extc
    ( 0x0002b740, 0x0002b81e ), # u-cjk-extd
    ( 0x0002f800, 0x0002fa1e ), ( 0x000e0001, 0x000e0002 ),

    ( 0x000e0020, 0x000e0080 ), ( 0x000e0100, 0x000e01f0 ), ( 0x000f0000, 0x000ffffe ),
    ( 0x00100000, 0x0010fffe ), ) )
466
467 #---------------------------------------------------------------------------------------------------------
# Tuple of `_urange` objects, one per ( first, last ) pair listed below.
# NOTE(review): the pair ( 0x10000e14b, 0x10000e14b ) suggests `_urange`
# includes its second argument -- confirm against `_xxrange.urange`.
ranges_of_defined_jizura_kanji_cscids = tuple(
  _urange( first, last ) for first, last in (
    # xrange( 0x000e000, 0x000e0ff ), ###OBS### excluded jzr-misc from valid codepoints
    ( 0x10000e100, 0x10000e13f ),
    ( 0x10000e141, 0x10000e149 ),
    ( 0x10000e14b, 0x10000e14b ),
    ( 0x10000e14d, 0x10000e156 ),
    ( 0x10000e158, 0x10000e163 ),
    ( 0x10000e165, 0x10000e182 ), ) )
476
477 #---------------------------------------------------------------------------------------------------------
478 # updated to Unicode 6.0
# Maps a block name to a `_urange` built from the two code points listed
# beside it; rows are kept in their original order.
# NOTE(review): the bounds look like first/last code points of each block,
# i.e. `_urange` presumably includes its second argument -- confirm.
unicode_cjk_cid_ranges = {
  block_name: _urange( first, last )
  for block_name, first, last in (
    ###OBS### should be extended to cover all Unicode blocks (and other character sets, using CSCIDs!);
    ### to and from should be expressed like UMXes, `from-cid`, `from-chr`, `from-cscid` and so on;
    ### also, ranges should be extracted from ``range_names_to_cscid_range`` (which, in turn, should list
    ### ranges by range sigils)
    ( 'Hangul Jamo',                             0x1100,  0x11ff ),
    ( 'CJK Radicals Supplement',                 0x2e80,  0x2eff ),
    ( 'Kangxi Radicals',                         0x2f00,  0x2fdf ),
    ( 'Ideographic Description Characters',      0x2ff0,  0x2fff ),
    ( 'CJK Symbols and Punctuation',             0x3000,  0x303f ),
    ( 'Hiragana',                                0x3040,  0x309f ),
    ( 'Katakana',                                0x30a0,  0x30ff ),
    ( 'Bopomofo',                                0x3100,  0x312f ),
    ( 'Hangul Compatibility Jamo',               0x3130,  0x318f ),
    ( 'Kanbun',                                  0x3190,  0x319f ),
    ( 'Bopomofo Extended',                       0x31a0,  0x31bf ),
    ( 'CJK Strokes',                             0x31c0,  0x31ef ),
    ( 'Katakana Phonetic Extensions',            0x31f0,  0x31ff ),
    ( 'Enclosed CJK Letters and Months',         0x3200,  0x32ff ),
    ( 'CJK Compatibility',                       0x3300,  0x33ff ),
    ( 'CJK Unified Ideographs Extension A',      0x3400,  0x4dbf ),
    ( 'Yijing Hexagram Symbols',                 0x4dc0,  0x4dff ),
    ( 'CJK Unified Ideographs',                  0x4e00,  0x9fff ),
    ( 'Hangul Syllables',                        0xac00,  0xd7af ),
    ( 'CJK Compatibility Ideographs',            0xf900,  0xfaff ),
    ( 'CJK Compatibility Forms',                 0xfe30,  0xfe4f ),
    ( 'Tai Xuan Jing Symbols',                   0x1d300, 0x1d35f ),
    ( 'CJK Unified Ideographs Extension B',      0x20000, 0x2a6df ),

    ( 'CJK Unified Ideographs Extension C',      0x2a700, 0x2b73f ),
    ( 'CJK Unified Ideographs Extension D',      0x2b740, 0x2b81f ),

    ( 'CJK Compatibility Ideographs Supplement', 0x2f800, 0x2fa1f ),
    ( 'Enclosed Ideographic Supplement',         0x1f200, 0x1f2ff ),
    ) }
514
515 #---------------------------------------------------------------------------------------------------------
516 # updated to Unicode 6.0
# Maps range names to ``_urange`` objects built from the two codepoints given for each range
# (presumably first and last, inclusive -- TODO confirm). Besides whole Unicode blocks, the table
# holds a handful of named selections from the ``CJK Symbols and Punctuation`` block.
unicode_kanji_cid_ranges = {
    range_name: _urange(first_cid, last_cid)
    for range_name, first_cid, last_cid in (
        ('CJK Radicals Supplement', 0x2e80, 0x2eff),
        ('Kangxi Radicals', 0x2f00, 0x2fdf),
        # A selection of codepoints from the ``CJK Symbols and Punctuation`` block:
        ('CJK Zero', 0x3007, 0x3007),
        ('Ad hoc Group 1', 0x3005, 0x3006),
        ('Ad hoc Group 2', 0x3021, 0x3029),
        ('Ad hoc Group 3', 0x3038, 0x3039),
        ('Ad hoc Group 4', 0x303A, 0x303B),
        ('Ad hoc Group 5', 0x303D, 0x303D),
        ('CJK Strokes', 0x31c0, 0x31ef),
        ('CJK Unified Ideographs Extension A', 0x3400, 0x4dbf),
        ('CJK Unified Ideographs', 0x4e00, 0x9fff),
        ('CJK Compatibility Ideographs', 0xf900, 0xfaff),
        ('CJK Unified Ideographs Extension B', 0x20000, 0x2a6df),

        ('CJK Unified Ideographs Extension C', 0x2a700, 0x2b73f),
        ('CJK Unified Ideographs Extension D', 0x2b740, 0x2b81f),

        ('CJK Compatibility Ideographs Supplement', 0x2f800, 0x2fa1f),
    )
}
537
538
539 #---------------------------------------------------------------------------------------------------------
# Maps range names to ``_urange`` objects built from the two CSCIDs given for each range
# (presumably first and last, inclusive -- TODO confirm).
#
# The Unicode names were taken from http://www.unicode.org/Public/UNIDATA/Blocks.txt for
# Unicode 5.1; CJK Extension C has been added. CJK Extension D and the Enclosed Ideographic
# Supplement are listed as well.
range_names_to_cscid_range = {
    range_name: _urange(first_cscid, last_cscid)
    for range_name, first_cscid, last_cscid in (

        #===================================================================================================
        # UNICODE
        #---------------------------------------------------------------------------------------------------
        ('Aegean Numbers', 0x10100, 0x1013F),
        ('Alphabetic Presentation Forms', 0xFB00, 0xFB4F),
        ('Ancient Greek Musical Notation', 0x1D200, 0x1D24F),
        ('Ancient Greek Numbers', 0x10140, 0x1018F),
        ('Ancient Symbols', 0x10190, 0x101CF),
        ('Arabic Presentation Forms-A', 0xFB50, 0xFDFF),
        ('Arabic Presentation Forms-B', 0xFE70, 0xFEFF),
        ('Arabic Supplement', 0x0750, 0x077F),
        ('Arabic', 0x0600, 0x06FF),
        ('Armenian', 0x0530, 0x058F),
        ('Arrows', 0x2190, 0x21FF),
        ('Balinese', 0x1B00, 0x1B7F),
        ('Basic Latin', 0x0000, 0x007F),
        ('Bengali', 0x0980, 0x09FF),
        ('Block Elements', 0x2580, 0x259F),
        ('Bopomofo Extended', 0x31A0, 0x31BF),
        ('Bopomofo', 0x3100, 0x312F),
        ('Box Drawing', 0x2500, 0x257F),
        ('Braille Patterns', 0x2800, 0x28FF),
        ('Buginese', 0x1A00, 0x1A1F),
        ('Buhid', 0x1740, 0x175F),
        ('Byzantine Musical Symbols', 0x1D000, 0x1D0FF),
        ('Carian', 0x102A0, 0x102DF),
        ('Cham', 0xAA00, 0xAA5F),
        ('Cherokee', 0x13A0, 0x13FF),
        ('CJK Compatibility Forms', 0xFE30, 0xFE4F),
        ('CJK Compatibility Ideographs Supplement', 0x2F800, 0x2FA1F),
        ('CJK Compatibility Ideographs', 0xF900, 0xFAFF),
        ('CJK Compatibility', 0x3300, 0x33FF),
        ('CJK Radicals Supplement', 0x2E80, 0x2EFF),
        ('CJK Strokes', 0x31C0, 0x31EF),
        ('CJK Symbols and Punctuation', 0x3000, 0x303F),
        ('CJK Unified Ideographs Extension A', 0x3400, 0x4DBF),
        ('CJK Unified Ideographs Extension B', 0x20000, 0x2A6DF),
        ('CJK Unified Ideographs Extension C', 0x2A700, 0x2B73F),
        ('CJK Unified Ideographs Extension D', 0x2B740, 0x2B81F),
        ('CJK Unified Ideographs', 0x4E00, 0x9FFF),
        ('Combining Diacritical Marks for Symbols', 0x20D0, 0x20FF),
        ('Combining Diacritical Marks Supplement', 0x1DC0, 0x1DFF),
        ('Combining Diacritical Marks', 0x0300, 0x036F),
        ('Combining Half Marks', 0xFE20, 0xFE2F),
        ('Control Pictures', 0x2400, 0x243F),
        ('Coptic', 0x2C80, 0x2CFF),
        ('Counting Rod Numerals', 0x1D360, 0x1D37F),
        ('Cuneiform Numbers and Punctuation', 0x12400, 0x1247F),
        ('Cuneiform', 0x12000, 0x123FF),
        ('Currency Symbols', 0x20A0, 0x20CF),
        ('Cypriot Syllabary', 0x10800, 0x1083F),
        ('Cyrillic Extended-A', 0x2DE0, 0x2DFF),
        ('Cyrillic Extended-B', 0xA640, 0xA69F),
        ('Cyrillic Supplement', 0x0500, 0x052F),
        ('Cyrillic', 0x0400, 0x04FF),
        ('Deseret', 0x10400, 0x1044F),
        ('Devanagari', 0x0900, 0x097F),
        ('Dingbats', 0x2700, 0x27BF),
        ('Domino Tiles', 0x1F030, 0x1F09F),
        ('Enclosed Alphanumerics', 0x2460, 0x24FF),
        ('Enclosed Ideographic Supplement', 0x1f200, 0x1f2ff),
        ('Enclosed CJK Letters and Months', 0x3200, 0x32FF),
        ('Ethiopic Extended', 0x2D80, 0x2DDF),
        ('Ethiopic Supplement', 0x1380, 0x139F),
        ('Ethiopic', 0x1200, 0x137F),
        ('General Punctuation', 0x2000, 0x206F),
        ('Geometric Shapes', 0x25A0, 0x25FF),
        ('Georgian Supplement', 0x2D00, 0x2D2F),
        ('Georgian', 0x10A0, 0x10FF),
        ('Glagolitic', 0x2C00, 0x2C5F),
        ('Gothic', 0x10330, 0x1034F),
        ('Greek and Coptic', 0x0370, 0x03FF),
        ('Greek Extended', 0x1F00, 0x1FFF),
        ('Gujarati', 0x0A80, 0x0AFF),
        ('Gurmukhi', 0x0A00, 0x0A7F),
        ('Halfwidth and Fullwidth Forms', 0xFF00, 0xFFEF),
        ('Hangul Compatibility Jamo', 0x3130, 0x318F),
        ('Hangul Jamo', 0x1100, 0x11FF),
        ('Hangul Syllables', 0xAC00, 0xD7AF),
        ('Hanunoo', 0x1720, 0x173F),
        ('Hebrew', 0x0590, 0x05FF),
        ('High Private Use Surrogates', 0xDB80, 0xDBFF),
        ('High Surrogates', 0xD800, 0xDB7F),
        ('Hiragana', 0x3040, 0x309F),
        ('Ideographic Description Characters', 0x2FF0, 0x2FFF),
        ('IPA Extensions', 0x0250, 0x02AF),
        ('Kanbun', 0x3190, 0x319F),
        ('Kangxi Radicals', 0x2F00, 0x2FDF),
        ('Kannada', 0x0C80, 0x0CFF),
        ('Katakana Phonetic Extensions', 0x31F0, 0x31FF),
        ('Katakana', 0x30A0, 0x30FF),
        ('Kayah Li', 0xA900, 0xA92F),
        ('Kharoshthi', 0x10A00, 0x10A5F),
        ('Khmer Symbols', 0x19E0, 0x19FF),
        ('Khmer', 0x1780, 0x17FF),
        ('Lao', 0x0E80, 0x0EFF),
        ('Latin Extended Additional', 0x1E00, 0x1EFF),
        ('Latin Extended-A', 0x0100, 0x017F),
        ('Latin Extended-B', 0x0180, 0x024F),
        ('Latin Extended-C', 0x2C60, 0x2C7F),
        ('Latin Extended-D', 0xA720, 0xA7FF),
        ('Latin-1 Supplement', 0x0080, 0x00FF),
        ('Lepcha', 0x1C00, 0x1C4F),
        ('Letterlike Symbols', 0x2100, 0x214F),
        ('Limbu', 0x1900, 0x194F),
        ('Linear B Ideograms', 0x10080, 0x100FF),
        ('Linear B Syllabary', 0x10000, 0x1007F),
        ('Low Surrogates', 0xDC00, 0xDFFF),
        ('Lycian', 0x10280, 0x1029F),
        ('Lydian', 0x10920, 0x1093F),
        ('Mahjong Tiles', 0x1F000, 0x1F02F),
        ('Malayalam', 0x0D00, 0x0D7F),
        ('Mathematical Alphanumeric Symbols', 0x1D400, 0x1D7FF),
        ('Mathematical Operators', 0x2200, 0x22FF),
        ('Miscellaneous Mathematical Symbols-A', 0x27C0, 0x27EF),
        ('Miscellaneous Mathematical Symbols-B', 0x2980, 0x29FF),
        ('Miscellaneous Symbols and Arrows', 0x2B00, 0x2BFF),
        ('Miscellaneous Symbols', 0x2600, 0x26FF),
        ('Miscellaneous Technical', 0x2300, 0x23FF),
        ('Modifier Tone Letters', 0xA700, 0xA71F),
        ('Mongolian', 0x1800, 0x18AF),
        ('Musical Symbols', 0x1D100, 0x1D1FF),
        ('Myanmar', 0x1000, 0x109F),
        ('New Tai Lue', 0x1980, 0x19DF),
        ('NKo', 0x07C0, 0x07FF),
        ('Number Forms', 0x2150, 0x218F),
        ('Ogham', 0x1680, 0x169F),
        ('Ol Chiki', 0x1C50, 0x1C7F),
        ('Old Italic', 0x10300, 0x1032F),
        ('Old Persian', 0x103A0, 0x103DF),
        ('Optical Character Recognition', 0x2440, 0x245F),
        ('Oriya', 0x0B00, 0x0B7F),
        ('Osmanya', 0x10480, 0x104AF),
        ('Phags-pa', 0xA840, 0xA87F),
        ('Phaistos Disc', 0x101D0, 0x101FF),
        ('Phoenician', 0x10900, 0x1091F),
        ('Phonetic Extensions Supplement', 0x1D80, 0x1DBF),
        ('Phonetic Extensions', 0x1D00, 0x1D7F),
        ('Private Use Area', 0xE000, 0xF8FF),
        ('Rejang', 0xA930, 0xA95F),
        ('Runic', 0x16A0, 0x16FF),
        ('Saurashtra', 0xA880, 0xA8DF),
        ('Shavian', 0x10450, 0x1047F),
        ('Sinhala', 0x0D80, 0x0DFF),
        ('Small Form Variants', 0xFE50, 0xFE6F),
        ('Spacing Modifier Letters', 0x02B0, 0x02FF),
        ('Specials', 0xFFF0, 0xFFFF),
        ('Sundanese', 0x1B80, 0x1BBF),
        ('Superscripts and Subscripts', 0x2070, 0x209F),
        ('Supplemental Arrows-A', 0x27F0, 0x27FF),
        ('Supplemental Arrows-B', 0x2900, 0x297F),
        ('Supplemental Mathematical Operators', 0x2A00, 0x2AFF),
        ('Supplemental Punctuation', 0x2E00, 0x2E7F),
        ('Supplementary Private Use Area-A', 0xF0000, 0xFFFFF),
        ('Supplementary Private Use Area-B', 0x100000, 0x10FFFF),
        ('Syloti Nagri', 0xA800, 0xA82F),
        ('Syriac', 0x0700, 0x074F),
        ('Tagalog', 0x1700, 0x171F),
        ('Tagbanwa', 0x1760, 0x177F),
        ('Tags', 0xE0000, 0xE007F),
        ('Tai Le', 0x1950, 0x197F),
        ('Tai Xuan Jing Symbols', 0x1D300, 0x1D35F),
        ('Tamil', 0x0B80, 0x0BFF),
        ('Telugu', 0x0C00, 0x0C7F),
        ('Thaana', 0x0780, 0x07BF),
        ('Thai', 0x0E00, 0x0E7F),
        ('Tibetan', 0x0F00, 0x0FFF),
        ('Tifinagh', 0x2D30, 0x2D7F),
        ('Ugaritic', 0x10380, 0x1039F),
        ('Unified Canadian Aboriginal Syllabics', 0x1400, 0x167F),
        ('Vai', 0xA500, 0xA63F),
        ('Variation Selectors Supplement', 0xE0100, 0xE01EF),
        ('Variation Selectors', 0xFE00, 0xFE0F),
        ('Vertical Forms', 0xFE10, 0xFE1F),
        ('Yi Radicals', 0xA490, 0xA4CF),
        ('Yi Syllables', 0xA000, 0xA48F),
        ('Yijing Hexagram Symbols', 0x4DC0, 0x4DFF),

        #===================================================================================================
        # JIZURA
        #---------------------------------------------------------------------------------------------------
        # NOTE(review): 'JZR Miscellaneous' starts at 0x100000000 whereas 'JZR CJK Figures' starts at
        # 0x10000e100 -- confirm that the lower bound is not meant to be 0x10000e000.
        ('JZR Miscellaneous', 0x100000000, 0x10000e0ff),
        ('JZR CJK Figures', 0x10000e100, 0x10000f0ff),
    )
}
728
729
730
731 ############################################################################################################
# Maps every range name to its short range sigil; names for which no sigil has been assigned
# map to ``None``.
_range_names_to_range_sigils = {
    #=======================================================================================================
    # UNICODE
    #-------------------------------------------------------------------------------------------------------
    'Aegean Numbers': None,
    'Alphabetic Presentation Forms': 'u-abc-pf',
    'Ancient Greek Musical Notation': None,
    'Ancient Greek Numbers': None,
    'Ancient Symbols': None,
    'Arabic Presentation Forms-A': 'u-arab-pf-a',
    'Arabic Presentation Forms-B': 'u-arab-pf-b',
    'Arabic Supplement': 'u-arab-s',
    'Arabic': 'u-arab',
    'Armenian': None,
    'Arrows': 'u-arrow',
    'Balinese': None,
    'Basic Latin': 'u-latn',
    'Bengali': None,
    'Block Elements': 'u-block',
    'Bopomofo Extended': 'u-bopo-x',
    'Bopomofo': 'u-bopo',
    'Box Drawing': 'u-boxdr',
    'Braille Patterns': 'u-brail',
    'Buginese': None,
    'Buhid': None,
    'Byzantine Musical Symbols': None,
    'Carian': None,
    'Cham': None,
    'Cherokee': None,
    'CJK Compatibility Forms': 'u-cjk-cmpf',
    'CJK Compatibility Ideographs Supplement': 'u-cjk-cmpi2',
    'CJK Compatibility Ideographs': 'u-cjk-cmpi1',
    'CJK Compatibility': 'u-cjk-cmp',
    'CJK Radicals Supplement': 'u-cjk-rad2',
    'CJK Strokes': 'u-cjk-strk',
    'CJK Symbols and Punctuation': 'u-cjk-sym',
    'CJK Unified Ideographs Extension A': 'u-cjk-xa',
    'CJK Unified Ideographs Extension B': 'u-cjk-xb',
    'CJK Unified Ideographs Extension C': 'u-cjk-xc',
    'CJK Unified Ideographs Extension D': 'u-cjk-xd',
    'CJK Unified Ideographs': 'u-cjk',
    'Combining Diacritical Marks for Symbols': 'u-cdm-sy',
    'Combining Diacritical Marks Supplement': 'u-cdm-s',
    'Combining Diacritical Marks': 'u-cdm',
    'Combining Half Marks': None,
    'Control Pictures': 'u-ctrlp',
    'Coptic': None,
    'Counting Rod Numerals': None,
    'Cuneiform Numbers and Punctuation': None,
    'Cuneiform': None,
    'Currency Symbols': 'u-currn',
    'Cypriot Syllabary': None,
    'Cyrillic Extended-A': 'u-cyrl-a',
    'Cyrillic Extended-B': 'u-cyrl-b',
    'Cyrillic Supplement': 'u-cyrl-s',
    'Cyrillic': 'u-cyrl',
    'Deseret': None,
    'Devanagari': None,
    'Dingbats': 'u-dingb',
    'Domino Tiles': None,
    'Enclosed Alphanumerics': 'u-enalp',
    'Enclosed CJK Letters and Months': 'u-cjk-enclett',
    'Enclosed Ideographic Supplement': 'u-cjk-encsupp',
    'Ethiopic Extended': None,
    'Ethiopic Supplement': None,
    'Ethiopic': None,
    'General Punctuation': 'u-punct',
    'Geometric Shapes': 'u-geoms',
    'Georgian Supplement': None,
    'Georgian': None,
    'Glagolitic': None,
    'Gothic': None,
    'Greek and Coptic': 'u-grek',
    'Greek Extended': 'u-grek-x',
    'Gujarati': None,
    'Gurmukhi': None,
    'Halfwidth and Fullwidth Forms': 'u-halfull',
    'Hangul Compatibility Jamo': 'u-hang-comp-jm',
    'Hangul Jamo': 'u-hang-jm',
    'Hangul Syllables': 'u-hang-syl',
    'Hanunoo': None,
    'Hebrew': None,
    'High Private Use Surrogates': None,
    'High Surrogates': None,
    'Hiragana': 'u-cjk-hira',
    'Ideographic Description Characters': 'u-cjk-idc',
    'IPA Extensions': 'u-ipa-x',
    'Kanbun': 'u-cjk-kanbun',
    'Kangxi Radicals': 'u-cjk-rad1',
    'Kannada': None,
    'Katakana Phonetic Extensions': 'u-cjk-kata-x',
    'Katakana': 'u-cjk-kata',
    'Kayah Li': None,
    'Kharoshthi': None,
    'Khmer Symbols': None,
    'Khmer': None,
    'Lao': None,
    'Latin Extended Additional': 'u-latn-xa',
    'Latin Extended-A': 'u-latn-a',
    'Latin Extended-B': 'u-latn-b',
    'Latin Extended-C': 'u-latn-c',
    'Latin Extended-D': 'u-latn-d',
    'Latin-1 Supplement': 'u-latn-1',
    'Lepcha': None,
    'Letterlike Symbols': 'u-llsym',
    'Limbu': None,
    'Linear B Ideograms': None,
    'Linear B Syllabary': None,
    'Low Surrogates': None,
    'Lycian': None,
    'Lydian': None,
    'Mahjong Tiles': None,
    'Malayalam': None,
    'Mathematical Alphanumeric Symbols': None,
    'Mathematical Operators': None,
    'Miscellaneous Mathematical Symbols-A': 'u-maths-a',
    'Miscellaneous Mathematical Symbols-B': None,
    'Miscellaneous Symbols and Arrows': None,
    'Miscellaneous Symbols': 'u-sym',
    'Miscellaneous Technical': None,
    'Modifier Tone Letters': None,
    'Mongolian': None,
    'Musical Symbols': None,
    'Myanmar': None,
    'New Tai Lue': None,
    'NKo': None,
    'Number Forms': 'u-num',
    'Ogham': None,
    'Ol Chiki': 'u-olck',
    'Old Italic': None,
    'Old Persian': None,
    'Optical Character Recognition': 'u-ocr',
    'Oriya': None,
    'Osmanya': None,
    'Phags-pa': None,
    'Phaistos Disc': None,
    'Phoenician': None,
    'Phonetic Extensions Supplement': 'u-phon-xs',
    'Phonetic Extensions': 'u-phon-x',
    'Private Use Area': 'u-pua',
    'Rejang': None,
    'Runic': None,
    'Saurashtra': None,
    'Shavian': None,
    'Sinhala': None,
    'Small Form Variants': 'u-small',
    'Spacing Modifier Letters': 'u-sml',
    'Specials': 'u-special',
    'Sundanese': None,
    'Superscripts and Subscripts': 'u-supsub',
    'Supplemental Arrows-A': 'u-arrow-a',
    'Supplemental Arrows-B': 'u-arrow-b',
    'Supplemental Mathematical Operators': None,
    'Supplemental Punctuation': 'u-punct-s',
    'Supplementary Private Use Area-A': None,
    'Supplementary Private Use Area-B': None,
    'Syloti Nagri': None,
    'Syriac': None,
    'Tagalog': None,
    'Tagbanwa': None,
    'Tags': None,
    'Tai Le': None,
    'Tai Xuan Jing Symbols': 'u-txj-sym',
    'Tamil': None,
    'Telugu': None,
    'Thaana': None,
    'Thai': None,
    'Tibetan': None,
    'Tifinagh': None,
    'Ugaritic': None,
    'Unified Canadian Aboriginal Syllabics': None,
    'Vai': None,
    'Variation Selectors Supplement': 'u-varsl-s',
    'Variation Selectors': 'u-varsl',
    'Vertical Forms': 'u-vertf',
    'Yi Radicals': None,
    'Yi Syllables': None,
    'Yijing Hexagram Symbols': 'u-yijng',

    #=======================================================================================================
    # JIZURA
    #-------------------------------------------------------------------------------------------------------
    'JZR Miscellaneous': 'jzr-misc',
    'JZR CJK Figures': 'jzr-fig',
}
917
918 #===========================================================================================================
# The subset of ``_range_names_to_range_sigils`` whose sigil is truthy, i.e. entries mapped to
# ``None`` are dropped.
range_names_to_range_sigils = {
    range_name: sigil
    for range_name, sigil in _range_names_to_range_sigils.items()
    if sigil
}
923 #-----------------------------------------------------------------------------------------------------------
# Reverse lookup of ``range_names_to_range_sigils``: range sigil -> range name. Should two names
# ever share a sigil, the name that comes later in iteration order wins.
range_sigils_to_range_names = {
    sigil: range_name
    for range_name, sigil in range_names_to_range_sigils.items()
}
927
928 #-----------------------------------------------------------------------------------------------------------
# Ordered list of range sigils; judging by the name it is used when computing sort CIDs
# (TODO confirm against the code that reads it). The order of the entries is significant
# for a list and is kept exactly as it was.
range_sigils_for_sortcid = [
    # Unicode CJK ranges
    'u-cjk', 'u-cjk-xa', 'u-cjk-xb', 'u-cjk-xc', 'u-cjk-xd',
    'u-cjk-sym', 'u-cjk-rad1', 'u-cjk-rad2', 'u-cjk-strk',
    'u-cjk-cmpi1', 'u-cjk-cmpi2', 'u-cjk-cmpf',

    # Jizura ranges
    'jzr-fig', 'jzr-misc',

    # Sigils that carry neither a ``u-`` nor a ``jzr-`` prefix
    'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7',
    'cb', 'cdp', 'gt', 'gtk',
    'hzk1', 'hzk2', 'hzk3', 'hzk4', 'hzk5', 'hzk6',
    'hzk7', 'hzk8', 'hzk9', 'hzk10', 'hzk11', 'hzk12',
    'j83', 'j90', 'jc3', 'jsp', 'jx1', 'jx2', 'jx3', 'jzr',
    'mcs', 'morohashi', 'rui6', 'aj', 'b',

    # Further Unicode ranges
    'u-boxdr', 'u-bopo', 'u-geoms', 'u-halfull',
]
987
988
989 ############################################################################################################
# Maps four-letter script codes to English script names.
#
# Two values that had been damaged by a lossy character conversion are repaired here:
#   * 'Ethi' read 'Ethiopic (Ge?ez)'; the '?' stood in for a lost non-ASCII letter, restored as
#     U+02BB MODIFIER LETTER TURNED COMMA.
#   * 'Nkgb' read 'Nakhi Geba Naxi Geba)'; the opening parenthesis had been lost. Further aliases
#     may have been lost along with it -- NOTE(review): compare with the ISO 15924 registry.
u_script_codes_to_script_name = {
    'Arab': 'Arabic',
    'Armi': 'Imperial Aramaic',
    'Armn': 'Armenian',
    'Avst': 'Avestan',
    'Bali': 'Balinese',
    'Batk': 'Batak',
    'Beng': 'Bengali',
    'Blis': 'Blissymbols',
    'Bopo': 'Bopomofo',
    'Brah': 'Brahmi',
    'Brai': 'Braille',
    'Bugi': 'Buginese',
    'Buhd': 'Buhid',
    'Cakm': 'Chakma',
    'Cans': 'Unified Canadian Aboriginal Syllabics',
    'Cari': 'Carian',
    'Cham': 'Cham',
    'Cher': 'Cherokee',
    'Cirt': 'Cirth',
    'Copt': 'Coptic',
    'Cprt': 'Cypriot',
    'Cyrl': 'Cyrillic',
    'Cyrs': 'Cyrillic (Old Church Slavonic variant)',
    'Deva': 'Devanagari (Nagari)',
    'Dsrt': 'Deseret (Mormon)',
    'Egyd': 'Egyptian demotic',
    'Egyh': 'Egyptian hieratic',
    'Egyp': 'Egyptian hieroglyphs',
    'Ethi': 'Ethiopic (Ge\u02bbez)',
    'Geok': 'Khutsuri (Asomtavruli and Nuskhuri)',
    'Geor': 'Georgian (Mkhedruli)',
    'Glag': 'Glagolitic',
    'Goth': 'Gothic',
    'Grek': 'Greek',
    'Gujr': 'Gujarati',
    'Guru': 'Gurmukhi',
    'Hang': 'Hangul (Hangul, Hangeul)',
    'Hani': 'Han (Hanzi, Kanji, Hanja)',
    'Hano': 'Hanunoo (Hanunóo)',
    'Hans': 'Han (Simplified variant)',
    'Hant': 'Han (Traditional variant)',
    'Hebr': 'Hebrew',
    'Hira': 'Hiragana',
    'Hmng': 'Pahawh Hmong',
    'Hrkt': '(alias for Hiragana + Katakana)',
    'Hung': 'Old Hungarian',
    'Inds': 'Indus (Harappan)',
    'Ital': 'Old Italic (Etruscan, Oscan, etc.)',
    'Java': 'Javanese',
    'Jpan': 'Japanese (alias for Han + Hiragana + Katakana)',
    'Kali': 'Kayah Li',
    'Kana': 'Katakana',
    'Khar': 'Kharoshthi',
    'Khmr': 'Khmer',
    'Knda': 'Kannada',
    'Kore': 'Korean (alias for Hangul + Han)',
    'Kthi': 'Kaithi',
    'Lana': 'Tai Tham (Lanna)',
    'Laoo': 'Lao',
    'Latf': 'Latin (Fraktur variant)',
    'Latg': 'Latin (Gaelic variant)',
    'Latn': 'Latin',
    'Lepc': 'Lepcha (Róng)',
    'Limb': 'Limbu',
    'Lina': 'Linear A',
    'Linb': 'Linear B',
    'Lisu': 'Lisu (Fraser)',
    'Lyci': 'Lycian',
    'Lydi': 'Lydian',
    'Mand': 'Mandaic, Mandaean',
    'Mani': 'Manichaean',
    'Maya': 'Mayan hieroglyphs',
    'Mero': 'Meroitic',
    'Mlym': 'Malayalam',
    'Mong': 'Mongolian',
    'Moon': 'Moon (Moon code, Moon script, Moon type)',
    'Mtei': 'Meitei Mayek (Meithei, Meetei)',
    'Mymr': 'Myanmar (Burmese)',
    'Nkgb': 'Nakhi Geba (Naxi Geba)',
    'Nkoo': 'N’Ko',
    'Ogam': 'Ogham',
    'Olck': 'Ol Chiki (Ol Cemet’, Ol, Santali)',
    'Orkh': 'Orkhon',
    'Orya': 'Oriya',
    'Osma': 'Osmanya',
    'Perm': 'Old Permic',
    'Phag': 'Phags-pa',
    'Phli': 'Inscriptional Pahlavi',
    'Phlp': 'Psalter Pahlavi',
    'Phlv': 'Book Pahlavi',
    'Phnx': 'Phoenician',
    'Plrd': 'Miao (Pollard)',
    'Prti': 'Inscriptional Parthian',
    'Qaaa': 'Reserved for private use (start)',
    'Qabx': 'Reserved for private use (end)',
    'Rjng': 'Rejang (Redjang, Kaganga)',
    'Roro': 'Rongorongo',
    'Runr': 'Runic',
    'Samr': 'Samaritan',
    'Sara': 'Sarati',
    'Saur': 'Saurashtra',
    'Sgnw': 'SignWriting',
    'Shaw': 'Shavian (Shaw)',
    'Sinh': 'Sinhala',
    'Sund': 'Sundanese',
    'Sylo': 'Syloti Nagri',
    'Syrc': 'Syriac',
    'Syre': 'Syriac (Estrangelo variant)',
    'Syrj': 'Syriac (Western variant)',
    'Syrn': 'Syriac (Eastern variant)',
    'Tagb': 'Tagbanwa',
    'Tale': 'Tai Le',
    'Talu': 'New Tai Lue',
    'Taml': 'Tamil',
    'Tavt': 'Tai Viet',
    'Telu': 'Telugu',
    'Teng': 'Tengwar',
    'Tfng': 'Tifinagh (Berber)',
    'Tglg': 'Tagalog (Baybayin, Alibata)',
    'Thaa': 'Thaana',
    'Thai': 'Thai',
    'Tibt': 'Tibetan',
    'Ugar': 'Ugaritic',
    'Vaii': 'Vai',
    'Visp': 'Visible Speech',
    'Xpeo': 'Old Persian',
    'Xsux': 'Cuneiform, Sumero-Akkadian',
    'Yiii': 'Yi',
    'Zinh': 'Code for inherited script',
    'Zmth': 'Mathematical notation',
    'Zsym': 'Symbols',
    'Zxxx': 'Code for unwritten documents',
    'Zyyy': 'Code for undetermined script',
    'Zzzz': 'Code for uncoded script',
}
1125
1126
# Module-level instance of ``UMX_data_000``, created once when this module is loaded.
UMX_data = UMX_data_000()