1 | ;
|
2 | Object.defineProperty(exports, "__esModule", { value: true });
|
3 | exports.isUtf8 = void 0;
|
4 | const toU8a_js_1 = require("../u8a/toU8a.js");
|
5 | const string_js_1 = require("./string.js");
|
/**
 * @name isUtf8
 * @summary Tests if the input is valid Utf8
 * @description
 * Checks to see if the input string or Uint8Array is well-formed UTF-8.
 *
 * The bytes are walked one sequence at a time. For every lead byte the
 * expected sequence length and the allowed range of the second byte are
 * determined, then the remaining bytes are checked to be continuation bytes
 * (80..BF). The narrowed second-byte ranges reject overlong encodings
 * (E0 80..9F, F0 80..8F), UTF-16 surrogates (ED A0..BF) and code points above
 * U+10FFFF (F4 90..BF); lead bytes outside 00..7F and C2..F4 are never valid.
 *
 * @param value The value to test. Falsy values are not decoded: the result is
 * whatever `isString` reports for them. Any other value is converted to bytes
 * with `u8aToU8a` before validation.
 * @returns {boolean} `true` when every byte belongs to a well-formed sequence
 * (including when there are no bytes at all), `false` as soon as an invalid or
 * truncated sequence is found.
 */
function isUtf8(value) {
    if (!value) {
        // Nothing to decode; validity depends only on the falsy value's type.
        return (0, string_js_1.isString)(value);
    }
    const u8a = (0, toU8a_js_1.u8aToU8a)(value);
    const len = u8a.length;
    let i = 0;
    while (i < len) {
        const lead = u8a[i];
        if (lead <= 0x7F) /* 00..7F */ {
            i += 1;
            continue;
        }
        // Total length of the sequence and the allowed range for its 2nd byte.
        // Every byte after the 2nd is always a plain continuation byte.
        let size;
        let secondMin = 0x80;
        let secondMax = 0xBF;
        if (lead >= 0xC2 && lead <= 0xDF) /* C2..DF 80..BF */ {
            size = 2;
        }
        else if (lead === 0xE0) /* E0 A0..BF 80..BF */ {
            size = 3;
            secondMin = 0xA0;
        }
        else if (lead >= 0xE1 && lead <= 0xEC) /* E1..EC 80..BF 80..BF */ {
            size = 3;
        }
        else if (lead === 0xED) /* ED 80..9F 80..BF */ {
            size = 3;
            secondMax = 0x9F;
        }
        else if (lead >= 0xEE && lead <= 0xEF) /* EE..EF 80..BF 80..BF */ {
            size = 3;
        }
        else if (lead === 0xF0) /* F0 90..BF 80..BF 80..BF */ {
            size = 4;
            secondMin = 0x90;
        }
        else if (lead >= 0xF1 && lead <= 0xF3) /* F1..F3 80..BF 80..BF 80..BF */ {
            size = 4;
        }
        else if (lead === 0xF4) /* F4 80..8F 80..BF 80..BF */ {
            size = 4;
            secondMax = 0x8F;
        }
        else {
            // Expecting a lead byte in one of the ranges 00..7F or C2..F4.
            return false;
        }
        // The whole sequence has to fit into the remaining input.
        if (i + size > len) {
            return false;
        }
        const second = u8a[i + 1];
        if (second < secondMin || second > secondMax) {
            return false;
        }
        for (let k = 2; k < size; k++) {
            const trailing = u8a[i + k];
            if (trailing < 0x80 || trailing > 0xBF) {
                return false;
            }
        }
        i += size;
    }
    return true;
}
|
201 | exports.isUtf8 = isUtf8;
|