UNPKG

7.92 kBJavaScriptView Raw
1// Copyright 2017-2022 @polkadot/util authors & contributors
2// SPDX-License-Identifier: Apache-2.0
3// Adapted from https://github.com/JulienPalard/is_utf8/blob/master/is_utf8.c
4import { u8aToU8a } from "../u8a/toU8a.js";
5import { isString } from "./string.js";
// Well-formed multi-byte UTF-8 sequences, keyed by the range of the lead byte.
// Only the 2nd byte has a lead-dependent range: the tightened bounds reject
// overlong encodings (E0 / F0), UTF-16 surrogates (ED) and code points above
// U+10FFFF (F4). Every byte after the 2nd must be a plain continuation byte.
const UTF8_MULTIBYTE_RANGES = [
  { leadMin: 0xC2, leadMax: 0xDF, secondMin: 0x80, secondMax: 0xBF, size: 2 },
  { leadMin: 0xE0, leadMax: 0xE0, secondMin: 0xA0, secondMax: 0xBF, size: 3 },
  { leadMin: 0xE1, leadMax: 0xEC, secondMin: 0x80, secondMax: 0xBF, size: 3 },
  { leadMin: 0xED, leadMax: 0xED, secondMin: 0x80, secondMax: 0x9F, size: 3 },
  { leadMin: 0xEE, leadMax: 0xEF, secondMin: 0x80, secondMax: 0xBF, size: 3 },
  { leadMin: 0xF0, leadMax: 0xF0, secondMin: 0x90, secondMax: 0xBF, size: 4 },
  { leadMin: 0xF1, leadMax: 0xF3, secondMin: 0x80, secondMax: 0xBF, size: 4 },
  { leadMin: 0xF4, leadMax: 0xF4, secondMin: 0x80, secondMax: 0x8F, size: 4 }
];

// True when `byte` lies in the inclusive range [min, max].
function isByteInRange(byte, min, max) {
  return byte >= min && byte <= max;
}

/**
 * @name isUtf8
 * @summary Tests if the input is valid Utf8
 * @description
 * Checks to see if the input string or Uint8Array is valid Utf8.
 *
 * Falsy inputs are never scanned: the result for them is whatever `isString`
 * reports for the value (presumably `true` only for the empty string).
 * Any other input is passed through `u8aToU8a` and the resulting bytes are
 * walked one sequence at a time; the first malformed or truncated sequence
 * makes the function return `false`.
 *
 * @param {*} value The value to test.
 * @returns {boolean} `true` when every byte belongs to a well-formed sequence
 * (an empty byte array is therefore valid), `false` otherwise.
 */
export function isUtf8(value) {
  if (!value) {
    return isString(value);
  }

  const u8a = u8aToU8a(value);
  const len = u8a.length;
  let i = 0;

  while (i < len) {
    const lead = u8a[i];

    // 00..7F: single-byte (ASCII) sequence
    if (lead <= 0x7F) {
      i += 1;
      continue;
    }

    const range = UTF8_MULTIBYTE_RANGES.find(
      ({ leadMax, leadMin }) => isByteInRange(lead, leadMin, leadMax)
    );

    // No matching row means the lead byte is 80..C1 or F5..FF, none of which
    // can start a sequence. A matching row still fails when the input ends
    // before all of its bytes are available.
    if (!range || i + range.size > len) {
      return false;
    }

    if (!isByteInRange(u8a[i + 1], range.secondMin, range.secondMax)) {
      return false;
    }

    // 3rd and 4th bytes (when present) are always 80..BF
    for (let offset = 2; offset < range.size; offset++) {
      if (!isByteInRange(u8a[i + offset], 0x80, 0xBF)) {
        return false;
      }
    }

    i += range.size;
  }

  return true;
}
\No newline at end of file