1 | import { u8aToU8a } from '../u8a/toU8a.js';
|
2 | import { isString } from './string.js';
|
/**
 * @name isUtf8
 * @summary Tests if the input is valid Utf8
 * @description
 * Checks to see if the input string or Uint8Array is valid Utf8.
 *
 * The input is passed through `u8aToU8a` and the resulting bytes are scanned
 * sequence by sequence. Accepted sequences are:
 *
 * | Lead byte | 2nd byte | 3rd byte | 4th byte |
 * |-----------|----------|----------|----------|
 * | 00..7F    |          |          |          |
 * | C2..DF    | 80..BF   |          |          |
 * | E0        | A0..BF   | 80..BF   |          |
 * | E1..EC    | 80..BF   | 80..BF   |          |
 * | ED        | 80..9F   | 80..BF   |          |
 * | EE..EF    | 80..BF   | 80..BF   |          |
 * | F0        | 90..BF   | 80..BF   | 80..BF   |
 * | F1..F3    | 80..BF   | 80..BF   | 80..BF   |
 * | F4        | 80..8F   | 80..BF   | 80..BF   |
 *
 * Any other lead byte (80..C1, F5..FF), a continuation byte outside its
 * range, or a sequence cut short by the end of the input makes the result
 * `false`.
 *
 * @param {*} value The value to test; anything `u8aToU8a` accepts
 * (presumably strings and byte arrays - see that helper for the full list).
 * @returns {boolean} `true` when every byte sequence is well formed. For a
 * falsy `value` the result is `isString(value)` and no bytes are scanned.
 */
export function isUtf8(value) {
  // Falsy inputs short-circuit: the verdict is delegated entirely to isString
  if (!value) {
    return isString(value);
  }

  const u8a = u8aToU8a(value);
  const len = u8a.length;
  let i = 0;

  while (i < len) {
    const lead = u8a[i];

    // 00..7F: single-byte (ASCII) sequence
    if (lead <= 0x7F) {
      i += 1;

      continue;
    }

    // Number of continuation bytes the lead byte demands, plus the allowed
    // range for the first of them. Only the 2nd byte is ever restricted
    // beyond 80..BF; all later continuation bytes use the plain range.
    let trailing;
    let secondMin = 0x80;
    let secondMax = 0xBF;

    if (lead >= 0xC2 && lead <= 0xDF) {
      // C0 and C1 are excluded: they could only encode overlong forms
      trailing = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      trailing = 2;

      if (lead === 0xE0) {
        // E0 80..9F would be an overlong encoding
        secondMin = 0xA0;
      } else if (lead === 0xED) {
        // ED A0..BF would encode the surrogate range U+D800..U+DFFF
        secondMax = 0x9F;
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      trailing = 3;

      if (lead === 0xF0) {
        // F0 80..8F would be an overlong encoding
        secondMin = 0x90;
      } else if (lead === 0xF4) {
        // F4 90..BF would exceed U+10FFFF
        secondMax = 0x8F;
      }
    } else {
      // 80..C1 and F5..FF can never start a sequence
      return false;
    }

    // The whole sequence must fit inside the input
    if (i + trailing >= len) {
      return false;
    }

    if (u8a[i + 1] < secondMin || u8a[i + 1] > secondMax) {
      return false;
    }

    for (let k = 2; k <= trailing; k++) {
      if (u8a[i + k] < 0x80 || u8a[i + k] > 0xBF) {
        return false;
      }
    }

    i += trailing + 1;
  }

  return true;
}
\ | No newline at end of file |