// Listing captured from UNPKG (20.7 kB, JavaScript).
'use strict';
Object.defineProperty(exports, "__esModule", { value: true });
exports.LineBreaker = exports.inlineBreakOpportunities = exports.lineBreakAtIndex = exports.codePointsToCharacterClasses = exports.UnicodeTrie = exports.BREAK_ALLOWED = exports.BREAK_NOT_ALLOWED = exports.BREAK_MANDATORY = exports.classes = exports.LETTER_NUMBER_MODIFIER = void 0;
var utrie_1 = require("utrie");
var linebreak_trie_1 = require("./linebreak-trie");
var Util_1 = require("./Util");
// Trie values greater than this offset carry an extra flag: codePointsToCharacterClasses
// subtracts the offset to recover the class and records `true` in its third result array
// (consumed as `isLetterNumber` by cssFormattedClasses).
exports.LETTER_NUMBER_MODIFIER = 50;
// Numeric identifiers for the line breaking classes. The values are what the
// trie lookup in codePointsToCharacterClasses is compared against, so they must
// not be renumbered.
// Non-tailorable Line Breaking Classes
var BK = 1; // Cause a line break (after)
var CR = 2; // Cause a line break (after), except between CR and LF
var LF = 3; // Cause a line break (after)
var CM = 4; // Prohibit a line break between the character and the preceding character
var NL = 5; // Cause a line break (after)
var SG = 6; // Do not occur in well-formed text
var WJ = 7; // Prohibit line breaks before and after
var ZW = 8; // Provide a break opportunity
var GL = 9; // Prohibit line breaks before and after
var SP = 10; // Enable indirect line breaks
var ZWJ = 11; // Prohibit line breaks within joiner sequences
// Break Opportunities
var B2 = 12; // Provide a line break opportunity before and after the character
var BA = 13; // Generally provide a line break opportunity after the character
var BB = 14; // Generally provide a line break opportunity before the character
var HY = 15; // Provide a line break opportunity after the character, except in numeric context
var CB = 16; // Provide a line break opportunity contingent on additional information
// Characters Prohibiting Certain Breaks
var CL = 17; // Prohibit line breaks before
var CP = 18; // Prohibit line breaks before
var EX = 19; // Prohibit line breaks before
var IN = 20; // Allow only indirect line breaks between pairs
var NS = 21; // Allow only indirect line breaks before
var OP = 22; // Prohibit line breaks after
var QU = 23; // Act like they are both opening and closing
// Numeric Context
var IS = 24; // Prevent breaks after any and before numeric
var NU = 25; // Form numeric expressions for line breaking purposes
var PO = 26; // Do not break following a numeric expression
var PR = 27; // Do not break in front of a numeric expression
var SY = 28; // Prevent a break before, and allow a break after
// Other Characters
var AI = 29; // Act like AL when the resolved EAW is N; otherwise, act as ID
var AL = 30; // Are alphabetic characters or symbols that are used with alphabetic characters
var CJ = 31; // Treat as NS or ID for strict or normal breaking.
var EB = 32; // Do not break from following Emoji Modifier
var EM = 33; // Do not break from preceding Emoji Base
var H2 = 34; // Form Korean syllable blocks
var H3 = 35; // Form Korean syllable blocks
var HL = 36; // Do not break around a following hyphen; otherwise act as Alphabetic
var ID = 37; // Break before or after, except in some numeric context
var JL = 38; // Form Korean syllable blocks
var JV = 39; // Form Korean syllable blocks
var JT = 40; // Form Korean syllable blocks
var RI = 41; // Keep pairs together. For pairs, break before and after other classes
var SA = 42; // Provide a line break opportunity contingent on additional, language-specific context analysis
var XX = 43; // Have as yet unknown line breaking behavior or unassigned code positions
// OP code points that rule LB30 in _lineBreakAtIndex does NOT keep attached to a
// preceding letter or digit (presumably the East Asian wide opening brackets
// U+2329 and U+FF08 - confirm against UAX #14).
var ea_OP = [0x2329, 0xff08];
57exports.classes = {
58 BK: BK,
59 CR: CR,
60 LF: LF,
61 CM: CM,
62 NL: NL,
63 SG: SG,
64 WJ: WJ,
65 ZW: ZW,
66 GL: GL,
67 SP: SP,
68 ZWJ: ZWJ,
69 B2: B2,
70 BA: BA,
71 BB: BB,
72 HY: HY,
73 CB: CB,
74 CL: CL,
75 CP: CP,
76 EX: EX,
77 IN: IN,
78 NS: NS,
79 OP: OP,
80 QU: QU,
81 IS: IS,
82 NU: NU,
83 PO: PO,
84 PR: PR,
85 SY: SY,
86 AI: AI,
87 AL: AL,
88 CJ: CJ,
89 EB: EB,
90 EM: EM,
91 H2: H2,
92 H3: H3,
93 HL: HL,
94 ID: ID,
95 JL: JL,
96 JV: JV,
97 JT: JT,
98 RI: RI,
99 SA: SA,
100 XX: XX,
101};
// The three possible results of a break test, returned by _lineBreakAtIndex and
// emitted verbatim into the string built by inlineBreakOpportunities.
exports.BREAK_MANDATORY = '!';
exports.BREAK_NOT_ALLOWED = '×';
exports.BREAK_ALLOWED = '÷';
// Code point -> class lookup, built from the base64 data in ./linebreak-trie.
exports.UnicodeTrie = utrie_1.createTrieFromBase64(linebreak_trie_1.base64, linebreak_trie_1.byteLength);
// Class groups shared by several rules in _lineBreakAtIndex.
var ALPHABETICS = [AL, HL];
var HARD_LINE_BREAKS = [BK, CR, LF, NL];
var SPACE = [SP, ZW];
var PREFIX_POSTFIX = [PR, PO];
// Classes after which a combining mark is not attached to the previous character (see LB9/LB10).
var LINE_BREAKS = HARD_LINE_BREAKS.concat(SPACE);
var KOREAN_SYLLABLE_BLOCK = [JL, JV, JT, H2, H3];
var HYPHEN = [HY, BA];
/**
 * Resolves every code point to the line breaking class that the break rules operate on.
 *
 * @param {number[]} codePoints Code points of the text.
 * @param {string} [lineBreak='strict'] Line-break mode. With 'strict', class CJ resolves to
 *     NS, otherwise to ID. With 'normal', 'auto' or 'loose', the code points U+2010, U+2013,
 *     U+301C and U+30A0 are additionally resolved to CB.
 * @returns {Array} `[indices, types, categories]`, three arrays with one entry per code point:
 *     `indices[i]` is the index of the character that code point i is attached to (its own
 *     index, or the base character's index for a combining mark / ZWJ handled by LB9);
 *     `types[i]` is the resolved class; `categories[i]` is true when the raw trie value
 *     was greater than LETTER_NUMBER_MODIFIER.
 */
var codePointsToCharacterClasses = function (codePoints, lineBreak) {
    if (lineBreak === void 0) { lineBreak = 'strict'; }
    var types = [];
    var indices = [];
    var categories = [];
    // Loop-invariant: does this mode allow breaks around the hyphen-like code points below?
    var breakAroundHyphens = ['normal', 'auto', 'loose'].indexOf(lineBreak) !== -1;
    codePoints.forEach(function (codePoint, index) {
        var classType = exports.UnicodeTrie.get(codePoint);
        var flagged = classType > exports.LETTER_NUMBER_MODIFIER;
        if (flagged) {
            classType -= exports.LETTER_NUMBER_MODIFIER;
        }
        categories.push(flagged);
        // ‐ U+2010, – U+2013, 〜 U+301C, ゠ U+30A0
        if (breakAroundHyphens && [0x2010, 0x2013, 0x301c, 0x30a0].indexOf(codePoint) !== -1) {
            indices.push(index);
            types.push(CB);
            return;
        }
        if (classType === CM || classType === ZWJ) {
            var prev = types[index - 1];
            if (index !== 0 && LINE_BREAKS.indexOf(prev) === -1) {
                // LB9 Do not break a combining character sequence; treat it as if it has the line breaking class of
                // the base character in all of the following rules. Treat ZWJ as if it were CM.
                indices.push(indices[index - 1]);
                types.push(prev);
            }
            else {
                // LB10 Treat any remaining combining mark or ZWJ as AL.
                indices.push(index);
                types.push(AL);
            }
            return;
        }
        indices.push(index);
        if (classType === CJ) {
            types.push(lineBreak === 'strict' ? NS : ID);
        }
        else if (classType === SA || classType === AI) {
            types.push(AL);
        }
        else if (classType === XX) {
            // For supplementary characters, a useful default is to treat characters in the range 10000..1FFFD as AL
            // and characters in the ranges 20000..2FFFD and 30000..3FFFD as ID, until the implementation can be revised
            // to take into account the actual line breaking properties for these characters.
            var isIdeographicPlane = (codePoint >= 0x20000 && codePoint <= 0x2fffd) ||
                (codePoint >= 0x30000 && codePoint <= 0x3fffd);
            types.push(isIdeographicPlane ? ID : AL);
        }
        else {
            types.push(classType);
        }
    });
    return [indices, types, categories];
};
// Public export of the class resolver.
exports.codePointsToCharacterClasses = codePointsToCharacterClasses;
/**
 * Tests for the pattern `a SP* × SP* b` around `currentIndex`, i.e. whether the break
 * candidate after `currentIndex` lies between class `a` and class `b` with nothing but
 * spaces in between.
 *
 * @param {number|number[]} a Class (or list of classes) expected on the left.
 * @param {number} b Class expected on the right.
 * @param {number} currentIndex Index of the class immediately before the break candidate.
 * @param {number[]} classTypes Resolved classes of the text.
 * @returns {boolean} True when the pattern matches.
 */
var isAdjacentWithSpaceIgnored = function (a, b, currentIndex, classTypes) {
    var matchesLeft = function (type) {
        return Array.isArray(a) ? a.indexOf(type) !== -1 : a === type;
    };
    // True when the first non-space class after currentIndex is `b`.
    var isFollowedByRight = function () {
        for (var i = currentIndex + 1; i < classTypes.length; i++) {
            if (classTypes[i] === b) {
                return true;
            }
            if (classTypes[i] !== SP) {
                return false;
            }
        }
        return false;
    };
    var current = classTypes[currentIndex];
    // Case 1: the left class sits directly before the break candidate.
    if (matchesLeft(current) && isFollowedByRight()) {
        return true;
    }
    // Case 2: the break candidate follows a run of spaces; look left past the run for `a`.
    if (current === SP) {
        for (var j = currentIndex - 1; j >= 0; j--) {
            var prev = classTypes[j];
            if (matchesLeft(prev) && isFollowedByRight()) {
                return true;
            }
            if (prev !== SP) {
                break;
            }
        }
    }
    return false;
};
/**
 * Walks left from `currentIndex` (inclusive) and returns the first class that is not SP.
 *
 * @param {number} currentIndex Index to start from.
 * @param {number[]} classTypes Resolved classes of the text.
 * @returns {number} The class found, or 0 when only spaces precede (0 is not a class id).
 */
var previousNonSpaceClassType = function (currentIndex, classTypes) {
    for (var i = currentIndex; i >= 0; i--) {
        var type = classTypes[i];
        if (type !== SP) {
            return type;
        }
    }
    return 0;
};
// Helper for the LB25 look-behind rules: true when, scanning left from `fromIndex`
// over SY / IS classes only, an NU class is reached.
var followsNumericRun = function (classTypes, fromIndex) {
    for (var i = fromIndex; i >= 0; i--) {
        var type = classTypes[i];
        if (type === NU) {
            return true;
        }
        if (type !== SY && type !== IS) {
            return false;
        }
    }
    return false;
};
/**
 * Decides whether a line may break between code points `index - 1` and `index`.
 * The rules are evaluated in order and the first one that matches wins, so the
 * order of the checks below must not be changed.
 *
 * @param {number[]} codePoints Code points of the text.
 * @param {number[]} classTypes Resolved class per code point (see codePointsToCharacterClasses).
 * @param {number[]} indicies Base-character index per code point (see codePointsToCharacterClasses).
 * @param {number} index Position of the break candidate; the break would fall before this code point.
 * @param {boolean[]} [forbiddenBreaks] Optional per-code-point flags; `true` at `index - 1`
 *     forbids the break.
 * @returns {string} One of exports.BREAK_MANDATORY, exports.BREAK_NOT_ALLOWED, exports.BREAK_ALLOWED.
 */
var _lineBreakAtIndex = function (codePoints, classTypes, indicies, index, forbiddenBreaks) {
    // The code point at `index` belongs to the character that starts the text.
    if (indicies[index] === 0) {
        return exports.BREAK_NOT_ALLOWED;
    }
    var currentIndex = index - 1;
    if (Array.isArray(forbiddenBreaks) && forbiddenBreaks[currentIndex] === true) {
        return exports.BREAK_NOT_ALLOWED;
    }
    var beforeIndex = currentIndex - 1;
    var afterIndex = currentIndex + 1;
    var current = classTypes[currentIndex];
    var before = beforeIndex >= 0 ? classTypes[beforeIndex] : 0;
    var next = classTypes[afterIndex];
    // LB4 Always break after hard line breaks.
    // LB5 Treat CR followed by LF, as well as CR, LF, and NL as hard line breaks.
    if (current === CR && next === LF) {
        return exports.BREAK_NOT_ALLOWED;
    }
    if (HARD_LINE_BREAKS.indexOf(current) !== -1) {
        return exports.BREAK_MANDATORY;
    }
    // LB6 Do not break before hard line breaks.
    if (HARD_LINE_BREAKS.indexOf(next) !== -1) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB7 Do not break before spaces or zero width space.
    if (SPACE.indexOf(next) !== -1) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB8 Break before any character following a zero-width space, even if one or more spaces intervene.
    if (previousNonSpaceClassType(currentIndex, classTypes) === ZW) {
        return exports.BREAK_ALLOWED;
    }
    // LB8a Do not break after a zero width joiner.
    // The raw trie class is consulted because ZWJ has already been resolved away in classTypes.
    if (exports.UnicodeTrie.get(codePoints[currentIndex]) === ZWJ) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // zwj emojis
    if ((current === EB || current === EM) && exports.UnicodeTrie.get(codePoints[afterIndex]) === ZWJ) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB11 Do not break before or after Word joiner and related characters.
    if (current === WJ || next === WJ) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB12 Do not break after NBSP and related characters.
    if (current === GL) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB12a Do not break before NBSP and related characters, except after spaces and hyphens.
    if ([SP, BA, HY].indexOf(current) === -1 && next === GL) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB13 Do not break before ‘]’ or ‘!’ or ‘;’ or ‘/’, even after spaces.
    if ([CL, CP, EX, IS, SY].indexOf(next) !== -1) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB14 Do not break after ‘[’, even after spaces.
    if (previousNonSpaceClassType(currentIndex, classTypes) === OP) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB15 Do not break within ‘”[’, even with intervening spaces.
    if (isAdjacentWithSpaceIgnored(QU, OP, currentIndex, classTypes)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB16 Do not break between closing punctuation and a nonstarter (lb=NS), even with intervening spaces.
    if (isAdjacentWithSpaceIgnored([CL, CP], NS, currentIndex, classTypes)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB17 Do not break within ‘——’, even with intervening spaces.
    if (isAdjacentWithSpaceIgnored(B2, B2, currentIndex, classTypes)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB18 Break after spaces.
    if (current === SP) {
        return exports.BREAK_ALLOWED;
    }
    // LB19 Do not break before or after quotation marks, such as ‘ ” ’.
    if (current === QU || next === QU) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB20 Break before and after unresolved CB.
    if (next === CB || current === CB) {
        return exports.BREAK_ALLOWED;
    }
    // LB21 Do not break before hyphen-minus, other hyphens, fixed-width spaces, small kana, and other non-starters, or after acute accents.
    if ([BA, HY, NS].indexOf(next) !== -1 || current === BB) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB21a Don't break after Hebrew + Hyphen.
    if (before === HL && HYPHEN.indexOf(current) !== -1) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB21b Don’t break between Solidus and Hebrew letters.
    if (current === SY && next === HL) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB22 Do not break before ellipsis.
    if (next === IN) {
        return exports.BREAK_NOT_ALLOWED;
    }
    var currentIsAlphabetic = ALPHABETICS.indexOf(current) !== -1;
    var nextIsAlphabetic = ALPHABETICS.indexOf(next) !== -1;
    // LB23 Do not break between digits and letters.
    if ((nextIsAlphabetic && current === NU) || (currentIsAlphabetic && next === NU)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
    if ((current === PR && [ID, EB, EM].indexOf(next) !== -1) ||
        ([ID, EB, EM].indexOf(current) !== -1 && next === PO)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB24 Do not break between numeric prefix/postfix and letters, or between letters and prefix/postfix.
    if ((currentIsAlphabetic && PREFIX_POSTFIX.indexOf(next) !== -1) ||
        (PREFIX_POSTFIX.indexOf(current) !== -1 && nextIsAlphabetic)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB25 Do not break between the following pairs of classes relevant to numbers:
    if (
    // (PR | PO) × ( OP | HY )? NU
    ([PR, PO].indexOf(current) !== -1 &&
        (next === NU || ([OP, HY].indexOf(next) !== -1 && classTypes[afterIndex + 1] === NU))) ||
        // ( OP | HY ) × NU
        ([OP, HY].indexOf(current) !== -1 && next === NU) ||
        // NU × (NU | SY | IS)
        (current === NU && [NU, SY, IS].indexOf(next) !== -1)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // NU (NU | SY | IS)* × (NU | SY | IS | CL | CP)
    if ([NU, SY, IS, CL, CP].indexOf(next) !== -1 && followsNumericRun(classTypes, currentIndex)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // NU (NU | SY | IS)* (CL | CP)? × (PO | PR))
    if ([PR, PO].indexOf(next) !== -1 &&
        followsNumericRun(classTypes, [CL, CP].indexOf(current) !== -1 ? beforeIndex : currentIndex)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB26 Do not break a Korean syllable.
    if ((JL === current && [JL, JV, H2, H3].indexOf(next) !== -1) ||
        ([JV, H2].indexOf(current) !== -1 && [JV, JT].indexOf(next) !== -1) ||
        ([JT, H3].indexOf(current) !== -1 && next === JT)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB27 Treat a Korean Syllable Block the same as ID.
    if ((KOREAN_SYLLABLE_BLOCK.indexOf(current) !== -1 && [IN, PO].indexOf(next) !== -1) ||
        (KOREAN_SYLLABLE_BLOCK.indexOf(next) !== -1 && current === PR)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB28 Do not break between alphabetics (“at”).
    if (currentIsAlphabetic && nextIsAlphabetic) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB29 Do not break between numeric punctuation and alphabetics (“e.g.”).
    if (current === IS && nextIsAlphabetic) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB30 Do not break between letters, numbers, or ordinary symbols and opening or closing parentheses.
    if (((currentIsAlphabetic || current === NU) &&
        next === OP &&
        ea_OP.indexOf(codePoints[afterIndex]) === -1) ||
        ((nextIsAlphabetic || next === NU) && current === CP)) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB30a Break between two regional indicator symbols if and only if there are an even number of regional
    // indicators preceding the position of the break.
    if (current === RI && next === RI) {
        var i = indicies[currentIndex];
        var count = 1;
        while (i > 0) {
            i--;
            if (classTypes[i] === RI) {
                count++;
            }
            else {
                break;
            }
        }
        if (count % 2 !== 0) {
            return exports.BREAK_NOT_ALLOWED;
        }
    }
    // LB30b Do not break between an emoji base and an emoji modifier.
    if (current === EB && next === EM) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB31 Break everywhere else.
    return exports.BREAK_ALLOWED;
};
/**
 * Break test for a single position of a code point sequence, using the default
 * ('strict') class resolution.
 *
 * Note: the classes of the whole sequence are resolved on every call.
 *
 * @param {number[]} codePoints Code points of the text.
 * @param {number} index Position of the break candidate.
 * @returns {string} One of exports.BREAK_MANDATORY, exports.BREAK_NOT_ALLOWED, exports.BREAK_ALLOWED.
 */
var lineBreakAtIndex = function (codePoints, index) {
    // LB2 Never break at the start of text.
    if (index === 0) {
        return exports.BREAK_NOT_ALLOWED;
    }
    // LB3 Always break at the end of text.
    if (index >= codePoints.length) {
        return exports.BREAK_MANDATORY;
    }
    var resolved = exports.codePointsToCharacterClasses(codePoints);
    var indices = resolved[0];
    var classTypes = resolved[1];
    return _lineBreakAtIndex(codePoints, classTypes, indices, index);
};
// Public export of the single-position break test.
exports.lineBreakAtIndex = lineBreakAtIndex;
/**
 * Resolves classes for `codePoints` and applies the `lineBreak` / `wordBreak` options.
 *
 * @param {number[]} codePoints Code points of the text.
 * @param {{lineBreak: string, wordBreak: string}} [options] When omitted (falsy),
 *     `{ lineBreak: 'normal', wordBreak: 'normal' }` is used.
 * @returns {Array} `[indicies, classTypes, forbiddenBreakpoints]`. With wordBreak
 *     'break-all' or 'break-word', classes NU, AL and SA are replaced by ID. With
 *     wordBreak 'keep-all', `forbiddenBreakpoints` flags flagged code points in the
 *     range U+4E00..U+9FFF; otherwise it is undefined.
 */
var cssFormattedClasses = function (codePoints, options) {
    var settings = options ? options : { lineBreak: 'normal', wordBreak: 'normal' };
    var resolved = exports.codePointsToCharacterClasses(codePoints, settings.lineBreak);
    var indicies = resolved[0];
    var classTypes = resolved[1];
    var isLetterNumber = resolved[2];
    if (settings.wordBreak === 'break-all' || settings.wordBreak === 'break-word') {
        classTypes = classTypes.map(function (type) {
            return [NU, AL, SA].indexOf(type) !== -1 ? ID : type;
        });
    }
    var forbiddenBreakpoints;
    if (settings.wordBreak === 'keep-all') {
        forbiddenBreakpoints = isLetterNumber.map(function (letterNumber, i) {
            return letterNumber && codePoints[i] >= 0x4e00 && codePoints[i] <= 0x9fff;
        });
    }
    return [indicies, classTypes, forbiddenBreakpoints];
};
/**
 * Renders `str` with a break marker inserted before the first character and after
 * every character.
 *
 * @param {string} str Text to annotate.
 * @param {{lineBreak: string, wordBreak: string}} [options] Passed to cssFormattedClasses.
 * @returns {string} exports.BREAK_NOT_ALLOWED, then each character followed by the marker
 *     for the position after it; the last character is always followed by
 *     exports.BREAK_MANDATORY.
 */
var inlineBreakOpportunities = function (str, options) {
    var codePoints = Util_1.toCodePoints(str);
    var formatted = cssFormattedClasses(codePoints, options);
    var indicies = formatted[0];
    var classTypes = formatted[1];
    var forbiddenBreakpoints = formatted[2];
    var lastIndex = codePoints.length - 1;
    // The text never breaks before its first character.
    var output = exports.BREAK_NOT_ALLOWED;
    codePoints.forEach(function (codePoint, i) {
        var marker = i >= lastIndex
            ? exports.BREAK_MANDATORY
            : _lineBreakAtIndex(codePoints, classTypes, indicies, i + 1, forbiddenBreakpoints);
        output += Util_1.fromCodePoint(codePoint) + marker;
    });
    return output;
};
// Public export of the annotated-string helper.
exports.inlineBreakOpportunities = inlineBreakOpportunities;
/**
 * One segment produced by LineBreaker: the code points in `[start, end)` plus whether
 * the break that ends the segment is mandatory.
 */
var Break = /** @class */ (function () {
    /**
     * @param {number[]} codePoints Code points of the whole text (stored by reference).
     * @param {string} lineBreak Break marker found at `end`.
     * @param {number} start Index of the first code point of the segment.
     * @param {number} end Index one past the last code point of the segment.
     */
    function Break(codePoints, lineBreak, start, end) {
        this.codePoints = codePoints;
        this.required = lineBreak === exports.BREAK_MANDATORY;
        this.start = start;
        this.end = end;
    }
    /** @returns {string} The segment's text. */
    Break.prototype.slice = function () {
        var segment = this.codePoints.slice(this.start, this.end);
        return Util_1.fromCodePoint.apply(void 0, segment);
    };
    return Break;
}());
/**
 * Creates an iterator-like object over the breakable segments of `str`.
 *
 * @param {string} str Text to segment.
 * @param {{lineBreak: string, wordBreak: string}} [options] Passed to cssFormattedClasses.
 * @returns {{next: function(): {done: boolean, value: (Break|null)}}} Each `next()` call
 *     yields the next segment as a Break, then `{ done: true, value: null }` once the
 *     text is exhausted.
 */
var LineBreaker = function (str, options) {
    var codePoints = Util_1.toCodePoints(str);
    var formatted = cssFormattedClasses(codePoints, options);
    var indicies = formatted[0];
    var classTypes = formatted[1];
    var forbiddenBreakpoints = formatted[2];
    var length = codePoints.length;
    var lastEnd = 0;
    var nextIndex = 0;
    return {
        next: function () {
            if (nextIndex >= length) {
                return { done: true, value: null };
            }
            // Advance until a position that allows (or requires) a break, or the end of text.
            var lineBreak = exports.BREAK_NOT_ALLOWED;
            while (nextIndex < length) {
                nextIndex++;
                lineBreak = _lineBreakAtIndex(codePoints, classTypes, indicies, nextIndex, forbiddenBreakpoints);
                if (lineBreak !== exports.BREAK_NOT_ALLOWED) {
                    break;
                }
            }
            // The loop exits either on a break opportunity or with nextIndex === length,
            // so a segment is always available here.
            var value = new Break(codePoints, lineBreak, lastEnd, nextIndex);
            lastEnd = nextIndex;
            return { value: value, done: false };
        },
    };
};
// Public export of the segment iterator factory.
exports.LineBreaker = LineBreaker;
//# sourceMappingURL=LineBreak.js.map