Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | 4x 4x 4x 19x 19x 572x 572x 19x 23x 510x 2x 19x 19x 46x 19x 19x 48x 17x 17x 31x 31x 8x 8x 8x 23x 23x 2x 2x 21x 21x 17x 4x | // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/Trim
// Whitespace part of the separator character class: \s plus U+FEFF and U+00A0.
const SPACE_REGEX = '\\s\\uFEFF\\xA0';
// https://remarkablemark.org/blog/2019/09/28/javascript-remove-punctuation/
// ASCII punctuation part of the separator character class.
const DEFAULT_PUNCTUATION_REGEX = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~';

/** Options accepted by `splitLongText`. */
interface Option {
  /** Maximum number of UTF-16 code units per returned piece. Defaults to 200. */
  maxLength?: number;
  /**
   * Extra characters that may end a piece, in addition to whitespace and
   * ASCII punctuation. Defaults to ''.
   *
   * NOTE: the string is inserted verbatim (unescaped) into a regular-expression
   * character class, so `]`, `\`, `^` and `-` keep their regex meaning there.
   */
  splitPunct?: string;
}

/**
 * Split a long text into short pieces without cutting a word in the middle.
 *
 * Every piece is at most `maxLength` code units long and ends either at the
 * end of the text, on a separator (whitespace / punctuation / `splitPunct`),
 * or immediately before one. No character is dropped: joining the returned
 * pieces with '' yields the original text.
 *
 * Time Complexity: O(n)
 *
 * @param text - the text to split
 * @param option - optional settings, see `Option`
 * @returns the list of short texts; `[text]` when the text already fits
 * @throws {RangeError} if the text needs splitting but `maxLength` is zero,
 *   negative or NaN
 * @throws {Error} if a run of non-separator characters is longer than
 *   `maxLength`, so no valid split point exists
 */
const splitLongText = (
  text: string,
  { maxLength = 200, splitPunct = '' }: Option = {}
): string[] => {
  // Fast path: the whole text already fits (also covers maxLength = Infinity).
  if (text.length <= maxLength) {
    return [text];
  }

  // From here on the text must be cut. A zero, negative or NaN limit can never
  // make progress (zero/NaN used to loop forever on text starting with a
  // separator), so reject it explicitly.
  if (!(maxLength > 0)) {
    throw new RangeError(
      'The option "maxLength" must be a positive number, got ' + String(maxLength)
    );
  }

  // Built once per call instead of once per inspected character.
  // No `g`/`y` flag, so `test` carries no `lastIndex` state between calls.
  const separatorRegex = new RegExp(
    '[' + SPACE_REGEX + DEFAULT_PUNCTUATION_REGEX + splitPunct + ']'
  );

  // True when the character at index `i` of `s` is a separator.
  // Out-of-range indexes yield '' from charAt and therefore false.
  const isSpaceOrPunct = (s: string, i: number): boolean => separatorRegex.test(s.charAt(i));

  // Index of the right-most separator in s[left..right], or -1 if there is none.
  const lastIndexOfSpaceOrPunct = (s: string, left: number, right: number): number => {
    for (let i = right; i >= left; i--) {
      if (isSpaceOrPunct(s, i)) return i;
    }
    return -1; // not found
  };

  const result: string[] = [];
  let start = 0;
  for (;;) {
    // The remainder fits into one piece: emit it and stop.
    if (text.length - start <= maxLength) {
      result.push(text.slice(start));
      break;
    }

    // Tentatively cut at the maximum length.
    let end = start + maxLength - 1;

    // The cut is clean when the piece ends on a separator or the next piece
    // starts with one; otherwise it falls inside a word, so back up to the
    // last separator within the window.
    if (!isSpaceOrPunct(text, end) && !isSpaceOrPunct(text, end + 1)) {
      end = lastIndexOfSpaceOrPunct(text, start, end);
      if (end === -1) {
        const str = text.slice(start, start + maxLength);
        throw new Error(
          'The word is too long to split into a short text:' +
            `\n${str} ...` +
            '\n\nTry the option "splitPunct" to split the text by punctuation.'
        );
      }
    }

    result.push(text.slice(start, end + 1));
    start = end + 1;
  }
  return result;
};
export default splitLongText;
|