All files splitLongText.ts

100% Statements 38/38
100% Branches 12/12
100% Functions 4/4
100% Lines 32/32

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79  4x     4x                                 4x   19x   19x 572x 572x     19x 23x 510x   2x     19x 19x 46x     19x 19x   48x 17x 17x       31x 31x 8x 8x 8x       23x 23x 2x 2x               21x 21x     17x     4x  
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/Trim
// NOTE: despite the name, this is the *body* of a regex character class
// (it is placed between `[` and `]` below), not a complete regular expression.
const SPACE_REGEX = '\\s\\uFEFF\\xA0';

// https://remarkablemark.org/blog/2019/09/28/javascript-remove-punctuation/
// Also a character-class body. The `\\]` sequence is an escaped `]`, so a
// literal backslash is NOT part of this default set.
const DEFAULT_PUNCTUATION_REGEX = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~';

interface Option {
  /** Maximum length (in UTF-16 code units) of each returned piece. Default is 200. */
  maxLength?: number;
  /** Extra characters, besides whitespace and ASCII punctuation, at which the text may be split. */
  splitPunct?: string;
}

/** Escapes the characters that are special inside a regex character class: `\`, `]`, `^` and `-`. */
const escapeForCharClass = (chars: string): string => chars.replace(/[\\\]^-]/g, '\\$&');

/**
 * split the long text to short texts
 * Time Complexity: O(n)
 *
 * Every piece is at most `maxLength` characters long and ends at a whitespace
 * or punctuation character (or directly before one), so words are never cut
 * in the middle. Concatenating the returned pieces yields the original text.
 *
 * @param {string}  text
 * @param {object?} option
 * @param {number?} option.maxLength  default is 200
 * @param {string?} option.splitPunct default is ''; every character is taken
 *                                    literally (regex metacharacters are escaped)
 * @returns {string[]} short text list
 * @throws {RangeError} if maxLength is not a number >= 1
 * @throws {Error} if a run of characters longer than maxLength contains no
 *                 whitespace or punctuation to split at
 */
const splitLongText = (
    text: string,
    { maxLength = 200, splitPunct = '' }: Option = {}
): string[] => {
  // A maxLength below 1 (or NaN) can never make progress and would loop forever.
  if (!(maxLength >= 1)) {
    throw new RangeError(`maxLength must be a number >= 1, but got: ${maxLength}`);
  }

  // Build the matcher once per call instead of once per inspected character.
  // splitPunct is escaped so that characters such as `]`, `\`, `^` or `-`
  // cannot break or alter the character class.
  const spaceOrPunctRegex = new RegExp(
      '[' + SPACE_REGEX + DEFAULT_PUNCTUATION_REGEX + escapeForCharClass(splitPunct) + ']'
  );

  // charAt() returns '' for out-of-range indexes, which never matches.
  const isSpaceOrPunct = (s: string, i: number): boolean => spaceOrPunctRegex.test(s.charAt(i));

  // Scans backwards from `right` to `left` (both inclusive).
  const lastIndexOfSpaceOrPunct = (s: string, left: number, right: number): number => {
    for (let i = right; i >= left; i--) {
      if (isSpaceOrPunct(s, i)) return i;
    }
    return -1; // not found
  };

  const result: string[] = [];
  // `end` is inclusive.
  const addResult = (s: string, start: number, end: number): void => {
    result.push(s.slice(start, end + 1));
  };

  let start = 0;
  for (;;) {
    // the remaining text fits into one piece
    if (text.length - start <= maxLength) {
      addResult(text, start, text.length - 1);
      break; // end of text
    }

    // A full-length piece is fine if it ends on a space/punctuation, or if the
    // character right after it is one (i.e. no word is cut in the middle).
    let end = start + maxLength - 1;
    if (isSpaceOrPunct(text, end) || isSpaceOrPunct(text, end + 1)) {
      addResult(text, start, end);
      start = end + 1;
      continue;
    }

    // otherwise fall back to the last space/punctuation inside the window
    end = lastIndexOfSpaceOrPunct(text, start, end);
    if (end === -1) {
      const str = text.slice(start, start + maxLength);
      throw new Error(
          'The word is too long to split into a short text:' +
          `\n${str} ...` +
          '\n\nTry the option "splitPunct" to split the text by punctuation.'
      );
    }

    // add result
    addResult(text, start, end);
    start = end + 1;
  }

  return result;
};
 
export default splitLongText;