// Partial port of Python's textwrap module, version 3.9.0 (only the wrap, fill and dedent functions):
// https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py
|
3 |
|
4 |
|
5 |
|
6 | /*
|
7 | * Text wrapping and filling.
|
8 | */
|
9 |
|
10 | // Copyright (C) 1999-2001 Gregory P. Ward.
|
11 | // Copyright (C) 2002, 2003 Python Software Foundation.
|
12 | // Copyright (C) 2020 argparse.js authors
|
13 | // Originally written by Greg Ward <gward@python.net>
|
14 |
|
// Hardcode the recognized whitespace characters to the US-ASCII
// whitespace characters.  The main reason for doing this is that
// some Unicode spaces (like \u00a0) are non-breaking whitespaces.
//
// This less funky little regex just splits on recognized spaces. E.g.
//   "Hello there -- you goof-ball, use the -b option!"
// splits into
//   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
// The capturing group makes String.prototype.split() keep the whitespace
// runs as chunks of their own.
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/

class TextWrapper {
    /*
     *  Object for wrapping/filling text.  The public interface consists of
     *  the wrap() and fill() methods; the other methods are just there for
     *  subclasses to override in order to tweak the default behaviour.
     *  If you want to completely replace the main wrapping algorithm,
     *  you'll probably have to override _wrap_chunks().
     *
     *  Several instance attributes control various aspects of wrapping:
     *    width (default: 70)
     *      the maximum width of wrapped lines (unless break_long_words
     *      is false)
     *    initial_indent (default: "")
     *      string that will be prepended to the first line of wrapped
     *      output.  Counts towards the line's width.
     *    subsequent_indent (default: "")
     *      string that will be prepended to all lines save the first
     *      of wrapped output; also counts towards each line's width.
     *    expand_tabs (default: true)
     *      Expand tabs in input text to spaces before further processing.
     *      Each tab will become 0 .. 'tabsize' spaces, depending on its position
     *      in its line.  If false, each tab is treated as a single character.
     *    tabsize (default: 8)
     *      Expand tabs in input text to 0 .. 'tabsize' spaces, unless
     *      'expand_tabs' is false.
     *    replace_whitespace (default: true)
     *      Replace all whitespace characters in the input text by spaces
     *      after tab expansion.  Note that if expand_tabs is false and
     *      replace_whitespace is true, every tab will be converted to a
     *      single space!
     *    fix_sentence_endings (default: false)
     *      Stored on the instance, but NOT implemented in this port:
     *      wrap() never adjusts the spacing after sentence endings.
     *    break_long_words (default: true)
     *      Break words longer than 'width'.  If false, those words will not
     *      be broken, and some lines might be longer than 'width'.
     *    break_on_hyphens (default: true)
     *      Stored on the instance, but NOT implemented in this port:
     *      _split() only ever breaks on whitespace.
     *    drop_whitespace (default: true)
     *      Drop leading and trailing whitespace from lines.
     *    max_lines (default: undefined)
     *      Truncate wrapped lines.  undefined or null means "no limit".
     *    placeholder (default: ' [...]')
     *      Append to the last line of truncated text.
     */

    constructor(options = {}) {
        const {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }


    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _expand_tabs(text) {
        /*
         *  _expand_tabs(text : string) -> string
         *
         *  Replace every tab with the number of spaces needed to reach the
         *  next multiple of 'tabsize' columns.  The column counter restarts
         *  after "\n" and "\r".  With a 'tabsize' of 0 or less, tabs are
         *  simply removed.
         */
        if (!text.includes('\t')) {
            return text
        }
        let expanded = ''
        let column = 0
        for (const ch of text) {
            if (ch === '\t') {
                if (this.tabsize > 0) {
                    const pad = this.tabsize - (column % this.tabsize)
                    expanded += ' '.repeat(pad)
                    column += pad
                }
            } else {
                expanded += ch
                column = (ch === '\n' || ch === '\r') ? 0 : column + 1
            }
        }
        return expanded
    }

    _munge_whitespace(text) {
        /*
         *  _munge_whitespace(text : string) -> string
         *
         *  Munge whitespace in text: expand tabs and convert all other
         *  whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
         *  becomes " foo    bar  baz".
         */
        if (this.expand_tabs) {
            text = this._expand_tabs(text)
        }
        if (this.replace_whitespace) {
            text = text.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return text
    }

    _split(text) {
        /*
         *  _split(text : string) -> [string]
         *
         *  Split the text to wrap into indivisible chunks.  Chunks are
         *  not quite the same as words; see _wrap_chunks() for full
         *  details.  This port only splits on runs of whitespace, so
         *    Look, goof-ball -- use the -b option!
         *  breaks into the following chunks:
         *    'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *    'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         *  regardless of break_on_hyphens.
         */
        // split() yields '' at the edges when the text starts or ends with
        // whitespace; those empty strings are not chunks.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         *  _handle_long_word(chunks : [string],
         *                    cur_line : [string],
         *                    cur_len : int, width : int)
         *
         *  Handle a chunk of text (most likely a word, not whitespace) that
         *  is too long to fit in any line.  Mutates both 'reversed_chunks'
         *  and 'cur_line'; returns nothing.
         */
        // Figure out when indent is larger than the specified width, and make
        // sure at least one character is stripped off on every pass
        const space_left = width < 1 ? 1 : width - cur_len

        if (this.break_long_words) {
            // If we're allowed to break long words, then do so: put as much
            // of the next chunk onto the current line as will fit.
            const last = reversed_chunks.length - 1
            const chunk = reversed_chunks[last]
            cur_line.push(chunk.slice(0, space_left))
            reversed_chunks[last] = chunk.slice(space_left)
        } else if (cur_line.length === 0) {
            // Otherwise, we have to preserve the long word intact.  Only add
            // it to the current line if there's nothing already there --
            // that minimizes how much we violate the width constraint.
            cur_line.push(reversed_chunks.pop())
        }

        // If we're not allowed to break long words, and there's already
        // text on the current line, do nothing.  Next time through the
        // main loop of _wrap_chunks(), we'll wind up here again, but
        // cur_len will be zero, so the next line will be entirely
        // devoted to the long word that we can't handle right now.
    }

    _wrap_chunks(chunks) {
        /*
         *  _wrap_chunks(chunks : [string]) -> [string]
         *
         *  Wrap a sequence of text chunks and return a list of lines of
         *  length 'this.width' or less.  (If 'break_long_words' is false,
         *  some lines may be longer than this.)  Chunks correspond roughly
         *  to words and the whitespace between them: each chunk is
         *  indivisible (modulo 'break_long_words'), but a line break can
         *  come between any two chunks.  Chunks should not have internal
         *  whitespace; ie. a chunk is either all whitespace or a "word".
         *  Whitespace chunks will be removed from the beginning and end of
         *  lines, but apart from that whitespace is preserved.
         *
         *  The 'chunks' array is reversed in place and consumed.
         *  Throws Error when 'width' is not positive, or when 'max_lines'
         *  is set and the placeholder cannot fit on a line.
         */
        const lines = []
        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }
        if (this.max_lines != null) {
            // The placeholder lands on the last allowed line, so check it
            // against the indent that line will carry.
            const last_indent = this.max_lines > 1 ? this.subsequent_indent : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Arrange in reverse order so items can be efficiently popped
        // from a stack of chunks.
        chunks.reverse()

        while (chunks.length > 0) {

            // Start the list of chunks that will make up the current line.
            // cur_len is just the length of all the chunks in cur_line.
            const cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            const indent = lines.length > 0 ? this.subsequent_indent : this.initial_indent

            // Maximum width for this line.
            const width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && lines.length > 0 && chunks[chunks.length - 1].trim() === '') {
                chunks.pop()
            }

            // Squeeze as many chunks as possible onto the current line.
            while (chunks.length > 0) {
                const l = chunks[chunks.length - 1].length
                if (cur_len + l > width) {
                    break
                }
                cur_line.push(chunks.pop())
                cur_len += l
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (chunks.length > 0 && chunks[chunks.length - 1].length > width) {
                this._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = cur_line.reduce((total, chunk) => total + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 && cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line.pop().length
            }

            // Nothing ended up on this line (e.g. only whitespace was left).
            if (cur_line.length === 0) {
                continue
            }

            // True when this line can close the text: nothing is left (or
            // only one droppable whitespace chunk) and the line fits.
            const text_ends_here = (chunks.length === 0 ||
                this.drop_whitespace &&
                chunks.length === 1 &&
                !chunks[0].trim()) && cur_len <= width

            if (this.max_lines == null ||
                lines.length + 1 < this.max_lines ||
                text_ends_here) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // max_lines reached with text left over: strip chunks off the
            // end of this line until the placeholder fits after a word.
            let placed = false
            while (cur_line.length > 0) {
                const last = cur_line[cur_line.length - 1]
                if (last.trim() && cur_len + this.placeholder.length <= width) {
                    cur_line.push(this.placeholder)
                    lines.push(indent + cur_line.join(''))
                    placed = true
                    break
                }
                cur_len -= last.length
                cur_line.pop()
            }
            if (!placed) {
                // Nothing on this line leaves room: try the previous line,
                // else emit a line holding just the placeholder.
                if (lines.length > 0) {
                    const prev_line = lines[lines.length - 1].trimEnd()
                    if (prev_line.length + this.placeholder.length <= this.width) {
                        lines[lines.length - 1] = prev_line + this.placeholder
                        break
                    }
                }
                lines.push(indent + this.placeholder.trimStart())
            }
            break
        }

        return lines
    }

    _split_chunks(text) {
        /*
         *  _split_chunks(text : string) -> [string]
         *
         *  Normalize whitespace with _munge_whitespace(), then cut the
         *  result into chunks with _split().
         */
        text = this._munge_whitespace(text)
        return this._split(text)
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         *  wrap(text : string) -> [string]
         *
         *  Reformat the single paragraph in 'text' so it fits in lines of
         *  no more than 'this.width' columns, and return a list of wrapped
         *  lines.  With the default options, tabs in 'text' are expanded
         *  and all other whitespace characters (including newline) are
         *  converted to space.
         */
        // fix_sentence_endings is not implemented in this port, so the
        // chunks go straight to the wrapping algorithm.
        const chunks = this._split_chunks(text)
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         *  fill(text : string) -> string
         *
         *  Reformat the single paragraph in 'text' to fit in lines of no
         *  more than 'this.width' columns, and return a new string
         *  containing the entire wrapped paragraph.
         */
        return this.wrap(text).join('\n')
    }
}
|
347 |
|
348 |
|
349 | // -- Convenience interface ---------------------------------------------
|
350 |
|
function wrap(text, options = {}) {
    /*
     *  Wrap a single paragraph of text, returning a list of wrapped lines.
     *
     *  'options' is handed to a fresh TextWrapper; 'width' falls back to
     *  70 columns when it is missing or undefined, and every other key is
     *  passed through untouched.  See the TextWrapper class for the
     *  available options.
     */
    const { width = 70, ...extra } = options
    const wrapper_options = { width, ...extra }
    return new TextWrapper(wrapper_options).wrap(text)
}
|
366 |
|
function fill(text, options = {}) {
    /*
     *  Fill a single paragraph of text, returning a new string.
     *
     *  'options' is handed to a fresh TextWrapper; 'width' falls back to
     *  70 columns when it is missing or undefined, and every other key is
     *  passed through untouched.  The result is whatever that wrapper's
     *  fill() returns.  See the TextWrapper class for the available
     *  options.
     */
    const { width = 70, ...extra } = options
    const wrapper_options = { width, ...extra }
    return new TextWrapper(wrapper_options).fill(text)
}
|
381 |
|
382 | // -- Loosely related functionality -------------------------------------
|
383 |
|
// Matches a whole line made up only of spaces and/or tabs
// (multiline + global, so it applies to every such line in a string).
const _whitespace_only_re = /^[ \t]+$/mg
// Matches, on each line, the leading run of spaces/tabs followed by the
// first character that is not a space, a tab or a newline.  With the
// global flag, String.prototype.match() returns the whole matches, i.e.
// the indentation plus that one extra character.
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg
|
386 |
|
function dedent(text) {
    /*
     *  Strip the leading whitespace shared by every non-blank line of `text`.
     *
     *  Handy for multi-line strings that are indented in the source code
     *  but should start at the left edge when displayed.
     *
     *  Spaces and tabs both count as whitespace but are never considered
     *  equal to each other: the lines "  hello" and "\\thello" share no
     *  common leading whitespace.
     *
     *  Lines containing only spaces and tabs are emptied first, so they
     *  neither take part in computing the margin nor keep their blanks.
     */
    text = text.replace(_whitespace_only_re, '')

    // Each match is a line's indentation plus its first visible character.
    const matches = text.match(_leading_whitespace_re) || []

    // Longest run of spaces/tabs found so far at the start of all lines.
    let margin
    for (const matched of matches) {
        const indent = matched.slice(0, -1)

        if (margin === undefined) {
            // First indented line seen: it sets the initial margin.
            margin = indent
            continue
        }
        if (indent.startsWith(margin)) {
            // Deeper than (or equal to) the current margin: keep it.
            continue
        }
        if (margin.startsWith(indent)) {
            // Shallower but consistent with the margin: it takes over.
            margin = indent
            continue
        }

        // The two diverge somewhere: keep only what they have in common.
        const split = [...margin].findIndex(
            (ch, i) => i < indent.length && ch !== indent[i]
        )
        if (split !== -1) {
            margin = margin.slice(0, split)
        }
    }

    if (!margin) {
        return text
    }
    return text.replace(new RegExp('^' + margin, 'mg'), '')
}
|
439 |
|
440 | module.exports = { wrap, fill, dedent }
|