UNPKG

17.4 kBJavaScriptView Raw
// Partial port of python's textwrap module, version 3.9.0 (only the wrap, fill and dedent functions):
2// https://github.com/python/cpython/blob/v3.9.0b4/Lib/textwrap.py
3
4'use strict'
5
6/*
7 * Text wrapping and filling.
8 */
9
10// Copyright (C) 1999-2001 Gregory P. Ward.
11// Copyright (C) 2002, 2003 Python Software Foundation.
12// Copyright (C) 2020 argparse.js authors
13// Originally written by Greg Ward <gward@python.net>
14
// Hardcode the recognized whitespace characters to the US-ASCII
// whitespace characters (tab, newline, vertical tab, form feed, carriage
// return and space). The main reason for doing this is that some Unicode
// spaces (like \u00a0) are non-breaking whitespaces and must not be
// treated as places where a line may be split.
//
// This simple regex just splits on runs of the recognized spaces. The run
// is wrapped in a capturing group so that String.prototype.split() keeps
// the whitespace separators in its result. E.g.
//   "Hello there -- you goof-ball, use the -b option!"
// splits into
//   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
const wordsep_simple_re = /([\t\n\x0b\x0c\r ]+)/
24
class TextWrapper {
    /*
     * Object for wrapping/filling text. The public interface consists of
     * the wrap() and fill() methods; the other methods are just there for
     * subclasses to override in order to tweak the default behaviour.
     * If you want to completely replace the main wrapping algorithm,
     * you'll probably have to override _wrap_chunks().
     *
     * Several instance attributes control various aspects of wrapping:
     *   width (default: 70)
     *     the maximum width of wrapped lines (unless break_long_words
     *     is false)
     *   initial_indent (default: "")
     *     string that will be prepended to the first line of wrapped
     *     output. Counts towards the line's width.
     *   subsequent_indent (default: "")
     *     string that will be prepended to all lines save the first
     *     of wrapped output; also counts towards each line's width.
     *   expand_tabs (default: true)
     *     Expand tabs in input text to spaces before further processing.
     *     NOTE: unlike python, this port replaces every tab with exactly
     *     'tabsize' spaces regardless of its column. If false, each tab
     *     is treated as a single character.
     *   tabsize (default: 8)
     *     Number of spaces substituted for each tab, unless
     *     'expand_tabs' is false.
     *   replace_whitespace (default: true)
     *     Replace all whitespace characters in the input text by spaces
     *     after tab expansion. Note that if expand_tabs is false and
     *     replace_whitespace is true, every tab will be converted to a
     *     single space!
     *   fix_sentence_endings (default: false)
     *     Stored for API compatibility only; not implemented in this port.
     *   break_long_words (default: true)
     *     Break words longer than 'width'. If false, those words will not
     *     be broken, and some lines might be longer than 'width'.
     *   break_on_hyphens (default: true)
     *     Stored for API compatibility only; _split() in this port only
     *     ever breaks on whitespace, so hyphenated words stay intact.
     *   drop_whitespace (default: true)
     *     Drop leading and trailing whitespace from lines.
     *   max_lines (default: undefined)
     *     Truncate wrapped output to at most this many lines. undefined
     *     (or null) means "no limit".
     *   placeholder (default: ' [...]')
     *     Append to the last line of truncated text.
     */

    constructor(options = {}) {
        const {
            width = 70,
            initial_indent = '',
            subsequent_indent = '',
            expand_tabs = true,
            replace_whitespace = true,
            fix_sentence_endings = false,
            break_long_words = true,
            drop_whitespace = true,
            break_on_hyphens = true,
            tabsize = 8,
            max_lines = undefined,
            placeholder = ' [...]'
        } = options

        this.width = width
        this.initial_indent = initial_indent
        this.subsequent_indent = subsequent_indent
        this.expand_tabs = expand_tabs
        this.replace_whitespace = replace_whitespace
        this.fix_sentence_endings = fix_sentence_endings
        this.break_long_words = break_long_words
        this.drop_whitespace = drop_whitespace
        this.break_on_hyphens = break_on_hyphens
        this.tabsize = tabsize
        this.max_lines = max_lines
        this.placeholder = placeholder
    }


    // -- Private methods -----------------------------------------------
    // (possibly useful for subclasses to override)

    _munge_whitespace(text) {
        /*
         * _munge_whitespace(text : string) -> string
         *
         * Munge whitespace in text: expand tabs and convert all other
         * ASCII whitespace characters to spaces. E.g. with tabsize 4,
         * "a\tb\nc" becomes "a    b c".
         */
        if (this.expand_tabs) {
            // Simplification of python's str.expandtabs(): every tab becomes
            // a fixed run of 'tabsize' spaces instead of padding up to the
            // next tab stop.
            text = text.replace(/\t/g, ' '.repeat(this.tabsize))
        }
        if (this.replace_whitespace) {
            text = text.replace(/[\t\n\x0b\x0c\r]/g, ' ')
        }
        return text
    }

    _split(text) {
        /*
         * _split(text : string) -> [string]
         *
         * Split the text to wrap into indivisible chunks. Chunks are
         * not quite the same as words; see _wrap_chunks() for full
         * details. As an example, the text
         *   Look, goof-ball -- use the -b option!
         * breaks into the following chunks:
         *   'Look,', ' ', 'goof-ball', ' ', '--', ' ',
         *   'use', ' ', 'the', ' ', '-b', ' ', 'option!'
         * Only whitespace separates chunks in this port; break_on_hyphens
         * is not consulted.
         */
        // split() on a capturing regex keeps the separators; it may also
        // yield empty strings at the edges, which are discarded here.
        return text.split(wordsep_simple_re).filter(Boolean)
    }

    _handle_long_word(reversed_chunks, cur_line, cur_len, width) {
        /*
         * _handle_long_word(chunks : [string],
         *                   cur_line : [string],
         *                   cur_len : int, width : int)
         *
         * Handle a chunk of text (most likely a word, not whitespace) that
         * is too long to fit in any line. Mutates both 'reversed_chunks'
         * (a stack whose top is its last element) and 'cur_line'.
         */
        // Figure out when indent is larger than the specified width, and make
        // sure at least one character is stripped off on every pass
        const space_left = width < 1 ? 1 : width - cur_len
        const top = reversed_chunks.length - 1

        if (this.break_long_words) {
            // If we're allowed to break long words, then do so: put as much
            // of the next chunk onto the current line as will fit.
            const word = reversed_chunks[top]
            cur_line.push(word.slice(0, space_left))
            reversed_chunks[top] = word.slice(space_left)
        } else if (cur_line.length === 0) {
            // Otherwise, we have to preserve the long word intact. Only add
            // it to the current line if there's nothing already there --
            // that minimizes how much we violate the width constraint.
            // (An empty array is truthy in JS, so the length must be tested
            // explicitly, and the word is pushed whole, not spread.)
            cur_line.push(reversed_chunks.pop())
        }

        // If we're not allowed to break long words, and there's already
        // text on the current line, do nothing. Next time through the
        // main loop of _wrap_chunks(), we'll wind up here again, but
        // cur_len will be zero, so the next line will be entirely
        // devoted to the long word that we can't handle right now.
    }

    _wrap_chunks(chunks) {
        /*
         * _wrap_chunks(chunks : [string]) -> [string]
         *
         * Wrap a sequence of text chunks and return a list of lines of
         * length 'this.width' or less. (If 'break_long_words' is false,
         * some lines may be longer than this.) Chunks correspond roughly
         * to words and the whitespace between them: each chunk is
         * indivisible (modulo 'break_long_words'), but a line break can
         * come between any two chunks. Chunks should not have internal
         * whitespace; ie. a chunk is either all whitespace or a "word".
         * Whitespace chunks will be removed from the beginning and end of
         * lines, but apart from that whitespace is preserved.
         *
         * The 'chunks' array is reversed in place and consumed.
         * Throws Error if width <= 0, or if the placeholder cannot fit on
         * the last permitted line when max_lines is set.
         */
        const lines = []
        if (this.width <= 0) {
            throw new Error(`invalid width ${this.width} (must be > 0)`)
        }
        const limited = this.max_lines !== undefined && this.max_lines !== null
        if (limited) {
            // The placeholder lands on the last permitted line; make sure it
            // can fit next to that line's indent.
            const last_indent = this.max_lines > 1 ? this.subsequent_indent : this.initial_indent
            if (last_indent.length + this.placeholder.trimStart().length > this.width) {
                throw new Error('placeholder too large for max width')
            }
        }

        // Arrange in reverse order so items can be efficiently popped
        // from a stack of chunks.
        chunks = chunks.reverse()

        while (chunks.length > 0) {

            // Start the list of chunks that will make up the current line.
            // cur_len is just the length of all the chunks in cur_line.
            const cur_line = []
            let cur_len = 0

            // Figure out which static string will prefix this line.
            const indent = lines.length > 0 ? this.subsequent_indent : this.initial_indent

            // Maximum width for this line.
            const width = this.width - indent.length

            // First chunk on line is whitespace -- drop it, unless this
            // is the very beginning of the text (ie. no lines started yet).
            if (this.drop_whitespace && chunks[chunks.length - 1].trim() === '' && lines.length > 0) {
                chunks.pop()
            }

            while (chunks.length > 0) {
                const l = chunks[chunks.length - 1].length

                // Nope, this line is full.
                if (cur_len + l > width) {
                    break
                }

                // Can at least squeeze this chunk onto the current line.
                cur_line.push(chunks.pop())
                cur_len += l
            }

            // The current line is full, and the next chunk is too big to
            // fit on *any* line (not just this one).
            if (chunks.length > 0 && chunks[chunks.length - 1].length > width) {
                this._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = cur_line.reduce((total, chunk) => total + chunk.length, 0)
            }

            // If the last chunk on this line is all whitespace, drop it.
            if (this.drop_whitespace && cur_line.length > 0 && cur_line[cur_line.length - 1].trim() === '') {
                cur_len -= cur_line[cur_line.length - 1].length
                cur_line.pop()
            }

            // Nothing ended up on this line (e.g. only whitespace was left).
            if (cur_line.length === 0) {
                continue
            }

            // The line can be emitted untouched when there is no line limit,
            // when this is not yet the last permitted line, or when it is the
            // last one and all remaining text already fits on it.
            const only_droppable_left = this.drop_whitespace &&
                chunks.length === 1 &&
                !chunks[0].trim()
            const emit_as_is = !limited ||
                lines.length + 1 < this.max_lines ||
                ((chunks.length === 0 || only_droppable_left) && cur_len <= width)

            if (emit_as_is) {
                // Convert current line back to a string and store it in
                // list of all lines (return value).
                lines.push(indent + cur_line.join(''))
                continue
            }

            // Truncation: strip chunks off the end of the line until the
            // placeholder fits right after a non-whitespace chunk.
            let placed = false
            while (cur_line.length > 0) {
                const last = cur_line[cur_line.length - 1]
                if (last.trim() && cur_len + this.placeholder.length <= width) {
                    cur_line.push(this.placeholder)
                    lines.push(indent + cur_line.join(''))
                    placed = true
                    break
                }
                cur_len -= last.length
                cur_line.pop()
            }

            if (!placed) {
                // Nothing on this line leaves room for the placeholder: try
                // to tack it onto the previous line instead.
                if (lines.length > 0) {
                    const prev_line = lines[lines.length - 1].trimEnd()
                    if (prev_line.length + this.placeholder.length <= this.width) {
                        lines[lines.length - 1] = prev_line + this.placeholder
                        break
                    }
                }
                // Last resort: the placeholder gets a line of its own.
                lines.push(indent + this.placeholder.trimStart())
            }
            break
        }

        return lines
    }

    _split_chunks(text) {
        /*
         * _split_chunks(text : string) -> [string]
         *
         * Normalize whitespace in 'text' and split it into chunks.
         */
        text = this._munge_whitespace(text)
        return this._split(text)
    }

    // -- Public interface ----------------------------------------------

    wrap(text) {
        /*
         * wrap(text : string) -> [string]
         *
         * Reformat the single paragraph in 'text' so it fits in lines of
         * no more than 'this.width' columns, and return a list of wrapped
         * lines. Tabs in 'text' are expanded (see _munge_whitespace()),
         * and all other whitespace characters (including newline) are
         * converted to space.
         */
        const chunks = this._split_chunks(text)
        // fix_sentence_endings is not implemented in this port.
        return this._wrap_chunks(chunks)
    }

    fill(text) {
        /*
         * fill(text : string) -> string
         *
         * Reformat the single paragraph in 'text' to fit in lines of no
         * more than 'this.width' columns, and return a new string
         * containing the entire wrapped paragraph.
         */
        return this.wrap(text).join('\n')
    }
}
347
348
349// -- Convenience interface ---------------------------------------------
350
function wrap(text, options = {}) {
    /*
     * Convenience wrapper: wrap one paragraph and return its lines.
     *
     * Builds a throw-away TextWrapper from 'options' ('width' falls back
     * to 70 when absent or undefined; every other key is forwarded
     * untouched) and returns the result of its wrap() method for 'text'.
     * See the TextWrapper class for the recognized option names.
     */
    const { width = 70, ...rest } = options
    return new TextWrapper({ width, ...rest }).wrap(text)
}
366
function fill(text, options = {}) {
    /*
     * Convenience wrapper: fill one paragraph and return it as a string.
     *
     * Builds a throw-away TextWrapper from 'options' ('width' falls back
     * to 70 when absent or undefined; every other key is forwarded
     * untouched) and returns the result of its fill() method for 'text'.
     * See the TextWrapper class for the recognized option names.
     */
    const { width = 70, ...rest } = options
    return new TextWrapper({ width, ...rest }).fill(text)
}
381
382// -- Loosely related functionality -------------------------------------
383
// Lines made up solely of spaces and/or tabs.
const _whitespace_only_re = /^[ \t]+$/mg
// Leading spaces/tabs of a line followed by its first visible character.
const _leading_whitespace_re = /(^[ \t]*)(?:[^ \t\n])/mg

function dedent(text) {
    /*
     * Strip the leading whitespace shared by every line of `text`.
     *
     * Handy for template/multi-line strings that are indented in the
     * source code but should start at the left edge when displayed.
     *
     * Spaces and tabs both count as whitespace but are never treated as
     * interchangeable: a line indented with spaces and a line indented
     * with a tab share no common margin.
     *
     * Lines consisting only of spaces/tabs are emptied first, so they
     * neither influence the margin nor keep their whitespace.
     */
    const cleaned = text.replace(_whitespace_only_re, '')

    // Every match is "<indent><first visible char>"; the trailing
    // character is cut off below to leave just the indent.
    const matches = cleaned.match(_leading_whitespace_re) || []

    let common
    for (const matched of matches) {
        const lead = matched.slice(0, -1)

        // First line seen, or a line no deeper than (and consistent with)
        // the current margin: it becomes the margin.
        if (common === undefined || common.startsWith(lead)) {
            common = lead
            continue
        }

        // Deeper but consistent with the current margin: nothing to do.
        if (lead.startsWith(common)) {
            continue
        }

        // Inconsistent indentation: keep only the shared prefix.
        let shared = 0
        while (shared < common.length &&
               shared < lead.length &&
               common[shared] === lead[shared]) {
            shared += 1
        }
        common = common.slice(0, shared)
    }

    if (!common) {
        return cleaned
    }
    return cleaned.replace(new RegExp('^' + common, 'mg'), '')
}
439
// Public API: only the convenience helpers are exported; the TextWrapper
// class itself stays internal to this module.
module.exports = { wrap, fill, dedent }