/// <reference types="node" />

import { Duplex } from "stream";

// Expose the module as the global `Papa` when it is loaded as a script (UMD).
export as namespace Papa;

// Don't export all declarations! Only declarations explicitly marked `export` are public.
export {};

/**
 * Parse local files
 * @param file a `File` object obtained from the DOM, or a Node.js readable stream (see `LocalFile`).
 * @param config a config object which contains a callback.
 * @returns Doesn't return anything. Results are provided asynchronously to a callback function.
 */
// eslint-disable-next-line @definitelytyped/no-unnecessary-generics
export function parse<T, TFile extends LocalFile = LocalFile>(file: TFile, config: ParseLocalConfig<T, TFile>): void;
/**
 * Parse remote files
 * @param url the path or URL to the file to download.
 * @param config a config object.
 * @returns Doesn't return anything. Results are provided asynchronously to a callback function.
 */
// eslint-disable-next-line @definitelytyped/no-unnecessary-generics
export function parse<T>(url: string, config: ParseRemoteConfig<T>): void;
/**
 * Parse string in web worker
 * @param csvString a string of delimited text to be parsed.
 * @param config a config object with `worker: true` and a `complete` callback.
 * @returns Doesn't return anything. Results are provided asynchronously to a callback function.
 */
/* eslint-disable @definitelytyped/no-unnecessary-generics */
// tslint:disable-next-line:unified-signatures
export function parse<T>(csvString: string, config: ParseWorkerConfig<T> & { download?: false | undefined }): void;
/* eslint-enable @definitelytyped/no-unnecessary-generics */
/**
 * Parse string
 * @param csvString a string of delimited text to be parsed.
 * @param config an optional config object.
 * @returns a parse results object
 */
export function parse<T>(
    csvString: string,
    config?: ParseConfig<T> & { download?: false | undefined; worker?: false | undefined },
): ParseResult<T>;
/**
 * Parse string, remote files or local files
 * @param source data to be parsed.
 * @param config a config object.
 * @returns Doesn't return anything. Results are provided asynchronously to a callback function.
 */
export function parse<T>(
    source: LocalFile | string,
    config:
        & ParseLocalConfig<T, LocalFile>
        & (
            | (ParseConfig<T> & { download?: false | undefined; worker?: false | undefined })
            | (ParseWorkerConfig<T> & { download?: false | undefined })
            | ParseRemoteConfig<T>
        ),
): void;
/**
 * Parse in a node streaming style
 * @param stream `NODE_STREAM_INPUT`
 * @param config a config object.
 * @returns a node duplex stream.
 *
 * @see https://github.com/mholt/PapaParse#papa-parse-for-node
 */
export function parse(stream: typeof NODE_STREAM_INPUT, config?: ParseConfig): Duplex;

/**
 * Unparses javascript data objects and returns a csv string
 * @param data can be one of: An array of arrays; An array of objects; An object explicitly defining `fields` and `data`
 * @param config an optional config object
 * @returns the delimited text as a string
 */
export function unparse<T>(data: T[] | UnparseObject<T>, config?: UnparseConfig): string;

/**
 * Read-Only Properties
 */

/** An array of characters that are not allowed as delimiters. `\r`, `\n`, `"`, `\ufeff` */
export const BAD_DELIMITERS: readonly string[];

/** The true delimiter. Invisible. ASCII code 30. Should be doing the job we strangely rely upon commas and tabs for. */
export const RECORD_SEP: "\x1E";

/** Also sometimes used as a delimiting character. ASCII code 31. */
export const UNIT_SEP: "\x1F";

/**
 * Whether or not the browser supports HTML5 Web Workers.
 * If false, `worker: true` will have no effect.
 */
export const WORKERS_SUPPORTED: boolean;

/**
 * When passed to `Papa.parse` as the first argument, a Node duplex stream is returned
 * instead of a parse result.
 */
export const NODE_STREAM_INPUT: unique symbol;

/**
 * Configurable Properties
 */

/**
 * The size in bytes of each file chunk. Used when streaming files obtained from the DOM that exist on the local computer. Default 10 MB.
 * @default 10485760
 */
export let LocalChunkSize: number;

/**
 * Same as `LocalChunkSize`, but for downloading files from remote locations. Default 5 MB.
 * @default 5242880
 */
export let RemoteChunkSize: number;

/**
 * The delimiter used when it is left unspecified and cannot be detected automatically. Default is comma.
 * @default ','
 */
export let DefaultDelimiter: string;

/** Local input accepted by `parse`: a DOM `File` object or a Node.js readable stream. */
export type LocalFile = File | NodeJS.ReadableStream;

/**
 * On Papa there are actually more classes exposed
 * but none of them are officially documented.
 * Since we can interact with the Parser from one of the callbacks
 * (it is the second argument of `step` and `chunk`),
 * the API for this class is included here.
 */
export class Parser {
    constructor(config: ParseConfig);

    /**
     * Parses the given input.
     * NOTE(review): the meaning of `baseIndex` and `ignoreLastRow` and the shape of the
     * returned value are not documented in this file — confirm against the Papa Parse source.
     */
    parse(input: string, baseIndex: number, ignoreLastRow: boolean): any;

    /** Sets the abort flag */
    abort(): void;

    /** Gets the cursor position */
    getCharIndex(): number;

    /** Pauses parsing; see the `step` callback documentation on `ParseConfig`. */
    pause(): void;
    /** Resumes parsing after `pause()`. */
    resume(): void;
}

/**
 * Options accepted by `parse`; the worker, local-file and remote configs all extend this interface.
 *
 * @typeParam T type of a parsed row.
 * @typeParam TInput type of the second argument handed to `complete` (the parsed input, when there is one).
 */
export interface ParseConfig<T = any, TInput = undefined> {
    /**
     * The delimiting character.
     * Leave blank to auto-detect from a list of most common delimiters, or any values passed in through `delimitersToGuess`.
     * It can be a string or a function.
     * If a string, it can be of any length (so multi-character delimiters are supported).
     * If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter.
     * In both cases it cannot be found in `Papa.BAD_DELIMITERS`.
     * @default // auto-detect
     */
    delimiter?: string | ((input: string) => string) | undefined;
    /**
     * The newline sequence. Leave blank to auto-detect. Must be one of `\r`, `\n`, or `\r\n`.
     * @default // auto-detect
     */
    newline?: "\r" | "\n" | "\r\n" | undefined;
    /**
     * The character used to quote fields. The quoting of all fields is not mandatory. Any field which is not quoted will correctly read.
     * @default '"'
     */
    quoteChar?: string | undefined;
    /**
     * The character used to escape the quote character within a field.
     * If not set, this option will default to the value of `quoteChar`,
     * meaning that the default escaping of quote character within a quoted field is using the quote character two times.
     * (e.g. `"column with ""quotes"" in text"`)
     * @default '"'
     */
    escapeChar?: string | undefined;
    /**
     * If `true`, the first row of parsed data will be interpreted as field names.
     * An array of field names will be returned in meta, and each row of data will be an object of values keyed by field name instead of a simple array.
     * Rows with a different number of fields from the header row will produce an error.
     * Warning: Duplicate field names will overwrite values in previous fields having the same name.
     * @default false
     */
    header?: boolean | undefined;
    /**
     * A function to apply on each header. Requires header to be true. The function receives the header as its first argument and the index as second.
     */
    transformHeader?(header: string, index: number): string;
    /**
     * If `true`, numeric and boolean data will be converted to their type instead of remaining strings.
     * Numeric data must conform to the definition of a decimal literal.
     * Numerical values greater than 2^53 or less than -2^53 will not be converted to numbers to preserve precision.
     * European-formatted numbers must have commas and dots swapped.
     * It also accepts an object or a function.
     * If an object, its values should be booleans indicating whether dynamic typing should be applied for each column number (or header name if using headers).
     * If a function, it should return a boolean value for each field number (or name if using headers), which will be passed as first argument.
     * @default false
     */
    dynamicTyping?:
        | boolean
        | { [headerName: string]: boolean; [columnNumber: number]: boolean }
        | ((field: string | number) => boolean)
        | undefined;
    /** If > 0, only that many rows will be parsed. */
    preview?: number | undefined;
    /**
     * A string that indicates a comment (for example, "#" or "//").
     * When Papa encounters a line starting with this string, it will skip the line.
     * @default false
     */
    comments?: false | string | undefined;
    /**
     * If `true`, lines that are completely empty (those which evaluate to an empty string) will be skipped.
     * If set to `'greedy'`, lines that don't have any content (those which have only whitespace after parsing) will also be skipped.
     * @default false
     */
    skipEmptyLines?: boolean | "greedy" | undefined;
    /**
     * Fast mode speeds up parsing significantly for large inputs.
     * However, it only works when the input has no quoted fields.
     * Fast mode will automatically be enabled if no " characters appear in the input.
     * You can force fast mode either way by setting it to true or false.
     */
    fastMode?: boolean | undefined;
    /**
     * A function to apply on each value.
     * The function receives the value as its first argument and the column number or header name when enabled as its second argument.
     * The return value of the function will replace the value it received.
     * The transform function is applied before `dynamicTyping`.
     */
    transform?(value: string, field: string | number): any;
    /**
     * An array of delimiters to guess from if the delimiter option is not set.
     * @default [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]
     */
    delimitersToGuess?: string[] | undefined;
    /**
     * To stream the input, define a callback function.
     * Streaming is necessary for large files which would otherwise crash the browser.
     * You can call parser.abort() to abort parsing.
     * And, except when using a Web Worker, you can call parser.pause() to pause it, and parser.resume() to resume.
     */
    step?(results: ParseStepResult<T>, parser: Parser): void;
    /**
     * The callback to execute when parsing is complete.
     * It receives the parse results. If parsing a local file, the File is passed in, too.
     * When streaming, parse results are not available in this callback.
     */
    complete?(results: ParseResult<T>, file: TInput): void;
    /**
     * A function to execute before parsing the first chunk.
     * Can be used with chunk or step streaming modes.
     * The function receives as an argument the chunk about to be parsed, and it may return a modified chunk to parse.
     * This is useful for stripping header lines (as long as the header fits in a single chunk).
     */
    // eslint-disable-next-line @typescript-eslint/no-invalid-void-type
    beforeFirstChunk?(chunk: string): string | void;
}

/** Config for parsing a string in a worker thread: `worker` must be `true` and `complete` is required. */
export interface ParseWorkerConfig<T = any> extends ParseConfig<T> {
    /**
     * Whether or not to use a worker thread.
     * Using a worker will keep your page reactive, but may be slightly slower.
     */
    worker: true;
    /**
     * The callback to execute when parsing is complete.
     * It receives only the parse results; no input argument is passed for this config.
     * When streaming, parse results are not available in this callback.
     */
    complete(results: ParseResult<T>): void;
}

/**
 * Base interface for all async parsing (local files and remote downloads).
 *
 * @typeParam T type of a parsed row.
 * @typeParam TInput type of the input handed back to the `complete` and `error` callbacks.
 */
interface ParseAsyncConfigBase<T = any, TInput = undefined> extends ParseConfig<T, TInput> {
    /**
     * Whether or not to use a worker thread.
     * Using a worker will keep your page reactive, but may be slightly slower.
     * @default false
     */
    worker?: boolean | undefined;
    /**
     * Overrides `Papa.LocalChunkSize` and `Papa.RemoteChunkSize`.
     */
    chunkSize?: number | undefined;
    /**
     * A callback function, identical to `step`, which activates streaming.
     * However, this function is executed after every chunk of the file is loaded and parsed rather than every row.
     * Works only with local and remote files.
     * Do not use both `chunk` and `step` callbacks together.
     */
    chunk?(results: ParseResult<T>, parser: Parser): void;
    /**
     * A callback to execute if FileReader encounters an error.
     * The function is passed two arguments: the error and the File.
     */
    error?(error: Error, file: TInput): void;
}

/** Async parsing of a local file can additionally specify an encoding. */
interface ParseLocalConfigBase<T = any, TInput = undefined> extends ParseAsyncConfigBase<T, TInput> {
    /** The encoding to use when opening local files. If specified, it must be a value supported by the FileReader API. */
    encoding?: string | undefined;
}

/** Local config variant in which `step` is required. */
interface ParseLocalConfigStep<T = any, TInput = undefined> extends ParseLocalConfigBase<T, TInput> {
    /** @inheritdoc */
    step(results: ParseStepResult<T>, parser: Parser): void;
}
/** Local config variant in which `complete` is required. */
interface ParseLocalConfigNoStep<T = any, TInput = undefined> extends ParseLocalConfigBase<T, TInput> {
    /** @inheritdoc */
    complete(results: ParseResult<T>, file: TInput): void;
}

/** Local parsing is async and thus must specify either `step` or `complete` (but may specify both). */
export type ParseLocalConfig<T = any, TInput = undefined> =
    | ParseLocalConfigStep<T, TInput>
    | ParseLocalConfigNoStep<T, TInput>;

/** Remote parsing has options for the backing web request. */
interface ParseRemoteConfigBase<T = any> extends ParseAsyncConfigBase<T, string> {
    /**
     * This indicates that the string you passed as the first argument to `parse()`
     * is actually a URL from which to download a file and parse its contents.
     */
    download: true;
    /**
     * If defined, should be an object that describes the headers.
     * @example { 'Authorization': 'token 123345678901234567890' }
     * @default undefined
     */
    downloadRequestHeaders?: { [headerName: string]: string } | undefined;
    /**
     * Use POST request on the URL of the download option. The value passed will be set as the body of the request.
     * @default undefined
     */
    downloadRequestBody?: Blob | BufferSource | FormData | URLSearchParams | string | undefined;
    /**
     * A boolean value passed directly into XMLHttpRequest's "withCredentials" property.
     * @default undefined
     */
    withCredentials?: boolean | undefined;
}

/** Remote config variant in which `step` is required. */
interface ParseRemoteConfigStep<T = any> extends ParseRemoteConfigBase<T> {
    /** @inheritdoc */
    step(results: ParseStepResult<T>, parser: Parser): void;
}
/** Remote config variant in which `complete` is required. */
interface ParseRemoteConfigNoStep<T = any> extends ParseRemoteConfigBase<T> {
    /** @inheritdoc */
    complete(results: ParseResult<T>, file: string): void;
}

/** Remote parsing is async and thus must specify either `step` or `complete` (but may specify both). */
export type ParseRemoteConfig<T = any> = ParseRemoteConfigStep<T> | ParseRemoteConfigNoStep<T>;

/** Options accepted by `unparse`. Every option is optional. */
export interface UnparseConfig {
    /**
     * If `true`, forces all fields to be enclosed in quotes.
     * If an array of `true`/`false` values, specifies which fields should be force-quoted (first boolean is for the first column, second boolean for the second column, ...).
     * A function that returns a boolean value can be used to determine the quotes value of a cell.
     * This function accepts the cell value and column index as parameters.
     * Note that this option is ignored for `undefined`, `null` and `date-object` values.
     * The option `escapeFormulae` also takes precedence over this.
     *
     * @default false
     */
    quotes?: boolean | boolean[] | ((value: any, columnIndex: number) => boolean) | undefined;
    /**
     * The character used to quote fields.
     * @default '"'
     */
    quoteChar?: string | undefined;
    /**
     * The character used to escape `quoteChar` inside field values.
     * @default '"'
     */
    escapeChar?: string | undefined;
    /**
     * The delimiting character. Multi-character delimiters are supported. It must not be found in `Papa.BAD_DELIMITERS`.
     * @default ','
     */
    delimiter?: string | undefined;
    /**
     * If `false`, will omit the header row.
     * If `data` is an array of arrays this option is ignored.
     * If `data` is an array of objects the keys of the first object are the header row.
     * If `data` is an object with the keys `fields` and `data` the `fields` are the header row.
     * @default true
     */
    header?: boolean | undefined;
    /**
     * The character used to determine newline sequence.
     * @default '\r\n'
     */
    newline?: string | undefined;
    /**
     * If `true`, lines that are completely empty (those which evaluate to an empty string) will be skipped.
     * If set to `'greedy'`, lines that don't have any content (those which have only whitespace after parsing) will also be skipped.
     * @default false
     */
    skipEmptyLines?: boolean | "greedy" | undefined;
    /**
     * If `data` is an array of objects this option can be used to manually specify the keys (columns) you expect in the objects.
     * If not set the keys of the first object are used as columns.
     * @default undefined
     */
    columns?: string[] | undefined;
    /**
     * If `true`, field values that begin with `=`, `+`, `-`, or `@`,
     * will be prepended with a `'` to defend against [injection attacks](https://www.contextis.com/en/blog/comma-separated-vulnerabilities),
     * because Excel and LibreOffice will automatically parse such cells as formulae.
     * A regular expression may be supplied instead to override which values are treated as formulae.
     * @default false
     */
    escapeFormulae?: boolean | RegExp | undefined;
}

/**
 * Input shape for `unparse` that explicitly defines `fields` and `data`.
 *
 * @typeParam T type of a single data row.
 */
export interface UnparseObject<T> {
    /** Field names; used as the header row (see `UnparseConfig.header`). */
    fields: string[];
    /** The rows to serialize. */
    data: T[];
}

/** Error structure */
export interface ParseError {
    /** A generalization of the error */
    type: "Quotes" | "Delimiter" | "FieldMismatch";
    /** Standardized error code */
    code: "MissingQuotes" | "UndetectableDelimiter" | "TooFewFields" | "TooManyFields" | "InvalidQuotes";
    /** Human-readable details */
    message: string;
    /** Row index of parsed data where error is */
    row?: number | undefined;
    /** Index within the row where error is */
    index?: number | undefined;
}

/** Extra information about a parse, attached to every result as `meta`. */
export interface ParseMeta {
    /** Delimiter used */
    delimiter: string;
    /** Line break sequence used */
    linebreak: string;
    /** Whether process was aborted */
    aborted: boolean;
    /** Array of field names */
    fields?: string[] | undefined;
    /** Whether preview consumed all input */
    truncated: boolean;
    /** NOTE(review): undocumented here; presumably the character position reached in the input — confirm against Papa Parse. */
    cursor: number;
}

/**
 * The result handed to the `step` callback.
 * It contains three members: data, errors, and meta.
 * Unlike `ParseResult`, `data` here is a single row rather than an array of rows.
 */
export interface ParseStepResult<T> {
    /**
     * The row parsed in this step.
     */
    data: T;
    /** an array of errors. */
    errors: ParseError[];
    /**
     * contains extra information about the parse, such as delimiter used,
     * the newline sequence, whether the process was aborted, etc.
     * Properties in this object are not guaranteed to exist in all situations.
     */
    meta: ParseMeta;
}

/**
 * A parse result always contains three objects: data, errors, and meta.
 * Data and errors are arrays, and meta is an object.
 */
export interface ParseResult<T> {
    /**
     * an array of rows. If header is false, rows are arrays; otherwise they are objects of data keyed by the field name.
     */
    data: T[];
    /** an array of errors. */
    errors: ParseError[];
    /**
     * contains extra information about the parse, such as delimiter used,
     * the newline sequence, whether the process was aborted, etc.
     * Properties in this object are not guaranteed to exist in all situations.
     */
    meta: ParseMeta;
}