UNPKG

19.7 kBTypeScriptView Raw
1/// <reference types="node" />
2
3import { Duplex } from "stream";
4
5export as namespace Papa;
6
7export {}; // Don't export all declarations!
8
/**
 * Parses a local file.
 *
 * @param file the file to read, e.g. a `File` object taken from the DOM.
 * @param config configuration object; it has to carry a callback because results arrive asynchronously.
 * @returns nothing. Results are delivered to the configured callback.
 */
// eslint-disable-next-line @definitelytyped/no-unnecessary-generics
export function parse<T, TFile extends LocalFile = LocalFile>(file: TFile, config: ParseLocalConfig<T, TFile>): void;

/**
 * Downloads and parses a remote file.
 *
 * @param url path or URL of the file to download.
 * @param config configuration object (see `ParseRemoteConfig`).
 * @returns nothing. Results are delivered to the configured callback.
 */
// eslint-disable-next-line @definitelytyped/no-unnecessary-generics
export function parse<T>(url: string, config: ParseRemoteConfig<T>): void;

/**
 * Parses a string inside a web worker.
 *
 * @param csvString the delimited text to parse.
 * @param config configuration object with `worker: true`.
 * @returns nothing. Results are delivered to the configured callback.
 */
/* eslint-disable @definitelytyped/no-unnecessary-generics */
// tslint:disable-next-line:unified-signatures
export function parse<T>(csvString: string, config: ParseWorkerConfig<T> & { download?: false | undefined }): void;
/* eslint-enable @definitelytyped/no-unnecessary-generics */

/**
 * Parses a string synchronously.
 *
 * @param csvString the delimited text to parse.
 * @param config optional configuration object.
 * @returns the parse results.
 */
export function parse<T>(
    csvString: string,
    config?: ParseConfig<T> & { download?: false | undefined; worker?: false | undefined },
): ParseResult<T>;

/**
 * Catch-all signature accepting a string, a remote file or a local file.
 *
 * @param source the data to parse.
 * @param config configuration object.
 * @returns nothing. Results are delivered to the configured callback.
 */
export function parse<T>(
    source: LocalFile | string,
    config:
        & ParseLocalConfig<T, LocalFile>
        & (
            | (ParseConfig<T> & { download?: false | undefined; worker?: false | undefined })
            | (ParseWorkerConfig<T> & { download?: false | undefined })
            | ParseRemoteConfig<T>
        ),
): void;

/**
 * Parses in Node.js streaming style.
 *
 * @param stream must be `NODE_STREAM_INPUT`.
 * @param config optional configuration object.
 * @returns a Node.js duplex stream.
 *
 * @see https://github.com/mholt/PapaParse#papa-parse-for-node
 */
export function parse(stream: typeof NODE_STREAM_INPUT, config?: ParseConfig): Duplex;
70
/**
 * Converts JavaScript data into a CSV string.
 *
 * @param data one of: an array of arrays, an array of objects, or an object
 * that explicitly defines `fields` and `data`.
 * @param config optional configuration object.
 * @returns the generated CSV text.
 */
export function unparse<T>(data: T[] | UnparseObject<T>, config?: UnparseConfig): string;
77
/**
 * Read-only properties
 */

/** Characters that may not be used as delimiters: `\r`, `\n`, `"`, `\ufeff`. */
export const BAD_DELIMITERS: readonly string[];

/**
 * The true delimiter. Invisible. ASCII code 30.
 * Should be doing the job we strangely rely upon commas and tabs for.
 */
export const RECORD_SEP: "\x1E";

/** Another character sometimes used as a delimiter. ASCII code 31. */
export const UNIT_SEP: "\x1F";

/**
 * Whether the browser supports HTML5 Web Workers.
 * When `false`, setting `worker: true` has no effect.
 */
export const WORKERS_SUPPORTED: boolean;
95
/**
 * Sentinel input for Node.js streaming mode.
 * Passing it as the first argument of `parse` makes `parse` return a Node.js `Duplex` stream.
 */
export const NODE_STREAM_INPUT: unique symbol;
100
/**
 * Configurable properties
 */

/**
 * Size in bytes of each file chunk, used when streaming files obtained from the DOM
 * that exist on the local computer. Defaults to 10 MB.
 * @default 10485760
 */
export let LocalChunkSize: number;

/**
 * Counterpart of `LocalChunkSize` for files downloaded from remote locations. Defaults to 5 MB.
 * @default 5242880
 */
export let RemoteChunkSize: number;

/**
 * Delimiter used when none is specified and none can be detected automatically. Defaults to a comma.
 * @default ','
 */
export let DefaultDelimiter: string;
122
/** A local input source: either a `File` object or a Node.js readable stream. */
export type LocalFile = File | NodeJS.ReadableStream;
125
/**
 * Papa exposes more classes than this one, but none of them are officially documented.
 * The API of `Parser` is declared here because parse callbacks receive a `Parser`
 * instance and can interact with it.
 */
export class Parser {
    constructor(config: ParseConfig);

    parse(input: string, baseIndex: number, ignoreLastRow: boolean): any;

    /** Sets the abort flag. */
    abort(): void;

    /** Gets the cursor position. */
    getCharIndex(): number;

    /** Pauses parsing; see the `step` callback documentation on `ParseConfig`. */
    pause(): void;

    /** Resumes parsing after a `pause()`. */
    resume(): void;
}
146
/**
 * Options shared by every flavour of `parse`.
 *
 * @typeParam T type of a parsed row.
 * @typeParam TInput type of the second argument handed to `complete` (the input that was parsed).
 */
export interface ParseConfig<T = any, TInput = undefined> {
    /**
     * The delimiting character.
     * Leave blank to auto-detect from a list of most common delimiters, or any values passed in through `delimitersToGuess`.
     * It can be a string or a function.
     * If a string, it can be of any length (so multi-character delimiters are supported).
     * If a function, it must accept the input as first parameter and it must return a string which will be used as delimiter.
     * In both cases it cannot be found in `Papa.BAD_DELIMITERS`.
     * @default // auto-detect
     */
    delimiter?: string | ((input: string) => string) | undefined;
    /**
     * The newline sequence. Leave blank to auto-detect. Must be one of `\r`, `\n`, or `\r\n`.
     * @default // auto-detect
     */
    newline?: "\r" | "\n" | "\r\n" | undefined;
    /**
     * The character used to quote fields. Quoting every field is not mandatory: any field which is not quoted will be read correctly.
     * @default '"'
     */
    quoteChar?: string | undefined;
    /**
     * The character used to escape the quote character within a field.
     * If not set, this option will default to the value of `quoteChar`,
     * meaning that the default escaping of quote character within a quoted field is using the quote character two times.
     * (e.g. `"column with ""quotes"" in text"`)
     * @default '"'
     */
    escapeChar?: string | undefined;
    /**
     * If `true`, the first row of parsed data will be interpreted as field names.
     * An array of field names will be returned in meta, and each row of data will be an object of values keyed by field name instead of a simple array.
     * Rows with a different number of fields from the header row will produce an error.
     * Warning: Duplicate field names will overwrite values in previous fields having the same name.
     * @default false
     */
    header?: boolean | undefined;
    /**
     * A function to apply on each header. Requires header to be true. The function receives the header as its first argument and the index as second.
     */
    transformHeader?(header: string, index: number): string;
    /**
     * If `true`, numeric and boolean data will be converted to their type instead of remaining strings.
     * Numeric data must conform to the definition of a decimal literal.
     * Numerical values greater than 2^53 or less than -2^53 will not be converted to numbers to preserve precision.
     * European-formatted numbers must have commas and dots swapped.
     * It also accepts an object or a function.
     * If an object, its values should be booleans indicating whether dynamic typing should be applied for each column number (or header name if using headers).
     * If a function, it should return a boolean value for each field number (or name if using headers), which will be passed as first argument.
     * @default false
     */
    dynamicTyping?:
        | boolean
        | { [headerName: string]: boolean; [columnNumber: number]: boolean }
        | ((field: string | number) => boolean)
        | undefined;
    /** If > 0, only that many rows will be parsed. */
    preview?: number | undefined;
    /**
     * A string that indicates a comment (for example, "#" or "//").
     * When Papa encounters a line starting with this string, it will skip the line.
     * @default false
     */
    comments?: false | string | undefined;
    /**
     * If `true`, lines that are completely empty (those which evaluate to an empty string) will be skipped.
     * If set to `'greedy'`, lines that don't have any content (those which have only whitespace after parsing) will also be skipped.
     * @default false
     */
    skipEmptyLines?: boolean | "greedy" | undefined;
    /**
     * Fast mode speeds up parsing significantly for large inputs.
     * However, it only works when the input has no quoted fields.
     * Fast mode will automatically be enabled if no " characters appear in the input.
     * You can force fast mode either way by setting it to true or false.
     */
    fastMode?: boolean | undefined;
    /**
     * A function to apply on each value.
     * The function receives the value as its first argument and the column number or header name when enabled as its second argument.
     * The return value of the function will replace the value it received.
     * The transform function is applied before `dynamicTyping`.
     */
    transform?(value: string, field: string | number): any;
    /**
     * An array of delimiters to guess from if the delimiter option is not set.
     * @default [',', '\t', '|', ';', Papa.RECORD_SEP, Papa.UNIT_SEP]
     */
    delimitersToGuess?: string[] | undefined;
    /**
     * To stream the input, define a callback function.
     * Streaming is necessary for large files which would otherwise crash the browser.
     * You can call parser.abort() to abort parsing.
     * And, except when using a Web Worker, you can call parser.pause() to pause it, and parser.resume() to resume.
     */
    step?(results: ParseStepResult<T>, parser: Parser): void;
    /**
     * The callback to execute when parsing is complete.
     * It receives the parse results. If parsing a local file, the File is passed in, too.
     * When streaming, parse results are not available in this callback.
     */
    complete?(results: ParseResult<T>, file: TInput): void;
    /**
     * A function to execute before parsing the first chunk.
     * Can be used with chunk or step streaming modes.
     * The function receives as an argument the chunk about to be parsed, and it may return a modified chunk to parse.
     * This is useful for stripping header lines (as long as the header fits in a single chunk).
     */
    // eslint-disable-next-line @typescript-eslint/no-invalid-void-type
    beforeFirstChunk?(chunk: string): string | void;
}
258
/**
 * Configuration for parsing a string in a worker thread.
 * Compared with `ParseConfig`, `worker` must be `true` and `complete` is required.
 */
export interface ParseWorkerConfig<T = any> extends ParseConfig<T> {
    /**
     * Whether or not to use a worker thread.
     * Using a worker will keep your page reactive, but may be slightly slower.
     */
    worker: true;
    /**
     * The callback to execute when parsing is complete.
     * It receives the parse results only; this signature declares no file argument.
     * When streaming, parse results are not available in this callback.
     */
    complete(results: ParseResult<T>): void;
}
272
/** Common base of every asynchronous parsing configuration. */
interface ParseAsyncConfigBase<T = any, TInput = undefined> extends ParseConfig<T, TInput> {
    /**
     * Whether to parse in a worker thread.
     * A worker keeps the page reactive, at the cost of possibly being slightly slower.
     * @default false
     */
    worker?: boolean | undefined;
    /**
     * Overrides `Papa.LocalChunkSize` and `Papa.RemoteChunkSize`.
     */
    chunkSize?: number | undefined;
    /**
     * Callback that, like `step`, activates streaming.
     * It runs after each chunk of the file has been loaded and parsed, rather than after each row.
     * Works only with local and remote files.
     * Do not combine the `chunk` and `step` callbacks.
     */
    chunk?(results: ParseResult<T>, parser: Parser): void;
    /**
     * Callback executed if FileReader encounters an error.
     * It is passed two arguments: the error and the File.
     */
    error?(error: Error, file: TInput): void;
}
298
/** Asynchronous parsing of a local file may additionally specify an encoding. */
interface ParseLocalConfigBase<T = any, TInput = undefined> extends ParseAsyncConfigBase<T, TInput> {
    /**
     * Encoding used when opening local files.
     * When specified, it must be a value supported by the FileReader API.
     */
    encoding?: string | undefined;
}
304
/** Local-file configuration in which the `step` callback is mandatory. */
interface ParseLocalConfigStep<T = any, TInput = undefined> extends ParseLocalConfigBase<T, TInput> {
    /** @inheritdoc */
    step(results: ParseStepResult<T>, parser: Parser): void;
}

/** Local-file configuration in which the `complete` callback is mandatory. */
interface ParseLocalConfigNoStep<T = any, TInput = undefined> extends ParseLocalConfigBase<T, TInput> {
    /** @inheritdoc */
    complete(results: ParseResult<T>, file: TInput): void;
}

/**
 * Configuration for parsing a local file.
 * Local parsing is asynchronous, so at least one of `step` or `complete` must be given (both are allowed).
 */
export type ParseLocalConfig<T = any, TInput = undefined> =
    | ParseLocalConfigStep<T, TInput>
    | ParseLocalConfigNoStep<T, TInput>;
318
/** Remote parsing adds options for the web request that backs it. */
interface ParseRemoteConfigBase<T = any> extends ParseAsyncConfigBase<T, string> {
    /**
     * Marks the string passed as first argument to `parse()` as a URL:
     * the file is downloaded from it and its contents are parsed.
     */
    download: true;
    /**
     * When defined, an object describing the request headers.
     * @example { 'Authorization': 'token 123345678901234567890' }
     * @default undefined
     */
    downloadRequestHeaders?: { [headerName: string]: string } | undefined;
    /**
     * Switches the download to a POST request; the given value is set as the body of the request.
     * @default undefined
     */
    downloadRequestBody?: Blob | BufferSource | FormData | URLSearchParams | string | undefined;
    /**
     * Boolean passed directly into XMLHttpRequest's "withCredentials" property.
     * @default undefined
     */
    withCredentials?: boolean | undefined;
}
343
/** Remote configuration in which the `step` callback is mandatory. */
interface ParseRemoteConfigStep<T = any> extends ParseRemoteConfigBase<T> {
    /** @inheritdoc */
    step(results: ParseStepResult<T>, parser: Parser): void;
}

/** Remote configuration in which the `complete` callback is mandatory. */
interface ParseRemoteConfigNoStep<T = any> extends ParseRemoteConfigBase<T> {
    /** @inheritdoc */
    complete(results: ParseResult<T>, file: string): void;
}

/**
 * Configuration for parsing a remote file.
 * Remote parsing is asynchronous, so at least one of `step` or `complete` must be given (both are allowed).
 */
export type ParseRemoteConfig<T = any> = ParseRemoteConfigStep<T> | ParseRemoteConfigNoStep<T>;
355
/** Options accepted by `unparse`. */
export interface UnparseConfig {
    /**
     * If `true`, forces all fields to be enclosed in quotes.
     * If an array of `true`/`false` values, specifies which fields should be force-quoted (first boolean is for the first column, second boolean for the second column, ...).
     * A function that returns a boolean value can be used to determine the quotes value of a cell.
     * This function accepts the cell value and column index as parameters.
     * Note that this option is ignored for `undefined`, `null` and `date-object` values.
     * The option `escapeFormulae` also takes precedence over this.
     *
     * @default false
     */
    quotes?: boolean | boolean[] | ((value: any, columnIndex: number) => boolean) | undefined;
    /**
     * The character used to quote fields.
     * @default '"'
     */
    quoteChar?: string | undefined;
    /**
     * The character used to escape `quoteChar` inside field values.
     * @default '"'
     */
    escapeChar?: string | undefined;
    /**
     * The delimiting character. Multi-character delimiters are supported. It must not be found in `Papa.BAD_DELIMITERS`.
     * @default ','
     */
    delimiter?: string | undefined;
    /**
     * If `false`, will omit the header row.
     * If `data` is an array of arrays this option is ignored.
     * If `data` is an array of objects the keys of the first object are the header row.
     * If `data` is an object with the keys `fields` and `data` the `fields` are the header row.
     * @default true
     */
    header?: boolean | undefined;
    /**
     * The character used to determine newline sequence.
     * @default '\r\n'
     */
    newline?: string | undefined;
    /**
     * If `true`, lines that are completely empty (those which evaluate to an empty string) will be skipped.
     * If set to `'greedy'`, lines that don't have any content (those which have only whitespace after parsing) will also be skipped.
     * @default false
     */
    skipEmptyLines?: boolean | "greedy" | undefined;
    /**
     * If `data` is an array of objects this option can be used to manually specify the keys (columns) you expect in the objects.
     * If not set the keys of the first objects are used as column.
     * @default undefined
     */
    columns?: string[] | undefined;
    /**
     * If `true`, field values that begin with `=`, `+`, `-`, or `@`
     * will be prepended with a `'` (single quote) to defend against
     * [injection attacks](https://www.contextis.com/en/blog/comma-separated-vulnerabilities),
     * because Excel and LibreOffice will automatically parse such cells as formulae.
     * A `RegExp` may be given instead of `true` to override which values are treated as formulae.
     * @default false
     */
    escapeFormulae?: boolean | RegExp | undefined;
}
416
/** Input shape for `unparse` that explicitly defines `fields` and `data`. */
export interface UnparseObject<T> {
    /** Field names; per `UnparseConfig.header`, these form the header row. */
    fields: string[];
    /** The rows to serialize. */
    data: T[];
}
421
/** Structure of an error reported while parsing. */
export interface ParseError {
    /** General category of the error. */
    type: "Quotes" | "Delimiter" | "FieldMismatch";
    /** Standardized error code. */
    code: "MissingQuotes" | "UndetectableDelimiter" | "TooFewFields" | "TooManyFields" | "InvalidQuotes";
    /** Human-readable details. */
    message: string;
    /** Row index, within the parsed data, where the error is. */
    row?: number | undefined;
    /** Index within that row where the error is. */
    index?: number | undefined;
}
435
/** Extra information about a parse, attached to every result object as `meta`. */
export interface ParseMeta {
    /** The delimiter that was used. */
    delimiter: string;
    /** The line break sequence that was used. */
    linebreak: string;
    /** Whether the process was aborted. */
    aborted: boolean;
    /** The array of field names. */
    fields?: string[] | undefined;
    /** Whether preview consumed all input. */
    truncated: boolean;
    /** NOTE(review): undocumented here; presumably the character position reached in the input (cf. `Parser.getCharIndex`) - confirm. */
    cursor: number;
}
449
/**
 * Result object handed to the `step` callback.
 * It contains three members: `data`, `errors`, and `meta`.
 * Here `data` is a single `T` rather than an array; `errors` is an array and `meta` is an object.
 */
export interface ParseStepResult<T> {
    /**
     * The data delivered to this step: one `T`, not a `T[]` as on `ParseResult`.
     */
    data: T;
    /** an array of errors. */
    errors: ParseError[];
    /**
     * contains extra information about the parse, such as delimiter used,
     * the newline sequence, whether the process was aborted, etc.
     * Properties in this object are not guaranteed to exist in all situations.
     */
    meta: ParseMeta;
}
468
/**
 * Result of a parse. It always contains three members: `data`, `errors`, and `meta`.
 * `data` and `errors` are arrays; `meta` is an object.
 */
export interface ParseResult<T> {
    /**
     * The parsed rows. With `header` set to false each row is an array;
     * otherwise each row is an object of data keyed by the field name.
     */
    data: T[];
    /** The errors encountered. */
    errors: ParseError[];
    /**
     * Extra information about the parse, such as the delimiter used,
     * the newline sequence, whether the process was aborted, etc.
     * Properties in this object are not guaranteed to exist in all situations.
     */
    meta: ParseMeta;
}
487
\No newline at end of file