/// <reference types="node" />
/// <reference types="node" />
import { ParquetRecord, ParquetValueArray } from './declare';
import { ParquetSchema } from './schema';
import { ColumnChunk, FileMetaData, RowGroup, ColumnMetaData } from './thrift';
/**
 * Variation of ParquetData which always has Int32Array for dLevels and rLevels.
 */
export interface ParquetReadData {
    /** Levels as an Int32Array (presumably the Parquet definition levels — confirm against the implementation). */
    dLevels: Int32Array;
    /** Levels as an Int32Array (presumably the Parquet repetition levels — confirm against the implementation). */
    rLevels: Int32Array;
    /** The column values that were read. */
    values: ParquetValueArray;
    /** NOTE(review): presumably the number of entries read for this column — confirm against the implementation. */
    count: number;
}
/**
 * Variation of ParquetBuffer which always has Int32Array for dLevels and rLevels.
 */
export interface ParquetReadBuffer {
    /** NOTE(review): presumably the number of rows covered by this buffer — confirm against the implementation. */
    rowCount: number;
    /** Read data per column, keyed by a string (presumably the column name/path — confirm the key format). */
    columnData: Record<string, ParquetReadData>;
}
/**
 * A parquet cursor is used to retrieve rows from a parquet file in order
 */
export declare class ParquetCursor<T> implements AsyncIterable<T> {
    /** File metadata; presumably the value passed to the constructor. */
    metadata: FileMetaData;
    /** Asynchronous envelope reader; presumably the value passed to the constructor. */
    envelopeReader: ParquetEnvelopeReader;
    /** Schema of the file; presumably the value passed to the constructor. */
    schema: ParquetSchema;
    /** Column paths, each one an array of path segments. */
    columnList: string[][];
    /** NOTE(review): presumably the records of the currently loaded row group — confirm against the implementation. */
    rowGroup: ParquetRecord[];
    /** NOTE(review): presumably the index of the current row group — confirm against the implementation. */
    rowGroupIndex: number;
    /** NOTE(review): presumably the position of the cursor within the loaded records — confirm against the implementation. */
    cursorIndex: number;
    /**
     * Create a new parquet cursor from the file metadata and an envelope reader.
     * It is usually not recommended to call this constructor directly except for
     * advanced and internal use cases. Consider using getCursor() on the
     * ParquetReader instead.
     *
     * @param metadata file metadata
     * @param envelopeReader envelope reader used to access the file
     * @param schema schema of the file
     * @param columnList column paths, each one an array of path segments
     */
    constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader, schema: ParquetSchema, columnList: string[][]);
    /**
     * Retrieve the next row from the cursor. Returns a row or NULL if the end
     * of the file was reached.
     *
     * NOTE(review): the method-level type parameter `T` shadows the class-level
     * `T` and defaults to `any`, so the resolved type is not tied to the cursor's
     * row type. The declared return type also does not include `null` even though
     * NULL is documented as the end-of-file result; callers should check for it.
     */
    next<T = any>(): Promise<T>;
    /**
     * Rewind the cursor to the beginning of the file.
     */
    rewind(): void;
    /**
     * Implements AsyncIterable, so the cursor can be consumed with
     * `for await (... of cursor)`.
     */
    [Symbol.asyncIterator](): AsyncIterator<T>;
}
/**
 * A parquet reader allows retrieving the rows from a parquet file in order.
 * The basic usage is to create a reader and then retrieve a cursor/iterator
 * which allows you to consume row after row until all rows have been read. It is
 * important that you call close() after you are finished reading the file to
 * avoid leaking file descriptors.
 */
export declare class ParquetReader<T> implements AsyncIterable<T> {
    /**
     * Open the parquet file pointed to by the specified path and return a new
     * parquet reader.
     *
     * @param filePath path of the parquet file to open
     * @returns a promise resolving to the new reader
     */
    static openFile<T>(filePath: string): Promise<ParquetReader<T>>;
    /**
     * Create a new parquet reader over an in-memory buffer.
     *
     * @param buffer buffer to read from
     * @returns a promise resolving to the new reader
     */
    static openBuffer<T>(buffer: Buffer): Promise<ParquetReader<T>>;
    /** File metadata; presumably the value passed to the constructor. */
    metadata: FileMetaData;
    /** Asynchronous envelope reader; presumably the value passed to the constructor. */
    envelopeReader: ParquetEnvelopeReader;
    /** Schema of the file (see getSchema()). */
    schema: ParquetSchema;
    /**
     * Create a new parquet reader from the file metadata and an envelope reader.
     * It is not recommended to call this constructor directly except for advanced
     * and internal use cases. Consider using one of the open{File,Buffer} methods
     * instead.
     *
     * @param metadata file metadata
     * @param envelopeReader envelope reader used to access the file
     */
    constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeReader);
    /**
     * Return a cursor to the file. You may open more than one cursor and use
     * them concurrently. All cursors become invalid once close() is called on
     * the reader object.
     *
     * The columnList parameter controls which columns are actually read
     * from disk. An empty array or no value implies all columns. A list of column
     * names means that only those columns should be loaded from disk. Each entry
     * may be a single name or an array of names.
     */
    getCursor(): ParquetCursor<T>;
    getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetCursor<Pick<T, K>>;
    getCursor(columnList: (string | string[])[]): ParquetCursor<Partial<T>>;
    /**
     * Get an iterable over a single column.  The column is specified as an array of
     * strings in order to support nested records.
     *
     * The path should not reference a nested record column.
     *
     * When a column is repeated the iterable will produce an array for each row.
     *
     * When a column is optional the iterable will produce null for any row missing
     * the value.
     *
     * If a column is repeated and also nested inside another repeated object, then an array of arrays
     * is returned for each row in the dataset.
     *
     * If a column is optional and also nested inside a repeated nested object, then it will be in an array
     * where the array elements may be null.
     *
     * This means you can iterate multiple of these in parallel to walk multiple
     * columns at once and they will stay in sync as long as the calls to next()
     * are made in sync.
     *
     * @param columnPath path segments identifying the column
     * @returns an async iterable yielding one entry per row
     */
    getColumnValues(columnPath: string[]): AsyncIterable<any>;
    /**
     * Return the number of rows in this file. Note that the number of rows is
     * not necessarily equal to the number of rows in each column.
     */
    getRowCount(): number;
    /**
     * Returns the ParquetSchema for this file.
     */
    getSchema(): ParquetSchema;
    /**
     * Returns the user (key/value) metadata for this file.
     */
    getMetadata(): Record<string, string>;
    /**
     * Returns the column metadata for all columns, as a record mapping a string
     * key to an array of ColumnMetaData (presumably keyed by column name/path —
     * confirm the key format against the implementation).
     */
    getColumnMetadata(): Record<string, ColumnMetaData[]>;
    /**
     * Close this parquet reader. You MUST call this method once you're finished
     * reading rows.
     */
    close(): Promise<void>;
    /**
     * Implements AsyncIterable, so the reader can be consumed with
     * `for await (... of reader)`.
     */
    [Symbol.asyncIterator](): AsyncIterator<T>;
}
/**
 * The parquet envelope reader allows direct, unbuffered access to the individual
 * sections of the parquet file, namely the header, footer and the row groups.
 * This class is intended for advanced/internal users; if you just want to retrieve
 * rows from a parquet file use the ParquetReader instead
 */
export declare class ParquetEnvelopeReader {
    /** Callback that asynchronously reads `length` bytes starting at `position` and resolves to a Buffer. */
    read: (position: number, length: number) => Promise<Buffer>;
    /** Callback that asynchronously closes the reader. */
    close: () => Promise<void>;
    /** Size of the file (presumably in bytes — confirm against the implementation). */
    fileSize: number;
    /**
     * Open an envelope reader for the file at the given path.
     *
     * @param filePath path of the parquet file to open
     * @returns a promise resolving to the new envelope reader
     */
    static openFile(filePath: string): Promise<ParquetEnvelopeReader>;
    /**
     * Read parquet data from an in-memory buffer.  This provides an asynchronous
     * interface compatible with reading from a file.
     *
     * Note that you can also use ParquetEnvelopeBufferReader if you don't need your code to be able
     * to handle files and buffers both.  It may offer some performance benefit because it does not yield
     * to the event loop in between operations.
     */
    static openBuffer(buffer: Buffer): Promise<ParquetEnvelopeReader>;
    /**
     * Create an envelope reader from raw read/close callbacks and a file size.
     *
     * @param read callback used to read a byte range
     * @param close callback used to close the reader
     * @param fileSize size of the file
     */
    constructor(read: (position: number, length: number) => Promise<Buffer>, close: () => Promise<void>, fileSize: number);
    /** Read the file header. Resolves with no value (presumably rejects if the header is invalid — confirm). */
    readHeader(): Promise<void>;
    /**
     * Read a row group and resolve to the per-column read data.
     *
     * @param schema schema of the file
     * @param rowGroup row group metadata to read
     * @param columnList column paths, each one an array of path segments
     */
    readRowGroup(schema: ParquetSchema, rowGroup: RowGroup, columnList: string[][]): Promise<ParquetReadBuffer>;
    /**
     * Read a single column chunk and resolve to its read data.
     *
     * @param schema schema of the file
     * @param colChunk column chunk metadata to read
     */
    readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): Promise<ParquetReadData>;
    /** Read the file footer and resolve to the file metadata. */
    readFooter(): Promise<FileMetaData>;
}
/**
 * A parquet buffer cursor is used to retrieve rows from an in-memory parquet
 * buffer in order. It is the synchronous counterpart of ParquetCursor.
 */
export declare class ParquetBufferCursor<T> implements Iterable<T> {
    /** File metadata; presumably the value passed to the constructor. */
    metadata: FileMetaData;
    /** Synchronous envelope reader; presumably the value passed to the constructor. */
    envelopeReader: ParquetEnvelopeBufferReader;
    /** Schema of the data; presumably the value passed to the constructor. */
    schema: ParquetSchema;
    /** Column paths, each one an array of path segments. */
    columnList: string[][];
    /** NOTE(review): presumably the records currently loaded by the cursor — confirm against the implementation. */
    rows: ParquetRecord[];
    /** NOTE(review): presumably an index associated with `rows` — confirm its exact meaning against the implementation. */
    rowsIndex: number;
    /** NOTE(review): presumably the position of the cursor within the loaded records — confirm against the implementation. */
    cursorIndex: number;
    /**
     * Create a new parquet buffer cursor from the file metadata and an envelope
     * reader. It is usually not recommended to call this constructor directly
     * except for advanced and internal use cases. Consider using getCursor() on
     * the ParquetBufferReader instead.
     *
     * @param metadata file metadata
     * @param envelopeReader synchronous envelope reader used to access the buffer
     * @param schema schema of the data
     * @param columnList column paths, each one an array of path segments
     */
    constructor(metadata: FileMetaData, envelopeReader: ParquetEnvelopeBufferReader, schema: ParquetSchema, columnList: string[][]);
    /**
     * Retrieve the next row from the cursor. Returns a row or NULL if the end
     * of the file was reached.
     *
     * NOTE(review): the method-level type parameter `T` shadows the class-level
     * `T` and defaults to `any`, so the returned type is not tied to the cursor's
     * row type. The declared return type also does not include `null` even though
     * NULL is documented as the end-of-file result; callers should check for it.
     */
    next<T = any>(): T;
    /**
     * Rewind the cursor to the beginning of the file.
     */
    rewind(): void;
    /**
     * Implements Iterable, so the cursor can be consumed with
     * `for (... of cursor)`.
     */
    [Symbol.iterator](): Iterator<T>;
}
/**
 * A parquet buffer reader allows retrieving the rows from an in-memory parquet
 * buffer in order. The basic usage is to create a reader and then retrieve a
 * cursor/iterator which allows you to consume row after row until all rows have
 * been read. Unlike ParquetReader, this class declares no close() method, so
 * there is no close() call to make when you are finished reading.
 */
export declare class ParquetBufferReader<T> implements Iterable<T> {
    /** The buffer being read; presumably the value passed to the constructor. */
    buffer: Buffer;
    /**
     * Create a new synchronous parquet reader over an in-memory buffer.
     *
     * @param buffer buffer to read from
     * @returns the new reader (returned directly, not wrapped in a promise)
     */
    static openBuffer<T>(buffer: Buffer): ParquetBufferReader<T>;
    /** File metadata (presumably read from the buffer's footer — confirm against the implementation). */
    metadata: FileMetaData;
    /** Synchronous envelope reader used to access the buffer. */
    envelopeReader: ParquetEnvelopeBufferReader;
    /** Schema of the data (see getSchema()). */
    schema: ParquetSchema;
    /**
     * Create a new parquet reader from a buffer.  This version of ParquetReader
     * runs synchronously so it may be more efficient when reading from a Buffer.
     *
     * However, it doesn't have a compatible API with ParquetReader.
     *
     * @param buffer buffer to read from
     */
    constructor(buffer: Buffer);
    /**
     * Return a cursor to the buffer. You may open more than one cursor and use
     * them concurrently.
     *
     * The columnList parameter controls which columns are actually loaded.
     * An empty array or no value implies all columns. A list of column
     * names means that only those columns should be loaded.
     *
     * When the schema has nested records, you will need to specify each column as an array
     * of strings specifying the "path" to the actual leaf column to fetch.
     */
    getCursor(): ParquetBufferCursor<T>;
    getCursor<K extends keyof T>(columnList: (K | K[])[]): ParquetBufferCursor<Pick<T, K>>;
    getCursor(columnList: (string | string[])[]): ParquetBufferCursor<Partial<T>>;
    /**
     * Get an iterable over a single column.  The column is specified as an array of
     * strings in order to support nested records.
     *
     * The path should not reference a nested record column.
     *
     * When a column is repeated the iterable will produce an array for each row.
     *
     * When a column is optional the iterable will produce null for any row missing
     * the value.
     *
     * If a column is repeated and also nested inside another repeated object, then an array of arrays
     * is returned for each row in the dataset.
     *
     * If a column is optional and also nested inside a repeated nested object, then it will be in an array
     * where the array elements may be null.
     *
     * This means you can iterate multiple of these in parallel to walk multiple
     * columns at once and they will stay in sync as long as the calls to next()
     * are made in sync.
     *
     * @param columnPath path segments identifying the column
     * @returns a synchronous iterable yielding one entry per row
     */
    getColumnValues(columnPath: string[]): Iterable<any>;
    /**
     * Return the number of rows in this file. Note that the number of rows is
     * not necessarily equal to the number of rows in each column.
     */
    getRowCount(): number;
    /**
     * Returns the ParquetSchema for this file.
     */
    getSchema(): ParquetSchema;
    /**
     * Returns the user (key/value) metadata for this file.
     */
    getMetadata(): Record<string, string>;
    /**
     * Implements Iterable, so the reader can be consumed with
     * `for (... of reader)`.
     */
    [Symbol.iterator](): Iterator<T>;
}
/**
 * Synchronous envelope reader over an in-memory buffer. Its methods mirror the
 * names of those on ParquetEnvelopeReader but return their results directly
 * instead of through promises.
 */
export declare class ParquetEnvelopeBufferReader {
    /** The buffer being read; presumably the value passed to the constructor. */
    buffer: Buffer;
    /**
     * Create an envelope reader over the given buffer.
     *
     * @param buffer buffer to read from
     */
    constructor(buffer: Buffer);
    /**
     * Return `length` bytes starting at `offset` as a Buffer.
     * NOTE(review): whether the result is a copy or a view of `buffer` is not
     * visible from this declaration — confirm before mutating it.
     */
    read(offset: number, length: number): Buffer;
    /** Read the header. Returns nothing (presumably throws if the header is invalid — confirm). */
    readHeader(): void;
    /**
     * Read a row group and return the per-column read data.
     *
     * @param schema schema of the data
     * @param rowGroup row group metadata to read
     * @param columnList column paths, each one an array of path segments
     */
    readRowGroup(schema: ParquetSchema, rowGroup: RowGroup, columnList: string[][]): ParquetReadBuffer;
    /**
     * Read a single column chunk and return its read data.
     *
     * @param schema schema of the data
     * @param colChunk column chunk metadata to read
     */
    readColumnChunk(schema: ParquetSchema, colChunk: ColumnChunk): ParquetReadData;
    /** Read the footer and return the file metadata. */
    readFooter(): FileMetaData;
}