/// <reference types="node" />
// NOTE(review): this declaration file had lost every generic type argument that
// begins with an identifier (it contained `Promise;`, `Array>`, `Promise[]>`),
// and its reference directive had been reduced to a bare `///` that commented
// out the imports and ParquetCursor. Except for `AsyncGenerator<{}, void, unknown>`,
// which was intact, the type arguments written on Array / Promise / Record
// below are reconstructions inferred from the doc comments, parallel
// signatures and the otherwise-unused imports (`parquet_shredder`, `PageData`).
// Confirm them against the implementation before publishing.
import Int64 from 'node-int64';
import parquet_thrift from '../gen-nodejs/parquet_types';
import * as parquet_shredder from './shred';
import * as parquet_schema from './schema';
import { BufferReaderOptions } from './bufferReader';
import { Parameter, PageData, ClientS3, ClientParameters, FileMetaDataExt, RowGroupExt, ColumnChunkExt } from './declare';
import { Options } from './codec/types';
/**
 * A parquet cursor is used to retrieve rows from a parquet file in order
 */
declare class ParquetCursor {
    metadata: FileMetaDataExt;
    envelopeReader: ParquetEnvelopeReader;
    schema: parquet_schema.ParquetSchema;
    /** Columns to load; see ParquetReader.getCursor(). Element type reconstructed as `unknown` — TODO confirm. */
    columnList: Array<Array<unknown>>;
    /** NOTE(review): element type reconstructed as `unknown` — TODO confirm. */
    rowGroup: Array<unknown>;
    rowGroupIndex: number;
    cursorIndex: number;
    /**
     * Create a new parquet reader from the file metadata and an envelope reader.
     * It is usually not recommended to call this constructor directly except for
     * advanced and internal use cases. Consider using getCursor() on the
     * ParquetReader instead
     */
    constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, schema: parquet_schema.ParquetSchema, columnList: Array<Array<unknown>>);
    /**
     * Retrieve the next row from the cursor. Returns a row or NULL if the end
     * of the file was reached
     */
    next(): Promise<unknown>;
    /**
     * Rewind the cursor to the beginning of the file
     */
    rewind(): void;
}
/**
 * A parquet reader allows retrieving the rows from a parquet file in order.
 * The basic usage is to create a reader and then retrieve a cursor/iterator
 * which allows you to consume row after row until all rows have been read. It is
 * important that you call close() after you are finished reading the file to
 * avoid leaking file descriptors.
 */
export declare class ParquetReader {
    envelopeReader: ParquetEnvelopeReader | null;
    metadata: FileMetaDataExt | null;
    schema: parquet_schema.ParquetSchema;
    /**
     * Open the parquet file pointed to by the specified path and return a new
     * parquet reader
     */
    static openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions): Promise<ParquetReader>;
    /**
     * Open a parquet file from the supplied buffer.
     * NOTE(review): presumably resolves to a new parquet reader like the other
     * open* factories — TODO confirm.
     */
    static openBuffer(buffer: Buffer, options?: BufferReaderOptions): Promise<ParquetReader>;
    /**
     * Open the parquet file from S3 using the supplied aws client and params
     * The params have to include `Bucket` and `Key` to the file requested
     * This function returns a new parquet reader
     */
    static openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions): Promise<ParquetReader>;
    /**
     * Open the parquet file from a url using the supplied request module
     * params should either be a string (url) or an object that includes
     * a `url` property.
     * This function returns a new parquet reader
     */
    static openUrl(params: Parameter | URL | string, options?: BufferReaderOptions): Promise<ParquetReader>;
    /**
     * Build a reader on top of an existing envelope reader.
     * NOTE(review): presumably resolves to a new parquet reader — TODO confirm.
     */
    static openEnvelopeReader(envelopeReader: ParquetEnvelopeReader, opts?: BufferReaderOptions): Promise<ParquetReader>;
    /**
     * Create a new parquet reader from the file metadata and an envelope reader.
     * It is not recommended to call this constructor directly except for advanced
     * and internal use cases. Consider using one of the open{File,Buffer} methods
     * instead
     */
    constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, opts?: BufferReaderOptions);
    /**
     * Support `for await` iterators on the reader object
     * Uses `ParquetCursor` still under the hood.
     *
     * ```js
     * for await (const record of reader) {
     *   console.log(record);
     * }
     * ```
     */
    [Symbol.asyncIterator](): AsyncGenerator<{}, void, unknown>;
    /**
     * Return a cursor to the file. You may open more than one cursor and use
     * them concurrently. All cursors become invalid once close() is called on
     * the reader object.
     *
     * The columnList parameter controls which columns are actually read
     * from disk. An empty array or no value implies all columns. A list of column
     * names means that only those columns should be loaded from disk.
     */
    getCursor(columnList?: Array<Array<unknown>>): ParquetCursor;
    /**
     * Return the number of rows in this file. Note that the number of rows is
     * not necessarily equal to the number of rows in each column.
     */
    getRowCount(): Int64;
    /**
     * Returns the ParquetSchema for this file
     */
    getSchema(): parquet_schema.ParquetSchema;
    /**
     * Returns the user (key/value) metadata for this file
     */
    getMetadata(): Record<string, unknown>;
    /**
     * Export the file metadata.
     * NOTE(review): result type reconstructed as `string` (the `indent` parameter
     * suggests JSON.stringify-style serialisation) — TODO confirm.
     */
    exportMetadata(indent: string | number | undefined): Promise<string>;
    /**
     * Close this parquet reader. You MUST call this method once you're finished
     * reading rows
     */
    close(): Promise<void>;
    /**
     * Decode the pages contained in `buffer`.
     * NOTE(review): result type reconstructed as `PageData`, which is imported
     * but not referenced anywhere else in the surviving text — TODO confirm.
     */
    decodePages(buffer: Buffer, opts: Options): Promise<PageData>;
}
/**
 * Lower-level reader held by ParquetReader and ParquetCursor as `envelopeReader`.
 * NOTE(review): the undocumented members below carry only what their
 * signatures show; reconstructed result types are flagged individually.
 */
export declare class ParquetEnvelopeReader {
    /** Read callback; presumably yields `length` bytes starting at `offset` — TODO confirm (Buffer result reconstructed). */
    readFn: (offset: number, length: number, file?: string) => Promise<Buffer>;
    close: () => unknown;
    id: number;
    /** NOTE(review): `Function` is kept verbatim to avoid narrowing the public type; a precise callback signature would be preferable. */
    fileSize: Function | number;
    default_dictionary_size: number;
    metadata?: FileMetaDataExt;
    schema?: parquet_schema.ParquetSchema;
    /** Takes the same parameters as ParquetReader.openFile; result type reconstructed as this class — TODO confirm. */
    static openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>;
    /** Takes the same parameters as ParquetReader.openBuffer; result type reconstructed as this class — TODO confirm. */
    static openBuffer(buffer: Buffer, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>;
    /** Takes the same parameters as ParquetReader.openS3; result type reconstructed as this class — TODO confirm. */
    static openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>;
    /** Accepts the same argument types as ParquetReader.openUrl; result type reconstructed as this class — TODO confirm. */
    static openUrl(url: Parameter | URL | string, options?: BufferReaderOptions): Promise<ParquetEnvelopeReader>;
    constructor(readFn: (offset: number, length: number, file?: string) => Promise<Buffer>, closeFn: () => unknown, fileSize: Function | number, options?: BufferReaderOptions, metadata?: FileMetaDataExt);
    /** Same parameter list as `readFn`; Buffer result reconstructed — TODO confirm. */
    read(offset: number, length: number, file?: string): Promise<Buffer>;
    /** NOTE(review): result type reconstructed as `void` — TODO confirm. */
    readHeader(): Promise<void>;
    getColumn(path: string | parquet_thrift.ColumnChunk, row_group: RowGroupExt | number | string | null): ColumnChunkExt;
    /** NOTE(review): element types of `paths` and `row_groups` reconstructed — TODO confirm. */
    getAllColumnChunkDataFor(paths: Array<string>, row_groups?: Array<RowGroupExt>): {
        rowGroupIndex: number;
        column: ColumnChunkExt;
    }[];
    /** NOTE(review): result type reconstructed as the thrift OffsetIndex struct — TODO confirm. */
    readOffsetIndex(path: string | ColumnChunkExt, row_group: RowGroupExt | number | null, opts: Options): Promise<parquet_thrift.OffsetIndex>;
    /** NOTE(review): result type reconstructed as the thrift ColumnIndex struct — TODO confirm. */
    readColumnIndex(path: string | ColumnChunkExt, row_group: RowGroupExt | number, opts: Options): Promise<parquet_thrift.ColumnIndex>;
    /** NOTE(review): record element type reconstructed as `Record<string, unknown>` for both `records` and the result — TODO confirm. */
    readPage(column: ColumnChunkExt, page: parquet_thrift.PageLocation | number, records: Array<Record<string, unknown>>, opts: Options): Promise<Record<string, unknown>[]>;
    /**
     * NOTE(review): result type reconstructed as `parquet_shredder.RecordBuffer`;
     * the `parquet_shredder` import is not referenced anywhere else in the
     * surviving text, but the member name is a guess — TODO confirm.
     */
    readRowGroup(schema: parquet_schema.ParquetSchema, rowGroup: RowGroupExt, columnList: Array<Array<unknown>>): Promise<parquet_shredder.RecordBuffer>;
    /** NOTE(review): result type reconstructed as `PageData` — TODO confirm. */
    readColumnChunk(schema: parquet_schema.ParquetSchema, colChunk: ColumnChunkExt, opts?: Options): Promise<PageData>;
    /** NOTE(review): result type reconstructed as the thrift FileMetaData struct — TODO confirm. */
    readFooter(): Promise<parquet_thrift.FileMetaData>;
}
export {};