import pako from 'pako';
import { IHasher } from 'hash-wasm/dist/lib/WASMInterface.js';
import { WritableStreamBuffer } from 'stream-buffers';
import { Argv } from 'yargs';

// NOTE(review): the type arguments on Promise, AsyncIterable, Iterable,
// AsyncIterator, AsyncGenerator, Map, Set, Record, Partial and Awaited, and the
// type parameters of NoConcatInflator / AsyncIterReader.fromReadable, were
// missing from this file and are reconstructed here from how the declarations
// reference one another. Confirm each against the implementation before
// relying on an element type.

/** Object that exposes a `read` function. */
type SourceReader = {
  read: Function;
};

/** Object whose `getReader()` returns something exposing a `read` function. */
type SourceReadable = {
  getReader: (...args: any) => {
    read: Function;
  };
};

/**
 * Input accepted by `AsyncIterReader` and `WARCParser`.
 * NOTE(review): element type of the iterable members is reconstructed as `any`.
 */
type Source = SourceReader | SourceReadable | AsyncIterable<any> | Iterable<any>;

/** A named input handed to the indexers: a filename plus an async-iterable reader. */
type StreamResult = {
  filename: string;
  reader: AsyncIterable<Uint8Array>;
};
type StreamResults = StreamResult[];

/** Offset / record-length pair; `WARCParser` implements this shape. */
type IndexerOffsetLength = {
  offset: number;
  recordLength: number;
};

/** Request description taken by `postToGetUrl`. */
type Request = {
  method: string;
  url: string;
  headers: Map<string, string> | Headers;
  postData?: Uint8Array | string | undefined | null;
  requestBody?: any;
};

/** Alias of `Request`. */
type Response = Request;

/**
 * `pako.Inflate` subclass that additionally carries the owning `reader`,
 * an `ended` flag and a list of `chunks`.
 */
declare class NoConcatInflator<T extends BaseAsyncIterReader> extends pako.Inflate {
  reader: T;
  ended: boolean;
  chunks: Uint8Array[];
  constructor(options: pako.InflateOptions, reader: T);
  onEnd(status: pako.ReturnCodes): void;
}

/**
 * Abstract reader: subclasses provide async iteration and `readlineRaw`;
 * the base declares `readFully`, `readline`, `iterLines` and
 * `getReadableStream` on top of them.
 */
declare abstract class BaseAsyncIterReader {
  static readFully(iter: AsyncIterable<Uint8Array> | Iterable<Uint8Array>): Promise<Uint8Array>;
  abstract [Symbol.asyncIterator](): AsyncIterator<Uint8Array>;
  getReadableStream(): ReadableStream;
  readFully(): Promise<Uint8Array>;
  abstract readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
  readline(maxLength?: number): Promise<string>;
  iterLines(maxLength?: number): AsyncGenerator<string, void, unknown>;
}

type AsyncIterReaderOpts = {
  raw: boolean;
};

/**
 * Concrete reader built from any `Source`, with an optional `compressed`
 * mode string and an optional `dechunk` flag. Tracks separate raw and read
 * offsets (`getRawOffset` / `getReadOffset`).
 */
declare class AsyncIterReader extends BaseAsyncIterReader {
  compressed: string | null;
  opts: AsyncIterReaderOpts;
  inflator: NoConcatInflator<this> | null;
  _sourceIter: AsyncIterator<Uint8Array | null>;
  lastValue: Uint8Array | null;
  errored: boolean;
  _savedChunk: Uint8Array | null;
  _rawOffset: number;
  _readOffset: number;
  numChunks: number;
  constructor(streamOrIter: Source, compressed?: string | null, dechunk?: boolean);
  _loadNext(): Promise<Uint8Array | null>;
  dechunk(source: AsyncIterable<Uint8Array>): AsyncIterator<Uint8Array | null>;
  unread(chunk: Uint8Array): void;
  _next(): Promise<Uint8Array | null>;
  _push(value: Uint8Array): void;
  _getNextChunk(original?: Uint8Array): Uint8Array | null | undefined;
  [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
  readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
  readFully(): Promise<Uint8Array>;
  readSize(sizeLimit: number): Promise<Uint8Array>;
  // NOTE(review): presumably resolves to the number of bytes skipped — confirm.
  skipSize(sizeLimit: number): Promise<number>;
  // NOTE(review): tuple reconstructed as [size, data]; readSize/skipSize
  // presumably each return one element of it — confirm.
  _readOrSkip(sizeLimit?: number, skip?: boolean): Promise<readonly [number, Uint8Array]>;
  getReadOffset(): number;
  getRawOffset(): number;
  getRawLength(prevOffset: number): number;
  static fromReadable<Readable extends { read: Function }>(
    source: Readable,
  ): {
    [Symbol.asyncIterator](): AsyncGenerator<any, void, unknown>;
  };
  static fromIter(source: Iterable<Uint8Array>): {
    [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
  };
}

/**
 * Reader wrapping an `AsyncIterReader` with a `limit` and a `skip` count.
 * NOTE(review): presumably caps reads at `limit` bytes after skipping `skip` — confirm.
 */
declare class LimitReader extends BaseAsyncIterReader {
  sourceIter: AsyncIterReader;
  length: number;
  limit: number;
  skip: number;
  constructor(streamIter: AsyncIterReader, limit: number, skip?: number);
  setLimitSkip(limit: number, skip?: number): void;
  [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
  readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
  // NOTE(review): resolved value type reconstructed as number — confirm.
  skipFully(): Promise<number>;
}

declare function binaryToString(data: Uint8Array | string | undefined | null): string;
declare function rxEscape(string: string): string;
declare function getSurt(url: string): string;
declare function postToGetUrl(request: Request, maxQuerySize?: number): boolean;
declare function appendRequestQuery(url: string, query: string, method: string): string;
declare function jsonToQueryParams(json: unknown, ignoreInvalid?: boolean): URLSearchParams;
declare function mfdToQueryParams(
  mfd: string | Uint8Array | null | undefined,
  contentType: string,
): URLSearchParams;
// NOTE(review): Record value type reconstructed as `unknown` — confirm.
declare function jsonToQueryString(
  json?: string | Record<string, unknown> | undefined | null,
  ignoreInvalid?: boolean,
): string;
declare function mfdToQueryString(
  mfd: string | Uint8Array | undefined | null,
  contentType: string,
): string;
declare function concatChunks(chunks: Uint8Array[], size: number): Uint8Array;
declare function splitChunk(chunk: Uint8Array, inx: number): [Uint8Array, Uint8Array];
declare function UTFToLatin1(value: string): string;
declare function latin1ToUTF(str: string): string;
declare const WARC_ALLOWED_MULTI_VALUE_HEADERS: string[];
declare function isValidMultiValueHeaderName(name: string): boolean;
declare function multiValueHeader(name: string, value: string[]): string;

/**
 * String-to-string `Map` with `append` / `getMultiple` for headers that may
 * carry more than one value; iterates as `[name, value]` pairs.
 */
declare class HeadersMultiMap extends Map<string, string> {
  constructor(headersInit?: HeadersInit);
  isMultiValue(name: string, value: string): boolean;
  getMultiple(name: string): string[] | undefined;
  append(name: string, value: string): void;
  [Symbol.iterator](): IterableIterator<[string, string]>;
}

declare const CRLF: Uint8Array;
declare const CRLFCRLF: Uint8Array;

/**
 * A status line together with its headers. The request-side (`method`,
 * `requestPath`) and response-side (`protocol`, `statusCode`, `statusText`)
 * parts are exposed through getters.
 */
declare class StatusAndHeaders {
  statusline: string;
  headers: HeadersMultiMap | Headers;
  readonly reencodeHeaders?: Set<string>;
  constructor({
    statusline,
    headers,
    reencodeHeaders,
  }: {
    statusline: string;
    headers: HeadersMultiMap | Headers;
    reencodeHeaders?: Set<string>;
  });
  toString(): string;
  iterSerialize(encoder: TextEncoder): AsyncGenerator<Uint8Array, void, unknown>;
  _protocol: string | undefined;
  _statusCode: number | string | undefined;
  _statusText: string | undefined;
  _parseResponseStatusLine(): void;
  get statusCode(): string | number | undefined;
  get protocol(): string | undefined;
  get statusText(): string | undefined;
  _method: string | undefined;
  _requestPath: string | undefined;
  _parseRequestStatusLine(): void;
  get method(): string | undefined;
  get requestPath(): string | undefined;
}

/** Parses a `StatusAndHeaders` from an `AsyncIterReader`. */
declare class StatusAndHeadersParser {
  reencodeHeaders: Set<string>;
  // NOTE(review): resolved type reconstructed as `StatusAndHeaders | null` — confirm.
  parse(
    reader: AsyncIterReader,
    {
      headersClass,
      firstLine,
    }?: {
      firstLine?: string;
      headersClass: typeof HeadersMultiMap | typeof Headers;
    },
  ): Promise<StatusAndHeaders | null>;
  setHeader(
    name: string,
    value: string,
    headers: Headers | HeadersMultiMap,
    reencoded?: boolean,
  ): void;
}

// NOTE(review): result reconstructed as an [index, buffer] tuple — confirm.
declare function indexOfDoubleCRLF(
  buffer: Uint8Array,
  iter: AsyncIterator<Uint8Array>,
): Promise<readonly [number, Uint8Array]>;
declare function readToDoubleCRLF(reader: AsyncIterReader): Promise<Uint8Array>;

declare const WARC_1_1 = "WARC/1.1";
declare const WARC_1_0 = "WARC/1.0";

/** Accepted values for a record's WARC type. */
type WARCType =
  | "warcinfo"
  | "response"
  | "resource"
  | "request"
  | "metadata"
  | "revisit"
  | "conversion"
  | "continuation";

/** Options accepted by `WARCRecord.create` / `WARCRecord.createWARCInfo`; all optional. */
type WARCRecordOpts = {
  url?: string;
  date?: string;
  type?: WARCType;
  warcHeaders?: Record<string, string> | [string, string][];
  filename?: string;
  httpHeaders?: HeadersInit;
  statusline?: string;
  warcVersion?: typeof WARC_1_0 | typeof WARC_1_1;
  keepHeadersCase?: boolean;
  refersToUrl?: string;
  refersToDate?: string;
};

/**
 * A single WARC record: WARC headers, optional HTTP headers, and a reader
 * over the record body. `consumed` records how the body has been read so far.
 */
declare class WARCRecord extends BaseAsyncIterReader {
  static create(
    {
      url,
      date,
      type,
      warcHeaders,
      filename,
      httpHeaders,
      statusline,
      warcVersion,
      keepHeadersCase,
      refersToUrl,
      refersToDate,
    }?: WARCRecordOpts,
    reader?: AsyncIterable<Uint8Array> | Iterable<Uint8Array>,
  ): WARCRecord;
  static createWARCInfo(opts: WARCRecordOpts | undefined, info: Record<string, string>): WARCRecord;
  warcHeaders: StatusAndHeaders;
  _reader: AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
  _contentReader: BaseAsyncIterReader | null;
  payload: Uint8Array | null;
  httpHeaders: StatusAndHeaders | null;
  consumed: "content" | "raw" | "skipped" | "";
  _offset: number | undefined;
  _length: number;
  _urlkey: string;
  constructor({
    warcHeaders,
    reader,
  }: {
    warcHeaders: StatusAndHeaders;
    reader: AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
  });
  getResponseInfo(): {
    headers: Headers | HeadersMultiMap;
    status: string | number | undefined;
    statusText: string | undefined;
  } | null;
  fixUp(): void;
  readFully(isContent?: boolean): Promise<Uint8Array>;
  get reader(): AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
  get contentReader(): AsyncIterable<Uint8Array> | Iterable<Uint8Array>;
  _createDecodingReader(source: Source): AsyncIterReader;
  readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
  contentText(): Promise<string>;
  [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
  // NOTE(review): resolved type reconstructed as `number | undefined` — confirm.
  skipFully(): Promise<number | undefined>;
  warcHeader(name: string): string | null | undefined;
  get warcType(): WARCType;
  get warcTargetURI(): string | null | undefined;
  get warcDate(): string | null | undefined;
  get warcRefersToTargetURI(): string | null | undefined;
  get warcRefersToDate(): string | null | undefined;
  get warcPayloadDigest(): string | null | undefined;
  get warcBlockDigest(): string | null | undefined;
  get warcContentType(): string | null | undefined;
  get warcContentLength(): number;
  get warcConcurrentTo(): string[] | undefined;
}

type WARCParserOpts = {
  keepHeadersCase?: boolean;
  parseHttp?: boolean;
};

/**
 * Reads `WARCRecord`s from a `Source`, one at a time via `parse()` or by
 * async iteration, and reports the current record's `offset` / `recordLength`.
 */
declare class WARCParser implements IndexerOffsetLength {
  static parse(source: Source, options?: WARCParserOpts): Promise<WARCRecord | null>;
  static iterRecords(
    source: Source,
    options?: WARCParserOpts,
  ): AsyncGenerator<WARCRecord, void, unknown>;
  _offset: number;
  _warcHeadersLength: number;
  _headersClass: typeof HeadersMultiMap | typeof Headers;
  _parseHttp: boolean;
  _reader: AsyncIterReader;
  _record: WARCRecord | null;
  constructor(source: Source, { keepHeadersCase, parseHttp }?: WARCParserOpts);
  // NOTE(review): resolved type reconstructed as string — confirm.
  readToNextRecord(): Promise<string>;
  _initRecordReader(warcHeaders: StatusAndHeaders): LimitReader;
  parse(): Promise<WARCRecord | null>;
  get offset(): number;
  get recordLength(): number;
  [Symbol.asyncIterator](): AsyncGenerator<WARCRecord, void, unknown>;
  _addHttpHeaders(record: WARCRecord, headersParser: StatusAndHeadersParser): Promise<void>;
}

/** Options for `WARCSerializer`: gzip toggle, digest settings, and a pako preference flag. */
type WARCSerializerOpts = {
  gzip?: boolean;
  digest?: {
    algo?: AlgorithmIdentifier;
    prefix?: string;
    base32?: boolean;
  };
  preferPako?: boolean;
};

/** Buffer abstraction used by `WARCSerializer`: write chunks, read them all back, purge. */
declare abstract class BaseSerializerBuffer {
  abstract write(chunk: Uint8Array): void;
  abstract readAll(): AsyncIterable<Uint8Array>;
  abstract purge(): void;
}

/** `BaseSerializerBuffer` backed by an in-memory array of chunks (`buffers`). */
declare class SerializerInMemBuffer extends BaseSerializerBuffer {
  buffers: Uint8Array[];
  write(chunk: Uint8Array): void;
  readAll(): AsyncIterable<Uint8Array>;
  purge(): void;
}

/**
 * Serializes a `WARCRecord`, optionally gzip-compressed and with digests
 * computed through `hash-wasm` hashers.
 * NOTE(review): yielded chunk types reconstructed as `Uint8Array` — confirm.
 */
declare class WARCSerializer extends BaseAsyncIterReader {
  gzip: boolean;
  digestAlgo: AlgorithmIdentifier;
  digestAlgoPrefix: string;
  digestBase32: boolean;
  preferPako: boolean;
  record: WARCRecord;
  externalBuffer: BaseSerializerBuffer | undefined;
  private _alreadyDigested;
  blockHasher: IHasher | null;
  payloadHasher: IHasher | null;
  httpHeadersBuff: Uint8Array | null;
  warcHeadersBuff: Uint8Array | null;
  static serialize(
    record: WARCRecord,
    opts?: WARCSerializerOpts,
    externalBuffer?: BaseSerializerBuffer,
  ): Promise<Uint8Array>;
  constructor(record: WARCRecord, opts?: WARCSerializerOpts, externalBuffer?: BaseSerializerBuffer);
  [Symbol.asyncIterator](): AsyncGenerator<Uint8Array, void, unknown>;
  readlineRaw(maxLength?: number): Promise<Uint8Array | null>;
  pakoCompress(): AsyncGenerator<Uint8Array, void, unknown>;
  streamCompress(cs: CompressionStream): AsyncGenerator<Uint8Array, void, unknown>;
  newHasher(): Promise<IHasher>;
  getDigest(hasher: IHasher): string;
  // NOTE(review): resolved type reconstructed as number (a size) — confirm.
  digestRecord({
    recompute,
    returnPayloadOnlySize,
    payloadDigestForRevisit,
  }?: {
    recompute?: boolean | undefined;
    returnPayloadOnlySize?: boolean | undefined;
    payloadDigestForRevisit?: string | undefined;
  }): Promise<number>;
  generateRecord(): AsyncGenerator<Uint8Array, void, unknown>;
}

/** yargs builder for the index command: `filenames` and `fields`. */
declare const indexCommandArgs: (yarg: Argv) => Argv<
  {
    filenames: string[];
  } & {
    fields: string[];
  }
>;
/** Parsed argument object produced by `indexCommandArgs`. */
type IndexCommandArgs = Awaited<ReturnType<typeof indexCommandArgs>["argv"]>;

/** yargs builder for the CDX index command: adds `all`, `format` and `noSurt`. */
declare const cdxIndexCommandArgs: (yarg: Argv) => Argv<
  {
    filenames: string[];
  } & {
    all: boolean | undefined;
  } & {
    format: string;
  } & {
    noSurt: boolean | undefined;
  } & {
    fields: string[];
  }
>;
/** Parsed argument object produced by `cdxIndexCommandArgs`. */
type CdxIndexCommandArgs = Awaited<ReturnType<typeof cdxIndexCommandArgs>["argv"]>;

declare const DEFAULT_FIELDS: string[];
declare const DEFAULT_MAX_QUERY_SIZE = 4096;

/**
 * Base for indexers: walks the records of each input file and produces one
 * result object per indexed record, which `serialize` / `write` turn into text.
 * NOTE(review): result objects are reconstructed as `Record<string, any>` — confirm.
 */
declare abstract class BaseIndexer {
  opts: Partial<IndexCommandArgs>;
  fields: string[];
  reqFields: string[];
  parseHttp: boolean;
  constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
  serialize(result: Record<string, any>): string;
  write(result: Record<string, any>, out: WritableStreamBuffer | NodeJS.WriteStream): void;
  writeAll(files: StreamResults, out: WritableStreamBuffer | NodeJS.WriteStream): Promise<void>;
  iterIndex(files: StreamResults): AsyncGenerator<Record<string, any>, void, unknown>;
  iterRecords(
    parser: WARCParser,
    filename: string,
  ): AsyncGenerator<Record<string, any>, void, unknown>;
  filterRecord?(record: WARCRecord): boolean;
  indexRecord(
    record: WARCRecord,
    indexerOffset: IndexerOffsetLength,
    filename: string,
  ): Record<string, any> | null;
  setField(field: string, record: WARCRecord, result: Record<string, any>): void;
  getField(field: string, record: WARCRecord): string | number | null | undefined;
}

/** Concrete `BaseIndexer`; declares only a constructor. */
declare class Indexer extends BaseIndexer {
  constructor(opts?: Partial<IndexCommandArgs>, defaultFields?: string[]);
}

declare const DEFAULT_CDX_FIELDS: string[];
declare const DEFAULT_LEGACY_CDX_FIELDS: string[];

/** A CDX result together with the record (and optional request record) it came from. */
interface CDXAndRecord {
  cdx: Record<string, any>;
  record: WARCRecord;
  reqRecord: WARCRecord | null;
}

/**
 * Indexer that emits CDX entries (`serializeCDXJ` / `serializeCDX11`) and can
 * index a record together with a request record via `indexRecordPair`.
 */
declare class CDXIndexer extends Indexer {
  includeAll: boolean;
  overrideIndexForAll: boolean;
  noSurt: boolean;
  _lastRecord: WARCRecord | null;
  constructor(opts?: Partial<CdxIndexCommandArgs>);
  iterRecords(
    parser: WARCParser,
    filename: string,
  ): AsyncGenerator<Record<string, any>, void, unknown>;
  filterRecord(record: WARCRecord): boolean;
  indexRecord(
    record: WARCRecord | null,
    indexOffset: IndexerOffsetLength,
    filename: string,
  ): Record<string, any> | null;
  indexRecordPair(
    record: WARCRecord,
    reqRecord: WARCRecord | null,
    indexOffset: IndexerOffsetLength,
    filename: string,
    maxQuerySize?: number,
  ): Record<string, any> | null;
  serializeCDXJ(result: Record<string, any>): string;
  serializeCDX11(result: Record<string, any>): string;
  getField(field: string, record: WARCRecord): string | number | null | undefined;
}

/** `CDXIndexer` whose `indexRecordPair` returns a `CDXAndRecord` instead of a bare result. */
declare class CDXAndRecordIndexer extends CDXIndexer {
  constructor(opts?: Partial<CdxIndexCommandArgs>);
  indexRecordPair(
    record: WARCRecord,
    reqRecord: WARCRecord | null,
    indexOffset: IndexerOffsetLength,
    filename: string,
  ): CDXAndRecord | null;
}

declare function isRequestHeader(header: string): boolean;

export {
  AsyncIterReader,
  AsyncIterReaderOpts,
  BaseAsyncIterReader,
  BaseSerializerBuffer,
  CDXAndRecord,
  CDXAndRecordIndexer,
  CDXIndexer,
  CRLF,
  CRLFCRLF,
  DEFAULT_CDX_FIELDS,
  DEFAULT_FIELDS,
  DEFAULT_LEGACY_CDX_FIELDS,
  DEFAULT_MAX_QUERY_SIZE,
  HeadersMultiMap,
  Indexer,
  IndexerOffsetLength,
  LimitReader,
  NoConcatInflator,
  Request,
  Response,
  SerializerInMemBuffer,
  Source,
  SourceReadable,
  SourceReader,
  StatusAndHeaders,
  StatusAndHeadersParser,
  StreamResult,
  StreamResults,
  UTFToLatin1,
  WARCParser,
  WARCParserOpts,
  WARCRecord,
  WARCRecordOpts,
  WARCSerializer,
  WARCSerializerOpts,
  WARCType,
  WARC_1_0,
  WARC_1_1,
  WARC_ALLOWED_MULTI_VALUE_HEADERS,
  appendRequestQuery,
  binaryToString,
  concatChunks,
  getSurt,
  indexOfDoubleCRLF,
  isRequestHeader,
  isValidMultiValueHeaderName,
  jsonToQueryParams,
  jsonToQueryString,
  latin1ToUTF,
  mfdToQueryParams,
  mfdToQueryString,
  multiValueHeader,
  postToGetUrl,
  readToDoubleCRLF,
  rxEscape,
  splitChunk,
};