import { Env } from "../Env";
import { Day } from "../Time";
import { Config, Index } from "../Types";
import { FileInput } from "../parse/File";
import { Parser } from "../parse/Parser";
import { PAuthor, PCall, PChannel, PGuild, RawID } from "../parse/Types";
import { ChannelMessages } from "../process/ChannelMessages";
import { IndexedMap } from "../process/IndexedMap";
import { MessageProcessor } from "../process/MessageProcessor";
import { Database, Emoji } from "../process/Types";

/**
 * Builds the Database object from input files.
 *
 * ⚠️ You probably want to use `generateDatabase` instead of manually creating a DatabaseBuilder.
 *
 * Expected call order, per the member docs below: `init()` first, then `processFiles()`
 * (possibly several times), and finally `build()` to obtain the `Database`.
 *
 * NOTE(review): the generic type arguments in this declaration were missing (bare `Promise`,
 * `Map` and `IndexedMap`), which does not compile. They have been reconstructed from the
 * otherwise-unused imports (`PGuild`, `PChannel`, `PAuthor`, `PCall`, `Emoji`,
 * `ChannelMessages`) and from the surrounding comments. Arguments that could not be derived
 * that way carry their own `NOTE(review)` and must be confirmed against the implementation.
 */
export declare class DatabaseBuilder {
    private readonly config;
    private readonly env;

    parser: Parser;

    // Entities coming from the parser. NOTE(review): assumed to be keyed by their RawID — confirm.
    guilds: IndexedMap<RawID, PGuild>;
    channels: IndexedMap<RawID, PChannel>;
    authors: IndexedMap<RawID, PAuthor>;
    calls: IndexedMap<RawID, PCall>;

    // NOTE(review): the key/value types of the following four maps are reconstructed guesses
    // (string keys; string values except for `emojis`, whose value type comes from the
    // `Emoji` import) — confirm against the implementation.
    words: IndexedMap<string, string>;
    emojis: IndexedMap<string, Emoji>;
    mentions: IndexedMap<string, string>;
    domains: IndexedMap<string, string>;

    replyIds: RawID[];

    /**
     * Each channel has its own ChannelMessages instance.
     *
     * NOTE(review): key type assumed to be the channel's `Index` — confirm.
     */
    messagesInChannel: Map<Index, ChannelMessages>;

    /** Global messages processor */
    messageProcessor: MessageProcessor;

    get numChannels(): number;
    get numAuthors(): number;
    get numMessages(): number;

    constructor(config: Config, env: Env);

    private stopwords?;

    /**
     * Initialize static data. Must be called before `processFiles`.
     *
     * NOTE(review): resolved value assumed to be `void` — confirm.
     */
    init(): Promise<void>;

    /**
     * Process the provided files.
     *
     * NOTE(review): resolved value assumed to be `void` — confirm.
     */
    processFiles(files: FileInput[]): Promise<void>;

    /** Process the provided file. Throws in case of error. */
    private processFile;

    /** Goes through every ChannelMessages instance and processes all the messages that remain pending */
    private processPendingMessages;

    /** Signals EOF to all ChannelMessages. MUST be called */
    private markEOF;

    minDate: Day;
    maxDate: Day;

    guildCounts: number[];
    channelCounts: number[];
    authorCounts: number[];
    wordsCounts: number[];
    langCounts: number[];

    guildsRank: Index[];
    channelsRank: Index[];
    authorsRank: Index[];
    wordsRank: Index[];

    /**
     * We want to store participants for DM chats to later override the channel name with "Alice & Bob".
     *
     * NOTE(review): assumed shape is channel `Index` -> set of author `Index` values — confirm.
     */
    dmParticipants: Map<Index, Set<Index>>;

    private postProcessMessage;

    /** Detects languages that appear more than a threshold */
    private detectLanguages;

    /** Filter words. Skips infrequent words and stopwords. */
    private filterWords;

    /**
     * [+] Why indexing?
     *     While we process messages it is more efficient to store an index to the actual
     *     author/word/whatever than storing a full RawID that may potentially be a large string.
     * [+] Why the burden of reindexing?
     *     We want to sort authors, channels and such by the number of messages they have, so it
     *     doesn't need to be done in the UI (also all indexes end up being nice :) )
     * [+] How?
     *     During processing we use the index that IndexedMap provides. After all processing is
     *     done we use the counts to generate a mapping between the old index and the "final"
     *     ones with `rank`.
     */
    private countAndReindex;

    /** Makes final objects for Guilds, Channels and Authors */
    private makeFinalObjects;

    /** Transforms a parser PGuild into a final Guild */
    private makeFinalGuild;

    /** Transforms a parser PChannel into a final Channel */
    private makeFinalChannel;

    /** Transforms a parser PAuthor into a final Author */
    private makeFinalAuthor;

    private compactMessagesData;
    private processCalls;

    /** Produces the final `Database` object. */
    build(): Database;

    /** Builds the final report title. Do we want to have this here? */
    private buildTitle;
}