/**
 * Copyright (c) Microsoft Corporation. All rights reserved.
 * Licensed under the MIT License.
 */

/**
 * A single token produced by splitting a text string.
 *
 * `start` and `end` locate the token inside the source string, while `text`
 * and `normalized` carry its content.
 */
export interface Token {
    /**
     * Index in the original text at which this token begins.
     */
    start: number;

    /**
     * Index in the original text at which this token ends.
     *
     * NOTE(review): this declaration does not state whether the index is
     * inclusive or exclusive; confirm against the tokenizer implementation.
     */
    end: number;

    /**
     * The token's text exactly as it appeared in the original input.
     */
    text: string;

    /**
     * The token's text in normalized (lowercase) form.
     */
    normalized: string;
}

/**
 * Shape of a pluggable word breaker that can be supplied as an alternative
 * tokenizer.
 *
 * @param text The text to split into tokens.
 * @param locale (Optional) locale of `text`, when it is known.
 * @returns The tokens found in `text`.
 */
export type TokenizerFunction = (text: string, locale?: string) => Token[];

/**
 * Basic tokenizer that splits its input on spaces and punctuation.
 *
 * @param text The input text to tokenize.
 * @param _locale (Optional) locale identifying the language of the input text.
 * @returns The list of tokens extracted from `text`.
 */
export declare function defaultTokenizer(text: string, _locale?: string): Token[];