import { OpUnitType } from "dayjs";
import { matchAnyPattern, repeatedTimeunitPattern } from "../../utils/pattern";
import { findMostLikelyADYear } from "../../calculation/years";
import { TimeUnits } from "../../utils/timeunits";

/**
 * German weekday names and two-letter abbreviations mapped to a weekday
 * index, with Sunday ("sonntag") as 0 and Saturday ("samstag") as 6.
 */
export const WEEKDAY_DICTIONARY: { [word: string]: number } = {
    "sonntag": 0,
    "so": 0,
    "montag": 1,
    "mo": 1,
    "dienstag": 2,
    "di": 2,
    "mittwoch": 3,
    "mi": 3,
    "donnerstag": 4,
    "do": 4,
    "freitag": 5,
    "fr": 5,
    "samstag": 6,
    "sa": 6,
};

/**
 * German month names and abbreviations (with and without a trailing dot)
 * mapped to a 1-based month number (January = 1 ... December = 12).
 */
export const MONTH_DICTIONARY: { [word: string]: number } = {
    "januar": 1,
    "jan": 1,
    "jan.": 1,
    "februar": 2,
    "feb": 2,
    "feb.": 2,
    "märz": 3,
    "maerz": 3,
    "mär": 3,
    "mär.": 3,
    "mrz": 3,
    "mrz.": 3,
    "april": 4,
    "apr": 4,
    "apr.": 4,
    "mai": 5,
    "juni": 6,
    "jun": 6,
    "jun.": 6,
    "juli": 7,
    "jul": 7,
    "jul.": 7,
    "august": 8,
    "aug": 8,
    "aug.": 8,
    "september": 9,
    "sep": 9,
    "sep.": 9,
    "sept": 9,
    "sept.": 9,
    "oktober": 10,
    "okt": 10,
    "okt.": 10,
    "november": 11,
    "nov": 11,
    "nov.": 11,
    "dezember": 12,
    "dez": 12,
    "dez.": 12,
};

/**
 * German number words for 1 through 12, including the ASCII transliterations
 * of the umlaut spellings ("fuenf", "zwoelf").
 */
export const INTEGER_WORD_DICTIONARY: { [word: string]: number } = {
    "eins": 1,
    "zwei": 2,
    "drei": 3,
    "vier": 4,
    "fünf": 5,
    "fuenf": 5,
    "sechs": 6,
    "sieben": 7,
    "acht": 8,
    "neun": 9,
    "zehn": 10,
    "elf": 11,
    "zwölf": 12,
    "zwoelf": 12,
};

/**
 * Time-unit words mapped to dayjs unit identifiers.
 *
 * NOTE(review): every key here is an English word although the other
 * dictionaries in this file are German - confirm whether German unit words
 * are expected to be handled by this table.
 */
export const TIME_UNIT_DICTIONARY: { [word: string]: OpUnitType } = {
    sec: "second",
    second: "second",
    seconds: "second",
    min: "minute",
    mins: "minute",
    minute: "minute",
    minutes: "minute",
    h: "hour",
    hr: "hour",
    hrs: "hour",
    hour: "hour",
    hours: "hour",
    day: "d",
    days: "d",
    week: "week",
    weeks: "week",
    month: "month",
    months: "month",
    y: "year",
    yr: "year",
    year: "year",
    years: "year",
};

//-----------------------------

/**
 * Regex source (non-capturing group) for a quantity: a word from
 * INTEGER_WORD_DICTIONARY, an integer, a decimal number, or one of a few
 * vague quantifiers ("half", "a"/"an", "few", "several", "couple").
 *
 * NOTE(review): the vague quantifiers are English words; confirm that this
 * is intended for this file.
 */
export const NUMBER_PATTERN = `(?:${matchAnyPattern(
    INTEGER_WORD_DICTIONARY
)}|[0-9]+|[0-9]+\\.[0-9]+|half(?:\\s*an?)?|an?\\b(?:\\s*few)?|few|several|a?\\s*couple\\s*(?:of)?)`;

/**
 * Converts text matched by NUMBER_PATTERN into a number.
 *
 * @param match Quantity text; compared case-insensitively.
 * @returns The dictionary value for a known number word; 1 for "a"/"an";
 *   3 for text containing "few"; 0.5 for "half"; 2 for "couple"; 7 for
 *   "several"; otherwise the result of `parseFloat`, which is `NaN` when the
 *   text does not start with a number.
 */
export function parseNumberPattern(match: string): number {
    const num = match.toLowerCase();

    if (INTEGER_WORD_DICTIONARY[num] !== undefined) {
        return INTEGER_WORD_DICTIONARY[num];
    }
    if (num === "a" || num === "an") {
        return 1;
    }
    // Order matters: "a few" must be resolved here, before any later branch.
    if (num.includes("few")) {
        return 3;
    }
    if (num.includes("half")) {
        return 0.5;
    }
    if (num.includes("couple")) {
        return 2;
    }
    if (num.includes("several")) {
        return 7;
    }

    return parseFloat(num);
}

//-----------------------------

/**
 * Regex source for a year of 1-4 digits with an optional German era suffix
 * such as "v. Chr." (vor Christus, BC) or "n. Chr." (nach Christus, AD).
 */
export const YEAR_PATTERN = `(?:[0-9]{1,4}(?:\\s*[vn]\\.?\\s*C(?:hr)?\\.?)?)`;

/**
 * Converts text matched by YEAR_PATTERN into a year number.
 *
 * @param match Year text, optionally followed by an era suffix.
 * @returns The negated number when the text contains "v" (v.Chr.); the plain
 *   number when it contains "n" (n.Chr.); otherwise the value returned by
 *   `findMostLikelyADYear` for the parsed number.
 */
export function parseYear(match: string): number {
    if (/v/i.test(match)) {
        // v.Chr.
        return -parseInt(match.replace(/[^0-9]+/gi, ""), 10);
    }

    if (/n/i.test(match)) {
        // n.Chr.
        return parseInt(match.replace(/[^0-9]+/gi, ""), 10);
    }

    const rawYearNumber = parseInt(match, 10);
    return findMostLikelyADYear(rawYearNumber);
}

//-----------------------------

// One "<number> <unit>" fragment. Group 1 captures the number text and
// group 2 the unit word; up to five whitespace characters are allowed
// between them and after the unit.
const SINGLE_TIME_UNIT_PATTERN = `(${NUMBER_PATTERN})\\s{0,5}(${matchAnyPattern(TIME_UNIT_DICTIONARY)})\\s{0,5}`;
const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");

/** Regex source built by `repeatedTimeunitPattern` from the single-fragment pattern with an empty prefix. */
export const TIME_UNITS_PATTERN = repeatedTimeunitPattern("", SINGLE_TIME_UNIT_PATTERN);

/**
 * Extracts every "<number> <unit>" fragment from a text and collects them
 * into a unit -> amount map.
 *
 * @param timeunitText Text containing one or more number/unit fragments.
 * @returns An object keyed by the TIME_UNIT_DICTIONARY unit value; when the
 *   same unit occurs more than once, the last occurrence wins. Empty when no
 *   fragment is found.
 */
export function parseTimeUnits(timeunitText): TimeUnits {
    const fragments = {};
    let remainingText = timeunitText;
    let match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    while (match) {
        collectDateTimeFragment(fragments, match);
        // The regex is not anchored, so the match may start after skipped
        // characters (e.g. ", "). Advance past the END of the match; cutting
        // only match[0].length characters from the start would land inside
        // the matched text and re-scan its tail, so ", 15 h" would be parsed
        // a second time as "5 h".
        remainingText = remainingText.substring(match.index + match[0].length);
        match = SINGLE_TIME_UNIT_REGEX.exec(remainingText);
    }
    return fragments;
}

/**
 * Stores one matched fragment in `fragments`, keyed by the unit that
 * TIME_UNIT_DICTIONARY gives for the lower-cased unit word (group 2), with
 * the parsed number (group 1) as the value.
 */
function collectDateTimeFragment(fragments, match) {
    const num = parseNumberPattern(match[1]);
    const unit = TIME_UNIT_DICTIONARY[match[2].toLowerCase()];
    fragments[unit] = num;
}