import {
  calcBayesFactorFromMandU,
  calcPartialMatchWeightFromBayesFactor,
  calcPartialMatchWeightFromProbability,
} from "./convertMatchScoreMetrics";

/**
 * One row of the partial-match-weights data produced by
 * `settingsToPartialMatchWeightsData`.
 */
type OutputData = {
  comparison_name: string;
  sql_condition: string;
  label_for_charts: string;
  m_probability: number;
  u_probability: number;
  comparison_vector_value: number;
  probability_two_random_records_match: number;
  log2_bayes_factor: number;
};

/**
 * Returns a copy of `settings` in which every comparison level carries a
 * `comparison_vector_value`.
 *
 * Within each comparison, null levels (`is_null_level` truthy) receive `null`.
 * The remaining levels are numbered in the order they appear, counting down
 * from `(number of non-null levels - 1)`.
 *
 * The settings object, each comparison and each level are shallow-copied;
 * the input is not mutated.
 *
 * @param settings Object with a `comparisons` array; each comparison has a
 *   `comparison_levels` array.
 * @returns The shallow-copied settings with annotated comparison levels.
 */
export function addComparisonVectorValue(settings) {
  const updatedSettings = { ...settings };

  updatedSettings.comparisons = settings.comparisons.map((comparison) => {
    const nonNullCount = comparison.comparison_levels.filter(
      (level) => !level.is_null_level
    ).length;

    // Handed out to non-null levels in order, decreasing by one each time.
    let nextValue = nonNullCount - 1;

    const annotatedLevels = comparison.comparison_levels.map((level) => ({
      ...level,
      comparison_vector_value: level.is_null_level ? null : nextValue--,
    }));

    return { ...comparison, comparison_levels: annotatedLevels };
  });

  return updatedSettings;
}

/**
 * Builds the leading record that represents the prior
 * (`probability_two_random_records_match`) in the partial-match-weights data.
 *
 * @param probability The prior probability taken from the settings.
 * @returns A record whose `log2_bayes_factor` is derived from `probability`
 *   via `calcPartialMatchWeightFromProbability`; m, u and the comparison
 *   vector value are set to 0 and the text fields are empty.
 */
function calculatePartialMatchWeightsInitialRecord(
  probability: number
): OutputData {
  return {
    comparison_name: "probability_two_random_records_match",
    sql_condition: "",
    label_for_charts: "",
    m_probability: 0,
    u_probability: 0,
    log2_bayes_factor: calcPartialMatchWeightFromProbability(probability),
    comparison_vector_value: 0,
    probability_two_random_records_match: probability,
  };
}

/**
 * Flattens `settings` into a list of partial-match-weight records.
 *
 * The first record describes the prior; it is followed by one record per
 * non-null comparison level, in settings order. A missing (`null`/`undefined`)
 * `m_probability` or `u_probability` is treated as 0.
 *
 * @param settings Object with `comparisons` and
 *   `probability_two_random_records_match`.
 * @returns The prior record followed by the per-level records.
 */
export function settingsToPartialMatchWeightsData(settings): OutputData[] {
  const updatedSettings = addComparisonVectorValue(settings);
  const prior = updatedSettings.probability_two_random_records_match;

  const output: OutputData[] = [
    calculatePartialMatchWeightsInitialRecord(prior),
  ];

  for (const comparison of updatedSettings.comparisons) {
    for (const level of comparison.comparison_levels) {
      // Null levels do not contribute a row.
      if (level.is_null_level) continue;

      const m = level.m_probability ?? 0;
      const u = level.u_probability ?? 0;
      const bayesFactor = calcBayesFactorFromMandU(m, u);

      output.push({
        comparison_name: comparison.output_column_name,
        sql_condition: level.sql_condition,
        label_for_charts: level.label_for_charts,
        m_probability: m,
        u_probability: u,
        comparison_vector_value: level.comparison_vector_value,
        probability_two_random_records_match: prior,
        log2_bayes_factor: calcPartialMatchWeightFromBayesFactor(bayesFactor),
      });
    }
  }

  return output;
}

/**
 * Resolves the comparison level activated by each `γ_<column>` entry of a
 * comparison vector.
 *
 * For every key starting with `"γ_"`, the prefix is stripped to obtain the
 * column name, the comparison with that `output_column_name` is looked up,
 * and the level whose `comparison_vector_value` strictly equals the entry's
 * value is selected. Keys with no matching comparison or level are omitted.
 *
 * @param settings Settings object (annotated internally via
 *   `addComparisonVectorValue`).
 * @param comparisonVector Object holding the `γ_<column>` entries.
 * @returns A map from the original `γ_<column>` key to the matching level.
 */
function getComparisonLevels(settings, comparisonVector) {
  // Explicit record type: indexing a bare `{}` by string does not type-check
  // under noImplicitAny.
  const result: Record<string, any> = {};
  const updatedSettings = addComparisonVectorValue(settings);

  for (const [key, value] of Object.entries(comparisonVector)) {
    if (!key.startsWith("γ_")) continue;

    const columnName = key.slice(2);
    const comparisonInfo = updatedSettings.comparisons.find(
      (comp) => comp.output_column_name === columnName
    );
    if (!comparisonInfo) continue;

    const comparisonLevel = comparisonInfo.comparison_levels.find(
      (level) => level.comparison_vector_value === value
    );
    if (comparisonLevel) {
      result[key] = comparisonLevel;
    }
  }

  return result;
}

/**
 * Builds the first waterfall record, representing the prior.
 *
 * The prior probability `p` (0 when missing) is converted to a Bayes factor
 * by passing `(p, 1 - p)` to `calcBayesFactorFromMandU` — presumably
 * `p / (1 - p)`; confirm against that helper.
 *
 * @param settings Object with `probability_two_random_records_match`.
 * @returns The "Prior" waterfall record with `bar_sort_order` 0.
 */
function getInitialWaterfallRecord(settings) {
  const priorProbability = settings.probability_two_random_records_match ?? 0;
  const priorBayesFactor = calcBayesFactorFromMandU(
    priorProbability,
    1 - priorProbability
  );

  return {
    column_name: "Prior",
    label_for_charts: "Starting match weight (prior)",
    sql_condition: null,
    log2_bayes_factor: calcPartialMatchWeightFromBayesFactor(priorBayesFactor),
    bayes_factor: priorBayesFactor,
    comparison_vector_value: null,
    m_probability: null,
    u_probability: null,
    bayes_factor_description: null,
    value_l: "",
    value_r: "",
    term_frequency_adjustment: null,
    bar_sort_order: 0,
    record_number: 0,
  };
}

/**
 * Builds the closing waterfall record.
 *
 * Its `log2_bayes_factor` is the sum of `log2_bayes_factor` over all records
 * in `output`, and its `bayes_factor` is 2 raised to that sum.
 *
 * @param output The waterfall records accumulated so far.
 * @returns The "Final score" waterfall record.
 */
function getFinalWaterfallRecord(output: any[]): any {
  let totalLog2BayesFactor = 0;
  for (const record of output) {
    totalLog2BayesFactor += record.log2_bayes_factor;
  }

  return {
    column_name: "Final score",
    label_for_charts: "Final score",
    sql_condition: null,
    log2_bayes_factor: totalLog2BayesFactor,
    bayes_factor: Math.pow(2, totalLog2BayesFactor),
    comparison_vector_value: null,
    m_probability: null,
    u_probability: null,
    bayes_factor_description: null,
    value_l: "",
    value_r: "",
    term_frequency_adjustment: null,
    // NOTE(review): hard-coded, while every per-column record uses 1 —
    // confirm the consumer does not need one sort key per bar.
    bar_sort_order: 5,
  };
}

/**
 * Produces waterfall-chart records for a single comparison vector.
 *
 * The result starts with the prior record, continues with one record per
 * `γ_<column>` entry that resolves to a comparison level, and ends with the
 * final-score record summing everything before it. A missing m or u
 * probability on a level is treated as 0.
 *
 * @param settings Settings object describing the comparisons.
 * @param comparisonVector Object with `γ_<column>` entries and the matching
 *   `<column>_l` / `<column>_r` values.
 * @returns The ordered list of waterfall records.
 */
export function settingsToWaterfall(settings, comparisonVector) {
  const comparisonLevels = getComparisonLevels(settings, comparisonVector);
  const output: any[] = [getInitialWaterfallRecord(settings)];

  for (const [gammaKey, comparisonInfo] of Object.entries(
    comparisonLevels
  ) as [string, any][]) {
    const columnName = gammaKey.slice(2);
    const m = comparisonInfo.m_probability ?? 0;
    const u = comparisonInfo.u_probability ?? 0;
    const bayesFactor = calcBayesFactorFromMandU(m, u);

    output.push({
      column_name: columnName,
      label_for_charts: comparisonInfo.label_for_charts,
      sql_condition: comparisonInfo.sql_condition,
      log2_bayes_factor: calcPartialMatchWeightFromBayesFactor(bayesFactor),
      bayes_factor: bayesFactor,
      comparison_vector_value: comparisonInfo.comparison_vector_value,
      m_probability: m,
      u_probability: u,
      bayes_factor_description: `If comparison level is \`${
        comparisonInfo.label_for_charts
      }\` then comparison is ${bayesFactor.toFixed(
        2
      )} times more likely to be a match`,
      value_l: comparisonVector[`${columnName}_l`],
      value_r: comparisonVector[`${columnName}_r`],
      term_frequency_adjustment: false,
      bar_sort_order: 1,
    });
  }

  // The final record sums every record pushed so far, prior included.
  output.push(getFinalWaterfallRecord(output));
  return output;
}

/**
 * Renders every comparison level (null levels included) as a row of a simple
 * table whose first row is the header.
 *
 * Columns: `column_name`, `scenario_name`, `comparison_vector_value`,
 * `m_probability`, `u_probability`. Probabilities are stringified, with
 * `"N/A"` used when a probability is missing.
 *
 * @param settings Settings object describing the comparisons.
 * @returns The header row followed by one row per comparison level.
 */
export function transformComparisonLevelsToTable(settings) {
  const settingsWithCvv = addComparisonVectorValue(settings);

  // The header must list the same five columns, in the same order, as the
  // rows pushed below; it previously omitted comparison_vector_value, which
  // misaligned every column from the third onward.
  const table = [
    [
      "column_name",
      "scenario_name",
      "comparison_vector_value",
      "m_probability",
      "u_probability",
    ],
  ];

  for (const comparison of settingsWithCvv.comparisons) {
    for (const level of comparison.comparison_levels) {
      table.push([
        comparison.output_column_name,
        level.label_for_charts,
        level.comparison_vector_value,
        level.m_probability?.toString() || "N/A",
        level.u_probability?.toString() || "N/A",
      ]);
    }
  }

  return table;
}