import { addComparisonVectorValue } from "./settings_transformers";
import {
  calcBayesFactorFromMandU,
  calcPartialMatchWeightFromBayesFactor,
  calcPartialMatchWeightFromProbability,
} from "./convertMatchScoreMetrics";

/** A comparison function; it is called with the left value and the right value of one column. */
type ComparisonFunc = (...args: any[]) => any;

/** A loosely typed record keyed by column name. */
type Dict = { [key: string]: any };

/**
 * Builds a single pairwise row: for each name in `columns`, the value read
 * from `left` is stored under `<name>_l` and the value read from `right`
 * under `<name>_r`. Keys are inserted in the order of `columns`.
 */
function buildComparisonRow(
  left: Dict,
  right: Dict,
  columns: readonly string[]
): Dict {
  const comparisonRow: Dict = {};
  for (const column of columns) {
    comparisonRow[`${column}_l`] = left[column];
    comparisonRow[`${column}_r`] = right[column];
  }
  return comparisonRow;
}

/**
 * Produces one comparison row for every unordered pair of records in
 * `tableData` (each record is paired with every record after it).
 *
 * @param tableData - Input records.
 * @param columns_to_compare - Columns copied into each row as `<col>_l` / `<col>_r`.
 * @param unique_id_column_name - Identifier column; emitted first as `<id>_l` / `<id>_r`.
 * @returns The comparison rows, in `(i, j)` order with `i < j`.
 */
export function createComparisonRows(
  tableData: any[],
  columns_to_compare: string[],
  unique_id_column_name: string = "uid"
) {
  // The id column goes first so it leads the key order of every output row.
  const pairColumns = [unique_id_column_name, ...columns_to_compare];
  const comparisonRows: any[] = [];

  for (let i = 0; i < tableData.length; i++) {
    for (let j = i + 1; j < tableData.length; j++) {
      comparisonRows.push(
        buildComparisonRow(tableData[i], tableData[j], pairColumns)
      );
    }
  }

  return comparisonRows;
}

/**
 * Adds a `γ_<column>` entry to each row for every column that has a
 * comparison function registered in `funcs`.
 *
 * For each key of a row ending in `_l` whose base name is an own property of
 * `funcs`, the `<base>_l` and `<base>_r` values are copied and
 * `funcs[base](left, right)` is stored under `γ_<base>`. All other keys are
 * copied through unchanged.
 *
 * @param data - Comparison rows containing `<column>_l` / `<column>_r` keys.
 * @param funcs - Comparison functions keyed by base column name.
 * @returns New row objects; the input rows are not mutated.
 */
export function applyComparisonFunctions(
  data: Dict[],
  funcs: { [key: string]: ComparisonFunc }
): Dict[] {
  return data.map((row) => {
    const newRow: Dict = {};

    for (const key in row) {
      const baseKey = key.endsWith("_l") ? key.slice(0, -2) : null;

      // Go through Object.prototype so the check still works when `funcs` has
      // no prototype or carries its own "hasOwnProperty" entry.
      if (baseKey && Object.prototype.hasOwnProperty.call(funcs, baseKey)) {
        const leftKey = `${baseKey}_l`;
        const rightKey = `${baseKey}_r`;
        const gammaKey = `γ_${baseKey}`;

        newRow[leftKey] = row[leftKey];
        newRow[rightKey] = row[rightKey];
        newRow[gammaKey] = funcs[baseKey](row[leftKey], row[rightKey]);
      } else {
        newRow[key] = row[key];
      }
    }

    return newRow;
  });
}

/**
 * Produces comparison rows only for pairs of records that come from
 * different input tables.
 *
 * All tables are concatenated, and each record is tagged with the index of
 * the table it came from (stored under `source_dataset_column_name`,
 * replacing any existing value under that key). Pairs whose tags are equal
 * are skipped.
 *
 * @param multipleTableData - Array of tables, each an array of records.
 * @param columns_to_compare - Columns copied into each row as `<col>_l` / `<col>_r`.
 * @param unique_id_column_name - Identifier column; emitted first.
 * @param source_dataset_column_name - Column that receives the table index; emitted second.
 * @returns The cross-table comparison rows.
 */
export function createComparisonRowsLinkOnly(
  multipleTableData: any[],
  columns_to_compare: string[],
  unique_id_column_name: string = "uid",
  source_dataset_column_name: string = "source_dataset"
) {
  const comparisonRows: any[] = [];
  const allData: any[] = [];

  // Concatenate tables and add table identifiers
  multipleTableData.forEach((tableData, tableIndex) => {
    tableData.forEach((row: any) => {
      allData.push({ ...row, [source_dataset_column_name]: tableIndex });
    });
  });

  const pairColumns = [
    unique_id_column_name,
    source_dataset_column_name,
    ...columns_to_compare,
  ];

  for (let i = 0; i < allData.length; i++) {
    for (let j = i + 1; j < allData.length; j++) {
      const row1 = allData[i];
      const row2 = allData[j];

      // Skip comparisons within the same table
      if (row1[source_dataset_column_name] === row2[source_dataset_column_name]) {
        continue;
      }

      comparisonRows.push(buildComparisonRow(row1, row2, pairColumns));
    }
  }

  return comparisonRows;
}

/**
 * Enriches comparison-vector rows with model parameters taken from `settings`.
 *
 * Every output row contains, in this order:
 *  - the id and source-dataset `_l` / `_r` fields (`undefined` when absent
 *    from the input row);
 *  - `ω_prior`, the result of `calcPartialMatchWeightFromProbability` applied
 *    to `settings.probability_two_random_records_match`;
 *  - for each comparison in the settings returned by
 *    `addComparisonVectorValue(settings)`: `<col>_l`, `<col>_r`, `γ_<col>`,
 *    and — when a comparison level whose `comparison_vector_value` strictly
 *    equals `γ_<col>` exists — `m_<col>`, `u_<col>`, `bf_<col>` and `ω_<col>`;
 *  - `ω_final_match_weight`, the sum of `ω_prior` and every `ω_<col>`.
 *
 * @param comparisonVectorRows - Rows carrying `γ_<col>` values, e.g. as
 *   produced by `applyComparisonFunctions`.
 * @param settings - Model settings; must expose
 *   `probability_two_random_records_match` and be accepted by
 *   `addComparisonVectorValue`.
 * @param unique_id_column_name - Identifier column base name (default `"uid"`);
 *   should match the name used when the comparison rows were built.
 * @param source_dataset_column_name - Source-dataset column base name
 *   (default `"source_dataset"`).
 * @returns New row objects; the input rows are not mutated.
 */
export function addModelParametersToComparisonVector(
  comparisonVectorRows: any[],
  settings: any,
  unique_id_column_name: string = "uid",
  source_dataset_column_name: string = "source_dataset"
): any[] {
  const settings_with_cvv = addComparisonVectorValue(settings);

  const prior_ω = calcPartialMatchWeightFromProbability(
    settings.probability_two_random_records_match
  );

  // Common fields copied verbatim to the front of every output row.
  const commonFields = [
    `${unique_id_column_name}_l`,
    `${unique_id_column_name}_r`,
    `${source_dataset_column_name}_l`,
    `${source_dataset_column_name}_r`,
  ];

  return comparisonVectorRows.map((originalRow) => {
    const newRow: Dict = {};

    for (const field of commonFields) {
      newRow[field] = originalRow[field];
    }

    // Weights are tracked separately from the output row so that a data
    // column whose name happens to start with "ω_" can never leak into the
    // final sum. Keys mirror the output keys, so a repeated key overwrites
    // rather than double-counts.
    const matchWeights: Dict = {};

    newRow["ω_prior"] = prior_ω;
    matchWeights["ω_prior"] = prior_ω;

    settings_with_cvv.comparisons.forEach((comparison: any) => {
      const output_column_name = comparison.output_column_name;
      const gamma_value = originalRow[`γ_${output_column_name}`];

      // Fields for each output_column_name, in the desired order
      newRow[`${output_column_name}_l`] = originalRow[`${output_column_name}_l`];
      newRow[`${output_column_name}_r`] = originalRow[`${output_column_name}_r`];
      newRow[`γ_${output_column_name}`] = gamma_value;

      const level = comparison.comparison_levels.find(
        (candidate: any) => candidate.comparison_vector_value === gamma_value
      );

      // When no level matches, no m/u/bf/ω fields are emitted for this column.
      if (level) {
        const bayesFactor = calcBayesFactorFromMandU(
          level.m_probability,
          level.u_probability
        );
        const matchWeight = calcPartialMatchWeightFromBayesFactor(bayesFactor);

        newRow[`m_${output_column_name}`] = level.m_probability;
        newRow[`u_${output_column_name}`] = level.u_probability;
        newRow[`bf_${output_column_name}`] = bayesFactor;
        newRow[`ω_${output_column_name}`] = matchWeight;
        matchWeights[`ω_${output_column_name}`] = matchWeight;
      }
    });

    // Compute ω_final_match_weight
    let omegaSum = 0;
    for (const weight of Object.values(matchWeights)) {
      omegaSum += weight;
    }
    newRow["ω_final_match_weight"] = omegaSum;

    return newRow;
  });
}