{
    "config": {
        "view": {
            "continuousWidth": 400,
            "continuousHeight": 300
        }
    },
    "layer": [
        {
            "layer": [
                {
                    "mark": "rule",
                    "encoding": {
                        "color": {
                            "value": "black"
                        },
                        "size": {
                            "value": 0.5
                        },
                        "y": {
                            "field": "zero",
                            "type": "quantitative"
                        }
                    }
                },
                {
                    "mark": {
                        "type": "bar",
                        "width": 60
                    },
                    "encoding": {
                        "color": {
                            "condition": {
                                "test": "(datum.log2_bayes_factor < 0)",
                                "value": "red"
                            },
                            "value": "green"
                        },
                        "opacity": {
                            "condition": {
                                "test": "datum.column_name == 'Prior match weight' || datum.column_name == 'Final score'",
                                "value": 1
                            },
                            "value": 0.5
                        },
                        "tooltip": [
                            {
                                "field": "column_name",
                                "title": "Comparison column",
                                "type": "nominal"
                            },
                            {
                                "field": "value_l",
                                "title": "Value (L)",
                                "type": "nominal"
                            },
                            {
                                "field": "value_r",
                                "title": "Value (R)",
                                "type": "nominal"
                            },
                            {
                                "field": "label_for_charts",
                                "title": "Label",
                                "type": "ordinal"
                            },
                            {
                                "field": "sql_condition",
                                "title": "SQL condition",
                                "type": "nominal"
                            },
                            {
                                "field": "comparison_vector_value",
                                "title": "Comparison vector value",
                                "type": "nominal"
                            },
                            {
                                "field": "bayes_factor",
                                "format": ",.4f",
                                "title": "Bayes factor = m/u",
                                "type": "quantitative"
                            },
                            {
                                "field": "log2_bayes_factor",
                                "format": ",.4f",
                                "title": "Match weight = log2(m/u)",
                                "type": "quantitative"
                            },
                            {
                                "field": "prob",
                                "format": ".4f",
                                "title": "Cumulative match probability",
                                "type": "quantitative"
                            },
                            {
                                "field": "bayes_factor_description",
                                "title": "Match weight description",
                                "type": "nominal"
                            }
                        ],
                        "x": {
                            "axis": {
                                "grid": true,
                                "labelAlign": "center",
                                "labelAngle": -20,
                                "labelExpr": "datum.value == 'Prior' || datum.value == 'Final score' ? '' : datum.value",
                                "labelPadding": 10,
                                "tickBand": "extent",
                                "title": "Column"
                            },
                            "field": "column_name",
                            "sort": {
                                "field": "bar_sort_order",
                                "order": "ascending"
                            },
                            "type": "nominal"
                        },
                        "y": {
                            "axis": {
                                "grid": false,
                                "orient": "left",
                                "title": "Match Weight"
                            },
                            "field": "previous_sum",
                            "type": "quantitative"
                        },
                        "y2": {
                            "field": "sum"
                        }
                    }
                },
                {
                    "mark": {
                        "type": "text",
                        "fontWeight": "bold"
                    },
                    "encoding": {
                        "color": {
                            "value": "white"
                        },
                        "text": {
                            "condition": {
                                "test": "abs(datum.log2_bayes_factor) > 1",
                                "field": "log2_bayes_factor",
                                "format": ".2f",
                                "type": "nominal"
                            },
                            "value": ""
                        },
                        "x": {
                            "axis": {
                                "labelAngle": -20,
                                "title": "Column"
                            },
                            "field": "column_name",
                            "sort": {
                                "field": "bar_sort_order",
                                "order": "ascending"
                            },
                            "type": "nominal"
                        },
                        "y": {
                            "axis": {
                                "orient": "left"
                            },
                            "field": "center",
                            "type": "quantitative"
                        }
                    }
                },
                {
                    "mark": {
                        "type": "text",
                        "baseline": "bottom",
                        "dy": -25,
                        "fontWeight": "bold"
                    },
                    "encoding": {
                        "color": {
                            "value": "black"
                        },
                        "text": {
                            "field": "column_name",
                            "type": "nominal"
                        },
                        "x": {
                            "axis": {
                                "labelAngle": -20,
                                "title": "Column"
                            },
                            "field": "column_name",
                            "sort": {
                                "field": "bar_sort_order",
                                "order": "ascending"
                            },
                            "type": "nominal"
                        },
                        "y": {
                            "field": "sum_top",
                            "type": "quantitative"
                        }
                    }
                },
                {
                    "mark": {
                        "type": "text",
                        "baseline": "bottom",
                        "dy": -13,
                        "fontSize": 8
                    },
                    "encoding": {
                        "color": {
                            "value": "grey"
                        },
                        "text": {
                            "field": "value_l",
                            "type": "nominal"
                        },
                        "x": {
                            "axis": {
                                "labelAngle": -20,
                                "title": "Column"
                            },
                            "field": "column_name",
                            "sort": {
                                "field": "bar_sort_order",
                                "order": "ascending"
                            },
                            "type": "nominal"
                        },
                        "y": {
                            "field": "sum_top",
                            "type": "quantitative"
                        }
                    }
                },
                {
                    "mark": {
                        "type": "text",
                        "baseline": "bottom",
                        "dy": -5,
                        "fontSize": 8
                    },
                    "encoding": {
                        "color": {
                            "value": "grey"
                        },
                        "text": {
                            "field": "value_r",
                            "type": "nominal"
                        },
                        "x": {
                            "axis": {
                                "labelAngle": -20,
                                "title": "Column"
                            },
                            "field": "column_name",
                            "sort": {
                                "field": "bar_sort_order",
                                "order": "ascending"
                            },
                            "type": "nominal"
                        },
                        "y": {
                            "field": "sum_top",
                            "type": "quantitative"
                        }
                    }
                }
            ]
        },
        {
            "mark": {
                "type": "rule",
                "color": "black",
                "strokeWidth": 2,
                "x2Offset": 30,
                "xOffset": -30
            },
            "encoding": {
                "x": {
                    "axis": {
                        "labelAngle": -20,
                        "title": "Column"
                    },
                    "field": "column_name",
                    "sort": {
                        "field": "bar_sort_order",
                        "order": "ascending"
                    },
                    "type": "nominal"
                },
                "x2": {
                    "field": "lead"
                },
                "y": {
                    "axis": {
                        "labelExpr": "format(1 / (1 + pow(2, -1*datum.value)), '.2r')",
                        "orient": "right",
                        "title": "Probability"
                    },
                    "field": "sum",
                    "scale": {
                        "zero": false
                    },
                    "type": "quantitative"
                }
            }
        }
    ],
    "data": {
        "name": "mydata"
    },
    "height": 450,
    "resolve": {
        "axis": {
            "y": "independent"
        }
    },
    "title": {
        "text": "Match weights waterfall chart",
        "subtitle": "How each comparison contributes to the final match score"
    },
    "transform": [
        {
            "window": [
                {
                    "op": "sum",
                    "field": "log2_bayes_factor",
                    "as": "sum"
                },
                {
                    "op": "lead",
                    "field": "column_name",
                    "as": "lead"
                }
            ],
            "sort": [
                {
                    "field": "bar_sort_order",
                    "order": "ascending"
                }
            ],
            "frame": [
                null,
                0
            ]
        },
        {
            "calculate": "datum.column_name === \"Final score\" ? datum.sum - datum.log2_bayes_factor : datum.sum",
            "as": "sum"
        },
        {
            "calculate": "datum.lead === null ? datum.column_name : datum.lead",
            "as": "lead"
        },
        {
            "calculate": "datum.column_name === \"Final score\" || datum.column_name === \"Prior\" || datum.column_name === \"Prior match weight\" ? 0 : datum.sum - datum.log2_bayes_factor",
            "as": "previous_sum"
        },
        {
            "calculate": "datum.sum > datum.previous_sum ? datum.column_name : \"\"",
            "as": "top_label"
        },
        {
            "calculate": "datum.sum < datum.previous_sum ? datum.column_name : \"\"",
            "as": "bottom_label"
        },
        {
            "calculate": "datum.sum > datum.previous_sum ? datum.sum : datum.previous_sum",
            "as": "sum_top"
        },
        {
            "calculate": "datum.sum < datum.previous_sum ? datum.sum : datum.previous_sum",
            "as": "sum_bottom"
        },
        {
            "calculate": "(datum.sum + datum.previous_sum) / 2",
            "as": "center"
        },
        {
            "calculate": "(datum.log2_bayes_factor > 0 ? \"+\" : \"\") + datum.log2_bayes_factor",
            "as": "text_log2_bayes_factor"
        },
        {
            "calculate": "datum.sum < datum.previous_sum ? 4 : -4",
            "as": "dy"
        },
        {
            "calculate": "datum.sum < datum.previous_sum ? \"top\" : \"bottom\"",
            "as": "baseline"
        },
        {
            "calculate": "1. / (1 + pow(2, -1.*datum.sum))",
            "as": "prob"
        },
        {
            "calculate": "0*datum.sum",
            "as": "zero"
        }
    ],
    "width": {
        "step": 75
    },
    "$schema": "https://vega.github.io/schema/vega-lite/v5.9.3.json",
    "datasets": {
        "mydata": [
            {
                "column_name": "Prior",
                "label_for_charts": "Starting match weight (prior)",
                "sql_condition": null,
                "log2_bayes_factor": -19.931567126628412,
                "bayes_factor": 1.000001000001e-06,
                "comparison_vector_value": null,
                "m_probability": null,
                "u_probability": null,
                "bayes_factor_description": null,
                "value_l": "",
                "value_r": "",
                "term_frequency_adjustment": null,
                "bar_sort_order": 0,
                "record_number": 0
            },
            {
                "column_name": "first_name",
                "label_for_charts": "Exact match",
                "sql_condition": "\"first_name_l\" = \"first_name_r\"",
                "log2_bayes_factor": 7.307917141382366,
                "bayes_factor": 158.45365646258503,
                "comparison_vector_value": 2,
                "m_probability": 0.85,
                "u_probability": 0.005364344496529222,
                "bayes_factor_description": "If comparison level is `exact match` then comparison is 158.45 times more likely to be a match",
                "value_l": "rob",
                "value_r": "rob",
                "term_frequency_adjustment": false,
                "bar_sort_order": 1,
                "record_number": 0
            },
            {
                "column_name": "surname",
                "label_for_charts": "Exact match",
                "sql_condition": "\"surname_l\" = \"surname_r\"",
                "log2_bayes_factor": 10.425173183160398,
                "bayes_factor": 1374.9592850049653,
                "comparison_vector_value": 2,
                "m_probability": 0.8,
                "u_probability": 0.0005818354104915267,
                "bayes_factor_description": "If comparison level is `exact match` then comparison is 1,374.96 times more likely to be a match",
                "value_l": "jefferies",
                "value_r": "jefferies",
                "term_frequency_adjustment": false,
                "bar_sort_order": 2,
                "record_number": 0
            },
            {
                "column_name": "postcode",
                "label_for_charts": "Null",
                "sql_condition": "\"postcode_l\" IS NULL OR \"postcode_r\" IS NULL",
                "log2_bayes_factor": 0.0,
                "bayes_factor": 1.0,
                "comparison_vector_value": -1,
                "m_probability": null,
                "u_probability": null,
                "bayes_factor_description": "If comparison level is `null` then comparison is 1.00 times more likely to be a match",
                "value_l": "sw8 2lh",
                "value_r": "nan",
                "term_frequency_adjustment": false,
                "bar_sort_order": 3,
                "record_number": 0
            },
            {
                "column_name": "gender",
                "label_for_charts": "Exact match",
                "sql_condition": "\"gender_l\" = \"gender_r\"",
                "log2_bayes_factor": 0.4714908512885071,
                "bayes_factor": 1.3865415514719295,
                "comparison_vector_value": 1,
                "m_probability": 0.995,
                "u_probability": 0.7176128251935361,
                "bayes_factor_description": "If comparison level is `exact match` then comparison is 1.39 times more likely to be a match",
                "value_l": "male",
                "value_r": "male",
                "term_frequency_adjustment": false,
                "bar_sort_order": 4,
                "record_number": 0
            },
            {
                "column_name": "Final score",
                "label_for_charts": "Final score",
                "sql_condition": null,
                "log2_bayes_factor": -1.7269859507971401,
                "bayes_factor": 0.30208240256154806,
                "comparison_vector_value": null,
                "m_probability": null,
                "u_probability": null,
                "bayes_factor_description": null,
                "value_l": "",
                "value_r": "",
                "term_frequency_adjustment": null,
                "bar_sort_order": 5,
                "record_number": 0
            }
        ]
    }
}
