// Convert samples to x-y pairs for a PDF
// Uses kernel density estimation (KDE) with a triangular kernel
// samples:      Must be sorted!
// outputLength: Number of points in output, >= 4
// xWidth:       Width of the kernel in x axis units

import { nrd0 } from "./bandwidth.js";

export type kdeParams = {
  samples: number[];
  outputLength: number;
  weight: number;
  kernelWidth?: number;
};

// weight:       Probability mass for each point
export const kde = ({
  samples,
  outputLength,
  weight,
  kernelWidth,
}: kdeParams) => {
  let xWidth = kernelWidth ?? nrd0(samples);
  samples = samples.filter((v) => Number.isFinite(v)); // Not sure if this is needed?
  const len = samples.length;

  // It's not clear what to do when xWidth is zero. We might want to throw an error or otherwise instead. This was an issue for discrete distributions, like binomial.
  if (len === 0 || xWidth === 0) return { usedWidth: xWidth, xs: [], ys: [] };

  // Sample min and range
  const smin = samples[0];
  const srange = samples[len - 1] - smin;

  // width is in units of the step size dx and has to be a whole number
  // If we set width = wantedWidth exactly then we have:
  // width * dx === width * srange / (outputLength - 1 - 2 * width)
  //            === srange / ((outputLength - 1) / width - 2)
  //            === srange / ((srange + 2 * xWidth) / xWidth - 2)
  //            === srange / (srange / xWidth + 2 - 2)
  //            === xWidth
  const wantedWidth = ((outputLength - 1) * xWidth) / (srange + 2 * xWidth);
  const width = Math.max(1, Math.floor(wantedWidth));

  // To give room for all the kernels, we'll have width steps
  // on either side as padding.
  // Subtract from the outputLength - 1 total intervals.
  const stepsInside = outputLength - 1 - 2 * width;
  const dx = srange / stepsInside;

  // Width in x units, not steps
  xWidth = width * dx;

  // Output min and range
  // Total number of intervals is:
  // range / dx === (srange + 2 * margin) / dx
  //            === stepsInside + 2 * width
  //            === outputLength - 1
  const min = smin - xWidth;

  // On our discrete set of x values, each triangle is indistinguishable
  // from a sum of two triangles with peaks at the sample's two nearest
  // neighbors. Here we sum up the peak values from all samples.
  // Only the indices width + 1 + [0, outputLength) in ysum are ever nonzero
  // The extra zeros are to avoid out-of-bounds lookups for ddy below
  const ysum = Array(outputLength + 2 * width).fill(0);
  const dxInv = 1 / dx;
  samples.forEach((x) => {
    const off = x - min;
    const index = Math.floor(off * dxInv);
    const leftWeight = off - index * dx;
    const rightWeight = dx - leftWeight;
    // Offset by 1; see below
    ysum[width + index + 1] += rightWeight;
    ysum[width + index + 2] += leftWeight;
  });

  // Sum for each sample will be width * width * dx
  // but we want weight / dx
  const normalizer = weight / (xWidth * xWidth);

  const xs = Array(outputLength)
    .fill(0)
    .map((_, i) => min + i * dx);

  // A triangle at i causes the KDE to
  // - start increasing (+1) at i - width,
  // - change from increasing to decreasing (-2) at i, and
  // - stop decreasing (+1) at i + width.
  // So the result is a double prefix sum.
  // The actual increase doesn't affect the point where the change
  // happens, but rather the next one. Since y and dy perform inclusive
  // prefix sums, the ysum values are offset by 1 above.
  let dy = 0;
  let y = 0;
  const ys = xs.map((_, i) => {
    const ddy = ysum[i] - 2 * ysum[i + width] + ysum[i + 2 * width];
    dy += ddy;
    y += dy;
    // Sometimes the KDE can be very slightly negative due to floating point errors
    return Math.max(normalizer * y, 0);
  });

  return { usedWidth: xWidth, xs, ys };
};