import { Matrix, inverse } from 'ml-matrix';
import { IICP, IRefineEstimations } from '../utils/types/estimation';
import {
  ICP_BREAK_LOOP_ERROR_RATIO_THRESH,
  ICP_BREAK_LOOP_ERROR_THRESH,
  ICP_MAX_LOOP,
  K2_FACTOR,
  TRACKING_THRESH,
} from '../utils/constant/estimation';
import {
  applyModelViewProjectionTransform,
  buildModelViewProjectionTransform,
  computeScreenCoordiate,
} from './utils';

// some temporary/intermediate variables used later. Declare them beforehand to reduce new object allocations
const mat: number[][] = [[], [], []];
const J_U_Xc: number[][] = [[], []]; // 2x3
const J_Xc_S: number[][] = [[], [], []]; // 3x6

const refineEstimate = ({
  initialModelViewTransform,
  projectionTransform,
  worldCoords,
  screenCoords,
}: IRefineEstimations) => {
  // Question: shall we normlize the screen coords as well?
  // Question: do we need to normlize the scale as well, i.e. make coords from -1 to 1
  //
  // normalize world coords - reposition them to center of mass
  //   assume z coordinate is always zero (in our case, the image target is planar with z = 0
  const dx = worldCoords.reduce((acc, cur) => acc + cur.x, 0) / worldCoords.length;
  const dy = worldCoords.reduce((acc, cur) => acc + cur.y, 0) / worldCoords.length;

  const normalizedWorldCoords: Vector3[] = [];

  for (const worldCoord of worldCoords) {
    normalizedWorldCoords.push({ x: worldCoord.x - dx, y: worldCoord.y - dy, z: worldCoord.z });
  }

  const diffModelViewTransform: number[][] = [[], [], []];

  for (let j = 0; j < 3; j++) {
    for (let i = 0; i < 3; i++) {
      diffModelViewTransform[j][i] = initialModelViewTransform[j][i];
    }
  }

  diffModelViewTransform[0][3] =
    initialModelViewTransform[0][0] * dx +
    initialModelViewTransform[0][1] * dy +
    initialModelViewTransform[0][3];

  diffModelViewTransform[1][3] =
    initialModelViewTransform[1][0] * dx +
    initialModelViewTransform[1][1] * dy +
    initialModelViewTransform[1][3];

  diffModelViewTransform[2][3] =
    initialModelViewTransform[2][0] * dx +
    initialModelViewTransform[2][1] * dy +
    initialModelViewTransform[2][3];

  // use iterative closest point algorithm to refine the modelViewTransform
  const inlierProbs = [1.0, 0.8, 0.6, 0.4, 0.0];

  let updatedModelViewTransform = diffModelViewTransform; // iteratively update this transform
  let finalModelViewTransform = null;

  for (const inlierProb of inlierProbs) {
    const ret = _doICP({
      initialModelViewTransform: updatedModelViewTransform,
      projectionTransform,
      worldCoords: normalizedWorldCoords,
      screenCoords,
      inlierProb,
    });

    updatedModelViewTransform = ret.modelViewTransform;

    if (ret.err < TRACKING_THRESH) {
      finalModelViewTransform = updatedModelViewTransform;
      break;
    }
  }

  if (finalModelViewTransform === null) return null;

  // de-normalize
  finalModelViewTransform[0][3] =
    finalModelViewTransform[0][3] -
    finalModelViewTransform[0][0] * dx -
    finalModelViewTransform[0][1] * dy;
  finalModelViewTransform[1][3] =
    finalModelViewTransform[1][3] -
    finalModelViewTransform[1][0] * dx -
    finalModelViewTransform[1][1] * dy;
  finalModelViewTransform[2][3] =
    finalModelViewTransform[2][3] -
    finalModelViewTransform[2][0] * dx -
    finalModelViewTransform[2][1] * dy;

  return finalModelViewTransform;
};

// ICP iteration
// Question: can someone provide theoretical reference / mathematical proof for the following computations?
const _doICP = ({
  initialModelViewTransform,
  projectionTransform,
  worldCoords,
  screenCoords,
  inlierProb,
}: IICP) => {
  const isRobustMode = inlierProb < 1;

  let modelViewTransform = initialModelViewTransform;

  let err0 = 0.0;
  let err1 = 0.0;

  const E: number[] = new Array(worldCoords.length);
  const E2: number[] = new Array(worldCoords.length);
  const dxs: number[] = new Array(worldCoords.length);
  const dys: number[] = new Array(worldCoords.length);

  for (let l = 0; l <= ICP_MAX_LOOP; l++) {
    const modelViewProjectionTransform = buildModelViewProjectionTransform(
      projectionTransform,
      modelViewTransform
    );

    for (let n = 0; n < worldCoords.length; n++) {
      const u = computeScreenCoordiate(
        modelViewProjectionTransform,
        worldCoords[n].x,
        worldCoords[n].y,
        worldCoords[n].z
      );

      const dx = screenCoords[n].x - u.x;
      const dy = screenCoords[n].y - u.y;

      dxs[n] = dx;
      dys[n] = dy;

      E[n] = dx ** 2 + dy ** 2;
    }

    let K2 = 0; // robust mode only
    err1 = 0.0;

    if (isRobustMode) {
      const inlierNum = Math.max(3, Math.floor(worldCoords.length * inlierProb) - 1);

      for (let n = 0; n < worldCoords.length; n++) {
        E2[n] = E[n];
      }

      E2.sort((a, b) => a - b);

      K2 = Math.max(E2[inlierNum] * K2_FACTOR, 16.0);

      for (let n = 0; n < worldCoords.length; n++) {
        if (E2[n] > K2) err1 += K2 / 6;
        else
          err1 += (K2 / 6.0) * (1.0 - (1.0 - E2[n] / K2) * (1.0 - E2[n] / K2) * (1.0 - E2[n] / K2));
      }
    } else {
      err1 = E.reduce((acc, cur) => acc + cur, 0.0);
    }

    err1 /= worldCoords.length;

    if (err1 < ICP_BREAK_LOOP_ERROR_THRESH) break;
    if (l > 0 && err1 / err0 > ICP_BREAK_LOOP_ERROR_RATIO_THRESH) break;
    if (l === ICP_MAX_LOOP) break;

    err0 = err1;

    const dU: number[][] = [];
    const allJ_U_S = [];

    for (let n = 0; n < worldCoords.length; n++) {
      if (isRobustMode && E[n] > K2) continue;

      const J_U_S = _getJ_U_S({
        modelViewProjectionTransform,
        modelViewTransform,
        projectionTransform,
        worldCoord: worldCoords[n],
      });

      if (isRobustMode) {
        const W = (1.0 - E[n] / K2) * (1.0 - E[n] / K2);

        for (let j = 0; j < 2; j++) {
          for (let i = 0; i < 6; i++) {
            J_U_S[j][i] *= W;
          }
        }

        dU.push([dxs[n] * W]);
        dU.push([dys[n] * W]);
      } else {
        dU.push([dxs[n]]);
        dU.push([dys[n]]);
      }

      for (let i = 0; i < J_U_S.length; i++) {
        allJ_U_S.push(J_U_S[i]);
      }
    }

    const dS = _getDeltaS({ dU, J_U_S: allJ_U_S });

    if (dS === null) break;

    modelViewTransform = _updateModelViewTransform({ modelViewTransform, dS });
  }
  return {
    modelViewTransform,
    err: err1,
  };
};

const _updateModelViewTransform = ({
  modelViewTransform,
  dS,
}: {
  modelViewTransform: number[][];
  dS: number[];
}) => {
  /**
   * dS has 6 paragrams, first half is rotation, second half is translation
   * rotation is expressed in angle-axis,
   *   [S[0], S[1] ,S[2]] is the axis of rotation, and the magnitude is the angle
   */
  let ra = dS[0] ** 2 + dS[1] ** 2 + dS[2] ** 2;
  let q0, q1, q2;

  if (ra < 0.000001) {
    q0 = 1.0;
    q1 = 0.0;
    q2 = 0.0;
    ra = 0.0;
  } else {
    ra = Math.sqrt(ra);
    q0 = dS[0] / ra;
    q1 = dS[1] / ra;
    q2 = dS[2] / ra;
  }

  const cra = Math.cos(ra);
  const sra = Math.sin(ra);
  const one_cra = 1.0 - cra;

  // mat is [R|t], 3D rotation and translation
  mat[0][0] = q0 * q0 * one_cra + cra;
  mat[0][1] = q0 * q1 * one_cra - q2 * sra;
  mat[0][2] = q0 * q2 * one_cra + q1 * sra;
  mat[0][3] = dS[3];
  mat[1][0] = q1 * q0 * one_cra + q2 * sra;
  mat[1][1] = q1 * q1 * one_cra + cra;
  mat[1][2] = q1 * q2 * one_cra - q0 * sra;
  mat[1][3] = dS[4];
  mat[2][0] = q2 * q0 * one_cra - q1 * sra;
  mat[2][1] = q2 * q1 * one_cra + q0 * sra;
  mat[2][2] = q2 * q2 * one_cra + cra;
  mat[2][3] = dS[5];

  // the updated transform is the original transform x delta transform
  const mat2: number[][] = [[], [], []];

  for (let j = 0; j < 3; j++) {
    for (let i = 0; i < 4; i++) {
      mat2[j][i] =
        modelViewTransform[j][0] * mat[0][i] +
        modelViewTransform[j][1] * mat[1][i] +
        modelViewTransform[j][2] * mat[2][i];
    }

    mat2[j][3] += modelViewTransform[j][3];
  }

  return mat2;
};

const _getDeltaS = ({ dU, J_U_S }: { dU: number[][]; J_U_S: number[][] }) => {
  const J = new Matrix(J_U_S);
  const U = new Matrix(dU);

  const JT = J.transpose();
  const JTJ = JT.mmul(J);
  const JTU = JT.mmul(U);

  let JTJInv;

  try {
    JTJInv = inverse(JTJ);
  } catch (e) {
    return null;
  }

  const S = JTJInv.mmul(JTU);

  return S.to1DArray();
};

const _getJ_U_S = ({
  modelViewProjectionTransform,
  modelViewTransform,
  projectionTransform,
  worldCoord,
}: {
  modelViewProjectionTransform: number[][];
  modelViewTransform: number[][];
  projectionTransform: number[][];
  worldCoord: Vector3;
}) => {
  const T = modelViewTransform;
  const { x, y, z } = worldCoord;

  const u = applyModelViewProjectionTransform(modelViewProjectionTransform, x, y, z);

  const z2 = u.z * u.z;
  // Question: This is the most confusing matrix to me. I've no idea how to derive this.
  //J_U_Xc[0][0] = (projectionTransform[0][0] * u.z - projectionTransform[2][0] * u.x) / z2;
  //J_U_Xc[0][1] = (projectionTransform[0][1] * u.z - projectionTransform[2][1] * u.x) / z2;
  //J_U_Xc[0][2] = (projectionTransform[0][2] * u.z - projectionTransform[2][2] * u.x) / z2;
  //J_U_Xc[1][0] = (projectionTransform[1][0] * u.z - projectionTransform[2][0] * u.y) / z2;
  //J_U_Xc[1][1] = (projectionTransform[1][1] * u.z - projectionTransform[2][1] * u.y) / z2;
  //J_U_Xc[1][2] = (projectionTransform[1][2] * u.z - projectionTransform[2][2] * u.y) / z2;

  // The above is the original implementation, but simplify to below becuase projetionTransform[2][0] and [2][1] are zero
  J_U_Xc[0][0] = (projectionTransform[0][0] * u.z) / z2;
  J_U_Xc[0][1] = (projectionTransform[0][1] * u.z) / z2;
  J_U_Xc[0][2] = (projectionTransform[0][2] * u.z - projectionTransform[2][2] * u.x) / z2;
  J_U_Xc[1][0] = (projectionTransform[1][0] * u.z) / z2;
  J_U_Xc[1][1] = (projectionTransform[1][1] * u.z) / z2;
  J_U_Xc[1][2] = (projectionTransform[1][2] * u.z - projectionTransform[2][2] * u.y) / z2;

  /*
    J_Xc_S should be like this, but z is zero, so we can simplify
    [T[0][2] * y - T[0][1] * z, T[0][0] * z - T[0][2] * x, T[0][1] * x - T[0][0] * y, T[0][0], T[0][1], T[0][2]],
    [T[1][2] * y - T[1][1] * z, T[1][0] * z - T[1][2] * x, T[1][1] * x - T[1][0] * y, T[1][0], T[1][1], T[1][2]],
    [T[2][2] * y - T[2][1] * z, T[2][0] * z - T[2][2] * x, T[2][1] * x - T[2][0] * y, T[2][0], T[2][1], T[2][2]],
  */
  J_Xc_S[0][0] = T[0][2] * y;
  J_Xc_S[0][1] = -T[0][2] * x;
  J_Xc_S[0][2] = T[0][1] * x - T[0][0] * y;
  J_Xc_S[0][3] = T[0][0];
  J_Xc_S[0][4] = T[0][1];
  J_Xc_S[0][5] = T[0][2];

  J_Xc_S[1][0] = T[1][2] * y;
  J_Xc_S[1][1] = -T[1][2] * x;
  J_Xc_S[1][2] = T[1][1] * x - T[1][0] * y;
  J_Xc_S[1][3] = T[1][0];
  J_Xc_S[1][4] = T[1][1];
  J_Xc_S[1][5] = T[1][2];

  J_Xc_S[2][0] = T[2][2] * y;
  J_Xc_S[2][1] = -T[2][2] * x;
  J_Xc_S[2][2] = T[2][1] * x - T[2][0] * y;
  J_Xc_S[2][3] = T[2][0];
  J_Xc_S[2][4] = T[2][1];
  J_Xc_S[2][5] = T[2][2];

  const J_U_S: number[][] = [[], []];

  for (let j = 0; j < 2; j++) {
    for (let i = 0; i < 6; i++) {
      J_U_S[j][i] = 0.0;

      for (let k = 0; k < 3; k++) {
        J_U_S[j][i] += J_U_Xc[j][k] * J_Xc_S[k][i];
      }
    }
  }

  return J_U_S;
};

export default refineEstimate;