// An instance of Qpsc drives the gradient-projection portion of the Projection Solver. import {number} from 'yargs' import {Parameters} from './Parameters' import {Variable} from './Variable' // Store original weight to be restored when done. With the ability to re-Solve() after // updating constraint gaps, we must restore DesiredPos as well. export class QpscVar { Variable: Variable OrigWeight: number OrigScale: number OrigDesiredPos: number constructor(v: Variable) { this.Variable = v this.OrigWeight = v.Weight this.OrigScale = v.Scale this.OrigDesiredPos = this.Variable.DesiredPos } } class MatrixCell { // Initially the summed weights of all variables to which this variable has a relationship // (including self as described above for the diagonal), then modified with scale. Value: number // The index of the variable j for this column in the i row (may be same as i). Column: number constructor(w: number, index: number) { this.Value = w this.Column = index } } export class Qpsc { private solverParameters: Parameters // // This class tracks closely to the Ipsep_Cola paper's ,IPSep-CoLa: An Incremental Procedure for // Separation Constraint Layout of Graphs, solve_QPSC function, outside of // the SplitBlocks() and Project() operations. The relevant data are extracted from the // Variables of the solver and placed within the mxA (Hessian A) matrix and vecWiDi (b) // vector on initialization, and then the vecPrevX (x-hat), vecCurX (x-bar), and vecDeltaX // (d) vectors are filled and the normal matrix operations proceed as in the paper. // // From the Ipsep paper: Qpsc solves a system with a goal function that includes minimization of // distance between pairs of neighbors as well as minimization of distances necessary // to satisfy separation constraints. I.e., our goal function is: // f(X) = Sum(i < n) w_i (x_i - a_i)^2 // + Sum(i,j in Edge list) w_ij (x_i - x_j)^2 // Where // X is the vector of CURRENT axis positions (x_i) // a_i is the desired position for each x_i (called d in the paper) // w_ij is the weight of each edge between i and j (possibly multiple edges between // the same i/j pair) // Now we can write f(x) = x'Ax. // The gradient g at X is computed: // g = Ax+b // where b is the negative of the vector (so it becomes Ax-b) // [w_0*d_0 ... w_n-1*d_n-1] // and the optimal stepsize to reduce f in the direction d is: // alpha = d'g / (g'Ag) // In order to compute any of these efficiently we need an expression for the i'th // term of the product of our sparse symmetric square matrix A and a vector v. // Now the i'th term of Av is the inner product of row i of A and the vector v, // i.e. A[i] is the row vector: // Av[i] = A[i] * v = A[i][0]*v[0] + A[i][1]*v[1] + ... +A[i][n-1]*v[n-1] // So what are A[i][0]...A[i][n-1]? // First the diagonal entries: A[i][i] = wi + Sum(wij for every neighbor j of i). // Then the off diagonal entries: A[i][j] = -Sum(wij for each time j is a neighbor of i). // And all A[i][k] where there is no neighbor relation between i and k is 0. // Then, because this is the partial derivative wrt x, each cell is multiplied by 2. // Thus for the small example of 3 variables i, j, and k, with neighbors ij and ik, // the A matrix is (where w is the weight of the variable or neighbor pair): // i j k // +----------------------------------------- // i | 2(wi+wij+wik) -2wij -2wik // j | -2wij 2(wj+wij) 0 // k | -2wik 0 2(wk+wik) // // Because A is sparse, each row is represented as an array of the A[i][0..n] entries // that are nonzero. // The foregoing was updated to the Diagonal scaling paper: // Constrained Stress Majorization Using Diagonally Scaled Gradient Projection.pdf // which is also checked into the project. The implementation here is somewhat modified from it: // // We store the offset o[i] in the variable. // The position for a variable i in a block B is: // y[i] = (S[B] * Y[B] + o[i])/s[i] // // Then the df/dv[i] is: // df/dv[i] = 2 * w[i] * ( y[i] - d[i] ); // // And comp_dfdv(i , AC, ~c) is a bit different too: // Dfdv = df/dv[i] // For each c in AC s.t. i=lv[c] and c!== ~c: // Lambda[c] = comp_dfdv(rv[c], AC, c) // dfdv += Lambda[c] * s[lv[c]] // For each c in AC s.t. i=rv[c] and c!== ~c: // Lambda[c] = comp_dfdv(lv[c], AC, c) // dfdv -= Lambda[c] * s[rv[c]] // // The statistics for the blocks are calculated as follows: // For each variable i we have: // a[i] = S[B] / s[i] // b[i] = o[i] / s[i] // Then: // AD[B] = sum over i=0..n: a[i] * d[i] * w[i] // AB[B] = sum over i=0..n: a[i] * b[i] * w[i] // A2[B] = sum over i=0..n: a[i] * a[i] * w[i] // And the ideal position calculation for the block is then the same as the paper: // Y[B] = (AD[B] - AB[B])/A2[B] // A MatrixCell implements A[i][j] as above. // // SolveQpsc static data members: These do not change after initialization. // The matrix is sparse in columns for each row, but not in rows, because there is always // at least one entry per row, the diagonal. // private matrixQ: MatrixCell[][] // Sparse matrix A in the Ipsep paper; matrix Q (modified to Q') in the Scaling paper private vectorWiDi: number[] // b (weight * desiredpos) in the Ipsep paper; modified to b' = Sb from the Scaling paper private vectorQpscVars: QpscVar[] // Array of variables, for perf (avoid nested block/variable Array<> iteration) private newMatrixRow: Array = new Array() // current row in AddVariable // // SolveQpsc per-iteration data members: These change with each iteration. // private gradientVector: number[] // g in the paper private vectorQg: number[] // Qg in the paper private vectorPrevY: number[] // y-hat in the paper private vectorCurY: number[] // y-bar in the paper private isFirstProjectCall: boolean // If true we're on our first call to Project // Holds the value of f(x) = yQ'y + b'y as computed on the last iteration; used to test for // convergence and updated before HasConverged() returns. private previousFunctionValue: number = Number.MAX_VALUE constructor(solverParameters: Parameters, cVariables: number) { this.solverParameters = solverParameters this.matrixQ = new Array(cVariables).fill(0) this.vectorWiDi = new Array(cVariables).fill(0) this.vectorQpscVars = new Array(cVariables).fill(0) this.gradientVector = new Array(cVariables).fill(0) this.vectorQg = new Array(cVariables).fill(0) this.vectorPrevY = new Array(cVariables).fill(0) this.vectorCurY = new Array(cVariables).fill(0) } // // solver.SolveQpsc drives the Qpsc instance as follows: // Initialization: // Qpsc qpsc = new Qpsc(numVariables); // foreach (variable in (foreach block)) // qpsc.AddVariable(variable) // qpsc.VariablesComplete() // Per iteration: // if (!qpsc.PreProject()) break; // solver.SplitBlocks() // solver.Project() // if (!qpsc.PostProject()) break; // Done: // qpsc.ProjectComplete() AddVariable(variable: Variable) { /*Assert.assert( this.matrixQ[variable.Ordinal] == null && this.vectorQpscVars[variable.Ordinal].Variable == null , 'variable.Ordinal already exists', )*/ this.isFirstProjectCall = true // This is the weight times desired position, multiplied by 2.0 per the partial derivative. // We'll use this to keep as close as possible to the desired position on each iteration. this.vectorWiDi[variable.Ordinal] = 2 * (variable.Weight * variable.DesiredPos) * -1 // Temporarily hijack vectorPrevY for use as scratch storage, to handle duplicate // neighbor pairs (take the highest weight). // Sum the weight for cell i,i (the diagonal). this.vectorPrevY[variable.Ordinal] = variable.Weight if (null != variable.Neighbors) { for (const neighborWeightPair of variable.Neighbors) { // We should already have verified this in AddNeighbourPair. /*Assert.assert( neighborWeightPair.Neighbor.Ordinal !== variable.Ordinal, 'self-neighbors are not allowed', )*/ // For the neighbor KeyValuePairs, Key === neighboring variable and Value === relationship // weight. If we've already encountered this pair then we'll sum the relationship weights, under // the assumption the caller will be doing something like creating edges for different reasons, // and multiple edges should be like rubber bands, the sum of the strengths. Mathematica also // sums duplicate weights. // Per above comments: // First the diagonal entries: A[i][i] = wi + Sum(wij for every neighbor j of i). this.vectorPrevY[variable.Ordinal] = this.vectorPrevY[variable.Ordinal] + neighborWeightPair.Weight // Then the off diagonal entries: A[i][j] = -Sum(wij for time j is a neighbor of i). this.vectorPrevY[neighborWeightPair.Neighbor.Ordinal] = this.vectorPrevY[neighborWeightPair.Neighbor.Ordinal] - neighborWeightPair.Weight } } // endif null != variable.Neighbors // Add the sparse row to the matrix (all non-zero slots of vectorPrevY are weights to that neighbor). for (let ii = 0; ii < this.vectorPrevY.length; ii++) { if (0 !== this.vectorPrevY[ii]) { // The diagonal must be > 0 and off-diagonal < 0. /*Assert.assert( (ii === variable.Ordinal) === this.vectorPrevY[ii] > 0, 'Diagonal must be > 0.0', )*/ // All 'A' cells must be 2*(summed weights). this.newMatrixRow.push(new MatrixCell(this.vectorPrevY[ii] * 2, ii)) this.vectorPrevY[ii] = 0 } } this.matrixQ[variable.Ordinal] = Array.from(this.newMatrixRow) this.newMatrixRow = [] this.vectorQpscVars[variable.Ordinal] = new QpscVar(variable) // For the non-Qpsc loop, we consider weights in block reference-position calculation. // Here, we have that in vectorWiDi which we use in calculating gradient and alpha, which // in turn we use to set the gradient-stepped desiredPos. So turn it off for the duration // of Qpsc - we restore it in QpscComplete(). variable.Weight = 1 } // end AddVariable() VariablesComplete() { for (const qvar of this.vectorQpscVars) { const variable = qvar.Variable for (const cell of this.matrixQ[variable.Ordinal]) { if (cell.Column === variable.Ordinal) { if (this.solverParameters.Advanced.ScaleInQpsc) { variable.Scale = 1 / Math.sqrt(Math.abs(cell.Value)) if (!Number.isFinite(variable.Scale)) { variable.Scale = 1 } // This is the y = Sx step from the Scaling paper. variable.Scale // This is the b' <- Sb step from the Scaling paper this.vectorWiDi[variable.Ordinal] = this.vectorWiDi[variable.Ordinal] * variable.Scale } // This is needed for block re-initialization. this.vectorCurY[variable.Ordinal] = variable.ActualPos variable.DesiredPos = variable.ActualPos } } } if (!this.solverParameters.Advanced.ScaleInQpsc) { return } // Now convert mxQ to its scaled form S#QS (noting that the transform of a diagonal matrix S is S // so this is optimized), and we've made the S matrix such that Q[i][i] is 1. The result is in-place // conversion of Q to scaledQ s.t. // for all ii // for all jj // if ii === jj, scaledQ[ii][jj] = 1 // else scaledQ[ii][jj] = Q[ii][jj] * var[ii].scale * var[jj].scale // / for (let rowNum = 0; rowNum < this.matrixQ.length; rowNum++) { const row = this.matrixQ[rowNum] for (let sparseCol = 0; sparseCol < row.length; sparseCol++) { if (row[sparseCol].Column === rowNum) { row[sparseCol].Value = 1 } else { // Diagonal on left scales rows [SQ], on right scales columns [QS]. row[sparseCol].Value = row[sparseCol].Value * (this.vectorQpscVars[rowNum].Variable.Scale * this.vectorQpscVars[row[sparseCol].Column].Variable.Scale) } } } } // end VariablesComplete() // Called by SolveQpsc before the split/project phase. Returns false if the difference in the // function value on the current vs. previous iteration is sufficiently small that we're done. // @@PERF: Right now this is distinct matrix/vector operations. Profiling shows most time // in Qpsc is taken by MatrixVectorMultiply. We could gain a bit of performance by combining // some things but keep it simple unless that's needed. PreProject(): boolean { if (this.isFirstProjectCall) { // Due to MergeEqualityConstraints we may have moved some of the variables. This won't // affect feasibility since QpscMakeFeasible would already have ensured that any unsatisfiable // constraints are so marked. for (const qvar of this.vectorQpscVars) { this.vectorCurY[qvar.Variable.Ordinal] = qvar.Variable.ActualPos } } // // Compute: g = Q'y + b' (in the Scaling paper terminology) // // g(radient) = Q'y... this.MatrixVectorMultiply(this.vectorCurY, this.gradientVector) // If we've minimized the goal function (far enough), we're done. // This uses the Q'y value we've just put into gradientVector and tests the goal-function value // to see if it is sufficiently close to the previous value to be considered converged. if (this.HasConverged()) { return false } // ...g = Q'y + b' Qpsc.VectorVectorAdd(this.gradientVector, this.vectorWiDi, this.gradientVector) // // Compute: alpha = g#g / g#Q'g (# === transpose) // const alphaNumerator: number = Qpsc.VectorVectorMultiply(this.gradientVector, this.gradientVector) // Compute numerator of stepsize let alphaDenominator = 0 if (0 !== alphaNumerator) { this.MatrixVectorMultiply(this.gradientVector, this.vectorQg) alphaDenominator = Qpsc.VectorVectorMultiply(this.vectorQg, this.gradientVector) } if (0 === alphaDenominator) { return false } const alpha: number = alphaNumerator / alphaDenominator // // Store off the current position as the previous position (the paper's y^ (y-hat)), // then calculate the new current position by subtracting the (gradient * alpha) // from it and update the Variables' desired position. // Qpsc.VectorCopy(this.vectorPrevY, this.vectorCurY) // Update d(esiredpos) = y - alpha*g // Use vectorCurY as temp as it is not used again here and is updated at start of PostProject. Qpsc.VectorScaledVectorSubtract(this.vectorPrevY, alpha, this.gradientVector, this.vectorCurY) for (let ii = 0; ii < this.vectorCurY.length; ii++) { this.vectorQpscVars[ii].Variable.DesiredPos = this.vectorCurY[ii] } return true } // end PreProject() // Called by SolveQpsc after the split/project phase. PostProject(): boolean { // // Update our copy of current positions (y-bar from the paper) and deltaY (p in the Scaling paper; y-bar minus y-hat). // for (const qvar of this.vectorQpscVars) { this.vectorCurY[qvar.Variable.Ordinal] = qvar.Variable.ActualPos } // vectorCurY temporarily becomes the p-vector from the Scaling paper since we don't use the "current" // position otherwise, until we reset it at the end. Qpsc.VectorVectorSubtract(this.vectorPrevY, this.vectorCurY, this.vectorCurY) // // Compute: Beta = min(g#p / p#Qp, 1) // const betaNumerator: number = Qpsc.VectorVectorMultiply(this.gradientVector, this.vectorCurY) // Compute numerator of stepsize let beta = 0 if (0 !== betaNumerator) { // Calculate Qp first (matrix ops are associative so (AB)C === A(BC), so calculate the rhs first // with MatrixVectorMultiply). Temporarily hijack vectorQg for this operation. this.MatrixVectorMultiply(this.vectorCurY, this.vectorQg) // Now p#(Qp). const betaDenominator: number = Qpsc.VectorVectorMultiply(this.vectorQg, this.vectorCurY) // Dividing by almost-0 would yield a huge value which we'd cap at 1.0 below. beta = 0 === betaDenominator ? 1 : betaNumerator / betaDenominator if (beta > 1) { // Note: With huge ranges, beta is >>1 here - like 50 or millions. This is expected as // we're dividing by p#Qp where p is potentially quite small. beta = 1 } else if (beta < 0) { // Setting it above 0.0 can move us away from convergence, so set it to 0.0 which leaves // vectorCurY unchanged from vectorPrevY and we'll terminate if there are no splits/violations. // If we were close to convergence in preProject, we could have a significantly negative // beta here, which means we're basically done unless split/project still have stuff to do. beta = 0 } } // Beta numerator is nonzero // Update the "Qpsc-local" copy of the current positions for use in the next loop's PreProject(). Qpsc.VectorScaledVectorSubtract(this.vectorPrevY, beta, this.vectorCurY, this.vectorCurY) this.isFirstProjectCall = false return beta > 0 } // end PostProject() QpscComplete(): number { // Restore original desired position and unscale the actual position. for (const qvar of this.vectorQpscVars) { qvar.Variable.Weight = qvar.OrigWeight qvar.Variable.DesiredPos = qvar.OrigDesiredPos if (this.solverParameters.Advanced.ScaleInQpsc) { // This multiplication essentially does what Constraint.Violation does, so the "satisfied" state // of constraints won't be changed. qvar.Variable.ActualPos = qvar.Variable.ActualPos * qvar.Variable.Scale qvar.Variable.Scale = qvar.OrigScale } } // This was updated to the final function value before HasConverged returned. return this.previousFunctionValue } private HasConverged(): boolean { // // Compute the function value relative to the previous iteration to test convergence: // (x#Ax)/2 + bx + (w d).d Note: final term is from Tim's Mathematica // where the last term (w d).d is constant and, because we only test decreasing value, // can therefore be omitted. // // We don't need to do the Ax operation as this is done as part of PreProject which has // already put this into gradientVector. // const currentFunctionValue: number = this.GetFunctionValue(this.vectorCurY) // If this is not our first PreProject call, test for convergence. let fConverged = false if (!this.isFirstProjectCall) { // Check for convergence. We are monotonically decreasing so prev should be > cur // with some allowance for rounding error. const diff: number = this.previousFunctionValue - currentFunctionValue let quotient = 0 if (diff !== 0) { const divisor = 0 !== this.previousFunctionValue ? this.previousFunctionValue : currentFunctionValue quotient = Math.abs(diff / divisor) } if ( Math.abs(diff) < this.solverParameters.QpscConvergenceEpsilon || Math.abs(quotient) < this.solverParameters.QpscConvergenceQuotient ) { fConverged = true } } // endif !isFirstProjectCall this.previousFunctionValue = currentFunctionValue return fConverged } private GetFunctionValue(positions: number[]): number { // (x#Ax)/2... const value: number = Qpsc.VectorVectorMultiply(this.gradientVector, positions) / 2 // (x'Ax)/2 + bx... return value + Qpsc.VectorVectorMultiply(this.vectorWiDi, positions) } // Returns the dot product of two column vectors (with an "implicit transpose"). private static VectorVectorMultiply(lhs: number[], rhs: number[]): number { // Do not use LINQ's Sum, it slows end-to-end by over 10%. let sum = 0 for (let ii = 0; ii < lhs.length; ii++) { sum = sum + lhs[ii] * rhs[ii] } return sum } // Multiplies matrixQ with the column vector rhs leaving the result in column vector in result[]. private MatrixVectorMultiply(rhs: number[], result: number[]) { // The only matrix we have here is (sparse) matrixQ so it's not a parameter. let rowIndex = 0 for (const row of this.matrixQ) { // Do not use LINQ's Sum, it slows end-to-end by over 10%. let sum = 0 for (const cell of row) { sum = sum + cell.Value * rhs[cell.Column] } result[rowIndex++] = sum } } // Returns the addition result in result[] (which may be lhs or rhs or a different vector). private static VectorVectorAdd(lhs: number[], rhs: number[], result: number[]) { for (let ii = 0; ii < lhs.length; ii++) { result[ii] = lhs[ii] + rhs[ii] } } // Returns the subtraction result in result[] (which may be lhs or rhs or a different vector). private static VectorVectorSubtract(lhs: number[], rhs: number[], result: number[]) { for (let ii = 0; ii < lhs.length; ii++) { result[ii] = lhs[ii] - rhs[ii] } } // Same as VectorVectorSubtract except that rhs is multiplied by the scale value. private static VectorScaledVectorSubtract(lhs: number[], scale: number, rhs: number[], result: number[]) { for (let ii = 0; ii < lhs.length; ii++) { result[ii] = lhs[ii] - scale * rhs[ii] } } // Copies src to dest private static VectorCopy(dest: number[], src: number[]) { for (let ii = 0; ii < src.length; ii++) { dest[ii] = src[ii] } } }