All files / src histogram.ts

96.82% Statements 61/63
78.26% Branches 18/23
87.5% Functions 14/16
100% Lines 51/51

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149            30x             1x 30x 3x 3x 3x 3x 3x 30x 30x 30x 30x   3x 165x 30x 30x   3x   1x 25x 25x 36x 9x     16x   1x 25x 25x 53x 22x     3x                 1x 60x 4x   4x 4x 4x 25x 25x 25x 25x 25x 25x 25x 13x 13x 13x   25x   25x 11x     4x                                 30x 7x                                                                                                  
/*!
 * Copyright (c) Microsoft. All rights reserved.
 * Licensed under the MIT license. See LICENSE file in the project.
 */
 
/**
 * The Bin holds an array of values that match the bounding criteria,
 * along with an additional pair of properties indicating that bounding.
 */
export interface Bin extends Array<number> {
	/**
	 * Minimum _computed_ value for the bin
	 */
	x0: number
	/**
	 * Maximum _computed_ value for the bin
	 */
	x1: number
}
 
/**
 * A Histogram is just an array of Bins.
 * This follows the d3 format, so these outputs can be used as drop-ins to
 * any code that expects a d3-style histogram.
 */
export type Histogram = Array<Bin>
 
const extent = (values: number[]) =>
	values.reduce(
		(acc, cur) => [Math.min(acc[0], cur), Math.max(acc[1], cur)],
		[Number.MAX_VALUE, Number.MIN_VALUE],
	)
 
const standardHistogram = (
	data: any[],
	bins: number,
	accessor = (d: any) => d,
): Histogram => {
	const values = data.map((d) => accessor(d))
	const ext = extent(values)
	const [min, max] = ext
	const range = max - min
	const binSize = range / bins
	const binStructure = new Array(bins).fill(1).map((d, i) => {
		const bin: any = []
		bin.x0 = binSize * i + min
		bin.x1 = binSize * (i + 1) + min
		return bin
	})
	const histo = values.reduce((acc, cur) => {
		const index = binStructure.findIndex(
			(bin) => cur >= bin.x0 && cur <= bin.x1,
		)
		acc[index < 0 ? acc.length - 1 : index].push(cur)
		return acc
	}, binStructure)
	return histo
}
 
const findForwardBreak = (data: number[], start: number) => {
	const value = data[start - 1]
	for (let i = start; i < data.length; i++) {
		if (data[i] !== value) {
			return i - start
		}
	}
	return data.length - start
}

const findBackwardBreak = (data: number[], start: number) => {
	const value = data[start - 1]
	for (let i = start - 1; i >= 0; i--) {
		if (data[i] !== value) {
			return start - i - 1
		}
	}
	return start
}
 
/**
 * Quantize a list of data values into n bins
 * This adds a correction to make sure bins don't break on the same value,
 * so that values only have one correct bin to lie within.
 * Some distributions need this correction because they have such an extreme shape (e.g., zipf)
 * @param data
 * @param bins
 */
const quantizeHistogram = (
	data: any[],
	bins: number,
	accessor = (d: any) => d,
	smooth?: boolean,
): Histogram => {
	const values = data
		.map((d: any) => accessor(d))
		.sort((a: number, b: number) => a - b)
	let binLength = Math.ceil(values.length / bins) // starting bin length is always the ideal
	const binStructure: any = []
	let start = 0
	for (let i = 0; i < bins; i++) {
		const end = start + binLength
		const forward = findForwardBreak(values, end)
		const backward = findBackwardBreak(values, end)
		const moveBackward = backward < forward && backward < binLength
		const newEnd = smooth && moveBackward ? end - backward : end + forward
		const bin: any = values.slice(start, newEnd)
		if (bin.length > 0) {
			bin.x0 = bin[0]
			bin.x1 = bin[bin.length - 1]
			binStructure.push(bin)
		}
 
		start = newEnd
		// recalculate bin length with remaining values and bins, in case a really large bin skewed everything
		if (newEnd !== end) {
			binLength = Math.ceil((values.length - newEnd) / (bins - (i + 1)))
		}
	}
	return binStructure
}
 
/**
 * Creates a histogram from an array of objects.
 * The histogram will be populated with numeric values.
 * If your input data is not numeric, supply an accessor function.
 * By default, a standard equal-interval histogram will be created.
 * If the quantize parameter is set, it will instead use quantiles (i.e., equal-length bins).
 * If you are quantizing, you can also set a smooth parameter to indicate that breaks in the data should try to minimize variance.
 *
 * Note on quantiles: the bin count and length are ideals, but not guaranteed.
 * The way the quantiles are calculated, a precise number of bins can't be guaranteed unless you will accept empty bins.
 * For example, if your dataset is [1,1,1,1,1,2,2], you will always only get back two bins.
 * Depending on the data distribution, we also may not be able to assure each bin is the same length as the others.
 * Using the same example dataset, here we would have one bin with 5 items, and the other with 2.
 * We _could_ split them evenly and put some of the 1s in the 2 bin, but that seems less preferable.
 *
 */
export const histogram = (
	data: any[],
	bins: number,
	accessor = (d: any) => d,
	quantize?: boolean,
	smooth?: boolean,
): Histogram => {
	return quantize
		? quantizeHistogram(data, bins, accessor, smooth)
		: standardHistogram(data, bins, accessor)
}