UNPKG

135 kB · JavaScript · View Raw
1'use strict';
2
3Object.defineProperty(exports, '__esModule', { value: true });
4
/**
 * [Simple linear regression](http://en.wikipedia.org/wiki/Simple_linear_regression)
 * fits a straight line to a set of coordinates by minimizing the sum of
 * squared residuals (ordinary least squares).
 *
 * @param {Array<Array<number>>} data an array of two-element arrays,
 * like `[[0, 1], [2, 3]]`
 * @returns {Object} object with `m` (slope) and `b` (y-intercept) of the
 * fitted regression line
 * @example
 * linearRegression([[0, 0], [1, 1]]); // => { m: 1, b: 0 }
 */
function linearRegression(data) {
    var slope, intercept;

    // Cache the length so the loop below doesn't repeat the property lookup.
    var n = data.length;

    if (n === 1) {
        // With a single point the line is arbitrary: use a horizontal line
        // passing through that point's y value.
        slope = 0;
        intercept = data[0][1];
    } else {
        // Accumulate the classical least-squares sums:
        // SS_x, SS_y, SS_xx and SS_xy in math notation.
        var sx = 0;
        var sy = 0;
        var sxx = 0;
        var sxy = 0;

        for (var i = 0; i < n; i++) {
            var point = data[i];
            var px = point[0];
            var py = point[1];

            sx += px;
            sy += py;
            sxx += px * px;
            sxy += px * py;
        }

        // Slope of the regression line.
        slope = (n * sxy - sx * sy) / (n * sxx - sx * sx);

        // Intercept: the mean of y minus the slope applied to the mean of x.
        intercept = sy / n - (slope * sx) / n;
    }

    return {
        m: slope,
        b: intercept
    };
}
73
/**
 * Given the output of `linearRegression` — an object with `m` and `b`
 * members for slope and intercept — build a line function that maps
 * x values to y values.
 *
 * @param {Object} mb object with `m` and `b` members, representing
 * slope and intercept of the desired line
 * @returns {Function} method that computes the y-value at any given
 * x-value on the line.
 * @example
 * var l = linearRegressionLine(linearRegression([[0, 0], [1, 1]]));
 * l(0) // = 0
 * l(2) // = 2
 * linearRegressionLine({ b: 0, m: 1 })(1); // => 1
 * linearRegressionLine({ b: 1, m: 1 })(1); // => 2
 */
function linearRegressionLine(mb /*: { b: number, m: number }*/) {
    // Read `m` and `b` from the object at call time, so later mutations
    // of `mb` are reflected by the returned function.
    return function (x) {
        return mb.m * x + mb.b;
    };
}
99
/**
 * Our default sum is the [Kahan-Babuska algorithm](https://pdfs.semanticscholar.org/1760/7d467cda1d0277ad272deb2113533131dc09.pdf),
 * an improvement over the classical
 * [Kahan summation algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm).
 * It sums a list of numbers while compensating for floating-point roundoff:
 * naive repeated addition accumulates an error term with every step, and
 * this algorithm tracks and re-applies that lost low-order error.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x input
 * @return {number} sum of all input numbers
 * @example
 * sum([1, 2, 3]); // => 6
 */
function sum(x) {
    // The sum of nothing is zero.
    if (x.length === 0) {
        return 0;
    }

    // Running total, seeded with the first element.
    var total = x[0];

    // Accumulated low-order bits lost to floating-point rounding.
    var compensation = 0;

    for (var i = 1; i < x.length; i++) {
        var next = total + x[i];

        // The error term is recovered differently depending on which
        // operand dominates the addition in magnitude.
        if (Math.abs(total) >= Math.abs(x[i])) {
            compensation += total - next + x[i];
        } else {
            compensation += x[i] - next + total;
        }

        total = next;
    }

    // Fold the accumulated correction back into the result.
    return total + compensation;
}
149
/**
 * The mean, _also known as average_, is the sum of all values divided by
 * the number of values. This is a
 * [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x sample of one or more data points
 * @throws {Error} if the length of x is less than one
 * @returns {number} mean
 * @example
 * mean([0, 10]); // => 5
 */
function mean(x) {
    // The mean of an empty sample is undefined.
    if (x.length === 0) {
        throw new Error("mean requires at least one data point");
    }

    // Uses the error-compensated `sum` for better floating-point accuracy.
    return sum(x) / x.length;
}
171
/**
 * The sum of deviations from the mean, each raised to the Nth power.
 * With n=2 this is the sum of squared deviations;
 * with n=3 the sum of cubed deviations.
 *
 * @param {Array<number>} x sample of one or more data points
 * @param {number} n power
 * @returns {number} sum of nth power deviations
 *
 * @example
 * var input = [1, 2, 3];
 * // since the variance of a set is the mean squared
 * // deviations, we can calculate that with sumNthPowerDeviations:
 * sumNthPowerDeviations(input, 2) / input.length;
 */
function sumNthPowerDeviations(x, n) {
    var center = mean(x);
    var total = 0;
    var i;

    if (n === 2) {
        // Fast path: for squares, plain multiplication is significantly
        // faster than going through Math.pow.
        for (i = 0; i < x.length; i++) {
            var deviation = x[i] - center;
            total += deviation * deviation;
        }
    } else {
        // General path for any other exponent.
        for (i = 0; i < x.length; i++) {
            total += Math.pow(x[i] - center, n);
        }
    }

    return total;
}
209
/**
 * The [variance](http://en.wikipedia.org/wiki/Variance)
 * is the mean of the squared deviations from the mean.
 *
 * This is the population variance, not the sample variance:
 * see the `sampleVariance` method if you want a sample measure.
 *
 * @param {Array<number>} x a population of one or more data points
 * @returns {number} variance: a value greater than or equal to zero.
 * zero indicates that all values are identical.
 * @throws {Error} if x's length is 0
 * @example
 * variance([1, 2, 3, 4, 5, 6]); // => 2.9166666666666665
 */
function variance(x) {
    // The variance of an empty population is undefined.
    if (x.length === 0) {
        throw new Error("variance requires at least one data point");
    }

    // Average of the squared deviations from the mean.
    return sumNthPowerDeviations(x, 2) / x.length;
}
233
/**
 * The [standard deviation](http://en.wikipedia.org/wiki/Standard_deviation)
 * is the square root of the variance. This is also known as the population
 * standard deviation. It's useful for measuring the amount
 * of variation or dispersion in a set of values.
 *
 * Standard deviation is only appropriate for full-population knowledge: for
 * samples of a population, {@link sampleStandardDeviation} is
 * more appropriate.
 *
 * @param {Array<number>} x input
 * @returns {number} standard deviation
 * @example
 * variance([2, 4, 4, 4, 5, 5, 7, 9]); // => 4
 * standardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); // => 2
 */
function standardDeviation(x) {
    // A single observation has no spread.
    if (x.length === 1) {
        return 0;
    }
    return Math.sqrt(variance(x));
}
257
/**
 * The [R Squared](http://en.wikipedia.org/wiki/Coefficient_of_determination)
 * value of data compared with a function `f`
 * measures how well predictions match actual values: it is one minus the
 * ratio of the squared prediction error to the total sum of squares.
 *
 * @param {Array<Array<number>>} x input data: this should be doubly-nested
 * @param {Function} func function called on `[i][0]` values within the dataset
 * @returns {number} r-squared value
 * @example
 * var samples = [[0, 0], [1, 1]];
 * var regressionLine = linearRegressionLine(linearRegression(samples));
 * rSquared(samples, regressionLine); // = 1 this line is a perfect fit
 */
function rSquared(x, func) {
    // With fewer than two points any line fits perfectly.
    if (x.length < 2) {
        return 1;
    }

    // Mean of the observed y values, needed for the total sum of squares.
    var total = 0;
    for (var i = 0; i < x.length; i++) {
        total += x[i][1];
    }
    var avg = total / x.length;

    // Total sum of squares: squared distance of each observed y value
    // from the mean of all observed y values.
    var sumOfSquares = 0;
    for (var j = 0; j < x.length; j++) {
        sumOfSquares += Math.pow(avg - x[j][1], 2);
    }

    // Residual error: squared distance between each prediction and the
    // actual observed value.
    var err = 0;
    for (var k = 0; k < x.length; k++) {
        err += Math.pow(x[k][1] - func(x[k][0]), 2);
    }

    // A larger error relative to the total sum of squares drives the
    // r-squared value down toward (and possibly below) zero.
    return 1 - err / sumOfSquares;
}
307
/**
 * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number
 * that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, this
 * algorithm will return the most recently seen mode.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n)` because the input is sorted.
 *
 * @param {Array<number>} sorted a sample of one or more data points
 * @returns {number} mode
 * @throws {Error} if sorted is empty
 * @example
 * modeSorted([0, 0, 1]); // => 0
 */
function modeSorted(sorted) {
    // Edge cases: an empty list has no mode; a one-element list is
    // trivially its own mode.
    if (sorted.length === 0) {
        throw new Error("mode requires at least one data point");
    }
    if (sorted.length === 1) {
        return sorted[0];
    }

    // Best run found so far.
    var modeValue = NaN;
    var bestCount = 0;

    // Current run of equal values, starting with the first element.
    var runValue = sorted[0];
    var runCount = 1;

    // Iterate one slot past the end: on the final pass, sorted[i] is
    // undefined, which forces the last run to be evaluated against the
    // best run seen so far.
    for (var i = 1; i <= sorted.length; i++) {
        if (sorted[i] !== runValue) {
            // The run just ended; promote it if it is strictly longer
            // than the current best.
            if (runCount > bestCount) {
                bestCount = runCount;
                modeValue = runValue;
            }
            runValue = sorted[i];
            runCount = 1;
        } else {
            // Same value again: extend the current run.
            runCount++;
        }
    }

    return modeValue;
}
369
/**
 * Sort an array of numbers by their numeric value, ensuring that the
 * array is not changed in place.
 *
 * This is necessary because the default behavior of .sort
 * in JavaScript is to sort arrays as string values
 *
 *     [1, 10, 12, 102, 20].sort()
 *     // output
 *     [1, 10, 102, 12, 20]
 *
 * @param {Array<number>} x input array
 * @return {Array<number>} sorted array
 * @private
 * @example
 * numericSort([3, 2, 1]) // => [1, 2, 3]
 */
function numericSort(x) {
    // Copy first: .sort mutates, and callers keep their original array.
    var copy = x.slice();
    // Numeric comparator; without it .sort compares as strings.
    copy.sort(function (a, b) {
        return a - b;
    });
    return copy;
}
398
/**
 * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number
 * that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, this
 * algorithm will return the most recently seen mode.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * This runs in `O(n log(n))` because it needs to sort the array internally
 * before running an `O(n)` search to find the mode.
 *
 * @param {Array<number>} x input
 * @returns {number} mode
 * @example
 * mode([0, 0, 1]); // => 0
 */
function mode(x) {
    // Sorting groups equal values into contiguous runs, which is the
    // precondition `modeSorted` relies on for its single pass.
    var sorted = numericSort(x);
    return modeSorted(sorted);
}
422
423/* globals Map: false */
424
/**
 * The [mode](https://en.wikipedia.org/wiki/Mode_%28statistics%29) is the number
 * that appears in a list the highest number of times.
 * There can be multiple modes in a list: in the event of a tie, this
 * algorithm will return the most recently seen mode.
 *
 * modeFast uses a Map object to keep track of the mode, instead of the approach
 * used with `mode`, a sorted array. As a result, it is faster
 * than `mode` and supports any data type that can be compared with `==`.
 * It also requires a
 * [JavaScript environment with support for Map](https://kangax.github.io/compat-table/es6/#test-Map),
 * and will throw an error if Map is not available.
 *
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * @param {Array<*>} x a sample of one or more data points
 * @returns {?*} mode
 * @throws {ReferenceError} if the JavaScript environment doesn't support Map
 * @throws {Error} if x is empty
 * @example
 * modeFast(['rabbits', 'rabbits', 'squirrels']); // => 'rabbits'
 */
function modeFast(x) {
    // Incidence count of each distinct value, keyed by the value itself:
    // { value: count }
    var index = new Map();

    // Running mode and the number of times it has been encountered.
    var mode;
    var modeCount = 0;

    for (var i = 0; i < x.length; i++) {
        var newCount = index.get(x[i]);
        if (newCount === undefined) {
            newCount = 1;
        } else {
            newCount++;
        }
        // Strict > keeps the earliest-reached maximum in case of a tie.
        if (newCount > modeCount) {
            mode = x[i];
            modeCount = newCount;
        }
        index.set(x[i], newCount);
    }

    // modeCount stays 0 only when the input was empty.
    // Fixed typo in the message: "at last" -> "at least".
    if (modeCount === 0) {
        throw new Error("mode requires at least one data point");
    }

    return mode;
}
478
/**
 * The min is the lowest number in the array.
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x sample of one or more data points
 * @throws {Error} if the length of x is less than one
 * @returns {number} minimum value
 * @example
 * min([1, 5, -10, 100, 2]); // => -10
 */
function min(x) {
    // An empty sample has no minimum.
    if (x.length === 0) {
        throw new Error("min requires at least one data point");
    }

    // Start with the first element and keep the smaller of each pair.
    var smallest = x[0];
    for (var i = 1; i < x.length; i++) {
        if (x[i] < smallest) {
            smallest = x[i];
        }
    }
    return smallest;
}
502
/**
 * This computes the maximum number in an array.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x sample of one or more data points
 * @returns {number} maximum value
 * @throws {Error} if the length of x is less than one
 * @example
 * max([1, 2, 3, 4]);
 * // => 4
 */
function max(x) {
    // An empty sample has no maximum.
    if (x.length === 0) {
        throw new Error("max requires at least one data point");
    }

    // Start with the first element and keep the larger of each pair.
    var largest = x[0];
    for (var i = 1; i < x.length; i++) {
        if (x[i] > largest) {
            largest = x[i];
        }
    }
    return largest;
}
528
/**
 * This computes the minimum & maximum number in an array in a single pass.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x sample of one or more data points
 * @returns {Array<number>} minimum & maximum value
 * @throws {Error} if the length of x is less than one
 * @example
 * extent([1, 2, 3, 4]);
 * // => [1, 4]
 */
function extent(x) {
    // An empty sample has neither minimum nor maximum.
    if (x.length === 0) {
        throw new Error("extent requires at least one data point");
    }

    // Track both bounds while scanning the array once.
    var lowest = x[0];
    var highest = x[0];
    for (var i = 1; i < x.length; i++) {
        if (x[i] > highest) {
            highest = x[i];
        }
        if (x[i] < lowest) {
            lowest = x[i];
        }
    }
    return [lowest, highest];
}
558
/**
 * The minimum is the lowest number in the array. With a sorted array,
 * the first element is always the smallest, so this is a constant-time
 * lookup.
 *
 * @param {Array<number>} x input, sorted ascending
 * @returns {number} minimum value
 * @example
 * minSorted([-100, -10, 1, 2, 5]); // => -100
 */
function minSorted(x) {
    // Ascending order guarantees the head is the minimum.
    return x[0];
}
572
/**
 * The maximum is the highest number in the array. With a sorted array,
 * the last element is always the largest, so this is a constant-time
 * lookup.
 *
 * @param {Array<number>} x input, sorted ascending
 * @returns {number} maximum value
 * @example
 * maxSorted([-100, -10, 1, 2, 5]); // => 5
 */
function maxSorted(x) {
    // Ascending order guarantees the tail is the maximum.
    var lastIndex = x.length - 1;
    return x[lastIndex];
}
586
/**
 * The extent is the lowest & highest number in the array. With a sorted
 * array, the first element is the lowest and the last is the highest, so
 * this is a constant-time lookup.
 *
 * @param {Array<number>} x input, sorted ascending
 * @returns {Array<number>} minimum & maximum value
 * @example
 * extentSorted([-100, -10, 1, 2, 5]); // => [-100, 5]
 */
function extentSorted(x) {
    // Head and tail of an ascending array bound every element.
    var lowest = x[0];
    var highest = x[x.length - 1];
    return [lowest, highest];
}
600
/**
 * The simple [sum](https://en.wikipedia.org/wiki/Summation) of an array
 * is the result of adding all numbers together, starting from zero.
 *
 * Unlike `sum`, this does no floating-point error compensation.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x input
 * @return {number} sum of all input numbers
 * @example
 * sumSimple([1, 2, 3]); // => 6
 */
function sumSimple(x) {
    var total = 0;
    // Accumulate left to right, matching plain repeated addition.
    for (var i = 0; i < x.length; i++) {
        total += x[i];
    }
    return total;
}
619
/**
 * The [product](https://en.wikipedia.org/wiki/Product_(mathematics)) of an array
 * is the result of multiplying all numbers together, starting with one as
 * the multiplicative identity.
 *
 * This runs in `O(n)`, linear time, with respect to the length of the array.
 *
 * @param {Array<number>} x input
 * @return {number} product of all input numbers
 * @example
 * product([1, 2, 3, 4]); // => 24
 */
function product(x) {
    var result = 1;
    // Multiply left to right; an empty array yields the identity, 1.
    for (var i = 0; i < x.length; i++) {
        result *= x[i];
    }
    return result;
}
638
/**
 * This is the internal implementation of quantiles: when you know
 * that the order is sorted, you don't need to re-sort it, and the
 * computations are faster.
 *
 * @param {Array<number>} x sample of one or more data points, sorted ascending
 * @param {number} p desired quantile: a number between 0 to 1, inclusive
 * @returns {number} quantile value
 * @throws {Error} if p is outside of the range from 0 to 1
 * @throws {Error} if x is empty
 * @example
 * quantileSorted([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantileSorted(x, p) {
    // Validate the input before doing any index arithmetic.
    if (x.length === 0) {
        throw new Error("quantile requires at least one data point.");
    }
    if (p < 0 || p > 1) {
        throw new Error("quantiles must be between 0 and 1");
    }

    // The extremes map directly onto the ends of the array.
    if (p === 1) {
        return x[x.length - 1];
    }
    if (p === 0) {
        return x[0];
    }

    var idx = x.length * p;
    if (idx % 1 !== 0) {
        // Fractional index: take the next element up.
        return x[Math.ceil(idx) - 1];
    }
    if (x.length % 2 === 0) {
        // Even-length list with an integer index: average the two
        // neighboring values.
        return (x[idx - 1] + x[idx]) / 2;
    }
    // Odd-length list with an integer index: direct lookup.
    return x[idx];
}
677
/**
 * Rearrange items in `arr` so that all items in `[left, k]` range are the smallest.
 * The `k`-th element will have the `(k - left + 1)`-th smallest value in `[left, right]`.
 *
 * Implements Floyd-Rivest selection algorithm https://en.wikipedia.org/wiki/Floyd-Rivest_algorithm
 *
 * Mutates `arr` in place and returns nothing; afterwards `arr[k]` holds the
 * k-th order statistic and everything left of index `k` is <= `arr[k]`.
 *
 * @param {Array<number>} arr input array
 * @param {number} k pivot index
 * @param {number} [left] left index
 * @param {number} [right] right index
 * @returns {void} mutates input array
 * @example
 * var arr = [65, 28, 59, 33, 21, 56, 22, 95, 50, 12, 90, 53, 28, 77, 39];
 * quickselect(arr, 8);
 * // = [39, 28, 28, 33, 21, 12, 22, 50, 53, 56, 59, 65, 90, 77, 95]
 */
function quickselect(arr, k, left, right) {
    // Default to the whole array. NOTE(review): `||` means an explicit
    // left of 0 is fine, but right === 0 would also fall back to the
    // array end — callers appear to pass both bounds or neither.
    left = left || 0;
    right = right || arr.length - 1;

    while (right > left) {
        // 600 and 0.5 are arbitrary constants chosen in the original paper to minimize execution time
        if (right - left > 600) {
            // For large ranges, first recurse on a smaller sample-based
            // subrange chosen so that k is likely to land inside it.
            var n = right - left + 1;
            var m = k - left + 1;
            var z = Math.log(n);
            var s = 0.5 * Math.exp((2 * z) / 3);
            var sd = 0.5 * Math.sqrt((z * s * (n - s)) / n);
            if (m - n / 2 < 0) { sd *= -1; }
            var newLeft = Math.max(left, Math.floor(k - (m * s) / n + sd));
            var newRight = Math.min(
                right,
                Math.floor(k + ((n - m) * s) / n + sd)
            );
            quickselect(arr, k, newLeft, newRight);
        }

        // Partition [left, right] around the pivot value t = arr[k].
        var t = arr[k];
        var i = left;
        var j = right;

        // Move the pivot to the front; ensure arr[right] <= t so the
        // inner scans below cannot run off the ends of the range.
        swap(arr, left, k);
        if (arr[right] > t) { swap(arr, left, right); }

        // Classic two-pointer partition: walk i forward and j backward,
        // exchanging out-of-place elements until the pointers cross.
        while (i < j) {
            swap(arr, i, j);
            i++;
            j--;
            while (arr[i] < t) { i++; }
            while (arr[j] > t) { j--; }
        }

        // Put the pivot into its final position at index j.
        if (arr[left] === t) { swap(arr, left, j); }
        else {
            j++;
            swap(arr, j, right);
        }

        // Narrow the working range to whichever side still contains k.
        if (j <= k) { left = j + 1; }
        if (k <= j) { right = j - 1; }
    }
}
740
// Exchange the elements at indices `i` and `j` of `arr` in place.
function swap(arr, i, j) {
    var held = arr[j];
    arr[j] = arr[i];
    arr[i] = held;
}
746
/**
 * The [quantile](https://en.wikipedia.org/wiki/Quantile):
 * this is a population quantile, since we assume to know the entire
 * dataset in this library. This is an implementation of the
 * [Quantiles of a Population](http://en.wikipedia.org/wiki/Quantile#Quantiles_of_a_population)
 * algorithm from wikipedia.
 *
 * Sample is a one-dimensional array of numbers,
 * and p is either a decimal number from 0 to 1 or an array of decimal
 * numbers from 0 to 1.
 * In terms of a k/q quantile, p = k/q - it's just dealing with fractions or dealing
 * with decimal values.
 * When p is an array, the result of the function is also an array containing the appropriate
 * quantiles in input order
 *
 * @param {Array<number>} x sample of one or more numbers
 * @param {Array<number> | number} p the desired quantile, as a number between 0 and 1
 * @returns {number} quantile
 * @example
 * quantile([3, 6, 7, 8, 8, 9, 10, 13, 15, 16, 20], 0.5); // => 9
 */
function quantile(x, p) {
    // Work on a copy: the selection steps below reorder elements.
    var working = x.slice();

    if (Array.isArray(p)) {
        // Partially sort the copy so that every element at a requested
        // quantile position is where full sorting would place it.
        multiQuantileSelect(working, p);
        // Read each quantile off the partially-sorted copy, preserving
        // the input order of `p`.
        var quantiles = [];
        for (var i = 0; i < p.length; i++) {
            quantiles[i] = quantileSorted(working, p[i]);
        }
        return quantiles;
    }

    // Single quantile: only the relevant position needs to be selected
    // into place before reading the value.
    var idx = quantileIndex(working.length, p);
    quantileSelect(working, idx, 0, working.length - 1);
    return quantileSorted(working, p);
}
788
// Select the element(s) for a quantile position into place. An integer k
// needs a single selection; a fractional k (the even-length average case)
// needs both neighboring order statistics selected.
function quantileSelect(arr, k, left, right) {
    if (k % 1 !== 0) {
        k = Math.floor(k);
        quickselect(arr, k, left, right);
        quickselect(arr, k + 1, k + 1, right);
    } else {
        quickselect(arr, k, left, right);
    }
}
798
// Partially sort `arr` so that every index corresponding to one of the
// requested quantiles in `p` holds the value a full sort would place there.
// Works by recursively (via an explicit stack) running quantileSelect on
// the median quantile index of each sub-range, divide-and-conquer style.
function multiQuantileSelect(arr, p) {
    // Collect the target indices, bracketed by the array's first and
    // last positions, then sort them numerically.
    var indices = [0];
    for (var i = 0; i < p.length; i++) {
        indices.push(quantileIndex(arr.length, p[i]));
    }
    indices.push(arr.length - 1);
    indices.sort(compare);

    // Each stack entry is a pair (l, r) of positions into `indices`.
    var stack = [0, indices.length - 1];

    while (stack.length) {
        // Pop in reverse push order: r first, then l. Indices may be
        // fractional (the even-length average case), so round outward.
        var r = Math.ceil(stack.pop());
        var l = Math.floor(stack.pop());
        if (r - l <= 1) { continue; }

        // Select the middle quantile index within this range, then
        // process the two halves independently.
        var m = Math.floor((l + r) / 2);
        quantileSelect(
            arr,
            indices[m],
            Math.floor(indices[l]),
            Math.ceil(indices[r])
        );

        stack.push(l, m, m, r);
    }
}
825
// Numeric ascending comparator for Array.prototype.sort.
function compare(a, b) {
    return a - b;
}
829
// Map a quantile p of an array of length `len` onto an index. A value
// like 1.5 (halfway between two integer indices) signals that the
// quantile is the average of the two surrounding elements.
function quantileIndex(len, p) {
    // The extremes map onto the first and last indices directly.
    if (p === 1) {
        return len - 1;
    }
    if (p === 0) {
        return 0;
    }

    var idx = len * p;
    if (idx % 1 !== 0) {
        // Fractional product: the next element down holds the quantile.
        return Math.ceil(idx) - 1;
    }
    if (len % 2 === 0) {
        // Even-length list: return the midpoint between the two indices
        // whose values must be averaged.
        return idx - 0.5;
    }
    // Odd-length list with an integer product: use it as-is.
    return idx;
}
851
852/* eslint no-bitwise: 0 */
853
/**
 * This function returns the quantile in which one would find the given value in
 * the given array. With a sorted array, leveraging binary search, we can find
 * this information in logarithmic time.
 *
 * @param {Array<number>} x input, sorted ascending
 * @param {number} value the value whose quantile rank is wanted
 * @returns {number} value value
 * @example
 * quantileRankSorted([1, 2, 3, 4], 3); // => 0.75
 * quantileRankSorted([1, 2, 3, 3, 4], 3); // => 0.7
 * quantileRankSorted([1, 2, 3, 4], 6); // => 1
 * quantileRankSorted([1, 2, 3, 3, 5], 4); // => 0.8
 */
function quantileRankSorted(x, value) {
    // Below every element: rank 0.
    if (value < x[0]) {
        return 0;
    }

    // Above every element: rank 1.
    if (value > x[x.length - 1]) {
        return 1;
    }

    var l = lowerBound(x, value);

    // Value absent from the array: the insertion point gives the rank.
    if (x[l] !== value) {
        return l / x.length;
    }

    l++;

    var u = upperBound(x, value);

    // Value occurs exactly once.
    if (u === l) {
        return l / x.length;
    }

    // Value occurs multiple times: use the mean of its 1-based rank
    // range [l, u], computed with the arithmetic-series formula rather
    // than materializing the range.
    var count = u - l + 1;
    var rankSum = (count * (u + l)) / 2;
    var meanRank = rankSum / count;

    return meanRank / x.length;
}
904
// Binary search: index of the first element of sorted array `x` that is
// not less than `value` (the left edge of the run of equal values, or
// the insertion point when the value is absent).
function lowerBound(x, value) {
    var lo = 0;
    var hi = x.length;

    while (lo < hi) {
        var mid = (lo + hi) >>> 1;
        if (x[mid] < value) {
            // The target lies strictly to the right of mid.
            lo = mid + 1;
        } else {
            // x[mid] is a candidate; keep searching to its left.
            hi = mid;
        }
    }

    return lo;
}
922
// Binary search: index of the first element of sorted array `x` that is
// strictly greater than `value` (one past the run of equal values).
function upperBound(x, value) {
    var lo = 0;
    var hi = x.length;

    while (lo < hi) {
        var mid = (lo + hi) >>> 1;
        if (x[mid] > value) {
            // x[mid] already exceeds the value; keep searching left.
            hi = mid;
        } else {
            // The answer lies strictly to the right of mid.
            lo = mid + 1;
        }
    }

    return lo;
}
940
/**
 * This function returns the quantile in which one would find the given value in
 * the given array. It will copy and sort your array before each run, so
 * if you know your array is already sorted, you should use `quantileRankSorted`
 * instead.
 *
 * @param {Array<number>} x input
 * @param {number} value the value whose quantile rank is wanted
 * @returns {number} value value
 * @example
 * quantileRank([4, 3, 1, 2], 3); // => 0.75
 * quantileRank([4, 3, 2, 3, 1], 3); // => 0.7
 * quantileRank([2, 4, 1, 3], 6); // => 1
 * quantileRank([5, 3, 1, 2, 3], 4); // => 0.8
 */
function quantileRank(x, value) {
    // numericSort copies, so the caller's array is untouched.
    return quantileRankSorted(numericSort(x), value);
}
961
/**
 * The [Interquartile range](http://en.wikipedia.org/wiki/Interquartile_range) is
 * a measure of statistical dispersion, or how scattered, spread, or
 * concentrated a distribution is. It's computed as the difference between
 * the third quartile and first quartile.
 *
 * @param {Array<number>} x sample of one or more numbers
 * @returns {number} interquartile range: the span between lower and upper quartile,
 * 0.25 and 0.75
 * @example
 * interquartileRange([0, 1, 2, 3]); // => 2
 */
function interquartileRange(x) {
    // The span between the upper quartile (0.75) and the lower
    // quartile (0.25).
    var upper = quantile(x, 0.75);
    var lower = quantile(x, 0.25);

    // Guard mirrors the upstream code: quantile returns an array when
    // given an array of p-values, in which case this yields undefined.
    if (typeof upper === "number" && typeof lower === "number") {
        return upper - lower;
    }
}
984
/**
 * The [median](http://en.wikipedia.org/wiki/Median) is
 * the middle number of a list. This is often a good indicator of 'the middle'
 * when there are outliers that skew the `mean()` value.
 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
 * a method of finding a typical or central value of a set of numbers.
 *
 * The median isn't necessarily one of the elements in the list: the value
 * can be the average of two elements if the list has an even length
 * and the two central values are different.
 *
 * @param {Array<number>} x input
 * @returns {number} median value
 * @example
 * median([10, 2, 5, 100, 2, 1]); // => 3.5
 */
function median(x) {
    // The median is the 0.5 quantile; coerce to a primitive number.
    return Number(quantile(x, 0.5));
}
1004
/**
 * The [Median Absolute Deviation](http://en.wikipedia.org/wiki/Median_absolute_deviation) is
 * a robust measure of statistical
 * dispersion. It is more resilient to outliers than the standard deviation.
 *
 * @param {Array<number>} x input array
 * @returns {number} median absolute deviation
 * @example
 * medianAbsoluteDeviation([1, 1, 2, 2, 4, 6, 9]); // => 1
 */
function medianAbsoluteDeviation(x) {
    var center = median(x);

    // Absolute deviation of every point from the median...
    var deviations = x.map(function (value) {
        return Math.abs(value - center);
    });

    // ...and the median of those deviations is the MAD.
    return median(deviations);
}
1027
/**
 * Split an array into chunks of a specified size. This function
 * has the same behavior as [PHP's array_chunk](http://php.net/manual/en/function.array-chunk.php)
 * function, and thus will insert smaller-sized chunks at the end if
 * the input size is not divisible by the chunk size.
 *
 * `x` is expected to be an array, and `chunkSize` a number.
 * The `x` array can contain any kind of data.
 *
 * @param {Array} x a sample
 * @param {number} chunkSize size of each output array. must be a positive integer
 * @returns {Array<Array>} a chunked array
 * @throws {Error} if chunk size is less than 1 or not an integer
 * @example
 * chunk([1, 2, 3, 4, 5, 6], 2);
 * // => [[1, 2], [3, 4], [5, 6]]
 */
function chunk(x, chunkSize) {
    // A non-positive chunk size would make the loop below advance by
    // zero (or backwards) and never terminate, so reject it up front.
    if (chunkSize < 1) {
        throw new Error("chunk size must be a positive number");
    }

    if (Math.floor(chunkSize) !== chunkSize) {
        throw new Error("chunk size must be an integer");
    }

    var result = [];

    // Step through the array `chunkSize` elements at a time; .slice
    // copies, so the input array is never modified, and the final slice
    // is simply shorter when the length isn't evenly divisible.
    for (var offset = 0; offset < x.length; offset += chunkSize) {
        result.push(x.slice(offset, offset + chunkSize));
    }

    return result;
}
1071
1072/**
1073 * Sampling with replacement is a type of sampling that allows the same
1074 * item to be picked out of a population more than once.
1075 *
1076 * @param {Array<*>} x an array of any kind of value
1077 * @param {number} n count of how many elements to take
1078 * @param {Function} [randomSource=Math.random] an optional entropy source that
1079 * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)
1080 * @return {Array} n sampled items from the population
1081 * @example
1082 * var values = [1, 2, 3, 4];
1083 * sampleWithReplacement(values, 2); // returns 2 random values, like [2, 4];
1084 */
1085function sampleWithReplacement(x, n, randomSource) {
1086 if (x.length === 0) {
1087 return [];
1088 }
1089
1090 // a custom random number source can be provided if you want to use
1091 // a fixed seed or another random number generator, like
1092 // [random-js](https://www.npmjs.org/package/random-js)
1093 randomSource = randomSource || Math.random;
1094
1095 var length = x.length;
1096 var sample = [];
1097
1098 for (var i = 0; i < n; i++) {
1099 var index = Math.floor(randomSource() * length);
1100
1101 sample.push(x[index]);
1102 }
1103
1104 return sample;
1105}
1106
1107/**
1108 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
1109 * in-place - which means that it **will change the order of the original
1110 * array by reference**.
1111 *
1112 * This is an algorithm that generates a random [permutation](https://en.wikipedia.org/wiki/Permutation)
1113 * of a set.
1114 *
1115 * @param {Array} x sample of one or more numbers
1116 * @param {Function} [randomSource=Math.random] an optional entropy source that
1117 * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)
1118 * @returns {Array} x
1119 * @example
1120 * var x = [1, 2, 3, 4];
1121 * shuffleInPlace(x);
1122 * // x is shuffled to a value like [2, 1, 4, 3]
1123 */
1124function shuffleInPlace(x, randomSource) {
1125 // a custom random number source can be provided if you want to use
1126 // a fixed seed or another random number generator, like
1127 // [random-js](https://www.npmjs.org/package/random-js)
1128 randomSource = randomSource || Math.random;
1129
1130 // store the current length of the x to determine
1131 // when no elements remain to shuffle.
1132 var length = x.length;
1133
1134 // temporary is used to hold an item when it is being
1135 // swapped between indices.
1136 var temporary;
1137
1138 // The index to swap at each stage.
1139 var index;
1140
1141 // While there are still items to shuffle
1142 while (length > 0) {
1143 // chose a random index within the subset of the array
1144 // that is not yet shuffled
1145 index = Math.floor(randomSource() * length--);
1146
1147 // store the value that we'll move temporarily
1148 temporary = x[length];
1149
1150 // swap the value at `x[length]` with `x[index]`
1151 x[length] = x[index];
1152 x[index] = temporary;
1153 }
1154
1155 return x;
1156}
1157
1158/**
1159 * A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
1160 * is a fast way to create a random permutation of a finite set. This is
1161 * a function around `shuffle_in_place` that adds the guarantee that
1162 * it will not modify its input.
1163 *
1164 * @param {Array} x sample of 0 or more numbers
1165 * @param {Function} [randomSource=Math.random] an optional entropy source that
1166 * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)
1167 * @return {Array} shuffled version of input
1168 * @example
1169 * var shuffled = shuffle([1, 2, 3, 4]);
1170 * shuffled; // = [2, 3, 1, 4] or any other random permutation
1171 */
1172function shuffle(x, randomSource) {
1173 // slice the original array so that it is not modified
1174 var sample = x.slice();
1175
1176 // and then shuffle that shallow-copied array, in place
1177 return shuffleInPlace(sample, randomSource);
1178}
1179
1180/**
1181 * Create a [simple random sample](http://en.wikipedia.org/wiki/Simple_random_sample)
1182 * from a given array of `n` elements.
1183 *
1184 * The sampled values will be in any order, not necessarily the order
1185 * they appear in the input.
1186 *
1187 * @param {Array<any>} x input array. can contain any type
1188 * @param {number} n count of how many elements to take
1189 * @param {Function} [randomSource=Math.random] an optional entropy source that
1190 * returns numbers between 0 inclusive and 1 exclusive: the range [0, 1)
1191 * @return {Array} subset of n elements in original array
1192 *
1193 * @example
1194 * var values = [1, 2, 4, 5, 6, 7, 8, 9];
1195 * sample(values, 3); // returns 3 random values, like [2, 5, 8];
1196 */
1197function sample(x, n, randomSource) {
1198 // shuffle the original array using a fisher-yates shuffle
1199 var shuffled = shuffle(x, randomSource);
1200
1201 // and then return a subset of it - the first `n` elements.
1202 return shuffled.slice(0, n);
1203}
1204
1205/**
1206 * Create a new column x row matrix.
1207 *
1208 * @private
1209 * @param {number} columns
1210 * @param {number} rows
1211 * @return {Array<Array<number>>} matrix
1212 * @example
1213 * makeMatrix(10, 10);
1214 */
1215function makeMatrix(columns, rows) {
1216 var matrix = [];
1217 for (var i = 0; i < columns; i++) {
1218 var column = [];
1219 for (var j = 0; j < rows; j++) {
1220 column.push(0);
1221 }
1222 matrix.push(column);
1223 }
1224 return matrix;
1225}
1226
1227/**
1228 * For a sorted input, counting the number of unique values
1229 * is possible in constant time and constant memory. This is
1230 * a simple implementation of the algorithm.
1231 *
1232 * Values are compared with `===`, so objects and non-primitive objects
1233 * are not handled in any special way.
1234 *
1235 * @param {Array<*>} x an array of any kind of value
1236 * @returns {number} count of unique values
1237 * @example
1238 * uniqueCountSorted([1, 2, 3]); // => 3
1239 * uniqueCountSorted([1, 1, 1]); // => 1
1240 */
1241function uniqueCountSorted(x) {
1242 var uniqueValueCount = 0,
1243 lastSeenValue;
1244 for (var i = 0; i < x.length; i++) {
1245 if (i === 0 || x[i] !== lastSeenValue) {
1246 lastSeenValue = x[i];
1247 uniqueValueCount++;
1248 }
1249 }
1250 return uniqueValueCount;
1251}
1252
1253/**
1254 * Generates incrementally computed values based on the sums and sums of
1255 * squares for the data array
1256 *
1257 * @private
1258 * @param {number} j
1259 * @param {number} i
1260 * @param {Array<number>} sums
1261 * @param {Array<number>} sumsOfSquares
1262 * @return {number}
1263 * @example
1264 * ssq(0, 1, [-1, 0, 2], [1, 1, 5]);
1265 */
1266function ssq(j, i, sums, sumsOfSquares) {
1267 var sji; // s(j, i)
1268 if (j > 0) {
1269 var muji = (sums[i] - sums[j - 1]) / (i - j + 1); // mu(j, i)
1270 sji =
1271 sumsOfSquares[i] - sumsOfSquares[j - 1] - (i - j + 1) * muji * muji;
1272 } else {
1273 sji = sumsOfSquares[i] - (sums[i] * sums[i]) / (i + 1);
1274 }
1275 if (sji < 0) {
1276 return 0;
1277 }
1278 return sji;
1279}
1280
1281/**
1282 * Function that recursively divides and conquers computations
1283 * for cluster j
1284 *
1285 * @private
1286 * @param {number} iMin Minimum index in cluster to be computed
1287 * @param {number} iMax Maximum index in cluster to be computed
1288 * @param {number} cluster Index of the cluster currently being computed
1289 * @param {Array<Array<number>>} matrix
1290 * @param {Array<Array<number>>} backtrackMatrix
1291 * @param {Array<number>} sums
1292 * @param {Array<number>} sumsOfSquares
1293 */
1294function fillMatrixColumn(
1295 iMin,
1296 iMax,
1297 cluster,
1298 matrix,
1299 backtrackMatrix,
1300 sums,
1301 sumsOfSquares
1302) {
1303 if (iMin > iMax) {
1304 return;
1305 }
1306
1307 // Start at midpoint between iMin and iMax
1308 var i = Math.floor((iMin + iMax) / 2);
1309
1310 matrix[cluster][i] = matrix[cluster - 1][i - 1];
1311 backtrackMatrix[cluster][i] = i;
1312
1313 var jlow = cluster; // the lower end for j
1314
1315 if (iMin > cluster) {
1316 jlow = Math.max(jlow, backtrackMatrix[cluster][iMin - 1] || 0);
1317 }
1318 jlow = Math.max(jlow, backtrackMatrix[cluster - 1][i] || 0);
1319
1320 var jhigh = i - 1; // the upper end for j
1321 if (iMax < matrix[0].length - 1) {
1322 jhigh = Math.min(jhigh, backtrackMatrix[cluster][iMax + 1] || 0);
1323 }
1324
1325 var sji;
1326 var sjlowi;
1327 var ssqjlow;
1328 var ssqj;
1329 for (var j = jhigh; j >= jlow; --j) {
1330 sji = ssq(j, i, sums, sumsOfSquares);
1331
1332 if (sji + matrix[cluster - 1][jlow - 1] >= matrix[cluster][i]) {
1333 break;
1334 }
1335
1336 // Examine the lower bound of the cluster border
1337 sjlowi = ssq(jlow, i, sums, sumsOfSquares);
1338
1339 ssqjlow = sjlowi + matrix[cluster - 1][jlow - 1];
1340
1341 if (ssqjlow < matrix[cluster][i]) {
1342 // Shrink the lower bound
1343 matrix[cluster][i] = ssqjlow;
1344 backtrackMatrix[cluster][i] = jlow;
1345 }
1346 jlow++;
1347
1348 ssqj = sji + matrix[cluster - 1][j - 1];
1349 if (ssqj < matrix[cluster][i]) {
1350 matrix[cluster][i] = ssqj;
1351 backtrackMatrix[cluster][i] = j;
1352 }
1353 }
1354
1355 fillMatrixColumn(
1356 iMin,
1357 i - 1,
1358 cluster,
1359 matrix,
1360 backtrackMatrix,
1361 sums,
1362 sumsOfSquares
1363 );
1364 fillMatrixColumn(
1365 i + 1,
1366 iMax,
1367 cluster,
1368 matrix,
1369 backtrackMatrix,
1370 sums,
1371 sumsOfSquares
1372 );
1373}
1374
1375/**
1376 * Initializes the main matrices used in Ckmeans and kicks
1377 * off the divide and conquer cluster computation strategy
1378 *
1379 * @private
1380 * @param {Array<number>} data sorted array of values
1381 * @param {Array<Array<number>>} matrix
1382 * @param {Array<Array<number>>} backtrackMatrix
1383 */
1384function fillMatrices(data, matrix, backtrackMatrix) {
1385 var nValues = matrix[0].length;
1386
1387 // Shift values by the median to improve numeric stability
1388 var shift = data[Math.floor(nValues / 2)];
1389
1390 // Cumulative sum and cumulative sum of squares for all values in data array
1391 var sums = [];
1392 var sumsOfSquares = [];
1393
1394 // Initialize first column in matrix & backtrackMatrix
1395 for (var i = 0, shiftedValue = (void 0); i < nValues; ++i) {
1396 shiftedValue = data[i] - shift;
1397 if (i === 0) {
1398 sums.push(shiftedValue);
1399 sumsOfSquares.push(shiftedValue * shiftedValue);
1400 } else {
1401 sums.push(sums[i - 1] + shiftedValue);
1402 sumsOfSquares.push(
1403 sumsOfSquares[i - 1] + shiftedValue * shiftedValue
1404 );
1405 }
1406
1407 // Initialize for cluster = 0
1408 matrix[0][i] = ssq(0, i, sums, sumsOfSquares);
1409 backtrackMatrix[0][i] = 0;
1410 }
1411
1412 // Initialize the rest of the columns
1413 var iMin;
1414 for (var cluster = 1; cluster < matrix.length; ++cluster) {
1415 if (cluster < matrix.length - 1) {
1416 iMin = cluster;
1417 } else {
1418 // No need to compute matrix[K-1][0] ... matrix[K-1][N-2]
1419 iMin = nValues - 1;
1420 }
1421
1422 fillMatrixColumn(
1423 iMin,
1424 nValues - 1,
1425 cluster,
1426 matrix,
1427 backtrackMatrix,
1428 sums,
1429 sumsOfSquares
1430 );
1431 }
1432}
1433
1434/**
1435 * Ckmeans clustering is an improvement on heuristic-based clustering
1436 * approaches like Jenks. The algorithm was developed in
1437 * [Haizhou Wang and Mingzhou Song](http://journal.r-project.org/archive/2011-2/RJournal_2011-2_Wang+Song.pdf)
1438 * as a [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming) approach
1439 * to the problem of clustering numeric data into groups with the least
1440 * within-group sum-of-squared-deviations.
1441 *
1442 * Minimizing the difference within groups - what Wang & Song refer to as
1443 * `withinss`, or within sum-of-squares, means that groups are optimally
1444 * homogenous within and the data is split into representative groups.
1445 * This is very useful for visualization, where you may want to represent
1446 * a continuous variable in discrete color or style groups. This function
1447 * can provide groups that emphasize differences between data.
1448 *
1449 * Being a dynamic approach, this algorithm is based on two matrices that
1450 * store incrementally-computed values for squared deviations and backtracking
1451 * indexes.
1452 *
1453 * This implementation is based on Ckmeans 3.4.6, which introduced a new divide
1454 * and conquer approach that improved runtime from O(kn^2) to O(kn log(n)).
1455 *
1456 * Unlike the [original implementation](https://cran.r-project.org/web/packages/Ckmeans.1d.dp/index.html),
1457 * this implementation does not include any code to automatically determine
1458 * the optimal number of clusters: this information needs to be explicitly
1459 * provided.
1460 *
1461 * ### References
1462 * _Ckmeans.1d.dp: Optimal k-means Clustering in One Dimension by Dynamic
1463 * Programming_ Haizhou Wang and Mingzhou Song ISSN 2073-4859
1464 *
1465 * from The R Journal Vol. 3/2, December 2011
1466 * @param {Array<number>} x input data, as an array of number values
1467 * @param {number} nClusters number of desired classes. This cannot be
1468 * greater than the number of values in the data array.
1469 * @returns {Array<Array<number>>} clustered input
1470 * @throws {Error} if the number of requested clusters is higher than the size of the data
1471 * @example
1472 * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3);
1473 * // The input, clustered into groups of similar numbers.
1474 * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]]);
1475 */
1476function ckmeans(x, nClusters) {
1477 if (nClusters > x.length) {
1478 throw new Error(
1479 "cannot generate more classes than there are data values"
1480 );
1481 }
1482
1483 var sorted = numericSort(x);
1484 // we'll use this as the maximum number of clusters
1485 var uniqueCount = uniqueCountSorted(sorted);
1486
1487 // if all of the input values are identical, there's one cluster
1488 // with all of the input in it.
1489 if (uniqueCount === 1) {
1490 return [sorted];
1491 }
1492
1493 // named 'S' originally
1494 var matrix = makeMatrix(nClusters, sorted.length);
1495 // named 'J' originally
1496 var backtrackMatrix = makeMatrix(nClusters, sorted.length);
1497
1498 // This is a dynamic programming way to solve the problem of minimizing
1499 // within-cluster sum of squares. It's similar to linear regression
1500 // in this way, and this calculation incrementally computes the
1501 // sum of squares that are later read.
1502 fillMatrices(sorted, matrix, backtrackMatrix);
1503
1504 // The real work of Ckmeans clustering happens in the matrix generation:
1505 // the generated matrices encode all possible clustering combinations, and
1506 // once they're generated we can solve for the best clustering groups
1507 // very quickly.
1508 var clusters = [];
1509 var clusterRight = backtrackMatrix[0].length - 1;
1510
1511 // Backtrack the clusters from the dynamic programming matrix. This
1512 // starts at the bottom-right corner of the matrix (if the top-left is 0, 0),
1513 // and moves the cluster target with the loop.
1514 for (var cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) {
1515 var clusterLeft = backtrackMatrix[cluster][clusterRight];
1516
1517 // fill the cluster from the sorted input by taking a slice of the
1518 // array. the backtrack matrix makes this easy - it stores the
1519 // indexes where the cluster should start and end.
1520 clusters[cluster] = sorted.slice(clusterLeft, clusterRight + 1);
1521
1522 if (cluster > 0) {
1523 clusterRight = clusterLeft - 1;
1524 }
1525 }
1526
1527 return clusters;
1528}
1529
1530/**
1531 * Given an array of x, this will find the extent of the
1532 * x and return an array of breaks that can be used
1533 * to categorize the x into a number of classes. The
1534 * returned array will always be 1 longer than the number of
1535 * classes because it includes the minimum value.
1536 *
1537 * @param {Array<number>} x an array of number values
1538 * @param {number} nClasses number of desired classes
1539 * @returns {Array<number>} array of class break positions
1540 * @example
1541 * equalIntervalBreaks([1, 2, 3, 4, 5, 6], 4); // => [1, 2.25, 3.5, 4.75, 6]
1542 */
1543function equalIntervalBreaks(x, nClasses) {
1544 if (x.length < 2) {
1545 return x;
1546 }
1547
1548 var theMin = min(x);
1549 var theMax = max(x);
1550
1551 // the first break will always be the minimum value
1552 // in the xset
1553 var breaks = [theMin];
1554
1555 // The size of each break is the full range of the x
1556 // divided by the number of classes requested
1557 var breakSize = (theMax - theMin) / nClasses;
1558
1559 // In the case of nClasses = 1, this loop won't run
1560 // and the returned breaks will be [min, max]
1561 for (var i = 1; i < nClasses; i++) {
1562 breaks.push(breaks[0] + breakSize * i);
1563 }
1564
1565 // the last break will always be the
1566 // maximum.
1567 breaks.push(theMax);
1568
1569 return breaks;
1570}
1571
1572/**
1573 * [Sample covariance](https://en.wikipedia.org/wiki/Sample_mean_and_covariance) of two datasets:
1574 * how much do the two datasets move together?
1575 * x and y are two datasets, represented as arrays of numbers.
1576 *
1577 * @param {Array<number>} x a sample of two or more data points
1578 * @param {Array<number>} y a sample of two or more data points
1579 * @throws {Error} if x and y do not have equal lengths
1580 * @throws {Error} if x or y have length of one or less
1581 * @returns {number} sample covariance
1582 * @example
1583 * sampleCovariance([1, 2, 3, 4, 5, 6], [6, 5, 4, 3, 2, 1]); // => -3.5
1584 */
1585function sampleCovariance(x, y) {
1586 // The two datasets must have the same length which must be more than 1
1587 if (x.length !== y.length) {
1588 throw new Error("sampleCovariance requires samples with equal lengths");
1589 }
1590
1591 if (x.length < 2) {
1592 throw new Error(
1593 "sampleCovariance requires at least two data points in each sample"
1594 );
1595 }
1596
1597 // determine the mean of each dataset so that we can judge each
1598 // value of the dataset fairly as the difference from the mean. this
1599 // way, if one dataset is [1, 2, 3] and [2, 3, 4], their covariance
1600 // does not suffer because of the difference in absolute values
1601 var xmean = mean(x);
1602 var ymean = mean(y);
1603 var sum = 0;
1604
1605 // for each pair of values, the covariance increases when their
1606 // difference from the mean is associated - if both are well above
1607 // or if both are well below
1608 // the mean, the covariance increases significantly.
1609 for (var i = 0; i < x.length; i++) {
1610 sum += (x[i] - xmean) * (y[i] - ymean);
1611 }
1612
1613 // this is Bessels' Correction: an adjustment made to sample statistics
1614 // that allows for the reduced degree of freedom entailed in calculating
1615 // values from samples rather than complete populations.
1616 var besselsCorrection = x.length - 1;
1617
1618 // the covariance is weighted by the length of the datasets.
1619 return sum / besselsCorrection;
1620}
1621
1622/**
1623 * The [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance)
1624 * is the sum of squared deviations from the mean. The sample variance
1625 * is distinguished from the variance by the usage of [Bessel's Correction](https://en.wikipedia.org/wiki/Bessel's_correction):
1626 * instead of dividing the sum of squared deviations by the length of the input,
1627 * it is divided by the length minus one. This corrects the bias in estimating
1628 * a value from a set that you don't know if full.
1629 *
1630 * References:
1631 * * [Wolfram MathWorld on Sample Variance](http://mathworld.wolfram.com/SampleVariance.html)
1632 *
1633 * @param {Array<number>} x a sample of two or more data points
1634 * @throws {Error} if the length of x is less than 2
1635 * @return {number} sample variance
1636 * @example
1637 * sampleVariance([1, 2, 3, 4, 5]); // => 2.5
1638 */
1639function sampleVariance(x) {
1640 if (x.length < 2) {
1641 throw new Error("sampleVariance requires at least two data points");
1642 }
1643
1644 var sumSquaredDeviationsValue = sumNthPowerDeviations(x, 2);
1645
1646 // this is Bessels' Correction: an adjustment made to sample statistics
1647 // that allows for the reduced degree of freedom entailed in calculating
1648 // values from samples rather than complete populations.
1649 var besselsCorrection = x.length - 1;
1650
1651 // Find the mean value of that list
1652 return sumSquaredDeviationsValue / besselsCorrection;
1653}
1654
1655/**
1656 * The [sample standard deviation](http://en.wikipedia.org/wiki/Standard_deviation#Sample_standard_deviation)
1657 * is the square root of the sample variance.
1658 *
1659 * @param {Array<number>} x input array
1660 * @returns {number} sample standard deviation
1661 * @example
1662 * sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]).toFixed(2);
1663 * // => '2.14'
1664 */
1665function sampleStandardDeviation(x) {
1666 var sampleVarianceX = sampleVariance(x);
1667 return Math.sqrt(sampleVarianceX);
1668}
1669
1670/**
1671 * The [correlation](http://en.wikipedia.org/wiki/Correlation_and_dependence) is
1672 * a measure of how correlated two datasets are, between -1 and 1
1673 *
1674 * @param {Array<number>} x first input
1675 * @param {Array<number>} y second input
1676 * @returns {number} sample correlation
1677 * @example
1678 * sampleCorrelation([1, 2, 3, 4, 5, 6], [2, 2, 3, 4, 5, 60]).toFixed(2);
1679 * // => '0.69'
1680 */
1681function sampleCorrelation(x, y) {
1682 var cov = sampleCovariance(x, y);
1683 var xstd = sampleStandardDeviation(x);
1684 var ystd = sampleStandardDeviation(y);
1685
1686 return cov / xstd / ystd;
1687}
1688
1689/**
1690 * [Skewness](http://en.wikipedia.org/wiki/Skewness) is
1691 * a measure of the extent to which a probability distribution of a
1692 * real-valued random variable "leans" to one side of the mean.
1693 * The skewness value can be positive or negative, or even undefined.
1694 *
1695 * Implementation is based on the adjusted Fisher-Pearson standardized
1696 * moment coefficient, which is the version found in Excel and several
1697 * statistical packages including Minitab, SAS and SPSS.
1698 *
1699 * @since 4.1.0
1700 * @param {Array<number>} x a sample of 3 or more data points
1701 * @returns {number} sample skewness
1702 * @throws {Error} if x has length less than 3
1703 * @example
1704 * sampleSkewness([2, 4, 6, 3, 1]); // => 0.590128656384365
1705 */
1706function sampleSkewness(x) {
1707 if (x.length < 3) {
1708 throw new Error("sampleSkewness requires at least three data points");
1709 }
1710
1711 var meanValue = mean(x);
1712 var tempValue;
1713 var sumSquaredDeviations = 0;
1714 var sumCubedDeviations = 0;
1715
1716 for (var i = 0; i < x.length; i++) {
1717 tempValue = x[i] - meanValue;
1718 sumSquaredDeviations += tempValue * tempValue;
1719 sumCubedDeviations += tempValue * tempValue * tempValue;
1720 }
1721
1722 // this is Bessels' Correction: an adjustment made to sample statistics
1723 // that allows for the reduced degree of freedom entailed in calculating
1724 // values from samples rather than complete populations.
1725 var besselsCorrection = x.length - 1;
1726
1727 // Find the mean value of that list
1728 var theSampleStandardDeviation = Math.sqrt(
1729 sumSquaredDeviations / besselsCorrection
1730 );
1731
1732 var n = x.length;
1733 var cubedS = Math.pow(theSampleStandardDeviation, 3);
1734
1735 return (n * sumCubedDeviations) / ((n - 1) * (n - 2) * cubedS);
1736}
1737
1738/**
1739 * [Kurtosis](http://en.wikipedia.org/wiki/Kurtosis) is
1740 * a measure of the heaviness of a distribution's tails relative to its
1741 * variance. The kurtosis value can be positive or negative, or even undefined.
1742 *
1743 * Implementation is based on Fisher's excess kurtosis definition and uses
1744 * unbiased moment estimators. This is the version found in Excel and available
1745 * in several statistical packages, including SAS and SciPy.
1746 *
1747 * @param {Array<number>} x a sample of 4 or more data points
1748 * @returns {number} sample kurtosis
1749 * @throws {Error} if x has length less than 4
1750 * @example
1751 * sampleKurtosis([1, 2, 2, 3, 5]); // => 1.4555765595463122
1752 */
1753function sampleKurtosis(x) {
1754 var n = x.length;
1755
1756 if (n < 4) {
1757 throw new Error("sampleKurtosis requires at least four data points");
1758 }
1759
1760 var meanValue = mean(x);
1761 var tempValue;
1762 var secondCentralMoment = 0;
1763 var fourthCentralMoment = 0;
1764
1765 for (var i = 0; i < n; i++) {
1766 tempValue = x[i] - meanValue;
1767 secondCentralMoment += tempValue * tempValue;
1768 fourthCentralMoment += tempValue * tempValue * tempValue * tempValue;
1769 }
1770
1771 return (
1772 ((n - 1) / ((n - 2) * (n - 3))) *
1773 ((n * (n + 1) * fourthCentralMoment) /
1774 (secondCentralMoment * secondCentralMoment) -
1775 3 * (n - 1))
1776 );
1777}
1778
1779/**
1780 * Implementation of [Heap's Algorithm](https://en.wikipedia.org/wiki/Heap%27s_algorithm)
1781 * for generating permutations.
1782 *
1783 * @param {Array} elements any type of data
1784 * @returns {Array<Array>} array of permutations
1785 */
1786function permutationsHeap(elements) {
1787 var indexes = new Array(elements.length);
1788 var permutations = [elements.slice()];
1789
1790 for (var i = 0; i < elements.length; i++) {
1791 indexes[i] = 0;
1792 }
1793
1794 for (var i$1 = 0; i$1 < elements.length; ) {
1795 if (indexes[i$1] < i$1) {
1796 // At odd indexes, swap from indexes[i] instead
1797 // of from the beginning of the array
1798 var swapFrom = 0;
1799 if (i$1 % 2 !== 0) {
1800 swapFrom = indexes[i$1];
1801 }
1802
1803 // swap between swapFrom and i, using
1804 // a temporary variable as storage.
1805 var temp = elements[swapFrom];
1806 elements[swapFrom] = elements[i$1];
1807 elements[i$1] = temp;
1808
1809 permutations.push(elements.slice());
1810 indexes[i$1]++;
1811 i$1 = 0;
1812 } else {
1813 indexes[i$1] = 0;
1814 i$1++;
1815 }
1816 }
1817
1818 return permutations;
1819}
1820
1821/**
1822 * Implementation of Combinations
1823 * Combinations are unique subsets of a collection - in this case, k x from a collection at a time.
1824 * https://en.wikipedia.org/wiki/Combination
1825 * @param {Array} x any type of data
1826 * @param {int} k the number of objects in each group (without replacement)
1827 * @returns {Array<Array>} array of permutations
1828 * @example
1829 * combinations([1, 2, 3], 2); // => [[1,2], [1,3], [2,3]]
1830 */
1831
1832function combinations(x, k) {
1833 var i;
1834 var subI;
1835 var combinationList = [];
1836 var subsetCombinations;
1837 var next;
1838
1839 for (i = 0; i < x.length; i++) {
1840 if (k === 1) {
1841 combinationList.push([x[i]]);
1842 } else {
1843 subsetCombinations = combinations(x.slice(i + 1, x.length), k - 1);
1844 for (subI = 0; subI < subsetCombinations.length; subI++) {
1845 next = subsetCombinations[subI];
1846 next.unshift(x[i]);
1847 combinationList.push(next);
1848 }
1849 }
1850 }
1851 return combinationList;
1852}
1853
1854/**
1855 * Implementation of [Combinations](https://en.wikipedia.org/wiki/Combination) with replacement
1856 * Combinations are unique subsets of a collection - in this case, k x from a collection at a time.
1857 * 'With replacement' means that a given element can be chosen multiple times.
1858 * Unlike permutation, order doesn't matter for combinations.
1859 *
1860 * @param {Array} x any type of data
1861 * @param {int} k the number of objects in each group (without replacement)
1862 * @returns {Array<Array>} array of permutations
1863 * @example
1864 * combinationsReplacement([1, 2], 2); // => [[1, 1], [1, 2], [2, 2]]
1865 */
1866function combinationsReplacement(x, k) {
1867 var combinationList = [];
1868
1869 for (var i = 0; i < x.length; i++) {
1870 if (k === 1) {
1871 // If we're requested to find only one element, we don't need
1872 // to recurse: just push `x[i]` onto the list of combinations.
1873 combinationList.push([x[i]]);
1874 } else {
1875 // Otherwise, recursively find combinations, given `k - 1`. Note that
1876 // we request `k - 1`, so if you were looking for k=3 combinations, we're
1877 // requesting k=2. This -1 gets reversed in the for loop right after this
1878 // code, since we concatenate `x[i]` onto the selected combinations,
1879 // bringing `k` back up to your requested level.
1880 // This recursion may go many levels deep, since it only stops once
1881 // k=1.
1882 var subsetCombinations = combinationsReplacement(
1883 x.slice(i, x.length),
1884 k - 1
1885 );
1886
1887 for (var j = 0; j < subsetCombinations.length; j++) {
1888 combinationList.push([x[i]].concat(subsetCombinations[j]));
1889 }
1890 }
1891 }
1892
1893 return combinationList;
1894}
1895
1896/**
1897 * When adding a new value to a list, one does not have to necessary
1898 * recompute the mean of the list in linear time. They can instead use
1899 * this function to compute the new mean by providing the current mean,
1900 * the number of elements in the list that produced it and the new
1901 * value to add.
1902 *
1903 * @since 2.5.0
1904 * @param {number} mean current mean
1905 * @param {number} n number of items in the list
1906 * @param {number} newValue the added value
1907 * @returns {number} the new mean
1908 *
1909 * @example
1910 * addToMean(14, 5, 53); // => 20.5
1911 */
1912function addToMean(mean, n, newValue) {
1913 return mean + (newValue - mean) / (n + 1);
1914}
1915
1916/**
1917 * When combining two lists of values for which one already knows the means,
1918 * one does not have to necessary recompute the mean of the combined lists in
1919 * linear time. They can instead use this function to compute the combined
1920 * mean by providing the mean & number of values of the first list and the mean
1921 * & number of values of the second list.
1922 *
1923 * @since 3.0.0
1924 * @param {number} mean1 mean of the first list
1925 * @param {number} n1 number of items in the first list
1926 * @param {number} mean2 mean of the second list
1927 * @param {number} n2 number of items in the second list
1928 * @returns {number} the combined mean
1929 *
1930 * @example
1931 * combineMeans(5, 3, 4, 3); // => 4.5
1932 */
1933function combineMeans(mean1, n1, mean2, n2) {
1934 return (mean1 * n1 + mean2 * n2) / (n1 + n2);
1935}
1936
1937/**
1938 * When combining two lists of values for which one already knows the variances,
1939 * one does not have to necessary recompute the variance of the combined lists
1940 * in linear time. They can instead use this function to compute the combined
1941 * variance by providing the variance, mean & number of values of the first list
1942 * and the variance, mean & number of values of the second list.
1943 *
1944 * @since 3.0.0
1945 * @param {number} variance1 variance of the first list
1946 * @param {number} mean1 mean of the first list
1947 * @param {number} n1 number of items in the first list
1948 * @param {number} variance2 variance of the second list
1949 * @param {number} mean2 mean of the second list
1950 * @param {number} n2 number of items in the second list
1951 * @returns {number} the combined mean
1952 *
1953 * @example
1954 * combineVariances(14 / 3, 5, 3, 8 / 3, 4, 3); // => 47 / 12
1955 */
1956function combineVariances(variance1, mean1, n1, variance2, mean2, n2) {
1957 var newMean = combineMeans(mean1, n1, mean2, n2);
1958
1959 return (
1960 (n1 * (variance1 + Math.pow(mean1 - newMean, 2)) +
1961 n2 * (variance2 + Math.pow(mean2 - newMean, 2))) /
1962 (n1 + n2)
1963 );
1964}
1965
1966/**
1967 * The [Geometric Mean](https://en.wikipedia.org/wiki/Geometric_mean) is
1968 * a mean function that is more useful for numbers in different
1969 * ranges.
1970 *
1971 * This is the nth root of the input numbers multiplied by each other.
1972 *
1973 * The geometric mean is often useful for
1974 * **[proportional growth](https://en.wikipedia.org/wiki/Geometric_mean#Proportional_growth)**: given
1975 * growth rates for multiple years, like _80%, 16.66% and 42.85%_, a simple
1976 * mean will incorrectly estimate an average growth rate, whereas a geometric
1977 * mean will correctly estimate a growth rate that, over those years,
1978 * will yield the same end value.
1979 *
1980 * This runs in `O(n)`, linear time, with respect to the length of the array.
1981 *
1982 * @param {Array<number>} x sample of one or more data points
1983 * @returns {number} geometric mean
1984 * @throws {Error} if x is empty
1985 * @throws {Error} if x contains a negative number
1986 * @example
1987 * var growthRates = [1.80, 1.166666, 1.428571];
1988 * var averageGrowth = ss.geometricMean(growthRates);
1989 * var averageGrowthRates = [averageGrowth, averageGrowth, averageGrowth];
1990 * var startingValue = 10;
1991 * var startingValueMean = 10;
1992 * growthRates.forEach(function(rate) {
1993 * startingValue *= rate;
1994 * });
1995 * averageGrowthRates.forEach(function(rate) {
1996 * startingValueMean *= rate;
1997 * });
1998 * startingValueMean === startingValue;
1999 */
2000function geometricMean(x) {
2001 if (x.length === 0) {
2002 throw new Error("geometricMean requires at least one data point");
2003 }
2004
2005 // the starting value.
2006 var value = 1;
2007
2008 for (var i = 0; i < x.length; i++) {
2009 // the geometric mean is only valid for positive numbers
2010 if (x[i] <= 0) {
2011 throw new Error(
2012 "geometricMean requires only positive numbers as input"
2013 );
2014 }
2015
2016 // repeatedly multiply the value by each number
2017 value *= x[i];
2018 }
2019
2020 return Math.pow(value, 1 / x.length);
2021}
2022
2023/**
2024 * The [Harmonic Mean](https://en.wikipedia.org/wiki/Harmonic_mean) is
2025 * a mean function typically used to find the average of rates.
2026 * This mean is calculated by taking the reciprocal of the arithmetic mean
2027 * of the reciprocals of the input numbers.
2028 *
2029 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
2030 * a method of finding a typical or central value of a set of numbers.
2031 *
2032 * This runs in `O(n)`, linear time, with respect to the length of the array.
2033 *
2034 * @param {Array<number>} x sample of one or more data points
2035 * @returns {number} harmonic mean
2036 * @throws {Error} if x is empty
2037 * @throws {Error} if x contains a negative number
2038 * @example
2039 * harmonicMean([2, 3]).toFixed(2) // => '2.40'
2040 */
2041function harmonicMean(x) {
2042 if (x.length === 0) {
2043 throw new Error("harmonicMean requires at least one data point");
2044 }
2045
2046 var reciprocalSum = 0;
2047
2048 for (var i = 0; i < x.length; i++) {
2049 // the harmonic mean is only valid for positive numbers
2050 if (x[i] <= 0) {
2051 throw new Error(
2052 "harmonicMean requires only positive numbers as input"
2053 );
2054 }
2055
2056 reciprocalSum += 1 / x[i];
2057 }
2058
2059 // divide n by the the reciprocal sum
2060 return x.length / reciprocalSum;
2061}
2062
2063/**
2064 * The mean, _also known as average_,
2065 * is the sum of all values over the number of values.
2066 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
2067 * a method of finding a typical or central value of a set of numbers.
2068 *
2069 * The simple mean uses the successive addition method internally
2070 * to calculate it's result. Errors in floating-point addition are
2071 * not accounted for, so if precision is required, the standard {@link mean}
2072 * method should be used instead.
2073 *
2074 * This runs in `O(n)`, linear time, with respect to the length of the array.
2075 *
2076 *
2077 * @param {Array<number>} x sample of one or more data points
2078 * @throws {Error} if the the length of x is less than one
2079 * @returns {number} mean
2080 * @example
2081 * mean([0, 10]); // => 5
2082 */
2083function meanSimple(x) {
2084 if (x.length === 0) {
2085 throw new Error("meanSimple requires at least one data point");
2086 }
2087
2088 return sumSimple(x) / x.length;
2089}
2090
2091/**
2092 * The [median](http://en.wikipedia.org/wiki/Median) is
2093 * the middle number of a list. This is often a good indicator of 'the middle'
2094 * when there are outliers that skew the `mean()` value.
2095 * This is a [measure of central tendency](https://en.wikipedia.org/wiki/Central_tendency):
2096 * a method of finding a typical or central value of a set of numbers.
2097 *
2098 * The median isn't necessarily one of the elements in the list: the value
2099 * can be the average of two elements if the list has an even length
2100 * and the two central values are different.
2101 *
2102 * @param {Array<number>} sorted input
2103 * @returns {number} median value
2104 * @example
2105 * medianSorted([10, 2, 5, 100, 2, 1]); // => 52.5
2106 */
2107function medianSorted(sorted) {
2108 return quantileSorted(sorted, 0.5);
2109}
2110
2111/**
2112 * When removing a value from a list, one does not have to necessary
2113 * recompute the mean of the list in linear time. They can instead use
2114 * this function to compute the new mean by providing the current mean,
2115 * the number of elements in the list that produced it and the value to remove.
2116 *
2117 * @since 3.0.0
2118 * @param {number} mean current mean
2119 * @param {number} n number of items in the list
2120 * @param {number} value the value to remove
2121 * @returns {number} the new mean
2122 *
2123 * @example
2124 * subtractFromMean(20.5, 6, 53); // => 14
2125 */
2126function subtractFromMean(mean, n, value) {
2127 return (mean * n - value) / (n - 1);
2128}
2129
2130/**
2131 * The Root Mean Square (RMS) is
2132 * a mean function used as a measure of the magnitude of a set
2133 * of numbers, regardless of their sign.
2134 * This is the square root of the mean of the squares of the
2135 * input numbers.
2136 * This runs in `O(n)`, linear time, with respect to the length of the array.
2137 *
2138 * @param {Array<number>} x a sample of one or more data points
2139 * @returns {number} root mean square
2140 * @throws {Error} if x is empty
2141 * @example
2142 * rootMeanSquare([-1, 1, -1, 1]); // => 1
2143 */
2144function rootMeanSquare(x) {
2145 if (x.length === 0) {
2146 throw new Error("rootMeanSquare requires at least one data point");
2147 }
2148
2149 var sumOfSquares = 0;
2150 for (var i = 0; i < x.length; i++) {
2151 sumOfSquares += Math.pow(x[i], 2);
2152 }
2153
2154 return Math.sqrt(sumOfSquares / x.length);
2155}
2156
2157/**
2158 * This is to compute [a one-sample t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#One-sample_t-test), comparing the mean
2159 * of a sample to a known value, x.
2160 *
2161 * in this case, we're trying to determine whether the
2162 * population mean is equal to the value that we know, which is `x`
2163 * here. usually the results here are used to look up a
2164 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
2165 * a certain level of significance, will let you determine that the
2166 * null hypothesis can or cannot be rejected.
2167 *
2168 * @param {Array<number>} x sample of one or more numbers
2169 * @param {number} expectedValue expected value of the population mean
2170 * @returns {number} value
2171 * @example
2172 * tTest([1, 2, 3, 4, 5, 6], 3.385).toFixed(2); // => '0.16'
2173 */
2174function tTest(x, expectedValue) {
2175 // The mean of the sample
2176 var sampleMean = mean(x);
2177
2178 // The standard deviation of the sample
2179 var sd = standardDeviation(x);
2180
2181 // Square root the length of the sample
2182 var rootN = Math.sqrt(x.length);
2183
2184 // returning the t value
2185 return (sampleMean - expectedValue) / (sd / rootN);
2186}
2187
2188/**
2189 * This is to compute [two sample t-test](http://en.wikipedia.org/wiki/Student's_t-test).
2190 * Tests whether "mean(X)-mean(Y) = difference", (
2191 * in the most common case, we often have `difference == 0` to test if two samples
2192 * are likely to be taken from populations with the same mean value) with
2193 * no prior knowledge on standard deviations of both samples
2194 * other than the fact that they have the same standard deviation.
2195 *
2196 * Usually the results here are used to look up a
2197 * [p-value](http://en.wikipedia.org/wiki/P-value), which, for
2198 * a certain level of significance, will let you determine that the
2199 * null hypothesis can or cannot be rejected.
2200 *
2201 * `diff` can be omitted if it equals 0.
2202 *
2203 * [This is used to confirm or deny](http://www.monarchlab.org/Lab/Research/Stats/2SampleT.aspx)
2204 * a null hypothesis that the two populations that have been sampled into
2205 * `sampleX` and `sampleY` are equal to each other.
2206 *
2207 * @param {Array<number>} sampleX a sample as an array of numbers
2208 * @param {Array<number>} sampleY a sample as an array of numbers
2209 * @param {number} [difference=0]
2210 * @returns {number|null} test result
2211 *
2212 * @example
2213 * tTestTwoSample([1, 2, 3, 4], [3, 4, 5, 6], 0); // => -2.1908902300206643
2214 */
2215function tTestTwoSample(sampleX, sampleY, difference) {
2216 var n = sampleX.length;
2217 var m = sampleY.length;
2218
2219 // If either sample doesn't actually have any values, we can't
2220 // compute this at all, so we return `null`.
2221 if (!n || !m) {
2222 return null;
2223 }
2224
2225 // default difference (mu) is zero
2226 if (!difference) {
2227 difference = 0;
2228 }
2229
2230 var meanX = mean(sampleX);
2231 var meanY = mean(sampleY);
2232 var sampleVarianceX = sampleVariance(sampleX);
2233 var sampleVarianceY = sampleVariance(sampleY);
2234
2235 if (
2236 typeof meanX === "number" &&
2237 typeof meanY === "number" &&
2238 typeof sampleVarianceX === "number" &&
2239 typeof sampleVarianceY === "number"
2240 ) {
2241 var weightedVariance =
2242 ((n - 1) * sampleVarianceX + (m - 1) * sampleVarianceY) /
2243 (n + m - 2);
2244
2245 return (
2246 (meanX - meanY - difference) /
2247 Math.sqrt(weightedVariance * (1 / n + 1 / m))
2248 );
2249 }
2250}
2251
2252/**
2253 * [Bayesian Classifier](http://en.wikipedia.org/wiki/Naive_Bayes_classifier)
2254 *
2255 * This is a naïve bayesian classifier that takes
2256 * singly-nested objects.
2257 *
2258 * @class
2259 * @example
2260 * var bayes = new BayesianClassifier();
2261 * bayes.train({
2262 * species: 'Cat'
2263 * }, 'animal');
2264 * var result = bayes.score({
2265 * species: 'Cat'
2266 * })
2267 * // result
2268 * // {
2269 * // animal: 1
2270 * // }
2271 */
2272var BayesianClassifier = function BayesianClassifier() {
2273 // The number of items that are currently
2274 // classified in the model
2275 this.totalCount = 0;
2276 // Every item classified in the model
2277 this.data = {};
2278};
2279
2280/**
2281 * Train the classifier with a new item, which has a single
2282 * dimension of Javascript literal keys and values.
2283 *
2284 * @param {Object} item an object with singly-deep properties
2285 * @param {string} category the category this item belongs to
2286 * @return {undefined} adds the item to the classifier
2287 */
2288BayesianClassifier.prototype.train = function train (item, category) {
2289 // If the data object doesn't have any values
2290 // for this category, create a new object for it.
2291 if (!this.data[category]) {
2292 this.data[category] = {};
2293 }
2294
2295 // Iterate through each key in the item.
2296 for (var k in item) {
2297 var v = item[k];
2298 // Initialize the nested object `data[category][k][item[k]]`
2299 // with an object of keys that equal 0.
2300 if (this.data[category][k] === undefined) {
2301 this.data[category][k] = {};
2302 }
2303 if (this.data[category][k][v] === undefined) {
2304 this.data[category][k][v] = 0;
2305 }
2306
2307 // And increment the key for this key/value combination.
2308 this.data[category][k][v]++;
2309 }
2310
2311 // Increment the number of items classified
2312 this.totalCount++;
2313};
2314
2315/**
2316 * Generate a score of how well this item matches all
2317 * possible categories based on its attributes
2318 *
2319 * @param {Object} item an item in the same format as with train
2320 * @returns {Object} of probabilities that this item belongs to a
2321 * given category.
2322 */
2323BayesianClassifier.prototype.score = function score (item) {
2324 // Initialize an empty array of odds per category.
2325 var odds = {};
2326 var category;
2327 // Iterate through each key in the item,
2328 // then iterate through each category that has been used
2329 // in previous calls to `.train()`
2330 for (var k in item) {
2331 var v = item[k];
2332 for (category in this.data) {
2333 // Create an empty object for storing key - value combinations
2334 // for this category.
2335 odds[category] = {};
2336
2337 // If this item doesn't even have a property, it counts for nothing,
2338 // but if it does have the property that we're looking for from
2339 // the item to categorize, it counts based on how popular it is
2340 // versus the whole population.
2341 if (this.data[category][k]) {
2342 odds[category][k + "_" + v] =
2343 (this.data[category][k][v] || 0) / this.totalCount;
2344 } else {
2345 odds[category][k + "_" + v] = 0;
2346 }
2347 }
2348 }
2349
2350 // Set up a new object that will contain sums of these odds by category
2351 var oddsSums = {};
2352
2353 for (category in odds) {
2354 // Tally all of the odds for each category-combination pair -
2355 // the non-existence of a category does not add anything to the
2356 // score.
2357 oddsSums[category] = 0;
2358 for (var combination in odds[category]) {
2359 oddsSums[category] += odds[category][combination];
2360 }
2361 }
2362
2363 return oddsSums;
2364};
2365
2366/**
2367 * This is a single-layer [Perceptron Classifier](http://en.wikipedia.org/wiki/Perceptron) that takes
2368 * arrays of numbers and predicts whether they should be classified
2369 * as either 0 or 1 (negative or positive examples).
2370 * @class
2371 * @example
2372 * // Create the model
2373 * var p = new PerceptronModel();
2374 * // Train the model with input with a diagonal boundary.
2375 * for (var i = 0; i < 5; i++) {
2376 * p.train([1, 1], 1);
2377 * p.train([0, 1], 0);
2378 * p.train([1, 0], 0);
2379 * p.train([0, 0], 0);
2380 * }
2381 * p.predict([0, 0]); // 0
2382 * p.predict([0, 1]); // 0
2383 * p.predict([1, 0]); // 0
2384 * p.predict([1, 1]); // 1
2385 */
2386var PerceptronModel = function PerceptronModel() {
2387 // The weights, or coefficients of the model;
2388 // weights are only populated when training with data.
2389 this.weights = [];
2390 // The bias term, or intercept; it is also a weight but
2391 // it's stored separately for convenience as it is always
2392 // multiplied by one.
2393 this.bias = 0;
2394};
2395/**
2396 * **Predict**: Use an array of features with the weight array and bias
2397 * to predict whether an example is labeled 0 or 1.
2398 *
2399 * @param {Array<number>} features an array of features as numbers
2400 * @returns {number} 1 if the score is over 0, otherwise 0
2401 */
2402PerceptronModel.prototype.predict = function predict (features) {
2403 // Only predict if previously trained
2404 // on the same size feature array(s).
2405 if (features.length !== this.weights.length) {
2406 return null;
2407 }
2408
2409 // Calculate the sum of features times weights,
2410 // with the bias added (implicitly times one).
2411 var score = 0;
2412 for (var i = 0; i < this.weights.length; i++) {
2413 score += this.weights[i] * features[i];
2414 }
2415 score += this.bias;
2416
2417 // Classify as 1 if the score is over 0, otherwise 0.
2418 if (score > 0) {
2419 return 1;
2420 } else {
2421 return 0;
2422 }
2423};
2424
2425/**
2426 * **Train** the classifier with a new example, which is
2427 * a numeric array of features and a 0 or 1 label.
2428 *
2429 * @param {Array<number>} features an array of features as numbers
2430 * @param {number} label either 0 or 1
2431 * @returns {PerceptronModel} this
2432 */
2433PerceptronModel.prototype.train = function train (features, label) {
2434 // Require that only labels of 0 or 1 are considered.
2435 if (label !== 0 && label !== 1) {
2436 return null;
2437 }
2438 // The length of the feature array determines
2439 // the length of the weight array.
2440 // The perceptron will continue learning as long as
2441 // it keeps seeing feature arrays of the same length.
2442 // When it sees a new data shape, it initializes.
2443 if (features.length !== this.weights.length) {
2444 this.weights = features;
2445 this.bias = 1;
2446 }
2447 // Make a prediction based on current weights.
2448 var prediction = this.predict(features);
2449 // Update the weights if the prediction is wrong.
2450 if (typeof prediction === "number" && prediction !== label) {
2451 var gradient = label - prediction;
2452 for (var i = 0; i < this.weights.length; i++) {
2453 this.weights[i] += gradient * features[i];
2454 }
2455 this.bias += gradient;
2456 }
2457 return this;
2458};
2459
2460/**
2461 * We use `ε`, epsilon, as a stopping criterion when we want to iterate
2462 * until we're "close enough". Epsilon is a very small number: for
2463 * simple statistics, that number is **0.0001**
2464 *
2465 * This is used in calculations like the binomialDistribution, in which
2466 * the process of finding a value is [iterative](https://en.wikipedia.org/wiki/Iterative_method):
2467 * it progresses until it is close enough.
2468 *
2469 * Below is an example of using epsilon in [gradient descent](https://en.wikipedia.org/wiki/Gradient_descent),
2470 * where we're trying to find a local minimum of a function's derivative,
2471 * given by the `fDerivative` method.
2472 *
2473 * @example
2474 * // From calculation, we expect that the local minimum occurs at x=9/4
2475 * var x_old = 0;
2476 * // The algorithm starts at x=6
2477 * var x_new = 6;
2478 * var stepSize = 0.01;
2479 *
2480 * function fDerivative(x) {
2481 * return 4 * Math.pow(x, 3) - 9 * Math.pow(x, 2);
2482 * }
2483 *
2484 * // The loop runs until the difference between the previous
2485 * // value and the current value is smaller than epsilon - a rough
2486 * // meaure of 'close enough'
2487 * while (Math.abs(x_new - x_old) > ss.epsilon) {
2488 * x_old = x_new;
2489 * x_new = x_old - stepSize * fDerivative(x_old);
2490 * }
2491 *
2492 * console.log('Local minimum occurs at', x_new);
2493 */
2494var epsilon = 0.0001;
2495
2496/**
2497 * A [Factorial](https://en.wikipedia.org/wiki/Factorial), usually written n!, is the product of all positive
2498 * integers less than or equal to n. Often factorial is implemented
2499 * recursively, but this iterative approach is significantly faster
2500 * and simpler.
2501 *
2502 * @param {number} n input, must be an integer number 1 or greater
2503 * @returns {number} factorial: n!
2504 * @throws {Error} if n is less than 0 or not an integer
2505 * @example
2506 * factorial(5); // => 120
2507 */
2508function factorial(n) {
2509 // factorial is mathematically undefined for negative numbers
2510 if (n < 0) {
2511 throw new Error("factorial requires a non-negative value");
2512 }
2513
2514 if (Math.floor(n) !== n) {
2515 throw new Error("factorial requires an integer input");
2516 }
2517
2518 // typically you'll expand the factorial function going down, like
2519 // 5! = 5 * 4 * 3 * 2 * 1. This is going in the opposite direction,
2520 // counting from 2 up to the number in question, and since anything
2521 // multiplied by 1 is itself, the loop only needs to start at 2.
2522 var accumulator = 1;
2523 for (var i = 2; i <= n; i++) {
2524 // for each number up to and including the number `n`, multiply
2525 // the accumulator my that number.
2526 accumulator *= i;
2527 }
2528 return accumulator;
2529}
2530
2531/**
2532 * Compute the [gamma function](https://en.wikipedia.org/wiki/Gamma_function) of a value using Nemes' approximation.
2533 * The gamma of n is equivalent to (n-1)!, but unlike the factorial function, gamma is defined for all real n except zero
2534 * and negative integers (where NaN is returned). Note, the gamma function is also well-defined for complex numbers,
2535 * though this implementation currently does not handle complex numbers as input values.
2536 * Nemes' approximation is defined [here](https://arxiv.org/abs/1003.6020) as Theorem 2.2.
2537 * Negative values use [Euler's reflection formula](https://en.wikipedia.org/wiki/Gamma_function#Properties) for computation.
2538 *
2539 * @param {number} n Any real number except for zero and negative integers.
2540 * @returns {number} The gamma of the input value.
2541 *
2542 * @example
2543 * gamma(11.5); // 11899423.084037038
2544 * gamma(-11.5); // 2.29575810481609e-8
2545 * gamma(5); // 24
2546 */
2547function gamma(n) {
2548 if (Number.isInteger(n)) {
2549 if (n <= 0) {
2550 // gamma not defined for zero or negative integers
2551 return NaN;
2552 } else {
2553 // use factorial for integer inputs
2554 return factorial(n - 1);
2555 }
2556 }
2557
2558 // Decrement n, because approximation is defined for n - 1
2559 n--;
2560
2561 if (n < 0) {
2562 // Use Euler's reflection formula for negative inputs
2563 // see: https://en.wikipedia.org/wiki/Gamma_function#Properties
2564 return Math.PI / (Math.sin(Math.PI * -n) * gamma(-n));
2565 } else {
2566 // Nemes' expansion approximation
2567 var seriesCoefficient =
2568 Math.pow(n / Math.E, n) * Math.sqrt(2 * Math.PI * (n + 1 / 6));
2569
2570 var seriesDenom = n + 1 / 4;
2571
2572 var seriesExpansion =
2573 1 +
2574 1 / 144 / Math.pow(seriesDenom, 2) -
2575 1 / 12960 / Math.pow(seriesDenom, 3) -
2576 257 / 207360 / Math.pow(seriesDenom, 4) -
2577 52 / 2612736 / Math.pow(seriesDenom, 5) +
2578 5741173 / 9405849600 / Math.pow(seriesDenom, 6) +
2579 37529 / 18811699200 / Math.pow(seriesDenom, 7);
2580
2581 return seriesCoefficient * seriesExpansion;
2582 }
2583}
2584
// Series coefficients for the Lanczos approximation of log-gamma,
// consumed by `gammaln` below together with the parameter g = 607/128.
var COEFFICIENTS = [
    0.99999999999999709182,
    57.156235665862923517,
    -59.597960355475491248,
    14.136097974741747174,
    -0.49191381609762019978,
    0.33994649984811888699e-4,
    0.46523628927048575665e-4,
    -0.98374475304879564677e-4,
    0.15808870322491248884e-3,
    -0.21026444172410488319e-3,
    0.2174396181152126432e-3,
    -0.16431810653676389022e-3,
    0.84418223983852743293e-4,
    -0.2619083840158140867e-4,
    0.36899182659531622704e-5
];
2603
// Lanczos parameter paired with the COEFFICIENTS series above.
var g = 607 / 128;
// Precomputed log(sqrt(2 * pi)), the constant term of the
// log-gamma approximation in `gammaln`.
var LOGSQRT2PI = Math.log(Math.sqrt(2 * Math.PI));
2606
2607/**
2608 * Compute the logarithm of the [gamma function](https://en.wikipedia.org/wiki/Gamma_function) of a value using Lanczos' approximation.
2609 * This function takes as input any real-value n greater than 0.
2610 * This function is useful for values of n too large for the normal gamma function (n > 165).
2611 * The code is based on Lanczo's Gamma approximation, defined [here](http://my.fit.edu/~gabdo/gamma.txt).
2612 *
2613 * @param {number} n Any real number greater than zero.
2614 * @returns {number} The logarithm of gamma of the input value.
2615 *
2616 * @example
2617 * gammaln(500); // 2605.1158503617335
2618 * gammaln(2.4); // 0.21685932244884043
2619 */
2620function gammaln(n) {
2621 // Return infinity if value not in domain
2622 if (n <= 0) {
2623 return Infinity;
2624 }
2625
2626 // Decrement n, because approximation is defined for n - 1
2627 n--;
2628
2629 // Create series approximation
2630 var a = COEFFICIENTS[0];
2631
2632 for (var i = 1; i < 15; i++) {
2633 a += COEFFICIENTS[i] / (n + i);
2634 }
2635
2636 var tmp = g + 0.5 + n;
2637
2638 // Return natural logarithm of gamma(n)
2639 return LOGSQRT2PI + Math.log(a) - tmp + (n + 0.5) * Math.log(tmp);
2640}
2641
2642/**
2643 * The [Bernoulli distribution](http://en.wikipedia.org/wiki/Bernoulli_distribution)
2644 * is the probability discrete
2645 * distribution of a random variable which takes value 1 with success
2646 * probability `p` and value 0 with failure
2647 * probability `q` = 1 - `p`. It can be used, for example, to represent the
2648 * toss of a coin, where "1" is defined to mean "heads" and "0" is defined
2649 * to mean "tails" (or vice versa). It is
2650 * a special case of a Binomial Distribution
2651 * where `n` = 1.
2652 *
2653 * @param {number} p input value, between 0 and 1 inclusive
2654 * @returns {number[]} values of bernoulli distribution at this point
2655 * @throws {Error} if p is outside 0 and 1
2656 * @example
2657 * bernoulliDistribution(0.3); // => [0.7, 0.3]
2658 */
2659function bernoulliDistribution(p) /*: number[] */ {
2660 // Check that `p` is a valid probability (0 ≤ p ≤ 1)
2661 if (p < 0 || p > 1) {
2662 throw new Error(
2663 "bernoulliDistribution requires probability to be between 0 and 1 inclusive"
2664 );
2665 }
2666
2667 return [1 - p, p];
2668}
2669
2670/**
2671 * The [Binomial Distribution](http://en.wikipedia.org/wiki/Binomial_distribution) is the discrete probability
2672 * distribution of the number of successes in a sequence of n independent yes/no experiments, each of which yields
2673 * success with probability `probability`. Such a success/failure experiment is also called a Bernoulli experiment or
2674 * Bernoulli trial; when trials = 1, the Binomial Distribution is a Bernoulli Distribution.
2675 *
2676 * @param {number} trials number of trials to simulate
2677 * @param {number} probability
2678 * @returns {number[]} output
2679 */
2680function binomialDistribution(trials, probability) /*: ?number[] */ {
2681 // Check that `p` is a valid probability (0 ≤ p ≤ 1),
2682 // that `n` is an integer, strictly positive.
2683 if (probability < 0 || probability > 1 || trials <= 0 || trials % 1 !== 0) {
2684 return undefined;
2685 }
2686
2687 // We initialize `x`, the random variable, and `accumulator`, an accumulator
2688 // for the cumulative distribution function to 0. `distribution_functions`
2689 // is the object we'll return with the `probability_of_x` and the
2690 // `cumulativeProbability_of_x`, as well as the calculated mean &
2691 // variance. We iterate until the `cumulativeProbability_of_x` is
2692 // within `epsilon` of 1.0.
2693 var x = 0;
2694 var cumulativeProbability = 0;
2695 var cells = [];
2696 var binomialCoefficient = 1;
2697
2698 // This algorithm iterates through each potential outcome,
2699 // until the `cumulativeProbability` is very close to 1, at
2700 // which point we've defined the vast majority of outcomes
2701 do {
2702 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
2703 cells[x] =
2704 binomialCoefficient *
2705 Math.pow(probability, x) *
2706 Math.pow(1 - probability, trials - x);
2707 cumulativeProbability += cells[x];
2708 x++;
2709 binomialCoefficient = (binomialCoefficient * (trials - x + 1)) / x;
2710 // when the cumulativeProbability is nearly 1, we've calculated
2711 // the useful range of this distribution
2712 } while (cumulativeProbability < 1 - epsilon);
2713
2714 return cells;
2715}
2716
2717/**
2718 * The [Poisson Distribution](http://en.wikipedia.org/wiki/Poisson_distribution)
2719 * is a discrete probability distribution that expresses the probability
2720 * of a given number of events occurring in a fixed interval of time
2721 * and/or space if these events occur with a known average rate and
2722 * independently of the time since the last event.
2723 *
2724 * The Poisson Distribution is characterized by the strictly positive
2725 * mean arrival or occurrence rate, `λ`.
2726 *
2727 * @param {number} lambda location poisson distribution
2728 * @returns {number[]} values of poisson distribution at that point
2729 */
2730function poissonDistribution(lambda) /*: ?number[] */ {
2731 // Check that lambda is strictly positive
2732 if (lambda <= 0) {
2733 return undefined;
2734 }
2735
2736 // our current place in the distribution
2737 var x = 0;
2738 // and we keep track of the current cumulative probability, in
2739 // order to know when to stop calculating chances.
2740 var cumulativeProbability = 0;
2741 // the calculated cells to be returned
2742 var cells = [];
2743 var factorialX = 1;
2744
2745 // This algorithm iterates through each potential outcome,
2746 // until the `cumulativeProbability` is very close to 1, at
2747 // which point we've defined the vast majority of outcomes
2748 do {
2749 // a [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function)
2750 cells[x] = (Math.exp(-lambda) * Math.pow(lambda, x)) / factorialX;
2751 cumulativeProbability += cells[x];
2752 x++;
2753 factorialX *= x;
2754 // when the cumulativeProbability is nearly 1, we've calculated
2755 // the useful range of this distribution
2756 } while (cumulativeProbability < 1 - epsilon);
2757
2758 return cells;
2759}
2760
2761/**
2762 * **Percentage Points of the χ2 (Chi-Squared) Distribution**
2763 *
2764 * The [χ2 (Chi-Squared) Distribution](http://en.wikipedia.org/wiki/Chi-squared_distribution) is used in the common
2765 * chi-squared tests for goodness of fit of an observed distribution to a theoretical one, the independence of two
2766 * criteria of classification of qualitative data, and in confidence interval estimation for a population standard
2767 * deviation of a normal distribution from a sample standard deviation.
2768 *
2769 * Values from Appendix 1, Table III of William W. Hines & Douglas C. Montgomery, "Probability and Statistics in
2770 * Engineering and Management Science", Wiley (1980).
2771 */
// Upper-tail critical values of the χ2 distribution, keyed first by degrees
// of freedom and then by significance level (upper-tail probability).
// The values are stored compactly as one row per degrees-of-freedom entry
// and expanded into nested lookup objects below.
var chiSquaredDistributionTable = (function () {
    // Column order of every row: the supported significance levels.
    var probabilities = [
        0.995, 0.99, 0.975, 0.95, 0.9, 0.5, 0.1, 0.05, 0.025, 0.01, 0.005
    ];
    // One row of critical values per degrees-of-freedom entry.
    var rows = {
        1: [0, 0, 0, 0, 0.02, 0.45, 2.71, 3.84, 5.02, 6.63, 7.88],
        2: [0.01, 0.02, 0.05, 0.1, 0.21, 1.39, 4.61, 5.99, 7.38, 9.21, 10.6],
        3: [0.07, 0.11, 0.22, 0.35, 0.58, 2.37, 6.25, 7.81, 9.35, 11.34, 12.84],
        4: [0.21, 0.3, 0.48, 0.71, 1.06, 3.36, 7.78, 9.49, 11.14, 13.28, 14.86],
        5: [0.41, 0.55, 0.83, 1.15, 1.61, 4.35, 9.24, 11.07, 12.83, 15.09, 16.75],
        6: [0.68, 0.87, 1.24, 1.64, 2.2, 5.35, 10.65, 12.59, 14.45, 16.81, 18.55],
        7: [0.99, 1.25, 1.69, 2.17, 2.83, 6.35, 12.02, 14.07, 16.01, 18.48, 20.28],
        8: [1.34, 1.65, 2.18, 2.73, 3.49, 7.34, 13.36, 15.51, 17.53, 20.09, 21.96],
        9: [1.73, 2.09, 2.7, 3.33, 4.17, 8.34, 14.68, 16.92, 19.02, 21.67, 23.59],
        10: [2.16, 2.56, 3.25, 3.94, 4.87, 9.34, 15.99, 18.31, 20.48, 23.21, 25.19],
        11: [2.6, 3.05, 3.82, 4.57, 5.58, 10.34, 17.28, 19.68, 21.92, 24.72, 26.76],
        12: [3.07, 3.57, 4.4, 5.23, 6.3, 11.34, 18.55, 21.03, 23.34, 26.22, 28.3],
        13: [3.57, 4.11, 5.01, 5.89, 7.04, 12.34, 19.81, 22.36, 24.74, 27.69, 29.82],
        14: [4.07, 4.66, 5.63, 6.57, 7.79, 13.34, 21.06, 23.68, 26.12, 29.14, 31.32],
        15: [4.6, 5.23, 6.27, 7.26, 8.55, 14.34, 22.31, 25, 27.49, 30.58, 32.8],
        16: [5.14, 5.81, 6.91, 7.96, 9.31, 15.34, 23.54, 26.3, 28.85, 32, 34.27],
        17: [5.7, 6.41, 7.56, 8.67, 10.09, 16.34, 24.77, 27.59, 30.19, 33.41, 35.72],
        18: [6.26, 7.01, 8.23, 9.39, 10.87, 17.34, 25.99, 28.87, 31.53, 34.81, 37.16],
        19: [6.84, 7.63, 8.91, 10.12, 11.65, 18.34, 27.2, 30.14, 32.85, 36.19, 38.58],
        20: [7.43, 8.26, 9.59, 10.85, 12.44, 19.34, 28.41, 31.41, 34.17, 37.57, 40],
        21: [8.03, 8.9, 10.28, 11.59, 13.24, 20.34, 29.62, 32.67, 35.48, 38.93, 41.4],
        22: [8.64, 9.54, 10.98, 12.34, 14.04, 21.34, 30.81, 33.92, 36.78, 40.29, 42.8],
        23: [9.26, 10.2, 11.69, 13.09, 14.85, 22.34, 32.01, 35.17, 38.08, 41.64, 44.18],
        24: [9.89, 10.86, 12.4, 13.85, 15.66, 23.34, 33.2, 36.42, 39.36, 42.98, 45.56],
        25: [10.52, 11.52, 13.12, 14.61, 16.47, 24.34, 34.28, 37.65, 40.65, 44.31, 46.93],
        26: [11.16, 12.2, 13.84, 15.38, 17.29, 25.34, 35.56, 38.89, 41.92, 45.64, 48.29],
        27: [11.81, 12.88, 14.57, 16.15, 18.11, 26.34, 36.74, 40.11, 43.19, 46.96, 49.65],
        28: [12.46, 13.57, 15.31, 16.93, 18.94, 27.34, 37.92, 41.34, 44.46, 48.28, 50.99],
        29: [13.12, 14.26, 16.05, 17.71, 19.77, 28.34, 39.09, 42.56, 45.72, 49.59, 52.34],
        30: [13.79, 14.95, 16.79, 18.49, 20.6, 29.34, 40.26, 43.77, 46.98, 50.89, 53.67],
        40: [20.71, 22.16, 24.43, 26.51, 29.05, 39.34, 51.81, 55.76, 59.34, 63.69, 66.77],
        50: [27.99, 29.71, 32.36, 34.76, 37.69, 49.33, 63.17, 67.5, 71.42, 76.15, 79.49],
        60: [35.53, 37.48, 40.48, 43.19, 46.46, 59.33, 74.4, 79.08, 83.3, 88.38, 91.95],
        70: [43.28, 45.44, 48.76, 51.74, 55.33, 69.33, 85.53, 90.53, 95.02, 100.42, 104.22],
        80: [51.17, 53.54, 57.15, 60.39, 64.28, 79.33, 96.58, 101.88, 106.63, 112.33, 116.32],
        90: [59.2, 61.75, 65.65, 69.13, 73.29, 89.33, 107.57, 113.14, 118.14, 124.12, 128.3],
        100: [67.33, 70.06, 74.22, 77.93, 82.36, 99.33, 118.5, 124.34, 129.56, 135.81, 140.17]
    };

    // Expand each compact row into a { significance: criticalValue } object.
    var table = {};
    for (var degreesOfFreedom in rows) {
        var row = rows[degreesOfFreedom];
        var entry = {};
        for (var i = 0; i < probabilities.length; i++) {
            entry[probabilities[i]] = row[i];
        }
        table[degreesOfFreedom] = entry;
    }
    return table;
})();
3255
/**
 * The [χ2 (Chi-Squared) Goodness-of-Fit Test](http://en.wikipedia.org/wiki/Goodness_of_fit#Pearson.27s_chi-squared_test)
 * measures fit as the sum, over all classes, of the squared difference
 * between observed and expected frequencies divided by the expected
 * frequency. The resulting χ2 statistic is compared to the chi-squared
 * distribution with (k − c) degrees of freedom, where `k` is the number of
 * non-empty classes and `c` is the number of estimated parameters, to accept
 * or reject the hypothesized distribution.
 *
 * @param {Array<number>} data sample of non-negative integer observations
 * @param {Function} distributionType a function that, given a mean, returns
 * a point in a distribution: for instance, binomial, bernoulli, or poisson
 * @param {number} significance significance level to test at
 * @returns {boolean} true when the χ2 statistic exceeds the critical value,
 * i.e. the hypothesized distribution is rejected
 * @example
 * // Data from Poisson goodness-of-fit example 10-19 in William W. Hines & Douglas C. Montgomery,
 * // "Probability and Statistics in Engineering and Management Science", Wiley (1980).
 * var data1019 = [
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 *     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 *     2, 2, 2, 2, 2, 2, 2, 2, 2,
 *     3, 3, 3, 3
 * ];
 * ss.chiSquaredGoodnessOfFit(data1019, ss.poissonDistribution, 0.05); // => false
 */
function chiSquaredGoodnessOfFit(data, distributionType, significance) {
    // One distribution parameter (lambda, the mean) is estimated from the
    // sample itself, which costs one degree of freedom.
    var estimatedParams = 1;

    // Estimate the distribution parameter from the sample, then generate
    // the hypothesized distribution with it.
    var sampleMean = mean(data);
    var hypothesized = distributionType(sampleMean);

    // Histogram of observed values: index -> number of occurrences.
    var observed = [];
    for (var i = 0; i < data.length; i++) {
        observed[data[i]] =
            (observed[data[i]] === undefined ? 0 : observed[data[i]]) + 1;
    }

    // The histogram may be sparse; turn gaps (undefined) into zero counts.
    for (var j = 0; j < observed.length; j++) {
        if (observed[j] === undefined) {
            observed[j] = 0;
        }
    }

    // Expected frequencies under the hypothesized distribution, limited to
    // the classes present in the observed histogram.
    var expected = [];
    for (var value in hypothesized) {
        if (value in observed) {
            expected[+value] = hypothesized[value] * data.length;
        }
    }

    // Walking backward, merge any class whose expected count is below three
    // into its lower neighbor, applying the same merge to the observed
    // histogram so the two stay aligned.
    for (var c = expected.length - 1; c >= 0; c--) {
        if (expected[c] < 3) {
            expected[c - 1] += expected[c];
            expected.pop();

            observed[c - 1] += observed[c];
            observed.pop();
        }
    }

    // Accumulate the χ2 statistic: Σ (observed − expected)² / expected.
    var chiSquared = 0;
    for (var k = 0; k < observed.length; k++) {
        chiSquared +=
            Math.pow(observed[k] - expected[k], 2) / expected[k];
    }

    // Degrees of freedom = class count − estimated parameters − 1; look up
    // the critical value and accept/reject the fit.
    var degreesOfFreedom = observed.length - estimatedParams - 1;
    return (
        chiSquaredDistributionTable[degreesOfFreedom][significance] < chiSquared
    );
}
3354
// √(2π), the normalizing constant of the gaussian density below.
var SQRT_2PI = Math.sqrt(2 * Math.PI);

/**
 * [Well-known kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#Kernel_functions_in_common_use)
 * @private
 */
var kernels = {
    /**
     * The gaussian kernel: the standard normal probability density.
     * @private
     */
    gaussian: function (u) {
        var exponent = -0.5 * u * u;
        return Math.exp(exponent) / SQRT_2PI;
    }
};
3370
/**
 * Well known bandwidth selection methods
 * @private
 */
var bandwidthMethods = {
    /**
     * The ["normal reference distribution"
     * rule-of-thumb](https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/bandwidth.nrd.html),
     * a commonly used version of [Silverman's
     * rule-of-thumb](https://en.wikipedia.org/wiki/Kernel_density_estimation#A_rule-of-thumb_bandwidth_estimator).
     * @private
     */
    nrd: function (x) {
        // Use the smaller of the standard deviation and the scaled IQR as
        // the spread estimate (IQR may be undefined for tiny samples).
        var spread = sampleStandardDeviation(x);
        var iqr = interquartileRange(x);
        if (typeof iqr === "number") {
            spread = Math.min(spread, iqr / 1.34);
        }
        return 1.06 * spread * Math.pow(x.length, -0.2);
    }
};
3392
/**
 * [Kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation)
 * is a useful tool for, among other things, estimating the shape of the
 * underlying probability distribution from a sample.
 *
 * @name kernelDensityEstimation
 * @param X sample values
 * @param kernel The kernel function to use. If a function is provided, it should return non-negative values and integrate to 1. Defaults to 'gaussian'.
 * @param bandwidthMethod The "bandwidth selection" method to use, or a fixed bandwidth value. Defaults to "nrd", the commonly-used ["normal reference distribution" rule-of-thumb](https://stat.ethz.ch/R-manual/R-devel/library/MASS/html/bandwidth.nrd.html).
 * @returns {Function} An estimated [probability density function](https://en.wikipedia.org/wiki/Probability_density_function) for the given sample. The returned function runs in `O(X.length)`.
 */
function kernelDensityEstimation(X, kernel, bandwidthMethod) {
    // Resolve the kernel: default, named lookup, or caller-supplied function.
    var kernelFn;
    switch (typeof kernel) {
        case "undefined":
            kernelFn = kernels.gaussian;
            break;
        case "string":
            if (!kernels[kernel]) {
                throw new Error('Unknown kernel "' + kernel + '"');
            }
            kernelFn = kernels[kernel];
            break;
        default:
            kernelFn = kernel;
    }

    // Resolve the bandwidth: default method, named method, or fixed value.
    var bandwidth;
    switch (typeof bandwidthMethod) {
        case "undefined":
            bandwidth = bandwidthMethods.nrd(X);
            break;
        case "string":
            if (!bandwidthMethods[bandwidthMethod]) {
                throw new Error(
                    'Unknown bandwidth method "' + bandwidthMethod + '"'
                );
            }
            bandwidth = bandwidthMethods[bandwidthMethod](X);
            break;
        default:
            bandwidth = bandwidthMethod;
    }

    // The estimator averages the scaled kernel over every sample point.
    return function (x) {
        var total = 0;
        for (var i = 0; i < X.length; i++) {
            total += kernelFn((x - X[i]) / bandwidth);
        }
        return total / bandwidth / X.length;
    };
}
3440
/**
 * The [Z-Score, or Standard Score](http://en.wikipedia.org/wiki/Standard_score).
 *
 * The standard score is the number of standard deviations an observation
 * or datum lies above or below the mean: a positive score is above the
 * mean, a negative score below it. It is a dimensionless quantity obtained
 * by subtracting the population mean from an individual raw score and
 * dividing the difference by the population standard deviation.
 *
 * The z-score is only defined if one knows the population parameters; with
 * only a sample set, the analogous computation using sample mean and sample
 * standard deviation yields the Student's t-statistic.
 *
 * @param {number} x
 * @param {number} mean
 * @param {number} standardDeviation
 * @return {number} z score
 * @example
 * zScore(78, 80, 5); // => -0.4
 */
function zScore(x, mean, standardDeviation) {
    var deviation = x - mean;
    return deviation / standardDeviation;
}
3467
// √(2π), used to normalize the standard normal density below.
var SQRT_2PI$1 = Math.sqrt(2 * Math.PI);

/**
 * Cumulative distribution function Φ(z) of the standard normal
 * distribution, evaluated via the Taylor series
 * Φ(z) = 1/2 + φ(z)·(z + z³/3 + z⁵/15 + …) and rounded to four digits.
 *
 * @param {number} z standard score
 * @returns {number} Φ(z), rounded to 4 decimal places
 */
function cumulativeDistribution(z) {
    var series = z;
    var term = z;

    // 15 terms are enough for 4-digit precision over the table's range.
    for (var i = 1; i < 15; i++) {
        term *= (z * z) / (2 * i + 1);
        series += term;
    }
    var density = Math.exp((-z * z) / 2);
    return Math.round((0.5 + (series / SQRT_2PI$1) * density) * 1e4) / 1e4;
}
3484
/**
 * A standard normal table, also called the unit normal table or Z table:
 * precomputed values of Φ (phi), the cumulative distribution function of
 * the normal distribution. It is used to find the probability that a
 * statistic is observed below, above, or between values on the standard
 * normal distribution, and by extension, any normal distribution.
 *
 * Entries are the cumulative form (not cumulative-from-0-to-mean, even
 * though the latter offers 5-digit precision instead of 4), computed with
 * `cumulativeDistribution` for z = 0.00, 0.01, …, 3.09.
 */
var standardNormalTable = [];

var z = 0;
while (z <= 3.09) {
    standardNormalTable.push(cumulativeDistribution(z));
    // NOTE: repeated 0.01 additions accumulate floating-point error, which
    // determines exactly where this loop stops; preserved as-is.
    z += 0.01;
}
3503
/**
 * **[Cumulative Standard Normal Probability](http://en.wikipedia.org/wiki/Standard_normal_table)**
 *
 * Since probability tables cannot be printed for every normal distribution
 * (there are infinitely many), it is common practice to convert a normal to
 * a standard normal and then use the standard normal table to find
 * probabilities. This function looks up Φ(z) in the precomputed
 * `standardNormalTable`.
 *
 * You can use `.5 + .5 * errorFunction(x / Math.sqrt(2))` to calculate the
 * probability instead of looking it up in a table.
 *
 * @param {number} z
 * @returns {number} cumulative standard normal probability
 */
function cumulativeStdNormalProbability(z) {
    // Table entries are 0.01 apart, so the row index is |z| * 100, clamped
    // to the last row for large inputs.
    var index = Math.min(
        Math.round(Math.abs(z) * 100),
        standardNormalTable.length - 1
    );

    // The table only stores non-negative z; for negative inputs use the
    // symmetry Φ(-z) = 1 - Φ(z). `toFixed(4)` trims repeating fractions
    // that `1 - value` can introduce despite the table's 4-digit entries.
    return z >= 0
        ? standardNormalTable[index]
        : +(1 - standardNormalTable[index]).toFixed(4);
}
3542
/**
 * **[Gaussian error function](http://en.wikipedia.org/wiki/Error_function)**
 *
 * The `errorFunction(x/(sd * Math.sqrt(2)))` is the probability that a value in a
 * normal distribution with standard deviation sd is within x of the mean.
 *
 * This function returns a numerical approximation to the exact value,
 * evaluating a polynomial of τ (tau) via Horner's method.
 *
 * @param {number} x input
 * @return {number} error estimation
 * @example
 * errorFunction(1).toFixed(2); // => '0.84'
 */
function errorFunction(x) {
    var t = 1 / (1 + 0.5 * Math.abs(x));
    // Horner coefficients, highest order first.
    var coefficients = [
        0.17087277, -0.82215223, 1.48851587, -1.13520398, 0.27886807,
        -0.18628806, 0.09678418, 0.37409196, 1.00002368, -1.26551223
    ];
    // reduce with no initial value seeds the accumulator with the leading
    // coefficient, reproducing the original nested Horner chain exactly.
    var polynomial = coefficients.reduce(function (acc, c) {
        return acc * t + c;
    });
    var tau = t * Math.exp(-x * x + polynomial);
    // The approximation is for |x|; restore the sign via erf(-x) = -erf(x).
    return x >= 0 ? 1 - tau : tau - 1;
}
3584
/**
 * The Inverse [Gaussian error function](http://en.wikipedia.org/wiki/Error_function)
 * returns a numerical approximation to the value that would have caused
 * `errorFunction()` to return x.
 *
 * @param {number} x value of error function
 * @returns {number} estimated inverted value
 */
function inverseErrorFunction(x) {
    // Shape constant of the approximation, ≈ 0.140012.
    var a = (8 * (Math.PI - 3)) / (3 * Math.PI * (4 - Math.PI));

    var halfLog = Math.log(1 - x * x) / 2;
    var b = 2 / (Math.PI * a) + halfLog;
    var magnitude = Math.sqrt(
        Math.sqrt(Math.pow(b, 2) - Math.log(1 - x * x) / a) - b
    );

    // The formula only yields the magnitude; erf is odd, so mirror the sign.
    return x >= 0 ? magnitude : -magnitude;
}
3610
/**
 * The [Probit](http://en.wikipedia.org/wiki/Probit)
 * is the inverse of cumulativeStdNormalProbability(),
 * and is also known as the normal quantile function.
 *
 * It returns the number of standard deviations from the mean
 * where the p'th quantile of values can be found in a normal distribution.
 * So, for example, probit(0.5 + 0.6827/2) ≈ 1 because 68.27% of values are
 * normally found within 1 standard deviation above or below the mean.
 *
 * @param {number} p probability in [0, 1]
 * @returns {number} probit
 */
function probit(p) {
    // Clamp to the open interval (0, 1) so the inverse error function
    // receives a finite argument at the endpoints.
    if (p === 0) {
        p = epsilon;
    } else if (p >= 1) {
        p = 1 - epsilon;
    }
    return Math.sqrt(2) * inverseErrorFunction(2 * p - 1);
}
3632
/**
 * Conducts a [permutation test](https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests)
 * to determine if two data sets are *significantly* different from each other, using
 * the difference of means between the groups as the test statistic.
 * The function allows for the following hypotheses:
 * - two_side = Null hypothesis: the two distributions are equal.
 * - greater = Null hypothesis: observations from sampleX tend to be smaller than those from sampleY.
 * - less = Null hypothesis: observations from sampleX tend to be greater than those from sampleY.
 * [Learn more about one-tail vs two-tail tests.](https://en.wikipedia.org/wiki/One-_and_two-tailed_tests)
 *
 * @param {Array<number>} sampleX first dataset (e.g. treatment data)
 * @param {Array<number>} sampleY second dataset (e.g. control data)
 * @param {string} alternative alternative hypothesis, either 'two_side' (default), 'greater', or 'less'
 * @param {number} k number of values in permutation distribution.
 * @param {Function} [randomSource=Math.random] an optional entropy source
 * @returns {number} p-value The probability of observing the difference between groups (as or more extreme than what we did), assuming the null hypothesis.
 *
 * @example
 * var control = [2, 5, 3, 6, 7, 2, 5];
 * var treatment = [20, 5, 13, 12, 7, 2, 2];
 * permutationTest(control, treatment); // ~0.1324
 */
function permutationTest(sampleX, sampleY, alternative, k, randomSource) {
    // Set default arguments
    if (k === undefined) {
        k = 10000;
    }
    if (alternative === undefined) {
        alternative = "two_side";
    }
    if (
        alternative !== "two_side" &&
        alternative !== "greater" &&
        alternative !== "less"
    ) {
        throw new Error(
            "`alternative` must be either 'two_side', 'greater', or 'less'"
        );
    }

    // Calculate the initial test statistic (difference of sample means).
    // This is our point of comparison with the permuted statistics.
    var testStatistic = mean(sampleX) - mean(sampleY);

    // Permutation distribution of the test statistic.
    var testStatDsn = new Array(k);

    // Combine the datasets so group assignments can be reshuffled wholesale.
    var allData = sampleX.concat(sampleY);
    var midIndex = Math.floor(allData.length / 2);

    for (var i = 0; i < k; i++) {
        // 1. shuffle data assignments
        shuffleInPlace(allData, randomSource);
        var permLeft = allData.slice(0, midIndex);
        var permRight = allData.slice(midIndex, allData.length);

        // 2. re-calculate the test statistic and store it to build the
        // permutation distribution
        testStatDsn[i] = mean(permLeft) - mean(permRight);
    }

    // Calculate p-value depending on alternative: the proportion of
    // 'extreme' permuted statistics (subject to our hypothesis).
    // More info: https://onlinecourses.science.psu.edu/stat464/node/35
    //
    // BUG FIX: these loops previously ran with `i <= k`, reading one element
    // past the end of `testStatDsn`. The out-of-bounds `undefined` never
    // compared true, so results were unaffected, but the bounds are now
    // correct (`i < k`).
    var numExtremeTStats = 0;
    if (alternative === "two_side") {
        for (var i$1 = 0; i$1 < k; i$1++) {
            if (Math.abs(testStatDsn[i$1]) >= Math.abs(testStatistic)) {
                numExtremeTStats += 1;
            }
        }
    } else if (alternative === "greater") {
        for (var i$2 = 0; i$2 < k; i$2++) {
            if (testStatDsn[i$2] >= testStatistic) {
                numExtremeTStats += 1;
            }
        }
    } else {
        // alternative === 'less'
        for (var i$3 = 0; i$3 < k; i$3++) {
            if (testStatDsn[i$3] <= testStatistic) {
                numExtremeTStats += 1;
            }
        }
    }

    return numExtremeTStats / k;
}
3728
/**
 * [Sign](https://en.wikipedia.org/wiki/Sign_function) is a function
 * that extracts the sign of a real number.
 *
 * @param {number} x input value
 * @returns {number} sign value either 1, 0 or -1
 * @throws {TypeError} if the input argument x is not a number
 * @private
 *
 * @example
 * sign(2); // => 1
 */
function sign(x) {
    // Guard first so every non-number rejects before any comparison.
    if (typeof x !== "number") {
        throw new TypeError("not a number");
    }
    if (x < 0) {
        return -1;
    }
    // Matches the original: NaN falls through to 1 here.
    return x === 0 ? 0 : 1;
}
3754
/**
 * [Bisection method](https://en.wikipedia.org/wiki/Bisection_method) is a root-finding
 * method that repeatedly bisects an interval to find the root.
 *
 * This function returns a numerical approximation to the exact value.
 *
 * @param {Function} func input function
 * @param {number} start - start of interval
 * @param {number} end - end of interval
 * @param {number} maxIterations - the maximum number of iterations
 * @param {number} errorTolerance - the error tolerance
 * @returns {number} estimated root value
 * @throws {TypeError} Argument func must be a function
 *
 * @example
 * bisect(Math.cos,0,4,100,0.003); // => 1.572265625
 */
function bisect(func, start, end, maxIterations, errorTolerance) {
    if (typeof func !== "function") {
        throw new TypeError("func must be a function");
    }

    var iteration = 0;
    while (iteration < maxIterations) {
        var midpoint = (start + end) / 2;

        // Stop when we hit the root exactly or the half-interval is within
        // tolerance.
        if (
            func(midpoint) === 0 ||
            Math.abs((end - start) / 2) < errorTolerance
        ) {
            return midpoint;
        }

        // Keep the half whose endpoints still bracket a sign change.
        if (sign(func(midpoint)) === sign(func(start))) {
            start = midpoint;
        } else {
            end = midpoint;
        }
        iteration++;
    }

    throw new Error("maximum number of iterations exceeded");
}
3795
/**
 * Calculate Euclidean distance between two points.
 * @param {Array<number>} left First N-dimensional point.
 * @param {Array<number>} right Second N-dimensional point.
 * @returns {number} Distance.
 */
function euclideanDistance(left, right) {
    // Sum the squared per-coordinate differences, then take the root.
    var squaredSum = left.reduce(function (acc, value, i) {
        var delta = value - right[i];
        return acc + delta * delta;
    }, 0);
    return Math.sqrt(squaredSum);
}
3810
/**
 * @typedef {Object} kMeansReturn
 * @property {Array<number>} labels The labels.
 * @property {Array<Array<number>>} centroids The cluster centroids.
 */

/**
 * Perform k-means clustering.
 *
 * Iterates Lloyd's-style passes: assign each point to its nearest centroid,
 * recompute centroids from the assignments, and repeat until the centroids
 * stop moving.
 *
 * @param {Array<Array<number>>} points N-dimensional coordinates of points to be clustered.
 * @param {number} numCluster How many clusters to create.
 * @param {Function} randomSource An optional entropy source that generates uniform values in [0, 1).
 * @return {kMeansReturn} Labels (same length as data) and centroids (same length as numCluster).
 * @throws {Error} If any centroids wind up friendless (i.e., without associated points).
 *
 * @example
 * kMeansCluster([[0.0, 0.5], [1.0, 0.5]], 2); // => {labels: [0, 1], centroids: [[0.0, 0.5], [1.0 0.5]]}
 */
function kMeansCluster(points, numCluster, randomSource) {
    if (randomSource === undefined) { randomSource = Math.random; }

    // Seed the centroids with a random sample of the points, then iterate
    // until a full pass produces no centroid movement.
    var centroids = sample(points, numCluster, randomSource);
    var labels = null;
    var previousCentroids;
    do {
        labels = labelPoints(points, centroids);
        previousCentroids = centroids;
        centroids = calculateCentroids(points, labels, numCluster);
    } while (calculateChange(centroids, previousCentroids) !== 0);

    return {
        labels: labels,
        centroids: centroids
    };
}
3847
/**
 * Label each point according to which centroid it is closest to.
 *
 * @private
 * @param {Array<Array<number>>} points Array of XY coordinates.
 * @param {Array<Array<number>>} centroids Current centroids.
 * @return {Array<number>} Group labels.
 */
function labelPoints(points, centroids) {
    return points.map(function (point) {
        // Track the nearest centroid seen so far; ties keep the earlier one.
        var bestLabel = -1;
        var bestDistance = Number.MAX_VALUE;
        centroids.forEach(function (centroid, index) {
            var distance = euclideanDistance(point, centroid);
            if (distance < bestDistance) {
                bestDistance = distance;
                bestLabel = index;
            }
        });
        return bestLabel;
    });
}
3870
/**
 * Calculate centroids for points given labels.
 *
 * @private
 * @param {Array<Array<number>>} points Array of XY coordinates.
 * @param {Array<number>} labels Which groups points belong to.
 * @param {number} numCluster Number of clusters being created.
 * @return {Array<Array<number>>} Centroid for each group.
 * @throws {Error} If any centroids wind up friendless (i.e., without associated points).
 */
function calculateCentroids(points, labels, numCluster) {
    // Per-cluster coordinate sums (one row per cluster) and point counts.
    var dimension = points[0].length;
    var centroids = makeMatrix(numCluster, dimension);
    var counts = Array(numCluster).fill(0);

    // Accumulate each point into its labeled cluster's running sums.
    points.forEach(function (point, index) {
        var label = labels[index];
        var accumulator = centroids[label];
        for (var j = 0; j < dimension; j++) {
            accumulator[j] += point[j];
        }
        counts[label] += 1;
    });

    // Divide sums by counts to get means, rejecting empty clusters.
    for (var c = 0; c < numCluster; c++) {
        if (counts[c] === 0) {
            throw new Error("Centroid " + c + " has no friends");
        }
        for (var d = 0; d < dimension; d++) {
            centroids[c][d] /= counts[c];
        }
    }

    return centroids;
}
3912
/**
 * Calculate the difference between old centroids and new centroids.
 *
 * @private
 * @param {Array<Array<number>>} left One list of centroids.
 * @param {Array<Array<number>>} right Another list of centroids.
 * @return {number} Distance between centroids.
 */
function calculateChange(left, right) {
    // Sum the pairwise distances between corresponding centroids.
    return left.reduce(function (total, centroid, i) {
        return total + euclideanDistance(centroid, right[i]);
    }, 0);
}
3928
/**
 * Calculate the [silhouette values](https://en.wikipedia.org/wiki/Silhouette_(clustering))
 * for clustered data.
 *
 * @param {Array<Array<number>>} points N-dimensional coordinates of points.
 * @param {Array<number>} labels Labels of points. This must be the same length as `points`,
 * and values must lie in [0..G-1], where G is the number of groups.
 * @return {Array<number>} The silhouette value for each point.
 *
 * @example
 * silhouette([[0.25], [0.75]], [0, 0]); // => [1.0, 1.0]
 */
function silhouette(points, labels) {
    if (points.length !== labels.length) {
        throw new Error("must have exactly as many labels as points");
    }
    var groupings = createGroups(labels);
    var distances = calculateAllDistances(points);
    return points.map(function (point, i) {
        // A point alone in its group gets a silhouette of zero.
        if (groupings[labels[i]].length <= 1) {
            return 0;
        }
        // a: mean distance from this point to its own group;
        // b: mean distance to the nearest other group.
        var a = meanDistanceFromPointToGroup(
            i,
            groupings[labels[i]],
            distances
        );
        var b = meanDistanceToNearestGroup(
            i,
            labels,
            groupings,
            distances
        );
        return (b - a) / Math.max(a, b);
    });
}
3968
/**
 * Create a lookup table mapping group IDs to point IDs.
 *
 * @private
 * @param {Array<number>} labels Labels of points. This must be the same length as `points`,
 * and values must lie in [0..G-1], where G is the number of groups.
 * @return {Array<Array<number>>} An array of length G, each of whose entries is an array
 * containing the indices of the points in that group.
 */
function createGroups(labels) {
    // Labels are assumed to lie in [0..G-1], so the largest label fixes G.
    var groups = Array(1 + max(labels));
    labels.forEach(function (label, pointIndex) {
        if (groups[label] === undefined) {
            groups[label] = [];
        }
        groups[label].push(pointIndex);
    });
    return groups;
}
3990
/**
 * Create a lookup table of all inter-point distances.
 *
 * @private
 * @param {Array<Array<number>>} points N-dimensional coordinates of points.
 * @return {Array<Array<number>>} A symmetric square array of inter-point distances
 * (zero on the diagonal).
 */
function calculateAllDistances(points) {
    var size = points.length;
    var table = makeMatrix(size, size);
    // Compute only the lower triangle and mirror it: the metric is
    // symmetric, and the diagonal is already zero from makeMatrix.
    for (var row = 1; row < size; row++) {
        for (var col = 0; col < row; col++) {
            var d = euclideanDistance(points[row], points[col]);
            table[row][col] = d;
            table[col][row] = d;
        }
    }
    return table;
}
4010
/**
 * Calculate the mean distance between this point and all the points in the
 * nearest group (as determined by which point in another group is closest).
 *
 * @private
 * @param {number} which The index of this point.
 * @param {Array<number>} labels Labels of points.
 * @param {Array<Array<number>>} groupings An array whose entries are arrays
 * containing the indices of the points in that group.
 * @param {Array<Array<number>>} distances A symmetric square array of inter-point
 * distances.
 * @return {number} The mean distance from this point to others in the nearest
 * group.
 */
function meanDistanceToNearestGroup(which, labels, groupings, distances) {
    var ownLabel = labels[which];
    var nearest = Number.MAX_VALUE;
    for (var group = 0; group < groupings.length; group++) {
        // Skip the point's own group: only other groups count.
        if (group === ownLabel) {
            continue;
        }
        var mean = meanDistanceFromPointToGroup(
            which,
            groupings[group],
            distances
        );
        if (mean < nearest) {
            nearest = mean;
        }
    }
    return nearest;
}
4042
/**
 * Calculate the mean distance between a point and all the points in a group
 * (possibly its own).
 *
 * @private
 * @param {number} which The index of this point.
 * @param {Array<number>} group The indices of all the points in the group in
 * question.
 * @param {Array<Array<number>>} distances A symmetric square array of inter-point
 * distances.
 * @return {number} The mean distance from this point to others in the
 * specified group.
 */
function meanDistanceFromPointToGroup(which, group, distances) {
    // All distances from this point live on one row of the table.
    var row = distances[which];
    var sum = group.reduce(function (accumulator, member) {
        return accumulator + row[member];
    }, 0);
    return sum / group.length;
}
4063
/**
 * Calculate the [silhouette metric](https://en.wikipedia.org/wiki/Silhouette_(clustering))
 * for a set of N-dimensional points arranged in groups. The metric is the largest
 * individual silhouette value for the data.
 *
 * @param {Array<Array<number>>} points N-dimensional coordinates of points.
 * @param {Array<number>} labels Labels of points. This must be the same length as `points`,
 * and values must lie in [0..G-1], where G is the number of groups.
 * @return {number} The silhouette metric for the groupings.
 *
 * @example
 * silhouetteMetric([[0.25], [0.75]], [0, 0]); // => 1.0
 */
function silhouetteMetric(points, labels) {
    // The metric is simply the best (largest) per-point silhouette value.
    return max(silhouette(points, labels));
}
4081
/**
 * Relative error.
 *
 * This is more difficult to calculate than it first appears [1,2]. The usual
 * formula for the relative error between an actual value A and an expected
 * value E is `|(A-E)/E|`, but:
 *
 * 1. If the expected value is 0, any other value has infinite relative error,
 *    which is counter-intuitive: if the expected voltage is 0, getting 1/10th
 *    of a volt doesn't feel like an infinitely large error.
 *
 * 2. This formula does not satisfy the mathematical definition of a metric [3].
 *    [4] solved this problem by defining the relative error as `|ln(|A/E|)|`,
 *    but that formula only works if all values are positive: for example, it
 *    reports the relative error of -10 and 10 as 0.
 *
 * Our implementation sticks with convention and returns:
 *
 * - 0 if the actual and expected values are both zero
 * - Infinity if the actual value is non-zero and the expected value is zero
 * - `|(A-E)/E|` in all other cases
 *
 * [1] https://math.stackexchange.com/questions/677852/how-to-calculate-relative-error-when-true-value-is-zero
 * [2] https://en.wikipedia.org/wiki/Relative_change_and_difference
 * [3] https://en.wikipedia.org/wiki/Metric_(mathematics)#Definition
 * [4] F.W.J. Olver: "A New Approach to Error Arithmetic." SIAM Journal on
 *     Numerical Analysis, 15(2), 1978, 10.1137/0715024.
 *
 * @param {number} actual The actual value.
 * @param {number} expected The expected value.
 * @return {number} The relative error.
 */
function relativeError(actual, expected) {
    if (expected === 0) {
        // Both zero is a perfect match; any non-zero actual against an
        // expected zero is infinitely wrong (|(A-0)/0| -> Infinity anyway,
        // so the special case below only changes the 0/0 result).
        return actual === 0 ? 0 : Math.abs(actual / expected);
    }
    return Math.abs((actual - expected) / expected);
}
4120
/**
 * Approximate equality.
 *
 * @param {number} actual The value to be tested.
 * @param {number} expected The reference value.
 * @param {number} tolerance The acceptable relative difference.
 * @return {boolean} Whether numbers are within tolerance.
 */
function approxEqual(actual, expected, tolerance) {
    // Fall back to the library-wide epsilon when no tolerance is given.
    if (tolerance === undefined) {
        tolerance = epsilon;
    }
    return relativeError(actual, expected) <= tolerance;
}
4134
// Public API surface of the bundle. Several entries are aliases that expose
// the same implementation under more than one name (e.g. `average` -> mean,
// `erf` -> errorFunction, `iqr` -> interquartileRange, `mad` ->
// medianAbsoluteDeviation, `rms` -> rootMeanSquare, `kde` ->
// kernelDensityEstimation, `bayesian`/`perceptron` -> their class forms).
exports.BayesianClassifier = BayesianClassifier;
exports.PerceptronModel = PerceptronModel;
exports.addToMean = addToMean;
exports.approxEqual = approxEqual;
exports.average = mean;
exports.averageSimple = meanSimple;
exports.bayesian = BayesianClassifier;
exports.bernoulliDistribution = bernoulliDistribution;
exports.binomialDistribution = binomialDistribution;
exports.bisect = bisect;
exports.chiSquaredDistributionTable = chiSquaredDistributionTable;
exports.chiSquaredGoodnessOfFit = chiSquaredGoodnessOfFit;
exports.chunk = chunk;
exports.ckmeans = ckmeans;
exports.combinations = combinations;
exports.combinationsReplacement = combinationsReplacement;
exports.combineMeans = combineMeans;
exports.combineVariances = combineVariances;
exports.cumulativeStdNormalProbability = cumulativeStdNormalProbability;
exports.epsilon = epsilon;
exports.equalIntervalBreaks = equalIntervalBreaks;
exports.erf = errorFunction;
exports.errorFunction = errorFunction;
exports.extent = extent;
exports.extentSorted = extentSorted;
exports.factorial = factorial;
exports.gamma = gamma;
exports.gammaln = gammaln;
exports.geometricMean = geometricMean;
exports.harmonicMean = harmonicMean;
exports.interquartileRange = interquartileRange;
exports.inverseErrorFunction = inverseErrorFunction;
exports.iqr = interquartileRange;
exports.kMeansCluster = kMeansCluster;
exports.kde = kernelDensityEstimation;
exports.kernelDensityEstimation = kernelDensityEstimation;
exports.linearRegression = linearRegression;
exports.linearRegressionLine = linearRegressionLine;
exports.mad = medianAbsoluteDeviation;
exports.max = max;
exports.maxSorted = maxSorted;
exports.mean = mean;
exports.meanSimple = meanSimple;
exports.median = median;
exports.medianAbsoluteDeviation = medianAbsoluteDeviation;
exports.medianSorted = medianSorted;
exports.min = min;
exports.minSorted = minSorted;
exports.mode = mode;
exports.modeFast = modeFast;
exports.modeSorted = modeSorted;
exports.numericSort = numericSort;
exports.perceptron = PerceptronModel;
exports.permutationTest = permutationTest;
exports.permutationsHeap = permutationsHeap;
exports.poissonDistribution = poissonDistribution;
exports.probit = probit;
exports.product = product;
exports.quantile = quantile;
exports.quantileRank = quantileRank;
exports.quantileRankSorted = quantileRankSorted;
exports.quantileSorted = quantileSorted;
exports.quickselect = quickselect;
exports.rSquared = rSquared;
exports.relativeError = relativeError;
exports.rms = rootMeanSquare;
exports.rootMeanSquare = rootMeanSquare;
exports.sample = sample;
exports.sampleCorrelation = sampleCorrelation;
exports.sampleCovariance = sampleCovariance;
exports.sampleKurtosis = sampleKurtosis;
exports.sampleSkewness = sampleSkewness;
exports.sampleStandardDeviation = sampleStandardDeviation;
exports.sampleVariance = sampleVariance;
exports.sampleWithReplacement = sampleWithReplacement;
exports.shuffle = shuffle;
exports.shuffleInPlace = shuffleInPlace;
exports.sign = sign;
exports.silhouette = silhouette;
exports.silhouetteMetric = silhouetteMetric;
exports.standardDeviation = standardDeviation;
exports.standardNormalTable = standardNormalTable;
exports.subtractFromMean = subtractFromMean;
exports.sum = sum;
exports.sumNthPowerDeviations = sumNthPowerDeviations;
exports.sumSimple = sumSimple;
exports.tTest = tTest;
exports.tTestTwoSample = tTestTwoSample;
exports.uniqueCountSorted = uniqueCountSorted;
exports.variance = variance;
exports.zScore = zScore;
//# sourceMappingURL=simple-statistics.js.map