// UNPKG - 6.98 kB - JavaScript - View Raw
"use strict";
/**
 * @license
 * Copyright (c) 2019 The Polymer Project Authors. All rights reserved.
 * This code may only be used under the BSD style license found at
 * http://polymer.github.io/LICENSE.txt The complete set of authors may be found
 * at http://polymer.github.io/AUTHORS.txt The complete set of contributors may
 * be found at http://polymer.github.io/CONTRIBUTORS.txt Code distributed by
 * Google as part of the polymer project is also subject to an additional IP
 * rights grant found at http://polymer.github.io/PATENTS.txt
 */
// Flag the CommonJS exports object with `__esModule: true`.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare every export as undefined; each one is assigned right after its
// function definition further down in this file.
exports.computeDifference = exports.computeDifferences = exports.horizonsResolved = exports.intervalContains = exports.summaryStats = void 0;
const jstat = require('jstat'); // TODO Contribute typings.
/**
 * Compute summary statistics for a sample of numeric measurements.
 *
 * The variance is the sample variance (divided by `n - 1`), so a sample with
 * fewer than two values does not produce a finite variance.
 *
 * @param {number[]} data The sample values.
 * @returns {{size: number, mean: number, meanCI: {low: number, high: number},
 *     variance: number, standardDeviation: number,
 *     relativeStandardDeviation: number}} The sample size, mean, 95%
 *     confidence interval of the mean, sample variance, standard deviation,
 *     and standard deviation relative to the mean.
 */
function summaryStats(data) {
    const size = data.length;
    const sum = sumOf(data);
    const mean = sum / size;
    const squareResiduals = data.map((val) => (val - mean) ** 2);
    // n - 1 due to https://en.wikipedia.org/wiki/Bessel%27s_correction
    const variance = sumOf(squareResiduals) / (size - 1);
    const stdDev = Math.sqrt(variance);
    return {
        size,
        mean,
        // The interval is built on the sampling distribution of the mean, not
        // on the raw sample distribution.
        meanCI: confidenceInterval95(samplingDistributionOfTheMean({ mean, variance }, size), size),
        variance,
        standardDeviation: stdDev,
        // aka coefficient of variation
        relativeStandardDeviation: stdDev / mean,
    };
}
// Publish summaryStats on the CommonJS exports object.
exports.summaryStats = summaryStats;
/**
 * Compute a 95% confidence interval for the given distribution.
 *
 * The interval is symmetric around the distribution's mean, with a margin of
 * the two-sided Student's t critical value times the distribution's standard
 * deviation.
 *
 * @param {{mean: number, variance: number}} distribution The distribution to
 *     build the interval for.
 * @param {number} size Sample size; the t-distribution uses `size - 1` degrees
 *     of freedom.
 * @returns {{low: number, high: number}} The bounds of the interval.
 */
function confidenceInterval95({ mean, variance }, size) {
    // http://www.stat.yale.edu/Courses/1997-98/101/confint.htm
    // Two-sided 95% interval: 2.5% is left in each tail, hence the 0.975
    // quantile.
    const t = jstat.studentt.inv(1 - (.05 / 2), size - 1);
    const stdDev = Math.sqrt(variance);
    const margin = t * stdDev;
    return {
        low: mean - margin,
        high: mean + margin,
    };
}
/**
 * Return whether the given confidence interval contains a value.
 *
 * Both bounds are inclusive.
 *
 * @param {{low: number, high: number}} interval The interval to test against.
 * @param {number} value The value to look for.
 * @returns {boolean} True if `interval.low <= value <= interval.high`.
 */
function intervalContains(interval, value) {
    return value >= interval.low && value <= interval.high;
}
// Publish intervalContains on the CommonJS exports object.
exports.intervalContains = intervalContains;
/**
 * Return whether all difference confidence intervals are unambiguously located
 * on one side or the other of all given horizon values.
 *
 * For example, given the horizons 0 and 1:
 *
 *    <--->                     true
 *        <--->                 false
 *            <--->             true
 *                <--->         false
 *                    <--->     true
 *        <----------->         false
 *
 *  |-------|-------|-------| ms difference
 * -1       0       1       2
 *
 * @param resultStats Iterable of results. Each may carry a `differences`
 *     array whose entries are either null or an object with `absolute` and
 *     `relative` confidence intervals. Results without `differences` and null
 *     entries are skipped.
 * @param {{absolute: number[], relative: number[]}} horizons Horizon values
 *     checked against the absolute and relative intervals respectively.
 * @returns {boolean} False as soon as any interval contains one of its
 *     horizons (bounds inclusive), true otherwise.
 */
function horizonsResolved(resultStats, horizons) {
    for (const { differences } of resultStats) {
        if (differences === undefined) {
            continue;
        }
        // TODO We may want to offer more control over which particular set of
        // differences we care about resolving. For the moment, a horizon of 1%
        // means we'll try to resolve a 1% difference pairwise in both directions.
        for (const diff of differences) {
            if (diff === null) {
                continue;
            }
            for (const horizon of horizons.absolute) {
                if (intervalContains(diff.absolute, horizon)) {
                    return false;
                }
            }
            for (const horizon of horizons.relative) {
                if (intervalContains(diff.relative, horizon)) {
                    return false;
                }
            }
        }
    }
    return true;
}
// Publish horizonsResolved on the CommonJS exports object.
exports.horizonsResolved = horizonsResolved;
/**
 * Return the sum of the given numbers.
 *
 * @param {number[]} data The values to add up.
 * @returns {number} The total, or 0 for an empty array.
 */
function sumOf(data) {
    // Seed the fold with 0 so an empty array yields 0 instead of throwing
    // "Reduce of empty array with no initial value".
    return data.reduce((acc, cur) => acc + cur, 0);
}
/**
 * Given an array of results, return a new array of results where each result
 * has additional statistics describing how it compares to each other result.
 *
 * The input results are not modified; each output entry is a shallow copy with
 * an added `differences` array parallel to `stats`. The slot at the result's
 * own position is null, and every other slot holds
 * `computeDifference(other.stats, result.stats)`.
 *
 * @param stats Array of results, each with a `stats` property.
 * @returns A new array of result copies, each with a `differences` array.
 */
function computeDifferences(stats) {
    return stats.map((result) => {
        // Identity comparison: a result is never compared against itself.
        const differences = stats.map((other) => other === result ?
            null :
            computeDifference(other.stats, result.stats));
        return Object.assign({}, result, { differences });
    });
}
// Publish computeDifferences on the CommonJS exports object.
exports.computeDifferences = computeDifferences;
/**
 * Compute 95% confidence intervals for the difference between the means of two
 * sets of summary statistics, both as an absolute difference (b - a) and as a
 * difference relative to a ((b - a) / a).
 *
 * @param {{mean: number, variance: number, size: number}} a Statistics of the
 *     baseline sample.
 * @param {{mean: number, variance: number, size: number}} b Statistics of the
 *     sample being compared against the baseline.
 * @returns {{absolute: {low: number, high: number},
 *     relative: {low: number, high: number}}} The two confidence intervals.
 */
function computeDifference(a, b) {
    const meanA = samplingDistributionOfTheMean(a, a.size);
    const meanB = samplingDistributionOfTheMean(b, b.size);
    const diffAbs = samplingDistributionOfAbsoluteDifferenceOfMeans(meanA, meanB);
    const diffRel = samplingDistributionOfRelativeDifferenceOfMeans(meanA, meanB);
    // We're assuming sample sizes are equal. If they're not for some reason, be
    // conservative and use the smaller one for the t-distribution's degrees of
    // freedom (since that will lead to a wider confidence interval).
    const minSize = Math.min(a.size, b.size);
    return {
        absolute: confidenceInterval95(diffAbs, minSize),
        relative: confidenceInterval95(diffRel, minSize),
    };
}
// Publish computeDifference on the CommonJS exports object.
exports.computeDifference = computeDifference;
/**
 * Estimates the sampling distribution of the mean. This models the distribution
 * of the means that we would compute under repeated samples of the given size.
 *
 * @param {{mean: number, variance: number}} dist The distribution being
 *     sampled from.
 * @param {number} sampleSize The number of observations in each sample.
 * @returns {{mean: number, variance: number}} A distribution with the same
 *     mean and the variance divided by `sampleSize`.
 */
function samplingDistributionOfTheMean(dist, sampleSize) {
    // http://onlinestatbook.com/2/sampling_distributions/samp_dist_mean.html
    // http://www.stat.yale.edu/Courses/1997-98/101/sampmn.htm
    return {
        mean: dist.mean,
        // Error shrinks as sample size grows.
        variance: dist.variance / sampleSize,
    };
}
/**
 * Estimates the sampling distribution of the difference of means (b-a). This
 * models the distribution of the difference between two means that we would
 * compute under repeated samples under the given two sampling distributions of
 * means.
 *
 * @param {{mean: number, variance: number}} a Sampling distribution of the
 *     first (baseline) mean.
 * @param {{mean: number, variance: number}} b Sampling distribution of the
 *     second mean.
 * @returns {{mean: number, variance: number}} A distribution with mean
 *     `b.mean - a.mean` and variance `a.variance + b.variance`.
 */
function samplingDistributionOfAbsoluteDifferenceOfMeans(a, b) {
    // http://onlinestatbook.com/2/sampling_distributions/samplingdist_diff_means.html
    // http://www.stat.yale.edu/Courses/1997-98/101/meancomp.htm
    return {
        mean: b.mean - a.mean,
        // The error from both input sampling distributions of means accumulate.
        variance: a.variance + b.variance,
    };
}
/**
 * Estimates the sampling distribution of the relative difference of means
 * ((b-a)/a). This models the distribution of the relative difference between
 * two means that we would compute under repeated samples under the given two
 * sampling distributions of means.
 *
 * @param {{mean: number, variance: number}} a Sampling distribution of the
 *     baseline mean; its mean is the denominator of the relative difference.
 * @param {{mean: number, variance: number}} b Sampling distribution of the
 *     mean being compared against the baseline.
 * @returns {{mean: number, variance: number}} The estimated mean and variance
 *     of the relative difference.
 */
function samplingDistributionOfRelativeDifferenceOfMeans(a, b) {
    // http://blog.analytics-toolkit.com/2018/confidence-intervals-p-values-percent-change-relative-difference/
    // Note that the above article also presents an alternative calculation for a
    // confidence interval for relative differences, but the one chosen here is
    // much simpler and passes our stochastic tests, so it seems sufficient.
    return {
        mean: (b.mean - a.mean) / a.mean,
        variance: (a.variance * b.mean ** 2 + b.variance * a.mean ** 2) / a.mean ** 4,
    };
}
//# sourceMappingURL=stats.js.map