1 | ;
|
2 | /**
|
3 | * @license
|
4 | * Copyright (c) 2019 The Polymer Project Authors. All rights reserved.
|
5 | * This code may only be used under the BSD style license found at
|
6 | * http://polymer.github.io/LICENSE.txt The complete set of authors may be found
|
7 | * at http://polymer.github.io/AUTHORS.txt The complete set of contributors may
|
8 | * be found at http://polymer.github.io/CONTRIBUTORS.txt Code distributed by
|
9 | * Google as part of the polymer project is also subject to an additional IP
|
10 | * rights grant found at http://polymer.github.io/PATENTS.txt
|
11 | */
|
12 | Object.defineProperty(exports, "__esModule", { value: true });
|
13 | exports.computeDifference = exports.computeDifferences = exports.horizonsResolved = exports.intervalContains = exports.summaryStats = void 0;
|
14 | const jstat = require('jstat'); // TODO Contribute typings.
|
/**
 * Summarize a sample of numeric measurements.
 *
 * @param {number[]} data The sample values.
 * @returns {{size: number, mean: number, meanCI: {low: number, high: number},
 *     variance: number, standardDeviation: number,
 *     relativeStandardDeviation: number}} The sample size, its mean, a 95%
 *     confidence interval for that mean, the sample variance, the standard
 *     deviation, and the standard deviation relative to the mean.
 */
function summaryStats(data) {
  const size = data.length;
  const mean = sumOf(data) / size;
  const sumOfSquaredResiduals = sumOf(data.map((sample) => (sample - mean) ** 2));
  // Divide by n - 1 rather than n; see
  // https://en.wikipedia.org/wiki/Bessel%27s_correction
  const variance = sumOfSquaredResiduals / (size - 1);
  const standardDeviation = Math.sqrt(variance);
  // The interval is taken over the sampling distribution of the mean, not over
  // the raw sample distribution.
  const distributionOfMean = samplingDistributionOfTheMean({ mean, variance }, size);
  const meanCI = confidenceInterval95(distributionOfMean, size);
  return {
    size,
    mean,
    meanCI,
    variance,
    standardDeviation,
    // Also known as the coefficient of variation.
    relativeStandardDeviation: standardDeviation / mean,
  };
}
|
33 | exports.summaryStats = summaryStats;
|
/**
 * Build the 95% confidence interval for a distribution described by its mean
 * and variance.
 *
 * @param {{mean: number, variance: number}} distribution The distribution to
 *     bound.
 * @param {number} size Sample size; the Student's t quantile is looked up
 *     with `size - 1` degrees of freedom.
 * @returns {{low: number, high: number}} Bounds placed symmetrically around
 *     the mean.
 */
function confidenceInterval95({ mean, variance }, size) {
  // http://www.stat.yale.edu/Courses/1997-98/101/confint.htm
  // Two-sided interval: split the 5% tail mass evenly between both tails.
  const upperQuantile = 1 - (.05 / 2);
  const degreesOfFreedom = size - 1;
  const criticalT = jstat.studentt.inv(upperQuantile, degreesOfFreedom);
  const halfWidth = criticalT * Math.sqrt(variance);
  return {
    low: mean - halfWidth,
    high: mean + halfWidth,
  };
}
|
/**
 * Test whether a value lies within a confidence interval. Both end points
 * count as inside.
 *
 * @param {{low: number, high: number}} interval The interval to test against.
 * @param {number} value The value to look for.
 * @returns {boolean} True when `low <= value <= high`.
 */
function intervalContains(interval, value) {
  const { low, high } = interval;
  const atOrAboveLow = value >= low;
  const atOrBelowHigh = value <= high;
  return atOrAboveLow && atOrBelowHigh;
}
|
53 | exports.intervalContains = intervalContains;
|
/**
 * Report whether every difference confidence interval sits entirely on one
 * side of every horizon value, i.e. no interval contains a horizon.
 *
 * For example, given the horizons 0 and 1:
 *
 *    <--->                   true
 *        <--->               false
 *            <--->           true
 *                <--->       false
 *                    <--->   true
 *        <----------->       false
 *
 *  |-------|-------|-------| ms difference
 * -1       0       1       2
 *
 * @param {Iterable<{differences: (Array|undefined)}>} resultStats Results
 *     whose `differences` entries (when present) each hold `absolute` and
 *     `relative` intervals; results without `differences` and `null` entries
 *     are skipped.
 * @param {{absolute: Iterable<number>, relative: Iterable<number>}} horizons
 *     Horizon values checked against the absolute and relative intervals
 *     respectively.
 * @returns {boolean} False as soon as any interval contains one of its
 *     horizons, true otherwise.
 */
function horizonsResolved(resultStats, horizons) {
  // True when a horizon of either kind falls inside the matching interval.
  // Absolute horizons are checked before relative ones.
  const straddlesAnyHorizon = (diff) => {
    for (const horizon of horizons.absolute) {
      if (intervalContains(diff.absolute, horizon)) {
        return true;
      }
    }
    for (const horizon of horizons.relative) {
      if (intervalContains(diff.relative, horizon)) {
        return true;
      }
    }
    return false;
  };
  // TODO We may want to offer more control over which particular set of
  // differences we care about resolving. For the moment, a horizon of 1%
  // means we'll try to resolve a 1% difference pairwise in both directions.
  for (const { differences } of resultStats) {
    if (differences === undefined) {
      continue;
    }
    for (const diff of differences) {
      if (diff !== null && straddlesAnyHorizon(diff)) {
        return false;
      }
    }
  }
  return true;
}
|
96 | exports.horizonsResolved = horizonsResolved;
|
/**
 * Add up all the numbers in an array.
 *
 * The fold is seeded with 0 so that an empty array sums to 0 instead of
 * making `reduce` throw a TypeError for the missing initial value.
 *
 * @param {number[]} data The values to add.
 * @returns {number} The total, or 0 when `data` is empty.
 */
function sumOf(data) {
  return data.reduce((acc, cur) => acc + cur, 0);
}
|
/**
 * Produce a copy of each result extended with a `differences` array comparing
 * it against every result in the list, itself included.
 *
 * @param {Array<{stats: Object}>} stats The results to compare pairwise.
 * @returns {Array<Object>} Shallow copies of the inputs. `differences[i]` is
 *     the comparison against `stats[i]`, or `null` at the result's own
 *     position.
 */
function computeDifferences(stats) {
  return stats.map((result) => {
    const withDifferences = Object.assign({}, result);
    withDifferences.differences = stats.map((other) => {
      if (other === result) {
        // A result is never compared against itself.
        return null;
      }
      return computeDifference(other.stats, result.stats);
    });
    return withDifferences;
  });
}
|
111 | exports.computeDifferences = computeDifferences;
|
/**
 * Compute 95% confidence intervals for the absolute (b - a) and relative
 * ((b - a) / a) difference between the means of two samples.
 *
 * @param {{size: number, mean: number, variance: number}} a Summary
 *     statistics of the baseline sample.
 * @param {{size: number, mean: number, variance: number}} b Summary
 *     statistics of the sample compared against the baseline.
 * @returns {{absolute: {low: number, high: number},
 *     relative: {low: number, high: number}}} The two intervals.
 */
function computeDifference(a, b) {
  const meanDistributionA = samplingDistributionOfTheMean(a, a.size);
  const meanDistributionB = samplingDistributionOfTheMean(b, b.size);
  // Sample sizes are expected to match. Should they differ, fall back to the
  // smaller one for the t-distribution's degrees of freedom: that is the
  // conservative choice because it yields the wider confidence interval.
  const smallerSize = Math.min(a.size, b.size);
  const absoluteDistribution = samplingDistributionOfAbsoluteDifferenceOfMeans(
      meanDistributionA, meanDistributionB);
  const relativeDistribution = samplingDistributionOfRelativeDifferenceOfMeans(
      meanDistributionA, meanDistributionB);
  return {
    absolute: confidenceInterval95(absoluteDistribution, smallerSize),
    relative: confidenceInterval95(relativeDistribution, smallerSize),
  };
}
|
126 | exports.computeDifference = computeDifference;
|
/**
 * Estimate the sampling distribution of the mean: the distribution of the
 * means we would observe when repeatedly drawing samples of the given size.
 *
 * @param {{mean: number, variance: number}} dist The observed distribution.
 * @param {number} sampleSize Number of observations per sample.
 * @returns {{mean: number, variance: number}} Same mean, with the variance
 *     divided by the sample size.
 */
function samplingDistributionOfTheMean(dist, sampleSize) {
  // http://onlinestatbook.com/2/sampling_distributions/samp_dist_mean.html
  // http://www.stat.yale.edu/Courses/1997-98/101/sampmn.htm
  const { mean, variance } = dist;
  // The larger the sample, the smaller the error of its mean.
  const varianceOfMean = variance / sampleSize;
  return { mean, variance: varianceOfMean };
}
|
/**
 * Estimate the sampling distribution of the difference of two means (b - a):
 * the distribution of the differences we would observe when repeatedly
 * sampling from the two given sampling distributions of means.
 *
 * @param {{mean: number, variance: number}} a Sampling distribution of the
 *     first mean.
 * @param {{mean: number, variance: number}} b Sampling distribution of the
 *     second mean.
 * @returns {{mean: number, variance: number}} Distribution of `b - a`.
 */
function samplingDistributionOfAbsoluteDifferenceOfMeans(a, b) {
  // http://onlinestatbook.com/2/sampling_distributions/samplingdist_diff_means.html
  // http://www.stat.yale.edu/Courses/1997-98/101/meancomp.htm
  const meanOfDifference = b.mean - a.mean;
  // Uncertainty from both inputs adds up in the difference.
  const varianceOfDifference = a.variance + b.variance;
  return {
    mean: meanOfDifference,
    variance: varianceOfDifference,
  };
}
|
/**
 * Estimate the sampling distribution of the relative difference of two means
 * ((b - a) / a): the distribution of the relative differences we would observe
 * when repeatedly sampling from the two given sampling distributions of means.
 *
 * @param {{mean: number, variance: number}} a Sampling distribution of the
 *     baseline mean; its mean is the divisor.
 * @param {{mean: number, variance: number}} b Sampling distribution of the
 *     mean compared against the baseline.
 * @returns {{mean: number, variance: number}} Distribution of `(b - a) / a`.
 */
function samplingDistributionOfRelativeDifferenceOfMeans(a, b) {
  // http://blog.analytics-toolkit.com/2018/confidence-intervals-p-values-percent-change-relative-difference/
  // Note that the above article also presents an alternative calculation for a
  // confidence interval for relative differences, but the one chosen here is
  // much simpler and passes our stochastic tests, so it seems sufficient.
  const relativeMean = (b.mean - a.mean) / a.mean;
  const numerator = a.variance * b.mean ** 2 + b.variance * a.mean ** 2;
  const denominator = a.mean ** 4;
  return {
    mean: relativeMean,
    variance: numerator / denominator,
  };
}
|
171 | //# sourceMappingURL=stats.js.map |
\ | No newline at end of file |