UNPKG

6.7 kBJavaScriptView Raw
1'use strict'
2
3const ttest = require('ttest')
4const atol = 1e-9
5
function guessInterval (data) {
  // This function separates the handle sequence into three partitions, in
  // order to guess the benchmarking interval. In a typical benchmark the
  // handle curve will look like:
  //
  // handles
  // |
  // |       |^^^^^|
  // | -----|      |-----
  // ------------------------ time
  //
  // The flat lines can have some variation, so partition generation is
  // treated as a piecewise least squares regression problem with unknown
  // partition boundaries, where each partition is fitted by a flat line
  // (its mean).
  // There is no closed-form solution to this problem, but since it is a 1D
  // problem and the curves are constrained to be flat, the solution can be
  // brute forced in O(n^2) time. Doing so requires an online mean/variance
  // algorithm, so the Welford algorithm is used together with a derived
  // inverse Welford algorithm (see OnlineMeanVariance below).
  const left = new OnlineMeanVariance()
  const middle = new OnlineMeanVariance()
  const right = new OnlineMeanVariance()

  // For performance, restructure the data into a dense array
  const handles = data.map((d) => d.handles)

  // Start by assigning all observations to the middle partition, i.e. the
  // middle covers the index interval [0, handles.length)
  for (const datum of handles) {
    middle.add(datum)
  }

  // Use the current partitioning, with everything assigned to the middle,
  // as the initial best solution.
  let bestTotalError = middle.sse
  let bestLeftIndex = 0 // corresponds to slice(0, leftIndex)
  let bestRightIndex = handles.length // corresponds to slice(rightIndex)

  // Brute force all valid combinations of leftIndex and rightIndex.
  // The middle partition must always contain at least one value, so don't
  // scan to the end but stop at `handles.length - 1`.
  for (let leftIndex = -1; leftIndex < handles.length - 1; leftIndex++) {
    // Move one observation from the middle to the left partition, making the
    // middle cover [leftIndex + 1, handles.length). leftIndex === -1 means
    // the left partition is empty.
    if (leftIndex >= 0) {
      left.add(handles[leftIndex])
      middle.remove(handles[leftIndex])
    }
    right.reset()

    // The middle statistics mutate while scanning from the right, so save
    // the middle state for the interval [leftIndex + 1, handles.length)
    // and restore it before the next leftIndex iteration.
    const oldMiddleState = middle.save()
    // Try all valid rightIndex values. The middle must keep at least one
    // value, so don't scan down to leftIndex but stop at `leftIndex + 1`.
    for (let rightIndex = handles.length; rightIndex > leftIndex + 1; rightIndex--) {
      // Move one observation from the middle to the right partition, making
      // the middle cover [leftIndex + 1, rightIndex).
      // rightIndex === handles.length means the right partition is empty.
      if (rightIndex < handles.length) {
        right.add(handles[rightIndex])
        middle.remove(handles[rightIndex])
      }

      // Check for improvement. The improvement must exceed the absolute
      // tolerance `atol`, and both flanks must be statistically below the
      // middle plateau for the split to count.
      if (middle.size >= 2 &&
          bestTotalError > left.sse + middle.sse + right.sse + atol &&
          statisticallyLessThan(left, middle) &&
          statisticallyLessThan(right, middle)) {
        bestTotalError = left.sse + middle.sse + right.sse
        bestLeftIndex = leftIndex + 1
        bestRightIndex = rightIndex
      }
    }

    // In preparation for the next iteration, restore the middle statistics
    // to the interval [leftIndex + 1, handles.length)
    middle.load(oldMiddleState)
  }

  return [bestLeftIndex, bestRightIndex]
}
86
// The interval guesser is this module's single export.
module.exports = guessInterval
88
// Decide whether the observations summarized by `leftSide` have a mean that
// is meaningfully below the mean of `rightSide`. Both arguments are
// OnlineMeanVariance accumulators (providing `mean`, `variance` and `size`).
function statisticallyLessThan (leftSide, rightSide) {
  // Handle counts move in steps of 1, so a mean difference smaller than 1
  // is not meaningful. The partition means could in principle differ by
  // less than one, but in practice, on a benchmarked server, the real
  // difference is always much larger.
  const mindiff = 1

  const leftVariance = leftSide.variance
  const rightVariance = rightSide.variance

  // A t-test needs usable variance estimates on both sides. If either
  // variance is below the absolute tolerance or could not be estimated
  // (fewer than two observations yields NaN), fall back to a naive
  // comparison of the means.
  const variancesUnusable =
    leftVariance < atol || Number.isNaN(leftVariance) ||
    rightVariance < atol || Number.isNaN(rightVariance)
  if (variancesUnusable) {
    return leftSide.mean + mindiff < rightSide.mean
  }

  // The variance is high enough: run a one-sided two-sample t-test with a
  // shift of `mindiff`, and report "less" when the null hypothesis is
  // rejected.
  const result = ttest(leftSide, rightSide, {
    alpha: 0.01,
    alternative: 'less',
    mu: -mindiff
  })
  return !result.valid()
}
112
// A running mean/variance accumulator using Welford's online algorithm,
// extended with the exact inverse update so observations can be removed
// again in O(1). For background see:
// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
// These O(1) updates are what keep the exact interval optimization at
// O(n^2); without an online variant each step would recompute from scratch
// and the scan would cost O(n^3).
class OnlineMeanVariance {
  constructor () {
    this.reset()
  }

  // Drop all accumulated observations, returning to the empty state.
  reset () {
    this.size = 0
    this.mean = 0
    this.sse = 0 // sum of squared errors around the mean
  }

  // Snapshot the accumulator state as a plain object (see load()).
  save () {
    const { size, mean, sse } = this
    return { size, mean, sse }
  }

  // Restore a state previously captured with save().
  load (state) {
    this.size = state.size
    this.mean = state.mean
    this.sse = state.sse
  }

  // Welford forward update. With the definitions
  //   δ₁ = x − µ_{n−1}   (deviation from the old mean)
  //   δ₂ = x − µ_n       (deviation from the new mean)
  // the update is:
  //   µ_n = µ_{n−1} + δ₁ / n
  //   S_n = S_{n−1} + δ₁ · δ₂
  // Each delta only becomes computable once the mean it refers to exists,
  // which fixes the statement order below.
  add (x) {
    this.size += 1
    const deltaBefore = x - this.mean
    this.mean += deltaBefore / this.size
    const deltaAfter = x - this.mean
    this.sse += deltaBefore * deltaAfter
  }

  // Inverse Welford update, derived by solving the forward equations for
  // the previous state:
  //   µ_n = µ_{n−1} + (x − µ_{n−1}) / n
  //   ⇒ (n − 1) µ_{n−1} = n µ_n − x
  //   ⇒ µ_{n−1} = µ_n − δ₂ / (n − 1)
  //   S_{n−1} = S_n − δ₁ · δ₂
  // Here δ₂ is available first, and δ₁ becomes available once the old mean
  // has been recovered.
  remove (x) {
    this.size -= 1
    const deltaAfter = x - this.mean
    this.mean -= deltaAfter / this.size
    const deltaBefore = x - this.mean
    this.sse -= deltaAfter * deltaBefore
  }

  // Sample variance: σ² = S_n / (n − 1); NaN until at least two
  // observations have been added. Only the t-test uses this — the interval
  // scan compares SSE directly, since the total observation count is
  // constant and the extra division would not change the argmin.
  get variance () {
    return this.size < 2 ? NaN : this.sse / (this.size - 1)
  }
}