UNPKG

13 kBJavaScriptView Raw
1var R = require("./R");
2var J = require("jStat").jStat;
3var T = {};
4
5// =============== 檢定 ==============================
6
7T.test = function(o) { // name, D, x, mu, sd, y, alpha, op
8 R.def(o, {alpha:0.05, op:"="});
9 var alpha = o.alpha;
10 var pvalue, interval;
11 var D = o.D;
12 var q1 = D.o2q(o); // 單尾檢定的 pvalue
13
14 if (o.op === "=") {
15 if (q1>0.5) q1 = 1-q1; // (q1>0.5) 取右尾,否則取左尾。
16 pvalue= 2*q1; // 對稱情況:雙尾檢定的 p 值是單尾的兩倍。
17 interval = [D.q2p(alpha/2, o, "L"), D.q2p(1-alpha/2, o, "R")];
18 } else {
19 if (o.op === "<") { // 右尾檢定 H0: q < 1-alpha,
20 interval = [ D.q2p(alpha, o, "L"), Infinity ];
21 pvalue = 1-q1;
22 }
23 if (o.op === ">") { // 左尾檢定 H0: q > alpha
24 interval=[-Infinity, D.q2p(1-alpha, o, "R")];
25 pvalue = q1;
26 }
27 }
28 return {
29 name: o.name,
30 h: D.h(o),
31 alpha: alpha,
32 op: o.op,
33 pvalue: pvalue,
34 ci : interval,
35 df : D.df(o),
36 report: function() { R.report(this) }
37 };
38}
39
40T.report = function(o) {
41 console.log("=========== report ==========");
42 for (var k in o) {
43 if (typeof o[k] !== "function")
44 console.log(k+"\t: "+R.str(o[k]));
45 }
46}
47
48var t1 = { // 單樣本 T 檢定 t = (X-mu)/(S/sqrt(n))
49 h:function(o) { return "H0:mu"+o.op+o.mu; },
50 o2q:function(o) {
51 var x = o.x, n = x.length;
52 var t = (R.mean(x)-o.mu)/(R.sd(x)/Math.sqrt(n));
53 return R.pt(t, n-1);
54 },
55 // P(X-mu/(S/sqrt(n))<t) = q ; 信賴區間 P(T<q)
56 // P(mu > X-t*S/sqrt(n)) = q ; 這反而成了右尾檢定,所以左尾與右尾確實會反過來
57 q2p:function(q, o) {
58 var x = o.x, n = x.length;
59 return R.mean(x) + R.qt(q, n-1) * R.sd(x) / Math.sqrt(n);
60 },
61 df:function(o) { return o.x.length-1; }
62}
63
64var t2vareq = { // σ1=σ2, 合併 T 檢定 (雙樣本)
65 h:function(o) { return "H0:mu1"+o.op+"mu2" },
66 // S^2 = (n1-1)*S1^2+(n2-1)*S2^2)/(n1-1+n2-1)
67 sd:function(o) {
68 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
69 var S1= R.sd(x), S2 = R.sd(y);
70 var S = Math.sqrt(((n1-1)*S1*S1+(n2-1)*S2*S2)/(n1-1+n2-1));
71 return S;
72 },
73 // T = ((X-Y)-(mu1-mu2))/(sqrt(1/n1+1/n2)*S)
74 o2q:function(o) {
75 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
76 var S = this.sd(o);
77 var t = (R.mean(x)-R.mean(y)-o.mu)/(Math.sqrt(1/n1+1/n2)*S);
78 return R.pt(t, n1+n2-2);
79 },
80 // t=((X-Y)-mu)/(sqrt(1/n1+1/n2)*S), (X-Y)-t*sqrt(1/n1+1/n2)*S = mu
81 q2p:function(q, o) {
82 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
83 var S = this.sd(o);
84 return R.mean(x)-R.mean(y)+ R.qt(q, n1+n2-2)*Math.sqrt(1/n1+1/n2)*S;
85 },
86 df:function(o) {
87 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
88 return n1+n2-2;
89 }
90}
91
92var t2paired = { // 成對 T 檢定 T = (X-Y-mu)/(S/sqrt(n)) (雙樣本)
93 h:function(o) { return "H0:mu1"+o.op+"mu2" },
94 sd:function(o) { // S = sd(x-y)
95 var x = o.x, n = x.length, y=o.y;
96 var S= R.sd(R.sub(x,y));
97 return S;
98 },
99 o2q:function(o) {
100 var x = o.x, n = x.length, y=o.y;
101 var S = this.sd(o);
102 var t = (R.mean(R.sub(x,y))-o.mu)/(S/Math.sqrt(n));
103 return R.pt(t, n-1);
104 },
105 // mean(x-y)+t*S/sqrt(n)
106 q2p:function(q, o) {
107 var x = o.x, n = x.length, y=o.y;
108 var S = this.sd(o);
109 return R.mean(R.sub(x,y))+ R.qt(q, n-1)*S/Math.sqrt(n);
110 },
111 df:function(o) {
112 return o.x.length-1;
113 }
114}
115
116var t2varneq = { // σ1≠σ2, Welch's t test (雙樣本) (又稱 Smith-Satterwaite 程序)
117// 參考:http://en.wikipedia.org/wiki/Welch's_t_test
118 h:function(o) { return "H0:mu1"+o.op+"mu2" },
119 // T = ((X-Y)-(mu1-mu2))/sqrt(S1^2/n1+S2^2/n2)
120 o2q:function(o) {
121 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
122 var S1 = R.sd(x), S2=R.sd(y);
123 var t = (R.mean(x)-R.mean(y)-o.mu)/Math.sqrt(S1*S1/n1+S2*S2/n2);
124 return R.pt(t, this.df(o));
125 },
126 // t=((X-Y)-mu)/sqrt(S1^2/n1+S2^2/n2), (X-Y)-t*sqrt(S1^2/n1+S2^2/n2) = mu
127 q2p:function(q, o) {
128 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
129 var S1 = R.sd(x), S2=R.sd(y);
130 return R.mean(x)-R.mean(y)+ R.qt(q, this.df(o))*Math.sqrt(S1*S1/n1+S2*S2/n2);
131 },
132 df:function(o) {
133 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
134 var S1 = R.sd(x), S2=R.sd(y);
135 var Sn1 = S1*S1/n1, Sn2 = S2*S2/n2, Sn12 = Sn1+Sn2;
136 var df = (Sn12*Sn12)/((Sn1*Sn1)/(n1-1)+(Sn2*Sn2)/(n2-1));
137 return df;
138 }
139}
140
141T.ttest = function(o) {
142 var t;
143 if (typeof o.y === "undefined") {
144 o.name = "ttest(X)";
145 o.D = t1;
146 t = T.test(o);
147 t.mean = R.mean(o.x);
148 t.sd = R.sd(o.x);
149 } else {
150 var varequal = o.varequal || false;
151 var paired = o.paired || false;
152 if (varequal) {
153 o.name = "ttest(X,Y,mu="+o.mu+",varequal=true) (pooled)";
154 o.D = t2vareq;
155 t = T.test(o);
156 } else if (paired) {
157 o.name = "ttest(x,y,mu="+o.mu+",paired=true)";
158 o.D = t2paired;
159 t = T.test(o);
160 t.mean = "mean(x-y)="+R.str(R.mean(R.sub(o.x, o.y)));
161 t.sd = "sd(x-y)="+R.str(R.sd(R.sub(o.x, o.y)));
162 } else {
163 o.name = "ttest(x,y,mu="+o.mu+",varequal=false), Welch t-test";
164 o.D = t2varneq;
165 t = T.test(o);
166 }
167 if (typeof t.mean === "undefined") {
168 t.mean = "mean(x)="+R.str(R.mean(o.x))+" mean(y)="+R.str(R.mean(o.y));
169 t.sd = "sd(x)="+R.str(R.sd(o.x))+" sd(y)="+R.str(R.sd(o.y));
170 }
171 }
172 return t;
173}
174
175var f2 = { // 檢定 σ1/σ2 = 1?
176 h:function(o) { return "H0:σ1/σ2"+o.op+"1"; },
177 // F = S1^2/S2^2
178 o2q:function(o) {
179 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
180 var S1 = R.sd(x), S2=R.sd(y);
181 var f = (S1*S1)/(S2*S2);
182 var pf = R.pf(f, n1-1, n2-1);
183 return pf;
184 },
185 // 信賴區間公式: S1^2/(S2^2*F(1-α/2), S1^2/(S2^2*F(α/2))
186 // 也就是要用 S1^2/(S2^2*f(1-q)) ,參考 R 軟體、應用統計方法 (陳景祥) 389 頁。
187 q2p:function(q, o) {
188 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
189 var S1 = R.sd(x), S2=R.sd(y);
190 var qf = R.qf(1-q, n1-1, n2-1);
191 return (S1*S1)/(S2*S2*qf);
192 },
193 df:function(o) {
194 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
195 return [n1-1, n2-1];
196 }
197}
198
199T.ftest = function(o) {
200 o.name = "ftest(X, Y)";
201 o.D = f2;
202 var t = T.test(o);
203 var rsd = R.sd(o.x)/R.sd(o.y);
204 t.ratio = (rsd*rsd);
205 return t;
206}
207
208// R 軟體沒有此函數,測試請看湯銀才 143 頁
209var chisq1 = { // 檢定 σ1 = σ ?
210 h:function(o) { return "H0:σ1"+o.op+"σ"; },
211 // χ(n-1) = (n-1)S^2/σ^2
212 o2q:function(o) {
213 var x = o.x, n = x.length, S=R.sd(x);
214 var v = (n-1)*S*S/(o.sd*o.sd);
215 return R.pchisq(v, n-1);
216 },
217 // 信賴區間公式: (n-1)S^2/χ^2(1-q),參考 R 語言與統計分析 (湯銀才) 142 頁。
218 q2p:function(q, o) {
219 var x = o.x, n = x.length, S=R.sd(x);
220 return (n-1)*S*S/R.qchisq(1-q, n-1);
221 },
222 df:function(o) {
223 var x = o.x, n = x.length;
224 return n-1;
225 }
226}
227
228T.chisqtest = function(o) {
229 o.name = "chisqtest(X)";
230 o.D = chisq1;
231 return T.test(o);
232}
233
234T.vartest = function(o) {
235 if (typeof o.y === "undefined")
236 return R.chisqtest(o);
237 else
238 return R.ftest(o);
239}
240
241var z1 = { // 單樣本 Z 檢定
242 h:function(o) { return "H0:mu"+o.op+o.mu+" when sd="+o.sd; },
243 o2q:function(o) {
244 var x = o.x, n = x.length;
245 var z = (R.mean(x)-o.mu)/(o.sd/Math.sqrt(n)); // z=(X-mu)/(sd/sqrt(n))
246 return R.pnorm(z, 0, 1);
247 },
248 q2p:function(q, o) {
249 var x = o.x, n = x.length;
250 return R.mean(x) + R.qnorm(q, 0, 1) * R.sd(x) / Math.sqrt(n);
251 },
252 df:function(o) { return o.x.length; }
253}
254
255T.ztest = function(o) {
256 o.name = "ztest(X)";
257 o.D = z1;
258 return T.test(o);
259}
260
261var zprop1 = { // 比例的檢定, n 較大時的近似解 o={ x, n, p } // x 為數值,n 個中出現 x 個 1
262 h:function(o) { return "H0:p"+o.op+o.p; },
263 // Z = (p1-p)/sqrt(p(1-p)/n)
264 o2q:function(o) {
265 var x=o.x, n=o.n, p1=x/n, p=o.p||p1;
266 var z = (p1-p)/Math.sqrt(p*(1-p)/n);
267 return R.pnorm(z, 0, 1);
268 },
269 // 信賴區間公式: p1+z*sqrt(p1*(1-p1)/n),參考 R 語言與統計分析 (湯銀才) 149 頁。
270 q2p:function(q, o) {
271 var x=o.x, n=o.n, p1=x/n, p=p1;
272 var z = R.qnorm(q, 0, 1);
273 var z22n = z*z/(2*n);
274 return (p1+z22n+z*Math.sqrt( p*(1-p)/n + z22n/(2*n) ))/(1+2*z22n); // R 的版本,比較複雜的估計公式
275// return p1+z*Math.sqrt(p*(1-p)/n); // 語言與統計分析 (湯銀才) 149 頁的版本。
276 },
277 df:function(o) { return 1; }
278}
279
280var zprop2 = { // 比例的檢定, n 較大時的近似解 o={ x, y, n1, n2 }
281 h:function(o) { return "H0:p1-p2"+o.op+o.p; },
282 // Z = (p1-p2)/sqrt(p*(1-p)*(1/n1+1/n2)), p = (n1p1+n2p2)/(n1+n2),參考 R 語言與統計分析 (湯銀才) 175 頁。
283 o2q:function(o) {
284 var x=o.x, y=o.y, n1=o.n1, n2=o.n2, p1=x/n1, p2=y/n2, p=(n1*p1+n2*p2)/(n1+n2);
285 var z = (p1-p2)/Math.sqrt(p*(1-p)*(1/n1+1/n2));
286 return R.pnorm(z, 0, 1);
287 },
288 // 信賴區間公式: p1-p2+z*sqrt(p*(1-p)*(1/n1+1/n2));
289 q2p:function(q, o) {
290 var x=o.x, y=o.y, n1=o.n1, n2=o.n2, p1=x/n1, p2=y/n2, p=(n1*p1+n2*p2)/(n1+n2);
291 var z = R.qnorm(q, 0, 1);
292 return p1-p2+z*Math.sqrt(p*(1-p)*(1/n1+1/n2));
293 },
294 df:function(o) { return 1; }
295}
296
297/* 在 prop.test.R 當中,雙邊檢定的 pvalue 是用 pchisq, 單邊才是用 z ,為何呢? ( 但是信賴區間則是全部用 z)
298 if (alternative == "two.sided")
299 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
300 else {
301 if (k == 1)
302 z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
303 else
304 z <- sign(DELTA) * sqrt(STATISTIC)
305 PVAL <- pnorm(z, lower.tail = (alternative == "less"))
306 }
307*/
308
309T.proptest = function(o) {
310 o.p = o.p || 0.5;
311 o.name = "proptest("+R.str(o)+")";
312 o.correct = o.correct|| false;
313 if (o.correct) {
314 o.name += ", binomtest";
315 o.D += binom1;
316 } else {
317 if (typeof o.y === "undefined") {
318 o.name += ", zprop1";
319 o.D = zprop1;
320 } else {
321 o.p = 0; // p1-p2 = 0
322 o.name += ", zprop2";
323 o.D = zprop2;
324 }
325 }
326 var t=T.test(o);
327 if (typeof o.y === "undefined")
328 t.p = o.x/o.n;
329 else
330 t.p = [o.x/o.n1, o.y/o.n2];
331 return t;
332}
333
334// 參考: https://github.com/SurajGupta/r-source/blob/master/src/library/stats/R/binom.test.R
335var binom1 = { // 比例的檢定, n 較大時的近似解 o={ x, n, p } // x 為數值,n 個中出現 x 個 1
336 h:function(o) { return "H0:p"+o.op+o.p; },
337
338 // Z = C(n, k)*p1^k*(1-p1)^(n-k), CDF(z: from 1 to x)
339 o2q:function(o) {
340 var x=o.x, n=o.n, p = o.p, q;
341 var dx = R.dbinom(x, n, p);
342 if (o.op === "=") { // 雙尾檢定,去雙尾後 / 2
343 var q = 0;
344 for (var i=0; i<=n; i++) {
345 var di = R.dbinom(i, n, p);
346 if (di > dx+1e-5) q += di; // 為何 x 本身不算,如果算應該用 di > dx-1e-5 才對。
347 }
348 q=1-((1-q)/2); // 因為 test 會 * 2 所進行的減半調整
349 } else { // 單尾檢定
350 if (Math.abs(x - n*p)<1e-5) // 正確預測, q=1
351 q = 1;
352 else {
353 if (o.op === ">")
354 q = R.pbinom(x, n, p); // 去右尾
355 else // op=== "<"
356 q = R.pbinom(x-1, n, p); // 去右尾
357 }
358 }
359 return q;
360 },
361/* 注意上述 R 原始碼另一尾的計算方式,是用 < pbinom(最接近 x 者) 的算法,而不是直接 * 2。 問題是我們在 test 中是直接用*2 的方式。
362 d <- dbinom(x, n, p)
363 ...
364 else if (x < m) {
365 i <- seq.int(from = ceiling(m), to = n)
366 y <- sum(dbinom(i, n, p) <= d * relErr)
367 pbinom(x, n, p) 左尾 + pbinom(n - y, n, p, lower.tail = FALSE) 右尾
368 } else {
369 i <- seq.int(from = 0, to = floor(m))
370 y <- sum(dbinom(i, n, p) <= d * relErr)
371 pbinom(y - 1, n, p) 左尾 + pbinom(x - 1, n, p, lower.tail = FALSE) 右尾
372 }
373*/
374 // 信賴區間公式: P(T>c) = Σ (n, i) C(n, i) p1^i (1-p1)^(n-i) for i>c < q
375
376 q2p:function(q, o, side) {
377 var x=o.x, n=o.n, p=o.p, op=o.op;
378 if (side === "L")
379 return R.qbeta(q, x, n - x + 1); // 這裏採用 qbeta 是 R 的作法; 直覺上應該採用 R.qbinom(q, n, p);
380 else
381 return R.qbeta(q, x + 1, n - x);
382 },
383 df:function(o) { return 1; }
384}
385
386T.binomtest = function(o) {
387 o.p = o.p || 0.5;
388 o.name = "binomtest("+R.str(o)+")";
389 o.D = binom1;
390 var t=T.test(o);
391 t.p = o.x/o.n;
392 t.ci[0]=(o.op === ">")?0:t.ci[0];
393 t.ci[1]=(o.op === "<")?1:t.ci[1];
394 return t;
395}
396
397// anova f-test : array1, array2, array3, ...
398T.anovaftest = function() {
399 return {
400 h0 : "σ1=σ2=...=σ"+arguments.length,
401 pvalue: J.anovaftest(),
402 score: J.anovafscore(),
403 };
404}
405
406module.exports = T;
407
408// ---------------- 統計與檢定 ----------------------
409/*
410function opAlt(op) {
411 if (op === "=") return "!=";
412 if (op === "<") return ">=";
413 if (op === ">") return "<=";
414 return null;
415}
416*/