UNPKG

21.6 kBJavaScriptView Raw
1var R=require("lodash");
2var J=require("jStat").jStat;
3
4var repeats=function(f, x, n) {
5 var a=[];
6 for (var i=0; i<n; i++) {
7 a.push(f(x));
8 }
9 return R(a);
10}
11
12R.mixin({repeats:repeats});
13
14var calls=function() {
15 var args = Array.prototype.slice.call(arguments);
16 var n = args[0];
17 var f = args[1];
18 var params = args.slice(2, args.length);
19 var a=[];
20 for (var i=0; i<n; i++)
21 a.push(f.apply(null, params));
22 return R(a);
23}
24
25R.mixin({calls:calls});
26
27var samples=function(space, size, arg) {
28 var arg = R.defaults(arg, {replace:true});
29 if (arg.replace)
30 return R.chain(R.sampleSize(space, size));
31 else
32 return R.repeats(R.sample, space, size);
33}
34
35R.mixin({samples:samples});
36
37R.steps=function(start, end, step) {
38 step = step || 1;
39 return R.range(start,end+0.00001*step, step);
40}
41
42// ============== format ==================
43R.precision = 2;
44// R.largeArray = 50;
45
46R.num2str=function(x) {
47 if (isNaN(x))
48 return "NaN";
49 else if (Math.floor(x) === x) // 整數
50 return x.toString();
51 else if (isFinite(x)) // 有小數點
52 return R.ctrim(x.toFixed(R.precision), "0", "right");
53 else if (x < 0)
54 return "-Infinity";
55 else
56 return "Infinity";
57}
58
59R.ctrim=function(s, set, side) {
60 side = side||"both";
61 for (var b=0; b<s.length; b++)
62 if (set.indexOf(s[b])<0) break;
63 for (var e=s.length-1; e>=0; e--)
64 if (set.indexOf(s[e])<0) break;
65 if (side === "right") b=0;
66 if (side === "left") e=s.length-1;
67 return s.substring(b, e+1);
68}
69
70R.array2str=function(x) {
71 var s = "";
72 for (var i in x)
73 s+= R.str(x[i])+", ";
74 return "["+R.ctrim(s, ", ")+"]";
75}
76
77R.obj2str=function(x, sp) {
78 var s = "";
79 for (var k in x)
80 s+= k+":"+R.str(x[k])+sp;
81 return "{"+R.ctrim(s,sp)+"}";
82}
83
84var str = function(x) {
85 if (typeof x === "number") return R.num2str(x);
86 else if(typeof x === "string") return '\"'+x.toString()+'\"';
87 else if (x instanceof Array) return R.array2str(x);
88 else if (typeof x === "object") return R.obj2str(x, ",");
89 else return x.toString();
90}
91
92R.mixin({str:str}, {chain:false});
93
94// ============== /format ==================
95
96var sd = function(a, flag) {
97 flag = flag || 1;
98 return J.stdev(a, flag);
99}
100
101R.mixin({sd:sd}, {chain:false});
102
103// 協方差
104R.cov = function(x, y) { return J.stdev(x, y); }
105// 相關係數
106R.cor = function(x, y) { return J.corrcoeff(x, y); }
107// 階層 n!
108R.factorial = function(n) { return J.factorial(n); }
109// log(n!)
110R.lfactorial = function(n) { return J.factorialln(n); }
111// 組合 C(n,m)
112R.choose = function(n,m) { return J.combination(n, m); }
113// log C(n,m)
114R.lchoose = function(n,m) { return J.combinationln(n, m); }
115// 組合 C(n,m)
116R.permutation = function(n,m) { return J.permutation(n, m); }
117// log C(n,m)
118R.lchoose = function(n,m) { return J.combinationln(n, m); }
119
120// 均等分布
121R.runif=function(n, a, b) { return R.calls(n, J.uniform.sample, a, b); }
122R.dunif=function(x, a, b) { return J.uniform.pdf(x, a, b); }
123R.punif=function(q, a, b) { return J.uniform.cdf(q, a, b); }
124R.qunif=function(p, a, b) { return J.uniform.inv(p, a, b); }
125// 常態分布
126R.rnorm=function(n, mean, sd) { return R.calls(n, J.normal.sample, mean, sd); }
127R.dnorm=function(x, mean, sd) { return J.normal.pdf(x, mean, sd); }
128R.pnorm=function(q, mean, sd) { return J.normal.cdf(q, mean, sd); }
129R.qnorm=function(p, mean, sd) { return J.normal.inv(p, mean, sd); }
130// 布瓦松分布
131R.rpois=function(n, l) { return R.calls(n, J.poisson.sample, l); }
132R.dpois=function(x, l) { return J.poisson.pdf(x, l); }
133R.ppois=function(q, l) { return J.poisson.cdf(q, l); }
134R.qpois=function(p, l) { return J.poisson.inv(p, l); }
135// F 分布
136R.rf=function(n, df1, df2) { return R.calls(n, J.centralF.sample, df1, df2); }
137R.df=function(x, df1, df2) { return J.centralF.pdf(x, df1, df2); }
138R.pf=function(q, df1, df2) { return J.centralF.cdf(q, df1, df2); }
139R.qf=function(p, df1, df2) { return J.centralF.inv(p, df1, df2); }
140// T 分布
141R.rt=function(n, dof) { return R.calls(n, J.studentt.sample, dof); }
142R.dt=function(x, dof) { return J.studentt.pdf(x, dof); }
143R.pt=function(q, dof) { return J.studentt.cdf(q, dof); }
144R.qt=function(p, dof) { return J.studentt.inv(p, dof); }
145// Beta 分布
146R.rbeta=function(n, alpha, beta) { return R.calls(n, J.beta.sample, alpha, beta); }
147R.dbeta=function(x, alpha, beta) { return J.beta.pdf(x, alpha, beta); }
148R.pbeta=function(q, alpha, beta) { return J.beta.cdf(q, alpha, beta); }
149R.qbeta=function(p, alpha, beta) { return J.beta.inv(p, alpha, beta); }
150// 柯西分布
151R.rcauchy=function(n, local, scale) { return R.calls(n, J.cauchy.sample, local, scale); }
152R.dcauchy=function(x, local, scale) { return J.cauchy.pdf(x, local, scale); }
153R.pcauchy=function(q, local, scale) { return J.cauchy.cdf(q, local, scale); }
154R.qcauchy=function(p, local, scale) { return J.cauchy.inv(p, local, scale); }
155// chisquare 分布
156R.rchisq=function(n, dof) { return R.calls(n, J.chisquare.sample, dof); }
157R.dchisq=function(x, dof) { return J.chisquare.pdf(x, dof); }
158R.pchisq=function(q, dof) { return J.chisquare.cdf(q, dof); }
159R.qchisq=function(p, dof) { return J.chisquare.inv(p, dof); }
160// 指數分布
161R.rexp=function(n, rate) { return R.calls(n, J.exponential.sample, rate); }
162R.dexp=function(x, rate) { return J.exponential.pdf(x, rate); }
163R.pexp=function(q, rate) { return J.exponential.cdf(q, rate); }
164R.qexp=function(p, rate) { return J.exponential.inv(p, rate); }
165// Gamma 分布
166R.rgamma=function(n, shape, scale) { return R.calls(n, J.gamma.sample, shape, scale); }
167R.dgamma=function(x, shape, scale) { return J.gamma.pdf(x, shape, scale); }
168R.pgamma=function(q, shape, scale) { return J.gamma.cdf(q, shape, scale); }
169R.qgamma=function(p, shape, scale) { return J.gamma.inv(p, shape, scale); }
170// 反 Gamma 分布
171R.rinvgamma=function(n, shape, scale) { return R.calls(n, J.invgamma.sample, shape, scale); }
172R.dinvgamma=function(x, shape, scale) { return J.invgamma.pdf(x, shape, scale); }
173R.pinvgamma=function(q, shape, scale) { return J.invgamma.cdf(q, shape, scale); }
174R.qinvgamma=function(p, shape, scale) { return J.invgamma.inv(p, shape, scale); }
175// 對數常態分布
176R.rlognormal=function(n, mu, sigma) { return R.calls(n, J.lognormal.sample, mu, sigma); }
177R.dlognormal=function(x, mu, sigma) { return J.lognormal.pdf(x, mu, sigma); }
178R.plognormal=function(q, mu, sigma) { return J.lognormal.cdf(q, mu, sigma); }
179R.qlognormal=function(p, mu, sigma) { return J.lognormal.inv(p, mu, sigma); }
180// Pareto 分布
181R.rpareto=function(n, scale, shape) { return R.calls(n, J.pareto.sample, scale, shape); }
182R.dpareto=function(x, scale, shape) { return J.pareto.pdf(x, scale, shape); }
183R.ppareto=function(q, scale, shape) { return J.pareto.cdf(q, scale, shape); }
184R.qpareto=function(p, scale, shape) { return J.pareto.inv(p, scale, shape); }
185// Weibull 分布
186R.rweibull=function(n, scale, shape) { return R.calls(n, J.weibull.sample, scale, shape); }
187R.dweibull=function(x, scale, shape) { return J.weibull.pdf(x, scale, shape); }
188R.pweibull=function(q, scale, shape) { return J.weibull.cdf(q, scale, shape); }
189R.qweibull=function(p, scale, shape) { return J.weibull.inv(p, scale, shape); }
190// 三角分布
191R.rtriangular=function(n, a, b, c) { return R.calls(n, J.triangular.sample, a, b, c); }
192R.dtriangular=function(x, a, b, c) { return J.triangular.pdf(x, a, b, c); }
193R.ptriangular=function(q, a, b, c) { return J.triangular.cdf(q, a, b, c); }
194R.qtriangular=function(p, a, b, c) { return J.triangular.inv(p, a, b, c); }
195// 類似 Beta 分布,但計算更簡單
196R.rkumaraswamy=function(n, alpha, beta) { return R.calls(n, J.kumaraswamy.sample, alpha, beta); }
197R.dkumaraswamy=function(x, alpha, beta) { return J.kumaraswamy.pdf(x, alpha, beta); }
198R.pkumaraswamy=function(q, alpha, beta) { return J.kumaraswamy.cdf(q, alpha, beta); }
199R.qkumaraswamy=function(p, alpha, beta) { return J.kumaraswamy.inv(p, alpha, beta); }
200
201// ============= 離散分佈 ============
202var qf=function(cdf, q, N, p) {
203 for (var i=0; i<=N; i++) {
204 if (cdf(i, N, p) > q) return i;
205 }
206 return N;
207}
208var rf=function(cdf, n, N, p) {
209 var samples = [];
210 for (var i=0; i<n; i++) {
211 var q = Math.random();
212 samples.push(cdf(q, N, p));
213 }
214 return samples;
215}
216// 二項分布
217R.dbinom=function(x, N, p) { return J.binomial.pdf(x, N, p); }
218R.pbinom=function(k, N, p) { return J.binomial.cdf(k, N, p); }
219R.qbinom=function(q, N, p) { return qf(R.pbinom, q, N, p); }
220R.rbinom=function(n, N, p) { return rf(R.qbinom, n, N, p); }
221// 負二項分布
222R.dnbinom=function(x, N, p) { return jStat.negbin.pdf(x, N, p); }
223R.pnbinom=function(k, N, p) { return jStat.negbin.cdf(k, N, p); }
224R.qnbinom=function(q, N, p) { return qf(R.pnbinom, q, N, p); }
225R.rnbinom=function(n, N, p) { return rf(R.qnbinom, n, N, p); }
226// 超幾何分布
227R.dhyper=function(x, N, m, n) { return jStat.hypgeom.pdf(x, N, m, n); }
228R.phyper=function(k, N, m, n) { return jStat.hypgeom.cdf(k, N, m, n); }
229R.qhyper=function(q, N, m, n) { return qf(R.phyper, q, N, p); }
230R.rhyper=function(n, N, m, k) { return rf(R.qhyper, n, N, p); }
231
232// =============== 檢定 ==============================
233
234// ---------------- 統計與檢定 ----------------------
235function opAlt(op) {
236 if (op === "=") return "!=";
237 if (op === "<") return ">=";
238 if (op === ">") return "<=";
239 return null;
240}
241
242R.test = function(o) { // name, D, x, mu, sd, y, alpha, op
243 o = R.defaults(o, {alpha:0.05, op:"="});
244 var alpha = o.alpha;
245 var pvalue, interval;
246 var D = o.D;
247 var q1 = D.o2q(o); // 單尾檢定的 pvalue
248
249 if (o.op === "=") {
250 if (q1>0.5) q1 = 1-q1; // (q1>0.5) 取右尾,否則取左尾。
251 pvalue= 2*q1; // 對稱情況:雙尾檢定的 p 值是單尾的兩倍。
252 interval = [D.q2p(alpha/2, o, "L"), D.q2p(1-alpha/2, o, "R")];
253 } else {
254 if (o.op === "<") { // 右尾檢定 H0: q < 1-alpha,
255 interval = [ D.q2p(alpha, o, "L"), Infinity ];
256 pvalue = 1-q1;
257 }
258 if (o.op === ">") { // 左尾檢定 H0: q > alpha
259 interval=[-Infinity, D.q2p(1-alpha, o, "R")];
260 pvalue = q1;
261 }
262 }
263 return {
264 name: o.name,
265 h: D.h(o),
266 alpha: alpha,
267 op: o.op,
268 pvalue: pvalue,
269 ci : interval,
270 df : D.df(o),
271 report: function() { R.report(this) }
272 };
273}
274
275var t1 = { // 單樣本 T 檢定 t = (X-mu)/(S/sqrt(n))
276 h:function(o) { return "H0:mu"+o.op+o.mu; },
277 o2q:function(o) {
278 var x = o.x, n = x.length;
279 var t = (R.mean(x)-o.mu)/(R.sd(x)/Math.sqrt(n));
280 return R.pt(t, n-1);
281 },
282 // P(X-mu/(S/sqrt(n))<t) = q ; 信賴區間 P(T<q)
283 // P(mu > X-t*S/sqrt(n)) = q ; 這反而成了右尾檢定,所以左尾與右尾確實會反過來
284 q2p:function(q, o) {
285 var x = o.x, n = x.length;
286 return R.mean(x) + R.qt(q, n-1) * R.sd(x) / Math.sqrt(n);
287 },
288 df:function(o) { return o.x.length-1; }
289}
290
291var t2vareq = { // σ1=σ2, 合併 T 檢定 (雙樣本)
292 h:function(o) { return "H0:mu1"+o.op+"mu2" },
293 // S^2 = (n1-1)*S1^2+(n2-1)*S2^2)/(n1-1+n2-1)
294 sd:function(o) {
295 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
296 var S1= R.sd(x), S2 = R.sd(y);
297 var S = Math.sqrt(((n1-1)*S1*S1+(n2-1)*S2*S2)/(n1-1+n2-1));
298 return S;
299 },
300 // T = ((X-Y)-(mu1-mu2))/(sqrt(1/n1+1/n2)*S)
301 o2q:function(o) {
302 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
303 var S = this.sd(o);
304 var t = (R.mean(x)-R.mean(y)-o.mu)/(Math.sqrt(1/n1+1/n2)*S);
305 return R.pt(t, n1+n2-2);
306 },
307 // t=((X-Y)-mu)/(sqrt(1/n1+1/n2)*S), (X-Y)-t*sqrt(1/n1+1/n2)*S = mu
308 q2p:function(q, o) {
309 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
310 var S = this.sd(o);
311 return R.mean(x)-R.mean(y)+ R.qt(q, n1+n2-2)*Math.sqrt(1/n1+1/n2)*S;
312 },
313 df:function(o) {
314 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
315 return n1+n2-2;
316 }
317}
318
319var t2paired = { // 成對 T 檢定 T = (X-Y-mu)/(S/sqrt(n)) (雙樣本)
320 h:function(o) { return "H0:mu1"+o.op+"mu2" },
321 sd:function(o) { // S = sd(x-y)
322 var x = o.x, n = x.length, y=o.y;
323 var S= R.sd(R.sub(x,y));
324 return S;
325 },
326 o2q:function(o) {
327 var x = o.x, n = x.length, y=o.y;
328 var S = this.sd(o);
329 var t = (R.mean(R.sub(x,y))-o.mu)/(S/Math.sqrt(n));
330 return R.pt(t, n-1);
331 },
332 // mean(x-y)+t*S/sqrt(n)
333 q2p:function(q, o) {
334 var x = o.x, n = x.length, y=o.y;
335 var S = this.sd(o);
336 return R.mean(R.sub(x,y))+ R.qt(q, n-1)*S/Math.sqrt(n);
337 },
338 df:function(o) {
339 return o.x.length-1;
340 }
341}
342
343var t2varneq = { // σ1≠σ2, Welch's t test (雙樣本) (又稱 Smith-Satterwaite 程序)
344// 參考:http://en.wikipedia.org/wiki/Welch's_t_test
345 h:function(o) { return "H0:mu1"+o.op+"mu2" },
346 // T = ((X-Y)-(mu1-mu2))/sqrt(S1^2/n1+S2^2/n2)
347 o2q:function(o) {
348 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
349 var S1 = R.sd(x), S2=R.sd(y);
350 var t = (R.mean(x)-R.mean(y)-o.mu)/Math.sqrt(S1*S1/n1+S2*S2/n2);
351 return R.pt(t, this.df(o));
352 },
353 // t=((X-Y)-mu)/sqrt(S1^2/n1+S2^2/n2), (X-Y)-t*sqrt(S1^2/n1+S2^2/n2) = mu
354 q2p:function(q, o) {
355 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
356 var S1 = R.sd(x), S2=R.sd(y);
357 return R.mean(x)-R.mean(y)+ R.qt(q, this.df(o))*Math.sqrt(S1*S1/n1+S2*S2/n2);
358 },
359 df:function(o) {
360 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
361 var S1 = R.sd(x), S2=R.sd(y);
362 var Sn1 = S1*S1/n1, Sn2 = S2*S2/n2, Sn12 = Sn1+Sn2;
363 var df = (Sn12*Sn12)/((Sn1*Sn1)/(n1-1)+(Sn2*Sn2)/(n2-1));
364 return df;
365 }
366}
367
368R.ttest = function(o) {
369 var t;
370 if (typeof o.y === "undefined") {
371 o.name = "ttest(X)";
372 o.D = t1;
373 t = R.test(o);
374 t.mean = R.mean(o.x);
375 t.sd = R.sd(o.x);
376 } else {
377 var varequal = R.opt(o, "varequal", false);
378 var paired = R.opt(o, "paired", false);
379 if (varequal) {
380 o.name = "ttest(X,Y,mu="+o.mu+",varequal=true) (pooled)";
381 o.D = t2vareq;
382 t = R.test(o);
383 } else if (paired) {
384 o.name = "ttest(x,y,mu="+o.mu+",paired=true)";
385 o.D = t2paired;
386 t = R.test(o);
387 t.mean = "mean(x-y)="+R.str(R.mean(R.sub(o.x, o.y)));
388 t.sd = "sd(x-y)="+R.str(R.sd(R.sub(o.x, o.y)));
389 } else {
390 o.name = "ttest(x,y,mu="+o.mu+",varequal=false), Welch t-test";
391 o.D = t2varneq;
392 t = R.test(o);
393 }
394 if (typeof t.mean === "undefined") {
395 t.mean = "mean(x)="+R.str(R.mean(o.x))+" mean(y)="+R.str(R.mean(o.y));
396 t.sd = "sd(x)="+R.str(R.sd(o.x))+" sd(y)="+R.str(R.sd(o.y));
397 }
398 }
399 return t;
400}
401
402var f2 = { // 檢定 σ1/σ2 = 1?
403 h:function(o) { return "H0:σ1/σ2"+o.op+"1"; },
404 // F = S1^2/S2^2
405 o2q:function(o) {
406 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
407 var S1 = R.sd(x), S2=R.sd(y);
408 var f = (S1*S1)/(S2*S2);
409 var pf = R.pf(f, n1-1, n2-1);
410 return pf;
411 },
412 // 信賴區間公式: S1^2/(S2^2*F(1-α/2), S1^2/(S2^2*F(α/2))
413 // 也就是要用 S1^2/(S2^2*f(1-q)) ,參考 R 軟體、應用統計方法 (陳景祥) 389 頁。
414 q2p:function(q, o) {
415 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
416 var S1 = R.sd(x), S2=R.sd(y);
417 var qf = R.qf(1-q, n1-1, n2-1);
418 return (S1*S1)/(S2*S2*qf);
419 },
420 df:function(o) {
421 var x = o.x, n1 = x.length, y=o.y, n2=y.length;
422 return [n1-1, n2-1];
423 }
424}
425
426R.ftest = function(o) {
427 o.name = "ftest(X, Y)";
428 o.D = f2;
429 var t = R.test(o);
430 var rsd = R.sd(o.x)/R.sd(o.y);
431 t.ratio = (rsd*rsd);
432 return t;
433}
434
435// R 軟體沒有此函數,測試請看湯銀才 143 頁
436var chisq1 = { // 檢定 σ1 = σ ?
437 h:function(o) { return "H0:σ1"+o.op+"σ"; },
438 // χ(n-1) = (n-1)S^2/σ^2
439 o2q:function(o) {
440 var x = o.x, n = x.length, S=R.sd(x);
441 var v = (n-1)*S*S/(o.sd*o.sd);
442 return R.pchisq(v, n-1);
443 },
444 // 信賴區間公式: (n-1)S^2/χ^2(1-q),參考 R 語言與統計分析 (湯銀才) 142 頁。
445 q2p:function(q, o) {
446 var x = o.x, n = x.length, S=R.sd(x);
447 return (n-1)*S*S/R.qchisq(1-q, n-1);
448 },
449 df:function(o) {
450 var x = o.x, n = x.length;
451 return n-1;
452 }
453}
454
455R.chisqtest = function(o) {
456 o.name = "chisqtest(X)";
457 o.D = chisq1;
458 return R.test(o);
459}
460
461R.vartest = function(o) {
462 if (typeof o.y === "undefined")
463 return R.chisqtest(o);
464 else
465 return R.ftest(o);
466}
467
468var z1 = { // 單樣本 Z 檢定
469 h:function(o) { return "H0:mu"+o.op+o.mu+" when sd="+o.sd; },
470 o2q:function(o) {
471 var x = o.x, n = x.length;
472 var z = (R.mean(x)-o.mu)/(o.sd/Math.sqrt(n)); // z=(X-mu)/(sd/sqrt(n))
473 return R.pnorm(z, 0, 1);
474 },
475 q2p:function(q, o) {
476 var x = o.x, n = x.length;
477 return R.mean(x) + R.qnorm(q, 0, 1) * R.sd(x) / Math.sqrt(n);
478 },
479 df:function(o) { return o.x.length; }
480}
481
482R.ztest = function(o) {
483 o.name = "ztest(X)";
484 o.D = z1;
485 return R.test(o);
486}
487
488var zprop1 = { // 比例的檢定, n 較大時的近似解 o={ x, n, p } // x 為數值,n 個中出現 x 個 1
489 h:function(o) { return "H0:p"+o.op+o.p; },
490 // Z = (p1-p)/sqrt(p(1-p)/n)
491 o2q:function(o) {
492 var x=o.x, n=o.n, p1=x/n, p=R.def(o.p, p1);
493 var z = (p1-p)/Math.sqrt(p*(1-p)/n);
494 return R.pnorm(z, 0, 1);
495 },
496 // 信賴區間公式: p1+z*sqrt(p1*(1-p1)/n),參考 R 語言與統計分析 (湯銀才) 149 頁。
497 q2p:function(q, o) {
498 var x=o.x, n=o.n, p1=x/n, p=p1;
499 var z = R.qnorm(q, 0, 1);
500 var z22n = z*z/(2*n);
501 return (p1+z22n+z*Math.sqrt( p*(1-p)/n + z22n/(2*n) ))/(1+2*z22n); // R 的版本,比較複雜的估計公式
502// return p1+z*Math.sqrt(p*(1-p)/n); // 語言與統計分析 (湯銀才) 149 頁的版本。
503 },
504 df:function(o) { return 1; }
505}
506
507var zprop2 = { // 比例的檢定, n 較大時的近似解 o={ x, y, n1, n2 }
508 h:function(o) { return "H0:p1-p2"+o.op+o.p; },
509 // Z = (p1-p2)/sqrt(p*(1-p)*(1/n1+1/n2)), p = (n1p1+n2p2)/(n1+n2),參考 R 語言與統計分析 (湯銀才) 175 頁。
510 o2q:function(o) {
511 var x=o.x, y=o.y, n1=o.n1, n2=o.n2, p1=x/n1, p2=y/n2, p=(n1*p1+n2*p2)/(n1+n2);
512 var z = (p1-p2)/Math.sqrt(p*(1-p)*(1/n1+1/n2));
513 return R.pnorm(z, 0, 1);
514 },
515 // 信賴區間公式: p1-p2+z*sqrt(p*(1-p)*(1/n1+1/n2));
516 q2p:function(q, o) {
517 var x=o.x, y=o.y, n1=o.n1, n2=o.n2, p1=x/n1, p2=y/n2, p=(n1*p1+n2*p2)/(n1+n2);
518 var z = R.qnorm(q, 0, 1);
519 return p1-p2+z*Math.sqrt(p*(1-p)*(1/n1+1/n2));
520 },
521 df:function(o) { return 1; }
522}
523
524/* 在 prop.test.R 當中,雙邊檢定的 pvalue 是用 pchisq, 單邊才是用 z ,為何呢? ( 但是信賴區間則是全部用 z)
525 if (alternative == "two.sided")
526 PVAL <- pchisq(STATISTIC, PARAMETER, lower.tail = FALSE)
527 else {
528 if (k == 1)
529 z <- sign(ESTIMATE - p) * sqrt(STATISTIC)
530 else
531 z <- sign(DELTA) * sqrt(STATISTIC)
532 PVAL <- pnorm(z, lower.tail = (alternative == "less"))
533 }
534*/
535
536R.proptest = function(o) {
537 o.p = R.opt(o, "p", 0.5);
538 o.name = "proptest("+R.str(o)+")";
539 o.correct = R.opt(o.correct, false);
540 if (o.correct) {
541 o.name += ", binomtest";
542 o.D += binom1;
543 } else {
544 if (typeof o.y === "undefined") {
545 o.name += ", zprop1";
546 o.D = zprop1;
547 } else {
548 o.p = 0; // p1-p2 = 0
549 o.name += ", zprop2";
550 o.D = zprop2;
551 }
552 }
553 var t=R.test(o);
554 if (typeof o.y === "undefined")
555 t.p = o.x/o.n;
556 else
557 t.p = [o.x/o.n1, o.y/o.n2];
558 return t;
559}
560
561// 參考: https://github.com/SurajGupta/r-source/blob/master/src/library/stats/R/binom.test.R
562var binom1 = { // 比例的檢定, n 較大時的近似解 o={ x, n, p } // x 為數值,n 個中出現 x 個 1
563 h:function(o) { return "H0:p"+o.op+o.p; },
564
565 // Z = C(n, k)*p1^k*(1-p1)^(n-k), CDF(z: from 1 to x)
566 o2q:function(o) {
567 var x=o.x, n=o.n, p = o.p, q;
568 var dx = R.dbinom(x, n, p);
569 if (o.op === "=") { // 雙尾檢定,去雙尾後 / 2
570 var q = 0;
571 for (var i=0; i<=n; i++) {
572 var di = R.dbinom(i, n, p);
573 if (di > dx+1e-5) q += di; // 為何 x 本身不算,如果算應該用 di > dx-1e-5 才對。
574 }
575 q=1-((1-q)/2); // 因為 test 會 * 2 所進行的減半調整
576 } else { // 單尾檢定
577 if (Math.abs(x - n*p)<1e-5) // 正確預測, q=1
578 q = 1;
579 else {
580 if (o.op === ">")
581 q = R.pbinom(x, n, p); // 去右尾
582 else // op=== "<"
583 q = R.pbinom(x-1, n, p); // 去右尾
584 }
585 }
586 return q;
587 },
588/* 注意上述 R 原始碼另一尾的計算方式,是用 < pbinom(最接近 x 者) 的算法,而不是直接 * 2。 問題是我們在 test 中是直接用*2 的方式。
589 d <- dbinom(x, n, p)
590 ...
591 else if (x < m) {
592 i <- seq.int(from = ceiling(m), to = n)
593 y <- sum(dbinom(i, n, p) <= d * relErr)
594 pbinom(x, n, p) 左尾 + pbinom(n - y, n, p, lower.tail = FALSE) 右尾
595 } else {
596 i <- seq.int(from = 0, to = floor(m))
597 y <- sum(dbinom(i, n, p) <= d * relErr)
598 pbinom(y - 1, n, p) 左尾 + pbinom(x - 1, n, p, lower.tail = FALSE) 右尾
599 }
600*/
601 // 信賴區間公式: P(T>c) = Σ (n, i) C(n, i) p1^i (1-p1)^(n-i) for i>c < q
602
603 q2p:function(q, o, side) {
604 var x=o.x, n=o.n, p=o.p, op=o.op;
605 if (side === "L")
606 return qbeta(q, x, n - x + 1); // 這裏採用 qbeta 是 R 的作法; 直覺上應該採用 R.qbinom(q, n, p);
607 else
608 return qbeta(q, x + 1, n - x);
609 },
610 df:function(o) { return 1; }
611}
612
613R.binomtest = function(o) {
614 o.p = R.opt(o, "p", 0.5);
615 o.name = "binomtest("+R.str(o)+")";
616 o.D = binom1;
617 var t=R.test(o);
618 t.p = o.x/o.n;
619 t.ci[0]=(o.op === ">")?0:t.ci[0];
620 t.ci[1]=(o.op === "<")?1:t.ci[1];
621 return t;
622}
623
624R.report = function(o) {
625 console.log("=========== report ==========");
626 for (var k in o) {
627 if (typeof o[k] !== "function")
628 console.log(k+"\t: "+str(o[k]));
629 }
630}
631
632// anova f-test : array1, array2, array3, ...
633R.anovaftest = function() {
634 return {
635 h0 : "σ1=σ2=...=σ"+arguments.length,
636 pvalue: J.anovaftest(),
637 score: J.anovafscore(),
638 };
639}
640
641module.exports = R;