UNPKG

4.51 kBJavaScriptView Raw
1import {partition} from './util/util';
2import {randomKDE} from 'vega-statistics';
3import {Transform, ingest} from 'vega-dataflow';
4import {sampleCurve} from 'vega-statistics';
5import {accessorName, error, extent, inherits} from 'vega-util';
6
/**
 * Compute kernel density estimates (KDE) for one or more data groups.
 * @constructor
 * @param {object} params - The parameters for this operator.
 * @param {Array<function(object): *>} [params.groupby] - An array of accessors
 *   to groupby.
 * @param {function(object): *} params.field - An accessor for the data field
 *   to estimate.
 * @param {number} [params.bandwidth=0] - The KDE kernel bandwidth.
 *   If zero or unspecified, the bandwidth is automatically determined.
 * @param {boolean} [params.counts=false] - A boolean flag indicating if the
 *   output values should be probability estimates (false, default) or
 *   smoothed counts (true). When true, each estimate is multiplied by the
 *   number of values in its group.
 * @param {boolean} [params.cumulative=false] - A boolean flag indicating if a
 *   density (false) or cumulative distribution (true) should be generated.
 * @param {Array<number>} [params.extent] - The domain extent over which to
 *   plot the density. If unspecified, the [min, max] data extent is used.
 * @param {string} [params.resolve='independent'] - Indicates how parameters for
 *   multiple densities should be resolved. If "independent" (the default), each
 *   density may have its own domain extent and dynamic number of curve sample
 *   steps. If "shared", the KDE transform will ensure that all densities are
 *   defined over a shared domain and curve steps, enabling stacking.
 * @param {number} [params.minsteps=25] - The minimum number of curve samples
 *   for plotting the density.
 * @param {number} [params.maxsteps=200] - The maximum number of curve samples
 *   for plotting the density.
 * @param {number} [params.steps] - The exact number of curve samples for
 *   plotting the density. If specified, overrides both minsteps and maxsteps
 *   to set an exact number of uniform samples. Useful in conjunction with
 *   a fixed extent to ensure consistent sample points for stacked densities.
 * @param {Array<string>} [params.as=['value', 'density']] - The output field
 *   names: the first receives the sample point, the second receives the
 *   (optionally count-scaled) density or cumulative estimate at that point.
 */
export default function KDE(params) {
  // No initial operator value is supplied (null); only the parameters are
  // forwarded to the Transform base constructor.
  Transform.call(this, null, params);
}
41
/**
 * Declarative description of the KDE transform: its type name, metadata,
 * and the list of accepted parameters with their types and defaults.
 * The defaults listed here match the fallbacks applied in the transform
 * method (minsteps 25, maxsteps 200, as ['value', 'density']).
 */
KDE.Definition = {
  type: 'KDE',
  metadata: {generates: true},
  params: [
    {name: 'groupby', type: 'field', array: true},
    {name: 'field', type: 'field', required: true},
    {name: 'cumulative', type: 'boolean', default: false},
    {name: 'counts', type: 'boolean', default: false},
    {name: 'bandwidth', type: 'number', default: 0},
    {name: 'extent', type: 'number', array: true, length: 2},
    {
      name: 'resolve',
      type: 'enum',
      values: ['shared', 'independent'],
      default: 'independent'
    },
    {name: 'steps', type: 'number'},
    {name: 'minsteps', type: 'number', default: 25},
    {name: 'maxsteps', type: 'number', default: 200},
    {
      name: 'as',
      type: 'string',
      array: true,
      default: ['value', 'density']
    }
  ]
};
59
// Set up inheritance from Transform; the object returned by inherits()
// receives the operator's methods below.
const prototype = inherits(KDE, Transform);

/**
 * Generate sampled density (or cumulative distribution) curves for each
 * data group and emit them as a fresh set of tuples.
 *
 * The curves are recomputed only when there is no previous output, the
 * input pulse reports changes, or a parameter was modified; otherwise the
 * forked pulse is returned without additions or removals.
 *
 * @param {object} _ - The operator parameters (see the KDE constructor).
 * @param {object} pulse - The input pulse.
 * @return {object} A fork of the input pulse. On recomputation its `add`
 *   and `source` hold the new tuples and `rem` holds the previous output.
 */
prototype.transform = function(_, pulse) {
  const out = pulse.fork(pulse.NO_SOURCE | pulse.NO_FIELDS);

  // Nothing relevant changed: keep the previously generated tuples.
  if (this.value && !pulse.changed() && !_.modified()) {
    return out;
  }

  const source = pulse.materialize(pulse.SOURCE).source;
  const groups = partition(source, _.groupby, _.field);
  const names = (_.groupby || []).map(accessorName);
  const bandwidth = _.bandwidth;
  // Derived from a boolean flag, so this is always 'cdf' or 'pdf' and
  // needs no further validation.
  const method = _.cumulative ? 'cdf' : 'pdf';
  const as = _.as || ['value', 'density'];
  const values = [];

  // An explicit step count overrides both the minimum and the maximum.
  let domain = _.extent;
  let minsteps = _.steps || _.minsteps || 25;
  let maxsteps = _.steps || _.maxsteps || 200;

  if (_.resolve === 'shared') {
    // Shared resolution: one domain across all groups and a fixed number
    // of samples, so every curve is evaluated at the same points.
    if (!domain) domain = extent(source, _.field);
    minsteps = maxsteps = _.steps || maxsteps;
  }

  groups.forEach(g => {
    const density = randomKDE(g, bandwidth)[method];
    // Smoothed counts scale each estimate by the group size.
    const scale = _.counts ? g.length : 1;
    const local = domain || extent(g);

    sampleCurve(density, local, minsteps, maxsteps).forEach(v => {
      const t = {};
      // Copy the group-by key values onto every sample of this group.
      for (let i = 0; i < names.length; ++i) {
        t[names[i]] = g.dims[i];
      }
      t[as[0]] = v[0];
      t[as[1]] = v[1] * scale;
      values.push(ingest(t));
    });
  });

  // Retire the previous output before publishing the new tuples.
  if (this.value) out.rem = this.value;
  this.value = out.add = out.source = values;

  return out;
};