UNPKG

4.12 kBJavaScriptView Raw
1import {Transform, ingest} from 'vega-dataflow';
2import {accessorName, error, inherits} from 'vega-util';
3import {max, mean, median, min} from 'd3-array';
4
// Lookup table of supported imputation methods, keyed by method name.
// 'value' maps to a marker string (constant-value imputation); every other
// entry maps to the d3-array function of the same name.
const Methods = {value: 'value', median, mean, min, max};

// Shared empty array used as the group-by value list when no groupby
// accessors are supplied.
const Empty = [];
14
/**
 * Transform operator that fills in ("imputes") tuples for key values that
 * are missing from a data series.
 * @constructor
 * @param {object} params - The parameters for this operator.
 * @param {function(object): *} params.field - Accessor for the value field
 *   whose missing entries should be imputed.
 * @param {Array<function(object): *>} [params.groupby] - Accessors that
 *   split the data into series; imputation is performed per series.
 * @param {function(object): *} params.key - Accessor for the key value.
 *   Keys are expected to be unique within a group. A new tuple is imputed
 *   for every key value that a group does not contain.
 * @param {Array<*>} [params.keyvals] - Optional key values that must be
 *   present in every group. These are combined with the key values observed
 *   in the input data.
 * @param {string} [params.method='value'] - The imputation method. One of
 *   'value', 'mean', 'median', 'max', 'min'.
 * @param {*} [params.value=0] - The constant used for imputation when the
 *   method is 'value'.
 */
export default function Impute(params) {
  // The empty array is handed to Transform as its second argument; transform()
  // later reads this.value as the list of tuples imputed on the previous run.
  const initial = [];
  Transform.call(this, initial, params);
}
37
// Declarative description of the transform: its type name, metadata flags
// and the parameters it accepts (with types, defaults and allowed values).
Impute.Definition = {
  type: 'Impute',
  metadata: {changes: true},
  params: [
    {name: 'field', type: 'field', required: true},
    {name: 'key', type: 'field', required: true},
    {name: 'keyvals', array: true},
    {name: 'groupby', type: 'field', array: true},
    {
      name: 'method',
      type: 'enum',
      default: 'value',
      values: ['value', 'mean', 'median', 'max', 'min']
    },
    {name: 'value', default: 0}
  ]
};

// Set up inheritance from Transform; the returned object is where the
// operator's methods are attached.
const prototype = inherits(Impute, Transform);
53
/**
 * Resolve the imputation function for the given parameters.
 * @param {object} _ - Operator parameters; reads `_.method` and `_.value`.
 * @returns {function} For method 'value' (the default), a function that
 *   returns the constant `_.value` (0 when `_.value` is undefined). For any
 *   other recognized method, the matching entry of the Methods table.
 *   Unrecognized methods are reported through `error`.
 */
function getValue(_) {
  const m = _.method || Methods.value;

  // Only names defined directly on Methods are valid. A plain property
  // lookup would also accept inherited names such as 'toString' or
  // 'constructor' and hand back an unrelated built-in function.
  const known = Object.prototype.hasOwnProperty.call(Methods, m)
    && Methods[m] != null;

  if (!known) {
    error('Unrecognized imputation method: ' + m);
  } else if (m === Methods.value) {
    // capture the constant once so later parameter changes do not leak in
    const v = _.value !== undefined ? _.value : 0;
    return function() { return v; };
  } else {
    return Methods[m];
  }
}
66
/**
 * Wrap the value-field accessor so it tolerates missing tuples.
 * @param {object} _ - Operator parameters; reads `_.field`.
 * @returns {function(object): *} Function that returns NaN for a falsy
 *   tuple and otherwise the result of applying the field accessor to it.
 */
function getField(_) {
  const accessor = _.field;
  return tuple => {
    if (!tuple) return NaN;
    return accessor(tuple);
  };
}
71
/**
 * Build imputed tuples for every (group, key) combination that is absent
 * from pulse.source, add them to the output pulse, and remove the tuples
 * that were imputed on the previous run.
 * @param {object} _ - Operator parameters (field, key, groupby, keyvals,
 *   method, value).
 * @param {object} pulse - The input pulse; its `source` array is partitioned.
 * @returns {object} The forked output pulse.
 */
prototype.transform = function(_, pulse) {
  const out = pulse.fork(pulse.ALL);
  const impute = getValue(_);
  const field = getField(_);
  const fieldName = accessorName(_.field);
  const keyName = accessorName(_.key);
  const groupNames = (_.groupby || []).map(accessorName);
  const groups = partition(pulse.source, _.groupby, _.key, _.keyvals);
  const imputed = [];
  const previous = this.value;
  const domainSize = groups.domain.length;

  for (const group of groups) {
    const groupValues = group.values;

    // NaN marks "not computed yet": the imputed value is evaluated lazily,
    // at most once per group unless the result is itself NaN
    let value = NaN;

    for (let j = 0; j < domainSize; ++j) {
      // a tuple already exists for this key within the group
      if (group[j] != null) continue;

      const tuple = {_impute: true};

      // copy the group-by values onto the new tuple
      for (let i = 0, n = groupValues.length; i < n; ++i) {
        tuple[groupNames[i]] = groupValues[i];
      }

      tuple[keyName] = groups.domain[j];

      if (Number.isNaN(value)) {
        value = impute(group, field);
      }
      tuple[fieldName] = value;

      imputed.push(ingest(tuple));
    }
  }

  // register new imputed tuples as additions, prior ones as removals
  if (imputed.length) {
    out.add = out.materialize(out.ADD).add.concat(imputed);
  }
  if (previous.length) {
    out.rem = out.materialize(out.REM).rem.concat(previous);
  }
  this.value = imputed;

  return out;
};
111
/**
 * Partition tuples into groups and index each group by key position.
 *
 * Key values and group identifiers are matched by their string form.
 *
 * @param {Array<object>} data - The tuples to partition.
 * @param {Array<function(object): *>} [groupby] - Accessors whose values
 *   identify the group a tuple belongs to. When omitted, all tuples fall
 *   into a single group.
 * @param {function(object): *} key - Accessor for a tuple's key value.
 * @param {Array<*>} [keyvals] - Key values that seed the key domain, ahead
 *   of any keys observed in the data. Repeated entries are ignored. The
 *   array itself is not modified.
 * @returns {Array<Array<object>>} One (possibly sparse) array per group, in
 *   order of first appearance. `group[i]` is the tuple whose key is
 *   `domain[i]`, or a hole if the group has none; `group.values` holds the
 *   group-by values. The returned array carries the key domain as `domain`.
 */
function partition(data, groupby, key, keyvals) {
  const groups = [];
  const domain = [];

  // Null-prototype lookups: key or group values such as 'constructor' or
  // 'toString' must not resolve to properties inherited from Object.prototype.
  const kMap = Object.create(null); // key -> 1-based index into domain
  const gMap = Object.create(null); // group key string -> group array

  let t; // current tuple, read by `get` below
  const get = f => f(t);

  // Seed the domain, skipping repeats so each key owns exactly one slot.
  // Otherwise the earlier slot of a duplicated key could never be filled
  // and would always be treated as missing.
  if (keyvals) {
    for (const k of keyvals) {
      if (!kMap[k]) kMap[k] = domain.push(k);
    }
  }

  for (let i = 0, n = data.length; i < n; ++i) {
    t = data[i];
    const k = key(t);
    // push returns the new length, which is the 1-based index of k
    const j = kMap[k] || (kMap[k] = domain.push(k));

    const gVals = groupby ? groupby.map(get) : Empty;
    const gKey = gVals + '';
    let group = gMap[gKey];
    if (!group) {
      group = gMap[gKey] = [];
      group.values = gVals;
      groups.push(group);
    }
    group[j - 1] = t;
  }

  groups.domain = domain;
  return groups;
}