1 | import {Transform, ingest} from 'vega-dataflow';
|
2 | import {accessorName, error, inherits} from 'vega-util';
|
3 | import {max, mean, median, min} from 'd3-array';
|
4 |
|
// Supported imputation methods, keyed by the value of the `method`
// parameter. 'value' is a marker for constant-value imputation (handled
// in getValue); the remaining entries are d3-array aggregate functions.
const Methods = {
  value: 'value',
  median,
  mean,
  min,
  max
};

// Shared empty array used as the group-by values when no groupby
// accessors are provided.
const Empty = [];
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
/**
 * Imputes missing values: for every group, a new tuple is generated for
 * each key value of the key domain that the group does not contain.
 * @constructor
 * @param {object} params - The parameters for this operator.
 * @param {function(object): *} params.field - Accessor for the field
 *   whose value is imputed.
 * @param {function(object): *} params.key - Accessor for the key value.
 *   Tuples are generated for key values that are missing within a group.
 * @param {Array<*>} [params.keyvals] - Key values that seed the key
 *   domain; key values observed in the input data are added to them.
 * @param {Array<function(object): *>} [params.groupby] - Accessors that
 *   determine the groups within which imputation is performed.
 * @param {string} [params.method='value'] - The imputation method. One of
 *   'value', 'mean', 'median', 'max', 'min'.
 * @param {*} [params.value=0] - The constant used by method 'value'.
 */
export default function Impute(params) {
  // Initial operator value; presumably exposed as this.value, which
  // transform() reads as the list of previously imputed tuples.
  const initialValue = [];
  Transform.call(this, initialValue, params);
}
|
37 |
|
// Declarative description of the transform: its type name, metadata and
// the parameters it accepts, including their types and default values.
Impute.Definition = {
  type: 'Impute',
  metadata: {changes: true},
  params: [
    {name: 'field', type: 'field', required: true},
    {name: 'key', type: 'field', required: true},
    {name: 'keyvals', array: true},
    {name: 'groupby', type: 'field', array: true},
    {
      name: 'method',
      type: 'enum',
      default: 'value',
      values: ['value', 'mean', 'median', 'max', 'min']
    },
    {name: 'value', default: 0}
  ]
};
|
51 |
|
/**
 * Resolves the imputation function for the given parameters.
 * @param {object} _ - The transform parameters.
 * @param {string} [_.method] - Name of the imputation method; falls back
 *   to 'value' when not set.
 * @param {*} [_.value] - Constant used by method 'value'; falls back to 0
 *   only when undefined (null is kept as-is).
 * @return {function} For method 'value', a function that returns the
 *   constant; otherwise the entry of Methods registered under the method
 *   name. Reports an error for any other method name.
 */
function getValue(_) {
  const m = _.method || Methods.value;

  // Only own entries of Methods are valid method names. A plain property
  // lookup would also accept names inherited from Object.prototype (such
  // as 'toString' or 'constructor') and hand back an unrelated function.
  const known = Object.prototype.hasOwnProperty.call(Methods, m)
    && Methods[m] != null;

  if (!known) {
    error('Unrecognized imputation method: ' + m);
  } else if (m === Methods.value) {
    const v = _.value !== undefined ? _.value : 0;
    return () => v;
  } else {
    return Methods[m];
  }
}
|
64 |
|
/**
 * Wraps the field accessor so that it tolerates missing tuples.
 * @param {object} _ - The transform parameters.
 * @param {function(object): *} _.field - Accessor for the imputed field.
 * @return {function(object): *} An accessor that yields NaN for a falsy
 *   tuple (such as the empty slots of a group array) and otherwise
 *   delegates to the field accessor.
 */
function getField(_) {
  const accessor = _.field;
  return tuple => {
    if (!tuple) return NaN;
    return accessor(tuple);
  };
}
|
69 |
|
inherits(Impute, Transform, {
  /**
   * Generates a tuple for every key value of the key domain that is
   * missing from a group of pulse.source, and swaps the tuples generated
   * on the previous run for the newly generated ones.
   * @param {object} _ - The transform parameters (field, key, keyvals,
   *   groupby, method, value).
   * @param {object} pulse - The input pulse.
   * @return {object} A fork of the input pulse whose add set additionally
   *   holds the newly imputed tuples and whose rem set additionally holds
   *   the previously imputed ones.
   */
  transform(_, pulse) {
    const out = pulse.fork(pulse.ALL);
    const impute = getValue(_);
    const field = getField(_);
    const fName = accessorName(_.field);
    const kName = accessorName(_.key);
    const gNames = (_.groupby || []).map(accessorName);
    const groups = partition(pulse.source, _.groupby, _.key, _.keyvals);
    const imputed = [];
    const previous = this.value;
    const domain = groups.domain;
    const domainSize = domain.length;

    for (const group of groups) {
      const gVals = group.values;

      // imputed value of this group, computed lazily on first use
      let value = NaN;

      // add a tuple for each key value the group lacks
      for (let j = 0; j < domainSize; ++j) {
        if (group[j] != null) continue;

        // _impute flags the tuple as generated by this transform
        const t = {_impute: true};
        gVals.forEach((gVal, i) => { t[gNames[i]] = gVal; });
        t[kName] = domain[j];
        if (Number.isNaN(value)) {
          value = impute(group, field);
        }
        t[fName] = value;

        imputed.push(ingest(t));
      }
    }

    // update the pulse: add the new tuples, remove those of the last run
    if (imputed.length) {
      out.add = out.materialize(out.ADD).add.concat(imputed);
    }
    if (previous.length) {
      out.rem = out.materialize(out.REM).rem.concat(previous);
    }
    this.value = imputed;

    return out;
  }
});
|
111 |
|
/**
 * Partitions tuples into groups and indexes each group by key value.
 * @param {Array<object>} data - The tuples to partition.
 * @param {Array<function(object): *>} [groupby] - Accessors whose values
 *   identify the group of a tuple. When omitted, all tuples share a
 *   single group.
 * @param {function(object): *} key - Accessor for the key value of a tuple.
 * @param {Array<*>} [keyvals] - Key values that seed the key domain. The
 *   array is copied, not modified.
 * @return {Array<Array<object>>} The groups in order of first appearance.
 *   Each group is a (possibly sparse) array that holds, at index i, the
 *   last tuple seen for key value domain[i]; its group-by values are
 *   attached as `values`. The returned array carries the key domain
 *   (keyvals followed by newly observed key values) as `domain`.
 */
function partition(data, groupby, key, keyvals) {
  const groups = [];
  const domain = keyvals ? keyvals.slice() : [];

  // Both lookup tables are keyed by stringified data values, so they must
  // not inherit from Object.prototype: otherwise a value like
  // 'constructor' or 'toString' resolves to an inherited member and is
  // mistaken for an existing entry.
  const kMap = Object.create(null); // key value -> 1-based domain index
  const gMap = Object.create(null); // group key -> group array

  domain.forEach((k, i) => { kMap[k] = i + 1; });

  for (let i = 0, n = data.length; i < n; ++i) {
    const t = data[i];
    const k = key(t);

    // push returns the new domain length, i.e. the 1-based index of k;
    // 1-based indices keep every stored entry truthy for the || test
    const j = kMap[k] || (kMap[k] = domain.push(k));

    const gVals = groupby ? groupby.map(f => f(t)) : Empty;
    const gKey = gVals + '';
    let group = gMap[gKey];
    if (!group) {
      group = gMap[gKey] = [];
      groups.push(group);
      group.values = gVals;
    }
    group[j - 1] = t;
  }

  groups.domain = domain;
  return groups;
}
|