1 | import {Transform, ingest} from 'vega-dataflow';
|
2 | import {accessorName, error, inherits} from 'vega-util';
|
3 | import {max, mean, median, min} from 'd3-array';
|
4 |
|
// Lookup table of supported imputation methods, keyed by the `method`
// parameter. 'value' maps to a sentinel string (getValue compares against
// Methods.value to select the constant-value path); every other entry is a
// d3-array aggregate that transform() invokes as impute(group, field).
const Methods = {
  value: 'value',
  median,
  mean,
  min,
  max
};

// Shared empty list used by partition() as the group-by values when no
// groupby fields are supplied.
const Empty = [];
|
14 |
|
15 |
|
16 |
|
17 |
|
18 |
|
19 |
|
20 |
|
21 |
|
22 |
|
23 |
|
24 |
|
25 |
|
26 |
|
27 |
|
28 |
|
29 |
|
30 |
|
31 |
|
32 |
|
33 |
|
/**
 * Impute transform constructor. Generates tuples for key values that are
 * missing within a group (see transform()).
 *
 * Parameters, as declared in Impute.Definition:
 *  - field   (required) field whose value is imputed
 *  - key     (required) field that identifies a tuple within its group
 *  - keyvals optional array of additional key values to consider
 *  - groupby optional array of fields to group by
 *  - method  one of 'value' (default), 'mean', 'median', 'max', 'min'
 *  - value   constant used by the 'value' method (default 0)
 *
 * @constructor
 * @param {object} params - The parameters for this operator.
 */
export default function Impute(params) {
  // Presumably becomes the operator's initial `value`: transform() reads
  // this.value as the list of tuples imputed on the previous run.
  // NOTE(review): confirm against vega-dataflow's Transform constructor.
  const initialValue = [];
  Transform.call(this, initialValue, params);
}
|
37 |
|
// Declarative description of the transform: its type name, metadata flags,
// and the parameters it accepts (with their types and defaults).
Impute.Definition = {
  type: 'Impute',
  metadata: {changes: true},
  params: [
    {name: 'field', type: 'field', required: true},
    {name: 'key', type: 'field', required: true},
    {name: 'keyvals', array: true},
    {name: 'groupby', type: 'field', array: true},
    {
      name: 'method',
      type: 'enum',
      default: 'value',
      values: ['value', 'mean', 'median', 'max', 'min']
    },
    {name: 'value', default: 0}
  ]
};
|
51 |
|
// Wire Impute up as a subtype of Transform; the value returned by inherits()
// is the object that Impute's methods (e.g. transform) are attached to below.
const prototype = inherits(Impute, Transform);
|
53 |
|
/**
 * Resolve the function used to compute the imputed value for a group.
 *
 * @param {object} _ - Transform parameters. Reads `_.method` (defaults to
 *   'value' when falsy) and, for the 'value' method, `_.value` (defaults to
 *   0 when undefined).
 * @returns {function} For the 'value' method, a function that always
 *   returns the configured constant. Otherwise the aggregate registered in
 *   Methods, which transform() invokes as impute(group, field).
 *   Unrecognized methods are reported through error().
 */
function getValue(_) {
  const method = _.method || Methods.value;

  // Accept only names registered directly on the Methods table. A bare
  // property lookup would also resolve inherited Object.prototype members
  // such as 'toString' or 'constructor' and treat them as valid methods.
  const known = Object.prototype.hasOwnProperty.call(Methods, method)
    && Methods[method] != null;

  if (!known) {
    error('Unrecognized imputation method: ' + method);
  } else if (method === Methods.value) {
    // Capture the constant once so later parameter changes do not affect
    // the returned function.
    const constant = _.value !== undefined ? _.value : 0;
    return function() { return constant; };
  } else {
    return Methods[method];
  }
}
|
66 |
|
/**
 * Wrap the `field` accessor so it tolerates missing tuples.
 *
 * @param {object} _ - Transform parameters; `_.field` is the accessor.
 * @returns {function} Accessor that returns the field value for a truthy
 *   tuple and NaN for a falsy one (e.g. an empty slot in a group array).
 */
function getField(_) {
  const accessor = _.field;
  return function(tuple) {
    if (!tuple) {
      return NaN;
    }
    return accessor(tuple);
  };
}
|
71 |
|
/**
 * Generate imputed tuples for every (group, key) combination that has no
 * tuple in the pulse source.
 *
 * Every run rebuilds the full set of imputed tuples: the tuples created on
 * this run are appended to the output pulse's `add` list, and the tuples
 * created on the previous run (held in this.value) are appended to `rem`.
 *
 * @param {object} _ - Transform parameters (field, key, keyvals, groupby,
 *   method, value).
 * @param {object} pulse - The input pulse; its `source` is partitioned.
 * @returns {object} The forked output pulse.
 */
prototype.transform = function(_, pulse) {
  const out = pulse.fork(pulse.ALL);
  const impute = getValue(_);
  const field = getField(_);
  const fieldName = accessorName(_.field);
  const keyName = accessorName(_.key);
  const groupNames = (_.groupby || []).map(accessorName);
  const groups = partition(pulse.source, _.groupby, _.key, _.keyvals);
  const imputed = [];
  const previous = this.value;
  const domain = groups.domain;
  const keyCount = domain.length;

  for (const group of groups) {
    const groupValues = group.values;
    const valueCount = groupValues.length;

    // The fill value is computed lazily, at most once per group, the first
    // time a missing key is found. NaN marks "not computed yet".
    let fill = NaN;

    // add tuples for missing values
    for (let slot = 0; slot < keyCount; ++slot) {
      if (group[slot] != null) {
        continue;
      }

      const tuple = {_impute: true};
      for (let g = 0; g < valueCount; ++g) {
        tuple[groupNames[g]] = groupValues[g];
      }
      tuple[keyName] = domain[slot];

      if (Number.isNaN(fill)) {
        fill = impute(group, field);
      }
      tuple[fieldName] = fill;

      imputed.push(ingest(tuple));
    }
  }

  // update pulse with imputed tuples
  if (imputed.length) {
    out.add = out.materialize(out.ADD).add.concat(imputed);
  }
  if (previous.length) {
    out.rem = out.materialize(out.REM).rem.concat(previous);
  }
  this.value = imputed;

  return out;
};
|
111 |
|
/**
 * Partition tuples into groups and index each group by key value.
 *
 * @param {Array<object>} data - The tuples to partition.
 * @param {Array<function>} [groupby] - Accessors whose values define a
 *   group. When omitted, all tuples fall into a single group.
 * @param {function} key - Accessor for the key value of a tuple.
 * @param {Array} [keyvals] - Key values that seed the key domain, ahead of
 *   any keys observed in the data.
 * @returns {Array<Array>} One array per group, in order of first
 *   appearance. Each group array holds the group's tuple for domain[i] at
 *   index i (missing keys leave the slot unset) and carries a `values`
 *   property with the group-by values. The returned array itself carries a
 *   `domain` property listing all key values.
 */
function partition(data, groupby, key, keyvals) {
  const groups = [];
  const domain = keyvals ? keyvals.slice() : [];

  // Null-prototype lookup tables: with a plain {} a key or group value such
  // as 'constructor', 'toString' or '__proto__' would resolve to an
  // inherited member and be mistaken for an existing entry.
  const keyIndex = Object.create(null);   // key value -> 1-based domain index
  const groupIndex = Object.create(null); // stringified group values -> group

  // Hoisted accessor callback; `tuple` is rebound on every iteration.
  let tuple;
  const read = function(f) { return f(tuple); };

  domain.forEach(function(k, i) { keyIndex[k] = i + 1; });

  for (let i = 0, n = data.length; i < n; ++i) {
    tuple = data[i];
    const k = key(tuple);

    // Indices are stored 1-based so that a hit is always truthy;
    // Array.prototype.push returns the new length, i.e. the 1-based index
    // of the key that was just appended.
    const j = keyIndex[k] || (keyIndex[k] = domain.push(k));

    const gVals = groupby ? groupby.map(read) : Empty;
    const gKey = gVals + '';

    let group = groupIndex[gKey];
    if (!group) {
      group = groupIndex[gKey] = [];
      groups.push(group);
      group.values = gVals;
    }
    group[j - 1] = tuple;
  }

  groups.domain = domain;
  return groups;
}
|