1 | /* eslint-disable default-case */
|
2 |
|
3 | import { FieldType, DimensionSubtype, DataFormat, FilteringMode } from './enums';
|
4 | import {
|
5 | persistDerivations,
|
6 | getRootGroupByModel,
|
7 | propagateToAllDataModels,
|
8 | getRootDataModel,
|
9 | propagateImmutableActions,
|
10 | addToPropNamespace,
|
11 | sanitizeUnitSchema,
|
12 | splitWithSelect,
|
13 | splitWithProject,
|
14 | getNormalizedProFields
|
15 | } from './helper';
|
16 | import { DM_DERIVATIVES, PROPAGATION } from './constants';
|
17 | import {
|
18 | dataBuilder,
|
19 | rowDiffsetIterator,
|
20 | groupBy
|
21 | } from './operator';
|
22 | import { createBinnedFieldData } from './operator/bucket-creator';
|
23 | import Relation from './relation';
|
24 | import reducerStore from './utils/reducer-store';
|
25 | import { createFields } from './field-creator';
|
26 | import InvalidAwareTypes from './invalid-aware-types';
|
27 | import Value from './value';
|
28 |
|
/**
 * DataModel is an in-browser representation of tabular data. It supports
 * {@link https://en.wikipedia.org/wiki/Relational_algebra | relational algebra} operators as well as generic data
 * processing operators.
 * DataModel extends the {@link Relation} class, which defines all the relational algebra operators. DataModel adds
 * the generic data processing operators which are not relational algebra compliant.
 *
 * @public
 * @class
 * @extends Relation
 * @memberof Datamodel
 */
class DataModel extends Relation {
    /**
     * Creates a new DataModel instance by providing data and schema. Data could be in the form of
     *  - Flat JSON
     *  - DSV String
     *  - 2D Array
     *
     * By default DataModel finds a suitable adapter to serialize the data. DataModel also expects a
     * {@link Schema | schema} for identifying the variables present in data.
     *
     * All arguments are forwarded unchanged to the {@link Relation} constructor; this constructor only adds the
     * (initially empty) list of propagation listeners.
     *
     * @constructor
     * @example
     * const data = loadData('cars.csv');
     * const schema = [
     *      { name: 'Name', type: 'dimension' },
     *      { name: 'Miles_per_Gallon', type: 'measure', unit : 'cm', scale: '1000', numberformat: val => `${val}G`},
     *      { name: 'Cylinders', type: 'dimension' },
     *      { name: 'Displacement', type: 'measure' },
     *      { name: 'Horsepower', type: 'measure' },
     *      { name: 'Weight_in_lbs', type: 'measure' },
     *      { name: 'Acceleration', type: 'measure' },
     *      { name: 'Year', type: 'dimension', subtype: 'datetime', format: '%Y' },
     *      { name: 'Origin', type: 'dimension' }
     * ];
     * const dm = new DataModel(data, schema, { name: 'Cars' });
     * table(dm);
     *
     * @public
     *
     * @param {Array.<Object> | string | Array.<Array>} data Input data in any of the mentioned formats
     * @param {Array.<Schema>} schema Definition of the variables. Order of the variables in data and order of the
     *      variables in schema has to be same.
     * @param {object} [options] Optional arguments to specify more settings regarding the creation part
     * @param {string} [options.name] Name of the datamodel instance. If no name is given an auto generated name is
     *      assigned to the instance.
     * @param {string} [options.fieldSeparator=','] specify field separator type if the data is of type dsv string.
     */
    constructor (...args) {
        super(...args);

        // Callbacks registered through `on(PROPAGATION, cb)`.
        this._onPropagation = [];
    }

    /**
     * Reducers are simple functions which reduce an array of numbers to a representative number of the set.
     * Like an array of numbers `[10, 20, 5, 15]` can be reduced to `12.5` if average / mean reducer function is
     * applied. All the measure fields in datamodel (variables in data) need a reducer to handle aggregation.
     *
     * @public
     *
     * @return {ReducerStore} Singleton instance of {@link ReducerStore}.
     */
    static get Reducers () {
        return reducerStore;
    }

    /**
     * Configure null, undefined, invalid values in the source data.
     * Delegates to `InvalidAwareTypes.invalidAwareVals` and returns whatever that call returns.
     *
     * @public
     *
     * @param {Object} [config] - Configuration to control how null, undefined and non-parsable values are
     * represented in DataModel.
     * @param {string} [config.undefined] - Define how an undefined value will be represented.
     * @param {string} [config.null] - Define how a null value will be represented.
     * @param {string} [config.invalid] - Define how a non-parsable value will be represented.
     */
    static configureInvalidAwareTypes (config) {
        return InvalidAwareTypes.invalidAwareVals(config);
    }

    /**
     * Retrieve the data attached to an instance in JSON format.
     *
     * @example
     * // DataModel instance is already prepared and assigned to dm variable
     *  const data = dm.getData({
     *      order: 'column',
     *      formatter: {
     *          origin: (val) => (val === 'European Union' ? 'EU' : val)
     *      }
     *  });
     *  console.log(data);
     *
     * @public
     *
     * @param {Object} [options] Options to control how the raw data is to be returned.
     * @param {string} [options.order='row'] Defines if data is retrieved in row order or column order. Possible
     *      values are `'row'` and `'column'`; anything other than `'column'` is treated as row order.
     * @param {Object} [options.formatter=null] Formats the output data. This expects an object, where the keys are
     *      the names of the variables to be formatted. The formatter function is called for each row passing the
     *      value of the cell for a particular row as arguments. The formatter is a function in the form of
     *      `function (value, rowId, schema) => { ... }`
     *      Keys that do not match a field of the generated schema are ignored.
     *      Know more about {@link Fomatter}.
     * @param {boolean} [options.withUid=false] Forwarded to the data builder as its `addUid` flag.
     * @param {boolean} [options.getAllFields=false] When truthy, every field of the partial fieldspace is requested
     *      instead of only the fields named by this instance's column identifier.
     * @param {Array} [options.sort=[]] Sorting details forwarded to the data builder.
     *
     * @return {Object} Returns the object produced by the data builder. The return format looks like
     *      ```
     *          {
     *              data,
     *              schema
     *          }
     *      ```
     *      When a formatter is supplied the `data` of that same object is formatted in place.
     */
    getData (options) {
        const defOptions = {
            order: 'row',
            formatter: null,
            withUid: false,
            getAllFields: false,
            sort: []
        };
        options = Object.assign({}, defOptions, options);

        const fields = this.getPartialFieldspace().fields;
        const columnWise = options.order === 'column';

        const dataGenerated = dataBuilder.call(
            this,
            fields,
            this._rowDiffset,
            options.getAllFields ? fields.map(d => d.name()).join() : this._colIdentifier,
            options.sort,
            {
                columnWise,
                addUid: !!options.withUid
            }
        );

        const { formatter } = options;
        if (!formatter) {
            return dataGenerated;
        }

        const { data, schema, uids } = dataGenerated;
        const fieldNames = schema.map(e => e.name);

        // Pair every formatter with the column index of the field it targets; unknown names are dropped.
        const fmtFieldIdx = Object.keys(formatter).reduce((acc, name) => {
            const idx = fieldNames.indexOf(name);
            if (idx !== -1) {
                acc.push([idx, formatter[name]]);
            }
            return acc;
        }, []);

        if (columnWise) {
            // data[fieldIdx][rowIdx]
            fmtFieldIdx.forEach(([fIdx, fmtFn]) => {
                const column = data[fIdx];
                column.forEach((datum, datumIdx) => {
                    column[datumIdx] = fmtFn(datum, uids[datumIdx], schema[fIdx]);
                });
            });
        } else {
            // data[rowIdx][fieldIdx]
            data.forEach((datum, datumIdx) => {
                fmtFieldIdx.forEach(([fIdx, fmtFn]) => {
                    datum[fIdx] = fmtFn(datum[fIdx], uids[datumIdx], schema[fIdx]);
                });
            });
        }

        return dataGenerated;
    }

    /**
     * Returns the unique ids in an array.
     *
     * The ids are expanded from this instance's row diffset string, a comma separated list whose entries are
     * either a single id (`"7"`) or an inclusive range (`"3-9"`).
     *
     * @return {Array} Returns an array of ids.
     */
    getUids () {
        const rowDiffset = this._rowDiffset;
        const ids = [];

        if (rowDiffset.length) {
            rowDiffset.split(',').forEach((set) => {
                // A lone id has no end part; treat it as a range of one.
                const [start, end = start] = set.split('-').map(Number);

                // Plain loop instead of `ids.push(...range)`: spreading a very large range as call
                // arguments overflows the engine's argument limit.
                for (let id = start; id <= end; id++) {
                    ids.push(id);
                }
            });
        }

        return ids;
    }

    /**
     * Groups the data using particular dimensions and by reducing measures. It expects a list of dimensions using which
     * it projects the datamodel and perform aggregations to reduce the duplicate tuples. Refer this
     * {@link link_to_one_example_with_group_by | document} to know the intuition behind groupBy.
     *
     * DataModel by default provides definition of few {@link reducer | Reducers}.
     * {@link ReducerStore | User defined reducers} can also be registered.
     *
     * This is the chained implementation of `groupBy`.
     * `groupBy` also supports {@link link_to_compose_groupBy | composability}
     *
     * @example
     * const groupedDM = dm.groupBy(['Year'], { horsepower: 'max' } );
     * console.log(groupedDM);
     *
     * @public
     *
     * @param {Array.<string>} fieldsArr - Array containing the name of dimensions
     * @param {Object} [reducers={}] - A map whose key is the variable name and value is the name of the reducer. If its
     *      not passed, or any variable is omitted from the object, default aggregation function is used from the
     *      schema of the variable.
     * @param {Object} [config={ saveChild: true }] - Optional config. Note that a supplied object replaces the
     *      default wholesale, so `{}` behaves like `{ saveChild: false }`.
     * @param {boolean} [config.saveChild] - When truthy the result's parent is set to this instance, otherwise
     *      its parent is set to `null`.
     *
     * @return {DataModel} Returns a new DataModel instance after performing the groupby.
     */
    groupBy (fieldsArr, reducers = {}, config = { saveChild: true }) {
        const groupByString = `${fieldsArr.join()}`;
        const params = [this, fieldsArr, reducers];
        const newDataModel = groupBy(...params);

        persistDerivations(
            this,
            newDataModel,
            DM_DERIVATIVES.GROUPBY,
            { fieldsArr, groupByString, defaultReducer: reducerStore.defaultReducer() },
            reducers
        );

        if (config.saveChild) {
            newDataModel.setParent(this);
        } else {
            newDataModel.setParent(null);
        }

        return newDataModel;
    }

    /**
     * Performs sorting operation on the current {@link DataModel} instance according to the specified sorting details.
     * Like every other operator it doesn't mutate the current DataModel instance on which it was called, instead
     * returns a new DataModel instance containing the sorted data.
     *
     * DataModel support multi level sorting by listing the variables using which sorting needs to be performed and
     * the type of sorting `ASC` or `DESC`.
     *
     * In the following example, data is sorted by `Origin` field in `DESC` order in first level followed by another
     * level of sorting by `Acceleration` in `ASC` order.
     *
     * @example
     * // here dm is the pre-declared DataModel instance containing the data of 'cars.json' file
     * let sortedDm = dm.sort([
     *    ["Origin", "DESC"],
     *    ["Acceleration"] // Default value is ASC
     * ]);
     *
     * console.log(dm.getData());
     * console.log(sortedDm.getData());
     *
     * // Sort with a custom sorting function
     * sortedDm = dm.sort([
     *    ["Origin", "DESC"],
     *    ["Acceleration", (a, b) => a - b] // Custom sorting function
     * ]);
     *
     * console.log(dm.getData());
     * console.log(sortedDm.getData());
     *
     * @text
     * DataModel also provides another sorting mechanism out of the box where sort is applied to a variable using
     * another variable which determines the order.
     * Like the above DataModel contains three fields `Origin`, `Name` and `Acceleration`. Now, the data in this
     * model can be sorted by `Origin` field according to the average value of all `Acceleration` for a
     * particular `Origin` value.
     *
     * @example
     * // here dm is the pre-declared DataModel instance containing the data of 'cars.json' file
     * const sortedDm = dm.sort([
     *     ['Origin', ['Acceleration', (a, b) => avg(...a.Acceleration) - avg(...b.Acceleration)]]
     * ]);
     *
     * console.log(dm.getData());
     * console.log(sortedDm.getData());
     *
     * @public
     *
     * @param {Array.<Array>} sortingDetails - Sorting details based on which the sorting will be performed.
     * @param {Object} [config={ saveChild: false }] - Optional config; it is also recorded with the derivation.
     * @param {boolean} [config.saveChild] - When truthy the sorted instance's parent is set to this instance,
     *      otherwise its parent is set to `null`.
     * @return {DataModel} Returns a new instance of DataModel with sorted data.
     */
    sort (sortingDetails, config = { saveChild: false }) {
        const rawData = this.getData({
            order: 'row',
            sort: sortingDetails
        });
        const header = rawData.schema.map(field => field.name);
        // Header row followed by the sorted rows, i.e. the 2D-array input format.
        const dataInCSVArr = [header].concat(rawData.data);

        // `this.constructor` so that subclasses get an instance of their own type.
        const sortedDm = new this.constructor(dataInCSVArr, rawData.schema, { dataFormat: 'DSVArr' });

        persistDerivations(
            this,
            sortedDm,
            DM_DERIVATIVES.SORT,
            config,
            sortingDetails
        );

        if (config.saveChild) {
            sortedDm.setParent(this);
        } else {
            sortedDm.setParent(null);
        }

        return sortedDm;
    }

    /**
     * Performs the serialization operation on the current {@link DataModel} instance according to the specified data
     * type. When an {@link DataModel} instance is created, it de-serializes the input data into its internal format,
     * and during its serialization process, it converts its internal data format to the specified data type and returns
     * that data regardless what type of data is used during the {@link DataModel} initialization.
     *
     * Cell values are taken from each field's `formattedData()`. DSV string output joins values with the separator
     * as-is; no quoting or escaping is applied.
     *
     * @example
     * // here dm is the pre-declared DataModel instance.
     * const csvData = dm.serialize(DataModel.DataFormat.DSV_STR, { fieldSeparator: "," });
     * console.log(csvData); // The csv formatted data.
     *
     * const jsonData = dm.serialize(DataModel.DataFormat.FLAT_JSON);
     * console.log(jsonData); // The json data.
     *
     * @public
     *
     * @param {string} [type] - The data type name for serialization. Falls back to this instance's own data
     *      format when falsy.
     * @param {Object} [options] - The optional option object.
     * @param {string} [options.fieldSeparator=','] - The field separator character for DSV data type.
     * @return {Array|string} Returns the serialized data.
     * @throws {Error} When `type` is not one of the supported data formats.
     */
    serialize (type, options) {
        type = type || this._dataFormat;
        options = Object.assign({}, { fieldSeparator: ',' }, options);

        const fields = this.getFieldspace().fields;
        const fieldNames = fields.map(f => f.name());
        const colData = fields.map(f => f.formattedData());
        // Guard the no-fields case: there is no first column to read a length from.
        const rowsCount = colData.length ? colData[0].length : 0;
        // Collects the values of one row across all columns, in field order.
        const rowAt = rowIdx => colData.map(col => col[rowIdx]);

        if (type === DataFormat.FLAT_JSON) {
            const records = [];
            for (let rowIdx = 0; rowIdx < rowsCount; rowIdx++) {
                const record = {};
                fieldNames.forEach((name, colIdx) => {
                    record[name] = colData[colIdx][rowIdx];
                });
                records.push(record);
            }
            return records;
        }

        if (type === DataFormat.DSV_STR) {
            const lines = [fieldNames.join(options.fieldSeparator)];
            for (let rowIdx = 0; rowIdx < rowsCount; rowIdx++) {
                lines.push(rowAt(rowIdx).join(options.fieldSeparator));
            }
            return lines.join('\n');
        }

        if (type === DataFormat.DSV_ARR) {
            const rows = [fieldNames];
            for (let rowIdx = 0; rowIdx < rowsCount; rowIdx++) {
                rows.push(rowAt(rowIdx));
            }
            return rows;
        }

        throw new Error(`Data type ${type} is not supported`);
    }

    /**
     * Adds a field to this instance, or replaces the field of the same name if the partial fieldspace already
     * has one. Mutates this instance: the column identifier, the partial fieldspace and its caches are updated
     * and the fieldspace is recalculated.
     *
     * @param {Field} field - The field to add; it must expose `name()`, `formattedData()` and `partialField.data`.
     * @return {DataModel} Returns this instance.
     */
    addField (field) {
        const fieldName = field.name();
        // NOTE(review): the name is appended even when an existing field is replaced, so the identifier can
        // then list the name twice - confirm that downstream consumers tolerate duplicates.
        this._colIdentifier += `,${fieldName}`;
        const partialFieldspace = this._partialFieldspace;
        const cachedValueObjects = partialFieldspace._cachedValueObjects;
        const formattedData = field.formattedData();
        const rawData = field.partialField.data;

        if (!partialFieldspace.fieldsObj()[fieldName]) {
            partialFieldspace.fields.push(field);
            // Extend each cached per-row value object with the new field's value.
            cachedValueObjects.forEach((obj, i) => {
                obj[fieldName] = new Value(formattedData[i], rawData[i], field);
            });
        } else {
            const fieldIndex = partialFieldspace.fields.findIndex(fieldinst => fieldinst.name() === fieldName);
            if (fieldIndex >= 0) {
                partialFieldspace.fields[fieldIndex] = field;
            }
        }

        // flush out cached namespace values on addition of new fields
        partialFieldspace._cachedFieldsObj = null;
        partialFieldspace._cachedDimension = null;
        partialFieldspace._cachedMeasure = null;

        this.__calculateFieldspace().calculateFieldsConfig();
        return this;
    }

    /**
     * Creates a new variable calculated from existing variables. This method expects the definition of the newly created
     * variable and a function which resolves the value of the new variable from existing variables.
     *
     * The resolver is invoked once per row of the diffset as
     * `resolver(...dependencyValues, rowIndex, cloneProvider, store)`, where `dependencyValues` are the raw
     * (`partialField.data`) values of the dependency fields for that row, `cloneProvider()` returns
     * `this.detachedRoot()` and `store` is one plain object shared by all invocations of this call.
     *
     * Can create a new measure based on existing variables:
     * @example
     *  // DataModel already prepared and assigned to dm variable;
     *  const newDm = dataModel.calculateVariable({
     *      name: 'powerToWeight',
     *      type: 'measure'
     *  }, ['horsepower', 'weight_in_lbs', (hp, weight) => hp / weight ]);
     *
     *
     * Can create a new dimension based on existing variables:
     * @example
     *  // DataModel already prepared and assigned to dm variable;
     *  const child = dataModel.calculateVariable(
     *     {
     *       name: 'Efficiency',
     *       type: 'dimension'
     *     }, ['horsepower', (hp) => {
     *      if (hp < 80) { return 'low'; }
     *      else if (hp < 120) { return 'moderate'; }
     *      else { return 'high'; }
     *  }]);
     *
     * @public
     *
     * @param {Object} schema - The schema of newly defined variable.
     * @param {Array.<string|function>} dependency - An array containing the dependency variable names and a resolver
     *      function as the last element.
     * @param {Object} [config] - An optional config object.
     * @param {boolean} [config.saveChild=true] - Whether the newly created DataModel will be a child.
     * @param {boolean} [config.replaceVar=false] - Whether the newly created variable will replace the existing
     *      variable.
     * @return {DataModel} Returns an instance of DataModel with the new field.
     * @throws {Error} When the variable already exists and `replaceVar` is falsy, or when a dependency name is not
     *      a known field.
     */
    calculateVariable (schema, dependency, config) {
        schema = sanitizeUnitSchema(schema);
        config = Object.assign({}, { saveChild: true, replaceVar: false }, config);

        const fieldsConfig = this.getFieldsConfig();
        // Everything but the last entry names a dependency; the last entry is the resolver.
        const depVars = dependency.slice(0, dependency.length - 1);
        const retrieveFn = dependency[dependency.length - 1];

        if (fieldsConfig[schema.name] && !config.replaceVar) {
            throw new Error(`${schema.name} field already exists in datamodel`);
        }

        const depFieldIndices = depVars.map((field) => {
            const fieldSpec = fieldsConfig[field];
            if (!fieldSpec) {
                // @todo dont throw error here, use warning in production mode
                throw new Error(`${field} is not a valid column name.`);
            }
            return fieldSpec.index;
        });

        const clone = this.clone(config.saveChild);

        const fs = clone.getFieldspace().fields;
        const suppliedFields = depFieldIndices.map(idx => fs[idx]);

        const cachedStore = {};
        const cloneProvider = () => this.detachedRoot();

        // Indexed by row id, so the array is sparse when the diffset skips rows.
        const computedValues = [];
        rowDiffsetIterator(clone._rowDiffset, (i) => {
            const fieldsData = suppliedFields.map(field => field.partialField.data[i]);
            computedValues[i] = retrieveFn(...fieldsData, i, cloneProvider, cachedStore);
        });
        const [field] = createFields([computedValues], [schema], [schema.name]);
        clone.addField(field);

        persistDerivations(
            this,
            clone,
            DM_DERIVATIVES.CAL_VAR,
            { config: schema, fields: depVars },
            retrieveFn
        );

        return clone;
    }

    /**
     * Propagates changes across all the connected DataModel instances.
     *
     * The propagation starts from the root models resolved for this instance (`getRootDataModel` and
     * `getRootGroupByModel`) and uses the propagation namespace stored on the root DataModel.
     *
     * @param {Array} identifiers - A list of identifiers that were interacted with.
     * @param {Object} [config={}] - The propagation config; it is forwarded as a whole to the propagation helpers.
     * @param {*} [config.payload] - The interaction specific details.
     * @param {*} [config.sourceId] - Id of the propagation source.
     * @param {boolean} [config.isMutableAction] - When truthy, the immutable actions registered in the namespace
     *      are propagated as well after the main propagation.
     * @param {boolean} [addToNameSpace] - When truthy, this propagation is first recorded in the propagation
     *      namespace.
     * @param {Object} [propConfig={}] - Extra config handed to the immutable-action propagation.
     *
     * @return {DataModel} DataModel instance.
     */
    propagate (identifiers, config = {}, addToNameSpace, propConfig = {}) {
        const isMutableAction = config.isMutableAction;
        const propagationSourceId = config.sourceId;
        const payload = config.payload;
        const rootModel = getRootDataModel(this);
        const propagationNameSpace = rootModel._propagationNameSpace;
        const rootGroupByModel = getRootGroupByModel(this);
        const rootModels = {
            groupByModel: rootGroupByModel,
            model: rootModel
        };

        addToNameSpace && addToPropNamespace(propagationNameSpace, config, this);
        propagateToAllDataModels(identifiers, rootModels, { propagationNameSpace, sourceId: propagationSourceId },
            Object.assign({
                payload
            }, config));

        if (isMutableAction) {
            propagateImmutableActions(propagationNameSpace, rootModels, {
                config,
                propConfig
            }, this);
        }

        return this;
    }

    /**
     * Associates a callback with an event name. Only the propagation event is supported; any other event name
     * is ignored.
     *
     * @param {string} eventName - The name of the event.
     * @param {Function} callback - The callback to invoke.
     * @return {DataModel} Returns this current DataModel instance itself.
     */
    on (eventName, callback) {
        if (eventName === PROPAGATION) {
            this._onPropagation.push(callback);
        }
        return this;
    }

    /**
     * Unsubscribes the callbacks for the provided event name. All callbacks of the propagation event are removed
     * at once; any other event name is ignored.
     *
     * @param {string} eventName - The name of the event to unsubscribe.
     * @return {DataModel} Returns the current DataModel instance itself.
     */
    unsubscribe (eventName) {
        if (eventName === PROPAGATION) {
            this._onPropagation = [];
        }
        return this;
    }

    /**
     * This method is used to invoke the callbacks associated with propagation. Every registered callback is
     * called in registration order with this instance as `this`.
     *
     * @param {DataModel} propModel The propagated DataModel.
     * @param {Object} payload The interaction payload.
     * @memberof DataModel
     */
    handlePropagation (propModel, payload) {
        const propListeners = this._onPropagation;
        propListeners.forEach(fn => fn.call(this, propModel, payload));
    }

    /**
     * Performs the binning operation on a measure field based on the binning configuration. Binning means discretizing
     * values of a measure. Binning configuration contains an array; subsequent values from the array marks the boundary
     * of buckets in [inclusive, exclusive) range format. This operation does not mutate the subject measure field,
     * instead, it creates a new field (variable) of type dimension and subtype binned.
     *
     * Binning can be configured by
     * - providing custom bin configuration with non-uniform buckets,
     * - providing bins count,
     * - providing each bin size,
     *
     * When custom `buckets` are provided as part of binning configuration:
     * @example
     *  // DataModel already prepared and assigned to dm variable
     *  const config = { name: 'binnedHP', buckets: [30, 80, 100, 110] }
     *  const binnedDM = dataModel.bin('horsepower', config);
     *
     * @text
     * When `binsCount` is defined as part of binning configuration:
     * @example
     *  // DataModel already prepared and assigned to dm variable
     *  const config = { name: 'binnedHP', binsCount: 5, start: 0, end: 100 }
     *  const binDM = dataModel.bin('horsepower', config);
     *
     * @text
     * When `binSize` is defined as part of binning configuration:
     * @example
     *  // DataModel already prepared and assigned to dm variable
     *  const config = { name: 'binnedHorsepower', binSize: 20, start: 5}
     *  const binDM = dataModel.bin('horsepower', config);
     *
     * @public
     *
     * @param {string} measureFieldName - The name of the target measure field.
     * @param {Object} config - The config object. It is required; its members are optional.
     * @param {string} [config.name] - The name of the new field which will be created. Defaults to
     *      `<measureFieldName>_binned`.
     * @param {Array.<number>} [config.buckets] - An array containing the bucket ranges.
     * @param {number} [config.binSize] - The size of each bin. It is ignored when buckets are given.
     * @param {number} [config.binsCount] - The total number of bins to generate. It is ignored when buckets are given.
     * @param {number} [config.start] - The start value of the bucket ranges. It is ignored when buckets are given.
     * @param {number} [config.end] - The end value of the bucket ranges. It is ignored when buckets are given.
     * @param {boolean} [config.saveChild] - Forwarded to `clone` when the resulting instance is created.
     * @return {DataModel} Returns a new {@link DataModel} instance with the new field.
     * @throws {Error} When the measure field does not exist or the target field name is already taken.
     */
    bin (measureFieldName, config) {
        const fieldsConfig = this.getFieldsConfig();

        if (!fieldsConfig[measureFieldName]) {
            throw new Error(`Field ${measureFieldName} doesn't exist`);
        }

        const binFieldName = config.name || `${measureFieldName}_binned`;

        if (fieldsConfig[binFieldName]) {
            throw new Error(`Field ${binFieldName} already exists`);
        }

        const measureField = this.getFieldspace().fieldsObj()[measureFieldName];
        const { binnedData, bins } = createBinnedFieldData(measureField, this._rowDiffset, config);

        const [binField] = createFields([binnedData], [
            {
                name: binFieldName,
                type: FieldType.DIMENSION,
                subtype: DimensionSubtype.BINNED,
                bins
            }], [binFieldName]);

        const clone = this.clone(config.saveChild);
        clone.addField(binField);

        persistDerivations(
            this,
            clone,
            DM_DERIVATIVES.BIN,
            { measureFieldName, config, binFieldName },
            null
        );

        return clone;
    }

    /**
     * Creates a new {@link DataModel} instance with completely detached root from current {@link DataModel} instance,
     * the new {@link DataModel} instance has no parent-children relationship with the current one, but has same data as
     * the current one.
     * This API is useful when a completely different {@link DataModel} but with same data as the current instance is
     * needed.
     *
     * The new instance is built from this instance's flat JSON serialization and schema, and is always a plain
     * `DataModel` (not `this.constructor`).
     *
     * @example
     *  const dm = new DataModel(data, schema);
     *  const detachedDm = dm.detachedRoot();
     *
     * // has different namespace
     * console.log(dm.getPartialFieldspace().name);
     * console.log(detachedDm.getPartialFieldspace().name);
     *
     * // has same data
     * console.log(dm.getData());
     * console.log(detachedDm.getData());
     *
     * @public
     *
     * @return {DataModel} Returns a detached {@link DataModel} instance.
     */
    detachedRoot () {
        const data = this.serialize(DataFormat.FLAT_JSON);
        const schema = this.getSchema();

        return new DataModel(data, schema);
    }

    /**
     * Creates a set of new {@link DataModel} instances by splitting the set of rows in the source {@link DataModel}
     * instance based on a set of dimensions.
     *
     * For each unique dimensional value, a new split is created which creates a unique {@link DataModel} instance for
     * that split
     *
     * If multiple dimensions are provided, it splits the source {@link DataModel} instance with all possible
     * combinations of the dimensional values for all the dimensions provided
     *
     * Additionally, it also accepts a predicate function to reduce the set of rows provided. A
     * {@link link_to_selection | Selection} is performed on all the split {@link DataModel} instances based on
     * the predicate function
     *
     * @example
     * // without predicate function:
     * const splitDt = dt.splitByRow(['Origin']);
     * console.log(splitDt);
     * // This should give three unique DataModel instances, one each having rows only for 'USA',
     * // 'Europe' and 'Japan' respectively
     *
     * @example
     * // without predicate function:
     * const splitDtMulti = dt.splitByRow(['Origin', 'Cylinders']);
     * console.log(splitDtMulti);
     * // This should give DataModel instances for all unique combinations of Origin and Cylinder values
     *
     * @example
     * // with predicate function:
     * const splitWithPredDt = dt.splitByRow(['Origin'], fields => fields.Origin.value !== 'USA');
     * console.log(splitWithPredDt);
     * // Assuming a normal-mode selection keeps the rows for which the predicate is true, this should not
     * // include the DataModel for the Origin : 'USA'
     *
     *
     * @public
     *
     * @param {Array} dimensionArr - Set of dimensions based on which the split should occur
     * @param {Function} [reducerFn] - Predicate function handed to the selection performed on the splits.
     * @param {Object} [config] - The configuration object
     * @param {boolean} [config.saveChild=true] - Configuration to save child or not
     * @param {string} [config.mode=FilteringMode.NORMAL] - The mode of the selection.
     * @return {Array} Returns the new DataModel instances after operation.
     * @throws {Error} When any of the given dimensions is not a field of this instance.
     */
    splitByRow (dimensionArr, reducerFn, config) {
        const fieldsConfig = this.getFieldsConfig();

        dimensionArr.forEach((fieldName) => {
            if (!fieldsConfig[fieldName]) {
                throw new Error(`Field ${fieldName} doesn't exist in the schema`);
            }
        });

        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };

        config = Object.assign({}, defConfig, config);

        return splitWithSelect(this, dimensionArr, reducerFn, config);
    }

    /**
     * Creates a set of new {@link DataModel} instances by splitting the set of fields in the source {@link DataModel}
     * instance based on a set of common and unique field names provided.
     *
     * Each DataModel created contains a set of fields which are common to all and a set of unique fields.
     * It also accepts configurations such as saveChild and mode(inverse or normal) to include/exclude the respective
     * fields
     *
     * When no unique field set is given, a single projection made of the common fields only is produced.
     *
     * @example
     * // normal mode:
     * const splitDt = dt.splitByColumn([['Acceleration'], ['Horsepower']], ['Origin']);
     * console.log(splitDt);
     * // This should give two unique DataModel instances, both having the field 'Origin' and
     * // one each having 'Acceleration' and 'Horsepower' fields respectively
     *
     * @example
     * // inverse mode:
     * const splitDtInv = dt.splitByColumn([['Acceleration'], ['Horsepower']], ['Origin', 'Cylinders'],
     *                           { mode: 'inverse' });
     * console.log(splitDtInv);
     * // This should give DataModel instances in the following way:
     * // All DataModel Instances do not have the fields 'Origin' and 'Cylinders'
     * // One DataModel Instance has rest of the fields except 'Acceleration' and the other DataModel instance
     * // has rest of the fields except 'Horsepower'
     *
     *
     *
     * @public
     *
     * @param {Array.<Array>} [uniqueFields=[]] - Sets of unique fields, one set per resulting DataModel instance
     * @param {Array} [commonFields=[]] - Set of common fields included in all DataModel instances
     * @param {Object} [config] - The configuration object
     * @param {boolean} [config.saveChild=true] - Configuration to save child or not
     * @param {string} [config.mode=FilteringMode.NORMAL] - The mode of the selection.
     * @return {Array} Returns the new DataModel instances after operation.
     */
    splitByColumn (uniqueFields = [], commonFields = [], config) {
        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };
        const fieldConfig = this.getFieldsConfig();
        const allFields = Object.keys(fieldConfig);

        config = Object.assign({}, defConfig, config);
        // With no unique sets, fall back to one empty set so that a single projection is still produced.
        uniqueFields = uniqueFields.length ? uniqueFields : [[]];

        // One normalized projection per unique set, each extended with the common fields.
        const normalizedProjFieldSets = uniqueFields.map(fieldSet => getNormalizedProFields(
            [...fieldSet, ...commonFields],
            allFields,
            fieldConfig));

        return splitWithProject(this, normalizedProjFieldSets, config, allFields);
    }
}
|
853 |
|
854 | export default DataModel;
|