UNPKG

21 kBJavaScriptView Raw
1import { FilteringMode } from './enums';
2import { getUniqueId } from './utils';
3import {
4 updateFields,
5 cloneWithSelect,
6 cloneWithProject,
7 updateData,
8 getNormalizedProFields
9} from './helper';
10import { crossProduct, difference, naturalJoinFilter, union } from './operator';
11
/**
 * Relation provides the definitions of basic operators of relational algebra like *selection*, *projection*, *union*,
 * *difference* etc.
 *
 * It is extended by {@link DataModel} to inherit the functionalities of relational algebra concept.
 *
 * Every instance also keeps links to its parent and children (`_parent`, `_children`) so that derived instances
 * can be tracked; see {@link Relation#setParent}, {@link Relation#removeChild} and {@link Relation#dispose}.
 *
 * @class
 * @public
 * @module Relation
 * @namespace DataModel
 */
class Relation {

    /**
     * Creates a new Relation instance by providing underlying data and schema.
     *
     * Two call shapes are supported:
     * - A single argument that is already a `Relation`: the new instance copies the source's column identifier,
     *   row diffset and data format, shares its partial fieldspace, receives a freshly generated field store name
     *   and points `_parent` at the source. The source's child list is NOT touched here; registration as a child
     *   is done by {@link Relation#setParent} (which is what {@link Relation#clone} calls).
     * - Anything else: all arguments are forwarded to `updateData(this, ...params)`.
     *
     * @private
     *
     * @param {Object | string | Relation} data - The input tabular data in dsv or json format or
     *      an existing Relation instance object.
     * @param {Array} schema - An array of data schema.
     * @param {Object} [options] - The optional options.
     */
    constructor (...params) {
        const [source] = params;

        this._parent = null;
        this._derivation = [];
        this._ancestorDerivation = [];
        this._children = [];

        if (params.length === 1 && source instanceof Relation) {
            // parent datamodel was passed as part of source
            this._colIdentifier = source._colIdentifier;
            this._rowDiffset = source._rowDiffset;
            this._dataFormat = source._dataFormat;
            this._parent = source;
            this._partialFieldspace = this._parent._partialFieldspace;
            this._fieldStoreName = getUniqueId();
            this.__calculateFieldspace().calculateFieldsConfig();
        } else {
            // NOTE(review): updateData is expected to populate `_partialFieldspace` (read on the next line) along
            // with the row/column identifiers used by __calculateFieldspace - confirm against ./helper.
            updateData(this, ...params);
            this._fieldStoreName = this._partialFieldspace.name;
            this.__calculateFieldspace().calculateFieldsConfig();
            this._propagationNameSpace = {
                mutableActions: {},
                immutableActions: {}
            };
        }
    }

    /**
     * Retrieves the {@link Schema | schema} details for every {@link Field | field} as an array.
     *
     * @public
     *
     * @return {Array.<Schema>} Array of fields schema.
     *      ```
     *      [
     *          { name: 'Name', type: 'dimension' },
     *          { name: 'Miles_per_Gallon', type: 'measure', numberFormat: (val) => `${val} miles / gallon` },
     *          { name: 'Cylinder', type: 'dimension' },
     *          { name: 'Displacement', type: 'measure', defAggFn: 'max' },
     *          { name: 'HorsePower', type: 'measure', defAggFn: 'max' },
     *          { name: 'Weight_in_lbs', type: 'measure', defAggFn: 'avg', },
     *          { name: 'Acceleration', type: 'measure', defAggFn: 'avg' },
     *          { name: 'Year', type: 'dimension', subtype: 'datetime', format: '%Y' },
     *          { name: 'Origin' }
     *      ]
     *      ```
     */
    getSchema () {
        return this.getFieldspace().fields.map(field => field.schema());
    }

    /**
     * Returns the name of the {@link DataModel} instance. If no name was specified during {@link DataModel}
     * initialization, then it returns an auto-generated name.
     *
     * @public
     *
     * @return {string} Name of the DataModel instance.
     */
    getName () {
        return this._fieldStoreName;
    }

    /**
     * Returns the fieldspace last computed by {@link Relation#__calculateFieldspace}.
     *
     * @return {Object} The current fieldspace; its `fields` array is what `getSchema` and
     *      `calculateFieldsConfig` iterate over.
     */
    getFieldspace () {
        return this._fieldspace;
    }

    /**
     * Recomputes `_fieldspace` by passing the current row diffset, column identifier, partial fieldspace and
     * field store name to `updateFields`.
     *
     * @private
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    __calculateFieldspace () {
        this._fieldspace = updateFields(
            [this._rowDiffset, this._colIdentifier],
            this.getPartialFieldspace(),
            this._fieldStoreName
        );
        return this;
    }

    /**
     * Returns the partial fieldspace backing this instance. Instances created from another Relation share the
     * same partial fieldspace object as their source.
     *
     * @return {Object} The partial fieldspace.
     */
    getPartialFieldspace () {
        return this._partialFieldspace;
    }

    /**
     * Performs {@link link_of_cross_product | cross-product} between two {@link DataModel} instances and returns a
     * new {@link DataModel} instance containing the results. This operation is also called theta join.
     *
     * Cross product takes two sets and creates one set where each value of one set is paired with each value of
     * the other set.
     *
     * This method takes an optional predicate which filters the generated result rows. If the predicate returns
     * true the combined row is included in the resultant table.
     *
     * @example
     *  let originDM = dm.project(['Origin','Origin_Formal_Name']);
     *  let carsDM = dm.project(['Name','Miles_per_Gallon','Origin'])
     *
     *  console.log(carsDM.join(originDM));
     *
     *  console.log(carsDM.join(originDM,
     *      obj => obj[originDM.getName()].Origin === obj[carsDM.getName()].Origin));
     *
     * @text
     * This is chained version of `join` operator. `join` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel to be joined with the current instance DataModel.
     * @param {SelectionPredicate} filterFn - The predicate function that will filter the result of the crossProduct.
     *
     * @return {DataModel} New DataModel instance created after joining.
     */
    join (joinWith, filterFn) {
        return crossProduct(this, joinWith, filterFn);
    }

    /**
     * {@link natural_join | Natural join} is a special kind of cross-product join where the filtering of rows is
     * performed internally by resolving the fields common to both tables; rows having the same value in those
     * common fields are included.
     *
     * @example
     *  let originDM = dm.project(['Origin','Origin_Formal_Name']);
     *  let carsDM = dm.project(['Name','Miles_per_Gallon','Origin'])
     *
     *  console.log(carsDM.naturalJoin(originDM));
     *
     * @text
     * This is chained version of `naturalJoin` operator. `naturalJoin` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} joinWith - The DataModel with which the current instance of DataModel on which the method is
     *      called will be joined.
     * @return {DataModel} New DataModel instance created after joining.
     */
    naturalJoin (joinWith) {
        // The trailing `true` is forwarded verbatim to crossProduct as its fourth argument.
        return crossProduct(this, joinWith, naturalJoinFilter(this, joinWith), true);
    }

    /**
     * {@link link_to_union | Union} operation can be termed as vertical stacking of all rows from both the DataModel
     * instances, provided that both of the {@link DataModel} instances should have same column names.
     *
     * @example
     * console.log(EuropeanMakerDM.union(USAMakerDM));
     *
     * @text
     * This is chained version of `union` operator. `union` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} unionWith - DataModel instance for which union has to be applied with the instance on which
     *      the method is called
     *
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    union (unionWith) {
        // Inside the method body `union` resolves to the imported operator, not to this method.
        return union(this, unionWith);
    }

    /**
     * {@link link_to_difference | Difference } operation only includes rows which are present in the datamodel on
     * which it was called but not in the one passed as argument.
     *
     * @example
     * console.log(highPowerDM.difference(highExpensiveDM));
     *
     * @text
     * This is chained version of `difference` operator. `difference` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {DataModel} differenceWith - DataModel instance for which difference has to be applied with the instance
     *      on which the method is called
     * @return {DataModel} New DataModel instance with the result of the operation
     */
    difference (differenceWith) {
        // Inside the method body `difference` resolves to the imported operator, not to this method.
        return difference(this, differenceWith);
    }

    /**
     * {@link link_to_selection | Selection} is a row filtering operation. It expects a predicate and an optional mode
     * which control which all rows should be included in the resultant DataModel instance.
     *
     * {@link SelectionPredicate} is a function which returns a boolean value. For selection operation the selection
     * function is called for each row of DataModel instance with the current row passed as argument.
     *
     * After executing {@link SelectionPredicate} the rows are labeled as either an entry of selection set or an entry
     * of rejection set.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warning
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * @example
     *  // with selection mode NORMAL:
     *  const normDt = dt.select(fields => fields.Origin.value === "USA")
     *  console.log(normDt);
     *
     * // with selection mode INVERSE:
     * const inverDt = dt.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.INVERSE })
     * console.log(inverDt);
     *
     * // with selection mode ALL:
     * const dtArr = dt.select(fields => fields.Origin.value === "USA", { mode: DataModel.FilteringMode.ALL })
     * // print the selected parts
     * console.log(dtArr[0]);
     * // print the inverted parts
     * console.log(dtArr[1]);
     *
     * @text
     * This is chained version of `select` operator. `select` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {Function} selectFn - The predicate function which is called for each row with the current row.
     *      ```
     *          function (row, i, cloneProvider, store)  { ... }
     *      ```
     * @param {Object} config - The configuration object to control the inclusion exclusion of a row in resultant
     *      DataModel instance.
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - The mode of the selection. A falsy value falls
     *      back to `FilteringMode.NORMAL`.
     * @param {boolean} [config.saveChild=true] - Forwarded to `cloneWithSelect` as `{ saveChild }`.
     * @return {DataModel} Returns the new DataModel instance(s) after operation.
     */
    select (selectFn, config) {
        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };
        // Merge into a fresh object so the caller's config is never mutated.
        config = Object.assign({}, defConfig, config);
        // Object.assign copies an explicit `mode: undefined` over the default, hence the extra fallback.
        config.mode = config.mode || defConfig.mode;

        const cloneConfig = { saveChild: config.saveChild };
        return cloneWithSelect(this, selectFn, config, cloneConfig);
    }

    /**
     * Tells whether the current {@link DataModel} instance is empty, i.e. it has no rows or no columns.
     *
     * @example
     * const schema = [
     *    { name: 'CarName', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     * const data = [];
     *
     * const dt = new DataModel(data, schema);
     * console.log(dt.isEmpty());
     *
     * @public
     *
     * @return {Boolean} True if the datamodel has no data, otherwise false.
     */
    isEmpty () {
        return !this._rowDiffset.length || !this._colIdentifier.length;
    }

    /**
     * Creates a clone from the current DataModel instance with child parent relationship.
     *
     * The clone is built with `new this.constructor(this)`, so a subclass instance yields a clone of the same
     * subclass.
     *
     * @private
     * @param {boolean} [saveChild=true] - Whether the cloned instance would be recorded in the parent instance.
     *      When false the clone is detached (its parent is set to `null`).
     * @return {DataModel} - Returns the newly cloned DataModel instance.
     */
    clone (saveChild = true) {
        const clonedDm = new this.constructor(this);
        // The constructor only sets `_parent`; setParent either registers the clone in `_children` or detaches it.
        clonedDm.setParent(saveChild ? this : null);
        return clonedDm;
    }

    /**
     * {@link Projection} is filter column (field) operation. It expects list of fields' name and either include those
     * or exclude those based on {@link FilteringMode} on the resultant variable.
     *
     * Projection expects array of fields name based on which it creates the selection and rejection set. All the field
     * whose name is present in array goes in selection set and rest of the fields goes in rejection set.
     *
     * {@link FilteringMode} operates on the selection and rejection set to determine which one would reflect in the
     * resultant datamodel.
     *
     * @warning
     * Selection and rejection set is only a logical idea for concept explanation purpose.
     *
     * @example
     *  const dt = new DataModel(data, schema);
     *
     *  // with projection mode NORMAL:
     *  const normDt = dt.project(["Name", "HorsePower"]);
     *  console.log(normDt.getData());
     *
     *  // with projection mode INVERSE:
     *  const inverDt = dt.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.INVERSE })
     *  console.log(inverDt.getData());
     *
     *  // with projection mode ALL:
     *  const dtArr = dt.project(["Name", "HorsePower"], { mode: DataModel.FilteringMode.ALL })
     *  // print the normal parts
     *  console.log(dtArr[0].getData());
     *  // print the inverted parts
     *  console.log(dtArr[1].getData());
     *
     * @text
     * This is chained version of `project` operator. `project` can also be used as
     * {@link link_to_join_op | functional operator}.
     *
     * @public
     *
     * @param {Array.<string | RegExp>} projField - An array of column names in string or regular expression.
     * @param {Object} [config] - An optional config to control the creation of new DataModel
     * @param {FilteringMode} [config.mode=FilteringMode.NORMAL] - Mode of the projection. A falsy value falls back
     *      to `FilteringMode.NORMAL`, matching the behaviour of `select`.
     * @param {boolean} [config.saveChild=true] - Forwarded to `cloneWithProject`.
     *
     * @return {DataModel | DataModel[]} Returns the new DataModel instance after operation. With
     *      `FilteringMode.ALL` an array `[projection, rejection]` is returned.
     */
    project (projField, config) {
        const defConfig = {
            mode: FilteringMode.NORMAL,
            saveChild: true
        };
        // Merge into a fresh object so the caller's config is never mutated.
        config = Object.assign({}, defConfig, config);
        // Same fallback as `select`: an explicit `mode: undefined`/`null` must not override the default.
        config.mode = config.mode || defConfig.mode;

        const fieldConfig = this.getFieldsConfig();
        const allFields = Object.keys(fieldConfig);
        const normalizedProjField = getNormalizedProFields(projField, allFields, fieldConfig);

        if (config.mode === FilteringMode.ALL) {
            // ALL yields both halves: the projected fields first, the rejected fields second.
            const projectionClone = cloneWithProject(this, normalizedProjField, {
                mode: FilteringMode.NORMAL,
                saveChild: config.saveChild
            }, allFields);
            const rejectionClone = cloneWithProject(this, normalizedProjField, {
                mode: FilteringMode.INVERSE,
                saveChild: config.saveChild
            }, allFields);
            return [projectionClone, rejectionClone];
        }

        return cloneWithProject(this, normalizedProjField, config, allFields);
    }

    /**
     * Returns the field configuration map last built by {@link Relation#calculateFieldsConfig}.
     *
     * @return {Object} Map of field name to `{ index, def }`.
     */
    getFieldsConfig () {
        return this._fieldConfig;
    }

    /**
     * Rebuilds `_fieldConfig` from the current fieldspace: for every field, keyed by `field.name()`, it stores the
     * field's position in the fieldspace (`index`) and its schema (`def`).
     *
     * @return {Relation} The current instance, to allow chaining.
     */
    calculateFieldsConfig () {
        this._fieldConfig = this._fieldspace.fields.reduce((acc, fieldObj, i) => {
            acc[fieldObj.name()] = {
                index: i,
                def: fieldObj.schema()
            };
            return acc;
        }, {});
        return this;
    }

    /**
     * Frees up the resources associated with the current DataModel instance and breaks all the links instance has in
     * the DAG: it is removed from its parent's child list, its own parent reference is cleared, and every direct
     * child is orphaned (its `_parent` is set to `null`).
     *
     * @public
     */
    dispose () {
        this._parent && this._parent.removeChild(this);
        this._parent = null;
        this._children.forEach((child) => {
            child._parent = null;
        });
        this._children = [];
    }

    /**
     * Removes the specified child {@link DataModel} from the child list of the current {@link DataModel} instance.
     * If the given instance is not a direct child nothing happens. The child's own parent reference is not
     * modified by this method.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const dt2 = dt.select(fields => fields.Origin.value === "USA")
     * dt.removeChild(dt2);
     *
     * @private
     *
     * @param {DataModel} child - The child instance to remove from this instance's child list.
     */
    removeChild (child) {
        const idx = this._children.findIndex(sibling => sibling === child);
        if (idx !== -1) {
            this._children.splice(idx, 1);
        }
    }

    /**
     * Sets the specified {@link DataModel} as a parent for the current {@link DataModel} instance. The instance is
     * first removed from its previous parent's child list (if any) and then appended to the new parent's child
     * list. Passing a falsy `parent` simply detaches the instance.
     *
     * @param {DataModel} parent - The datamodel instance which will act as parent.
     */
    setParent (parent) {
        this._parent && this._parent.removeChild(this);
        this._parent = parent;
        parent && parent._children.push(this);
    }

    /**
     * Returns the parent {@link DataModel} instance.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const parentDm = dt2.getParent();
     *
     * @return {DataModel} Returns the parent DataModel instance, or `null` when there is none.
     */
    getParent () {
        return this._parent;
    }

    /**
     * Returns the immediate child {@link DataModel} instances.
     *
     * The returned array is the instance's own internal list, not a copy.
     *
     * @example
     * const schema = [
     *    { name: 'Name', type: 'dimension' },
     *    { name: 'HorsePower', type: 'measure' },
     *    { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *    { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *    { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *    { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *    { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     *
     * const childDm1 = dt.select(fields => fields.Origin.value === "USA");
     * const childDm2 = dt.select(fields => fields.Origin.value === "Japan");
     * const childDm3 = dt.groupBy(["Origin"]);
     *
     * @return {DataModel[]} Returns the immediate child DataModel instances.
     */
    getChildren () {
        return this._children;
    }

    /**
     * Returns the in-between operation meta data while creating the current {@link DataModel} instance.
     *
     * @example
     * const schema = [
     *   { name: 'Name', type: 'dimension' },
     *   { name: 'HorsePower', type: 'measure' },
     *   { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *   { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *   { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *   { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *   { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const dt3 = dt2.groupBy(["Origin"]);
     * const derivations = dt3.getDerivations();
     *
     * @return {Any[]} Returns the derivation meta data.
     */
    getDerivations () {
        return this._derivation;
    }

    /**
     * Returns the in-between operation meta data happened from root {@link DataModel} to current instance.
     *
     * @example
     * const schema = [
     *   { name: 'Name', type: 'dimension' },
     *   { name: 'HorsePower', type: 'measure' },
     *   { name: "Origin", type: 'dimension' }
     * ];
     *
     * const data = [
     *   { Name: "chevrolet chevelle malibu", HorsePower: 130, Origin: "USA" },
     *   { Name: "citroen ds-21 pallas", HorsePower: 115, Origin: "Europe" },
     *   { Name: "datsun pl510", HorsePower: 88, Origin: "Japan" },
     *   { Name: "amc rebel sst", HorsePower: 150, Origin: "USA"},
     * ]
     *
     * const dt = new DataModel(data, schema);
     * const dt2 = dt.select(fields => fields.Origin.value === "USA");
     * const dt3 = dt2.groupBy(["Origin"]);
     * const ancDerivations = dt3.getAncestorDerivations();
     *
     * @return {Any[]} Returns the previous derivation meta data.
     */
    getAncestorDerivations () {
        return this._ancestorDerivation;
    }
}
577
578export default Relation;