UNPKG

4.98 kBJavaScriptView Raw
1var replaceStream = require("replacestream");
2var _ = require("lodash/fp");
3// TODO does csv follow this: https://tools.ietf.org/html/rfc4180
4var csv = require("csv-streamify");
5var hl = require("highland");
6var Rx = require("rx-extra");
7require("../es5/spinoffs/pipeToStdout");
8require("../es5/spinoffs/toNodeStream");
9var VError = require("verror");
10
/**
 * Field delimiter for each supported value of `options.format`.
 *
 * The table has no prototype on purpose: it is queried with the `in`
 * operator, and a regular object literal would also answer "yes" for
 * inherited keys such as "constructor" or "toString", yielding a function
 * instead of a delimiter string.
 */
const delimiterFor = Object.assign(Object.create(null), {
  csv: ",",
  tsv: "\t"
});
15
/**
 * Classify one positional argument.
 *
 * @param {*} arg - Raw argument value (typically a string or undefined).
 * @returns {string} One of:
 *   - "auto"        when `arg` is undefined,
 *   - "columnIndex" when `arg` is a finite number or a string consisting
 *                   solely of an (optionally signed) integer,
 *   - "string"      for everything else.
 */
function argType(arg) {
  if (arg === undefined) {
    return "auto";
  }

  if (typeof arg === "number") {
    return Number.isFinite(arg) ? "columnIndex" : "string";
  }

  // The whole value must be an integer. A bare parseInt() accepts any
  // numeric prefix, so a literal such as "1007_s_at" would otherwise be
  // mistaken for column index 1007.
  const isIntegerString =
    typeof arg === "string" && /^\s*[+-]?\d+\s*$/.test(arg);
  return isIntegerString ? "columnIndex" : "string";
}
25
/**
 * Curried accessor that extracts one value for a parsed row.
 *
 * @param {boolean} headersEnabled - Whether rows are looked up by key
 *   (header mode) rather than by the argument type.
 * @param {string} kind - Argument classification ("auto", "columnIndex"
 *   or "string"); only consulted when `headersEnabled` is false.
 * @param {string|number} parsedArg - Key/index to look up, or the literal
 *   value to fall back to.
 * @param {Object|Array} row - One parsed row.
 * @returns {*} In header mode: `row[parsedArg]` when the row has that key
 *   as an own property, otherwise `parsedArg` itself. Without headers:
 *   `parsedArg` itself for "string" arguments, otherwise `row[parsedArg]`.
 */
const field = _.curry(function(headersEnabled, kind, parsedArg, row) {
  if (headersEnabled) {
    // Own-property check rather than `in`: `in` also matches inherited
    // members, so a literal such as "toString" or "constructor" would
    // return a function from Object.prototype instead of the literal.
    // NOTE(review): a numeric column index is not resolved positionally in
    // header mode; unless the row has a key equal to that number, the number
    // itself is returned. Confirm whether that is intended.
    return Object.prototype.hasOwnProperty.call(row, parsedArg)
      ? row[parsedArg]
      : parsedArg;
  }

  return kind === "string" ? parsedArg : row[parsedArg];
});
33
34module.exports = function(
35 bridgeDb,
36 organismArg,
37 xrefDataSourceArg,
38 xrefIdentifierArg,
39 desiredXrefDataSources,
40 options
41) {
42 const { insertionPoint } = options;
43 var headersOption = options.hasOwnProperty("headers")
44 ? Boolean(options.headers)
45 : false;
46 var delimiterOption =
47 options.format in delimiterFor ? delimiterFor[options.format] : "\t";
48 var newlineOption = options.newline || "\n";
49 var quoteOption = options.quote || '"';
50 var commentOption = options.comment || "#";
51
52 const organismArgType = argType(organismArg);
53 const xrefDataSourceArgType = argType(xrefDataSourceArg);
54 const xrefIdentifierArgType = argType(xrefIdentifierArg);
55
56 var organismColumn =
57 organismArgType === "auto"
58 ? 0
59 : organismArgType === "columnIndex"
60 ? parseInt(organismArg)
61 : organismArg;
62
63 var xrefDataSourceColumn =
64 xrefDataSourceArgType === "auto"
65 ? _.isFinite(parseInt(organismColumn))
66 ? organismColumn + 1
67 : 0
68 : xrefDataSourceArgType === "columnIndex"
69 ? parseInt(xrefDataSourceArg)
70 : xrefDataSourceArg;
71
72 var xrefIdentifierColumn =
73 xrefIdentifierArgType === "auto"
74 ? _.isFinite(parseInt(xrefDataSourceColumn))
75 ? xrefDataSourceColumn + 1
76 : 0
77 : xrefIdentifierArgType === "columnIndex"
78 ? parseInt(xrefIdentifierArg)
79 : xrefIdentifierArg;
80
81 const field3 = field(headersOption);
82 const organismField = field3(organismArgType, organismColumn);
83 const xrefDataSourceField = field3(
84 xrefDataSourceArgType,
85 xrefDataSourceColumn
86 );
87 const xrefIdentifierField = field3(
88 xrefIdentifierArgType,
89 xrefIdentifierColumn
90 );
91
92 var parser = csv({
93 delimiter: delimiterOption,
94 newline: newlineOption,
95 quote: quoteOption,
96 objectMode: true,
97 columns: headersOption
98 });
99
100 var commentLineRe = new RegExp(
101 `^${commentOption}.*$[${newlineOption}]?`,
102 "gm"
103 );
104 Rx.Observable.fromNodeReadableStream(
105 hl(process.stdin)
106 // Ignore comments
107 .through(replaceStream(commentLineRe, ""))
108 .through(parser)
109 )
110 .mergeMap(function(row) {
111 const organism = organismField(row);
112 const xrefDataSource = xrefDataSourceField(row);
113 const xrefIdentifier = xrefIdentifierField(row);
114 /* TODO should we use this?
115 .distinctUntilChanged(function(
116 a,
117 b
118 ) {
119 return (
120 [a.xrefDataSource, a.xrefIdentifier].join() ===
121 [b.xrefDataSource, b.xrefIdentifier].join()
122 );
123 //return JSON.stringify(a) === JSON.stringify(b);
124 })
125 //*/
126 var mappedXrefs$ = bridgeDb.xrefs(
127 organism,
128 xrefDataSource,
129 xrefIdentifier,
130 desiredXrefDataSources
131 );
132 if (insertionPoint !== "none") {
133 // wide format
134 return mappedXrefs$.map(function(xrefs) {
135 const xrefsField =
136 quoteOption +
137 xrefs
138 .map(function(xref) {
139 return [xref.xrefDataSource, xref.xrefIdentifier].join(":");
140 })
141 .join(delimiterOption) +
142 quoteOption;
143
144 const oldRow = headersOption ? _.values(row) : row;
145 const rowOut = [];
146 oldRow.splice(0, insertionPoint).forEach(function(field) {
147 rowOut.push(field);
148 });
149 rowOut.push(xrefsField);
150 oldRow.forEach(function(field) {
151 rowOut.push(field);
152 });
153
154 return rowOut.join(delimiterOption);
155 });
156 } else {
157 // long format
158 return mappedXrefs$.mergeMap(function(xrefs) {
159 const firstColumn =
160 typeof xrefDataSourceOption !== "undefined"
161 ? xrefIdentifier
162 : [xrefDataSource, xrefIdentifier].join(delimiterOption);
163
164 return Rx.Observable.from(xrefs).map(xref => {
165 return [
166 firstColumn,
167 [xref.xrefDataSource, xref.xrefIdentifier].join(delimiterOption)
168 ].join(delimiterOption);
169 });
170 });
171 }
172 })
173 .map(line => line + newlineOption)
174 .pipeToStdout();
175};