UNPKG

6.42 kBJavaScriptView Raw
1var _ = require("lodash");
2var fs = require("fs");
3var path = require("path");
4var BridgeDbModule = require("../es5/BridgeDb");
5var BridgeDb = BridgeDbModule.BridgeDb;
6var CONFIG_DEFAULT = BridgeDbModule.CONFIG_DEFAULT;
7var hl = require("highland");
8var ndjson = require("ndjson");
9var JSONStream = require("JSONStream");
10var Rx = require("rx-extra");
11require("../es5/spinoffs/pipeToStdout");
12require("../es5/spinoffs/toNodeStream");
13var VError = require("verror");
14var dsvXrefs = require("./dsvXrefs");
15var jsonXrefs = require("./jsonXrefs");
16var noneXrefs = require("./noneXrefs");
17
/** Default for the `--base` option: the path that is prepended to all other paths. */
const DEFAULT_ADDMAPPEDXREFS_BASE = ".";

/**
 * Dispatch table from the value of `--format` to the handler that serves it.
 * Both delimiter-separated formats (csv and tsv) go to the same dsv handler.
 */
const xrefsForFormat = {
  csv: dsvXrefs,
  json: jsonXrefs,
  none: noneXrefs,
  tsv: dsvXrefs,
};
26
/**
 * One entry per help script that sits next to this file
 * (`<name>Xrefs.help.sh`), paired with the input formats that script covers.
 * Each entry has the shape `{ path, formats }`.
 */
const helpTextDescriptions = [
  ["dsv", ["tsv", "csv"]],
  ["json", ["json"]],
  ["none", ["none"]],
].map(([name, formats]) => ({
  path: path.join(__dirname, `${name}Xrefs.help.sh`),
  formats,
}));
46
// NOTE: BridgeDb's own default is expressed in milliseconds; the CLI works
// in seconds, so convert once here.
const { timeout: timeoutDefaultMs } = CONFIG_DEFAULT.http;
const TIMEOUT_DEFAULT_SECONDS = timeoutDefaultMs / 1000;
49
50module.exports = function createXrefsCLI(program) {
51 program
52 .command(
53 `xrefs <organism> <xrefDataSource> <xrefIdentifier> [desiredXrefDataSource...]`
54 )
55 .description(
56 `Get alternate xrefs (datasource identifiers) and
57optionally insert them into to your json, csv or tsv.
58
59For example, ensembl:ENSG00000132031 -> ncbigene:4148 and uniprot:O15232
60
61The xrefs come from BridgeDb.
62`
63 )
64 .option(
65 "-f,--format [string]",
66 `Input format, e.g., none, json, csv, tsv. Default: none`,
67 // TODO: we are specifying default both here and below for options/optionsRaw
68 format => (!!format ? format : "none")
69 )
70 .option(
71 "-t, --timeout <seconds>",
72 `Timeout in seconds for HTTP requests. Default: ${TIMEOUT_DEFAULT_SECONDS}`,
73 // TODO: we are specifying default both here and below for options/optionsRaw
74 timeout => (!!timeout ? parseInt(timeout) : TIMEOUT_DEFAULT_SECONDS)
75 )
76 .option(
77 "-i,--insertion-point [path]",
78 `Where to add alternate mapped xrefs`
79 )
80 .option(
81 "-b,--base [path]",
82 `(json only) prepended to all other paths. Default: "${DEFAULT_ADDMAPPEDXREFS_BASE}"
83 Similar in concept to HTML and XML BASE. More info:
84 https://developer.mozilla.org/en-US/docs/Web/HTML/Element/base
85 https://www.w3.org/TR/xmlbase/`,
86 // TODO: we are specifying default both here and below for options/optionsRaw
87 base => (!!base ? base : DEFAULT_ADDMAPPEDXREFS_BASE)
88 )
89 // DSV options
90 .option(
91 "--headers [boolean]",
92 "(tsv/csv only) Does first row of input file contain headers? Default: false"
93 )
94 /* handled by specifying format tsv or csv
95 .option(
96 "-d, --delimiter [string]",
97 'Delimiter for file, e.g., "," or "\\t". Default: "\\t"'
98 )
99 //*/
100 .option(
101 "-n, --newline [string]",
102 '(tsv/csv only) New line character for file, e.g., "\\n" or "\\r\\n". Default: "\\n"'
103 // (use \r\n for CRLF files)
104 )
105 .option(
106 "-q, --quote [string]",
107 `(tsv/csv only) Quote character for file, e.g., '"'. Default: '"'`
108 )
109 .option(
110 "-c, --comment [string]",
111 `(tsv/csv only) Comment character for file, e.g., "#" or "''". Default: '#'`
112 )
113 .action(function(
114 organismArg,
115 xrefDataSourceArg,
116 xrefIdentifierArg,
117 desiredXrefDataSources,
118 optionsRaw
119 ) {
120 const options = _.defaults(optionsRaw, {
121 base: DEFAULT_ADDMAPPEDXREFS_BASE,
122 insertionPoint: "none",
123 format: "none",
124 timeout: TIMEOUT_DEFAULT_SECONDS
125 });
126
127 const { format, timeout: timeoutSeconds } = options;
128 // NOTE: http (internal to BridgeDb) expects milliseconds.
129 const timeout = timeoutSeconds * 1000;
130
131 if (process.env.NODE_ENV === "development") {
132 if (!process.env.MOCK_SERVER_WARNING_GIVEN) {
133 console.warn("Using development mock server.");
134 process.env.MOCK_SERVER_WARNING_GIVEN = "yes";
135 }
136 var bridgeDb = new BridgeDb({
137 baseIri: "http://localhost:4522/",
138 dataSourcesHeadersIri:
139 "http://localhost:4522/datasources_headers.txt",
140 dataSourcesMetadataIri: "http://localhost:4522/datasources.txt",
141 http: { timeout }
142 });
143 //*/
144 } else {
145 var bridgeDb = new BridgeDb({ http: { timeout } });
146 }
147
148 if (format in xrefsForFormat) {
149 xrefsForFormat[format](
150 bridgeDb,
151 organismArg,
152 xrefDataSourceArg,
153 xrefIdentifierArg,
154 desiredXrefDataSources,
155 options
156 );
157 } else {
158 throw new VError(`Unrecognized format: ${format}`);
159 }
160 })
161 .on("--help", function() {
162 console.log(`
163For organism, xrefDataSource and xrefIdentifier, you can specify either one of these:
164* the actual value to use, e.g., "Human"
165* the path to it in your data, e.g.,
166 * ".data.organism" for json
167 * "0" (column index) or "organism" (column header) for tsv or csv
168
169organism: the species for the gene or protein. For metabolites,
170 this is a dummy variable -- just enter "Human".
171
172xrefDataSource: the data source (AKA database or namespace) responsible for creating
173 the type of identifier in your data, e.g., "Entrez Gene" or "ensembl"
174
175xrefIdentifier: gene, protein or metabolite identifier, e.g., "1234" or "ENSG00000164344"
176
177desiredXrefDataSource: limit results to a single data source, e.g., "ensembl", or
178 to multiple data sources, e.g.,
179 "ensembl" "uniprot" "hgnc.symbol"
180`);
181
182 console.log(
183 helpTextDescriptions
184 .map(function({ path, formats }) {
185 const helpText = fs
186 .readFileSync(path, "utf8")
187 // We drop the shebang when displaying here as help
188 .replace(/^.*\n/, "")
189 // We also drop bash comment characters
190 .replace(/^#(\ )?/gm, "")
191 // and replace path to bridgedb with just bridgedb
192 .replace(/(\.\/)?bin\/(bridgedb)/gm, "$2");
193 return [`For format ${formats.join(" or ")}:`, helpText].join("\n");
194 })
195 .join("\n")
196 );
197 });
198};