1 | #!/usr/bin/env node
|
2 |
|
3 | var _ = require("lodash");
|
4 | var crypto = require("crypto");
|
5 | var fs = require("fs");
|
6 | var hl = require("highland");
|
7 | var npmPackage = require("../package.json");
|
8 | var ndjson = require("ndjson");
|
9 | var program = require("commander");
|
10 | var VError = require("verror");
|
11 |
|
12 | var gpml2pvjson = require("../es5/2013a/toPvjson").toPvjson;
|
13 |
|
// Declare the CLI metadata and the two optional flags this tool accepts.
// Each call configures the shared commander `program` object.
program.version(npmPackage.version);
program.description("Converts GPML (XML) to pvjson (JSON)");

// Optional identifier to assign to the converted pathway.
program.option(
  "--id [string]",
  'Specify unique ID of this pathway, e.g., "http://identifiers.org/wikipathways/WP4"'
);

// Optional version label to attach to the converted pathway.
program.option(
  "--pathway-version [string]",
  'Specify version of this pathway, e.g., "90358"'
);
|
25 |
|
// NOTE: this is in addition to the automatically generated help text
program.on("--help", () => {
  // Each entry pairs a heading with the shell command that demonstrates it;
  // both are printed verbatim, heading first.
  const examples = [
    [
      " Display pvjson in command line:",
      ` $ gpml2pvjson --id http://identifiers.org/wikipathways/WP554 < ./test/input/WP554_77712.gpml`
    ],
    [
      " Save pvjson to new file:",
      " $ gpml2pvjson < ./test/input/WP554_77712.gpml > ./WP554_77712.json"
    ],
    [
      " Download from WikiPathways and convert:",
      ` $ curl "http://webservice.wikipathways.org/getPathwayAs?fileType=xml&pwId=WP554&revision=77712&format=xml" | xpath "*/ns1:data/text()" | base64 --decode | gpml2pvjson --id http://identifiers.org/wikipathways/WP554 --pathway-version=77712`
    ],
    [
      " Get w/ WikiPathways API and convert:",
      ` $ wikipathways-api-client get-pathway WP4 | gpml2pvjson --id http://identifiers.org/wikipathways/WP4`
    ]
  ];

  console.log(" Examples:");
  console.log();

  examples.forEach(([heading, command]) => {
    console.log(heading);
    console.log(command);
  });
});
|
50 |
|
// Parse the command-line arguments, then read the values of the `--id` and
// `--pathway-version` options from the properties of the same name on `program`.
program.parse(process.argv);

const { id, pathwayVersion } = program;
|
// NOTE: If an id is not provided, the CLI generates a hash of the input to use as the id. See
// https://bentrask.com/?q=hash://sha256/98493caa8b37eaa26343bbf73f232597a3ccda20498563327a4c3713821df892
// This is for the CLI only; the library itself does not do this.
const HASH_NAME = "sha256";

// Matches the XML declaration that starts a GPML file, with either quote style.
// NOTE: some GPML files use lowercase "utf-8", so we need the ignore case flag to be set.
// The dot in the version number is escaped so it only matches a literal ".",
// not an arbitrary character.
const primaryDelimiterBetweenGPMLFiles = /<\?xml version=["']1\.0["'] encoding=["']UTF-8["']\?>/i;

// Closing tag of the GPML root element; used as a fallback boundary between
// GPML files.
const secondaryDelimiterBetweenGPMLFiles = "</Pathway>";
|
63 |
|
// Matches the closing Pathway tag, plus any trailing line breaks, at the very
// end of the text being split. The forward slash in the tag needs no escaping
// because the pattern is handed to the RegExp constructor as a string rather
// than written as a regex literal.
const trailingPathwayCloseTag = new RegExp(
  secondaryDelimiterBetweenGPMLFiles + "[\r\n]*$"
);

// Matches a chunk that holds nothing except (optional line breaks and) the
// closing Pathway tag that gets appended back in below.
const isolatedPathwayCloseTag = new RegExp(
  "^[\r\n]*" + secondaryDelimiterBetweenGPMLFiles + "$"
);

// Stream of GPML documents (as strings) read from stdin, one item per document.
const source = hl(process.stdin)
  // NOTE this splitter (the next two steps) successfully splits the stream when
  // the XML file has the XML header. We don't want to process the XML header
  // with CXML, so this splitter has the additional benefit of removing the
  // header, in addition to splitting the stream.
  .splitBy(primaryDelimiterBetweenGPMLFiles)
  // Splitting leaves an empty (or whitespace-only) chunk in front of a leading
  // XML header. Only such blank chunks are discarded; unconditionally dropping
  // the first chunk would silently throw away a first document that has no
  // XML header.
  .filter(chunk => chunk.trim() !== "")
  // This splitter (next three steps) is needed to handle the case of a stream
  // of GPML files when one or more of those files don't have the XML header.
  // It just splits the stream at the new file boundary, without removing or
  // otherwise changing anything, because we add the separator back in.
  .splitBy(trailingPathwayCloseTag)
  // NOTE: We are appending the secondary delimiter back in, because the
  // splitter removed it, but we need to include it so CXML can process
  // the GPML.
  // TODO can we use intersperse or something instead of this?
  // Problem with intersperse: we get two streams where we want just one.
  .map(chunk => chunk + secondaryDelimiterBetweenGPMLFiles)
  // NOTE: w/out this filter, the final value in the stream would always just be
  // an extraneous, isolated copy of the secondary delimiter.
  .filter(chunk => !isolatedPathwayCloseTag.test(chunk));
|
97 |
|
// Convert every GPML document from `source` on its own and write the result
// to stdout as newline-delimited JSON.
hl(source)
  // Wrap each GPML string in its own single-item stream for the converter.
  .map(gpml => hl([gpml]))
  .each(gpmlStream => {
    const hash = crypto.createHash(HASH_NAME);
    hash.setEncoding("hex");

    // Hash the GPML text through an observer of the stream, so a digest is
    // available as a fallback id without consuming the stream itself.
    gpmlStream.observe().each(chunk => {
      hash.update(chunk.toString());
    });

    // Fill in the fallback id (only when the result has none) and the
    // pathway version (only when one was given on the command line).
    const finalizePvjson = pvjson => {
      if (!pvjson.pathway.id) {
        pvjson.pathway.id = `hash://${HASH_NAME}/${hash.digest("hex")}`;
      }
      if (pathwayVersion) {
        pvjson.pathway.pathwayVersion = pathwayVersion;
      }
      return pvjson;
    };

    // Any error from the conversion is printed and ends the process with a
    // non-zero exit code.
    const reportAndExit = err => {
      console.error(err);
      process.exit(1);
    };

    // Only the last value emitted by the converter is serialized.
    gpml2pvjson(gpmlStream, id)
      .last()
      .map(finalizePvjson)
      .errors(reportAndExit)
      .pipe(ndjson.serialize())
      .pipe(process.stdout);
  });
|
122 |
|
123 | // TODO does the process exit on its own?
|
124 | //process.exit(0);
|