UNPKG

4.74 kBPlain TextView Raw
1#!/usr/bin/env node
2
3var _ = require("lodash");
4var crypto = require("crypto");
5var fs = require("fs");
6var hl = require("highland");
7var npmPackage = require("../package.json");
8var ndjson = require("ndjson");
9var program = require("commander");
10var VError = require("verror");
11
12var gpml2pvjson = require("../es5/2013a/toPvjson").toPvjson;
13
// Command-line surface of the converter: the version reported for this
// package, a one-line description, and two options that take an optional
// string value.
program.version(npmPackage.version);
program.description("Converts GPML (XML) to pvjson (JSON)");

// Identifier to assign to the converted pathway.
program.option(
  "--id [string]",
  'Specify unique ID of this pathway, e.g., "http://identifiers.org/wikipathways/WP4"'
);

// Version (revision) to record on the converted pathway.
program.option(
  "--pathway-version [string]",
  'Specify version of this pathway, e.g., "90358"'
);
25
// NOTE: this is in addition to the automatically generated help text
program.on("--help", function() {
  // Usage examples, printed one entry per console.log call and in this exact
  // order. The empty string produces the blank line after the heading.
  const exampleLines = [
    " Examples:",
    "",
    " Display pvjson in command line:",
    " $ gpml2pvjson --id http://identifiers.org/wikipathways/WP554 < ./test/input/WP554_77712.gpml",
    " Save pvjson to new file:",
    " $ gpml2pvjson < ./test/input/WP554_77712.gpml > ./WP554_77712.json",
    " Download from WikiPathways and convert:",
    ' $ curl "http://webservice.wikipathways.org/getPathwayAs?fileType=xml&pwId=WP554&revision=77712&format=xml" | xpath "*/ns1:data/text()" | base64 --decode | gpml2pvjson --id http://identifiers.org/wikipathways/WP554 --pathway-version=77712',
    " Get w/ WikiPathways API and convert:",
    " $ wikipathways-api-client get-pathway WP4 | gpml2pvjson --id http://identifiers.org/wikipathways/WP4"
  ];

  exampleLines.forEach(function(line) {
    console.log(line);
  });
});
50
program.parse(process.argv);

/**
 * Normalize the parsed value of an option that takes an optional string.
 *
 * Both --id and --pathway-version are declared with the optional-value
 * syntax ("[string]"), so passing the bare flag makes commander store the
 * boolean `true` instead of a string. Using `true` as a pathway id or version
 * would be meaningless, so any non-string value is treated as "not provided".
 *
 * @param {*} value - Raw value read from the parsed program.
 * @param {string} flag - Flag name, used only in the warning message.
 * @returns {string|undefined} The string value, or undefined when absent.
 */
function stringOptionOrUndefined(value, flag) {
  if (typeof value === "string") {
    return value;
  }
  if (value !== undefined && value !== null) {
    // Warn on stderr so stdout stays reserved for the converted output.
    console.error(`Warning: ${flag} was given without a value; ignoring it.`);
  }
  return undefined;
}

// Pathway identifier requested on the command line (undefined when absent).
const id = stringOptionOrUndefined(program.id, "--id");
// Pathway version requested on the command line (undefined when absent).
const pathwayVersion = stringOptionOrUndefined(
  program.pathwayVersion,
  "--pathway-version"
);
// NOTE If an id is not provided, the CLI generates a hash of the input to use as the id. See
// https://bentrask.com/?q=hash://sha256/98493caa8b37eaa26343bbf73f232597a3ccda20498563327a4c3713821df892
// This is for the CLI only; the library itself does not do this.
const HASH_NAME = "sha256";

// Matches the XML declaration that starts a GPML file.
// - Case-insensitive, because some GPML files use lowercase "utf-8".
// - The dot in "1.0" is escaped so it only matches a literal dot.
// - Whitespace is matched with \s, and an optional standalone attribute is
//   accepted, because the XML declaration grammar allows both.
// - Only non-capturing groups are used: String.prototype.split() would
//   otherwise insert captured text into its result.
const primaryDelimiterBetweenGPMLFiles = /<\?xml\s+version\s*=\s*["']1\.0["']\s+encoding\s*=\s*["']UTF-8["'](?:\s+standalone\s*=\s*["'](?:yes|no)["'])?\s*\?>/i;

// Closing tag of the GPML root element; marks the end of one document.
const secondaryDelimiterBetweenGPMLFiles = "</Pathway>";
63
// Matches the closing root tag, plus any trailing line breaks, at the very end
// of the text being split. The pattern is built from a string, so the forward
// slash in "</Pathway>" needs no escaping (only regex literals delimit on "/").
const endOfPathwayPattern = new RegExp(
  secondaryDelimiterBetweenGPMLFiles + "[\r\n]*$"
);

// Matches an item that is nothing but the secondary delimiter, optionally
// preceded by line breaks.
const isolatedDelimiterPattern = new RegExp(
  "^[\r\n]*" + secondaryDelimiterBetweenGPMLFiles + "$"
);

// Stream of GPML documents (one string per document) read from stdin.
var source = hl(process.stdin)
  // NOTE this splitter (the next two steps) splits the stream at each XML
  // header. We don't want to process the XML header with CXML, so this
  // splitter has the additional benefit of removing the header.
  .splitBy(primaryDelimiterBetweenGPMLFiles)
  // Splitting input that starts with the header leaves an empty first piece.
  // Discard blank pieces only, rather than unconditionally dropping the first
  // piece: when the input does not start with an XML header, the first piece
  // IS the first document and must be kept.
  .filter(piece => piece.trim() !== "")
  // This splitter (next three steps) splits at the closing root tag. The
  // delimiter is consumed by the split and appended back in the next step.
  // NOTE(review): because the pattern is anchored with "$", it only splits
  // where the closing tag ends the buffered text; confirm whether several
  // header-less documents arriving in a single chunk need to be supported.
  .splitBy(endOfPathwayPattern)
  // NOTE: We are appending the secondary delimiter back in, because the
  // splitter removed it, but we need to include it so CXML can process
  // the GPML.
  // TODO can we use intersperse or something instead of this?
  // Problem with intersperse: we get two streams where we want just one.
  .map(x => x + secondaryDelimiterBetweenGPMLFiles)
  // NOTE: w/out this filter, the final value in the stream would always just be
  // an extraneous, isolated copy of the secondary delimiter.
  .filter(x => !isolatedDelimiterPattern.test(x));
97
// Convert every GPML document from `source` and write each result to stdout
// as one NDJSON record.
hl(source).each(function(gpml) {
  // Hex digest of the document text. Only computed when the converted
  // pathway has no id of its own.
  function digestOfInput() {
    return crypto
      .createHash(HASH_NAME)
      .update(gpml.toString())
      .digest("hex");
  }

  // The converter takes a stream, so wrap the single document in one; only
  // the last value it emits is kept.
  const finalPvjson = gpml2pvjson(hl([gpml]), id).last();

  const finalized = finalPvjson.map(function(pvjson) {
    const pathway = pvjson.pathway;
    if (!pathway.id) {
      // Fall back to a content-derived id.
      pathway.id = "hash://" + HASH_NAME + "/" + digestOfInput();
    }
    if (pathwayVersion) {
      pathway.pathwayVersion = pathwayVersion;
    }
    return pvjson;
  });

  finalized
    .errors(function(err) {
      // Any conversion error is fatal: report it and exit non-zero.
      console.error(err);
      process.exit(1);
    })
    .pipe(ndjson.serialize())
    .pipe(process.stdout);
});
122
123// TODO does the process exit on its own?
124//process.exit(0);