#!/usr/bin/env node

const _ = require("lodash");
const crypto = require("crypto");
const fs = require("fs");
const hl = require("highland");
const npmPackage = require("../package.json");
const ndjson = require("ndjson");
const program = require("commander");
const VError = require("verror");

const gpml2pvjson = require("../lib/2013a/toPvjson").toPvjson;

// Command-line interface: version and description plus the two optional
// flags that are read back after parsing.
program
  .version(npmPackage.version)
  .description("Converts GPML (XML) to pvjson (JSON)")
  .option(
    "--id [string]",
    'Specify unique ID of this pathway, e.g., "http://identifiers.org/wikipathways/WP4"'
  )
  .option(
    "--pathway-version [string]",
    'Specify version of this pathway, e.g., "90358"'
  );

// NOTE: these usage examples are printed in addition to the automatically
// generated help text. Each entry is written to stdout as one line; the
// empty entry produces a blank line.
program.on("--help", () => {
  const exampleLines = [
    " Examples:",
    "",
    " Display pvjson in command line:",
    ` $ gpml2pvjson --id http://identifiers.org/wikipathways/WP554 < ./test/input/WP554_77712.gpml`,
    " Save pvjson to new file:",
    " $ gpml2pvjson < ./test/input/WP554_77712.gpml > ./WP554_77712.json",
    " Download from WikiPathways and convert:",
    ` $ curl "http://webservice.wikipathways.org/getPathwayAs?fileType=xml&pwId=WP554&revision=77712&format=xml" | xpath "*/ns1:data/text()" | base64 --decode | gpml2pvjson --id http://identifiers.org/wikipathways/WP554 --pathway-version=77712`,
    " Get w/ WikiPathways API and convert:",
    ` $ wikipathways-api-client get-pathway WP4 | gpml2pvjson --id http://identifiers.org/wikipathways/WP4`
  ];
  exampleLines.forEach(line => console.log(line));
});

program.parse(process.argv);

// Values of --id and --pathway-version (undefined when a flag was not given).
const { id, pathwayVersion } = program;

// NOTE If an id is not provided, the CLI generates a hash of the input to use as the id. See
// https://bentrask.com/?q=hash://sha256/98493caa8b37eaa26343bbf73f232597a3ccda20498563327a4c3713821df892
// This is for the CLI only; the library itself does not do this.
// Hash algorithm used to build a content-derived pathway id
// (hash://<HASH_NAME>/<hex digest>) when the converter output has no id.
const HASH_NAME = "sha256";

// XML declaration that starts a GPML file.
// NOTE: some GPML files use lowercase "utf-8", so the ignore-case flag is set.
// The "." in the version number is escaped so that it only matches a literal dot.
const primaryDelimiterBetweenGPMLFiles = /<\?xml version=["']1\.0["'] encoding=["']UTF-8["']\?>/i;

// Closing tag of the GPML root element. It marks the end of one GPML document
// in a stream that may hold several documents.
const secondaryDelimiterBetweenGPMLFiles = "</Pathway>";

/**
 * Escape every regular-expression metacharacter in `text` so the result can
 * be embedded in a RegExp source string and match `text` literally.
 * (A forward slash needs no escaping when the pattern is passed to the
 * RegExp constructor as a string.)
 *
 * @param {string} text - literal text to embed in a pattern
 * @returns {string} `text` with metacharacters backslash-escaped
 */
function escapeRegExp(text) {
  return text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

// Both patterns are built once, up front, instead of once per stream element.
const escapedSecondaryDelimiter = escapeRegExp(
  secondaryDelimiterBetweenGPMLFiles
);
// The secondary delimiter, optionally followed by line breaks, at the end of
// the text buffered so far.
const trailingSecondaryDelimiterRe = new RegExp(
  escapedSecondaryDelimiter + "[\r\n]*$"
);
// An element consisting of nothing but (optional line breaks and) the
// secondary delimiter.
const loneSecondaryDelimiterRe = new RegExp(
  "^[\r\n]*" + escapedSecondaryDelimiter + "$"
);

const source = hl(process.stdin)
  // NOTE this splitter (the next two steps) splits the stream at every XML
  // header. We don't want to process the XML header with CXML, so the
  // splitter has the additional benefit of removing the header.
  .splitBy(primaryDelimiterBetweenGPMLFiles)
  // Splitting leaves an empty piece in front of a leading XML header. Drop
  // whitespace-only pieces rather than blindly dropping the first piece, so
  // that input whose first file has no XML header is not thrown away.
  .filter(piece => /\S/.test(piece))
  // This splitter (next three steps) handles a stream of GPML files when one
  // or more of those files don't have the XML header. It splits at the end of
  // a document without otherwise changing anything, because the separator is
  // added back in by the following step.
  .splitBy(trailingSecondaryDelimiterRe)
  // NOTE: the splitter removed the secondary delimiter, but it must be
  // present for CXML to process the GPML, so append it again.
  // TODO can we use intersperse or something instead of this?
  // Problem with intersperse: we get two streams where we want just one.
  .map(x => x + secondaryDelimiterBetweenGPMLFiles)
  // NOTE: w/out this filter, the final value in the stream would always just
  // be an extraneous, isolated copy of the secondary delimiter.
  .filter(x => !loneSecondaryDelimiterRe.test(x));

// Convert each GPML document and write the resulting pvjson to stdout as
// newline-delimited JSON. Any error is reported on stderr and terminates the
// process with exit code 1.
source.each(function(gpml) {
  gpml2pvjson(hl([gpml]), id)
    .last()
    .map(function(pvjson) {
      if (!pvjson.pathway.id) {
        // No id on the converted pathway: fall back to a hash of the GPML
        // text that was handed to the converter.
        const digest = crypto
          .createHash(HASH_NAME)
          .update(gpml)
          .digest("hex");
        pvjson.pathway.id = `hash://${HASH_NAME}/${digest}`;
      }
      if (pathwayVersion) {
        pvjson.pathway.pathwayVersion = pathwayVersion;
      }
      return pvjson;
    })
    .errors(function(err) {
      console.error(err);
      process.exit(1);
    })
    .pipe(ndjson.serialize())
    .pipe(process.stdout);
});

// TODO confirm the process exits on its own once stdin is exhausted; no
// explicit process.exit(0) is issued here.