#!/bin/bash
#
# Extracts the Xref (Database, ID) pairs from every WikiPathways GPML file in
# GPML_SOURCE_DIR and pipes them through `./bin/bridgedb xrefs`, writing one
# `<gpml file name>.gmt.tsv` per pathway into MAPPED_DIR.
#
# If GPML_SOURCE_DIR does not exist yet, the matching archive is downloaded
# from data.wikipathways.org and unpacked into it first.
#
# Requires on PATH: wget, unzip, xpath, awk. Requires ./bin/bridgedb relative
# to the current working directory.

GPML_SOURCE_DIR="./wikipathways-20180210-gpml-Homo_sapiens"
MAPPED_DIR="./mapped"

# -p makes this a no-op when the directory is already there.
mkdir -p -- "$MAPPED_DIR" || exit 1

if [[ ! -d "$GPML_SOURCE_DIR" ]]; then
  archive="$GPML_SOURCE_DIR.zip"
  # Strip any leading path (e.g. "./") so the URL holds only the archive name.
  url="http://data.wikipathways.org/current/gpml/${GPML_SOURCE_DIR##*/}.zip"

  # -O pins the output name, so a stale archive from an earlier run is
  # overwritten instead of wget saving the new download as "<name>.zip.1".
  if ! wget -O "$archive" "$url"; then
    printf 'error: could not download %s\n' "$url" >&2
    rm -f -- "$archive"
    exit 1
  fi

  if ! unzip "$archive" -d "$GPML_SOURCE_DIR"; then
    printf 'error: could not unpack %s\n' "$archive" >&2
    # The directory did not exist before this branch ran; remove the partial
    # extraction so the next run retries instead of using incomplete data.
    rm -rf -- "${GPML_SOURCE_DIR:?}"
    exit 1
  fi

  rm -- "$archive"
fi

# Glob instead of parsing `ls | grep`: safe for file names with whitespace,
# and only matches names that contain "WP" and end in ".gpml".
for gpml in "$GPML_SOURCE_DIR"/*WP*.gpml; do
  # With no match the pattern stays literal; skip it.
  [[ -e "$gpml" ]] || continue

  f="${gpml##*/}"

  # xpath prints the attribute as name="value"; the awk program strips the
  # name=" prefix and the closing quote, leaving only the value.
  organism=$(xpath "$gpml" "//@Organism" | awk '{gsub(/\ *[a-zA-Z][a-zA-Z0-9]*="/ , ""); gsub(/"\ */, ""); print $0}')

  # Turn the stream of Database="..." ID="..." attributes into one
  # "<database><TAB><id>" record per line: each Database=" starts a new line,
  # the following attribute name becomes a tab, and leftover quotes are
  # removed. `awk 'NF'` then drops empty lines.
  # NOTE(review): "0 1" are presumably the column indexes of the database and
  # identifier fields for bridgedb - confirm against the bridgedb CLI.
  xpath "$gpml" "//Xref/@*[name()='Database' or name()='ID']" \
    | awk '{gsub(/["\ ]*Database="/, "\n"); gsub (/"\ *[a-zA-Z][a-zA-Z0-9]*="/, "\t"); gsub(/"\ */, ""); print $0}' \
    | awk 'NF' \
    | ./bin/bridgedb xrefs -f "tsv" "$organism" 0 1 chembl.compound chebi wikidata ensembl ncbigene \
      > "$MAPPED_DIR/$f.gmt.tsv"
done