--- /dev/null
+#!/usr/bin/perl -w
+use strict;
+
+# SVN test from jedit on Mac
+
+# --------------------------------------------------------------------
+# Rice Reactome - CHEBI Ontology Mapping Script
+#
+# Justin Preece, 10/06/10
+#
+# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
+#
+# Inputs:
+# CHEBI OBO file (preset)
+# Rice Reactome file (preset, provided by YuanMing Wu)
+# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
+# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns
+#
+# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
+# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# (Row) 923893 15414 CAS 29908-03-0
+# (Row) 923893 15414 LIGAND C00019
+# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence
+# --------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------
+# modules
+# --------------------------------------------------------------------
+
+use Bio::OntologyIO;
+
+# --------------------------------------------------------------------
+# declarations
+# --------------------------------------------------------------------
+
+# set paths to data files
+my (
+    $data_path,           # directory holding all data files; file names are appended directly
+    $chebi_obo_file,      # CHEBI ontology in OBO format
+    $reactome_file,       # tab-delimited Rice Reactome reference molecules
+    $mapped_output_file,  # tab-delimited mapping results
+) = (
+    "/home/preecej/Documents/Projects/Reactome/",
+    "chebi_sample.obo",
+    "RiceReferenceMolecules_sample.txt",
+    "reactome_chebi_mapping.txt",
+);
+
+# CHEBI ontology object; assigned by init()
+my $ont;
+
+# Rice Reactome lookup tables, filled by init().  Each key is an external
+# identifier (RiceCyc keys are upper-cased) and each value is an array ref
+# of the ReactomeIDs that carry that identifier.
+my (%reactome_CAS, %reactome_LIGAND, %reactome_RiceCyc);
+
+# successful mappings between chebi and reactome; appended to by
+# perform_map() and written out by create_mapfile()
+my @map_results;
+
+
+# --------------------------------------------------------------------
+# functions
+# --------------------------------------------------------------------
+
+
+# setup chebi parser and reactome data
+# --------------------------------------------------------------------
+sub init
+{
+    # Purpose: load the CHEBI ontology into $ont and index the Rice
+    #   Reactome file into %reactome_CAS, %reactome_LIGAND and
+    #   %reactome_RiceCyc.
+    # Params:  none (reads $data_path, $chebi_obo_file, $reactome_file)
+    # Returns: nothing useful; results are stored in the file-level
+    #   variables named above.
+    # Dies:    when no ontology can be read or the Reactome file cannot
+    #   be opened.
+    my $obo_path = $data_path . $chebi_obo_file;
+    my $reactome_path = $data_path . $reactome_file;
+
+    # init ontology parser
+    my $parser = Bio::OntologyIO->new (
+        -format => "obo",
+        -file => $obo_path);
+
+    # init ontology
+    $ont = $parser->next_ontology();
+    $parser->close();
+    defined($ont)
+        or die "No ontology could be read from $obo_path";
+
+    # read rice reactome file into 3 separate hashes; fail loudly rather
+    # than silently continuing with empty hashes
+    open(my $reactome_fh, '<', $reactome_path)
+        or die "Cannot open $reactome_path for reading: $!";
+
+    my $header_line = <$reactome_fh>; # read and discard the header
+
+    while (my $line = <$reactome_fh>)
+    {
+        chomp $line;
+        $line =~ s/\r\z//;          # a stray CR would corrupt the last column
+        next if $line =~ /^\s*$/;   # ignore blank lines
+
+        # break up our tab-del line: column 0 is the ReactomeID, columns
+        # 2, 3 and 4 are CAS, LIGAND and RiceCyc (see the file header)
+        my @reactome_entry = split(/\t/, $line);
+        my ($reactome_id, $CAS_id, $LIGAND_id, $RiceCyc_term)
+            = @reactome_entry[0, 2, 3, 4];
+        next unless defined($reactome_id) && $reactome_id ne "";
+
+        # A single CAS, LIGAND, or RiceCyc identifier may appear in more
+        # than one reactome entry, so each hash value is an array ref
+        # holding 1...n ReactomeIDs.  RiceCyc terms are upper-cased so
+        # lookups can be case-insensitive.
+        my @columns = (
+            [ \%reactome_CAS,     $CAS_id ],
+            [ \%reactome_LIGAND,  $LIGAND_id ],
+            [ \%reactome_RiceCyc,
+              defined($RiceCyc_term) ? uc($RiceCyc_term) : undef ],
+        );
+
+        foreach my $column (@columns)
+        {
+            my ($lookup, $value) = @$column;
+
+            # split() drops trailing empty fields, so a short row leaves
+            # columns undefined; treat those, empty strings and the "-"
+            # placeholder alike as "no identifier"
+            next unless defined($value) && $value ne "" && $value ne "-";
+
+            push @{ $lookup->{$value} }, $reactome_id;
+        }
+    }
+    close($reactome_fh);
+}
+
+
+# spit out some data to make sure you've read in the files correctly
+# --------------------------------------------------------------------
+sub test_inputs
+{
+    # Debug helper: prints the ontology statistics, every CHEBI term with
+    # its name, synonyms and xrefs, and the contents of the three Reactome
+    # lookup hashes to STDOUT.  Takes no arguments; expects init() to have
+    # populated $ont and the hashes beforehand.
+
+    # output basic stats on chebi ontology
+    print "\n[Ontology Stats]\n";
+    my @stats_line = (
+        "read ontology ", $ont->name(), " with ",
+        scalar($ont->get_root_terms), " root terms, and ",
+        scalar($ont->get_all_terms), " total terms, and ",
+        scalar($ont->get_leaf_terms), " leaf terms\n",
+    );
+    print @stats_line;
+
+    # all chebi terms in the ontology
+    print "\n[CHEBI Term List from \$ont]\n";
+    for my $chebi_term ($ont->get_all_terms)
+    {
+        my @synonym_list = $chebi_term->get_synonyms;
+        my @dbxref_list = $chebi_term->get_dbxrefs;
+
+        print $chebi_term->identifier;
+        print " \|NAME\| ";
+        print $chebi_term->name if defined($chebi_term->name);
+        print " \|SYNONYMS\| ";
+        for my $synonym (@synonym_list) {
+            print "$synonym,";
+        }
+        print " \|XREFS\| ";
+        # first the stringified xref objects, then their primary ids
+        for my $dbxref (@dbxref_list) {
+            print "$dbxref";
+        }
+        for my $dbxref (@dbxref_list) {
+            print $dbxref->primary_id;
+        }
+        print "\n\n";
+    }
+
+    # show reactome hashes - this is important, give >1 dupes to Pankaj
+    # for manual reference.  To list only identifiers shared by several
+    # entries, guard the inner print with: @{$lookup->{$id}} > 1
+    print "\n[Reactome Hashes]\n";
+    my @hash_dumps = (
+        [ "\n--CAS Hash--\n",     \%reactome_CAS ],
+        [ "\n--LIGAND Hash--\n",  \%reactome_LIGAND ],
+        [ "\n--RiceCyc Hash--\n", \%reactome_RiceCyc ],
+    );
+    for my $dump (@hash_dumps)
+    {
+        my ($heading, $lookup) = @$dump;
+        print $heading;
+        for my $id (keys %$lookup) {
+            print "$id: @{$lookup->{$id}}\n";
+        }
+    }
+}
+
+
+# map the chebi terms to the reactome entries
+# --------------------------------------------------------------------
+sub perform_map
+{
+    # Purpose: match every CHEBI term in $ont against the Reactome lookup
+    #   hashes and record each hit in @map_results.
+    # Params:  none (reads $ont and %reactome_RiceCyc)
+    # Returns: nothing useful; appends to @map_results one tab-delimited
+    #   string per (ReactomeID, CHEBI term) pair, laid out as
+    #   ReactomeID, CHEBI, XREF_Type, XREF_ID with no trailing newline.
+
+    # loop through each chebi term
+    foreach my $term ($ont->get_all_terms)
+    {
+        # set locals for matching each term property
+        my $term_id = defined($term->identifier) ? $term->identifier : "";
+        my $term_name = defined($term->name) ? $term->name : "";
+
+        # TODO: attempt CHEBI match on CAS ID
+
+        # TODO: attempt CHEBI match on LIGAND ID
+
+        # attempt CHEBI match on RiceCyc names.  The hash keys are
+        # upper-cased, so the same upper-cased key must be used both for
+        # the existence test and for fetching the value.  Unnamed terms
+        # are skipped so they can never match an empty key.
+        my $reactome_ids;
+        if ($term_name ne "") {
+            $reactome_ids = $reactome_RiceCyc{uc($term_name)};
+        }
+
+        if (defined($reactome_ids)) {
+            # the hash value is an array ref: emit one complete row per
+            # ReactomeID instead of interpolating the reference itself
+            foreach my $reactome_id (@$reactome_ids) {
+                push(@map_results,
+                    join("\t", $reactome_id, $term_id, "RiceCyc", $term_name));
+            }
+        }
+        # TODO: when the name does not match, check the term synonyms
+        # ($term->get_synonyms) against %reactome_RiceCyc as well
+    }
+}
+
+# sample format - remove later
+# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# 923893 15414 CAS 29908-03-0
+# 923893 15414 LIGAND C00019
+# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE
+
+
+# put the results in the mapped output file
+# --------------------------------------------------------------------
+sub create_mapfile
+{
+    # Purpose: append the rows in @map_results to the mapped output file
+    #   ($data_path . $mapped_output_file), one row per line.
+    # Params:  none
+    # Returns: nothing useful; prints a notice to STDOUT when there is
+    #   nothing to write.
+    # Dies:    when the output file cannot be opened, written or closed.
+    if (@map_results > 0)
+    {
+        my $output_path = $data_path . $mapped_output_file;
+
+        # The file is opened in append mode, so only write the column
+        # header when the file is new or empty; otherwise every rerun
+        # would bury another header row in the middle of the data.
+        my $needs_header = !(-s $output_path);
+
+        # setup output file
+        open(my $output_fh, '>>', $output_path)
+            or die "Cannot open $output_path for appending: $!";
+
+        # the header is printed directly rather than unshifted onto
+        # @map_results, so the shared results array is left untouched
+        if ($needs_header) {
+            print {$output_fh} "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID\n"
+                or die "Cannot write to $output_path: $!";
+        }
+
+        # format results for file output
+        foreach my $row (@map_results) {
+            print {$output_fh} "$row\n"
+                or die "Cannot write to $output_path: $!";
+        }
+
+        # buffered write errors only surface at close time
+        close($output_fh)
+            or die "Cannot close $output_path: $!";
+    } else {
+        print "\n\nSorry, there are no mapped results.\n\n";
+    }
+}
+
+
+# --------------------------------------------------------------------
+# main
+# --------------------------------------------------------------------
+
+# load the inputs, build the mappings, then write them out
+init();
+#test_inputs();    # uncomment to dump the parsed inputs for inspection
+perform_map();
+create_mapfile();
+
+exit 0;
--- /dev/null
+#!/usr/bin/perl -w
+use strict;
+
+# --------------------------------------------------------------------
+# Rice Reactome - CHEBI Ontology Mapping Script
+#
+# Justin Preece, 10/06/10
+#
+# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
+#
+# Inputs:
+# CHEBI OBO file (preset)
+# Rice Reactome file (preset, provided by YuanMing Wu)
+# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
+# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns
+#
+# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
+# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# (Row) 923893 15414 CAS 29908-03-0
+# (Row) 923893 15414 LIGAND C00019
+# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence
+# --------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------
+# modules
+# --------------------------------------------------------------------
+
+use Bio::OntologyIO;
+
+# --------------------------------------------------------------------
+# declarations
+# --------------------------------------------------------------------
+
+# set paths to data files
+my (
+    $data_path,           # directory holding all data files; file names are appended directly
+    $chebi_obo_file,      # CHEBI ontology in OBO format
+    $reactome_file,       # tab-delimited Rice Reactome reference molecules
+    $mapped_output_file,  # tab-delimited mapping results
+) = (
+    "/home/preecej/Documents/Projects/Reactome/",
+    "chebi_sample.obo",
+    "RiceReferenceMolecules_sample.txt",
+    "reactome_chebi_mapping.txt",
+);
+
+# CHEBI ontology object; assigned by init()
+my $ont;
+
+# Rice Reactome lookup tables, filled by init().  Each key is an external
+# identifier (RiceCyc keys are upper-cased) and each value is an array ref
+# of the ReactomeIDs that carry that identifier.
+my (%reactome_CAS, %reactome_LIGAND, %reactome_RiceCyc);
+
+# successful mappings between chebi and reactome; appended to by
+# perform_map() and written out by create_mapfile()
+my @map_results;
+
+
+# --------------------------------------------------------------------
+# functions
+# --------------------------------------------------------------------
+
+
+# setup chebi parser and reactome data
+# --------------------------------------------------------------------
+sub init
+{
+    # Purpose: load the CHEBI ontology into $ont and index the Rice
+    #   Reactome file into %reactome_CAS, %reactome_LIGAND and
+    #   %reactome_RiceCyc.
+    # Params:  none (reads $data_path, $chebi_obo_file, $reactome_file)
+    # Returns: nothing useful; results are stored in the file-level
+    #   variables named above.
+    # Dies:    when no ontology can be read or the Reactome file cannot
+    #   be opened.
+    my $obo_path = $data_path . $chebi_obo_file;
+    my $reactome_path = $data_path . $reactome_file;
+
+    # init ontology parser
+    my $parser = Bio::OntologyIO->new (
+        -format => "obo",
+        -file => $obo_path);
+
+    # init ontology
+    $ont = $parser->next_ontology();
+    $parser->close();
+    defined($ont)
+        or die "No ontology could be read from $obo_path";
+
+    # read rice reactome file into 3 separate hashes; fail loudly rather
+    # than silently continuing with empty hashes
+    open(my $reactome_fh, '<', $reactome_path)
+        or die "Cannot open $reactome_path for reading: $!";
+
+    my $header_line = <$reactome_fh>; # read and discard the header
+
+    while (my $line = <$reactome_fh>)
+    {
+        chomp $line;
+        $line =~ s/\r\z//;          # a stray CR would corrupt the last column
+        next if $line =~ /^\s*$/;   # ignore blank lines
+
+        # break up our tab-del line: column 0 is the ReactomeID, columns
+        # 2, 3 and 4 are CAS, LIGAND and RiceCyc (see the file header)
+        my @reactome_entry = split(/\t/, $line);
+        my ($reactome_id, $CAS_id, $LIGAND_id, $RiceCyc_term)
+            = @reactome_entry[0, 2, 3, 4];
+        next unless defined($reactome_id) && $reactome_id ne "";
+
+        # A single CAS, LIGAND, or RiceCyc identifier may appear in more
+        # than one reactome entry, so each hash value is an array ref
+        # holding 1...n ReactomeIDs.  RiceCyc terms are upper-cased so
+        # lookups can be case-insensitive.
+        my @columns = (
+            [ \%reactome_CAS,     $CAS_id ],
+            [ \%reactome_LIGAND,  $LIGAND_id ],
+            [ \%reactome_RiceCyc,
+              defined($RiceCyc_term) ? uc($RiceCyc_term) : undef ],
+        );
+
+        foreach my $column (@columns)
+        {
+            my ($lookup, $value) = @$column;
+
+            # split() drops trailing empty fields, so a short row leaves
+            # columns undefined; treat those, empty strings and the "-"
+            # placeholder alike as "no identifier"
+            next unless defined($value) && $value ne "" && $value ne "-";
+
+            push @{ $lookup->{$value} }, $reactome_id;
+        }
+    }
+    close($reactome_fh);
+}
+
+
+# spit out some data to make sure you've read in the files correctly
+# --------------------------------------------------------------------
+sub test_inputs
+{
+    # Debug helper: prints the ontology statistics, every CHEBI term with
+    # its name, synonyms and xrefs, and the contents of the three Reactome
+    # lookup hashes to STDOUT.  Takes no arguments; expects init() to have
+    # populated $ont and the hashes beforehand.
+
+    # output basic stats on chebi ontology
+    print "\n[Ontology Stats]\n";
+    my @stats_line = (
+        "read ontology ", $ont->name(), " with ",
+        scalar($ont->get_root_terms), " root terms, and ",
+        scalar($ont->get_all_terms), " total terms, and ",
+        scalar($ont->get_leaf_terms), " leaf terms\n",
+    );
+    print @stats_line;
+
+    # all chebi terms in the ontology
+    print "\n[CHEBI Term List from \$ont]\n";
+    for my $chebi_term ($ont->get_all_terms)
+    {
+        my @synonym_list = $chebi_term->get_synonyms;
+        my @dbxref_list = $chebi_term->get_dbxrefs;
+
+        print $chebi_term->identifier;
+        print " \|NAME\| ";
+        print $chebi_term->name if defined($chebi_term->name);
+        print " \|SYNONYMS\| ";
+        for my $synonym (@synonym_list) {
+            print "$synonym,";
+        }
+        print " \|XREFS\| ";
+        # first the stringified xref objects, then their primary ids
+        for my $dbxref (@dbxref_list) {
+            print "$dbxref";
+        }
+        for my $dbxref (@dbxref_list) {
+            print $dbxref->primary_id;
+        }
+        print "\n\n";
+    }
+
+    # show reactome hashes - this is important, give >1 dupes to Pankaj
+    # for manual reference.  To list only identifiers shared by several
+    # entries, guard the inner print with: @{$lookup->{$id}} > 1
+    print "\n[Reactome Hashes]\n";
+    my @hash_dumps = (
+        [ "\n--CAS Hash--\n",     \%reactome_CAS ],
+        [ "\n--LIGAND Hash--\n",  \%reactome_LIGAND ],
+        [ "\n--RiceCyc Hash--\n", \%reactome_RiceCyc ],
+    );
+    for my $dump (@hash_dumps)
+    {
+        my ($heading, $lookup) = @$dump;
+        print $heading;
+        for my $id (keys %$lookup) {
+            print "$id: @{$lookup->{$id}}\n";
+        }
+    }
+}
+
+
+# map the chebi terms to the reactome entries
+# --------------------------------------------------------------------
+sub perform_map
+{
+    # Purpose: match every CHEBI term in $ont against the Reactome lookup
+    #   hashes and record each hit in @map_results.
+    # Params:  none (reads $ont and %reactome_RiceCyc)
+    # Returns: nothing useful; appends to @map_results one tab-delimited
+    #   string per (ReactomeID, CHEBI term) pair, laid out as
+    #   ReactomeID, CHEBI, XREF_Type, XREF_ID with no trailing newline.
+
+    # loop through each chebi term
+    foreach my $term ($ont->get_all_terms)
+    {
+        # set locals for matching each term property
+        my $term_id = defined($term->identifier) ? $term->identifier : "";
+        my $term_name = defined($term->name) ? $term->name : "";
+
+        # TODO: attempt CHEBI match on CAS ID
+
+        # TODO: attempt CHEBI match on LIGAND ID
+
+        # attempt CHEBI match on RiceCyc names.  The hash keys are
+        # upper-cased, so the same upper-cased key must be used both for
+        # the existence test and for fetching the value.  Unnamed terms
+        # are skipped so they can never match an empty key.
+        my $reactome_ids;
+        if ($term_name ne "") {
+            $reactome_ids = $reactome_RiceCyc{uc($term_name)};
+        }
+
+        if (defined($reactome_ids)) {
+            # the hash value is an array ref: emit one complete row per
+            # ReactomeID instead of interpolating the reference itself
+            foreach my $reactome_id (@$reactome_ids) {
+                push(@map_results,
+                    join("\t", $reactome_id, $term_id, "RiceCyc", $term_name));
+            }
+        }
+        # TODO: when the name does not match, check the term synonyms
+        # ($term->get_synonyms) against %reactome_RiceCyc as well
+    }
+}
+
+# sample format - remove later
+# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# 923893 15414 CAS 29908-03-0
+# 923893 15414 LIGAND C00019
+# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE
+
+
+# put the results in the mapped output file
+# --------------------------------------------------------------------
+sub create_mapfile
+{
+    # Purpose: append the rows in @map_results to the mapped output file
+    #   ($data_path . $mapped_output_file), one row per line.
+    # Params:  none
+    # Returns: nothing useful; prints a notice to STDOUT when there is
+    #   nothing to write.
+    # Dies:    when the output file cannot be opened, written or closed.
+    if (@map_results > 0)
+    {
+        my $output_path = $data_path . $mapped_output_file;
+
+        # The file is opened in append mode, so only write the column
+        # header when the file is new or empty; otherwise every rerun
+        # would bury another header row in the middle of the data.
+        my $needs_header = !(-s $output_path);
+
+        # setup output file
+        open(my $output_fh, '>>', $output_path)
+            or die "Cannot open $output_path for appending: $!";
+
+        # the header is printed directly rather than unshifted onto
+        # @map_results, so the shared results array is left untouched
+        if ($needs_header) {
+            print {$output_fh} "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID\n"
+                or die "Cannot write to $output_path: $!";
+        }
+
+        # format results for file output
+        foreach my $row (@map_results) {
+            print {$output_fh} "$row\n"
+                or die "Cannot write to $output_path: $!";
+        }
+
+        # buffered write errors only surface at close time
+        close($output_fh)
+            or die "Cannot close $output_path: $!";
+    } else {
+        print "\n\nSorry, there are no mapped results.\n\n";
+    }
+}
+
+
+# --------------------------------------------------------------------
+# main
+# --------------------------------------------------------------------
+
+# load the inputs, build the mappings, then write them out
+init();
+#test_inputs();    # uncomment to dump the parsed inputs for inspection
+perform_map();
+create_mapfile();
+
+exit 0;