# CHEBI OBO file (preset)
# Rice Reactome file (preset, provided by YuanMing Wu)
# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
-# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** please use a '-' (dash) symbol for any empty columns
+# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns
#
# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
# (Row) 923893 15414 CAS 29908-03-0
# (Row) 923893 15414 LIGAND C00019
-# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available
+# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence
# --------------------------------------------------------------------
use Bio::OntologyIO;
-
# --------------------------------------------------------------------
# declarations
# --------------------------------------------------------------------
my %reactome_LIGAND; # rice reactome LIGAND hash
my %reactome_RiceCyc; # rice reactome RiceCyc hash
-my @map_results; # successful mappings between chebi and reactome
+my @map_results = (); # successful mappings between chebi and reactome
# --------------------------------------------------------------------
}
# similarly...
-
+
# --LIGAND Hash Load--
if ($LIGAND_id ne "-")
{
# --RiceCyc Hash Load--
if ($RiceCyc_term ne "-")
{
- push @{$reactome_RiceCyc{$RiceCyc_term}}, $reactome_id;
+ push @{$reactome_RiceCyc{"\U$RiceCyc_term"}}, $reactome_id;
}
}
-
close REACTOME_FILE;
}
scalar($ont->get_leaf_terms)," leaf terms\n";
# all chebi terms in the ontology
- #print "\n[CHEBI Term List from \$ont]\n";
- #foreach my $term ($ont->get_all_terms()) {
- #print $term->identifier() . "\n";
- #}
+ print "\n[CHEBI Term List from \$ont]\n";
+ foreach my $term ($ont->get_all_terms) {
+ my @synonyms = $term->get_synonyms;
+ my @xrefs = $term->get_dbxrefs;
+
+ print $term->identifier;
+ print " \|NAME\| ";
+ if (defined($term->name)) {
+ print $term->name;
+ }
+ print " \|SYNONYMS\| ";
+ print "$_," foreach @synonyms;
+ print " \|XREFS\| ";
+ print "$_" foreach @xrefs;
+ foreach my $xref (@xrefs) {
+ print $xref->primary_id;
+ }
+ print "\n\n";
+ }
- # show reactome hashes
+ # show reactome hashes - this is important, give >1 dupes to Pankaj
+ # for manual reference
my $k; my @v;
print "\n[Reactome Hashes]\n";
print "\n--CAS Hash--\n";
for $k (keys %reactome_CAS) {
- print "$k: @{$reactome_CAS{$k}}\n";
+ #if (@{$reactome_CAS{$k}} > 1) {
+ print "$k: @{$reactome_CAS{$k}}\n";
+ #}
}
print "\n--LIGAND Hash--\n";
for $k (keys %reactome_LIGAND) {
- print "$k: @{$reactome_LIGAND{$k}}\n";
+ #if (@{$reactome_LIGAND{$k}} > 1) {
+ print "$k: @{$reactome_LIGAND{$k}}\n";
+ #}
}
print "\n--RiceCyc Hash--\n";
for $k (keys %reactome_RiceCyc) {
- print "$k: @{$reactome_RiceCyc{$k}}\n";
+ #if (@{$reactome_RiceCyc{$k}} > 1) {
+ print "$k: @{$reactome_RiceCyc{$k}}\n";
+ #}
}
-
}
-# map the chebi terms to the reactome entries (brute-force algo)
+# map the chebi terms to the reactome entries
# --------------------------------------------------------------------
sub perform_map
{
- # iterate ontology; use brute-force matching to map
- my @chebi_obo_terms = $ont->get_all_terms();
+    # Walks every term in $ont and appends one tab-delimited row
+    # (ReactomeID, CHEBI, XREF_Type, XREF_ID) to @map_results for each
+    # Reactome id whose RiceCyc name equals the term name, ignoring case
+    # (the RiceCyc hash keys are upper-cased when the hash is loaded).
+    # CAS, LIGAND and synonym matching are still stubs.
+    my @chebi_obo_terms = $ont->get_all_terms;
+    #print $_->identifier . "\n" foreach @chebi_obo_terms;
- #print $_->identifier() . "\n" foreach @chebi_obo_terms;
+    # loop through each chebi term
+    foreach my $term (@chebi_obo_terms)
+    {
+        # set locals for matching each term property
+        my $term_name = defined($term->name) ? $term->name : "";
+        my @term_synonyms = $term->get_synonyms;
- # --pseudo--
- # loop through each chebi term (~33K)
- # if match chebi to reactome hashes on CAS, LIGAND, and/or RiceCyc name;
- # write tab-del mapping string directly to results array for EACH type of match
+        # attempt CHEBI match on CAS ID (TODO: not implemented yet)
+
+        # attempt CHEBI match on LIGAND ID (TODO: not implemented yet)
+        # attempt CHEBI match on RiceCyc names
+        # Look up with the same upper-cased key that is later dereferenced.
+        my $name_key = "\U$term_name";
+        if (defined($reactome_RiceCyc{$name_key})) {
+            # Method calls are not interpolated inside double quotes, so
+            # fetch the identifier before building the row.
+            my $chebi_id = $term->identifier;
+            # Each hash value is a list of Reactome ids; emit ONE complete
+            # tab-delimited row per id so the output file stays one
+            # mapping per line.
+            foreach my $reactome_id (@{$reactome_RiceCyc{$name_key}}) {
+                push(@map_results,
+                    join("\t", $reactome_id, $chebi_id, "RiceCyc", $term_name));
+            }
+        } else { # check the term synonyms, if needed
+            # TODO: synonym matching is a placeholder; the loop currently
+            # prints an empty string and records nothing.
+            foreach my $synonym (@term_synonyms) {
+                print "";
+            }
+        }
+    }
}
+# sample format - remove later
+# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# 923893 15414 CAS 29908-03-0
+# 923893 15414 LIGAND C00019
+# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE
+
# put the results in the mapped output file
# --------------------------------------------------------------------
sub create_mapfile
{
- # setup output file
- open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
-
- #format results for file output
- print OUTPUT_FILE "$_\n" foreach @map_results;
+    # Appends the rows collected in @map_results to the mapping file at
+    # $data_path . $mapped_output_file, or prints a notice when there is
+    # nothing to write. Dies if the file cannot be opened or closed.
+    if (@map_results > 0)
+    {
+        my $output_path = $data_path . $mapped_output_file;
+
+        # The file is opened in append mode, so only emit the header when
+        # the file is missing or empty; otherwise every run would bury
+        # another header row in the middle of the data.
+        my $needs_header = (-s $output_path) ? 0 : 1;
+
+        # setup output file (three-argument open, failure is fatal)
+        open(OUTPUT_FILE, '>>', $output_path)
+            or die "Cannot open $output_path for appending: $!\n";
- close OUTPUT_FILE;
+        # format results for file output; the header is printed directly
+        # rather than unshifted so @map_results is left unmodified
+        print OUTPUT_FILE "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID\n" if $needs_header;
+        print OUTPUT_FILE "$_\n" foreach @map_results;
+
+        # buffered write errors only surface at close, so check it
+        close(OUTPUT_FILE)
+            or die "Cannot close $output_path: $!\n";
+    } else {
+        print "\n\nSorry, there are no mapped results.\n\n";
+    }
}
# --------------------------------------------------------------------
# Script entry point: run init, then build the mappings with perform_map
# and write them out with create_mapfile. This change comments out the
# test_inputs call (presumably a diagnostic dump of the loaded inputs --
# its definition is not fully visible here; confirm before re-enabling).
init;
-test_inputs;
+#test_inputs;
perform_map;
create_mapfile;