From 50294e5c3942e0533bcf66b622e02772b693687a Mon Sep 17 00:00:00 2001 From: preecej Date: Sat, 9 Oct 2010 00:30:00 +0000 Subject: [PATCH] wrote outline for sub perform_map(); successfully added code for mapping on RiceCyc term name; added header line to output file svn path=/; revision=61 --- .../reactome_chebi_mapping.pl | 116 +++++++++++++----- 1 file changed, 85 insertions(+), 31 deletions(-) diff --git a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl index 4ff66d9..5d01cd7 100755 --- a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl +++ b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl @@ -12,13 +12,13 @@ use strict; # CHEBI OBO file (preset) # Rice Reactome file (preset, provided by YuanMing Wu) # (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc] -# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** please use a '-' (dash) symbol for any empty columns +# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns # # Outputs: tab-del mapping file (reactome_chebi_mapping.txt) # (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] # (Row) 923893 15414 CAS 29908-03-0 # (Row) 923893 15414 LIGAND C00019 -# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available +# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence # -------------------------------------------------------------------- @@ -28,7 +28,6 @@ use strict; use Bio::OntologyIO; - # -------------------------------------------------------------------- # declarations # -------------------------------------------------------------------- @@ -45,7 +44,7 @@ my %reactome_CAS; # rice reactome CAS hash my %reactome_LIGAND; # rice reactome LIGAND hash my %reactome_RiceCyc; # rice reactome RiceCyc hash -my @map_results; # successful mappings between chebi and reactome +my @map_results = (); # successful mappings between chebi and reactome # -------------------------------------------------------------------- @@ -97,7 +96,7 @@ sub init } # similarly... - + # --LIGAND Hash Load-- if ($LIGAND_id ne "-") { @@ -107,10 +106,9 @@ sub init # --RiceCyc Hash Load-- if ($RiceCyc_term ne "-") { - push @{$reactome_RiceCyc{$RiceCyc_term}}, $reactome_id; + push @{$reactome_RiceCyc{"\U$RiceCyc_term"}}, $reactome_id; } } - close REACTOME_FILE; } @@ -127,58 +125,114 @@ sub test_inputs scalar($ont->get_leaf_terms)," leaf terms\n"; # all chebi terms in the ontology - #print "\n[CHEBI Term List from \$ont]\n"; - #foreach my $term ($ont->get_all_terms()) { - #print $term->identifier() . "\n"; - #} + print "\n[CHEBI Term List from \$ont]\n"; + foreach my $term ($ont->get_all_terms) { + my @synonyms = $term->get_synonyms; + my @xrefs = $term->get_dbxrefs; + + print $term->identifier; + print " \|NAME\| "; + if (defined($term->name)) { + print $term->name; + } + print " \|SYNONYMS\| "; + print "$_," foreach @synonyms; + print " \|XREFS\| "; + print "$_" foreach @xrefs; + foreach my $xref (@xrefs) { + print $xref->primary_id; + } + print "\n\n"; + } - # show reactome hashes + # show reactome hashes - this is important, give >1 dupes to Pankaj + # for manual reference my $k; my @v; print "\n[Reactome Hashes]\n"; print "\n--CAS Hash--\n"; for $k (keys %reactome_CAS) { - print "$k: @{$reactome_CAS{$k}}\n"; + #if (@{$reactome_CAS{$k}} > 1) { + print "$k: @{$reactome_CAS{$k}}\n"; + #} } print "\n--LIGAND Hash--\n"; for $k (keys %reactome_LIGAND) { - print "$k: @{$reactome_LIGAND{$k}}\n"; + #if (@{$reactome_LIGAND{$k}} > 1) { + print "$k: @{$reactome_LIGAND{$k}}\n"; + #} } print "\n--RiceCyc Hash--\n"; for $k (keys %reactome_RiceCyc) { - print "$k: @{$reactome_RiceCyc{$k}}\n"; + #if (@{$reactome_RiceCyc{$k}} > 1) { + print "$k: @{$reactome_RiceCyc{$k}}\n"; + #} } - } -# map the chebi terms to the reactome entries (brute-force algo) +# map the chebi terms to the reactome entries # -------------------------------------------------------------------- sub perform_map { - # iterate ontology; use brute-force matching to map - my @chebi_obo_terms = $ont->get_all_terms(); + my @chebi_obo_terms = $ont->get_all_terms; + #print $_->identifier . "\n" foreach @chebi_obo_terms; - #print $_->identifier() . "\n" foreach @chebi_obo_terms; + # loop through each chebi term + foreach my $term (@chebi_obo_terms) + { + # set locals for matching each term property + my $term_name; + if (defined($term->name)) { + $term_name = $term->name; + } else { + $term_name = ""; + } + my @term_synonyms = $term->get_synonyms; - # --pseudo-- - # loop through each chebi term (~33K) - # if match chebi to reactome hashes on CAS, LIGAND, and/or RiceCyc name; - # write tab-del mapping string directly to results array for EACH type of match + # attempt CHEBI match on CAS ID + + # attempt CHEBI match on LIGAND ID + # attempt CHEBI match on RiceCyc names + if (defined($reactome_RiceCyc{"\U$term_name"})) { + push (@map_results, "$reactome_RiceCyc{$term_name}\t", + "$term->identifier\t", + "RiceCyc\t", + $term_name); + } else { # check the term synonyms, if needed + foreach my $synonym (@term_synonyms) { + print ""; + } + } + } } +# sample format - remove later +# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] +# 923893 15414 CAS 29908-03-0 +# 923893 15414 LIGAND C00019 +# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE + # put the results in the mapped output file # -------------------------------------------------------------------- sub create_mapfile { - # setup output file - open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file); - - #format results for file output - print OUTPUT_FILE "$_\n" foreach @map_results; + if (@map_results > 0) + { + # add a header to the results array + unshift (@map_results, "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID"); + + # setup output file + open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file); - close OUTPUT_FILE; + #format results for file output + print OUTPUT_FILE "$_\n" foreach @map_results; + + close OUTPUT_FILE; + } else { + print "\n\nSorry, there are no mapped results.\n\n"; + } } @@ -187,7 +241,7 @@ sub create_mapfile # -------------------------------------------------------------------- init; -test_inputs; +#test_inputs; perform_map; create_mapfile; -- 2.34.1