# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available
# --------------------------------------------------------------------
+
# --------------------------------------------------------------------
# modules
# --------------------------------------------------------------------
use Bio::OntologyIO;
+
# --------------------------------------------------------------------
# declarations
# --------------------------------------------------------------------
my $reactome_file = "RiceReferenceMolecules_sample.txt";
my $mapped_output_file = "reactome_chebi_mapping.txt";
-my $parser; # chebi ontology parser
-my @reactome_ref_molecules; # rice reactomes
+my $ont; # chebi ontology object; assigned in init() from the parsed OBO file
+
+my %reactome_CAS; # rice reactome CAS hash (intended: CAS identifier => ReactomeID); loaded in init()
+my %reactome_LIGAND; # rice reactome LIGAND hash; declared here, nothing in this section fills it yet
+my %reactome_RiceCyc; # rice reactome RiceCyc hash; declared here, nothing in this section fills it yet
+
+my @map_results; # successful mappings between chebi and reactome; printed by create_mapfile(), not yet populated in this section
+
# --------------------------------------------------------------------
# functions
# --------------------------------------------------------------------
+
# setup chebi parser and reactome data
# --------------------------------------------------------------------
sub init
{
- # initialize ontology parser
- $parser = Bio::OntologyIO->new (
+    # Loads the two inputs used by the mapping:
+    #   1. the ChEBI OBO file, parsed into the file-level $ont;
+    #   2. the tab-delimited rice reactome file, indexed into %reactome_CAS.
+    # Takes no arguments; the return value is not meaningful. Dies if the
+    # ontology cannot be read or the reactome file cannot be opened.
+
+    # init ontology parser (only needed long enough to pull the ontology out)
+    my $parser = Bio::OntologyIO->new (
-format => "obo",
-file => $data_path . $chebi_obo_file);
+
+    # init ontology
+    $ont = $parser->next_ontology();
+    $parser->close();
+    defined $ont
+        or die "No ontology found in " . $data_path . $chebi_obo_file;
+
+    # read rice reactome file into the CAS lookup hash
+    my $reactome_path = $data_path . $reactome_file;
+    open(my $reactome_fh, "<", $reactome_path)
+        or die "Cannot open $reactome_path: $!";
+
+    my $header = <$reactome_fh>; # skip the header
- # read chebi file into array
- #open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
- #my @chebi_obo_terms = <CHEBI_OBO_FILE>;
- #chomp(@chebi_obo_terms);
- #close CHEBI_OBO_FILE;
+    while (my $line = <$reactome_fh>)
+    {
+        chomp $line;
+        my @reactome_entry = split(/\t/, $line); # break up our tab-del line
+
+        # column 0 is stored as the ReactomeID and column 2 is used as the
+        # CAS identifier -- NOTE(review): column meaning is inferred from
+        # how the fields are used here; confirm against the file header.
+        my ($reactome_id, $cas_id) = @reactome_entry[0, 2];
+
+        # --CAS Hash Load--
+        # Skip short or blank lines, and keep those "-" placeholders out.
+        # String comparison (ne) is required: a numeric != treats "-" and
+        # any non-numeric identifier as 0.
+        next unless defined $cas_id && length $cas_id && $cas_id ne "-";
+
+        # There is a possibility that a single CAS identifier may appear
+        # in more than one reactome entry, so every key holds an array
+        # ref of ReactomeIDs; push autovivifies the array on first use.
+        push @{ $reactome_CAS{$cas_id} }, $reactome_id;
+    }
+
+    close $reactome_fh;
# read rice reactome file into array
- open(REACTOME_FILE,$data_path . $reactome_file);
- @reactome_ref_molecules = <REACTOME_FILE>;
- chomp(@reactome_ref_molecules);
- close REACTOME_FILE;
+ #open(REACTOME_FILE,$data_path . $reactome_file);
+ #@reactome_ref_molecules = <REACTOME_FILE>;
+ #chomp(@reactome_ref_molecules);
+ #close REACTOME_FILE;
}
+
# spit out some data to make sure you've read in the files correctly
# --------------------------------------------------------------------
sub test_inputs
{
# output basic stats on chebi ontology
- while (my $ont = $parser->next_ontology()) {
- print "read ontology ",$ont->name()," with ",
- scalar($ont->get_root_terms)," root terms, and ",
- scalar($ont->get_all_terms)," total terms, and ",
- scalar($ont->get_leaf_terms)," leaf terms\n";
- }
-
- # test - show arrays
- # print "$_\n" foreach @chebi_obo_terms;
- # print "$_\n" foreach @reactome_ref_molecules;
+    # Debug dump: prints summary counts for the ontology held in $ont and
+    # then the contents of %reactome_CAS to STDOUT. Takes no arguments;
+    # the return value is not meaningful.
+    print "\n[Ontology Stats]\n";
+    print "read ontology ",$ont->name()," with ",
+        scalar($ont->get_root_terms)," root terms, and ",
+        scalar($ont->get_all_terms)," total terms, and ",
+        scalar($ont->get_leaf_terms)," leaf terms\n";
+
+    # show reactome hashes
+    print "\n[Reactome Hashes]\n";
+    print "\n--CAS Hash--\n";
+
+    # Iterate keys rather than the flattened hash so each line pairs a key
+    # with its value(s); sorted so repeated runs are comparable.
+    foreach my $cas_id (sort keys %reactome_CAS)
+    {
+        my $stored = $reactome_CAS{$cas_id};
+
+        # a value may be a single ReactomeID or an array ref of them
+        my @reactome_ids = ref($stored) eq 'ARRAY' ? @{$stored} : ($stored);
+        print join("\t", $cas_id, join(",", @reactome_ids)), "\n";
+    }
+    print "\n";
}
-# map the chebi terms to the reactome entries
+
+# map the chebi terms to the reactome entries (brute-force algo)
# --------------------------------------------------------------------
sub perform_map
+{
+    # Walks every term returned by the ontology in $ont and prints its
+    # identifier to STDOUT, one per line. No mapping is recorded yet.
+    #
+    # --planned matching (not implemented)--
+    # for each chebi term (~33K):
+    #   if it matches the reactome hashes on CAS, LIGAND, and/or RiceCyc name,
+    #   write a tab-del mapping string to the results array for EACH type
+    #   of match
+    for my $chebi_term ($ont->get_all_terms()) {
+        my $term_id = $chebi_term->identifier();
+        print $term_id . "\n";
+    }
+}
+
+
+# write the collected mapping results (@map_results) to the mapped output file
+# --------------------------------------------------------------------
+sub create_mapfile
{
# setup output file
open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
- # do brute-force matching here
-
- # actually print out matches here
- #print OUTPUT_FILE "$_\n" foreach @reactome_ref_molecules;
+ # one @map_results entry per line; ">>" opens in append mode, so existing file contents are kept
+ print OUTPUT_FILE "$_\n" foreach @map_results;
- # cleanup
close OUTPUT_FILE;
}
+
# --------------------------------------------------------------------
# main
# --------------------------------------------------------------------
init;
test_inputs;
-# perform_map;
+perform_map; # currently only prints each chebi term identifier
+create_mapfile; # appends the contents of @map_results to the mapping file
exit;