Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be seen immediately.
imported Bio::OntologyIO and ran parser test on CHEBI obo file
author preecej <preecej@localhost>
Thu, 7 Oct 2010 20:50:44 +0000 (20:50 +0000)
committer preecej <preecej@localhost>
Thu, 7 Oct 2010 20:50:44 +0000 (20:50 +0000)
svn path=/; revision=58

preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl

index ec89553b8ac647d95453e15fe8df0a49bf09dce2..85e201b1059f5a61d52bdbe1adbff531bf33435c 100755 (executable)
 #!/usr/bin/perl -w
 use strict;
 
+# --------------------------------------------------------------------
+# Rice Reactome - CHEBI Ontology Mapping Script
+#
+# Justin Preece, 10/06/10
+#
+# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
+#
+# Inputs:
+#   CHEBI OBO file (preset)
+#   Rice Reactome file (preset, provided by YuanMing Wu)
+#   (Header)    [ReactomeID]    [Compound_Name]            [CAS]           [LIGAND]    [RiceCyc]
+#   (Row)       923893          S-adenosyl-L-methionine    29908-03-0      C00019      S-ADENOSYLMETHIONINE        ** please use a '-' (dash) symbol for any empty columns
+#
+# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
+#   (Header)    [ReactomeID]    [CHEBI]    [XREF_Type]    [XREF_ID]       
+#   (Row)       923893          15414      CAS            29908-03-0
+#   (Row)       923893          15414      LIGAND         C00019
+#   (Row)       923893          15414      RiceCyc        S-ADENOSYLMETHIONINE      ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available
+# --------------------------------------------------------------------
+
+# --------------------------------------------------------------------
+# modules
+# --------------------------------------------------------------------
+
+use Bio::OntologyIO;
+
+# --------------------------------------------------------------------
+# declarations
+# --------------------------------------------------------------------
+
 # set paths to data files (hard-coded presets; file names are appended directly to $data_path)
 my $data_path = "/home/preecej/Documents/Projects/Reactome/"; # base directory; keep the trailing slash
 my $chebi_obo_file = "chebi_sample.obo"; # CHEBI ontology, parsed in "obo" format
 my $reactome_file = "RiceReferenceMolecules_sample.txt"; # Rice Reactome input, read line by line
 my $mapped_output_file = "reactome_chebi_mapping.txt"; # mapping output, opened in append mode
 
-# read files into arrays
-open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
-my @chebi_obo_terms = <CHEBI_OBO_FILE>;
-chomp(@chebi_obo_terms);
-close CHEBI_OBO_FILE;
+my $parser; # chebi ontology parser (Bio::OntologyIO object, assigned in init)
+my @reactome_ref_molecules; # rice reactomes: lines of the reactome file, chomped (assigned in init)
 
-open(REACTOME_FILE,$data_path . $reactome_file);
-my @reactome_ref_molecules = <REACTOME_FILE>;
-chomp(@reactome_ref_molecules);
-close REACTOME_FILE;
+# --------------------------------------------------------------------
+# functions
+# --------------------------------------------------------------------
 
-# show arrays
-# print "$_\n" foreach @chebi_obo_terms;
-# print "$_\n" foreach @reactome_ref_molecules;
+# setup chebi parser and reactome data
+# --------------------------------------------------------------------
+sub init
+{
+    # Prepare the two inputs used by the rest of the script: the CHEBI
+    # ontology parser ($parser) and the Rice Reactome lines
+    # (@reactome_ref_molecules). Takes no arguments and returns nothing
+    # useful; dies if the Reactome file cannot be opened.
+
+    # initialize ontology parser
+    $parser = Bio::OntologyIO->new (
+        -format => "obo",
+        -file => $data_path . $chebi_obo_file);
+
+    # read chebi file into array (superseded by the ontology parser above)
+    #open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
+    #my @chebi_obo_terms = <CHEBI_OBO_FILE>;
+    #chomp(@chebi_obo_terms);
+    #close CHEBI_OBO_FILE;
+
+    # read rice reactome file into array, one chomped line per element;
+    # fail loudly instead of carrying on with an empty list when the file
+    # is missing or unreadable
+    my $reactome_path = $data_path . $reactome_file;
+    open(my $reactome_fh, '<', $reactome_path)
+        or die "Cannot open Reactome file '$reactome_path': $!\n";
+    @reactome_ref_molecules = <$reactome_fh>;
+    chomp(@reactome_ref_molecules);
+    close $reactome_fh;
+}
 
-# setup output file
-open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
+# spit out some data to make sure you've read in the files correctly
+# --------------------------------------------------------------------
+sub test_inputs
+{
+    # Sanity check for the inputs: walk every ontology the chebi parser
+    # yields and print its name together with its root, total and leaf
+    # term counts. Takes no arguments.
+    while (my $ontology = $parser->next_ontology()) {
+        # each getter is called in scalar context to obtain a count
+        my $root_count = scalar($ontology->get_root_terms);
+        my $term_count = scalar($ontology->get_all_terms);
+        my $leaf_count = scalar($ontology->get_leaf_terms);
+
+        print "read ontology ", $ontology->name(), " with ",
+            $root_count, " root terms, and ",
+            $term_count, " total terms, and ",
+            $leaf_count, " leaf terms\n";
+    }
 
-#
-# do brute-force matching here
-#
+    # optional debugging aid - dump the raw input arrays
+    # print "$_\n" foreach @chebi_obo_terms;
+    # print "$_\n" foreach @reactome_ref_molecules;
+}
+
+# map the chebi terms to the reactome entries
+# --------------------------------------------------------------------
+sub perform_map
+{
+    # Write the reactome-to-chebi mapping to $mapped_output_file under
+    # $data_path. Takes no arguments. The matching itself is not
+    # implemented yet, so for now this only opens and closes the file.
+    # Dies if the output file cannot be opened or closed.
+
+    # setup output file; opened in append mode, so repeated runs add to
+    # any existing content rather than replacing it
+    my $output_path = $data_path . $mapped_output_file;
+    open(my $output_fh, '>>', $output_path)
+        or die "Cannot open output file '$output_path' for appending: $!\n";
+
+    # do brute-force matching here
+
+    # actually print out matches here
+    #print {$output_fh} "$_\n" foreach @reactome_ref_molecules;
+
+    # cleanup; buffered write errors only surface at close, so check it
+    close($output_fh)
+        or die "Cannot close output file '$output_path': $!\n";
+}
 
-print OUTPUT_FILE "$_\n" foreach @reactome_ref_molecules;
+# --------------------------------------------------------------------
+# main
+# --------------------------------------------------------------------
 
-# cleanup
-close OUTPUT_FILE;
+init;          # create the chebi parser and load the reactome lines
+test_inputs;   # print per-ontology term counts as a sanity check
+# perform_map; # mapping step, currently disabled
 
 exit;