#!/usr/bin/perl -w
use strict;
+# --------------------------------------------------------------------
+# Rice Reactome - CHEBI Ontology Mapping Script
+#
+# Justin Preece, 10/06/10
+#
+# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
+#
+# Inputs:
+# CHEBI OBO file (preset)
+# Rice Reactome file (preset, provided by YuanMing Wu)
+# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
+# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** please use a '-' (dash) symbol for any empty columns
+#
+# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
+# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# (Row) 923893 15414 CAS 29908-03-0
+# (Row) 923893 15414 LIGAND C00019
+# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available
+# --------------------------------------------------------------------
+
+# --------------------------------------------------------------------
+# modules
+# --------------------------------------------------------------------
+
+use Bio::OntologyIO;
+
+# --------------------------------------------------------------------
+# declarations
+# --------------------------------------------------------------------
+
# set paths to data files; the file names are appended directly to
# $data_path, so $data_path must keep its trailing "/"
my $data_path = "/home/preecej/Documents/Projects/Reactome/";
my $chebi_obo_file = "chebi_sample.obo"; # input: handed to the OBO ontology parser
my $reactome_file = "RiceReferenceMolecules_sample.txt"; # input: read line-by-line into an array
my $mapped_output_file = "reactome_chebi_mapping.txt"; # output: opened in append mode
-# read files into arrays
-open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
-my @chebi_obo_terms = <CHEBI_OBO_FILE>;
-chomp(@chebi_obo_terms);
-close CHEBI_OBO_FILE;
+my $parser; # chebi ontology parser (Bio::OntologyIO object, created in init)
+my @reactome_ref_molecules; # chomped lines of the rice reactome file (filled in init)
-open(REACTOME_FILE,$data_path . $reactome_file);
-my @reactome_ref_molecules = <REACTOME_FILE>;
-chomp(@reactome_ref_molecules);
-close REACTOME_FILE;
+# --------------------------------------------------------------------
+# functions
+# --------------------------------------------------------------------
-# show arrays
-# print "$_\n" foreach @chebi_obo_terms;
-# print "$_\n" foreach @reactome_ref_molecules;
+# setup chebi parser and reactome data
+#
+# Fills the file-level $parser with a Bio::OntologyIO parser (OBO format)
+# for the CHEBI file, and @reactome_ref_molecules with the chomped lines of
+# the rice reactome file. Takes no arguments.
+# Dies if the reactome file cannot be opened.
+# --------------------------------------------------------------------
+sub init
+{
+    # initialize ontology parser
+    $parser = Bio::OntologyIO->new(
+        -format => "obo",
+        -file   => $data_path . $chebi_obo_file);
+
+    # read rice reactome file into array; use a three-arg open with a
+    # lexical handle and fail loudly, rather than silently leaving the
+    # array empty when the file is missing or unreadable
+    my $reactome_path = $data_path . $reactome_file;
+    open(my $reactome_fh, '<', $reactome_path)
+        or die "Cannot open reactome file '$reactome_path': $!";
+    @reactome_ref_molecules = <$reactome_fh>;
+    chomp(@reactome_ref_molecules);
+    close($reactome_fh);
+}
-# setup output file
-open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
+# sanity check: report basic statistics for every ontology the parser
+# yields, so you can confirm the input files were read correctly
+#
+# NOTE(review): the loop pulls ontologies from $parser until none remain;
+# presumably a later mapping pass would need a fresh parser - confirm.
+# --------------------------------------------------------------------
+sub test_inputs
+{
+    # one summary line per ontology; each term accessor is called in
+    # scalar context, exactly as in a scalar(...) wrapper
+    while (my $ontology = $parser->next_ontology()) {
+        my $ontology_name = $ontology->name();
+        my $root_count    = scalar($ontology->get_root_terms);
+        my $total_count   = scalar($ontology->get_all_terms);
+        my $leaf_count    = scalar($ontology->get_leaf_terms);
+
+        print "read ontology ", $ontology_name, " with ",
+            $root_count, " root terms, and ",
+            $total_count, " total terms, and ",
+            $leaf_count, " leaf terms\n";
+    }
-#
-# do brute-force matching here
-#
+    # debugging aid - dump the raw arrays
+    # print "$_\n" foreach @chebi_obo_terms;
+    # print "$_\n" foreach @reactome_ref_molecules;
+}
+
+
+# map the chebi terms to the reactome entries
+#
+# Opens the mapping output file in append mode and closes it again; the
+# matching logic itself is not implemented yet, so nothing is written.
+# Takes no arguments. Dies if the output file cannot be opened or closed.
+# --------------------------------------------------------------------
+sub perform_map
+{
+    # setup output file; three-arg open with a lexical handle, and stop
+    # immediately if the file cannot be opened for appending
+    my $output_path = $data_path . $mapped_output_file;
+    open(my $output_fh, '>>', $output_path)
+        or die "Cannot open output file '$output_path' for appending: $!";
+
+    # do brute-force matching here
+
+    # actually print out matches here
+    #print {$output_fh} "$_\n" foreach @reactome_ref_molecules;
+
+    # cleanup; buffered write errors only surface at close, so check it
+    close($output_fh)
+        or die "Cannot close output file '$output_path': $!";
+}
-print OUTPUT_FILE "$_\n" foreach @reactome_ref_molecules;
+# --------------------------------------------------------------------
+# main
+# --------------------------------------------------------------------
-# cleanup
-close OUTPUT_FILE;
+init();           # build the parser and load the reactome lines
+test_inputs();    # print ontology statistics
+# perform_map();  # mapping step is currently switched off
exit 0;