From: preecej
Date: Thu, 7 Oct 2010 20:50:44 +0000 (+0000)
Subject: imported Bio::OntologyIO and ran parser test on CHEBI obo file
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=35b0919d86cb2f856712fb394af3a0984d43208f;p=old-jaiswallab-svn%2F.git

imported Bio::OntologyIO and ran parser test on CHEBI obo file

svn path=/; revision=58
---

diff --git a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
index ec89553..85e201b 100755
--- a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
+++ b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
@@ -1,37 +1,110 @@
 #!/usr/bin/perl -w
 use strict;
 
+# --------------------------------------------------------------------
+# Rice Reactome - CHEBI Ontology Mapping Script
+#
+# Justin Preece, 10/06/10
+#
+# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
+#
+# Inputs:
+# CHEBI OBO file (preset)
+# Rice Reactome file (preset, provided by YuanMing Wu)
+# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
+# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** please use a '-' (dash) symbol for any empty columns
+#
+# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
+# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
+# (Row) 923893 15414 CAS 29908-03-0
+# (Row) 923893 15414 LIGAND C00019
+# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence; only if CAS and LIGAND mappings are not available
+# --------------------------------------------------------------------
+
+# --------------------------------------------------------------------
+# modules
+# --------------------------------------------------------------------
+
+use Bio::OntologyIO;
+
+# --------------------------------------------------------------------
+# declarations
+# --------------------------------------------------------------------
+
 # set paths to data files
 my $data_path = "/home/preecej/Documents/Projects/Reactome/";
 my $chebi_obo_file = "chebi_sample.obo";
 my $reactome_file = "RiceReferenceMolecules_sample.txt";
 my $mapped_output_file = "reactome_chebi_mapping.txt";
 
-# read files into arrays
-open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
-my @chebi_obo_terms = <CHEBI_OBO_FILE>;
-chomp(@chebi_obo_terms);
-close CHEBI_OBO_FILE;
+my $parser; # chebi ontology parser
+my @reactome_ref_molecules; # rice reactomes
 
-open(REACTOME_FILE,$data_path . $reactome_file);
-my @reactome_ref_molecules = <REACTOME_FILE>;
-chomp(@reactome_ref_molecules);
-close REACTOME_FILE;
+# --------------------------------------------------------------------
+# functions
+# --------------------------------------------------------------------
 
-# show arrays
-# print "$_\n" foreach @chebi_obo_terms;
-# print "$_\n" foreach @reactome_ref_molecules;
+# setup chebi parser and reactome data
+# --------------------------------------------------------------------
+sub init
+{
+    # initialize ontology parser
+    $parser = Bio::OntologyIO->new (
+        -format => "obo",
+        -file => $data_path . $chebi_obo_file);
+
+    # read chebi file into array
+    #open(CHEBI_OBO_FILE,$data_path . $chebi_obo_file);
+    #my @chebi_obo_terms = <CHEBI_OBO_FILE>;
+    #chomp(@chebi_obo_terms);
+    #close CHEBI_OBO_FILE;
+
+    # read rice reactome file into array
+    open(REACTOME_FILE,$data_path . $reactome_file);
+    @reactome_ref_molecules = <REACTOME_FILE>;
+    chomp(@reactome_ref_molecules);
+    close REACTOME_FILE;
+}
 
-# setup output file
-open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
+# spit out some data to make sure you've read in the files correctly
+# --------------------------------------------------------------------
+sub test_inputs
+{
+    # output basic stats on chebi ontology
+    while (my $ont = $parser->next_ontology()) {
+        print "read ontology ",$ont->name()," with ",
+            scalar($ont->get_root_terms)," root terms, and ",
+            scalar($ont->get_all_terms)," total terms, and ",
+            scalar($ont->get_leaf_terms)," leaf terms\n";
+    }
 
-#
-# do brute-force matching here
-#
+    # test - show arrays
+    # print "$_\n" foreach @chebi_obo_terms;
+    # print "$_\n" foreach @reactome_ref_molecules;
+}
+
+# map the chebi terms to the reactome entries
+# --------------------------------------------------------------------
+sub perform_map
+{
+    # setup output file
+    open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file);
+
+    # do brute-force matching here
+
+    # actually print out matches here
+    #print OUTPUT_FILE "$_\n" foreach @reactome_ref_molecules;
+
+    # cleanup
+    close OUTPUT_FILE;
+}
 
-print OUTPUT_FILE "$_\n" foreach @reactome_ref_molecules;
+# --------------------------------------------------------------------
+# main
+# --------------------------------------------------------------------
 
-# cleanup
-close OUTPUT_FILE;
+init;
+test_inputs;
+# perform_map;
 
 exit;