+++ /dev/null
-#!/usr/bin/perl -w
-use strict;
-
-# SVN test from jedit on Mac
-
-# --------------------------------------------------------------------
-# Rice Reactome - CHEBI Ontology Mapping Script
-#
-# Justin Preece, 10/06/10
-#
-# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
-#
-# Inputs:
-# CHEBI OBO file (preset)
-# Rice Reactome file (preset, provided by YuanMing Wu)
-# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
-# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns
-#
-# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
-# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
-# (Row) 923893 15414 CAS 29908-03-0
-# (Row) 923893 15414 LIGAND C00019
-# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence
-# --------------------------------------------------------------------
-
-
-# --------------------------------------------------------------------
-# modules
-# --------------------------------------------------------------------
-
-use Bio::OntologyIO;
-
-# --------------------------------------------------------------------
-# declarations
-# --------------------------------------------------------------------
-
-# set paths to data files
-my $data_path = "/home/preecej/Documents/Projects/Reactome/";
-my $chebi_obo_file = "chebi_sample.obo";
-my $reactome_file = "RiceReferenceMolecules_sample.txt";
-my $mapped_output_file = "reactome_chebi_mapping.txt";
-
-my $ont; # chebi ontology
-
-my %reactome_CAS; # rice reactome CAS hash
-my %reactome_LIGAND; # rice reactome LIGAND hash
-my %reactome_RiceCyc; # rice reactome RiceCyc hash
-
-my @map_results = (); # successful mappings between chebi and reactome
-
-
-# --------------------------------------------------------------------
-# functions
-# --------------------------------------------------------------------
-
-
-# Load the CHEBI ontology and index the Rice Reactome reference molecules.
-#
-# Reads the file-level settings $data_path, $chebi_obo_file and
-# $reactome_file. Fills $ont and the three lookup hashes %reactome_CAS,
-# %reactome_LIGAND and %reactome_RiceCyc. Each hash maps an identifier to
-# an array ref of ReactomeIDs, because a single identifier may appear in
-# more than one reactome entry. RiceCyc keys are stored upper-cased.
-#
-# Dies if the OBO file yields no ontology or if the reactome file cannot
-# be opened.
-# --------------------------------------------------------------------
-sub init
-{
-    # init ontology parser
-    my $parser = Bio::OntologyIO->new(
-        -format => "obo",
-        -file   => $data_path . $chebi_obo_file);
-
-    # init ontology
-    $ont = $parser->next_ontology();
-    $parser->close();
-    defined $ont
-        or die "No ontology could be read from $data_path$chebi_obo_file\n";
-
-    # read rice reactome file into 3 separate hashes
-    my $reactome_path = $data_path . $reactome_file;
-    open(my $reactome_fh, '<', $reactome_path)
-        or die "Cannot open $reactome_path: $!\n";
-
-    my $header = <$reactome_fh>;    # skip the header
-
-    while (my $line = <$reactome_fh>)
-    {
-        $line =~ s/\r?\n\z//;       # accept CRLF as well as LF line endings
-        next if $line =~ /^\s*$/;   # ignore blank lines
-
-        # break up our tab-del line; the -1 limit keeps trailing empty
-        # columns so the column positions stay fixed
-        my @reactome_entry = split(/\t/, $line, -1);
-
-        my $reactome_id = $reactome_entry[0];
-        next unless defined $reactome_id && length $reactome_id;
-
-        # Columns: [0] ReactomeID, [2] CAS, [3] LIGAND, [4] RiceCyc.
-        # A missing or empty column is treated like the "-" placeholder so
-        # that it never becomes an (empty) hash key.
-        my ($CAS_id, $LIGAND_id, $RiceCyc_term) =
-            map { (defined $_ && length $_) ? $_ : "-" }
-            @reactome_entry[2 .. 4];
-
-        # --CAS Hash Load-- (keep those "-" placeholders out)
-        push @{ $reactome_CAS{$CAS_id} }, $reactome_id
-            if $CAS_id ne "-";
-
-        # --LIGAND Hash Load--
-        push @{ $reactome_LIGAND{$LIGAND_id} }, $reactome_id
-            if $LIGAND_id ne "-";
-
-        # --RiceCyc Hash Load-- (upper-cased for case-insensitive matching)
-        push @{ $reactome_RiceCyc{"\U$RiceCyc_term"} }, $reactome_id
-            if $RiceCyc_term ne "-";
-    }
-    close $reactome_fh;
-}
-
-
-# Debug helper: dump the loaded inputs to STDOUT so they can be checked
-# by eye. Prints ontology statistics from $ont, then every CHEBI term
-# (identifier, name, synonyms, xrefs), then the contents of the three
-# reactome lookup hashes.
-# --------------------------------------------------------------------
-sub test_inputs
-{
-    # basic stats on the chebi ontology
-    print "\n[Ontology Stats]\n";
-    print "read ontology ", $ont->name(), " with ",
-        scalar($ont->get_root_terms), " root terms, and ",
-        scalar($ont->get_all_terms), " total terms, and ",
-        scalar($ont->get_leaf_terms), " leaf terms\n";
-
-    # every chebi term in the ontology
-    print "\n[CHEBI Term List from \$ont]\n";
-    foreach my $chebi_term ($ont->get_all_terms) {
-        my @synonym_list = $chebi_term->get_synonyms;
-        my @xref_list    = $chebi_term->get_dbxrefs;
-
-        print $chebi_term->identifier;
-        print " \|NAME\| ";
-        print $chebi_term->name if defined($chebi_term->name);
-        print " \|SYNONYMS\| ";
-        print map { "$_," } @synonym_list;
-        print " \|XREFS\| ";
-        print map { "$_" } @xref_list;
-        foreach my $dbxref (@xref_list) {
-            print $dbxref->primary_id;
-        }
-        print "\n\n";
-    }
-
-    # show reactome hashes - this is important, give >1 dupes to Pankaj
-    # for manual reference. Every identifier is listed with all of its
-    # ReactomeIDs; no filtering on the number of ids is applied.
-    my @hash_dumps = (
-        [ "\n--CAS Hash--\n",     \%reactome_CAS ],
-        [ "\n--LIGAND Hash--\n",  \%reactome_LIGAND ],
-        [ "\n--RiceCyc Hash--\n", \%reactome_RiceCyc ],
-    );
-
-    print "\n[Reactome Hashes]\n";
-    foreach my $dump (@hash_dumps) {
-        my ($banner, $lookup) = @{$dump};
-        print $banner;
-        foreach my $id (keys %{$lookup}) {
-            print "$id: @{$lookup->{$id}}\n";
-        }
-    }
-}
-
-
-# map the chebi terms to the reactome entries
-#
-# For every term in $ont whose upper-cased name is a key of
-# %reactome_RiceCyc, appends one tab-delimited row per matching ReactomeID
-# to @map_results:
-#
-#   ReactomeID <tab> CHEBI identifier <tab> "RiceCyc" <tab> term name
-#
-# Each row is a single string without a trailing newline. Matching on
-# CAS ID, LIGAND ID and term synonyms is not implemented yet.
-# --------------------------------------------------------------------
-sub perform_map
-{
-    # loop through each chebi term
-    foreach my $term ($ont->get_all_terms)
-    {
-        # terms without a name cannot match; skipping them also avoids
-        # looking up the empty string as a hash key
-        my $term_name = $term->name;
-        next unless defined $term_name && length $term_name;
-
-        # TODO: attempt CHEBI match on CAS ID
-
-        # TODO: attempt CHEBI match on LIGAND ID
-
-        # attempt CHEBI match on RiceCyc names; the hash keys are stored
-        # upper-cased, so the lookup key must be upper-cased as well
-        my $reactome_ids = $reactome_RiceCyc{"\U$term_name"};
-
-        # TODO: check the term synonyms when the name itself does not match
-        next unless defined $reactome_ids;
-
-        # method calls do not interpolate inside strings, so fetch the
-        # identifier first
-        my $chebi_id = $term->identifier;
-        $chebi_id = "" unless defined $chebi_id;
-
-        # one identifier may belong to several reactome entries: emit one
-        # complete row per ReactomeID
-        foreach my $reactome_id (@{$reactome_ids}) {
-            push @map_results,
-                join("\t", $reactome_id, $chebi_id, "RiceCyc", $term_name);
-        }
-    }
-}
-
-# sample format - remove later
-# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
-# 923893 15414 CAS 29908-03-0
-# 923893 15414 LIGAND C00019
-# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE
-
-
-# put the results in the mapped output file
-#
-# Appends every entry of @map_results, one per line, to
-# $data_path . $mapped_output_file. The column header is written only
-# when the file is new or empty, so repeated runs do not scatter header
-# lines through the data. @map_results itself is left unmodified.
-# Prints a notice to STDOUT instead when there is nothing to write.
-#
-# Dies if the output file cannot be opened or closed.
-# --------------------------------------------------------------------
-sub create_mapfile
-{
-    if (@map_results > 0)
-    {
-        my $output_path = $data_path . $mapped_output_file;
-
-        # decide before opening: opening in append mode creates the file
-        my $needs_header = !(-s $output_path);
-
-        # setup output file
-        open(my $output_fh, '>>', $output_path)
-            or die "Cannot open $output_path for appending: $!\n";
-
-        print {$output_fh} "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID\n"
-            if $needs_header;
-
-        # format results for file output
-        print {$output_fh} "$_\n" foreach @map_results;
-
-        # buffered write errors only surface at close
-        close($output_fh)
-            or die "Cannot close $output_path: $!\n";
-    } else {
-        print "\n\nSorry, there are no mapped results.\n\n";
-    }
-}
-
-
-# --------------------------------------------------------------------
-# main
-# --------------------------------------------------------------------
-
-# Run the pipeline: load the inputs, map them, write the mapping file.
-init();
-#test_inputs();     # enable to dump the loaded inputs for inspection
-perform_map();
-create_mapfile();
-
-exit 0;
+++ /dev/null
-#!/usr/bin/perl -w
-use strict;
-
-# --------------------------------------------------------------------
-# Rice Reactome - CHEBI Ontology Mapping Script
-#
-# Justin Preece, 10/06/10
-#
-# Purpose: Map CHEBI ontology terms onto Rice Reactome database.
-#
-# Inputs:
-# CHEBI OBO file (preset)
-# Rice Reactome file (preset, provided by YuanMing Wu)
-# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc]
-# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns
-#
-# Outputs: tab-del mapping file (reactome_chebi_mapping.txt)
-# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
-# (Row) 923893 15414 CAS 29908-03-0
-# (Row) 923893 15414 LIGAND C00019
-# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence
-# --------------------------------------------------------------------
-
-
-# --------------------------------------------------------------------
-# modules
-# --------------------------------------------------------------------
-
-use Bio::OntologyIO;
-
-# --------------------------------------------------------------------
-# declarations
-# --------------------------------------------------------------------
-
-# set paths to data files
-my $data_path = "/home/preecej/Documents/Projects/Reactome/";
-my $chebi_obo_file = "chebi_sample.obo";
-my $reactome_file = "RiceReferenceMolecules_sample.txt";
-my $mapped_output_file = "reactome_chebi_mapping.txt";
-
-my $ont; # chebi ontology
-
-my %reactome_CAS; # rice reactome CAS hash
-my %reactome_LIGAND; # rice reactome LIGAND hash
-my %reactome_RiceCyc; # rice reactome RiceCyc hash
-
-my @map_results = (); # successful mappings between chebi and reactome
-
-
-# --------------------------------------------------------------------
-# functions
-# --------------------------------------------------------------------
-
-
-# Load the CHEBI ontology and index the Rice Reactome reference molecules.
-#
-# Reads the file-level settings $data_path, $chebi_obo_file and
-# $reactome_file. Fills $ont and the three lookup hashes %reactome_CAS,
-# %reactome_LIGAND and %reactome_RiceCyc. Each hash maps an identifier to
-# an array ref of ReactomeIDs, because a single identifier may appear in
-# more than one reactome entry. RiceCyc keys are stored upper-cased.
-#
-# Dies if the OBO file yields no ontology or if the reactome file cannot
-# be opened.
-# --------------------------------------------------------------------
-sub init
-{
-    # init ontology parser
-    my $parser = Bio::OntologyIO->new(
-        -format => "obo",
-        -file   => $data_path . $chebi_obo_file);
-
-    # init ontology
-    $ont = $parser->next_ontology();
-    $parser->close();
-    defined $ont
-        or die "No ontology could be read from $data_path$chebi_obo_file\n";
-
-    # read rice reactome file into 3 separate hashes
-    my $reactome_path = $data_path . $reactome_file;
-    open(my $reactome_fh, '<', $reactome_path)
-        or die "Cannot open $reactome_path: $!\n";
-
-    my $header = <$reactome_fh>;    # skip the header
-
-    while (my $line = <$reactome_fh>)
-    {
-        $line =~ s/\r?\n\z//;       # accept CRLF as well as LF line endings
-        next if $line =~ /^\s*$/;   # ignore blank lines
-
-        # break up our tab-del line; the -1 limit keeps trailing empty
-        # columns so the column positions stay fixed
-        my @reactome_entry = split(/\t/, $line, -1);
-
-        my $reactome_id = $reactome_entry[0];
-        next unless defined $reactome_id && length $reactome_id;
-
-        # Columns: [0] ReactomeID, [2] CAS, [3] LIGAND, [4] RiceCyc.
-        # A missing or empty column is treated like the "-" placeholder so
-        # that it never becomes an (empty) hash key.
-        my ($CAS_id, $LIGAND_id, $RiceCyc_term) =
-            map { (defined $_ && length $_) ? $_ : "-" }
-            @reactome_entry[2 .. 4];
-
-        # --CAS Hash Load-- (keep those "-" placeholders out)
-        push @{ $reactome_CAS{$CAS_id} }, $reactome_id
-            if $CAS_id ne "-";
-
-        # --LIGAND Hash Load--
-        push @{ $reactome_LIGAND{$LIGAND_id} }, $reactome_id
-            if $LIGAND_id ne "-";
-
-        # --RiceCyc Hash Load-- (upper-cased for case-insensitive matching)
-        push @{ $reactome_RiceCyc{"\U$RiceCyc_term"} }, $reactome_id
-            if $RiceCyc_term ne "-";
-    }
-    close $reactome_fh;
-}
-
-
-# Debug helper: dump the loaded inputs to STDOUT so they can be checked
-# by eye. Prints ontology statistics from $ont, then every CHEBI term
-# (identifier, name, synonyms, xrefs), then the contents of the three
-# reactome lookup hashes.
-# --------------------------------------------------------------------
-sub test_inputs
-{
-    # basic stats on the chebi ontology
-    print "\n[Ontology Stats]\n";
-    print "read ontology ", $ont->name(), " with ",
-        scalar($ont->get_root_terms), " root terms, and ",
-        scalar($ont->get_all_terms), " total terms, and ",
-        scalar($ont->get_leaf_terms), " leaf terms\n";
-
-    # every chebi term in the ontology
-    print "\n[CHEBI Term List from \$ont]\n";
-    foreach my $chebi_term ($ont->get_all_terms) {
-        my @synonym_list = $chebi_term->get_synonyms;
-        my @xref_list    = $chebi_term->get_dbxrefs;
-
-        print $chebi_term->identifier;
-        print " \|NAME\| ";
-        print $chebi_term->name if defined($chebi_term->name);
-        print " \|SYNONYMS\| ";
-        print map { "$_," } @synonym_list;
-        print " \|XREFS\| ";
-        print map { "$_" } @xref_list;
-        foreach my $dbxref (@xref_list) {
-            print $dbxref->primary_id;
-        }
-        print "\n\n";
-    }
-
-    # show reactome hashes - this is important, give >1 dupes to Pankaj
-    # for manual reference. Every identifier is listed with all of its
-    # ReactomeIDs; no filtering on the number of ids is applied.
-    my @hash_dumps = (
-        [ "\n--CAS Hash--\n",     \%reactome_CAS ],
-        [ "\n--LIGAND Hash--\n",  \%reactome_LIGAND ],
-        [ "\n--RiceCyc Hash--\n", \%reactome_RiceCyc ],
-    );
-
-    print "\n[Reactome Hashes]\n";
-    foreach my $dump (@hash_dumps) {
-        my ($banner, $lookup) = @{$dump};
-        print $banner;
-        foreach my $id (keys %{$lookup}) {
-            print "$id: @{$lookup->{$id}}\n";
-        }
-    }
-}
-
-
-# map the chebi terms to the reactome entries
-#
-# For every term in $ont whose upper-cased name is a key of
-# %reactome_RiceCyc, appends one tab-delimited row per matching ReactomeID
-# to @map_results:
-#
-#   ReactomeID <tab> CHEBI identifier <tab> "RiceCyc" <tab> term name
-#
-# Each row is a single string without a trailing newline. Matching on
-# CAS ID, LIGAND ID and term synonyms is not implemented yet.
-# --------------------------------------------------------------------
-sub perform_map
-{
-    # loop through each chebi term
-    foreach my $term ($ont->get_all_terms)
-    {
-        # terms without a name cannot match; skipping them also avoids
-        # looking up the empty string as a hash key
-        my $term_name = $term->name;
-        next unless defined $term_name && length $term_name;
-
-        # TODO: attempt CHEBI match on CAS ID
-
-        # TODO: attempt CHEBI match on LIGAND ID
-
-        # attempt CHEBI match on RiceCyc names; the hash keys are stored
-        # upper-cased, so the lookup key must be upper-cased as well
-        my $reactome_ids = $reactome_RiceCyc{"\U$term_name"};
-
-        # TODO: check the term synonyms when the name itself does not match
-        next unless defined $reactome_ids;
-
-        # method calls do not interpolate inside strings, so fetch the
-        # identifier first
-        my $chebi_id = $term->identifier;
-        $chebi_id = "" unless defined $chebi_id;
-
-        # one identifier may belong to several reactome entries: emit one
-        # complete row per ReactomeID
-        foreach my $reactome_id (@{$reactome_ids}) {
-            push @map_results,
-                join("\t", $reactome_id, $chebi_id, "RiceCyc", $term_name);
-        }
-    }
-}
-
-# sample format - remove later
-# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID]
-# 923893 15414 CAS 29908-03-0
-# 923893 15414 LIGAND C00019
-# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE
-
-
-# put the results in the mapped output file
-#
-# Appends every entry of @map_results, one per line, to
-# $data_path . $mapped_output_file. The column header is written only
-# when the file is new or empty, so repeated runs do not scatter header
-# lines through the data. @map_results itself is left unmodified.
-# Prints a notice to STDOUT instead when there is nothing to write.
-#
-# Dies if the output file cannot be opened or closed.
-# --------------------------------------------------------------------
-sub create_mapfile
-{
-    if (@map_results > 0)
-    {
-        my $output_path = $data_path . $mapped_output_file;
-
-        # decide before opening: opening in append mode creates the file
-        my $needs_header = !(-s $output_path);
-
-        # setup output file
-        open(my $output_fh, '>>', $output_path)
-            or die "Cannot open $output_path for appending: $!\n";
-
-        print {$output_fh} "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID\n"
-            if $needs_header;
-
-        # format results for file output
-        print {$output_fh} "$_\n" foreach @map_results;
-
-        # buffered write errors only surface at close
-        close($output_fh)
-            or die "Cannot close $output_path: $!\n";
-    } else {
-        print "\n\nSorry, there are no mapped results.\n\n";
-    }
-}
-
-
-# --------------------------------------------------------------------
-# main
-# --------------------------------------------------------------------
-
-# Run the pipeline: load the inputs, map them, write the mapping file.
-init();
-#test_inputs();     # enable to dump the loaded inputs for inspection
-perform_map();
-create_mapfile();
-
-exit 0;