From 7dffa7f1a5a96ef7229f5d67e7821ba2b094a0d2 Mon Sep 17 00:00:00 2001 From: preecej Date: Mon, 11 Oct 2010 17:24:19 +0000 Subject: [PATCH] Removed unnecessary backups from laptop svn path=/; revision=64 --- .../reactome_chebi_mapping-HEAD.pl | 250 ------------------ .../reactome_chebi_mapping-HEAD.pl~ | 248 ----------------- 2 files changed, 498 deletions(-) delete mode 100644 preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl delete mode 100644 preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl~ diff --git a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl deleted file mode 100644 index ec31169..0000000 --- a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl +++ /dev/null @@ -1,250 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -# SVN test from jedit on Mac - -# -------------------------------------------------------------------- -# Rice Reactome - CHEBI Ontology Mapping Script -# -# Justin Preece, 10/06/10 -# -# Purpose: Map CHEBI ontology terms onto Rice Reactome database. -# -# Inputs: -# CHEBI OBO file (preset) -# Rice Reactome file (preset, provided by YuanMing Wu) -# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc] -# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns -# -# Outputs: tab-del mapping file (reactome_chebi_mapping.txt) -# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] -# (Row) 923893 15414 CAS 29908-03-0 -# (Row) 923893 15414 LIGAND C00019 -# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence -# -------------------------------------------------------------------- - - -# -------------------------------------------------------------------- -# modules -# -------------------------------------------------------------------- - -use Bio::OntologyIO; - -# -------------------------------------------------------------------- -# declarations -# -------------------------------------------------------------------- - -# set paths to data files -my $data_path = "/home/preecej/Documents/Projects/Reactome/"; -my $chebi_obo_file = "chebi_sample.obo"; -my $reactome_file = "RiceReferenceMolecules_sample.txt"; -my $mapped_output_file = "reactome_chebi_mapping.txt"; - -my $ont; # chebi ontology - -my %reactome_CAS; # rice reactome CAS hash -my %reactome_LIGAND; # rice reactome LIGAND hash -my %reactome_RiceCyc; # rice reactome RiceCyc hash - -my @map_results = (); # successful mappings between chebi and reactome - - -# -------------------------------------------------------------------- -# functions -# -------------------------------------------------------------------- - - -# setup chebi parser and reactome data -# -------------------------------------------------------------------- -sub init -{ - # init ontology parser - my $parser = Bio::OntologyIO->new ( - -format => "obo", - -file => $data_path . $chebi_obo_file); - - # init ontology - $ont = $parser->next_ontology(); - $parser->close(); - - # read rice reactome file into 3 separate hashes - open(REACTOME_FILE,$data_path . $reactome_file); - - my $line = ; # skip the header - - while () - { - $line = $_; - chomp $line; - my @reactome_entry = split(/\t/, $line); # break up our tab-del line - - # load up this reactome entry's ID, CAS, LIGAND, and RiceCyc values - my $reactome_id = $reactome_entry[0]; - my $CAS_id = $reactome_entry[2]; - my $LIGAND_id = $reactome_entry[3]; - my $RiceCyc_term = $reactome_entry[4]; - - # There is a possibility that a single CAS, LIGAND, or RiceCyc - # identifier may appear in more than one reactome entry. This - # temp array allows each matched hash value to hold more than - # one ReactomeID, if necessary. - - # --CAS Hash Load-- - if ($CAS_id ne "-") # keep those "-" placeholders out - { - # build the CAS hash; each value may hold 1...n reactome - # ids (as an array) - push @{$reactome_CAS{$CAS_id}}, $reactome_id; - } - - # similarly... - - # --LIGAND Hash Load-- - if ($LIGAND_id ne "-") - { - push @{$reactome_LIGAND{$LIGAND_id}}, $reactome_id; - } - - # --RiceCyc Hash Load-- - if ($RiceCyc_term ne "-") - { - push @{$reactome_RiceCyc{"\U$RiceCyc_term"}}, $reactome_id; - } - } - close REACTOME_FILE; -} - - -# spit out some data to make sure you've read in the files correctly -# -------------------------------------------------------------------- -sub test_inputs -{ - # output basic stats on chebi ontology - print "\n[Ontology Stats]\n"; - print "read ontology ",$ont->name()," with ", - scalar($ont->get_root_terms)," root terms, and ", - scalar($ont->get_all_terms)," total terms, and ", - scalar($ont->get_leaf_terms)," leaf terms\n"; - - # all chebi terms in the ontology - print "\n[CHEBI Term List from \$ont]\n"; - foreach my $term ($ont->get_all_terms) { - my @synonyms = $term->get_synonyms; - my @xrefs = $term->get_dbxrefs; - - print $term->identifier; - print " \|NAME\| "; - if (defined($term->name)) { - print $term->name; - } - print " \|SYNONYMS\| "; - print "$_," foreach @synonyms; - print " \|XREFS\| "; - print "$_" foreach @xrefs; - foreach my $xref (@xrefs) { - print $xref->primary_id; - } - print "\n\n"; - } - - # show reactome hashes - this is important, give >1 dupes to Pankaj - # for manual reference - my $k; my @v; - print "\n[Reactome Hashes]\n"; - print "\n--CAS Hash--\n"; - for $k (keys %reactome_CAS) { - #if (@{$reactome_CAS{$k}} > 1) { - print "$k: @{$reactome_CAS{$k}}\n"; - #} - } - print "\n--LIGAND Hash--\n"; - for $k (keys %reactome_LIGAND) { - #if (@{$reactome_LIGAND{$k}} > 1) { - print "$k: @{$reactome_LIGAND{$k}}\n"; - #} - } - print "\n--RiceCyc Hash--\n"; - for $k (keys %reactome_RiceCyc) { - #if (@{$reactome_RiceCyc{$k}} > 1) { - print "$k: @{$reactome_RiceCyc{$k}}\n"; - #} - } -} - - -# map the chebi terms to the reactome entries -# -------------------------------------------------------------------- -sub perform_map -{ - my @chebi_obo_terms = $ont->get_all_terms; - #print $_->identifier . "\n" foreach @chebi_obo_terms; - - # loop through each chebi term - foreach my $term (@chebi_obo_terms) - { - # set locals for matching each term property - my $term_name; - if (defined($term->name)) { - $term_name = $term->name; - } else { - $term_name = ""; - } - my @term_synonyms = $term->get_synonyms; - - # attempt CHEBI match on CAS ID - - # attempt CHEBI match on LIGAND ID - - # attempt CHEBI match on RiceCyc names - if (defined($reactome_RiceCyc{"\U$term_name"})) { - push (@map_results, "$reactome_RiceCyc{$term_name}\t", - "$term->identifier\t", - "RiceCyc\t", - $term_name); - } else { # check the term synonyms, if needed - foreach my $synonym (@term_synonyms) { - print ""; - } - } - } -} - -# sample format - remove later -# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] -# 923893 15414 CAS 29908-03-0 -# 923893 15414 LIGAND C00019 -# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE - - -# put the results in the mapped output file -# -------------------------------------------------------------------- -sub create_mapfile -{ - if (@map_results > 0) - { - # add a header to the results array - unshift (@map_results, "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID"); - - # setup output file - open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file); - - #format results for file output - print OUTPUT_FILE "$_\n" foreach @map_results; - - close OUTPUT_FILE; - } else { - print "\n\nSorry, there are no mapped results.\n\n"; - } -} - - -# -------------------------------------------------------------------- -# main -# -------------------------------------------------------------------- - -init; -#test_inputs; -perform_map; -create_mapfile; - -exit; diff --git a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl~ b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl~ deleted file mode 100644 index 5d01cd7..0000000 --- a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping-HEAD.pl~ +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/perl -w -use strict; - -# -------------------------------------------------------------------- -# Rice Reactome - CHEBI Ontology Mapping Script -# -# Justin Preece, 10/06/10 -# -# Purpose: Map CHEBI ontology terms onto Rice Reactome database. -# -# Inputs: -# CHEBI OBO file (preset) -# Rice Reactome file (preset, provided by YuanMing Wu) -# (Header) [ReactomeID] [Compound_Name] [CAS] [LIGAND] [RiceCyc] -# (Row) 923893 S-adenosyl-L-methionine 29908-03-0 C00019 S-ADENOSYLMETHIONINE ** the '-' (dash) symbol will be applied to any empty columns -# -# Outputs: tab-del mapping file (reactome_chebi_mapping.txt) -# (Header) [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] -# (Row) 923893 15414 CAS 29908-03-0 -# (Row) 923893 15414 LIGAND C00019 -# (Row) 923893 15414 RiceCyc S-ADENOSYLMETHIONINE ** this would be a rare mapping occurrence -# -------------------------------------------------------------------- - - -# -------------------------------------------------------------------- -# modules -# -------------------------------------------------------------------- - -use Bio::OntologyIO; - -# -------------------------------------------------------------------- -# declarations -# -------------------------------------------------------------------- - -# set paths to data files -my $data_path = "/home/preecej/Documents/Projects/Reactome/"; -my $chebi_obo_file = "chebi_sample.obo"; -my $reactome_file = "RiceReferenceMolecules_sample.txt"; -my $mapped_output_file = "reactome_chebi_mapping.txt"; - -my $ont; # chebi ontology - -my %reactome_CAS; # rice reactome CAS hash -my %reactome_LIGAND; # rice reactome LIGAND hash -my %reactome_RiceCyc; # rice reactome RiceCyc hash - -my @map_results = (); # successful mappings between chebi and reactome - - -# -------------------------------------------------------------------- -# functions -# -------------------------------------------------------------------- - - -# setup chebi parser and reactome data -# -------------------------------------------------------------------- -sub init -{ - # init ontology parser - my $parser = Bio::OntologyIO->new ( - -format => "obo", - -file => $data_path . $chebi_obo_file); - - # init ontology - $ont = $parser->next_ontology(); - $parser->close(); - - # read rice reactome file into 3 separate hashes - open(REACTOME_FILE,$data_path . $reactome_file); - - my $line = ; # skip the header - - while () - { - $line = $_; - chomp $line; - my @reactome_entry = split(/\t/, $line); # break up our tab-del line - - # load up this reactome entry's ID, CAS, LIGAND, and RiceCyc values - my $reactome_id = $reactome_entry[0]; - my $CAS_id = $reactome_entry[2]; - my $LIGAND_id = $reactome_entry[3]; - my $RiceCyc_term = $reactome_entry[4]; - - # There is a possibility that a single CAS, LIGAND, or RiceCyc - # identifier may appear in more than one reactome entry. This - # temp array allows each matched hash value to hold more than - # one ReactomeID, if necessary. - - # --CAS Hash Load-- - if ($CAS_id ne "-") # keep those "-" placeholders out - { - # build the CAS hash; each value may hold 1...n reactome - # ids (as an array) - push @{$reactome_CAS{$CAS_id}}, $reactome_id; - } - - # similarly... - - # --LIGAND Hash Load-- - if ($LIGAND_id ne "-") - { - push @{$reactome_LIGAND{$LIGAND_id}}, $reactome_id; - } - - # --RiceCyc Hash Load-- - if ($RiceCyc_term ne "-") - { - push @{$reactome_RiceCyc{"\U$RiceCyc_term"}}, $reactome_id; - } - } - close REACTOME_FILE; -} - - -# spit out some data to make sure you've read in the files correctly -# -------------------------------------------------------------------- -sub test_inputs -{ - # output basic stats on chebi ontology - print "\n[Ontology Stats]\n"; - print "read ontology ",$ont->name()," with ", - scalar($ont->get_root_terms)," root terms, and ", - scalar($ont->get_all_terms)," total terms, and ", - scalar($ont->get_leaf_terms)," leaf terms\n"; - - # all chebi terms in the ontology - print "\n[CHEBI Term List from \$ont]\n"; - foreach my $term ($ont->get_all_terms) { - my @synonyms = $term->get_synonyms; - my @xrefs = $term->get_dbxrefs; - - print $term->identifier; - print " \|NAME\| "; - if (defined($term->name)) { - print $term->name; - } - print " \|SYNONYMS\| "; - print "$_," foreach @synonyms; - print " \|XREFS\| "; - print "$_" foreach @xrefs; - foreach my $xref (@xrefs) { - print $xref->primary_id; - } - print "\n\n"; - } - - # show reactome hashes - this is important, give >1 dupes to Pankaj - # for manual reference - my $k; my @v; - print "\n[Reactome Hashes]\n"; - print "\n--CAS Hash--\n"; - for $k (keys %reactome_CAS) { - #if (@{$reactome_CAS{$k}} > 1) { - print "$k: @{$reactome_CAS{$k}}\n"; - #} - } - print "\n--LIGAND Hash--\n"; - for $k (keys %reactome_LIGAND) { - #if (@{$reactome_LIGAND{$k}} > 1) { - print "$k: @{$reactome_LIGAND{$k}}\n"; - #} - } - print "\n--RiceCyc Hash--\n"; - for $k (keys %reactome_RiceCyc) { - #if (@{$reactome_RiceCyc{$k}} > 1) { - print "$k: @{$reactome_RiceCyc{$k}}\n"; - #} - } -} - - -# map the chebi terms to the reactome entries -# -------------------------------------------------------------------- -sub perform_map -{ - my @chebi_obo_terms = $ont->get_all_terms; - #print $_->identifier . "\n" foreach @chebi_obo_terms; - - # loop through each chebi term - foreach my $term (@chebi_obo_terms) - { - # set locals for matching each term property - my $term_name; - if (defined($term->name)) { - $term_name = $term->name; - } else { - $term_name = ""; - } - my @term_synonyms = $term->get_synonyms; - - # attempt CHEBI match on CAS ID - - # attempt CHEBI match on LIGAND ID - - # attempt CHEBI match on RiceCyc names - if (defined($reactome_RiceCyc{"\U$term_name"})) { - push (@map_results, "$reactome_RiceCyc{$term_name}\t", - "$term->identifier\t", - "RiceCyc\t", - $term_name); - } else { # check the term synonyms, if needed - foreach my $synonym (@term_synonyms) { - print ""; - } - } - } -} - -# sample format - remove later -# [ReactomeID] [CHEBI] [XREF_Type] [XREF_ID] -# 923893 15414 CAS 29908-03-0 -# 923893 15414 LIGAND C00019 -# 923893 15414 RiceCyc S-ADENOSYLMETHIONINE - - -# put the results in the mapped output file -# -------------------------------------------------------------------- -sub create_mapfile -{ - if (@map_results > 0) - { - # add a header to the results array - unshift (@map_results, "ReactomeID\tCHEBI\tXREF_Type\tXREF_ID"); - - # setup output file - open(OUTPUT_FILE,">>" . $data_path . $mapped_output_file); - - #format results for file output - print OUTPUT_FILE "$_\n" foreach @map_results; - - close OUTPUT_FILE; - } else { - print "\n\nSorry, there are no mapped results.\n\n"; - } -} - - -# -------------------------------------------------------------------- -# main -# -------------------------------------------------------------------- - -init; -#test_inputs; -perform_map; -create_mapfile; - -exit; -- 2.34.1