From 173c2c03771972b3f605dde5c20105963d9bff5d Mon Sep 17 00:00:00 2001
From: elserj
Date: Wed, 6 Jul 2011 23:56:01 +0000
Subject: [PATCH] Added new species

svn path=/; revision=114
---
 interactome_scripts/find_species.pl | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/interactome_scripts/find_species.pl b/interactome_scripts/find_species.pl
index 2fdf90e..8b9bb6b 100755
--- a/interactome_scripts/find_species.pl
+++ b/interactome_scripts/find_species.pl
@@ -4,6 +4,9 @@
 #                                                         #
 #   Written by Justin Elser                               #
 #   Updated 9/2/10 for new species                        #
+#                                                         #
+#   Updated 6/16/11 for new species and added sub         #
+#       to return retrieval info                          #
 ###########################################################
 
 sub find_species {
@@ -52,6 +55,8 @@ sub find_species {
         $temp = "Glycine_max";
     }elsif ($temp =~ /Homo\_sapiens/) {
         $temp = "Homo_sapiens";
+    }elsif ($temp =~ /Jatropha/) {
+        $temp = "Jatropha_curcas";
     }elsif ($temp =~ /Laccaria/) {
         $temp = "Laccaria_bicolor";
     }elsif ($temp =~ /Magnaporthe/) {
@@ -76,6 +81,8 @@ sub find_species {
         $temp = "Oryza_sativa";
     }elsif ($temp =~ /Pediculus/) {
         $temp = "Pediculus_humanus";
+    }elsif ($temp =~ /Phoenix/) {
+        $temp = "Phoenix_dactylifera";
     }elsif ($temp =~ /Physcomit/) {
         $temp = "Physcomitrella_patens";
     }elsif ($temp =~ /Populus/) {
@@ -98,6 +105,8 @@ sub find_species {
         $temp = "Sorghum_bicolor";
     }elsif ($temp =~ /Synechocystis/) {
         $temp = "Synechocystis_pcc6803";
+    }elsif ($temp =~ /Theobroma\_cacao/) {
+        $temp = "Theobroma_cacao";
     }elsif ($temp =~ /Tolypocladium\_inflatum/) {
         $temp = "Tolypocladium_inflatum";
     }elsif ($temp =~ /Trichoderma\_atroviride/) {
@@ -186,6 +195,9 @@ sub find_gene {
     }elsif ($species eq "Homo_sapiens") {
         my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
         $gene = $protein;
+    }elsif ($species eq "Jatropha_curcas") {
+        my ($gene_id, $temp) = split(/\s/,$gene_header);
+        $gene = $gene_id;
     }elsif ($species eq "Laccaria_bicolor") {
         my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header);
         $gene = $gene_id;
@@ -222,6 +234,9 @@ sub find_gene {
         my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
         $gene_id =~ s/gene\://;
         $gene = $gene_pa;
+    }elsif ($species eq "Phoenix_dactylifera") {
+        my ($gene_id, $temp) = split(/\s/, $gene_header);
+        $gene = $gene_id;
     }elsif ($species eq "Physcomitrella_patens") {
         my ($name,$locus_id,$scaffold,$prot_id) = split(/\|/,$gene_header);
         $gene = $prot_id; #???
@@ -255,6 +270,9 @@ sub find_gene {
     }elsif ($species eq "Synechocystis_pcc6803") {
         my ($gene_id,$temp) = split(" ",$gene_header);
         $gene = $gene_id; #???
+    }elsif ($species eq "Theobroma_cacao") {
+        my ($gene_id,$temp) = split(/\s/,$gene_header);
+        $gene = $gene_id;
     }elsif ($species eq "Tolypocladium_inflatum") {
         my ($gene_id, $type, $info) = split(/\s/, $gene_header);
         $gene = $gene_id;
@@ -276,6 +294,8 @@ sub find_gene {
     }elsif ($species eq "Zea_mays") {
         my ($transcript,$seq_type,$coord,$parent_transcript,$parent_gene) = split(/;\s/, $gene_header);
         $gene = $transcript;
+        $gene =~ s/\sseq=translation//g;
+
     }else {
         die "Error: Gene id can not be found for species $species!";
     }
@@ -307,6 +327,7 @@ sub all_species_array {
     push (@spec_array, "Fusarium_verticilliodes");
     push (@spec_array, "Glycine_max");
     push (@spec_array, "Homo_sapiens");
+    push (@spec_array, "Jatropha_curcas");
     push (@spec_array, "Laccaria_bicolor");
     push (@spec_array, "Magnaporthe_grissa");
     push (@spec_array, "Malus_domestica");
@@ -319,6 +340,7 @@ sub all_species_array {
     push (@spec_array, "Nostoc_punctiforme");
     push (@spec_array, "Oryza_sativa");
     push (@spec_array, "Pediculus_humanus");
+    push (@spec_array, "Phoenix_dactylifera");
     push (@spec_array, "Physcomitrella_patens");
     push (@spec_array, "Populus_trichocarpa");
     push (@spec_array, "Prunus_persica");
@@ -330,6 +352,7 @@ sub all_species_array {
     push (@spec_array, "Selaginella_moellendorffii");
     push (@spec_array, "Sorghum_bicolor");
     push (@spec_array, "Synechocystis_pcc6803");
+    push (@spec_array, "Theobroma_cacao");
     push (@spec_array, "Tolypocladium_inflatum");
     push (@spec_array, "Trichoderma_atroviride");
     push (@spec_array, "Trichoderma_reesii");
@@ -340,4 +363,14 @@ sub all_species_array {
     return @spec_array;
 }
 
+
+# Parse a file name shaped like <Genus>_<species>_<method>_<n>_<n>_<n>.fa and return
+# ($method, $date); returns an empty list when the name does not match that layout.
+sub retrieval_info {
+    my $file = $_[0];
+    return unless defined $file;    # nothing to parse
+    my ($method, $date) = $file =~ /[a-zA-Z]*_[a-zA-Z0-9]*_((?:[a-zA-Z0-9]+_?)+)_(\d+_\d+_\d+)\.fa/ or return;    # failed match: never hand back stale $2/$3
+    return ($method, $date);
+}
+
 1;
-- 
2.34.1