Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added new species
author elserj <elserj@localhost>
Wed, 6 Jul 2011 23:56:01 +0000 (23:56 +0000)
committer elserj <elserj@localhost>
Wed, 6 Jul 2011 23:56:01 +0000 (23:56 +0000)
svn path=/; revision=114

interactome_scripts/find_species.pl

index 2fdf90e4d5a2ff4f4fc03b4bcece7d54172ff1d8..8b9bb6b99090a07a9684ff674938f7ae22467883 100755 (executable)
@@ -4,6 +4,9 @@
 #                                                         #
 #   Written by Justin Elser                               #
 #     Updated 9/2/10 for new species                      #
+#                                                         #
+#     Updated 6/16/11 for new species and added sub       #
+#        to return retrieval info                         #
 ###########################################################
 
 sub find_species {
@@ -52,6 +55,8 @@ sub find_species {
                $temp = "Glycine_max";
        }elsif ($temp =~ /Homo\_sapiens/) {
                $temp = "Homo_sapiens";
+       }elsif ($temp =~ /Jatropha/) {
+               $temp = "Jatropha_curcas";
        }elsif ($temp =~ /Laccaria/) {
                $temp = "Laccaria_bicolor";
        }elsif ($temp =~ /Magnaporthe/) {
@@ -76,6 +81,8 @@ sub find_species {
                $temp = "Oryza_sativa";
        }elsif ($temp =~ /Pediculus/) {
                $temp = "Pediculus_humanus";
+       }elsif ($temp =~ /Phoenix/) {
+               $temp = "Phoenix_dactylifera";
        }elsif ($temp =~ /Physcomit/) {
                $temp = "Physcomitrella_patens";
        }elsif ($temp =~ /Populus/) {
@@ -98,6 +105,8 @@ sub find_species {
                $temp = "Sorghum_bicolor";
        }elsif ($temp =~ /Synechocystis/) {
                $temp = "Synechocystis_pcc6803";
+       }elsif ($temp =~ /Theobroma\_cacao/) {
+               $temp = "Theobroma_cacao";
        }elsif ($temp =~ /Tolypocladium\_inflatum/) {
                $temp = "Tolypocladium_inflatum";
        }elsif ($temp =~ /Trichoderma\_atroviride/) {
@@ -186,6 +195,9 @@ sub find_gene {
        }elsif ($species eq "Homo_sapiens") {
                my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
                $gene = $protein;
+       }elsif ($species eq "Jatropha_curcas") {
+               my ($gene_id, $temp) = split(/\s/,$gene_header);
+               $gene = $gene_id;
        }elsif ($species eq "Laccaria_bicolor") {
                my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header);
                $gene = $gene_id;
@@ -222,6 +234,9 @@ sub find_gene {
                my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
                $gene_id =~ s/gene\://;
                $gene = $gene_pa;
+       }elsif ($species eq "Phoenix_dactylifera") {
+               my ($gene_id, $temp) = split(/\s/, $gene_header);
+               $gene = $gene_id;
        }elsif ($species eq "Physcomitrella_patens") {
                my ($name,$locus_id,$scaffold,$prot_id) = split(/\|/,$gene_header);
                $gene = $prot_id; #???
@@ -255,6 +270,9 @@ sub find_gene {
        }elsif ($species eq "Synechocystis_pcc6803") {
                my ($gene_id,$temp) = split(" ",$gene_header);
                $gene = $gene_id; #???
+       }elsif ($species eq "Theobroma_cacao") {
+               my ($gene_id,$temp) = split(/\s/,$gene_header);
+               $gene = $gene_id;
        }elsif ($species eq "Tolypocladium_inflatum") {
                my ($gene_id, $type, $info) = split(/\s/, $gene_header);
                $gene = $gene_id;
@@ -276,6 +294,8 @@ sub find_gene {
        }elsif ($species eq "Zea_mays") {
                my ($transcript,$seq_type,$coord,$parent_transcript,$parent_gene) = split(/;\s/, $gene_header);
                $gene = $transcript;
+               $gene =~ s/\sseq=translation//g;
+               
        }else {
                die "Error: Gene id can not be found for species $species!";
        }
@@ -307,6 +327,7 @@ sub all_species_array {
        push (@spec_array, "Fusarium_verticilliodes");
        push (@spec_array, "Glycine_max");
        push (@spec_array, "Homo_sapiens");
+       push (@spec_array, "Jatropha_curcas");
        push (@spec_array, "Laccaria_bicolor");
        push (@spec_array, "Magnaporthe_grissa");
        push (@spec_array, "Malus_domestica");
@@ -319,6 +340,7 @@ sub all_species_array {
        push (@spec_array, "Nostoc_punctiforme");
        push (@spec_array, "Oryza_sativa");
        push (@spec_array, "Pediculus_humanus");
+       push (@spec_array, "Phoenix_dactylifera");
        push (@spec_array, "Physcomitrella_patens");
        push (@spec_array, "Populus_trichocarpa");
        push (@spec_array, "Prunus_persica");
@@ -330,6 +352,7 @@ sub all_species_array {
        push (@spec_array, "Selaginella_moellendorffii");
        push (@spec_array, "Sorghum_bicolor");
        push (@spec_array, "Synechocystis_pcc6803");
+       push (@spec_array, "Theobroma_cacao");
        push (@spec_array, "Tolypocladium_inflatum");
        push (@spec_array, "Trichoderma_atroviride");
        push (@spec_array, "Trichoderma_reesii");
@@ -340,4 +363,14 @@ sub all_species_array {
        
        return @spec_array;
 }
+
+sub retrieval_info {  # Parses a file name passed as the first argument and returns the list ($method, $date).
+       my $file = $_[0];  # file name to parse; only the first argument is read
+       $file =~ /([a-zA-Z]*\_[a-zA-Z0-9]*)\_((?:[a-zA-Z0-9]+\_?)+)\_(\d+\_\d+\_\d+).fa/;  # unanchored pattern: <letters>_<alnum>_<alnum tokens>_<digits>_<digits>_<digits>, any one character, then "fa". NOTE(review): the match result is not checked and the "." is unescaped - confirm both are intended
+       my $species = $1;  # first capture (letters, underscore, alphanumerics); assigned but not used or returned
+       my $method = $2;  # second capture: one or more alphanumeric runs, optionally joined by underscores
+       my $date = $3;  # third capture: three digit groups joined by underscores, kept exactly as matched
+       return ($method, $date);  # two-element list; $species is not part of the result
+}
+
 1;