Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Updated for new species and fixed a couple of regex issues
author elserj <elserj@localhost>
Fri, 14 Feb 2014 17:40:36 +0000 (17:40 +0000)
committer elserj <elserj@localhost>
Fri, 14 Feb 2014 17:40:36 +0000 (17:40 +0000)
svn path=/; revision=527

interactome_scripts/find_species.pl

index 48317a13f9e96d5c63a33391da44799dcb0576d7..c93b9faed65aaece068dcca1be8c7a0b29d97c3d 100755 (executable)
@@ -24,12 +24,12 @@ sub find_species {
                $species = "Batrachochytrium_dendrobatidis";
        }elsif ($temp =~ /Brachypodium\_distachyon/) {
                $species = "Brachypodium_distachyon";
-       }elsif ($temp =~ /Brachypodium\_sylvaticum\_Corvallis/) {
-               $species = "Brachypodium_sylvaticum_Corvallis";
-       }elsif ($temp =~ /Brachypodium\_sylvaticum\_Greece/) {
-               $species = "Brachypodium_sylvaticum_Greece";
-       }elsif ($temp =~ /Brachypodium\_sylvaticum\_Spain/) {
-               $species = "Brachypodium_sylvaticum_Spain";
+       }elsif ($temp =~ /Brachypodium\_sylvaticum\.Corvallis/) {
+               $species = "Brachypodium_sylvaticum.Corvallis";
+       }elsif ($temp =~ /Brachypodium\_sylvaticum\.Greece/) {
+               $species = "Brachypodium_sylvaticum.Greece";
+       }elsif ($temp =~ /Brachypodium\_sylvaticum\.Spain/) {
+               $species = "Brachypodium_sylvaticum.Spain";
        }elsif ($temp =~ /Brassica\_rapa/) {
                $species = "Brassica_rapa";
        }elsif ($temp =~ /elegans/) {
@@ -54,6 +54,12 @@ sub find_species {
                $species = "Drosophila_melanogaster";
        }elsif ($temp =~ /Ectocarpus/) {
                $species = "Ectocarpus_siliculosus";
+       }elsif ($temp =~ /Elaphocordyceps\_capitata/) {
+               $species = "Elaphocordyceps_capitata";
+       }elsif ($temp =~ /Elaphocordyceps\_ophioglossoides/) {
+               $species = "Elaphocordyceps_ophioglossoides";
+       }elsif ($temp =~ /Elaphocordyceps\_paradoxa/) {
+               $species = "Elaphocordyceps_paradoxa";
        }elsif ($temp =~ /Epichloe_festuca/) {
                $species = "Epichloe\_festuca";
        }elsif ($temp =~ /Escherichia\_coli/) {
@@ -65,9 +71,9 @@ sub find_species {
        }elsif ($temp =~ /Fusarium_graminearum/) {
                $species = "Fusarium\_graminearum";
        }elsif ($temp =~ /Fusarium_oxysporum/) {
-               $species = "Fusarium\_oxysporum";
-       }elsif ($temp =~ /Fusarium_verticilliodes/) {
-               $species = "Fusarium\_verticilliodes";
+               $species = "Fusarium\_oxysporum.4287";
+       }elsif ($temp =~ /Fusarium_verticillioides/) {
+               $species = "Fusarium\_verticillioides";
        }elsif ($temp =~ /Gadus_morhua/) {
                $species = "Gadus\_morhua";
        }elsif ($temp =~ /Gasterosteus/) {
@@ -128,9 +134,9 @@ sub find_species {
                $species = "Oryza_punctata";
        }elsif ($temp =~ /Oryza\_rufipogon/) {
                $species = "Oryza_rufipogon";
-       }elsif ($temp =~ /Oryza\_sativa.indica\_gramene/) {
+       }elsif ($temp =~ /Oryza\_sativa.indica\.gramene/) {
                $species = "Oryza_sativa.indica.gramene";
-       }elsif ($temp =~ /Oryza\_sativa.indica\_iplant/) {
+       }elsif ($temp =~ /Oryza\_sativa.indica\.iplant/) {
                $species = "Oryza_sativa.indica.iplant";
        }elsif ($temp =~ /Oryza\_sativa.japonica.IRGSP/) {
                $species = "Oryza_sativa.japonica.IRGSP";
@@ -148,6 +154,12 @@ sub find_species {
                $species = "Phyllostachys_heterocycla";
        }elsif ($temp =~ /Physcomit/) {
                $species = "Physcomitrella_patens";
+       }elsif ($temp =~ /Phytophthora/) {
+               $species = "Phytophthora_infestans";
+       }elsif ($temp =~ /Picea/) {
+               $species = "Picea_abies";
+       }elsif ($temp =~ /Pinus/) {
+               $species = "Pinus_taeda";
        }elsif ($temp =~ /Populus/) {
                $species = "Populus_trichocarpa";
        }elsif ($temp =~ /Prunus/) {
@@ -164,6 +176,8 @@ sub find_species {
                $species = "Schizosaccharomyces_pombe";
        }elsif ($temp =~ /Selaginella/) {
                $species = "Selaginella_moellendorffii";
+       }elsif ($temp =~ /Setaria/) {
+               $species = "Setaria_italica";
        }elsif ($temp =~ /Solanum_tuberosum/) {
                $species = "Solanum_tuberosum";
        }elsif ($temp =~ /Solanum_lycopersicum/) {
@@ -178,26 +192,16 @@ sub find_species {
                $species = "Tetraodon\_nigroviridis";
        }elsif ($temp =~ /Theobroma\_cacao/) {
                $species = "Theobroma_cacao";
-       }elsif ($temp =~ /TmDV92/) {
-               $species = "TmDV92";
-       }elsif ($temp =~ /TmG3116/) {
-               $species = "TmG3116";
        }elsif ($temp =~ /Tolypocladium\_inflatum/) {
                $species = "Tolypocladium_inflatum";
-       }elsif ($temp =~ /Trichoderma\_atroviride/) {
-               $species = "Trichoderma_atroviride";
-       }elsif ($temp =~ /Trichoderma\_reesii/) {
-               $species = "Trichoderma_reesii";
-       }elsif ($temp =~ /Trichoderma\_virens/) {
-               $species = "Trichoderma_virens";
        }elsif ($temp =~ /Trichodesmium/) {
                $species = "Trichodesmium_erythraeum";
-       }elsif ($temp =~ /triticum\_aestivum/) {
+       }elsif ($temp =~ /Triticum\_aestivum/) {
                $species = "Triticum_aestivum";
-       }elsif ($temp =~ /Triticum\_monococcumDV92/) {
-               $species = "Triticum_monococcumDV92";
-       }elsif ($temp =~ /Triticum\_monococcumG3116/) {
-               $species = "Triticum_monococcumG3116";
+       }elsif ($temp =~ /Triticum\_monococcum\.DV92/) {
+               $species = "Triticum_monococcum.DV92";
+       }elsif ($temp =~ /Triticum\_monococcum\.G3116/) {
+               $species = "Triticum_monococcum.G3116";
        }elsif ($temp =~ /Triticum\_urartu/) {
                $species = "Triticum_urartu";
        }elsif ($temp =~ /Vitis\_vinifera/) {
@@ -227,17 +231,17 @@ sub find_gene {
                my ($gene_id,$isomer) = split(/\|/, $gene_header);
                $gene = $isomer;
        }elsif ($species eq "Batrachochytrium_dendrobatidis") {
-               my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header);
-               $gene = $gene_id;
+               my ($transcript,$gene_id,$info) = split(/\s\|\s/, $gene_header);
+               $gene = $transcript;
        }elsif ($species eq "Brachypodium_distachyon") {
                my ($gene_id,$isomer) = split(/\|/, $gene_header);
                $gene = $isomer;
-       }elsif ($species eq "Brachypodium_sylvaticum_Corvallis") {
+       }elsif ($species eq "Brachypodium_sylvaticum.Corvallis") {
                my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
                $gene = $gene_id;
-       }elsif ($species eq "Brachypodium_sylvaticum_Greece") {
+       }elsif ($species eq "Brachypodium_sylvaticum.Greece") {
                $gene = $gene_header;
-       }elsif ($species eq "Brachypodium_sylvaticum_Spain") {
+       }elsif ($species eq "Brachypodium_sylvaticum.Spain") {
                my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Brassica_rapa") {
@@ -278,8 +282,15 @@ sub find_gene {
        }elsif ($species eq "Ectocarpus_siliculosus") {
                my ($gene_id,$temp) = split(/\|/, $gene_header);
                $gene = $gene_id;
-       }elsif ($species eq "Epichloe_festuca") {
+       }elsif ($species eq "Elaphocordyceps_capitata") {
+               $gene = $gene_header;
+       }elsif ($species eq "Elaphocordyceps_ophioglossoides") {
+               $gene = $gene_header;
+       }elsif ($species eq "Elaphocordyceps_paradoxa") {
                $gene = $gene_header;
+       }elsif ($species eq "Epichloe_festuca") {
+               my ($gene_id, $temp) = split(/\s/, $gene_header);
+               $gene = $gene_id;
        }elsif ($species eq "Escherichia_coli") {
                $gene = $gene_header; #???
        }elsif ($species eq "Eucalyptus_grandis") {
@@ -290,13 +301,14 @@ sub find_gene {
                $gene_id =~ s/\-v1\.0\-hybrid//;
                $gene = $gene_id;
        }elsif ($species eq "Fusarium_graminearum") {
-               my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Fusarium_oxysporum") {
-               $gene = $gene_header;
-       }elsif ($species eq "Fusarium_verticilliodes") {
-               my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
-               $gene = $gene_id;
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $gene = $transcript;
+       }elsif ($species eq "Fusarium_oxysporum.4287") {
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $gene = $transcript;
+       }elsif ($species eq "Fusarium_verticillioides") {
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $gene = $transcript;
        }elsif ($species eq "Gadus_morhua") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
@@ -345,7 +357,7 @@ sub find_gene {
                $gene = $transcript;
        }elsif ($species eq "Mimulus_guttatus") {
                my ($gene_id,$transcript) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               $gene = $transcript;
        }elsif ($species eq "Mus_musculus") {
                my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
                $gene = $protein;
@@ -353,8 +365,9 @@ sub find_gene {
                my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
                $gene = $protein;
        }elsif ($species eq "Nectria_haematococca") {
-               my ($source, $spec, $int_id, $gene_id) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+               $transcript =~ s/transcript\://;
+               $gene = $transcript;
        }elsif ($species eq "Neurospora_crassa") {
                my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header);
                $gene = $transcript;
@@ -363,7 +376,7 @@ sub find_gene {
                $gene = $transcript;
        }elsif ($species eq "Oncorhynchus_mykiss") {
                my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header);
-               $gene = $gene_id;
+               $gene = $number;
        }elsif ($species eq "Leersia_perrieri") {
                my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
                $gene = $gene_id;
@@ -399,64 +412,81 @@ sub find_gene {
                $gene = $gene_id;
        }elsif ($species eq "Oryza_sativa.indica.gramene") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene = $transcript;
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Oryza_sativa.japonica.iplant") {
                my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene = $transcript;
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Oryza_sativa.japonica.MSU") {
                my ($gene_id,$transcript) = split(/\|/,$gene_header);
                 $gene = $transcript;
        }elsif ($species eq "Oryzias_latipes") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+            $transcript =~ s/transcript\://;
+            $gene = $transcript;
        }elsif ($species eq "Pediculus_humanus") {
-               my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
-               $gene_id =~ s/gene\://;
+               my ($gene_pa,$func,$func2,$unknown,$gene_id) = split(/[\|\s+]/, $gene_header);
                $gene = $gene_pa;
        }elsif ($species eq "Phoenix_dactylifera") {
-               my ($gene_id, $temp) = split(/\s/, $gene_header);
+               my ($gene_id, $locus_tag, $product, $strand) = split(/\s\[/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Phyllostachys_heterocycla") {
                my ($gene_id, $gene_model, $location, $dot, $strand, $dot2, $info) = split (/\s+/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Physcomitrella_patens") {
-               my ($name,$pac_id) = split(/\|/,$gene_header);
-               $gene = $name; #???
+               my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+               $gene = $transcript_id;
+       }elsif ($species eq "Phytophthora_infestans") {
+               my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header);
+               $gene = $transcript;
+       }elsif ($species eq "Picea_abies") {
+               my ($gene_id, $confidence) = split(/\s/,$gene_header);
+               $gene = $gene_id;
+       }elsif ($species eq "Pinus_taeda") {
+               my ($gene_id,$temp) = split(/\:/,$gene_header);
+               $gene_id =~ /\D+(\d+)/;
+               print "$1\n";
+               $gene = $1;
        }elsif ($species eq "Populus_trichocarpa") {
                my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
                $gene = $transcript_id;
        }elsif ($species eq "Prunus_persica") {
-               my ($gene_id,$temp) = split(/\s/,$gene_header);
+               my ($transcript,$gene_id) = split(/\|/,$gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Rattus_norvegicus") {
-               my ($gene_id,$transcript,$chrom) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Rhizopus_oryzae") {
-               my ($gene_id,$func) = split(/\s\|\s/, $gene_header);
-               $gene = $gene_id;
+               my ($jgi,$temp,$num,$gene_id) = split(/\|/, $gene_header);
+               $gene = $num;
        }elsif ($species eq "Ricinus_communis") {
-               my ($name,$transcript,$gene_id,$temp) = split(/\|/, $gene_header);
+               my ($transcript,$model) = split(/\|/, $gene_header);
                $gene = $transcript;
        }elsif ($species eq "Saccharomyces_cerevisiae") {
-               my ($gene_id,$gene_name,$transcript,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript
        }elsif ($species eq "Schizosaccharomyces_pombe") {
-               my ($gene_id,$gene_name,$unknown,$func,$name,$chrom,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript
        }elsif ($species eq "Selaginella_moellendorffii") {
-               my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
-               $gene = $prot_id; #???
-       }elsif ($species eq "Solanum_tuberosum") {
-               my ($protein,$transcript,$type) = split(/\s/,$gene_header);
-               $gene = $protein;
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $gene = $locus_id;
+       }elsif ($species eq "Setaria_italica") {
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $gene = $prot_id;
        }elsif ($species eq "Solanum_lycopersicum") {
-               my ($protein,$type,$location,$gene_1,$transcript) = split(/\s/,$gene_header);
-               $gene = $protein;
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $gene = $locus_id;
+       }elsif ($species eq "Solanum_tuberosum") {
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $gene = $prot_id;
        }elsif ($species eq "Sorghum_bicolor") {
                my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
                $gene = $transcript_id;
@@ -464,54 +494,42 @@ sub find_gene {
                my ($gene_id,$temp) = split(" ",$gene_header);
                $gene = $gene_id; #???
        }elsif ($species eq "Takifugu_rubripes") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Tetraodon_nigroviridis") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Theobroma_cacao") {
-               my ($gene_id,$temp) = split(/\s/,$gene_header);
+               my ($gene_id,$transcript) = split(/\s/,$gene_header);
                $gene = $gene_id;
-       }elsif ($species eq "TmDV92") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "TmG3116") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;       
        }elsif ($species eq "Tolypocladium_inflatum") {
-               my ($gene_id, $type, $info) = split(/\s/, $gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_atroviride") {
-               my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_reesii") {
-               my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_virens") {
-               my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               $gene = $gene_header;
        }elsif ($species eq "Trichodesmium_erythraeum") {
-               my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Triticum_aestivum") {
-               my ($type,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
-               $gene = $transcript;
-       }elsif ($species eq "Triticum_monococcumDV92") {
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
+       }elsif ($species eq "Triticum_monococcum.DV92") {
                my ($gene_id,$temp) = split(/\s+/,$gene_header);
                $gene = $gene_id;
-       }elsif ($species eq "Triticum_monococcumG3116") {
+       }elsif ($species eq "Triticum_monococcum.G3116") {
                my ($gene_id,$temp) = split(/\s+/,$gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Triticum_urartu") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene = $transcript;
+        $transcript =~ s/transcript\://;
+        $gene = $transcript;
        }elsif ($species eq "Vitis_vinifera") {
-               #my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
-               $gene = $gene_header; #???
+               my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+               $gene = $gene_id;
        }elsif ($species eq "Zea_mays") {
                my ($gene_id,$transcript) = split(/\|/,$gene_header);
-               $gene = $transcript;
+               $gene = $gene_id;
                
        }else {
                die "Error: Gene id can not be found for species $species!";
@@ -526,8 +544,8 @@ sub find_gene_synonym {
        my $species = $_[1];
        my $synonym;
        if ($species eq "Batrachochytrium_dendrobatidis") {
-               my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header);
-               $synonym = $transcript;
+               my ($transcript,$gene_id,$info) = split(/\s\|\s/, $gene_header);
+               $synonym = $gene_id;
        }elsif ($species eq "Caenorhabditis_elegans") {
                my ($locus_id,$unknown,$gene_id,$temp) = split(/\|/, $gene_header);
                $synonym = $gene_id; #???
@@ -556,11 +574,13 @@ sub find_gene_synonym {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Drosophila_melanogaster") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Eucalyptus_grandis") {
                my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
@@ -570,15 +590,26 @@ sub find_gene_synonym {
                $gene_id =~ s/\-v1\.0\-hybrid//;
                $mrna_id =~ s/\-v1\.0\-hybrid//;
                $synonym = $mrna_id;
+       }elsif ($species eq "Fusarium_graminearum") {
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $synonym = $gene_id;
+       }elsif ($species eq "Fusarium_oxysporum.4287") {
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $synonym = $gene_id;
+       }elsif ($species eq "Fusarium_verticillioides") {
+               my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+               $synonym = $gene_id;
        }elsif ($species eq "Gadus_morhua") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Gasterosteus_aculeatus") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Glycine_max") {
                my ($locus_id,$isomer) = split(/\|/,$gene_header);
@@ -590,11 +621,13 @@ sub find_gene_synonym {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Hordeum_vulgare") {
                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Linum_usitatissimum") {
                my ($transcript,$gene_id) = split(/\|/, $gene_header);
@@ -618,11 +651,19 @@ sub find_gene_synonym {
                my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Musa_acuminata") {
                my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
                $transcript =~ s/transcript\://;
                $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
+               $synonym = "$peptide,$gene_id";
+       }elsif ($species eq "Nectria_haematococca") {
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+               $transcript =~ s/transcript\://;
+               $gene_id =~ s/gene\://;
+               $peptide =~ s/\:pep//;
                $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Neurospora_crassa") {
                my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header);
@@ -632,124 +673,125 @@ sub find_gene_synonym {
                $synonym = $gene_id;
        }elsif ($species eq "Oncorhynchus_mykiss") {
                my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header);
-               $synonym = $number;
-        }elsif ($species eq "Oryza_sativa.indica.gramene") {
-                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene_id =~ s/gene\://;
-                $synonym = "$peptide,$gene_id";
-        }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
-                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene_id =~ s/gene\://;
-                $synonym = "$peptide,$gene_id";
+               $synonym = $gene_id;
+    }elsif ($species eq "Oryza_sativa.indica.gramene") {
+        my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
+    }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
+        my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Oryzias_latipes") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Pediculus_humanus") {
-               my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
+               my ($gene_pa,$func,$func2,$unknown,$gene_id) = split(/[\|\s+]/, $gene_header);
                $gene_id =~ s/gene\://;
-               $gene = $gene_pa;
+               $synonym = $gene_id;
        }elsif ($species eq "Phoenix_dactylifera") {
-               my ($gene_id, $temp) = split(/\s/, $gene_header);
-               $gene = $gene_id;
+               my ($gene_id, $locus_tag, $product, $strand) = split(/\s\[/, $gene_header);
+               $locus_tag =~ s/locus_tag=//;
+               $locus_tag =~ s/"//g;
+               $synonym = $locus_tag;
        }elsif ($species eq "Phyllostachys_heterocycla") {
                my ($gene_id, $gene_model, $location, $dot, $strand, $dot2, $info) = split (/\s+/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Physcomitrella_patens") {
-               my ($name,$pac_id) = split(/\|/,$gene_header);
-               $gene = $name; #???
+               my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+               $synonym = $gene_id;
+       }elsif ($species eq "Phytophthora_infestans") {
+               my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header);
+               $synonym = $gene_id;
        }elsif ($species eq "Populus_trichocarpa") {
                my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               $synonym = $gene_id;
        }elsif ($species eq "Prunus_persica") {
-               my ($gene_id,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
+               my ($transcript,$gene_id) = split(/\|/,$gene_header);
+               $synonym = $transcript;
        }elsif ($species eq "Rattus_norvegicus") {
-               my ($gene_id,$transcript,$chrom) = split(/\|/, $gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Rhizopus_oryzae") {
-               my ($gene_id,$func) = split(/\s\|\s/, $gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Ricinus_communis") {
-               my ($name,$transcript,$gene_id,$temp) = split(/\|/, $gene_header);
-               $gene = $transcript;
+               my ($transcript,$model) = split(/\|/, $gene_header);
+               $synonym = $model;
        }elsif ($species eq "Saccharomyces_cerevisiae") {
-               my ($gene_id,$gene_name,$transcript,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Schizosaccharomyces_pombe") {
-               my ($gene_id,$gene_name,$unknown,$func,$name,$chrom,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Selaginella_moellendorffii") {
-               my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
-               $gene = $prot_id; #???
-       }elsif ($species eq "Solanum_tuberosum") {
-               my ($protein,$transcript,$type) = split(/\s/,$gene_header);
-               $gene = $protein;
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $synonym = $prot_id;
+       }elsif ($species eq "Setaria_italica") {
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $synonym = $locus_id;
        }elsif ($species eq "Solanum_lycopersicum") {
-               my ($protein,$type,$location,$gene_1,$transcript) = split(/\s/,$gene_header);
-               $gene = $protein;
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $synonym = $locus_id;
+       }elsif ($species eq "Solanum_tuberosum") {
+               my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+               $synonym = $locus_id;
        }elsif ($species eq "Sorghum_bicolor") {
                my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
                $synonym = $gene_id;
-       }elsif ($species eq "Synechocystis_pcc6803") {
-               my ($gene_id,$temp) = split(" ",$gene_header);
-               $gene = $gene_id; #???
        }elsif ($species eq "Takifugu_rubripes") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Tetraodon_nigroviridis") {
-               my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
-               $gene = $transcript_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Theobroma_cacao") {
-               my ($gene_id,$temp) = split(/\s/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "TmDV92") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "TmG3116") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;       
-       }elsif ($species eq "Tolypocladium_inflatum") {
-               my ($gene_id, $type, $info) = split(/\s/, $gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_atroviride") {
-               my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_reesii") {
-               my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Trichoderma_virens") {
-               my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
-               $gene = $gene_id;
+               my ($gene_id,$transcript) = split(/\s/,$gene_header);
+               $synonym = $transcript;
        }elsif ($species eq "Trichodesmium_erythraeum") {
                my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
                $gene = $gene_id;
        }elsif ($species eq "Triticum_aestivum") {
-               my ($type,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
-               $gene = $transcript;
-       }elsif ($species eq "Triticum_monococcumDV92") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;
-       }elsif ($species eq "Triticum_monococcumG3116") {
-               my ($gene_id,$temp) = split(/\s+/,$gene_header);
-               $gene = $gene_id;
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+        $transcript =~ s/transcript\://;
+        $gene_id =~ s/gene\://;
+        $peptide =~ s/\:pep//;
+        $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Triticum_urartu") {
-                my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
-                $transcript =~ s/transcript\://;
-                $gene_id =~ s/gene\://;
-                $synonym = "$peptide,$gene_id";
+               my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+               $transcript =~ s/transcript\://;
+               $gene_id =~ s/gene\://;
+               $synonym = "$peptide,$gene_id";
        }elsif ($species eq "Vitis_vinifera") {
-               #my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
-               $gene = $gene_header; #???
+               my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+               $synonym = $transcript_id;
        }elsif ($species eq "Zea_mays") {
                my ($gene_id,$transcript) = split(/\|/,$gene_header);
-               $synonym = $gene_id;
+               $synonym = $transcript;
                
        }else {
-               die "Error: Gene id can not be found for species $species!";
+               $synonym = "null";
        }
-       return $gene;
+       return $synonym;
        
 }
 
@@ -760,7 +802,7 @@ sub all_species_array {
        push (@spec_array, "Amborella_trichopoda");
        push (@spec_array, "Arabidopsis_lyrata");
        push (@spec_array, "Arabidopsis_thaliana");
-       #push (@spec_array, "Batrachochytrium_dendrobatidis");
+       push (@spec_array, "Batrachochytrium_dendrobatidis");
        push (@spec_array, "Brachypodium_distachyon");
        push (@spec_array, "Brachypodium_sylvaticum_Corvallis");
        push (@spec_array, "Brachypodium_sylvaticum_Greece");
@@ -777,13 +819,16 @@ sub all_species_array {
        push (@spec_array, "Danio_rerio");
        push (@spec_array, "Drosophila_melanogaster");
        push (@spec_array, "Ectocarpus_siliculosus");
+       push (@spec_array, "Elaphocordyceps_capitata");
+       push (@spec_array, "Elaphocordyceps_ophioglossoides");
+       push (@spec_array, "Elaphocordyceps_paradoxa");
        push (@spec_array, "Epichloe_festuca");
        push (@spec_array, "Escherichia_coli");
        push (@spec_array, "Eucalyptus_grandis");
        push (@spec_array, "Fragaria_vesca");
        push (@spec_array, "Fusarium_graminearum");
-       push (@spec_array, "Fusarium_oxysporum");
-       push (@spec_array, "Fusarium_verticilliodes");
+       push (@spec_array, "Fusarium_oxysporum.4287");
+       push (@spec_array, "Fusarium_verticillioides");
        push (@spec_array, "Gadus_morhua");
        push (@spec_array, "Gasterosteus_aculeatus");
        push (@spec_array, "Glycine_max");
@@ -804,12 +849,26 @@ sub all_species_array {
        push (@spec_array, "Neurospora_crassa");
        push (@spec_array, "Nostoc_punctiforme");
        push (@spec_array, "Oncorhynchus_mykiss");
-       push (@spec_array, "Oryza_sativa");
+       push (@spec_array, "Oryza_barthii");
+       push (@spec_array, "Oryza_brachyantha");
+       push (@spec_array, "Oryza_glaberrima");
+       push (@spec_array, "Oryza_glumaepatula");
+       push (@spec_array, "Oryza_longistaminata");
+       push (@spec_array, "Oryza_meridionalis");
+       push (@spec_array, "Oryza_nivara");
+       push (@spec_array, "Oryza_punctata");
+       push (@spec_array, "Oryza_rufipogon");
+       push (@spec_array, "Oryza_sativa.indica.gramene");
+       push (@spec_array, "Oryza_sativa.indica.iplant");
+       push (@spec_array, "Oryza_sativa.japonica.iplant");
+       push (@spec_array, "Oryza_sativa.japonica.IRGSP");
+       push (@spec_array, "Oryza_sativa.japonica.MSU");
        push (@spec_array, "Oryzias_latipes");
        push (@spec_array, "Pediculus_humanus");
-       push (@spec_array, "Phoenix_dactylifera");
        push (@spec_array, "Phyllostachys_heterocycla");
        push (@spec_array, "Physcomitrella_patens");
+       push (@spec_array, "Phytophthora_infestans");
+       push (@spec_array, "Picea_abies");
        push (@spec_array, "Populus_trichocarpa");
        push (@spec_array, "Prunus_persica");
        push (@spec_array, "Rattus_norvegicus");
@@ -818,6 +877,7 @@ sub all_species_array {
        push (@spec_array, "Saccharomyces_cerevisiae");
        push (@spec_array, "Schizosaccharomyces_pombe");
        push (@spec_array, "Selaginella_moellendorffii");
+       push (@spec_array, "Setaria_italica");
        push (@spec_array, "Solanum_tuberosum");
        push (@spec_array, "Solanum_lycopersicum");
        push (@spec_array, "Sorghum_bicolor");
@@ -825,16 +885,11 @@ sub all_species_array {
        push (@spec_array, "Takifugu_rubripes");
        push (@spec_array, "Tetraodon_nigroviridis");
        push (@spec_array, "Theobroma_cacao");
-       push (@spec_array, "TmDV92");
-       push (@spec_array, "TmG3116");
        push (@spec_array, "Tolypocladium_inflatum");
-       push (@spec_array, "Trichoderma_atroviride");
-       push (@spec_array, "Trichoderma_reesii");
-       push (@spec_array, "Trichoderma_virens");
        push (@spec_array, "Trichodesmium_erythraeum");
        push (@spec_array, "Triticum_aestivum");
-       push (@spec_array, "Triticum_monococcumDV92");
-       push (@spec_array, "Triticum_monococcumG3116");
+       push (@spec_array, "Triticum_monococcum.DV92");
+       push (@spec_array, "Triticum_monococcum.G3116");
        push (@spec_array, "Triticum_urartu");
        push (@spec_array, "Vitis_vinifera");
        push (@spec_array, "Zea_mays");
@@ -844,7 +899,7 @@ sub all_species_array {
 
 sub retrieval_info {
        my $file = $_[0];
-       $file =~ /([a-zA-Z]*\_+[a-zA-Z0-9]+)\_((?:[a-zA-Z0-9+-.]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
+       $file =~ /([a-zA-Z]*\_+[a-zA-Z0-9.]+)\_((?:[a-zA-Z0-9+-.]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
        my $species = $1;
        my $method = $2;
        my $date = $3;