$species = "Batrachochytrium_dendrobatidis";
}elsif ($temp =~ /Brachypodium\_distachyon/) {
$species = "Brachypodium_distachyon";
- }elsif ($temp =~ /Brachypodium\_sylvaticum\_Corvallis/) {
- $species = "Brachypodium_sylvaticum_Corvallis";
- }elsif ($temp =~ /Brachypodium\_sylvaticum\_Greece/) {
- $species = "Brachypodium_sylvaticum_Greece";
- }elsif ($temp =~ /Brachypodium\_sylvaticum\_Spain/) {
- $species = "Brachypodium_sylvaticum_Spain";
+ }elsif ($temp =~ /Brachypodium\_sylvaticum\.Corvallis/) {
+ $species = "Brachypodium_sylvaticum.Corvallis";
+ }elsif ($temp =~ /Brachypodium\_sylvaticum\.Greece/) {
+ $species = "Brachypodium_sylvaticum.Greece";
+ }elsif ($temp =~ /Brachypodium\_sylvaticum\.Spain/) {
+ $species = "Brachypodium_sylvaticum.Spain";
}elsif ($temp =~ /Brassica\_rapa/) {
$species = "Brassica_rapa";
}elsif ($temp =~ /elegans/) {
$species = "Drosophila_melanogaster";
}elsif ($temp =~ /Ectocarpus/) {
$species = "Ectocarpus_siliculosus";
+ }elsif ($temp =~ /Elaphocordyceps\_capitata/) {
+ $species = "Elaphocordyceps_capitata";
+ }elsif ($temp =~ /Elaphocordyceps\_ophioglossoides/) {
+ $species = "Elaphocordyceps_ophioglossoides";
+ }elsif ($temp =~ /Elaphocordyceps\_paradoxa/) {
+ $species = "Elaphocordyceps_paradoxa";
}elsif ($temp =~ /Epichloe_festuca/) {
$species = "Epichloe\_festuca";
}elsif ($temp =~ /Escherichia\_coli/) {
}elsif ($temp =~ /Fusarium_graminearum/) {
$species = "Fusarium\_graminearum";
}elsif ($temp =~ /Fusarium_oxysporum/) {
- $species = "Fusarium\_oxysporum";
- }elsif ($temp =~ /Fusarium_verticilliodes/) {
- $species = "Fusarium\_verticilliodes";
+ $species = "Fusarium\_oxysporum.4287";
+ }elsif ($temp =~ /Fusarium_verticillioides/) {
+ $species = "Fusarium\_verticillioides";
}elsif ($temp =~ /Gadus_morhua/) {
$species = "Gadus\_morhua";
}elsif ($temp =~ /Gasterosteus/) {
$species = "Oryza_punctata";
}elsif ($temp =~ /Oryza\_rufipogon/) {
$species = "Oryza_rufipogon";
- }elsif ($temp =~ /Oryza\_sativa.indica\_gramene/) {
+ }elsif ($temp =~ /Oryza\_sativa.indica\.gramene/) {
$species = "Oryza_sativa.indica.gramene";
- }elsif ($temp =~ /Oryza\_sativa.indica\_iplant/) {
+ }elsif ($temp =~ /Oryza\_sativa.indica\.iplant/) {
$species = "Oryza_sativa.indica.iplant";
}elsif ($temp =~ /Oryza\_sativa.japonica.IRGSP/) {
$species = "Oryza_sativa.japonica.IRGSP";
$species = "Phyllostachys_heterocycla";
}elsif ($temp =~ /Physcomit/) {
$species = "Physcomitrella_patens";
+ }elsif ($temp =~ /Phytophthora/) {
+ $species = "Phytophthora_infestans";
+ }elsif ($temp =~ /Picea/) {
+ $species = "Picea_abies";
+ }elsif ($temp =~ /Pinus/) {
+ $species = "Pinus_taeda";
}elsif ($temp =~ /Populus/) {
$species = "Populus_trichocarpa";
}elsif ($temp =~ /Prunus/) {
$species = "Schizosaccharomyces_pombe";
}elsif ($temp =~ /Selaginella/) {
$species = "Selaginella_moellendorffii";
+ }elsif ($temp =~ /Setaria/) {
+ $species = "Setaria_italica";
}elsif ($temp =~ /Solanum_tuberosum/) {
$species = "Solanum_tuberosum";
}elsif ($temp =~ /Solanum_lycopersicum/) {
$species = "Tetraodon\_nigroviridis";
}elsif ($temp =~ /Theobroma\_cacao/) {
$species = "Theobroma_cacao";
- }elsif ($temp =~ /TmDV92/) {
- $species = "TmDV92";
- }elsif ($temp =~ /TmG3116/) {
- $species = "TmG3116";
}elsif ($temp =~ /Tolypocladium\_inflatum/) {
$species = "Tolypocladium_inflatum";
- }elsif ($temp =~ /Trichoderma\_atroviride/) {
- $species = "Trichoderma_atroviride";
- }elsif ($temp =~ /Trichoderma\_reesii/) {
- $species = "Trichoderma_reesii";
- }elsif ($temp =~ /Trichoderma\_virens/) {
- $species = "Trichoderma_virens";
}elsif ($temp =~ /Trichodesmium/) {
$species = "Trichodesmium_erythraeum";
- }elsif ($temp =~ /triticum\_aestivum/) {
+ }elsif ($temp =~ /Triticum\_aestivum/) {
$species = "Triticum_aestivum";
- }elsif ($temp =~ /Triticum\_monococcumDV92/) {
- $species = "Triticum_monococcumDV92";
- }elsif ($temp =~ /Triticum\_monococcumG3116/) {
- $species = "Triticum_monococcumG3116";
+ }elsif ($temp =~ /Triticum\_monococcum\.DV92/) {
+ $species = "Triticum_monococcum.DV92";
+ }elsif ($temp =~ /Triticum\_monococcum\.G3116/) {
+ $species = "Triticum_monococcum.G3116";
}elsif ($temp =~ /Triticum\_urartu/) {
$species = "Triticum_urartu";
}elsif ($temp =~ /Vitis\_vinifera/) {
my ($gene_id,$isomer) = split(/\|/, $gene_header);
$gene = $isomer;
}elsif ($species eq "Batrachochytrium_dendrobatidis") {
- my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header);
- $gene = $gene_id;
+ my ($transcript,$gene_id,$info) = split(/\s\|\s/, $gene_header);
+ $gene = $transcript;
}elsif ($species eq "Brachypodium_distachyon") {
my ($gene_id,$isomer) = split(/\|/, $gene_header);
$gene = $isomer;
- }elsif ($species eq "Brachypodium_sylvaticum_Corvallis") {
+ }elsif ($species eq "Brachypodium_sylvaticum.Corvallis") {
my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
$gene = $gene_id;
- }elsif ($species eq "Brachypodium_sylvaticum_Greece") {
+ }elsif ($species eq "Brachypodium_sylvaticum.Greece") {
$gene = $gene_header;
- }elsif ($species eq "Brachypodium_sylvaticum_Spain") {
+ }elsif ($species eq "Brachypodium_sylvaticum.Spain") {
my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Brassica_rapa") {
}elsif ($species eq "Ectocarpus_siliculosus") {
my ($gene_id,$temp) = split(/\|/, $gene_header);
$gene = $gene_id;
- }elsif ($species eq "Epichloe_festuca") {
+ }elsif ($species eq "Elaphocordyceps_capitata") {
+ $gene = $gene_header;
+ }elsif ($species eq "Elaphocordyceps_ophioglossoides") {
+ $gene = $gene_header;
+ }elsif ($species eq "Elaphocordyceps_paradoxa") {
$gene = $gene_header;
+ }elsif ($species eq "Epichloe_festuca") {
+ my ($gene_id, $temp) = split(/\s/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Escherichia_coli") {
$gene = $gene_header; #???
}elsif ($species eq "Eucalyptus_grandis") {
$gene_id =~ s/\-v1\.0\-hybrid//;
$gene = $gene_id;
}elsif ($species eq "Fusarium_graminearum") {
- my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Fusarium_oxysporum") {
- $gene = $gene_header;
- }elsif ($species eq "Fusarium_verticilliodes") {
- my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
- $gene = $gene_id;
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $gene = $transcript;
+ }elsif ($species eq "Fusarium_oxysporum.4287") {
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $gene = $transcript;
+ }elsif ($species eq "Fusarium_verticillioides") {
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $gene = $transcript;
}elsif ($species eq "Gadus_morhua") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene = $transcript;
}elsif ($species eq "Mimulus_guttatus") {
my ($gene_id,$transcript) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ $gene = $transcript;
}elsif ($species eq "Mus_musculus") {
my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
$gene = $protein;
my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
$gene = $protein;
}elsif ($species eq "Nectria_haematococca") {
- my ($source, $spec, $int_id, $gene_id) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Neurospora_crassa") {
my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header);
$gene = $transcript;
$gene = $transcript;
}elsif ($species eq "Oncorhynchus_mykiss") {
my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header);
- $gene = $gene_id;
+ $gene = $number;
}elsif ($species eq "Leersia_perrieri") {
my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
$gene = $gene_id;
$gene = $gene_id;
}elsif ($species eq "Oryza_sativa.indica.gramene") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene = $transcript;
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Oryza_sativa.japonica.iplant") {
my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene = $transcript;
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Oryza_sativa.japonica.MSU") {
my ($gene_id,$transcript) = split(/\|/,$gene_header);
$gene = $transcript;
}elsif ($species eq "Oryzias_latipes") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Pediculus_humanus") {
- my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
- $gene_id =~ s/gene\://;
+ my ($gene_pa,$func,$func2,$unknown,$gene_id) = split(/[\|\s+]/, $gene_header);
$gene = $gene_pa;
}elsif ($species eq "Phoenix_dactylifera") {
- my ($gene_id, $temp) = split(/\s/, $gene_header);
+ my ($gene_id, $locus_tag, $product, $strand) = split(/\s\[/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Phyllostachys_heterocycla") {
my ($gene_id, $gene_model, $location, $dot, $strand, $dot2, $info) = split (/\s+/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Physcomitrella_patens") {
- my ($name,$pac_id) = split(/\|/,$gene_header);
- $gene = $name; #???
+ my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+ $gene = $transcript_id;
+ }elsif ($species eq "Phytophthora_infestans") {
+ my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header);
+ $gene = $transcript;
+ }elsif ($species eq "Picea_abies") {
+ my ($gene_id, $confidence) = split(/\s/,$gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Pinus_taeda") {
+ # Keep the text before the first ':' and take the first run of digits that
+ # follows at least one non-digit character as the gene identifier.
+ # The capture is taken through a list assignment so that a header with no
+ # such digits leaves $gene undefined instead of reusing a stale $1 from an
+ # earlier successful match. Nothing is printed from this branch.
+ my ($gene_id,$temp) = split(/\:/,$gene_header);
+ ($gene) = $gene_id =~ /\D+(\d+)/;
}elsif ($species eq "Populus_trichocarpa") {
my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
$gene = $transcript_id;
}elsif ($species eq "Prunus_persica") {
- my ($gene_id,$temp) = split(/\s/,$gene_header);
+ my ($transcript,$gene_id) = split(/\|/,$gene_header);
$gene = $gene_id;
}elsif ($species eq "Rattus_norvegicus") {
- my ($gene_id,$transcript,$chrom) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Rhizopus_oryzae") {
- my ($gene_id,$func) = split(/\s\|\s/, $gene_header);
- $gene = $gene_id;
+ my ($jgi,$temp,$num,$gene_id) = split(/\|/, $gene_header);
+ $gene = $num;
}elsif ($species eq "Ricinus_communis") {
- my ($name,$transcript,$gene_id,$temp) = split(/\|/, $gene_header);
+ my ($transcript,$model) = split(/\|/, $gene_header);
$gene = $transcript;
}elsif ($species eq "Saccharomyces_cerevisiae") {
- my ($gene_id,$gene_name,$transcript,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ # Split the header on single whitespace characters; the fifth field, with
+ # its "transcript:" prefix removed, is used as the gene identifier.
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Schizosaccharomyces_pombe") {
- my ($gene_id,$gene_name,$unknown,$func,$name,$chrom,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ # Split the header on single whitespace characters; the fifth field, with
+ # its "transcript:" prefix removed, is used as the gene identifier.
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Selaginella_moellendorffii") {
- my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #???
- }elsif ($species eq "Solanum_tuberosum") {
- my ($protein,$transcript,$type) = split(/\s/,$gene_header);
- $gene = $protein;
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $gene = $locus_id;
+ }elsif ($species eq "Setaria_italica") {
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $gene = $prot_id;
}elsif ($species eq "Solanum_lycopersicum") {
- my ($protein,$type,$location,$gene_1,$transcript) = split(/\s/,$gene_header);
- $gene = $protein;
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $gene = $locus_id;
+ }elsif ($species eq "Solanum_tuberosum") {
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $gene = $prot_id;
}elsif ($species eq "Sorghum_bicolor") {
my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
$gene = $transcript_id;
my ($gene_id,$temp) = split(" ",$gene_header);
$gene = $gene_id; #???
}elsif ($species eq "Takifugu_rubripes") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Tetraodon_nigroviridis") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Theobroma_cacao") {
- my ($gene_id,$temp) = split(/\s/,$gene_header);
+ my ($gene_id,$transcript) = split(/\s/,$gene_header);
$gene = $gene_id;
- }elsif ($species eq "TmDV92") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "TmG3116") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
}elsif ($species eq "Tolypocladium_inflatum") {
- my ($gene_id, $type, $info) = split(/\s/, $gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_atroviride") {
- my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_reesii") {
- my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_virens") {
- my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ $gene = $gene_header;
}elsif ($species eq "Trichodesmium_erythraeum") {
- my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Triticum_aestivum") {
- my ($type,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $transcript;
- }elsif ($species eq "Triticum_monococcumDV92") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
+ }elsif ($species eq "Triticum_monococcum.DV92") {
my ($gene_id,$temp) = split(/\s+/,$gene_header);
$gene = $gene_id;
- }elsif ($species eq "Triticum_monococcumG3116") {
+ }elsif ($species eq "Triticum_monococcum.G3116") {
my ($gene_id,$temp) = split(/\s+/,$gene_header);
$gene = $gene_id;
}elsif ($species eq "Triticum_urartu") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene = $transcript;
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Vitis_vinifera") {
- #my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
- $gene = $gene_header; #???
+ my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Zea_mays") {
my ($gene_id,$transcript) = split(/\|/,$gene_header);
- $gene = $transcript;
+ $gene = $gene_id;
}else {
die "Error: Gene id can not be found for species $species!";
my $species = $_[1];
my $synonym;
if ($species eq "Batrachochytrium_dendrobatidis") {
- my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header);
- $synonym = $transcript;
+ my ($transcript,$gene_id,$info) = split(/\s\|\s/, $gene_header);
+ $synonym = $gene_id;
}elsif ($species eq "Caenorhabditis_elegans") {
my ($locus_id,$unknown,$gene_id,$temp) = split(/\|/, $gene_header);
$synonym = $gene_id; #???
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Drosophila_melanogaster") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Eucalyptus_grandis") {
my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
$gene_id =~ s/\-v1\.0\-hybrid//;
$mrna_id =~ s/\-v1\.0\-hybrid//;
$synonym = $mrna_id;
+ }elsif ($species eq "Fusarium_graminearum") {
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $synonym = $gene_id;
+ }elsif ($species eq "Fusarium_oxysporum.4287") {
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $synonym = $gene_id;
+ }elsif ($species eq "Fusarium_verticillioides") {
+ my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header);
+ $synonym = $gene_id;
}elsif ($species eq "Gadus_morhua") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Gasterosteus_aculeatus") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Glycine_max") {
my ($locus_id,$isomer) = split(/\|/,$gene_header);
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Hordeum_vulgare") {
my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Linum_usitatissimum") {
my ($transcript,$gene_id) = split(/\|/, $gene_header);
my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Musa_acuminata") {
- my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
+ # The first whitespace-separated field is bound to $peptide because the
+ # statements below strip ":pep" from $peptide and build the synonym from it;
+ # binding it to any other name leaves $peptide unset in this branch.
+ # The synonym is "<first field without :pep>,<fourth field without gene:>".
+ my ($peptide,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
$transcript =~ s/transcript\://;
$gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
+ }elsif ($species eq "Nectria_haematococca") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
$synonym = "$peptide,$gene_id";
}elsif ($species eq "Neurospora_crassa") {
my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header);
$synonym = $gene_id;
}elsif ($species eq "Oncorhynchus_mykiss") {
my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header);
- $synonym = $number;
- }elsif ($species eq "Oryza_sativa.indica.gramene") {
- my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene_id =~ s/gene\://;
- $synonym = "$peptide,$gene_id";
- }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
- my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene_id =~ s/gene\://;
- $synonym = "$peptide,$gene_id";
+ $synonym = $gene_id;
+ }elsif ($species eq "Oryza_sativa.indica.gramene") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
+ }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Oryzias_latipes") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Pediculus_humanus") {
- my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
+ my ($gene_pa,$func,$func2,$unknown,$gene_id) = split(/[\|\s+]/, $gene_header);
$gene_id =~ s/gene\://;
- $gene = $gene_pa;
+ $synonym = $gene_id;
}elsif ($species eq "Phoenix_dactylifera") {
- my ($gene_id, $temp) = split(/\s/, $gene_header);
- $gene = $gene_id;
+ my ($gene_id, $locus_tag, $product, $strand) = split(/\s\[/, $gene_header);
+ $locus_tag =~ s/locus_tag=//;
+ $locus_tag =~ s/"//g;
+ $synonym = $locus_tag;
}elsif ($species eq "Phyllostachys_heterocycla") {
my ($gene_id, $gene_model, $location, $dot, $strand, $dot2, $info) = split (/\s+/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Physcomitrella_patens") {
- my ($name,$pac_id) = split(/\|/,$gene_header);
- $gene = $name; #???
+ my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+ $synonym = $gene_id;
+ }elsif ($species eq "Phytophthora_infestans") {
+ my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header);
+ $synonym = $gene_id;
}elsif ($species eq "Populus_trichocarpa") {
my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ $synonym = $gene_id;
}elsif ($species eq "Prunus_persica") {
- my ($gene_id,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ my ($transcript,$gene_id) = split(/\|/,$gene_header);
+ $synonym = $transcript;
}elsif ($species eq "Rattus_norvegicus") {
- my ($gene_id,$transcript,$chrom) = split(/\|/, $gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Rhizopus_oryzae") {
- my ($gene_id,$func) = split(/\s\|\s/, $gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Ricinus_communis") {
- my ($name,$transcript,$gene_id,$temp) = split(/\|/, $gene_header);
- $gene = $transcript;
+ my ($transcript,$model) = split(/\|/, $gene_header);
+ $synonym = $model;
}elsif ($species eq "Saccharomyces_cerevisiae") {
- my ($gene_id,$gene_name,$transcript,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Schizosaccharomyces_pombe") {
- my ($gene_id,$gene_name,$unknown,$func,$name,$chrom,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Selaginella_moellendorffii") {
- my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #???
- }elsif ($species eq "Solanum_tuberosum") {
- my ($protein,$transcript,$type) = split(/\s/,$gene_header);
- $gene = $protein;
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $synonym = $prot_id;
+ }elsif ($species eq "Setaria_italica") {
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $synonym = $locus_id;
}elsif ($species eq "Solanum_lycopersicum") {
- my ($protein,$type,$location,$gene_1,$transcript) = split(/\s/,$gene_header);
- $gene = $protein;
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $synonym = $locus_id;
+ }elsif ($species eq "Solanum_tuberosum") {
+ my ($locus_id,$prot_id) = split(/\|/,$gene_header);
+ $synonym = $locus_id;
}elsif ($species eq "Sorghum_bicolor") {
my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
$synonym = $gene_id;
- }elsif ($species eq "Synechocystis_pcc6803") {
- my ($gene_id,$temp) = split(" ",$gene_header);
- $gene = $gene_id; #???
}elsif ($species eq "Takifugu_rubripes") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Tetraodon_nigroviridis") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Theobroma_cacao") {
- my ($gene_id,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "TmDV92") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "TmG3116") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Tolypocladium_inflatum") {
- my ($gene_id, $type, $info) = split(/\s/, $gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_atroviride") {
- my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_reesii") {
- my ($source, $spec, $int_id, $gene_id) = split(/\|/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Trichoderma_virens") {
- my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ my ($gene_id,$transcript) = split(/\s/,$gene_header);
+ $synonym = $transcript;
}elsif ($species eq "Trichodesmium_erythraeum") {
- my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ # Parse the header as whitespace-separated fields, the same layout the
+ # gene-id branch for this species reads, and store the result in $synonym,
+ # which is the variable this routine returns.
+ # The synonym is "<first field without :pep>,<fourth field without gene:>".
+ # NOTE(review): field layout assumed identical to the other whitespace-split
+ # branches in this chain; confirm against a real header for this species.
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Triticum_aestivum") {
- my ($type,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
- $gene = $transcript;
- }elsif ($species eq "Triticum_monococcumDV92") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
- }elsif ($species eq "Triticum_monococcumG3116") {
- my ($gene_id,$temp) = split(/\s+/,$gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Triticum_urartu") {
- my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
- $transcript =~ s/transcript\://;
- $gene_id =~ s/gene\://;
- $synonym = "$peptide,$gene_id";
+ # Split the header on single whitespace characters and build the synonym as
+ # "<first field without :pep>,<fourth field without gene:>".
+ # The ":pep" removal matches the other branches of this shape in the chain;
+ # it is a no-op when the first field carries no such suffix.
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene_id =~ s/gene\://;
+ $peptide =~ s/\:pep//;
+ $synonym = "$peptide,$gene_id";
}elsif ($species eq "Vitis_vinifera") {
- #my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
- $gene = $gene_header; #???
+ my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+ $synonym = $transcript_id;
}elsif ($species eq "Zea_mays") {
my ($gene_id,$transcript) = split(/\|/,$gene_header);
- $synonym = $gene_id;
+ $synonym = $transcript;
}else {
- die "Error: Gene id can not be found for species $species!";
+ $synonym = "null";
}
- return $gene;
+ return $synonym;
}
push (@spec_array, "Amborella_trichopoda");
push (@spec_array, "Arabidopsis_lyrata");
push (@spec_array, "Arabidopsis_thaliana");
- #push (@spec_array, "Batrachochytrium_dendrobatidis");
+ push (@spec_array, "Batrachochytrium_dendrobatidis");
push (@spec_array, "Brachypodium_distachyon");
push (@spec_array, "Brachypodium_sylvaticum_Corvallis");
push (@spec_array, "Brachypodium_sylvaticum_Greece");
push (@spec_array, "Danio_rerio");
push (@spec_array, "Drosophila_melanogaster");
push (@spec_array, "Ectocarpus_siliculosus");
+ push (@spec_array, "Elaphocordyceps_capitata");
+ push (@spec_array, "Elaphocordyceps_ophioglossoides");
+ push (@spec_array, "Elaphocordyceps_paradoxa");
push (@spec_array, "Epichloe_festuca");
push (@spec_array, "Escherichia_coli");
push (@spec_array, "Eucalyptus_grandis");
push (@spec_array, "Fragaria_vesca");
push (@spec_array, "Fusarium_graminearum");
- push (@spec_array, "Fusarium_oxysporum");
- push (@spec_array, "Fusarium_verticilliodes");
+ push (@spec_array, "Fusarium_oxysporum.4287");
+ push (@spec_array, "Fusarium_verticillioides");
push (@spec_array, "Gadus_morhua");
push (@spec_array, "Gasterosteus_aculeatus");
push (@spec_array, "Glycine_max");
push (@spec_array, "Neurospora_crassa");
push (@spec_array, "Nostoc_punctiforme");
push (@spec_array, "Oncorhynchus_mykiss");
- push (@spec_array, "Oryza_sativa");
+ push (@spec_array, "Oryza_barthii");
+ push (@spec_array, "Oryza_brachyantha");
+ push (@spec_array, "Oryza_glaberrima");
+ push (@spec_array, "Oryza_glumaepatula");
+ push (@spec_array, "Oryza_longistaminata");
+ push (@spec_array, "Oryza_meridionalis");
+ push (@spec_array, "Oryza_nivara");
+ push (@spec_array, "Oryza_punctata");
+ push (@spec_array, "Oryza_rufipogon");
+ push (@spec_array, "Oryza_sativa.indica.gramene");
+ push (@spec_array, "Oryza_sativa.indica.iplant");
+ push (@spec_array, "Oryza_sativa.japonica.iplant");
+ push (@spec_array, "Oryza_sativa.japonica.IRGSP");
+ push (@spec_array, "Oryza_sativa.japonica.MSU");
push (@spec_array, "Oryzias_latipes");
push (@spec_array, "Pediculus_humanus");
- push (@spec_array, "Phoenix_dactylifera");
push (@spec_array, "Phyllostachys_heterocycla");
push (@spec_array, "Physcomitrella_patens");
+ push (@spec_array, "Phytophthora_infestans");
+ push (@spec_array, "Picea_abies");
push (@spec_array, "Populus_trichocarpa");
push (@spec_array, "Prunus_persica");
push (@spec_array, "Rattus_norvegicus");
push (@spec_array, "Saccharomyces_cerevisiae");
push (@spec_array, "Schizosaccharomyces_pombe");
push (@spec_array, "Selaginella_moellendorffii");
+ push (@spec_array, "Setaria_italica");
push (@spec_array, "Solanum_tuberosum");
push (@spec_array, "Solanum_lycopersicum");
push (@spec_array, "Sorghum_bicolor");
push (@spec_array, "Takifugu_rubripes");
push (@spec_array, "Tetraodon_nigroviridis");
push (@spec_array, "Theobroma_cacao");
- push (@spec_array, "TmDV92");
- push (@spec_array, "TmG3116");
push (@spec_array, "Tolypocladium_inflatum");
- push (@spec_array, "Trichoderma_atroviride");
- push (@spec_array, "Trichoderma_reesii");
- push (@spec_array, "Trichoderma_virens");
push (@spec_array, "Trichodesmium_erythraeum");
push (@spec_array, "Triticum_aestivum");
- push (@spec_array, "Triticum_monococcumDV92");
- push (@spec_array, "Triticum_monococcumG3116");
+ push (@spec_array, "Triticum_monococcum.DV92");
+ push (@spec_array, "Triticum_monococcum.G3116");
push (@spec_array, "Triticum_urartu");
push (@spec_array, "Vitis_vinifera");
push (@spec_array, "Zea_mays");
sub retrieval_info {
my $file = $_[0];
- $file =~ /([a-zA-Z]*\_+[a-zA-Z0-9]+)\_((?:[a-zA-Z0-9+-.]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
+ $file =~ /([a-zA-Z]*\_+[a-zA-Z0-9.]+)\_((?:[a-zA-Z0-9+-.]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
my $species = $1;
my $method = $2;
my $date = $3;