# The rule table lives in a BEGIN block together with the sub so that it is
# built exactly once, at compile time, and is already populated no matter
# where in the file the first call to find_species() happens.
BEGIN {
    # Ordered species rules; the FIRST matching rule wins, so the order below
    # is significant (e.g. "Mus_musculus" must be tried before the looser
    # "Musa" pattern).  A plain string means "the file name must contain this
    # exact species name"; an array ref is [ pattern, canonical species name ]
    # for species recognised by a looser or differently spelled pattern.
    my @SPECIES_RULES = map { ref $_ ? $_ : [ qr/\Q$_\E/, $_ ] } (
        'Aegilops_tauschii',
        'Amborella_trichopoda',
        'Arabidopsis_lyrata',
        'Arabidopsis_thaliana',
        [ qr/Batrachochytrium/ => 'Batrachochytrium_dendrobatidis' ],
        'Brachypodium_distachyon',
        'Brachypodium_sylvaticum_Corvallis',
        'Brachypodium_sylvaticum_Greece',
        'Brachypodium_sylvaticum_Spain',
        'Brassica_rapa',
        [ qr/elegans/ => 'Caenorhabditis_elegans' ],
        'Cajanus_cajan',
        'Carica_papaya',
        [ qr/Chlamy/ => 'Chlamydomonas_reinhardtii' ],
        'Cicer_arietinum',
        'Citrus_clementina',
        'Citrus_sinensis',
        'Cucumis_sativus',
        [ qr/Danio/      => 'Danio_rerio' ],
        [ qr/Drosophila/ => 'Drosophila_melanogaster' ],
        [ qr/Ectocarpus/ => 'Ectocarpus_siliculosus' ],
        'Epichloe_festuca',
        'Escherichia_coli',
        'Eucalyptus_grandis',
        [ qr/Fragaria/ => 'Fragaria_vesca' ],
        'Fusarium_graminearum',
        'Fusarium_oxysporum',
        'Fusarium_verticilliodes',
        'Gadus_morhua',
        [ qr/Gasterosteus/ => 'Gasterosteus_aculeatus' ],
        [ qr/Glycine/      => 'Glycine_max' ],
        'Gossypium_raimondii',
        'Homo_sapiens',
        'Hordeum_vulgare',
        [ qr/Jatropha/ => 'Jatropha_curcas' ],
        [ qr/Laccaria/ => 'Laccaria_bicolor' ],
        'Leersia_perrieri',
        'Linum_usitatissimum',
        [ qr/Magnaporthe/ => 'Magnaporthe_grissa' ],
        [ qr/Malus/       => 'Malus_domestica' ],
        [ qr/Manihot/     => 'Manihot_esculenta' ],
        [ qr/Medicago/    => 'Medicago_truncatula' ],
        [ qr/Mimulus/     => 'Mimulus_guttatus' ],
        'Mus_musculus',
        [ qr/Musa/ => 'Musa_acuminata' ],
        'Nectria_haematococca',
        [ qr/Neurospora/ => 'Neurospora_crassa' ],
        [ qr/Nostoc/     => 'Nostoc_punctiforme' ],
        'Oncorhynchus_mykiss',
        'Oryza_barthii',
        'Oryza_brachyantha',
        'Oryza_glaberrima',
        'Oryza_glumaepatula',
        'Oryza_longistaminata',
        'Oryza_meridionalis',
        'Oryza_nivara',
        'Oryza_punctata',
        'Oryza_rufipogon',
        # The "." in the Oryza_sativa patterns is deliberately left as a
        # regex wildcard (as in the original), so any single separator
        # character in the file name is accepted at those positions.
        [ qr/Oryza_sativa.indica_gramene/  => 'Oryza_sativa.indica.gramene' ],
        [ qr/Oryza_sativa.indica_iplant/   => 'Oryza_sativa.indica.iplant' ],
        [ qr/Oryza_sativa.japonica.IRGSP/  => 'Oryza_sativa.japonica.IRGSP' ],
        [ qr/Oryza_sativa.japonica.MSU/    => 'Oryza_sativa.japonica.MSU' ],
        [ qr/Oryza_sativa.japonica_iplant/ => 'Oryza_sativa.japonica.iplant' ],
        'Oryzias_latipes',
        [ qr/Pediculus/     => 'Pediculus_humanus' ],
        [ qr/Phoenix/       => 'Phoenix_dactylifera' ],
        [ qr/Phyllostachys/ => 'Phyllostachys_heterocycla' ],
        [ qr/Physcomit/     => 'Physcomitrella_patens' ],
        [ qr/Populus/       => 'Populus_trichocarpa' ],
        [ qr/Prunus/        => 'Prunus_persica' ],
        [ qr/Rattus/        => 'Rattus_norvegicus' ],
        [ qr/Rhizopus/      => 'Rhizopus_oryzae' ],
        [ qr/Ricinus/       => 'Ricinus_communis' ],
        [ qr/cerevisiae/    => 'Saccharomyces_cerevisiae' ],
        [ qr/pombe/         => 'Schizosaccharomyces_pombe' ],
        [ qr/Selaginella/   => 'Selaginella_moellendorffii' ],
        'Solanum_tuberosum',
        'Solanum_lycopersicum',
        [ qr/Sorghum/       => 'Sorghum_bicolor' ],
        [ qr/Synechocystis/ => 'Synechocystis_pcc6803' ],
        [ qr/Takifugu/      => 'Takifugu_rubripes' ],
        [ qr/Tetraodon/     => 'Tetraodon_nigroviridis' ],
        'Theobroma_cacao',
        'TmDV92',
        'TmG3116',
        'Tolypocladium_inflatum',
        'Trichoderma_atroviride',
        'Trichoderma_reesii',
        'Trichoderma_virens',
        [ qr/Trichodesmium/ => 'Trichodesmium_erythraeum' ],
        # Previously only the lower-case spelling "triticum_aestivum" was
        # recognised, unlike every other species; accept both spellings.
        [ qr/[Tt]riticum_aestivum/ => 'Triticum_aestivum' ],
        'Triticum_monococcumDV92',
        'Triticum_monococcumG3116',
        'Triticum_urartu',
        'Vitis_vinifera',
        'Zea_mays',
    );

    # find_species($file_name)
    #
    # Returns the canonical species name recognised inside $file_name.
    # Matching is an unanchored, case-sensitive substring/regex search, and
    # the rules are tried in table order.
    #
    # Dies with "Error: Species can not be found from file name ..." when no
    # rule matches.
    sub find_species {
        my ($file_name) = @_;

        for my $rule (@SPECIES_RULES) {
            my ($pattern, $species) = @{$rule};
            return $species if $file_name =~ $pattern;
        }

        die "Error: Species can not be found from file name $file_name!";
    }
}
sub find_gene {
if ($species eq "Aegilops_tauschii") {
my ($gene_id, $type, $location, $info) = split(/\s/, $gene_header);
$gene = $gene_id;
+ }elsif ($species eq "Amborella_trichopoda") {
+ $gene = $gene_header;
}elsif ($species eq "Arabidopsis_lyrata") {
- my ($name,$gene_id,$scaffold,$isomer) = split(/\|/, $gene_header);
- $gene = $isomer;
+ my ($name,$gene_id) = split(/\|/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Arabidopsis_thaliana") {
- my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header);
+ my ($gene_id,$isomer) = split(/\|/, $gene_header);
$gene = $isomer;
- }elsif ($species eq "Batrachochytrium_distachyon") {
+ }elsif ($species eq "Batrachochytrium_dendrobatidis") {
my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Brachypodium_distachyon") {
- my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header);
+ my ($gene_id,$isomer) = split(/\|/, $gene_header);
$gene = $isomer;
+ }elsif ($species eq "Brachypodium_sylvaticum_Corvallis") {
+ my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Brachypodium_sylvaticum_Greece") {
+ $gene = $gene_header;
+ }elsif ($species eq "Brachypodium_sylvaticum_Spain") {
+ my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Brassica_rapa") {
+ my ($gene_id,$transcript) = split(/\|/, $gene_header);
+ $gene = $transcript; #????
}elsif ($species eq "Caenorhabditis_elegans") {
my ($locus_id,$unknown,$gene_id,$temp) = split(/\|/, $gene_header);
$gene = $locus_id; #???
+ }elsif ($species eq "Cajanus_cajan") {
+ my ($gene_id,$type,$locus,$temp) = split(/\s/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Carica_papaya") {
- my $supercontig = $gene_header;
- $gene = $supercontig;
+ my ($contig,$gene_id) = split(/\|/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Chlamydomonas_reinhardtii") {
- my ($name,$locus_id,$chrom,$transcript) = split(/\|/,$gene_header);
+ my ($locus,$transcript) = split(/\|/,$gene_header);
$gene = $transcript; #???
+ }elsif ($species eq "Cicer_arietinum") {
+ my ($gi,$number,$ref,$gene_id,$function) = split(/\|/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Citrus_clementina") {
- my ($gene_id,$pacid) = split(/\|/,$gene_header);
+ my ($transcript_id,$gene_id) = split(/\|/,$gene_header);
$gene = $gene_id;
}elsif ($species eq "Citrus_sinensis") {
- my ($gene_id,$pacid) = split(/\|/,$gene_header);
+ my ($transcript_id,$gene_id) = split(/\|/,$gene_header);
$gene = $gene_id;
}elsif ($species eq "Cucumis_sativus") {
- my ($name,$main_isomer,$scaffold,$isomer) = split(/\|/,$gene_header);
+ my ($gene_id,$isomer) = split(/\|/,$gene_header);
$gene = $isomer;
}elsif ($species eq "Danio_rerio") {
- my ($gene_id,$transcript) = split(/\|/,$gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Drosophila_melanogaster") {
- my ($gene_id,$temp) = split(/\s/,$gene_header);
- $gene = $gene_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Ectocarpus_siliculosus") {
- my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
+ my ($gene_id,$temp) = split(/\|/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Epichloe_festuca") {
$gene = $gene_header;
}elsif ($species eq "Escherichia_coli") {
$gene = $gene_header; #???
}elsif ($species eq "Eucalyptus_grandis") {
- my ($gene_id,$pacid) = split(/\|/,$gene_header);
- $gene = $gene_id;
+ my ($gene_id,$transcript_id) = split(/\|/,$gene_header);
+ $gene = $transcript_id;
}elsif ($species eq "Fragaria_vesca") {
- my ($gene_id, $mrna_id, $method, $length) = split(/\s\|\s/, $gene_header);
+ my ($gene_id, $mrna_id) = split(/\|/, $gene_header);
+ $gene_id =~ s/\-v1\.0\-hybrid//;
$gene = $gene_id;
}elsif ($species eq "Fusarium_graminearum") {
my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
my ($gene_id, $info) = split(/\s\|\s/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Gadus_morhua") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
- }elsif ($species eq "Gastroerosteus_aculeatus") {
- my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
- $gene = $transcript_id;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
+ }elsif ($species eq "Gasterosteus_aculeatus") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Glycine_max") {
- my ($name,$locus_id,$isomer,$chrom) = split(/\|/,$gene_header);
+ my ($locus_id,$isomer) = split(/\|/,$gene_header);
+ $gene = $isomer;
+ }elsif ($species eq "Gossypium_raimondii") {
+ my ($locus_id,$isomer) = split(/\|/,$gene_header);
$gene = $isomer;
}elsif ($species eq "Homo_sapiens") {
- my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
- $gene = $protein;
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Hordeum_vulgare") {
- my ($gene_id,$transcript) = split(/\|/,$gene_header);
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
$gene = $transcript;
}elsif ($species eq "Jatropha_curcas") {
my ($gene_id, $temp) = split(/\s/,$gene_header);
}elsif ($species eq "Laccaria_bicolor") {
my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header);
$gene = $gene_id;
+ }elsif ($species eq "Leersia_perrieri") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Linum_usitatissimum") {
+ my ($transcript,$gene_id) = split(/\|/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Magnaporthe_grissa") {
my ($transcript,$gene_id,$temp) = split(/\s\|\s/, $gene_header);
$gene = $transcript;
}elsif ($species eq "Malus_domestica" ) {
- $gene = $gene_header;
+ my ($gene_id1,$gene_id2) = split(/\|/, $gene_header);
+ $gene = $gene_id2;
}elsif ($species eq "Manihot_esculenta") {
- my ($name,$scaffold,$gene_id,$temp) = split(/\|/, $gene_header);
+ my ($transcript,$gene_id) = split(/\|/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Medicago_truncatula") {
- my ($name,$gene_id,$chrom,$temp) = split(/\|/, $gene_header);
- $gene = $gene_id;
+ my ($gene_id,$transcript) = split(/\|/, $gene_header);
+ $gene = $transcript;
}elsif ($species eq "Mimulus_guttatus") {
- my ($name,$gene_id,$scaffold,$temp) = split(/\|/, $gene_header);
+ my ($gene_id,$transcript) = split(/\|/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Mus_musculus") {
my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
$gene = $transcript;
}elsif ($species eq "Oncorhynchus_mykiss") {
- my ($gene_id, $offset, $coord_1, $coord_2) = split(/\s/,$gene_header);
+ my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header);
$gene = $gene_id;
- }elsif ($species eq "Oryza_sativa") {
- my ($isomer,$temp,$type) = split(/\|/,$gene_header);
- $gene = $isomer;
+ }elsif ($species eq "Leersia_perrieri") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_barthii") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_brachyantha") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_glaberrima") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_glumaepatula") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_longistaminata") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_meridionalis") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_nivara") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_punctata") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_rufipogon") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_sativa.indica.iplant") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_sativa.indica.gramene") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
+ }elsif ($species eq "Oryza_sativa.japonica.iplant") {
+ my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Oryza_sativa.japonica.IRGSP") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
+ }elsif ($species eq "Oryza_sativa.japonica.MSU") {
+ my ($gene_id,$transcript) = split(/\|/,$gene_header);
+ $gene = $transcript;
}elsif ($species eq "Oryzias_latipes") {
my ($gene_id, $transcript_id) = split(/\|/,$gene_header);
$gene = $transcript_id;
}elsif ($species eq "Triticum_aestivum") {
my ($type,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header);
$gene = $transcript;
- }elsif ($species eq "Triticum_urartu") {
- my ($gene_id, $type, $location, $info) = split(/\s/, $gene_header);
+ }elsif ($species eq "Triticum_monococcumDV92") {
+ my ($gene_id,$temp) = split(/\s+/,$gene_header);
+ $gene = $gene_id;
+ }elsif ($species eq "Triticum_monococcumG3116") {
+ my ($gene_id,$temp) = split(/\s+/,$gene_header);
$gene = $gene_id;
+ }elsif ($species eq "Triticum_urartu") {
+ my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header);
+ $transcript =~ s/transcript\://;
+ $gene = $transcript;
}elsif ($species eq "Vitis_vinifera") {
#my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
$gene = $gene_header; #???
}elsif ($species eq "Zea_mays") {
- #my ($transcript,$seq_type,$coord,$parent_transcript,$parent_gene) = split(/;\s/, $gene_header);
- my ($spec, $gene_id, $unknown, $transcript) = split(/\|/, $gene_header);
+ my ($gene_id,$transcript) = split(/\|/,$gene_header);
$gene = $transcript;
- $gene =~ s/\sseq=translation//g;
}else {
die "Error: Gene id can not be found for species $species!";
return $gene;
}
+
# The lookup tables live in a BEGIN block together with the sub so that they
# are built exactly once, at compile time, and are already populated no
# matter where in the file the first call to find_gene_synonym() happens.
BEGIN {
    # Field separators used by the various FASTA header layouts.
    my $PIPE        = qr/\|/;
    my $SPACED_PIPE = qr/\s\|\s/;
    my $SPACE       = qr/\s/;
    my $SPACES      = qr/\s+/;

    # species => [ separator, zero-based index of the field returned ]
    my %FIELD_RULE = (
        'Batrachochytrium_dendrobatidis' => [ $SPACED_PIPE, 1 ],    # transcript
        'Caenorhabditis_elegans'         => [ $PIPE,        2 ],    # gene id
        'Cajanus_cajan'                  => [ $SPACE,       2 ],    # locus
        'Carica_papaya'                  => [ $PIPE,        0 ],    # contig
        'Chlamydomonas_reinhardtii'      => [ $PIPE,        0 ],    # locus
        'Cicer_arietinum'                => [ $PIPE,        1 ],    # gi number
        'Citrus_clementina'              => [ $PIPE,        0 ],    # transcript id
        'Citrus_sinensis'                => [ $PIPE,        0 ],    # transcript id
        'Cucumis_sativus'                => [ $PIPE,        0 ],    # gene id
        'Eucalyptus_grandis'             => [ $PIPE,        0 ],    # gene id
        'Gossypium_raimondii'            => [ $PIPE,        0 ],    # locus id
        'Linum_usitatissimum'            => [ $PIPE,        0 ],    # transcript
        'Magnaporthe_grissa'             => [ $SPACED_PIPE, 1 ],    # gene id
        'Malus_domestica'                => [ $PIPE,        0 ],    # first gene id
        'Manihot_esculenta'              => [ $PIPE,        0 ],    # transcript
        'Medicago_truncatula'            => [ $PIPE,        0 ],    # gene id
        'Mimulus_guttatus'               => [ $PIPE,        0 ],    # gene id
        'Neurospora_crassa'              => [ $SPACED_PIPE, 1 ],    # gene id
        'Nostoc_punctiforme'             => [ $PIPE,        3 ],    # gene id
        'Oncorhynchus_mykiss'            => [ $PIPE,        1 ],    # gi number
        'Sorghum_bicolor'                => [ $PIPE,        0 ],    # gene id
        'Zea_mays'                       => [ $PIPE,        0 ],    # gene id

        # NOTE(review): the branches below were copied from find_gene() and
        # assigned the global $gene instead of $synonym.  They keep returning
        # the same field as before (the primary identifier); confirm which
        # field is the intended synonym for each of them.
        'Glycine_max'                => [ $PIPE,        1 ],
        'Oryzias_latipes'            => [ $PIPE,        1 ],
        'Pediculus_humanus'          => [ $PIPE,        1 ],
        'Phoenix_dactylifera'        => [ $SPACE,       0 ],
        'Phyllostachys_heterocycla'  => [ $SPACES,      0 ],
        'Physcomitrella_patens'      => [ $PIPE,        0 ],
        'Populus_trichocarpa'        => [ $PIPE,        1 ],
        'Prunus_persica'             => [ $SPACE,       0 ],
        'Rattus_norvegicus'          => [ $PIPE,        0 ],
        'Rhizopus_oryzae'            => [ $SPACED_PIPE, 0 ],
        'Ricinus_communis'           => [ $PIPE,        1 ],
        'Saccharomyces_cerevisiae'   => [ $SPACE,       0 ],
        'Schizosaccharomyces_pombe'  => [ $SPACE,       0 ],
        'Selaginella_moellendorffii' => [ $PIPE,        3 ],
        'Solanum_tuberosum'          => [ $SPACE,       0 ],
        'Solanum_lycopersicum'       => [ $SPACE,       0 ],
        'Takifugu_rubripes'          => [ $PIPE,        1 ],
        'Tetraodon_nigroviridis'     => [ $PIPE,        1 ],
        'Theobroma_cacao'            => [ $SPACE,       0 ],
        'TmDV92'                     => [ $SPACES,      0 ],
        'TmG3116'                    => [ $SPACES,      0 ],
        'Tolypocladium_inflatum'     => [ $SPACE,       0 ],
        'Trichoderma_atroviride'     => [ $PIPE,        3 ],
        'Trichoderma_reesii'         => [ $PIPE,        3 ],
        'Trichoderma_virens'         => [ $PIPE,        3 ],
        'Trichodesmium_erythraeum'   => [ $PIPE,        3 ],
        'Triticum_aestivum'          => [ $PIPE,        1 ],
        'Triticum_monococcumDV92'    => [ $SPACES,      0 ],
        'Triticum_monococcumG3116'   => [ $SPACES,      0 ],
    );

    # Species whose headers are whitespace separated with the peptide id in
    # field 0 and "gene:<id>" in field 3; the synonym is "<peptide>,<gene id>".
    my %PEPTIDE_GENE_STYLE = map { $_ => 1 } qw(
        Danio_rerio
        Drosophila_melanogaster
        Gadus_morhua
        Gasterosteus_aculeatus
        Homo_sapiens
        Hordeum_vulgare
        Mus_musculus
        Musa_acuminata
        Oryza_sativa.indica.gramene
        Oryza_sativa.japonica.IRGSP
        Triticum_urartu
    );

    # find_gene_synonym($gene_header, $species)
    #
    # Extracts an alternative identifier for a gene from a FASTA header line.
    #
    #   $gene_header  header text; one leading ">" is stripped if present
    #   $species      canonical species name selecting the header layout
    #
    # Returns the synonym string (undef when the header has fewer fields
    # than the layout expects).  Dies when $species has no known layout.
    #
    # Fixes over the previous version: the function now returns $synonym
    # (it used to return the unrelated global $gene, discarding the computed
    # value), and the Mus_musculus / Musa_acuminata layouts use the peptide
    # field instead of an undefined $peptide variable.
    sub find_gene_synonym {
        my ($gene_header, $species) = @_;
        $gene_header =~ s/^>//;    # strip off the header line identifier, if it isn't already

        my $synonym;
        if (my $rule = $FIELD_RULE{$species}) {
            my ($separator, $index) = @{$rule};
            # Limit -1 keeps trailing empty fields, matching what a list
            # assignment from split() yields for the leading fields.
            $synonym = (split $separator, $gene_header, -1)[$index];
        }
        elsif ($PEPTIDE_GENE_STYLE{$species}) {
            my @fields  = split $SPACE, $gene_header, -1;
            my $peptide = defined $fields[0] ? $fields[0] : '';
            my $gene_id = defined $fields[3] ? $fields[3] : '';
            $gene_id =~ s/gene\://;
            $synonym = "$peptide,$gene_id";
        }
        elsif ($species eq 'Fragaria_vesca') {
            $synonym = (split $PIPE, $gene_header, -1)[1];    # mRNA id
            $synonym =~ s/\-v1\.0\-hybrid// if defined $synonym;
        }
        elsif ($species eq 'Synechocystis_pcc6803') {
            # split ' ' (awk mode) also skips leading whitespace.
            # NOTE(review): copied from find_gene(); returns the primary id.
            $synonym = (split ' ', $gene_header, -1)[0];
        }
        elsif ($species eq 'Vitis_vinifera') {
            # NOTE(review): copied from find_gene(); returns the whole header.
            $synonym = $gene_header;
        }
        else {
            die "Error: Gene id can not be found for species $species!";
        }

        return $synonym;
    }
}
+
sub all_species_array {
# return all species in an array
my @spec_array;
push (@spec_array, "Aegilops_tauschii");
+ push (@spec_array, "Amborella_trichopoda");
push (@spec_array, "Arabidopsis_lyrata");
push (@spec_array, "Arabidopsis_thaliana");
- push (@spec_array, "Batrachochytrium_distachyon");
+ #push (@spec_array, "Batrachochytrium_dendrobatidis");
push (@spec_array, "Brachypodium_distachyon");
+ push (@spec_array, "Brachypodium_sylvaticum_Corvallis");
+ push (@spec_array, "Brachypodium_sylvaticum_Greece");
+ push (@spec_array, "Brachypodium_sylvaticum_Spain");
+ push (@spec_array, "Brassica_rapa");
push (@spec_array, "Caenorhabditis_elegans");
+ push (@spec_array, "Cajanus_cajan");
push (@spec_array, "Carica_papaya");
push (@spec_array, "Chlamydomonas_reinhardtii");
+ push (@spec_array, "Cicer_arietinum");
push (@spec_array, "Citrus_clementina");
push (@spec_array, "Citrus_sinensis");
push (@spec_array, "Cucumis_sativus");
push (@spec_array, "Fusarium_oxysporum");
push (@spec_array, "Fusarium_verticilliodes");
push (@spec_array, "Gadus_morhua");
- push (@spec_array, "Gastroerosteus_aculeatus");
+ push (@spec_array, "Gasterosteus_aculeatus");
push (@spec_array, "Glycine_max");
+ push (@spec_array, "Gossypium_raimondii");
push (@spec_array, "Homo_sapiens");
push (@spec_array, "Hordeum_vulgare");
push (@spec_array, "Jatropha_curcas");
push (@spec_array, "Laccaria_bicolor");
+ push (@spec_array, "Linum_usitatissimum");
push (@spec_array, "Magnaporthe_grissa");
push (@spec_array, "Malus_domestica");
push (@spec_array, "Manihot_esculenta");
push (@spec_array, "Trichoderma_virens");
push (@spec_array, "Trichodesmium_erythraeum");
push (@spec_array, "Triticum_aestivum");
+ push (@spec_array, "Triticum_monococcumDV92");
+ push (@spec_array, "Triticum_monococcumG3116");
push (@spec_array, "Triticum_urartu");
push (@spec_array, "Vitis_vinifera");
push (@spec_array, "Zea_mays");
sub retrieval_info {
my $file = $_[0];
- $file =~ /([a-zA-Z]*\_[a-zA-Z0-9]*)\_((?:[a-zA-Z0-9]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
+ $file =~ /([a-zA-Z]*\_+[a-zA-Z0-9]+)\_((?:[a-zA-Z0-9+-.]+\_?)+)\_(\d+\_\d+\_\d+).fa/;
my $species = $1;
my $method = $2;
my $date = $3;