# #
# Written by Justin Elser #
# Updated 9/2/10 for new species #
+# #
+# Updated 6/16/11 for new species and added sub #
+# to return retrieval info #
###########################################################
sub find_species {
$temp = "Glycine_max";
}elsif ($temp =~ /Homo\_sapiens/) {
$temp = "Homo_sapiens";
+ }elsif ($temp =~ /Jatropha/) {
+ $temp = "Jatropha_curcas";
}elsif ($temp =~ /Laccaria/) {
$temp = "Laccaria_bicolor";
}elsif ($temp =~ /Magnaporthe/) {
$temp = "Oryza_sativa";
}elsif ($temp =~ /Pediculus/) {
$temp = "Pediculus_humanus";
+ }elsif ($temp =~ /Phoenix/) {
+ $temp = "Phoenix_dactylifera";
}elsif ($temp =~ /Physcomit/) {
$temp = "Physcomitrella_patens";
}elsif ($temp =~ /Populus/) {
$temp = "Sorghum_bicolor";
}elsif ($temp =~ /Synechocystis/) {
$temp = "Synechocystis_pcc6803";
+ }elsif ($temp =~ /Theobroma\_cacao/) {
+ $temp = "Theobroma_cacao";
}elsif ($temp =~ /Tolypocladium\_inflatum/) {
$temp = "Tolypocladium_inflatum";
}elsif ($temp =~ /Trichoderma\_atroviride/) {
}elsif ($species eq "Homo_sapiens") {
my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header);
$gene = $protein;
+ }elsif ($species eq "Jatropha_curcas") {
+ my ($gene_id, $temp) = split(/\s/,$gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Laccaria_bicolor") {
my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header);
$gene = $gene_id;
my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header);
$gene_id =~ s/gene\://;
$gene = $gene_pa;
+ }elsif ($species eq "Phoenix_dactylifera") {
+ my ($gene_id, $temp) = split(/\s/, $gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Physcomitrella_patens") {
my ($name,$locus_id,$scaffold,$prot_id) = split(/\|/,$gene_header);
$gene = $prot_id; #???
}elsif ($species eq "Synechocystis_pcc6803") {
my ($gene_id,$temp) = split(" ",$gene_header);
$gene = $gene_id; #???
+ }elsif ($species eq "Theobroma_cacao") {
+ my ($gene_id,$temp) = split(/\s/,$gene_header);
+ $gene = $gene_id;
}elsif ($species eq "Tolypocladium_inflatum") {
my ($gene_id, $type, $info) = split(/\s/, $gene_header);
$gene = $gene_id;
}elsif ($species eq "Zea_mays") {
my ($transcript,$seq_type,$coord,$parent_transcript,$parent_gene) = split(/;\s/, $gene_header);
$gene = $transcript;
+ $gene =~ s/\sseq=translation//g;
+
}else {
die "Error: Gene id can not be found for species $species!";
}
push (@spec_array, "Fusarium_verticilliodes");
push (@spec_array, "Glycine_max");
push (@spec_array, "Homo_sapiens");
+ push (@spec_array, "Jatropha_curcas");
push (@spec_array, "Laccaria_bicolor");
push (@spec_array, "Magnaporthe_grissa");
push (@spec_array, "Malus_domestica");
push (@spec_array, "Nostoc_punctiforme");
push (@spec_array, "Oryza_sativa");
push (@spec_array, "Pediculus_humanus");
+ push (@spec_array, "Phoenix_dactylifera");
push (@spec_array, "Physcomitrella_patens");
push (@spec_array, "Populus_trichocarpa");
push (@spec_array, "Prunus_persica");
push (@spec_array, "Selaginella_moellendorffii");
push (@spec_array, "Sorghum_bicolor");
push (@spec_array, "Synechocystis_pcc6803");
+ push (@spec_array, "Theobroma_cacao");
push (@spec_array, "Tolypocladium_inflatum");
push (@spec_array, "Trichoderma_atroviride");
push (@spec_array, "Trichoderma_reesii");
return @spec_array;
}
+
# retrieval_info($file)
#
# Extracts the retrieval method and retrieval date that are encoded in a
# sequence file name of the form
#
#     <letters>_<alnum>_<method>_<digits>_<digits>_<digits>.fa
#
# where <method> is one or more alphanumeric words joined by underscores
# (for example "Theobroma_cacao_cirad_download_06_16_2011.fa" yields the
# method "cirad_download" and the date "06_16_2011").  The pattern is not
# anchored, so a leading directory or a longer suffix after ".fa" is
# tolerated.
#
# Parameters:
#   $file - file name (or path) to parse.
#
# Returns:
#   The two-element list ($method, $date).  Both elements are undef when
#   $file is undefined or does not follow the naming scheme, so callers
#   doing "my ($method, $date) = retrieval_info($f)" can test for undef.
sub retrieval_info {
    my ($file) = @_;

    return (undef, undef) unless defined $file;

    # The leading species part is matched but not captured because it is
    # not part of the return value.  The dot before "fa" is escaped so it
    # only matches a literal period.
    if ($file =~ /(?:[a-zA-Z]*_[a-zA-Z0-9]*)_((?:[a-zA-Z0-9]+_?)+)_(\d+_\d+_\d+)\.fa/) {
        return ($1, $2);
    }

    # Only read $1/$2 after a successful match: on failure the capture
    # variables still hold whatever an earlier, unrelated match left behind.
    return (undef, undef);
}
+
1;