From b0e11aa72d51e3c960d7282be3e3ab8c3fcda117 Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 3 Sep 2010 21:58:45 +0000 Subject: [PATCH] Fixed find_genes subroutine for new versions of the sequence files svn path=/; revision=28 --- interactome_scripts/find_species.pl | 141 +++++++++++++++++++--------- 1 file changed, 98 insertions(+), 43 deletions(-) diff --git a/interactome_scripts/find_species.pl b/interactome_scripts/find_species.pl index eaf444d..77183d3 100755 --- a/interactome_scripts/find_species.pl +++ b/interactome_scripts/find_species.pl @@ -94,75 +94,130 @@ sub find_species { sub find_gene { my $gene_header = $_[0]; + $gene_header =~ s/^>//; # strip off the header line identifier, if it isn't already my $species = $_[1]; my $gene; - if ($species eq "Ath") { + if ($species eq "Arabidopsis_lyrata") { + my ($name,$gene_id,$scaffold,$isomer) = split(/\|/, $gene_header); + $gene = $isomer; + }elsif ($species eq "Arabidopsis_thaliana") { + my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header); + $gene = $isomer; + }elsif ($species eq "Batrachochytrium_distachyon") { + my ($gene_id,$transcript,$info) = split(/\s\|\s/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Brachypodium_distachyon") { my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header); $gene = $isomer; - }elsif ($species eq "Brachy") { - #uncomment for newer version of fasta file - #my ($gene_id,$chrom,$isomer,$name) = split(/\|/, $gene_header); - #$gene = $isomer; - #uncomment for old version of fasta file - $gene = $gene_header; - }elsif ($species eq "C_elegans") { - my ($gene_id,$temp) = split(/\|/, $gene_header); + }elsif ($species eq "Caenorhabditis_elegans") { + my ($locus_id,$unknown,$gene_id,$temp) = split(/\s+/, $gene_header); $gene = $gene_id; #??? - }elsif ($species eq "Chlamy") { - my ($name,$locus_id,$scaff_id,$temp) = split(/\|/,$gene_header); - $gene = $locus_id; #??? - }elsif ($species eq "Danio") { + }elsif ($species eq "Carica_papaya") { + my $supercontig = $gene_header; + $gene = $supercontig; + }elsif ($species eq "Chlamydomonas_reinhardtii") { + my ($name,$locus_id,$chrom,$transcript) = split(/\|/,$gene_header); + $gene = $transcript; #??? + }elsif ($species eq "Cucumis_sativus") { + my ($name,$gene_id,$scaffold,$unknown) = split(/\|/,$gene_header); + print "$name\t$gene_id\t$scaffold\t$unknown\n"; + $gene = $gene_id; + }elsif ($species eq "Danio_rerio") { + my ($gene_id,$transcript) = split(/\|/,$gene_header); + $gene = $gene_id; + }elsif ($species eq "Drosophila_melanogaster") { my ($gene_id,$temp) = split(/\s/,$gene_header); $gene = $gene_id; - }elsif ($species eq "E_coli") { + }elsif ($species eq "Ectocarpus_siliculosus") { + my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Escherichia_coli") { $gene = $gene_header; #??? - }elsif ($species eq "Fragaria") { - my ($gene_id, $mrna_id, $method, $length) = split(/\|/, $gene_header); + }elsif ($species eq "Fragaria_vesca") { + my ($gene_id, $mrna_id, $method, $length) = split(/\s\|\s/, $gene_header); $gene = $gene_id; - }elsif ($species eq "Glycine") { - my ($name,$locus_id,$scaff_id,$isomer) = split(/\|/,$gene_header); + }elsif ($species eq "Glycine_max") { + my ($name,$locus_id,$isomer,$chrom) = split(/\|/,$gene_header); $gene = $isomer; - }elsif ($species eq "Human") { - my ($gene_id,$temp) = split(/\s/,$gene_header); + }elsif ($species eq "Homo_sapiens") { + my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); + $gene_id =~ s/Gene\://; $gene = $gene_id; - }elsif ($species eq "Maize") { - $gene = $gene_header; - }elsif ($species eq "Mouse") { - my ($gene_id,$temp) = split(/\s/,$gene_header); + }elsif ($species eq "Laccaria_bicolor") { + my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header); $gene = $gene_id; - }elsif ($species eq "Neurospora") { - my ($gene_id,$temp) = split(/\s/,$gene_header); + }elsif ($species eq "Magnaporthe_grissa") { + my ($transcript,$gene_id,$temp) = split(/\s\|\s/, $gene_header); + $gene = $transcript; + }elsif ($species eq "Manihot_esculenta") { + my ($name,$scaffold,$gene_id,$temp) = split(/\|/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Medicago_truncatula") { + my ($name,$gene_id,$chrom,$temp) = split(/\|/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Mimulus_guttatus") { + my ($name,$gene_id,$scaffold,$temp) = split(/\|/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Mus_musculus") { + my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); + $gene_id =~ s/Gene\://; + $gene = $gene_id; + }elsif ($species eq "Neurospora_crassa") { + my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header); + $gene = $transcript; + }elsif ($species eq "Nostoc_punctiforme") { + my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_sativa") { my ($isomer,$temp,$type) = split(/\|/,$gene_header); $gene = $isomer; - }elsif ($species eq "Physcomitrella") { - my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Poplar") { - my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); + }elsif ($species eq "Pediculus_humanus") { + my ($source,$gene_pa,$func,$unknown,$gene_id) = split(/\|/, $gene_header); + $gene_id =~ s/gene\://; + $gene = $gene_id; + }elsif ($species eq "Physcomitrella_patens") { + my ($name,$locus_id,$scaffold,$prot_id) = split(/\|/,$gene_header); $gene = $prot_id; #??? - }elsif ($species eq "P_persica") { + }elsif ($species eq "Populus_trichocarpa") { + my ($name,$gene_id,$scaffold,$prot_id) = split(/\|/,$gene_header); + $gene = $gene_id; #??? + }elsif ($species eq "Prunus_persica") { my ($gene_id,$temp) = split(/\s/,$gene_header); $gene = $gene_id; - }elsif ($species eq "Sacc_cerevisiae") { - my ($gene_id,$temp) = split(/\s/,$gene_header); + }elsif ($species eq "Rattus_norvegicus") { + my ($gene_id,$transcript,$chrom) = split(/\|/, $gene_header); $gene = $gene_id; - }elsif ($species eq "Schizo_pombe") { - my ($gene_id,$temp) = split(/\s/,$gene_header); + }elsif ($species eq "Rhizopus_oryzae") { + my ($gene_id,$func) = split(/\s\|\s/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Ricinus_communis") { + my ($name,$transcript,$gene_id,$temp) = split(/\|/, $gene_header); + $gene = $transcript; + }elsif ($species eq "Saccharomyces_cerevisiae") { + my ($unknown,$gene_name,$gene_id,$temp) = split(/\s/,$gene_header); + $gene_id =~ s/SGDID\://; + $gene = $gene_id; + }elsif ($species eq "Schizosaccharomyces_pombe") { + my ($gene_id,$gene_name,$unknown,$func,$name,$chrom,$temp) = split(/\s/,$gene_header); $gene = $gene_id; - }elsif ($species eq "Selaginella") { + }elsif ($species eq "Selaginella_moellendorffii") { my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); $gene = $prot_id; #??? - }elsif ($species eq "Sorghum") { - my ($name,$locus_id,$scaff_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Synechocystis") { - my ($gene_id,$type,$temp) = split(" ",$gene_header); + }elsif ($species eq "Sorghum_bicolor") { + my ($name,$gene_id,$chrom,$prot_id) = split(/\|/,$gene_header); + $gene = $gene_id; #??? + }elsif ($species eq "Synechocystis_pcc6803") { + my ($gene_id,$temp) = split(" ",$gene_header); $gene = $gene_id; #??? - }elsif ($species eq "Vitis") { + }elsif ($species eq "Trichodesmium_erythraeum") { + my ($temp,$unknown,$source,$gene_id,$func) = split(/\|/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Vitis_vinifera") { my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header); $gene = $gene_id; #??? + }elsif ($species eq "Zea_mays") { + my ($name,$gene_id,$unknown,$transcript) = split(/\|/, $gene_header); + $gene = $transcript }else { die "Error: Gene id can not be found for species $species!"; } -- 2.34.1