Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
added the script that has the find_species and find_gene subroutines
author: elserj <elserj@localhost>
Mon, 5 Apr 2010 20:09:48 +0000 (20:09 +0000)
committer: elserj <elserj@localhost>
Mon, 5 Apr 2010 20:09:48 +0000 (20:09 +0000)
svn path=/; revision=13

interactome_scripts/find_species.pl [new file with mode: 0755]

diff --git a/interactome_scripts/find_species.pl b/interactome_scripts/find_species.pl
new file mode 100755 (executable)
index 0000000..17c7a88
--- /dev/null
@@ -0,0 +1,136 @@
+###########################################################
+#  Common subroutines to find the species from the        #
+#    string (filename) and the gene from the gene header  #
+#                                                         #
+###########################################################
+
+# find_species($file_name)
+#
+# Maps a string (normally a file name) to a short species label by looking
+# for a species-specific substring in it.
+#
+#   $file_name - string to inspect; matching is case-sensitive and
+#                unanchored, so the substring may appear anywhere.
+#
+# Returns the label paired with the first pattern that matches.
+# Dies with "Error: Species can not be found from file name!" when no
+# pattern matches.
+sub find_species {
+    my ($file_name) = @_;
+
+    # Ordered pattern => label pairs. Order matters: the list is scanned
+    # top to bottom and the first matching pattern decides the result.
+    # Labels are spelled exactly as find_gene compares them (including
+    # "Physcomitreall"), so they must not be "corrected" here alone.
+    my @known_species = (
+        [ qr/Arabidopsis/     => "Ath" ],
+        [ qr/brachypodium/    => "Brachy" ],
+        [ qr/elegans/         => "C_elegans" ],
+        [ qr/Chlamy/          => "Chlamy" ],
+        [ qr/Danio/           => "Danio" ],
+        [ qr/E\_coli/         => "E_coli" ],
+        [ qr/Fragaria/        => "Fragaria" ],
+        [ qr/Glycine/         => "Glycine" ],
+        [ qr/Homo\_sapiens/   => "Human" ],
+        [ qr/Maize/           => "Maize" ],
+        [ qr/musculus/        => "Mouse" ],
+        [ qr/neurospora/      => "Neurospora" ],
+        [ qr/Oryza\_sativa/   => "Oryza_sativa" ],
+        [ qr/Physcomit/       => "Physcomitreall" ],
+        [ qr/Populus/         => "Poplar" ],
+        [ qr/Ppersica/        => "P_persica" ],
+        [ qr/cerevisiae/      => "Sacc_cerevisiae" ],
+        [ qr/pombe/           => "Sacc_pombe" ],
+        [ qr/Selaginella/     => "Selaginella" ],
+        [ qr/Sorghum/         => "Sorghum" ],
+        [ qr/Synechosystis/   => "Synechosystis" ],
+        [ qr/Vitis\_vinifera/ => "Vitis" ],
+    );
+
+    for my $entry (@known_species) {
+        my ($pattern, $label) = @{$entry};
+        return $label if $file_name =~ $pattern;
+    }
+
+    # Nothing matched: the caller gave us a name we do not know about.
+    die "Error: Species can not be found from file name!";
+}
+
+# find_gene($gene_header, $species)
+#
+# Extracts the gene identifier from a gene header line, using the header
+# layout that belongs to the given species label.
+#
+#   $gene_header - header string to take the identifier from.
+#   $species     - species label; must be one of the keys of the layout
+#                  table below (the labels find_species returns).
+#
+# Returns the selected field, the whole header for species whose header is
+# used as-is, or undef when the header has too few fields.
+# Dies with "Error: Gene id can not be found!" for an unknown species.
+sub find_gene {
+    my ($gene_header, $species) = @_;
+
+    # species => [ mode, field_count, wanted_index ]
+    #   mode 'whole' : the header itself is the gene id
+    #   mode 'pipe'  : fields separated by "|"
+    #   mode 'space' : fields separated by a single whitespace character
+    #   mode 'words' : fields separated by runs of whitespace (split " ")
+    # field_count is the number of fields the header is expected to carry;
+    # wanted_index is the zero-based position of the gene id among them.
+    # Entries marked "???" were flagged as uncertain by the original author.
+    my %layout_for = (
+        Ath             => [ 'pipe',  4, 3 ],  # name|gene_id|chrom|isomer
+        # Brachy, newer fasta version: gene_id|chrom|isomer|name, gene is
+        # the isomer, i.e. [ 'pipe', 4, 2 ]. The old version is in use:
+        Brachy          => [ 'whole' ],
+        C_elegans       => [ 'pipe',  2, 0 ],  # gene_id|rest             ???
+        Chlamy          => [ 'pipe',  4, 1 ],  # name|locus_id|scaff_id|rest ???
+        Danio           => [ 'space', 2, 0 ],  # gene_id rest
+        E_coli          => [ 'whole' ],        #                          ???
+        Fragaria        => [ 'pipe',  4, 0 ],  # gene_id|mrna_id|method|length
+        Glycine         => [ 'pipe',  4, 3 ],  # name|locus_id|scaff_id|isomer
+        Human           => [ 'space', 2, 0 ],  # gene_id rest
+        Maize           => [ 'whole' ],
+        Mouse           => [ 'space', 2, 0 ],  # gene_id rest
+        Neurospora      => [ 'space', 2, 0 ],  # gene_id rest
+        Oryza_sativa    => [ 'pipe',  3, 0 ],  # isomer|rest|type
+        Physcomitreall  => [ 'pipe',  4, 3 ],  # name|locus_id|chrom_id|prot_id ???
+        Poplar          => [ 'pipe',  4, 3 ],  # name|locus_id|chrom_id|prot_id ???
+        P_persica       => [ 'space', 2, 0 ],  # gene_id rest
+        Sacc_cerevisiae => [ 'space', 2, 0 ],  # gene_id rest
+        Sacc_pombe      => [ 'space', 2, 0 ],  # gene_id rest
+        Selaginella     => [ 'pipe',  4, 3 ],  # name|locus_id|chrom_id|prot_id ???
+        Sorghum         => [ 'pipe',  4, 3 ],  # name|locus_id|scaff_id|prot_id ???
+        Synechosystis   => [ 'words', 3, 0 ],  # gene_id type rest        ???
+        Vitis           => [ 'pipe',  4, 1 ],  # name|gene_id|chrom_id|id ???
+    );
+
+    my $layout = $layout_for{$species};
+    die "Error: Gene id can not be found!" unless $layout;
+
+    my ($mode, $field_count, $wanted_index) = @{$layout};
+    return $gene_header if $mode eq 'whole';
+
+    # An explicit limit of field_count + 1 is what Perl applies implicitly
+    # when split is assigned to a list of field_count scalars; spelling it
+    # out keeps empty trailing fields as empty strings rather than undef.
+    my $limit = $field_count + 1;
+    my @fields;
+    if ($mode eq 'pipe') {
+        @fields = split(/\|/, $gene_header, $limit);
+    }
+    elsif ($mode eq 'space') {
+        # A single \s: leading whitespace yields an empty first field.
+        @fields = split(/\s/, $gene_header, $limit);
+    }
+    else {
+        # Literal " " selects split's awk mode: leading whitespace is
+        # skipped and any run of whitespace separates fields.
+        @fields = split(" ", $gene_header, $limit);
+    }
+
+    return $fields[$wanted_index];
+}
+
+1;