From cf9f6d5efd49b02b9c4441f1bc693bc4e4f86cf0 Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 3 Sep 2010 00:02:12 +0000 Subject: [PATCH] Fix to skip all isomers, even if higher than .29 svn path=/; revision=22 --- .../inparanoid_output_parse.pl | 133 +++--------------- 1 file changed, 16 insertions(+), 117 deletions(-) diff --git a/interactome_scripts/inparanoid_output_parse.pl b/interactome_scripts/inparanoid_output_parse.pl index 8f64b9a..8c035b5 100755 --- a/interactome_scripts/inparanoid_output_parse.pl +++ b/interactome_scripts/inparanoid_output_parse.pl @@ -9,6 +9,8 @@ # Seems to work fine # # Version 1.01 - Nov 4 '09 # # Added support for strawberry # +# Version 1.1 - April 2010 # +# Changed to use external subs to find species # # # ############################################################### @@ -19,6 +21,8 @@ use warnings; use DBI; use Term::Screen::ReadLine; +require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl"; + # open the file to be parsed, assume this is all sqltable* files in directory my @files = glob("sqltable*"); @@ -59,6 +63,8 @@ foreach my $input_file (@files) { my $clust_table = "clusters"; my %id_hash; + + print "Working on $species_table\n"; @@ -107,6 +113,16 @@ foreach my $input_file (@files) { # skip isomers that are not .1 next if ($gene =~ /\.[2-9]$/); + next if ($gene =~ /\.1[0-9]$/); + next if ($gene =~ /\.2[0-9]$/); + next if ($gene =~ /\.3[0-9]$/); + next if ($gene =~ /\.4[0-9]$/); + next if ($gene =~ /\.5[0-9]$/); + next if ($gene =~ /\.6[0-9]$/); + next if ($gene =~ /\.7[0-9]$/); + next if ($gene =~ /\.8[0-9]$/); + next if ($gene =~ /\.9[0-9]$/); + # assume there are never more than 99 isomers if(!defined($id_hash{$clust_id})) { if ($clust_id ne $clust_id_prev) { @@ -135,120 +151,3 @@ foreach my $input_file (@files) { $dbh->disconnect; -sub find_species { - my $temp = $_[0]; - if ($temp =~ /Arabidopsis/) { - $temp = "Ath"; - }elsif ($temp =~ /brachypodium/) { - $temp = "Brachy"; - }elsif ($temp =~ 
/elegans/) { - $temp = "C_elegans"; - }elsif ($temp =~ /Chlamy/) { - $temp = "Chlamy"; - }elsif ($temp =~ /Danio/) { - $temp = "Danio"; - }elsif ($temp =~ /E\_coli/) { - $temp = "E_coli"; - }elsif ($temp =~ /Fragaria/) { - $temp = "Fragaria"; - }elsif ($temp =~ /Glycine/) { - $temp = "Soy"; - }elsif ($temp =~ /Homo\_sapiens/) { - $temp = "Human"; - }elsif ($temp =~ /Maize/) { - $temp = "Maize"; - }elsif ($temp =~ /musculus/) { - $temp = "Mouse"; - }elsif ($temp =~ /neurospora/) { - $temp = "Neurospora"; - }elsif ($temp =~ /Oryza\_sativa/) { - $temp = "Oryza_sativa"; - }elsif ($temp =~ /Physcomit/) { - $temp = "Physcomitreall"; - }elsif ($temp =~ /Populus/) { - $temp = "Poplar"; - }elsif ($temp =~ /cerevisiae/) { - $temp = "Sacc_cerevisiae"; - }elsif ($temp =~ /pombe/) { - $temp = "Sacc_pombe"; - }elsif ($temp =~ /Selaginella/) { - $temp = "Selaginella"; - }elsif ($temp =~ /Sorghum/) { - $temp = "Sorghum"; - }elsif ($temp =~ /Synechosystis/) { - $temp = "Synechosystis"; - }elsif ($temp =~ /Vitis\_vinifera/) { - $temp = "Grape"; - }else { - die "Error: Species can not be found from file name!"; - } - return $temp; -} - -sub find_gene { - my $gene_header = $_[0]; - my $species = $_[1]; - my $gene; - if ($species eq "Ath") { - my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header); - $gene = $isomer; - }elsif ($species eq "Brachy") { - $gene = $gene_header; - }elsif ($species eq "C_elegans") { - my ($gene_id,$temp) = split(/\|/, $gene_header); - $gene = $gene_id; #??? - }elsif ($species eq "Chlamy") { - my ($name,$locus_id,$scaff_id,$temp) = split(/\|/,$gene_header); - $gene = $locus_id; #??? - }elsif ($species eq "Danio") { - $gene = $gene_header; - }elsif ($species eq "E_coli") { - $gene = $gene_header; #??? 
- }elsif ($species eq "Fragaria") { - my ($gene_id, $mrna_id, $method, $length) = split(/\|/, $gene_header); - $gene = $gene_id; - }elsif ($species eq "Soy") { - my ($name,$locus_id,$scaff_id,$isomer) = split(/\|/,$gene_header); - $gene = $isomer; - }elsif ($species eq "Human") { - $gene = $gene_header; - }elsif ($species eq "Maize") { - $gene = $gene_header; - }elsif ($species eq "Mouse") { - $gene = $gene_header; - }elsif ($species eq "Neurospora") { - $gene = $gene_header; - }elsif ($species eq "Oryza_sativa") { - my ($isomer,$temp,$type) = split(/\|/,$gene_header); - $gene = $isomer; - }elsif ($species eq "Physcomitreall") { - my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Poplar") { - my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Sacc_cerevisiae") { - $gene = $gene_header; - }elsif ($species eq "Sacc_pombe") { - $gene = $gene_header; - }elsif ($species eq "Selaginella") { - my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Sorghum") { - my ($name,$locus_id,$scaff_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; #??? - }elsif ($species eq "Synechosystis") { - my ($gene_id,$type,$temp) = split(" ",$gene_header); - $gene = $gene_id; #??? - }elsif ($species eq "Grape") { - my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header); - $gene = $gene_id; #??? - }else { - die "Error: Gene id can not be found!"; - } - return $gene; -} - - - - -- 2.34.1