From: elserj
Date: Tue, 16 Dec 2014 23:33:24 +0000 (+0000)
Subject: Generalize species input so that script doesn't have to be edited each time
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=8f438e289e5536b6cbdae42807156309e5dd9155;p=old-jaiswallab-svn%2F.git

Generalize species input so that script doesn't have to be edited each time

svn path=/; revision=592
---

diff --git a/interactome_scripts/find_ortho_inpara.pl b/interactome_scripts/find_ortho_inpara.pl
index a7876e4..600cd27 100755
--- a/interactome_scripts/find_ortho_inpara.pl
+++ b/interactome_scripts/find_ortho_inpara.pl
@@ -12,8 +12,8 @@
 use strict;
 use warnings;
 
-if($#ARGV != 1) {
-    print "usage: find_ortho_inpara.pl input_gene_list output_file_prefix\n";
+if($#ARGV != 3) {
+    print "usage: find_ortho_inpara.pl species_1 species_2 input_gene_list output_file\n";
     exit;
 }
 
@@ -23,16 +23,17 @@ use DbiFloret;
 
 my $dbh = DbiFloret::dbconnect;
 
-my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy", "Poplar");
+my @species_array = ($ARGV[0], $ARGV[1]);
+#my @species_array = ("Arabidopsis_thaliana", "Zea_mays");
 #my @species_array = ("Ath", "Maize");
 my $spec_array_size = @species_array;
 
 # similarity score to determine if we should keep the paralogs.
 # 0.0 would keep all paralogs, 1.0 would restrict to strict orthologs
-my $sim_score = 0.0;
+my $sim_score = 0.1;
 
 # read in list of genes from csv file given as argument
-my $in_file = $ARGV[0];
+my $in_file = $ARGV[2];
 
 open (in_file, "$in_file");
 
@@ -43,7 +44,7 @@ while(<in_file>) {
     chomp $in_gene;
     $in_gene =~ s/\s//g;
     if ($in_gene !~ /\.\d$/) {
-        $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+        #$in_gene .= ".1"; # add the suffix back so that the gene matches the db
     }
     push(@in_gene_array, $in_gene);
     #print "$in_gene\n";
@@ -51,20 +52,23 @@ while(<in_file>) {
 }
 close(in_file);
 
+my %ortho_hash;
 for (my $i = 1; $i<$spec_array_size; $i++) {
 
-    my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+    my $out_file = $ARGV[3]; # . "_$species_array[3].txt";
     open(out_file, ">$out_file");
 
     my $table = $species_array[0] . "_" . $species_array[$i];
 
-    print out_file "gene\tortho_gene\tscore\n";
+    print out_file "id\tgene\tortho_gene\tscore\n";
+
+    my $safe_table = $dbh->quote_identifier($table);
 
     # set up the db query statement
-    my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
+    my $sth_get_id = $dbh->prepare("select id from $safe_table where gene like ?");
 
     # set up db query statement to get the orthologs using the cluster id
-    my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '$sim_score'");
+    my $sth_get_ortho = $dbh->prepare("select gene, score from $safe_table where id = ? and species = '$species_array[1]' and score >= '$sim_score'");
 
     foreach my $gene (@in_gene_array) {
 
@@ -74,6 +78,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
         }
 
         while (my $id = $sth_get_id->fetchrow_array()) {
+            #print "$id\n";
             my $rv2 = $sth_get_ortho->execute($id);
             if (!$rv2) {
                 next;
@@ -81,11 +86,19 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
 
             while (my @line = $sth_get_ortho->fetchrow_array()) {
                 my ($ortho,$score) = @line;
-                print out_file "$gene\t$ortho\t$score\n";
+                print out_file "$id\t$gene\t$ortho\t$score\n";
+                if(defined($ortho_hash{$gene})) {
+                    $ortho_hash{$gene} = "$ortho_hash{$gene}\t$ortho";
+                }else{
+                    $ortho_hash{$gene} = $ortho;
+                }
             }
         }
     }
 
-    close(out_file);
 
 }
+#foreach my $key (keys %ortho_hash) {
+#    print out_file "$key\t$ortho_hash{$key}\n";
+#}
+close(out_file);