use strict;
use warnings;
# Argument-count guard. $#ARGV is the index of the last command-line argument,
# so the removed ("-") test demanded exactly two arguments and the added ("+")
# test demands exactly four: species_1 species_2 input_gene_list output_file.
# NOTE(review): this text looks like a unified diff ("-" = removed line,
# "+" = added line, unmarked = unchanged context) rather than a runnable
# script -- confirm before executing or editing it as plain Perl.
-if($#ARGV != 1) {
- print "usage: find_ortho_inpara.pl input_gene_list output_file_prefix\n";
+if($#ARGV != 3) {
+ print "usage: find_ortho_inpara.pl species_1 species_2 input_gene_list output_file\n";
# Bare exit: the script ends with status 0 even though the call was malformed.
exit;
}
# Database handle returned by DbiFloret::dbconnect.
# NOTE(review): no `use DbiFloret` is visible in this view -- presumably the
# module is loaded elsewhere; confirm.
my $dbh = DbiFloret::dbconnect;
# Species list. The removed line hard-coded seven species names; the added
# line takes exactly two names from the first two command-line arguments.
-my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy", "Poplar");
+my @species_array = ($ARGV[0], $ARGV[1]);
+#my @species_array = ("Arabidopsis_thaliana", "Zea_mays");
#my @species_array = ("Ath", "Maize");
# Element count of @species_array (array evaluated in scalar context).
my $spec_array_size = @species_array;
# similarity score to determine if we should keep the paralogs.
# 0.0 would keep all paralogs, 1.0 would restrict to strict orthologs
# The change raises the threshold from 0.0 to 0.1.
-my $sim_score = 0.0;
+my $sim_score = 0.1;
# read in list of genes from csv file given as argument
# The path moves from argument 0 to argument 2 to follow the new argument order.
-my $in_file = $ARGV[0];
+my $in_file = $ARGV[2];
# Read the gene list: each entry is chomped, stripped of all whitespace and
# pushed onto @in_gene_array.
# NOTE(review): the loop header that assigns $in_gene and the declaration of
# @in_gene_array are not visible in this view (the lone closing brace below has
# no visible opener) -- presumably a `while` over in_file; confirm against the
# full file.
# NOTE(review): two-argument open on a bareword handle with no check of the
# result, so a missing or unreadable input file is not reported here.
open (in_file, "$in_file");
chomp $in_gene;
# Delete every whitespace character, not just leading/trailing ones.
$in_gene =~ s/\s//g;
# Entries that do not end in ".<digit>" used to get ".1" appended. The added
# line comments that statement out, so after the change this branch does
# nothing and such gene names are kept without a suffix.
if ($in_gene !~ /\.\d$/) {
- $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+ #$in_gene .= ".1"; # add the suffix back so that the gene matches the db
}
push(@in_gene_array, $in_gene);
#print "$in_gene\n";
}
close(in_file);
# Main lookup: for each species after the first, query the pairwise table for
# the cluster id of every input gene, then list the genes of the other species
# in that cluster whose score meets $sim_score, writing one row per hit.
# %ortho_hash (added) collects, per input gene, a tab-joined string of the
# orthologs found. As shown it is only read by the commented-out dump near the
# end of the script.
+my %ortho_hash;
# Starts at index 1 because element 0 is the reference species; with the
# two-element @species_array built from the arguments this runs once.
for (my $i = 1; $i<$spec_array_size; $i++) {
# Output path: previously "<prefix>_<species>.txt" per species; now the fourth
# argument verbatim (the suffix is commented out), so every pass would target
# the same file.
- my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+ my $out_file = $ARGV[3]; # . "_$species_array[3].txt";
# NOTE(review): two-argument open on a bareword handle, result unchecked; ">"
# truncates an existing file.
open(out_file, ">$out_file");
# Table name is "<species 0>_<species i>".
my $table = $species_array[0] . "_" . $species_array[$i];
# Header row; the change adds a leading "id" column.
- print out_file "gene\tortho_gene\tscore\n";
+ print out_file "id\tgene\tortho_gene\tscore\n";
+
# Quote the table name as an SQL identifier before interpolating it, since it
# is now built from command-line arguments.
# NOTE(review): assumes $dbh is a DBI database handle -- confirm.
+ my $safe_table = $dbh->quote_identifier($table);
# set up the db query statement
# The gene is bound through a placeholder; the comparison changes from "=" to
# "like".
- my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
+ my $sth_get_id = $dbh->prepare("select id from $safe_table where gene like ?");
# set up db query statement to get the orthologs using the cluster id
# NOTE(review): the species name and the score threshold are interpolated into
# the SQL text rather than bound, and the species name now comes from the
# command line. The added line also fixes the species index at 1 instead of $i
# (the same element while only two species are given).
- my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '$sim_score'");
+ my $sth_get_ortho = $dbh->prepare("select gene, score from $safe_table where id = ? and species = '$species_array[1]' and score >= '$sim_score'");
# NOTE(review): as shown this foreach has an empty body, no execute of
# $sth_get_id is visible, and $gene is used further down outside this loop's
# scope -- lines are presumably missing from this view; confirm against the
# full file.
foreach my $gene (@in_gene_array) {
}
# One iteration per id row returned. The loop condition tests the fetched value
# for truth, so an id of 0 or "" would also end the loop.
while (my $id = $sth_get_id->fetchrow_array()) {
+ #print "$id\n";
my $rv2 = $sth_get_ortho->execute($id);
# Skip this id when the ortholog query fails to execute.
# NOTE(review): the closing brace of this `if` is not visible in this view.
if (!$rv2) {
next;
# Each row is (gene, score) for a member of the cluster in the other species.
while (my @line = $sth_get_ortho->fetchrow_array()) {
my ($ortho,$score) = @line;
- print out_file "$gene\t$ortho\t$score\n";
+ print out_file "$id\t$gene\t$ortho\t$score\n";
# Append to the gene's tab-separated ortholog string, or start it.
+ if(defined($ortho_hash{$gene})) {
+ $ortho_hash{$gene} = "$ortho_hash{$gene}\t$ortho";
+ }else{
+ $ortho_hash{$gene} = $ortho;
+ }
}
}
}
# The per-iteration close is removed here; the handle is now closed once at
# the end of the script.
- close(out_file);
}
# Disabled dump of the gene -> orthologs map to the output file.
+#foreach my $key (keys %ortho_hash) {
+# print out_file "$key\t$ortho_hash{$key}\n";
+#}
+close(out_file);