use strict;
use warnings;
+# Require exactly two arguments: the input gene list and the output file prefix.
+# The usage text goes to STDERR and the script exits non-zero so that a calling
+# shell or pipeline can tell a usage error from a successful run.
+if (@ARGV != 2) {
+ print STDERR "usage: find_ortho_inpara.pl input_gene_list output_file_prefix\n";
+ exit 1;
+}
+
use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
use DbiFloret;
my $dbh = DbiFloret::dbconnect;
-#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
-my @species_array = ("Ath", "Maize");
+my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy", "Poplar");
+#my @species_array = ("Ath", "Maize");
my $spec_array_size = @species_array;
+# Minimum score a hit must reach to be reported; it is applied as
+# "score >= $sim_score" in the ortholog query below.
+# 0.0 would keep all paralogs, 1.0 would restrict to strict orthologs
+my $sim_score = 0.0;
+
# read in list of genes from csv file given as argument
my $in_file = $ARGV[0];
my $in_gene = $_;
chomp $in_gene;
$in_gene =~ s/\s//g;
- $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+ # Append the default ".1" suffix only when the id does not already end in a
+ # numeric suffix. \d+ also recognises multi-digit suffixes such as ".10",
+ # which a single-digit test would miss and turn into ".10.1".
+ if ($in_gene !~ /\.\d+$/) {
+ $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+ }
push(@in_gene_array, $in_gene);
#print "$in_gene\n";
}
close(in_file);
-open(out_file, ">Ath_ortho.txt");
for (my $i = 1; $i<$spec_array_size; $i++) {
+ # One output file per target species: <prefix>_<species>.txt
+ my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+ # Three-argument open keeps the mode separate from the user-supplied prefix,
+ # and the result is checked so a bad path stops the run instead of silently
+ # discarding every later print to out_file.
+ open(out_file, '>', $out_file) or die "cannot open $out_file for writing: $!\n";
+
my $table = $species_array[0] . "_" . $species_array[$i];
- print out_file "$species_array[$i]\t$table\n";
+ print out_file "gene\tortho_gene\tscore\n";
# set up the db query statement
my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
# set up db query statement to get the orthologs using the cluster id
- my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '0.0'");
+ my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '$sim_score'");
foreach my $gene (@in_gene_array) {
}
}
}
+ close(out_file);
}
-close(out_file);
+
#!/usr/bin/perl
-#####################################################################
-# Written by Justin Elser 4/14/10 #
-# #
-# This program takes an input file with a list of genes and #
-# finds the orthologs (and paralogs if the score in sth_get_ortho #
-# is changed). #
-# #
-#####################################################################
+###########################################################################
+# Written by Justin Elser 4/14/10 #
+# #
+# This program takes an input file with a list of genes and #
+# finds the orthologs (and paralogs if the score in sth_get_ortho #
+# is changed) from the supercluster table #
+# #
+###########################################################################
use strict;
use warnings;
+# Require exactly two arguments: the input gene list and the output file prefix.
+# The usage text goes to STDERR and the script exits non-zero so that a calling
+# shell or pipeline can tell a usage error from a successful run.
+if (@ARGV != 2) {
+ print STDERR "usage: find_ortho_super.pl input_gene_list output_file_prefix\n";
+ exit 1;
+}
+
use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
use DbiFloret;
my $dbh = DbiFloret::dbconnect;
-#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
-my @species_array = ("Ath", "Maize");
+my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy");
+#my @species_array = ("Ath", "Maize");
my $spec_array_size = @species_array;
# read in list of genes from csv file given as argument
my $in_gene = $_;
chomp $in_gene;
$in_gene =~ s/\s//g;
- $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+ # Append the default ".1" suffix only when the id does not already end in a
+ # numeric suffix. \d+ also recognises multi-digit suffixes such as ".10",
+ # which a single-digit test would miss and turn into ".10.1".
+ if ($in_gene !~ /\.\d+$/) {
+ $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+ }
push(@in_gene_array, $in_gene);
#print "$in_gene\n";
}
close(in_file);
-open(out_file, ">Ath_ortho_super.txt");
-
for (my $i = 1; $i<$spec_array_size; $i++) {
+ # One output file per target species: <prefix>_<species>.txt
+ my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+ # Three-argument open keeps the mode separate from the user-supplied prefix,
+ # and the result is checked so a bad path stops the run instead of silently
+ # discarding every later print to out_file.
+ open(out_file, '>', $out_file) or die "cannot open $out_file for writing: $!\n";
+
my $table = "super_clust";
#print out_file "$species_array[$i]\t$table\n";
# set up db query statement to get the orthologs using the cluster id
my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = '$species_array[$i]'");
#my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?");
-
- # setup query to make sure there is an ortholog gene in the cluster
- my $sth_check_ortho = $dbh->prepare("select species from $table where super_id = ? and species = '$species_array[0]'");
-
+
foreach my $gene (@in_gene_array) {
my $rv1 = $sth_get_id->execute($gene);
}
while (my $id = $sth_get_id->fetchrow_array()) {
- #my $rv2 = $sth_check_ortho->execute($id);
- #if (!$rv2) {
- # next;
- #}
- #my $check = $sth_check_ortho->fetchrow_array();
- #print "$id\t$check\n";
- #if ($check ne "$species_array[0]") {
- # print "no match\t\t$id\t$check\n";
- # next;
- #}
-
- my $rv3 = $sth_get_ortho->execute($id);
- if (!$rv3) {
+
+ my $rv2 = $sth_get_ortho->execute($id);
+ if (!$rv2) {
next;
}
}
}
}
+ close(out_file);
}
-close(out_file);