From d6bc0a0131e3897cef2ab457722c127e1e369c87 Mon Sep 17 00:00:00 2001
From: elserj <elserj@localhost>
Date: Fri, 3 Sep 2010 00:26:16 +0000
Subject: [PATCH] Fixed scripts for Mamatha's needs, and added better use of
 vars

svn path=/; revision=25
---
 interactome_scripts/find_ortho_inpara.pl | 28 +++++++++---
 interactome_scripts/find_ortho_super.pl  | 57 +++++++++++-------------
 2 files changed, 47 insertions(+), 38 deletions(-)

diff --git a/interactome_scripts/find_ortho_inpara.pl b/interactome_scripts/find_ortho_inpara.pl
index 08fca80..a7876e4 100755
--- a/interactome_scripts/find_ortho_inpara.pl
+++ b/interactome_scripts/find_ortho_inpara.pl
@@ -12,16 +12,25 @@
 use strict;
 use warnings;
 
+if($#ARGV != 1) {
+	print "usage: find_ortho_inpara.pl input_gene_list output_file_prefix\n";
+	exit;
+}
+
 use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
 
 use DbiFloret;
 
 my $dbh = DbiFloret::dbconnect;
 
-#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
-my @species_array = ("Ath", "Maize");
+my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy", "Poplar");
+#my @species_array = ("Ath", "Maize");
 my $spec_array_size = @species_array;
 
+# minimum similarity score for a hit to be kept (sth_get_ortho selects score >= this).
+#  0.0 would keep all paralogs, 1.0 would restrict to strict orthologs
+my $sim_score = 0.0;
+
 # read in list of genes from csv file given as argument
 my $in_file = $ARGV[0];
 
@@ -33,25 +42,29 @@ while(<in_file>) {
 	my $in_gene = $_;
 	chomp $in_gene;	
 	$in_gene =~ s/\s//g;
-	$in_gene .= ".1"; # add the suffix back so that the gene matches the db
+	if ($in_gene !~ /\.\d$/) {
+		$in_gene .= ".1"; # add the suffix back so that the gene matches the db
+	}
 	push(@in_gene_array, $in_gene);
 	#print "$in_gene\n";
 	
 }
 close(in_file);
 
-open(out_file, ">Ath_ortho.txt");
 
 for (my $i = 1; $i<$spec_array_size; $i++) {
 	
+	my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+	open(out_file, ">$out_file");
+	
 	my $table = $species_array[0] . "_" . $species_array[$i];
-	print out_file "$species_array[$i]\t$table\n";
+	print out_file "gene\tortho_gene\tscore\n";
 	
 	# set up the db query statement
 	my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
 	
 	# set up db query statement to get the orthologs using the cluster id
-	my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '0.0'");
+	my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '$sim_score'");
 	
 	foreach my $gene (@in_gene_array) {
 		
@@ -72,6 +85,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
 			}
 		}
 	}
+	close(out_file);
 }
 
-close(out_file);
+
diff --git a/interactome_scripts/find_ortho_super.pl b/interactome_scripts/find_ortho_super.pl
index ca7af46..d4a3a51 100755
--- a/interactome_scripts/find_ortho_super.pl
+++ b/interactome_scripts/find_ortho_super.pl
@@ -1,25 +1,30 @@
 #!/usr/bin/perl
 
-#####################################################################
-#    Written by Justin Elser 4/14/10                                #
-#                                                                   #
-#    This program takes an input file with a list of genes and      #
-#      finds the orthologs (and paralogs if the score in sth_get_ortho #
-#      is changed).                                                 #
-#                                                                   #
-#####################################################################
+###########################################################################
+#    Written by Justin Elser 4/14/10                                      #
+#                                                                         #
+#    This program takes an input file with a list of genes and            #
+#      finds their orthologs in each other species from the super_clust   #
+#      table (sth_get_ortho filters on super_id and species, not score)   #
+#                                                                         #
+###########################################################################
 
 use strict;
 use warnings;
 
+if($#ARGV != 1) {
+	print "usage: find_ortho_super.pl input_gene_list output_file_prefix\n";
+	exit;
+}
+
 use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
 
 use DbiFloret;
 
 my $dbh = DbiFloret::dbconnect;
 
-#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
-my @species_array = ("Ath", "Maize");
+my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy");
+#my @species_array = ("Ath", "Maize");
 my $spec_array_size = @species_array;
 
 # read in list of genes from csv file given as argument
@@ -33,17 +38,20 @@ while(<in_file>) {
 	my $in_gene = $_;
 	chomp $in_gene;	
 	$in_gene =~ s/\s//g;
-	$in_gene .= ".1"; # add the suffix back so that the gene matches the db
+	if ($in_gene !~ /\.\d$/) {
+		$in_gene .= ".1"; # add the suffix back so that the gene matches the db
+	}
 	push(@in_gene_array, $in_gene);
 	#print "$in_gene\n";
 	
 }
 close(in_file);
 
-open(out_file, ">Ath_ortho_super.txt");
-
 for (my $i = 1; $i<$spec_array_size; $i++) {
 	
+	my $out_file = $ARGV[1] . "_$species_array[$i].txt";
+	open(out_file, ">$out_file");
+	
 	my $table = "super_clust";
 	#print out_file "$species_array[$i]\t$table\n";
 	
@@ -53,10 +61,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
 	# set up db query statement to get the orthologs using the cluster id
 	my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = '$species_array[$i]'");
 	#my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?");
-	
-	# setup query to make sure there is an ortholog gene in the cluster
-	my $sth_check_ortho = $dbh->prepare("select species from $table where super_id = ? and species = '$species_array[0]'");
-	
+
 	foreach my $gene (@in_gene_array) {
 		
 		my $rv1 = $sth_get_id->execute($gene);
@@ -65,19 +70,9 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
 		}
 		
 		while (my $id = $sth_get_id->fetchrow_array()) {
-			#my $rv2 = $sth_check_ortho->execute($id);
-			#if (!$rv2) {
-			#	next;
-			#}
-			#my $check = $sth_check_ortho->fetchrow_array();
-			#print "$id\t$check\n";
-			#if ($check ne "$species_array[0]") {
-			#	print "no match\t\t$id\t$check\n";
-			#	next;
-			#}
-			
-			my $rv3 = $sth_get_ortho->execute($id);
-			if (!$rv3) {
+
+			my $rv2 = $sth_get_ortho->execute($id);
+			if (!$rv2) {
 				next;
 			}
 			
@@ -86,6 +81,6 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
 			}
 		}
 	}
+	close(out_file);
 }
 
-close(out_file);
-- 
2.34.1