Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be seen immediately.
Split find_ortho scripts to do it from the super_clust table instead of inparanoid
author elserj <elserj@localhost>
Wed, 14 Apr 2010 21:19:14 +0000 (21:19 +0000)
committer elserj <elserj@localhost>
Wed, 14 Apr 2010 21:19:14 +0000 (21:19 +0000)
svn path=/; revision=16

interactome_scripts/find_ortho_inpara.pl
interactome_scripts/find_ortho_super.pl [new file with mode: 0755]

index 51d9504f93687a8aa70bfd896110e88c617d0057..08fca807156b5928ec2039a56248db0de83a53ba 100755 (executable)
@@ -32,13 +32,15 @@ my @in_gene_array;
 while(<in_file>) {
        my $in_gene = $_;
        chomp $in_gene; 
+       $in_gene =~ s/\s//g;
        $in_gene .= ".1"; # add the suffix back so that the gene matches the db
        push(@in_gene_array, $in_gene);
+       #print "$in_gene\n";
        
 }
 close(in_file);
 
-open(out_file, ">flower_ortho.txt");
+open(out_file, ">Ath_ortho.txt");
 
 for (my $i = 1; $i<$spec_array_size; $i++) {
        
@@ -49,7 +51,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
        my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
        
        # set up db query statement to get the orthologs using the cluster id
-       my $sth_get_ortho = $dbh->prepare("select gene from $table where id = ? and species = '$species_array[$i]' and score = '1'");
+       my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '0.0'");
        
        foreach my $gene (@in_gene_array) {
                
@@ -64,8 +66,9 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
                                next;
                        }
                        
-                       while (my $ortho = $sth_get_ortho->fetchrow_array()) {
-                               print out_file "$gene\t$ortho\n";
+                       while (my @line = $sth_get_ortho->fetchrow_array()) {
+                               my ($ortho,$score) = @line;
+                               print out_file "$gene\t$ortho\t$score\n";
                        }
                }
        }
diff --git a/interactome_scripts/find_ortho_super.pl b/interactome_scripts/find_ortho_super.pl
new file mode 100755 (executable)
index 0000000..45cbfa9
--- /dev/null
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+
+#####################################################################
+#    Written by Justin Elser 4/14/10                                #
+#                                                                   #
+#    Reads gene names, one per line, from the file given as the     #
+#    first argument, looks up the super_id of each gene in the      #
+#    super_clust table, and writes tab-separated gene/match pairs   #
+#    for the other listed species to Ath_ortho_super.txt.           #
+#####################################################################
+
+use strict;
+use warnings;
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts"; # presumably where DbiFloret.pm lives - confirm
+
+use DbiFloret;
+
+my $dbh = DbiFloret::dbconnect; # project-local helper; the result is used below via prepare()
+
+#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
+my @species_array = ("Ath", "Maize"); # index 0 is never queried: the species loop below starts at 1
+my $spec_array_size = @species_array; # array in scalar context gives the element count
+
+# read the gene list from the file named by the first argument (each whole line is treated as one gene name)
+my $in_file = $ARGV[0];
+
+open (in_file, "$in_file"); # NOTE(review): 2-arg open with an unchecked result
+
+my @in_gene_array;
+
+while(<in_file>) {
+       my $in_gene = $_;
+       chomp $in_gene; 
+       $in_gene =~ s/\s//g; # removes every whitespace character, not only the line ending
+       $in_gene .= ".1"; # add the suffix back so that the gene matches the db (a blank line yields just the suffix)
+       push(@in_gene_array, $in_gene);
+       #print "$in_gene\n";
+       
+}
+close(in_file);
+
+open(out_file, ">Ath_ortho_super.txt"); # relative path, opened for writing (truncates); NOTE(review): result not checked
+
+for (my $i = 1; $i<$spec_array_size; $i++) { # one pass per species after the first entry
+       
+       my $table = "super_clust";
+       #print out_file "$species_array[$i]\t$table\n";
+       
+       # statement returning the super_id value(s) stored for a given gene name
+       my $sth_get_id = $dbh->prepare("select super_id from $table where gene = ?");
+       
+       # statement returning the genes of the current species that share a super_id (id is bound, species is interpolated)
+       my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = '$species_array[$i]'");
+       
+       foreach my $gene (@in_gene_array) {
+               
+               my $rv1 = $sth_get_id->execute($gene);
+               if (!$rv1) { # execute returned a false value: skip this gene
+                       next;
+               }
+               
+               while (my $id = $sth_get_id->fetchrow_array()) { # NOTE(review): scalar assignment, so a false id value would end the loop early - confirm ids are never 0 or empty
+                       my $rv2 = $sth_get_ortho->execute($id);
+                       if (!$rv2) { # execute returned a false value: move on to the next id row
+                               next;
+                       }
+                       
+                       while (my $ortho = $sth_get_ortho->fetchrow_array()) { # one output row per matching gene
+                               print out_file "$gene\t$ortho\n";
+                       }
+               }
+       }
+}
+
+close(out_file);