Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Script that will generate the tabulated cluster gene counts from InParanoid. To...
author elserj <elserj@localhost>
Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
committer elserj <elserj@localhost>
Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
svn path=/; revision=634

interactome_scripts/cafe_from_inparanoid.pl [new file with mode: 0755]

diff --git a/interactome_scripts/cafe_from_inparanoid.pl b/interactome_scripts/cafe_from_inparanoid.pl
new file mode 100755 (executable)
index 0000000..5c2ab45
--- /dev/null
@@ -0,0 +1,112 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# Load find_species.pl from the first known checkout location where it exists.
+for my $helper ("$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl",
+                "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl") {
+    if (-e $helper) { require $helper; last; }
+}
+
+
+# Exactly two arguments are required. Misuse is reported on STDERR with a
+# non-zero exit status so calling scripts can detect the failure.
+if (@ARGV != 2) {
+    print STDERR "usage: cafe_from_inparanoid.pl species_list output_file\n";
+    exit 1;
+}
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+# Database handle obtained through the DbiFloret module's dbconnect.
+my $dbh = DbiFloret::dbconnect();
+# Positional arguments: the species list to read and the output file to write.
+my ($in_file, $out_file) = @ARGV;
+
+# Get list of species to check: one species name per line of the input file.
+# An unreadable file is fatal; otherwise the query would run with no species.
+my @species_list;
+open(my $in_fh, '<', $in_file) or die "cannot open species list '$in_file': $!\n";
+while (my $line = <$in_fh>) {
+    chomp $line;
+    next if $line =~ /^\s*$/;    # a blank line is not a species name
+    push(@species_list, $line);
+}
+close($in_fh);
+
+
+# Build and run the query that counts genes per (super_id, species) pair.
+# An empty list would produce the invalid SQL "in ()", so stop early.
+die "no species names found in '$in_file'\n" unless @species_list;
+
+# One "?" placeholder per species: the names are bound at execute time instead
+# of being quoted into the SQL text by hand, so a name containing a quote
+# cannot break or alter the statement.
+my $placeholders = join(',', ('?') x scalar(@species_list));
+
+# set up the mysql query
+# query will only return values where species is from the list supplied
+my $sth = $dbh->prepare("select super_id,species, count(gene) from super_clust group by super_id,species having species in ($placeholders) order by super_id asc;")
+    or die "cannot prepare cluster query: " . $dbh->errstr . "\n";
+
+# A failed query is fatal. (A bare "next" here would itself be a runtime error,
+# since this code is not inside a loop.)
+$sth->execute(@species_list)
+    or die "cannot execute cluster query: " . $sth->errstr . "\n";
+
+
+
+# Tabulate the result set as $cluster_hash{super_id}{species} = gene count.
+my %cluster_hash;
+
+# Each fetched row is (super_id, species, count); the loop ends on an empty row.
+while (my ($cluster_id, $species_name, $gene_count) = $sth->fetchrow_array()) {
+    $cluster_hash{$cluster_id}{$species_name} = $gene_count;
+}
+
+# Write the tab-separated table: a header row, then one row per cluster.
+# Three-arg open on a lexical handle keeps the file name out of the mode
+# string, and a failure to open is fatal instead of silently writing nothing.
+open(my $out_fh, '>', $out_file) or die "cannot open output file '$out_file': $!\n";
+
+# Header: the two leading columns followed by one column per species.
+print {$out_fh} join("\t", "Description", "ID", @species_list), "\n";
+
+# Loop through all super_ids in ascending numeric order
+for my $super_id (sort { $a <=> $b } keys %cluster_hash) {
+    # Gene counts in species-list order; a species with no entry for this
+    # cluster contributes 0.
+    my @counts = map {
+        defined($cluster_hash{$super_id}{$_}) ? $cluster_hash{$super_id}{$_} : 0
+    } @species_list;
+
+    # drop any lines (clusters) that only have one species in them
+    my $match_count = grep { $_ != 0 } @counts;
+    next if $match_count == 1;
+
+    # The super_id fills both the Description and the ID column.
+    print {$out_fh} join("\t", $super_id, $super_id, @counts), "\n";
+}
+
+# Buffered write errors only surface at close, so check it.
+close($out_fh) or die "cannot finish writing output file '$out_file': $!\n";
+
+