Script that will generate the tabulated cluster gene counts from InParanoid. To...

author elserj <elserj@localhost>

Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)

committer elserj <elserj@localhost>

Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
author elserj <elserj@localhost>
Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
committer elserj <elserj@localhost>
Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
diff --git a/interactome_scripts/cafe_from_inparanoid.pl b/interactome_scripts/cafe_from_inparanoid.pl

new file mode 100755 (executable)

index 0000000..5c2ab45
--- /dev/null
+++ b/interactome_scripts/cafe_from_inparanoid.pl
@@ -0,0 +1,107 @@
+#!/usr/bin/perl
+#
+# cafe_from_inparanoid.pl
+#
+# Tabulates per-species gene counts for each super_id in the super_clust
+# table and writes them as a tab-delimited matrix: one row per cluster, one
+# column per species (presumably as input for CAFE -- confirm with the
+# downstream pipeline).
+#
+# Usage: cafe_from_inparanoid.pl species_list output_file
+#   species_list - text file with one species name per line, spelled as in
+#                  the "species" column of super_clust
+#   output_file  - path of the table to write (overwritten if it exists)
+#
+# Clusters in which exactly one of the listed species has genes are skipped.
+
+use strict;
+use warnings;
+
+# Search both known checkout locations so DbiFloret is found wherever
+# find_species.pl is found.
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts",
+        "$ENV{HOME}/jaiswallab_svn/interactome_scripts";
+
+use DbiFloret;
+
+# Load the shared helper script from whichever checkout location exists.
+if (-e "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl") {
+    require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+}
+elsif (-e "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl") {
+    require "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl";
+}
+
+if (@ARGV != 2) {
+    print "usage: cafe_from_inparanoid.pl species_list output_file\n";
+    exit;
+}
+
+my ($in_file, $out_file) = @ARGV;
+
+# Get list of species to check, one name per line.
+my @species_list;
+
+open(my $in_fh, '<', $in_file)
+    or die "Cannot open species list '$in_file': $!\n";
+while (my $line = <$in_fh>) {
+    chomp $line;
+    # A blank line would otherwise become an empty species name and column.
+    next if $line =~ /^\s*$/;
+    push @species_list, $line;
+}
+close($in_fh);
+
+# An empty list would produce the invalid SQL fragment "in ()".
+die "No species found in '$in_file'\n" unless @species_list;
+
+my $dbh = DbiFloret::dbconnect()
+    or die "Cannot connect to the database\n";
+
+# Only rows for the listed species are counted. Species names are bound
+# through placeholders rather than pasted into the SQL text, so names
+# containing quotes can neither break nor alter the query.
+my $placeholders = join(',', ('?') x @species_list);
+my $sth = $dbh->prepare(
+    "select super_id, species, count(gene) from super_clust "
+  . "where species in ($placeholders) "
+  . "group by super_id, species order by super_id asc"
+) or die "Cannot prepare query: " . $dbh->errstr . "\n";
+
+# Stop with the database's own message when the query fails; the rows below
+# would otherwise be read from a statement that never ran.
+$sth->execute(@species_list)
+    or die "Cannot run query: " . $sth->errstr . "\n";
+
+# Hash of hashes: outer key is super_id, inner key is species, value is the
+# gene count.
+my %cluster_hash;
+
+while (my ($super_id, $species, $count) = $sth->fetchrow_array()) {
+    $cluster_hash{$super_id}{$species} = $count;
+}
+
+$dbh->disconnect;
+
+open(my $out_fh, '>', $out_file)
+    or die "Cannot open output file '$out_file': $!\n";
+
+# Header row: two fixed columns followed by one column per species.
+print {$out_fh} join("\t", 'Description', 'ID', @species_list), "\n";
+
+# Loop through all super_ids in numeric order.
+for my $super_id (sort { $a <=> $b } keys %cluster_hash) {
+    # Species with no row for this cluster get a count of 0.
+    my @counts = map {
+        defined $cluster_hash{$super_id}{$_} ? $cluster_hash{$super_id}{$_} : 0
+    } @species_list;
+
+    # Drop any clusters that only have one species in them.
+    my $match_count = grep { $_ != 0 } @counts;
+    next if $match_count == 1;
+
+    # The super_id fills both the Description and the ID column.
+    print {$out_fh} join("\t", $super_id, $super_id, @counts), "\n";
+}
+
+close($out_fh) or die "Cannot write output file '$out_file': $!\n";
author	elserj <elserj@localhost>
	Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)
committer	elserj <elserj@localhost>
	Wed, 23 Sep 2015 22:29:02 +0000 (22:29 +0000)