--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# Load the find_species.pl helper from the first location under $HOME where
+# it exists. If neither candidate path is present, nothing is loaded.
+for my $helper_path (
+    "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl",
+    "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl",
+) {
+    next unless -e $helper_path;
+    require $helper_path;
+    last;
+}
+
+
+# Exactly two arguments are required: the species list file and the output
+# file. On misuse, report the usage line on STDERR and exit non-zero so that
+# a calling shell or pipeline can detect the failure.
+if (@ARGV != 2) {
+    print STDERR "usage: cafe_from_inparanoid.pl species_list output_file\n";
+    exit 1;
+}
+
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+# Positional arguments: the species list file, then the output file path.
+my ($in_file, $out_file) = @ARGV[0, 1];
+
+# Database handle as returned by DbiFloret's dbconnect.
+my $dbh = DbiFloret::dbconnect();
+
+# Get list of species to check: one species name per line of the input file.
+my @species_list;
+
+# Three-argument open with a lexical handle; fail loudly if the list cannot
+# be read rather than carrying on with an empty species list.
+open(my $in_fh, '<', $in_file)
+    or die "Cannot open species list '$in_file': $!\n";
+while (my $line = <$in_fh>) {
+    $line =~ s/[\r\n]+\z//;     # strip the line ending (LF or CRLF)
+    next if $line !~ /\S/;      # a blank line is not a species name
+    push(@species_list, $line);
+}
+close($in_fh);
+
+
+# Build and run the database query.
+# The query only returns rows whose species is in the supplied list. One "?"
+# placeholder is generated per species and the names are bound at execute
+# time, so a name containing a quote character cannot break (or inject
+# into) the SQL statement.
+if (!@species_list) {
+    # "in ()" is not valid SQL, so there is nothing sensible to run.
+    die "No species found in '$in_file'; cannot build the query\n";
+}
+my $placeholders = join(",", ("?") x @species_list);
+
+# Filtering with WHERE happens before grouping and selects the same rows as
+# a HAVING test on the grouped species column.
+my $sth = $dbh->prepare(
+    "select super_id, species, count(gene) from super_clust "
+  . "where species in ($placeholders) "
+  . "group by super_id, species order by super_id asc"
+) or die "Could not prepare cluster query: " . $dbh->errstr . "\n";
+
+# Stop with the database error message if execution fails. ("next" cannot be
+# used here: this code is not inside a loop, so it would itself be an error.)
+my $rv = $sth->execute(@species_list);
+if (!$rv) {
+    die "Cluster query failed: " . $sth->errstr . "\n";
+}
+
+
+
+# Two-level lookup filled from the result rows:
+#   $cluster_hash{super_id}{species} = count returned for that pair
+my %cluster_hash;
+
+while (my ($cluster_id, $organism, $gene_total) = $sth->fetchrow_array()) {
+    $cluster_hash{$cluster_id}{$organism} = $gene_total;
+}
+
+# Write the tab-separated count table: a header row, then one row per
+# cluster holding the count for every species, in species-list order.
+open(my $out_fh, '>', $out_file)
+    or die "Cannot open output file '$out_file' for writing: $!\n";
+
+# Header: two fixed columns followed by one column per species.
+print {$out_fh} "Description\tID";
+for my $species (@species_list) {
+    print {$out_fh} "\t$species";
+}
+print {$out_fh} "\n";
+
+# Loop through all super_ids in ascending numeric order
+for my $super_id (sort { $a <=> $b } keys %cluster_hash) {
+    # Count for each listed species; 0 when the cluster has no row for it.
+    my @counts;
+    for my $species (@species_list) {
+        my $count = $cluster_hash{$super_id}{$species};
+        push(@counts, defined($count) ? $count : 0);
+    }
+
+    # drop any lines (clusters) that only have one species in them
+    my $match_count = grep { $_ != 0 } @counts;
+    if ($match_count != 1) {
+        # The super_id fills both the Description and the ID column.
+        print {$out_fh} "$super_id\t$super_id\t" . join("\t", @counts) . "\n";
+    }
+}
+
+# Buffered write errors only surface at close, so check it.
+close($out_fh)
+    or die "Error while writing '$out_file': $!\n";
+
+