Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Fix the species printing in output to not have underscore "_"
author elserj <elserj@localhost>
Thu, 7 May 2020 21:44:08 +0000 (21:44 +0000)
committer elserj <elserj@localhost>
Thu, 7 May 2020 21:44:08 +0000 (21:44 +0000)
svn path=/; revision=679

interactome_scripts/cafe_from_inparanoid.pl

index 5c2ab45ac660046547d03ced7bbbe83a00711c3c..9ccca41dd662907c56a2dc7978929f3563e15aa5 100755 (executable)
@@ -25,6 +25,8 @@ my $dbh = DbiFloret::dbconnect;
 my $in_file = $ARGV[0];
 my $out_file = $ARGV[1];
 
+my $max_genes = 100;
+
 # Get list of species to check
 my @species_list;
 
@@ -71,7 +73,9 @@ open(OUTFILE, ">$out_file");
 print OUTFILE "Description\tID";
 # Print species list to output file
 for my $species (@species_list) {
-               print OUTFILE "\t$species";
+       my $species_sub = $species;
+       $species_sub =~ s/_//;
+       print OUTFILE "\t$species_sub";
 }
 print OUTFILE "\n";
 
@@ -98,13 +102,17 @@ for my $super_id (sort {$a <=> $b} keys %cluster_hash) {
                # drop any lines (clusters) that only have one species in them
                my @line_array = split("\t", $mid_line);
                my @match_array;
+               my $max_over = 1;
                foreach my $elem (@line_array) {
                                if($elem != 0) {
                                                push(@match_array, $elem);
                                }
+                               if($elem >=  $max_genes) {
+                                       $max_over = 0;
+                               }
                }
                my $match_count = @match_array;
-               if ($match_count != 1) {
+               if ($match_count != 1 && $max_over) {
                                print OUTFILE "$super_id\t$super_id\t$mid_line\n";
                }
 }