Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Moved the "clusters" table select statement outside the loop. Fixes random stalls...
author elserj <elserj@localhost>
Thu, 23 Sep 2010 00:24:05 +0000 (00:24 +0000)
committer elserj <elserj@localhost>
Thu, 23 Sep 2010 00:24:05 +0000 (00:24 +0000)
svn path=/; revision=47

interactome_scripts/inparanoid_output_parse.pl

index bd58591a460af9feae03ec3d51f6989e37651a31..bb8de08839c331f818bcd420fe3fec271051af71 100755 (executable)
@@ -66,12 +66,24 @@ my $screen = Term::Screen::ReadLine->new();
 my $clust_table = "clusters";
 my $safe_clust_table = $dbh->quote_identifier($clust_table);
 
+my %id_hash;
 if($new) {
        $dbh->do("create table $safe_clust_table (
                `id` INT( 11 ) NOT NULL AUTO_INCREMENT,
                `clust_id` VARCHAR( 255 ) NOT NULL ,
                INDEX ( `id` )
                ) TYPE = MYISAM");
+}else{ # if not new load, grab the data from the clusters table
+       my $sth_id_dump = $dbh->prepare("select * from clusters");
+       # put the clusters table data into a hash
+       print "before dump\n";
+       $sth_id_dump->execute();
+       print "after dump\n";
+       while(my @ids = $sth_id_dump->fetchrow_array) {
+               if(!defined($id_hash{$ids[1]})) {
+                       $id_hash{$ids[1]} = $ids[0];
+               }
+       }
 }
                
 my $tot_file_count = scalar @files;
@@ -91,14 +103,12 @@ foreach my $input_file (@files) {
        my $species_table = $species_1."_".$species_2;
        
 
-       my %id_hash;
        
        print "Working on $species_table: file $file_counter/$tot_file_count\n";
        $file_counter++;
 
 
        
-               
        my $safe_species_table = $dbh->quote_identifier($species_table);
        $dbh->do("drop table if exists $safe_species_table");
        $dbh->do("CREATE TABLE $safe_species_table (
@@ -113,17 +123,11 @@ foreach my $input_file (@files) {
 
        
                
-       my $sth_id_dump = $dbh->prepare("select * from clusters");
+       
        my $sth_id_add = $dbh->prepare("insert into $safe_clust_table (id, clust_id) values (?,?)");
        my $sth_get_id = $dbh->prepare("select last_insert_id(id) from clusters order by id desc limit 1");
 
-       # put the clusters table data into a hash
-       $sth_id_dump->execute();
-       while(my @ids = $sth_id_dump->fetchrow_array) {
-               if(!defined($id_hash{$ids[1]})) {
-                       $id_hash{$ids[1]} = $ids[0];
-               }
-       }
+       
        
        my $clust_id_prev = "";
 
@@ -140,7 +144,6 @@ foreach my $input_file (@files) {
                $clust_id = "$species_1"."___".$species_2."___".$clust_id;
                $species = find_species($species);
                my $gene = find_gene($gene_header,$species);
-               
                # skip isomers that are not .1
                next if ($gene =~ /\.[2-9]$/);
                next if ($gene =~ /\.1[0-9]$/);
@@ -156,7 +159,6 @@ foreach my $input_file (@files) {
                if(!defined($id_hash{$clust_id})) {
                        if ($clust_id ne $clust_id_prev) {
                                $sth_id_add->execute(undef, $clust_id);
-                       
                                $sth_get_id->execute();
                                my @new_id = $sth_get_id->fetchrow_array;
                                $id = $new_id[0];
@@ -168,7 +170,6 @@ foreach my $input_file (@files) {
        
                
                $sth->execute($id, $bit_score, $species, $score, $gene);
-       
                $clust_id_prev = $clust_id;