Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be seen immediately.
Fixed the file so it wasn't a symlink
author elserj <elserj@localhost>
Tue, 22 Sep 2009 19:41:47 +0000 (19:41 +0000)
committer elserj <elserj@localhost>
Tue, 22 Sep 2009 19:41:47 +0000 (19:41 +0000)
svn path=/; revision=8

interactome_scripts/inparanoid_output_parse.pl [new file with mode: 0755]

diff --git a/interactome_scripts/inparanoid_output_parse.pl b/interactome_scripts/inparanoid_output_parse.pl
new file mode 100755 (executable)
index 0000000..be7f965
--- /dev/null
@@ -0,0 +1,166 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+# Input: every file in the current directory whose name starts with "sqltable".
+my @files = glob("sqltable*");
+
+# Ask for the database credentials on a cleared terminal screen.
+my ($username, $password);
+{
+       # The screen object lives only inside this block and is released at its end.
+       my $term = Term::Screen::ReadLine->new();
+       $term->clrscr;
+       # Row 0: the user name, typed to the right of the prompt.
+       $term->at(0,0)->puts("Username: ");
+       $username = $term->readline(ROW => 0, COL => 11);
+       # Row 1: the password, read with PASSWORD => 1 so key presses show as stars.
+       $term->at(1,0)->puts("Password: ");
+       $password = $term->readline(ROW => 1, COL => 11, PASSWORD => 1);
+       # Leave the cursor on the row below the two prompts.
+       $term->at(2,0);
+}
+
+# define the database handle to be used
+my $dsn = 'DBI:mysql:inparanoid_data;host=floret.cgrb.oregonstate.edu';
+my $dbh = DBI->connect($dsn, $username, $password, { RaiseError => 1, AutoCommit => 1 })
+       or die "Failed to connect to database: $DBI::errstr";
+       
+       
+# start the database entries
+#
+# Every input file is named after a species pair ("<A>-<B>"; each half is
+# resolved by find_species) and holds tab-separated rows of
+# cluster id, bit score, species, score, gene.  For each file the loop below
+# (re)creates the pair table "<A>_<B>" and inserts every row into it, keyed by
+# the id under which the row's cluster name is stored in the clusters table.
+
+my $clust_table = "clusters";
+# Table names cannot be bound as placeholders, so they are quoted as identifiers.
+my $safe_clust_table = $dbh->quote_identifier($clust_table);
+my $sth_id_dump = $dbh->prepare("select id, clust_id from $safe_clust_table");
+my $sth_id_add = $dbh->prepare("insert into $safe_clust_table (id, clust_id) values (?,?)");
+# NOTE(review): id is inserted as NULL and read back as the highest id - presumably AUTO_INCREMENT with a single writer; confirm.
+my $sth_get_id = $dbh->prepare("select last_insert_id(id) from $safe_clust_table order by id desc limit 1");
+
+# put the clusters table data into a hash (cluster name => id); it is loaded
+# once and kept current as new clusters are added, instead of being re-read
+# from the database for every input file
+my %id_hash;
+$sth_id_dump->execute();
+while (my ($known_id, $known_name) = $sth_id_dump->fetchrow_array) {
+       $id_hash{$known_name} = $known_id unless defined $id_hash{$known_name};
+}
+
+foreach my $input_file (@files) {
+       # Three-argument open on a lexical handle: the name is never parsed for a
+       # mode, and the OS reason ($!) is reported when the open fails.
+       open(my $in_fh, '<', $input_file)
+               or die "Error: file '$input_file' can not be opened: $!\n";
+
+       my ($species_1, $species_2) = split("-", $input_file);
+       $species_1 = find_species($species_1);
+       $species_2 = find_species($species_2);
+       my $species_table = $species_1."_".$species_2;
+       my $safe_species_table = $dbh->quote_identifier($species_table);
+
+       # Rebuild the pair table from scratch.  ENGINE is the current spelling of
+       # the storage-engine option; the older TYPE keyword is rejected by newer
+       # MySQL servers.
+       $dbh->do("drop table if exists $safe_species_table");
+       $dbh->do("CREATE TABLE $safe_species_table (
+               `id` INT( 11 ) NOT NULL ,
+               `bit_score` INT( 11 ) NOT NULL ,
+               `species` VARCHAR( 255 ) NOT NULL ,
+               `score` FLOAT( 11 ) NOT NULL ,
+               `gene` VARCHAR( 255 ) NOT NULL ,
+               INDEX ( `id` )
+               ) ENGINE = MYISAM");
+       my $sth = $dbh->prepare("insert into $safe_species_table (id, bit_score, species, score, gene) values (?,?,?,?,?)");
+
+       while (my $entry = <$in_fh>) {
+               # strip off newline characters
+               $entry =~ s/[\r\n]//g;
+
+               # A line with no content is not a record; skip it rather than letting
+               # find_species abort the whole load on an undefined species.
+               next unless $entry =~ /\S/;
+
+               my ($clust_id, $bit_score, $species, $score, $gene_header) = split("\t", $entry);
+
+               $clust_id = $species_1."___".$species_2."___".$clust_id;
+               $species = find_species($species);
+
+               # Look the cluster up; the first time a name is seen, add it to the
+               # clusters table and cache the id read back for it.  (The old check
+               # against the previous line's cluster could only leave $id undefined.)
+               my $id = $id_hash{$clust_id};
+               if (!defined $id) {
+                       $sth_id_add->execute(undef, $clust_id);
+
+                       $sth_get_id->execute();
+                       ($id) = $sth_get_id->fetchrow_array;
+                       $sth_get_id->finish();
+                       $id_hash{$clust_id} = $id;
+               }
+
+               $sth->execute($id, $bit_score, $species, $score, $gene_header);
+       }
+       close($in_fh);
+}
+
+
+$dbh->disconnect;
+       
+
+# Translate a species name (as it appears in an input file name or in the
+# species column of a data row) into the short label used for table names and
+# cluster ids.
+#
+# Argument: one string.  Returns the label of the first pattern, in the order
+# listed below, that occurs anywhere in the string (matching is case-sensitive).
+# Dies when the string is undefined or contains no known species.
+sub find_species {
+       my ($name) = @_;
+
+       # [ pattern, label ] pairs, tried top to bottom.
+       my @known_species = (
+               [ qr/Arabidopsis/,     "Ath" ],
+               [ qr/brachypodium/,    "Brachy" ],
+               [ qr/elegans/,         "C_elegans" ],
+               [ qr/Chlamy/,          "Chlamy" ],
+               [ qr/Danio/,           "Danio" ],
+               [ qr/E_coli/,          "E_coli" ],
+               [ qr/Glycine/,         "Soy" ],
+               [ qr/Homo_sapiens/,    "Human" ],
+               [ qr/Maize/,           "Maize" ],
+               [ qr/musculus/,        "Mouse" ],
+               [ qr/neurospora/,      "Neurospora" ],
+               # "Oryza_sativa": the underscore must be literal.  The previous pattern
+               # /Oryza\sativa/ read "\s" as whitespace and could not match this name.
+               [ qr/Oryza_sativa/,    "Oryza_sativa" ],
+               [ qr/Physcomit/,       "Physcomitreall" ],
+               [ qr/cerevisiae/,      "Sacc_cerevisiae" ],
+               [ qr/pombe/,           "Sacc_pombe" ],
+               [ qr/Selaginella/,     "Selaginella" ],
+               [ qr/Sorghum/,         "Sorghum" ],
+               [ qr/Synechosystis/,   "Synechosystis" ],
+               [ qr/Vitis_vinifera/,  "Grape" ],
+       );
+
+       # An undefined name (e.g. a row with too few columns) is reported the same
+       # way as an unknown one, without an "uninitialized value" warning first.
+       if (defined $name) {
+               foreach my $entry (@known_species) {
+                       my ($pattern, $label) = @$entry;
+                       return $label if $name =~ $pattern;
+               }
+       }
+       die "Error: Species can not be found from file name!";
+}
+
+