#!/usr/bin/perl
#
# inparanoid_output_parse.pl
#
# Parses every "sqltable*" file in the current directory and loads it into
# the MySQL database `inparanoid_data`.  For each input file a per-species-pair
# table (e.g. `Ath_Human`) is dropped, re-created and filled, and any cluster
# not yet known is registered in the shared `clusters` table.
#
# Input file names are expected to contain the two species names separated by
# "-"; each data line holds five tab-separated fields:
#   cluster id, bit score, species, score, gene header.
#
# Provenance: interactome_scripts/inparanoid_output_parse.pl, commit
# 271e2c0faa3f7bfb87563c5156bccb8f87f9478b by elserj, Tue, 22 Sep 2009
# ("Fixed the file so it wasn't a symlink", svn revision 8).

use strict;
use warnings;

use DBI;
use Term::Screen::ReadLine;

# Ordered [pattern, short name] pairs used by find_species(); the first
# pattern that matches wins, so the order is significant.  Patterns are
# case-sensitive on purpose (they mirror the input file naming).
# NOTE(review): "Physcomitreall" and "Synechosystis" look like misspellings,
# but the short names end up in table names and cluster keys, so they are
# left untouched here -- confirm before renaming.
my @SPECIES_RULES = (
    [ qr/Arabidopsis/,    'Ath' ],
    [ qr/brachypodium/,   'Brachy' ],
    [ qr/elegans/,        'C_elegans' ],
    [ qr/Chlamy/,         'Chlamy' ],
    [ qr/Danio/,          'Danio' ],
    [ qr/E_coli/,         'E_coli' ],
    [ qr/Glycine/,        'Soy' ],
    [ qr/Homo_sapiens/,   'Human' ],
    [ qr/Maize/,          'Maize' ],
    [ qr/musculus/,       'Mouse' ],
    [ qr/neurospora/,     'Neurospora' ],
    # Was /Oryza\sativa/ (whitespace class + "ativa"), which could never
    # match "Oryza_sativa".
    [ qr/Oryza_sativa/,   'Oryza_sativa' ],
    [ qr/Physcomit/,      'Physcomitreall' ],
    [ qr/cerevisiae/,     'Sacc_cerevisiae' ],
    [ qr/pombe/,          'Sacc_pombe' ],
    [ qr/Selaginella/,    'Selaginella' ],
    [ qr/Sorghum/,        'Sorghum' ],
    [ qr/Synechosystis/,  'Synechosystis' ],
    [ qr/Vitis_vinifera/, 'Grape' ],
);

# main()
#
# Entry point: prompts for database credentials, connects, and loads every
# sqltable* file found in the current directory.
sub main {
    # assume every sqltable* file in the current directory is to be parsed
    my @files = glob("sqltable*");

    my ($username, $password) = prompt_credentials();

    my $dbh = DBI->connect(
        'DBI:mysql:inparanoid_data;host=floret.cgrb.oregonstate.edu',
        $username, $password,
        { RaiseError => 1, AutoCommit => 1 },
    ) or die "Failed to connect to database: $DBI::errstr";

    # The clusters table is shared by all input files, so its contents and
    # its insert statement are set up once instead of once per file.
    my $safe_clust_table = $dbh->quote_identifier("clusters");
    my $id_for           = load_cluster_ids($dbh, $safe_clust_table);
    my $sth_id_add       = $dbh->prepare(
        "insert into $safe_clust_table (id, clust_id) values (?,?)"
    );

    foreach my $input_file (@files) {
        load_file($dbh, $input_file, $id_for, $sth_id_add);
    }

    $dbh->disconnect;
    return;
}

# prompt_credentials()
#
# Asks for the database user name and password on a cleared screen; the
# password is read in PASSWORD mode so key presses are masked.
# Returns: ($username, $password).
sub prompt_credentials {
    my $screen = Term::Screen::ReadLine->new();

    # clear the screen
    $screen->clrscr;

    # ask for username
    $screen->at(0,0)->puts("Username: ");
    my $username = $screen->readline(ROW => 0, COL => 11);

    # ask for password, replace character presses with stars
    $screen->at(1,0)->puts("Password: ");
    my $password = $screen->readline(ROW => 1, COL => 11, PASSWORD => 1);

    $screen->at(2,0);

    # release the screen object before any further output is produced
    undef $screen;

    return ($username, $password);
}

# load_cluster_ids($dbh, $safe_clust_table)
#
# Reads the existing clusters table into memory.
# Returns: hash reference mapping clust_id => id.  If a clust_id occurs more
# than once, the first row returned by the database wins.
sub load_cluster_ids {
    my ($dbh, $safe_clust_table) = @_;

    my %id_for;

    # Columns are named explicitly rather than relying on "select *" order.
    my $sth = $dbh->prepare("select id, clust_id from $safe_clust_table");
    $sth->execute();
    while (my ($id, $clust_id) = $sth->fetchrow_array) {
        $id_for{$clust_id} = $id unless defined $id_for{$clust_id};
    }

    return \%id_for;
}

# load_file($dbh, $input_file, $id_for, $sth_id_add)
#
# Loads one sqltable file.
#   $dbh        - connected DBI handle (RaiseError is expected to be on)
#   $input_file - file name; the two species are taken from the parts
#                 before and after the first "-"
#   $id_for     - hash reference clust_id => id; updated in place when new
#                 clusters are registered
#   $sth_id_add - prepared "insert into clusters (id, clust_id)" statement
# Dies if the file cannot be opened/closed, if a species name is not
# recognised, or if no id can be obtained for a new cluster.
sub load_file {
    my ($dbh, $input_file, $id_for, $sth_id_add) = @_;

    open(my $in_fh, '<', $input_file)
        or die "Error: file '$input_file' can not be opened: $!\n";

    my ($species_1, $species_2) = split /-/, $input_file;
    $species_1 = find_species($species_1);
    $species_2 = find_species($species_2);

    my $safe_species_table
        = $dbh->quote_identifier($species_1 . "_" . $species_2);

    # The per-pair table is rebuilt from scratch on every run.
    # ENGINE replaces the TYPE keyword, which newer MySQL servers reject.
    $dbh->do("drop table if exists $safe_species_table");
    $dbh->do(<<"END_SQL");
CREATE TABLE $safe_species_table (
    `id` INT( 11 ) NOT NULL ,
    `bit_score` INT( 11 ) NOT NULL ,
    `species` VARCHAR( 255 ) NOT NULL ,
    `score` FLOAT( 11 ) NOT NULL ,
    `gene` VARCHAR( 255 ) NOT NULL ,
    INDEX ( `id` )
) ENGINE = MYISAM
END_SQL

    my $sth_insert = $dbh->prepare(
        "insert into $safe_species_table"
        . " (id, bit_score, species, score, gene) values (?,?,?,?,?)"
    );

    while (my $entry = <$in_fh>) {
        # strip off newline characters (handles both \n and \r\n input)
        $entry =~ tr/\r\n//d;

        # a blank line (e.g. at end of file) carries no record
        next if $entry eq '';

        my ($clust_id, $bit_score, $species, $score, $gene_header)
            = split /\t/, $entry;

        # Cluster numbers are only unique within one species pair, so the
        # key stored in the clusters table is prefixed with both species.
        $clust_id = $species_1 . "___" . $species_2 . "___" . $clust_id;
        $species  = find_species($species);

        my $id = $id_for->{$clust_id};
        if (!defined $id) {
            # Passing undef for id lets the database assign it.
            # NOTE(review): assumes clusters.id is AUTO_INCREMENT -- confirm.
            $sth_id_add->execute(undef, $clust_id);
            $id = $dbh->last_insert_id(undef, undef, "clusters", "id");
            defined $id
                or die "Error: no id obtained for cluster '$clust_id'\n";
            $id_for->{$clust_id} = $id;
        }

        $sth_insert->execute($id, $bit_score, $species, $score, $gene_header);
    }

    close($in_fh)
        or die "Error: file '$input_file' can not be closed: $!\n";

    return;
}

# find_species($name)
#
# Maps a file-name fragment or species column value to the short species
# name used in table names and cluster keys.
#   $name - string searched for one of the known species patterns
# Returns: the short name of the first matching pattern.
# Dies if no pattern matches (including when $name is undefined).
sub find_species {
    my ($name) = @_;
    $name = '' unless defined $name;

    foreach my $rule (@SPECIES_RULES) {
        my ($pattern, $short_name) = @{$rule};
        return $short_name if $name =~ $pattern;
    }

    die "Error: Species can not be found from file name!";
}

# Run the loader only when executed as a script, so the file can also be
# loaded (e.g. by a test) without prompting or touching the database.
main() unless caller;

1;