--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+# Collect the input files: every file in the current directory whose name
+# starts with "sqltable".
+
+my @files = glob("sqltable*");
+
+# Ask for the database credentials on a cleared terminal screen, then use
+# them to open the database handle shared by the rest of the script.
+
+my $term = Term::Screen::ReadLine->new();
+$term->clrscr;
+
+# Row 0: prompt for the username; input is read starting at column 11.
+$term->at(0, 0);
+$term->puts("Username: ");
+my $username = $term->readline(ROW => 0, COL => 11);
+
+# Row 1: prompt for the password; key presses are echoed as stars.
+$term->at(1, 0);
+$term->puts("Password: ");
+my $password = $term->readline(ROW => 1, COL => 11, PASSWORD => 1);
+
+# Park the cursor below the prompts and release the screen object.
+$term->at(2, 0);
+undef $term;
+
+# RaiseError makes DBI calls die on failure; AutoCommit commits each
+# statement as it is executed.
+my $dsn = 'DBI:mysql:inparanoid_data;host=floret.cgrb.oregonstate.edu';
+my %connect_attr = (RaiseError => 1, AutoCommit => 1);
+
+my $dbh = DBI->connect($dsn, $username, $password, \%connect_attr)
+    or die "Failed to connect to database: $DBI::errstr";
+
+
+# Load every input file into the database.
+#
+# For each file this:
+#   1. derives the two species short names from the file name ("A-B"),
+#   2. drops and recreates the per-pair table "<A>_<B>",
+#   3. reads the existing rows of the "clusters" table into a lookup hash,
+#   4. inserts one row per tab-separated input line, creating a new
+#      "clusters" row the first time a cluster name is seen.
+#
+# Input lines are expected to hold five tab-separated fields:
+# cluster number, bit score, species name, score, gene header.
+foreach my $input_file (@files) {
+    # Three-argument open with a lexical handle: the file name can never be
+    # interpreted as an open mode, and the handle is scoped to this iteration.
+    open(my $in_fh, '<', $input_file)
+        or die "Error: file '$input_file' can not be opened: $!\n";
+
+    # The file name carries the two species names separated by "-".
+    my ($species_1, $species_2) = split(/-/, $input_file);
+    $species_1 = find_species($species_1);
+    $species_2 = find_species($species_2);
+    my $species_table = $species_1 . "_" . $species_2;
+    my $clust_table   = "clusters";
+
+    my $safe_species_table = $dbh->quote_identifier($species_table);
+    my $safe_clust_table   = $dbh->quote_identifier($clust_table);
+
+    # Recreate the per-pair table from scratch.  The table option is spelled
+    # ENGINE because the older TYPE synonym was removed in MySQL 5.5.
+    $dbh->do("drop table if exists $safe_species_table");
+    $dbh->do("CREATE TABLE $safe_species_table (
+        `id` INT( 11 ) NOT NULL ,
+        `bit_score` INT( 11 ) NOT NULL ,
+        `species` VARCHAR( 255 ) NOT NULL ,
+        `score` FLOAT( 11 ) NOT NULL ,
+        `gene` VARCHAR( 255 ) NOT NULL ,
+        INDEX ( `id` )
+        ) ENGINE = MYISAM");
+
+    my $sth = $dbh->prepare("insert into $safe_species_table (id, bit_score, species, score, gene) values (?,?,?,?,?)");
+
+    # Columns are named explicitly so the lookup below does not depend on the
+    # physical column order of the clusters table.
+    my $sth_id_dump = $dbh->prepare("select id, clust_id from $safe_clust_table");
+    my $sth_id_add  = $dbh->prepare("insert into $safe_clust_table (id, clust_id) values (?,?)");
+    # NOTE(review): this reads back the row with the largest id, which is only
+    # the row just inserted if ids are assigned in increasing order and no
+    # other client inserts concurrently -- confirm against the table schema.
+    my $sth_get_id  = $dbh->prepare("select last_insert_id(id) from $safe_clust_table order by id desc limit 1");
+
+    # Map cluster name -> id for every cluster already stored; the first id
+    # seen for a given name wins.
+    my %id_hash;
+    $sth_id_dump->execute();
+    while (my ($known_id, $known_clust) = $sth_id_dump->fetchrow_array) {
+        $id_hash{$known_clust} = $known_id unless defined $id_hash{$known_clust};
+    }
+
+    while (my $entry = <$in_fh>) {
+        # Strip carriage returns and newlines.
+        $entry =~ tr/\r\n//d;
+
+        # Blank lines (e.g. a trailing empty line) carry no data.
+        next if $entry eq '';
+
+        my ($clust_id, $bit_score, $species, $score, $gene_header) = split(/\t/, $entry);
+
+        # A short line would otherwise surface later as a confusing
+        # "species can not be found" or database NOT NULL error.
+        defined $gene_header
+            or die "Error: malformed line $. in file '$input_file'\n";
+
+        # Cluster names are made unique across files by prefixing the pair.
+        $clust_id = $species_1 . "___" . $species_2 . "___" . $clust_id;
+        $species  = find_species($species);
+
+        # Reuse the id of a known cluster, otherwise register a new one and
+        # remember its id for the following lines.
+        my $id = $id_hash{$clust_id};
+        if (!defined $id) {
+            $sth_id_add->execute(undef, $clust_id);
+
+            $sth_get_id->execute();
+            ($id) = $sth_get_id->fetchrow_array;
+            $sth_get_id->finish();
+
+            defined $id
+                or die "Error: no id returned for cluster '$clust_id'\n";
+            $id_hash{$clust_id} = $id;
+        }
+
+        $sth->execute($id, $bit_score, $species, $score, $gene_header);
+    }
+
+    close($in_fh)
+        or die "Error: file '$input_file' can not be closed: $!\n";
+}
+
+
+$dbh->disconnect;
+
+
+# find_species($name)
+#
+# Map a species name, as it appears in an input file name or in the species
+# column of an input line, to the short name used in table and cluster names.
+#
+# Parameters:
+#   $name - string that contains one of the known species names.
+#
+# Returns:
+#   The short name of the first pattern, in the order listed below, that
+#   matches anywhere in $name.
+#
+# Dies:
+#   When $name is undefined or matches none of the known patterns.
+sub find_species {
+    my $temp = $_[0];
+
+    # Ordered (pattern, short name) pairs; the first match wins, so the order
+    # is significant for names that could match more than one pattern.
+    my @species_patterns = (
+        [ qr/Arabidopsis/,        "Ath" ],
+        [ qr/brachypodium/,       "Brachy" ],
+        [ qr/elegans/,            "C_elegans" ],
+        [ qr/Chlamy/,             "Chlamy" ],
+        [ qr/Danio/,              "Danio" ],
+        [ qr/E_coli/,             "E_coli" ],
+        [ qr/Glycine/,            "Soy" ],
+        [ qr/Homo_sapiens/,       "Human" ],
+        [ qr/Maize/,              "Maize" ],
+        [ qr/musculus/,           "Mouse" ],
+        [ qr/neurospora/,         "Neurospora" ],
+        # The separator may be an underscore or whitespace.  The previous
+        # pattern /Oryza\sativa/ required whitespace followed by "ativa" and
+        # therefore never matched "Oryza_sativa".
+        [ qr/Oryza[_\s]sativa/,   "Oryza_sativa" ],
+        # NOTE(review): "Physcomitreall" looks like a misspelling, but it
+        # ends up in table names, so it is left unchanged here.
+        [ qr/Physcomit/,          "Physcomitreall" ],
+        [ qr/cerevisiae/,         "Sacc_cerevisiae" ],
+        [ qr/pombe/,              "Sacc_pombe" ],
+        [ qr/Selaginella/,        "Selaginella" ],
+        [ qr/Sorghum/,            "Sorghum" ],
+        [ qr/Synechosystis/,      "Synechosystis" ],
+        [ qr/Vitis_vinifera/,     "Grape" ],
+    );
+
+    # An undefined name can never match; skip straight to the error instead
+    # of emitting one "uninitialized value" warning per pattern.
+    if (defined $temp) {
+        foreach my $pair (@species_patterns) {
+            my ($pattern, $short_name) = @{$pair};
+            return $short_name if $temp =~ $pattern;
+        }
+    }
+
+    my $shown = defined $temp ? $temp : 'undef';
+    die "Error: Species can not be found from file name! (got '$shown')";
+}
+
+