--- /dev/null
+#!/usr/bin/perl
+
+###############################################################
+# Written by Justin Elser 2/22/10 #
+# #
+# This program takes all fasta files (*.fa) in the current working #
+# directory and loads each one into a per-species table of the #
+# protein_sequences database #
+# #
+# Initial version 0.1 #
+# Shouldn't have to change much once this is done #
+# except to maybe add more species detection #
+# which is actually done in the pulled in subs #
+# #
+###############################################################
+
+use strict;
+use warnings;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+# pull in the find_species and find_gene common subroutines
+require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+
+
+# Collect the database credentials interactively, then open the database
+# handle ($dbh) used by the rest of the script.
+
+my $term = Term::Screen::ReadLine->new();
+
+# start from a blank screen
+$term->clrscr;
+
+# prompt for the username on row 0; input begins just past the prompt text
+$term->at(0, 0)->puts("Username: ");
+my $username = $term->readline(ROW => 0, COL => 11);
+
+# prompt for the password on row 1; PASSWORD => 1 replaces the typed
+# characters with stars
+$term->at(1, 0)->puts("Password: ");
+my $password = $term->readline(ROW => 1, COL => 11, PASSWORD => 1);
+
+# leave the cursor below the prompts and discard the screen object
+$term->at(2, 0);
+undef $term;
+
+# RaiseError makes DBI die on failure by itself; the "or die" is kept as a
+# second line of defence.
+my $dsn = 'DBI:mysql:protein_sequences;host=floret.cgrb.oregonstate.edu';
+my %connect_attrs = (RaiseError => 1, AutoCommit => 1);
+my $dbh = DBI->connect($dsn, $username, $password, \%connect_attrs)
+    or die "Failed to connect to database: $DBI::errstr";
+
+
+# Load every *.fa file in the current working directory into the database.
+#
+# For each file:
+#   1. find_species($file) supplies the species, which is used as the table name.
+#   2. The file is parsed into %seq_hash (gene id => header + sequence).
+#   3. The species table is dropped, recreated and filled from %seq_hash.
+#
+# The file is parsed BEFORE the table is touched, so an unreadable file aborts
+# the run without destroying a table loaded earlier.
+#
+# Dies if a file cannot be opened; DBI errors die as well, provided the handle
+# in $dbh has RaiseError enabled.
+my @files = glob("*.fa");
+
+foreach my $file (@files) {
+    my $species = find_species($file);
+    print "on species $species\n";
+
+    # ---- parse the fasta file ------------------------------------------
+
+    my %seq_hash;       # key: gene id; value: { sequence, gene_header }
+    my $gene;           # id of the record currently being read
+    my $gene_header;    # full '>' line of the record currently being read
+    my $seq = "";       # sequence lines of the current record, concatenated
+
+    # $gene_header has to live outside the read loop: it is set when a header
+    # line is seen and stored only when the NEXT header (or end of file) is
+    # reached, so it must survive across lines.
+
+    open(my $in_fh, '<', $file) or die "Cannot open $file: $!";
+
+    while (my $line = <$in_fh>) {
+        chomp $line;
+        if ($line =~ /^\>/) {
+            # a new record starts: store the one that just ended, if any
+            if (defined($gene)) {
+                $seq_hash{$gene}->{'sequence'} = $seq;
+                $seq_hash{$gene}->{'gene_header'} = $gene_header;
+            }
+            $gene_header = $line;
+            # find_gene() comes from the required find_species.pl; presumably
+            # it extracts the gene id from the header in a species-specific
+            # way -- see that file for details.
+            $gene = find_gene($gene_header, $species);
+            $gene =~ s/^\>//;
+            $seq = "";
+        }
+        else {
+            $seq .= $line;
+        }
+    }
+
+    close($in_fh);
+
+    # store the last record; skipped when the file held no header line at
+    # all, so no row with an undefined gene id is created
+    if (defined($gene)) {
+        $seq_hash{$gene}->{'sequence'} = $seq;
+        $seq_hash{$gene}->{'gene_header'} = $gene_header;
+    }
+
+    # ---- (re)create and fill the species table ---------------------------
+
+    my $safe_table = $dbh->quote_identifier($species);
+
+    $dbh->do("drop table if exists $safe_table");
+    # ENGINE is used instead of the TYPE table option, which MySQL 5.5 and
+    # later no longer accept.
+    $dbh->do("CREATE TABLE $safe_table (
+        `gene_id` VARCHAR( 255 ) NOT NULL ,
+        `gene_header` TEXT NOT NULL ,
+        `sequence` TEXT NOT NULL ,
+        UNIQUE ( `gene_id` )
+        ) ENGINE = MYISAM");
+
+    # and the statement handler to do the inserts
+    my $insert_sth = $dbh->prepare("insert into $safe_table (gene_id, gene_header, sequence) values (?,?,?)");
+
+    # Going through the hash means a gene id that occurs more than once in
+    # the file is inserted only once (last occurrence wins), which the UNIQUE
+    # constraint on gene_id requires.
+    foreach my $key (keys %seq_hash) {
+        $insert_sth->execute($key, $seq_hash{$key}->{'gene_header'}, $seq_hash{$key}->{'sequence'});
+    }
+}
+
+
+