Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added species db loading script to svn. This script will parse the fasta files and...
author elserj <elserj@localhost>
Thu, 23 Sep 2010 21:54:22 +0000 (21:54 +0000)
committer elserj <elserj@localhost>
Thu, 23 Sep 2010 21:54:22 +0000 (21:54 +0000)
svn path=/; revision=50

interactome_scripts/load_species_db.pl [new file with mode: 0755]

diff --git a/interactome_scripts/load_species_db.pl b/interactome_scripts/load_species_db.pl
new file mode 100755 (executable)
index 0000000..e5aad70
--- /dev/null
@@ -0,0 +1,103 @@
+#!/usr/bin/perl
+
+###############################################################
+#  Written by Justin Elser 2/22/10                            #
+#                                                             #
+#  This program takes all fasta files in the current working  #
+#    directory and puts them in a database named protein      #
+#       sequences                                             #
+#                                                             #
+#  Initial version 0.1                                        #
+#     Shouldn't have to change much once this is done         #
+#         except to maybe add more species detection          #
+#         which is actually done in the pulled in subs        #
+#                                                             #
+###############################################################
+
+use strict;
+use warnings;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+# pull in the find_species and find_gene common subroutines
+require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+
+
+# Prompt for the database credentials, then open the database handle
+# ($dbh) that the rest of the script uses.
+
+my $screen = Term::Screen::ReadLine->new();
+$screen->clrscr;   # start from a blank screen
+
+# Username prompt on row 0; the answer is read at row 0, column 11
+$screen->at(0,0);
+$screen->puts("Username: ");
+my $username = $screen->readline(ROW => 0, COL => 11);
+
+# Password prompt on row 1; key presses are replaced with stars
+$screen->at(1,0);
+$screen->puts("Password: ");
+my $password = $screen->readline(ROW => 1, COL => 11, PASSWORD => 1);
+
+# Move below the two prompts and release the screen object
+$screen->at(2,0);
+undef $screen;
+
+my $dsn = 'DBI:mysql:protein_sequences;host=floret.cgrb.oregonstate.edu';
+my $dbh = DBI->connect(
+       $dsn, $username, $password,
+       { RaiseError => 1, AutoCommit => 1 },
+) or die "Failed to connect to database: $DBI::errstr";
+
+
+my @files = glob("*.fa");
+
+# For every FASTA file in the current directory: work out the species,
+# (re)create a table named after it, parse the file into one record per
+# gene, and insert each record as a (gene_id, gene_header, sequence) row.
+# find_species() and find_gene() come from the required find_species.pl.
+foreach my $file (@files) {
+       my $species = find_species($file);
+       print "on species $species\n";
+
+       # create the database table; the species name is used as the table
+       # name, so it is passed through quote_identifier before interpolation
+       my $safe_table = $dbh->quote_identifier($species);
+
+       $dbh->do("drop table if exists $safe_table");
+       # ENGINE is used instead of the old TYPE keyword, which MySQL 5.5
+       # and later no longer accept
+       $dbh->do("CREATE TABLE $safe_table (
+               `gene_id` VARCHAR( 255 ) NOT NULL ,
+               `gene_header` TEXT NOT NULL ,
+               `sequence` TEXT NOT NULL ,
+               UNIQUE ( `gene_id` )
+               ) ENGINE = MYISAM");
+
+       # and the statement handler to do the inserts
+       my $insert_sth = $dbh->prepare("insert into $safe_table (gene_id, gene_header, sequence) values (?,?,?)");
+
+       # Parser state. All of it lives outside the read loop because a
+       # record spans several lines: the header line starts it and the
+       # following lines accumulate its sequence.
+       my %seq_hash; # keys is $gene, values are $seq and $gene_header
+       my $gene;
+       my $gene_header;
+       my $seq = "";
+
+       open(my $in_fh, '<', $file) or die "Cannot open $file: $!";
+
+       while (my $line = <$in_fh>) {
+               chomp $line;
+               if ($line =~ /^\>/) {
+                       # a new header closes the previous record, if any;
+                       # store it before the state is overwritten below
+                       if (defined($gene)) {
+                               $seq_hash{$gene}->{'sequence'} = $seq;
+                               $seq_hash{$gene}->{'gene_header'} = $gene_header;
+                       }
+                       $gene_header = $line;
+                       $gene = find_gene($gene_header,$species);
+                       $gene =~ s/^\>//;
+                       $seq = "";
+               } else {
+                       $seq .= $line;
+               }
+       }
+       close($in_fh);
+
+       # add the last gene to the hash (nothing to add when the file
+       # contained no header line at all)
+       if (defined($gene)) {
+               $seq_hash{$gene}->{'sequence'} = $seq;
+               $seq_hash{$gene}->{'gene_header'} = $gene_header;
+       }
+
+       foreach my $key (keys %seq_hash) {
+               $insert_sth->execute($key,$seq_hash{$key}->{'gene_header'},$seq_hash{$key}->{'sequence'});
+       }
+}
+
+
+