Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Moved to DbiFloret::dbconnect for db connection, and put in some versioning info
author elserj <elserj@localhost>
Wed, 6 Jul 2011 23:57:59 +0000 (23:57 +0000)
committer elserj <elserj@localhost>
Wed, 6 Jul 2011 23:57:59 +0000 (23:57 +0000)
svn path=/; revision=115

interactome_scripts/load_species_db.pl

index e4a4e6c6cdeda14fe42306870c26be9ce185479f..45c5fa8255c83814ac52adedb4c87007548ee258 100755 (executable)
@@ -4,14 +4,20 @@
 #  Written by Justin Elser 2/22/10                            #
 #                                                             #
 #  This program takes all fasta files in the current working  #
-#    directory and puts them in a database named protein      #
-#       sequences                                             #
+#    directory and puts them in a database                    #
 #                                                             #
 #  Initial version 0.1                                        #
 #     Shouldn't have to change much once this is done         #
 #         except to maybe add more species detection          #
 #         which is actually done in the pulled in subs        #
 #                                                             #
+#  version 0.9                                                #
+#      Switched to using the DbiFloret::dbconnect to manage   #
+#        the connection to the DB.  Also added a section to   #
+#        put in the version info into a new table.            #
+#        Can't think of much more that would need to go in    #
+#        here.                                                #
+#                                                             #
 ###############################################################
 
 use strict;
@@ -27,27 +33,11 @@ if(-e "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl") {
        require "$ENV{HOME}/bin/find_species.pl";
 }
 
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
 
-# define the database handle to be used 
-
-my $screen = Term::Screen::ReadLine->new();
-       # clear the screen
-       $screen->clrscr;
-       # ask for username
-       $screen->at(0,0)->puts("Username: ");
-       my $username = $screen->readline(ROW => 0, COL=>11);
-
-       # ask for password, replace character presses with stars
-       $screen->at(1,0)->puts("Password: ");
-       my $password = $screen->readline(ROW => 1, COL => 11, PASSWORD => 1);
-
-       $screen->at(2,0);
-       undef $screen;
-
-my $dbh = DBI->connect('DBI:mysql:protein_sequences;host=floret.cgrb.oregonstate.edu', $username, $password,
-       { RaiseError=> 1, AutoCommit=>1 }
-       ) or die "Failed to connect to database: $DBI::errstr";
+use DbiFloret;
 
+my $dbh = DbiFloret::dbconnect;
 
 my @files = glob("*.fa");
 
@@ -55,6 +45,13 @@ foreach my $file (@files) {
        my $species = find_species($file);
        print "on species $species\n";
        
+       my ($retrieval_location, $retrieval_date) = retrieval_info($file);
+       $retrieval_date =~ /(\d+)\_(\d+)\_(\d\d)/;
+       my $formatted_date = "20".$1."-".$2."-".$3;
+       
+       my $safe_info_table = $dbh->quote_identifier("version_info");
+       $dbh->do("insert into $safe_info_table (?,?,?)  on duplicate key update retrieval_location=$retrieval_location,date=$formatted_date",$species,$retrieval_location,$formatted_date);
+       
        # create the database table
        my $safe_table = $dbh->quote_identifier($species);
        
@@ -69,7 +66,8 @@ foreach my $file (@files) {
        # and the statement handler to do the inserts
        my $insert_sth = $dbh->prepare("insert into $safe_table (gene_id, gene_header, sequence) values (?,?,?)");
        
-       # create the variables
+       # create the variables
+       
        my %seq_hash; # keys is $gene, values are $seq and $gene_header
        my $gene;
        my $seq;