From: elserj Date: Tue, 16 Dec 2014 23:36:44 +0000 (+0000) Subject: Add synonym entries X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=48188207522dc1bcd30287af260e79bb9b1d0e82;p=old-jaiswallab-svn%2F.git Add synonym entries svn path=/; revision=596 --- diff --git a/interactome_scripts/load_species_db.pl b/interactome_scripts/load_species_db.pl index 45c5fa8..bdfce67 100755 --- a/interactome_scripts/load_species_db.pl +++ b/interactome_scripts/load_species_db.pl @@ -47,14 +47,27 @@ foreach my $file (@files) { my ($retrieval_location, $retrieval_date) = retrieval_info($file); $retrieval_date =~ /(\d+)\_(\d+)\_(\d\d)/; - my $formatted_date = "20".$1."-".$2."-".$3; + my $formatted_date = "20".$3."-".$1."-".$2; + # insert version info into database. If species previously entered, location and date will be updated. my $safe_info_table = $dbh->quote_identifier("version_info"); - $dbh->do("insert into $safe_info_table (?,?,?) on duplicate key update retrieval_location=$retrieval_location,date=$formatted_date",$species,$retrieval_location,$formatted_date); + # Create the database for version info if it doesn't already exist + $dbh->do("CREATE TABLE IF NOT EXISTS $safe_info_table ( + `species` varchar(255) NOT NULL, + `retrieval_location` varchar(255) NOT NULL, + `date` date NOT NULL, + PRIMARY KEY (`species`) + ) ENGINE=MyISAM"); + + + $dbh->do("insert into $safe_info_table (species,retrieval_location,date) values ('$species','$retrieval_location','$formatted_date') on duplicate key update retrieval_location='$retrieval_location', date='$formatted_date'"); - # create the database table + # create the database table for the species my $safe_table = $dbh->quote_identifier($species); + # create the database table for all synonyms + my $safe_syn_table = $dbh->quote_identifier('synonyms'); + $dbh->do("drop table if exists $safe_table"); $dbh->do("CREATE TABLE $safe_table ( `gene_id` VARCHAR( 255 ) NOT NULL , @@ -62,16 +75,24 @@ foreach my $file (@files) { `sequence` TEXT NOT NULL , UNIQUE ( `gene_id` ) ) TYPE = MYISAM"); - + + $dbh->do("CREATE TABLE if NOT EXISTS $safe_syn_table( + `gene_id` VARCHAR( 255 ) NOT NULL , + `species` VARCHAR( 255 ) NOT NULL , + `synonyms` TEXT + ) TYPE = MYISAM"); + # and the statement handler to do the inserts my $insert_sth = $dbh->prepare("insert into $safe_table (gene_id, gene_header, sequence) values (?,?,?)"); + my $insert_syn_sth = $dbh->prepare("insert into $safe_syn_table (gene_id, species, synonyms) values (?,?,?)"); - # create the variables:q! + # create the variables my %seq_hash; # keys is $gene, values are $seq and $gene_header my $gene; my $seq; my $gene_header; + my $synonyms; open(in_file,$file); @@ -87,6 +108,7 @@ foreach my $file (@files) { $gene_header =~ s/^>//; $gene = find_gene($gene_header,$species); $gene =~ s/^\>//; + $synonyms = find_gene_synonym($gene_header,$species); $seq = ""; }else{ $seq = "$seq"."$line"; @@ -99,6 +121,10 @@ foreach my $file (@files) { foreach my $key (keys %seq_hash) { $insert_sth->execute($key,$seq_hash{$key}->{'gene_header'},$seq_hash{$key}->{'sequence'}); + + if($synonyms ne "null") { + $insert_syn_sth->execute($key,$species,$synonyms); + } } }