Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Removing unwanted files
author athreyab <athreyab@localhost>
Mon, 23 Jan 2012 00:03:16 +0000 (00:03 +0000)
committer athreyab <athreyab@localhost>
Mon, 23 Jan 2012 00:03:16 +0000 (00:03 +0000)
svn path=/; revision=269

Personnel/athreyab/interactions/a.pl [deleted file]
Personnel/athreyab/interactions/example.sif [deleted file]
Personnel/athreyab/interactions/interactionPathsFromSif.pl [new file with mode: 0644]
Personnel/athreyab/interactions/interactionPathsFromsif.pl [deleted file]
Personnel/athreyab/interactions/interaction_final.png [deleted file]

diff --git a/Personnel/athreyab/interactions/a.pl b/Personnel/athreyab/interactions/a.pl
deleted file mode 100644 (file)
index 6f897e2..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-$hello = "hello world";
-print $hello=~/\%/;
-
-print $hello;
diff --git a/Personnel/athreyab/interactions/example.sif b/Personnel/athreyab/interactions/example.sif
deleted file mode 100644 (file)
index 7d1fe07..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-AT1G09700      pp      AT1G01040
-AT2G28380      pp      AT1G01040
-AT5G41070      pp      AT1G01040
-AT2G46830      pp      AT1G01060
-AT3G60250      pp      AT1G01060
-AT5G02810      pp      AT1G01060
-AT5G24470      pp      AT1G01060
-AT5G67380      pp      AT1G01060
-AT5G24270      pp      AT1G01140
-AT5G55990      pp      AT1G01140
-AT2G22840      pp      AT1G01160
-AT4G09000      pp      AT1G01160
-AT1G63650      pp      AT1G01380
-AT5G41315      pp      AT1G01380
-AT1G01480      pp      AT1G01480
-AT1G01510      pp      AT1G01510
-AT1G26830      pp      AT1G01640
-AT4G02570      pp      AT1G01640
-AT1G75840      pp      AT1G01700
-AT3G51300      pp      AT1G01700
-AT3G61140      pp      AT1G02090
-AT5G14250      pp      AT1G02090
-AT5G42970      pp      AT1G02090
-AT1G02280      pp      AT1G02280
-AT5G54190      pp      AT1G02280
-AT1G02340      pp      AT1G02340
-AT1G09530      pp      AT1G02340
-AT2G32950      pp      AT1G02340
-AT2G46340      pp      AT1G02340
-AT4G25560      pp      AT1G02340
diff --git a/Personnel/athreyab/interactions/interactionPathsFromSif.pl b/Personnel/athreyab/interactions/interactionPathsFromSif.pl
new file mode 100644 (file)
index 0000000..37709de
--- /dev/null
@@ -0,0 +1,240 @@
+#!/usr/bin/perl -w
+
+#use strict;
+
+use DBI;
+use Config::IniFiles;
+use Switch;
+require "interactionPath.pl";
+
+
+#use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
+use constant SITE_ADJECTIVE => 'Development';
+
+# Print the command-line usage summary and terminate the script.
+# Takes no arguments and never returns: it always ends by calling die.
+sub showUsage
+{
+       # The script is named interactionPathsFromSif.pl; the usage line must
+       # show the name the user actually has to type.
+       print "\n\nUsage: \n\tperl interactionPathsFromSif.pl <option> [<fileName>]\n";
+       print "options:\n\t";
+       print "0 - to add a new species\n\t";
+       print "1 - to import ortholog data\n\t";
+       print "2 - to import paralog data\n\t";
+       print "3 - to import seed interactions data\n\n";
+       # An empty die message makes perl print only "Died at <file> line <n>.",
+       # so give the reason explicitly.
+       die "Exiting: invalid or missing arguments.\n";
+}
+
+# Interaction types in sif files are abbreviated ('pp', 'up', 'down', etc.).
+# Translate an abbreviation into its full name through the file-level
+# %interaction_types table and return the id that getInteractionTypeId
+# reports for that full name.
+#
+# Parameters: the abbreviated interaction type read from the sif file
+# Returns:    the value returned by getInteractionTypeId for the full name
+# Dies:       when the abbreviation is missing or has no entry in
+#             %interaction_types
+sub getInteractionTypeForSifFormat
+{
+       my ($abbreviated_interaction_type) = @_;
+       defined($abbreviated_interaction_type)
+               || die "interaction type abbreviation is missing";
+
+       # Look the abbreviation up directly and keep the result in a lexical.
+       # The previous loop stored the match in a package global that was never
+       # reset, so once one lookup had succeeded an unknown abbreviation
+       # silently reused the earlier type instead of reaching the die below.
+       # ("break" is also not a Perl loop-control statement; a plain hash
+       # lookup needs no early exit at all.)
+       my $interaction_type = $interaction_types{$abbreviated_interaction_type};
+       (defined($interaction_type) && $interaction_type ne "")
+               || die "$abbreviated_interaction_type may not be present in the db";
+
+       return getInteractionTypeId($interaction_type);
+}
+
+# Read the paralog file named by the file-level $fileName and import it into
+# the Object and Homology tables. Each usable line holds three tab-separated
+# columns: left accession, right accession and score. Accessions that
+# getObjectIdFromAccession cannot find are inserted into Object with
+# $speciesIdForParalog; homology rows are written with homology_type_id 2.
+# Takes no arguments and returns nothing useful; dies if the file cannot be
+# opened.
+#
+# NOTE(review): values from the file are interpolated straight into the SQL
+# text. Move to placeholders once executeDbQuery can accept bind values.
+sub importParalogData
+{
+       print "reading paralog data\n";
+       # Three-argument open with a lexical handle: the file name can no longer
+       # be interpreted as an open mode, and the handle is private to this sub.
+       open(my $paralog_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+       while(my $entry = <$paralog_fh>){
+               # strip off newline characters
+               $entry =~ s/\r//g;
+               $entry =~ s/\n//g;
+               # split the columns into 3 separate variables
+               my ($obj_l, $obj_r, $score) = split("\t", $entry);
+               # skip lines whose score column is missing, empty or unusable
+               next if(!defined($score));
+               # NOTE(review): this matches a literal 'D'. If the intent was to
+               # skip non-numeric scores the pattern would be /\D/ - confirm.
+               next if($score =~ /D/);
+               next if($score eq "");
+               next if($score =~ /\%/);
+
+               #get object id from Object for the gene accession in the first column. If it doesn't exist, insert into the table
+               my $obj_id_l = getObjectIdFromAccession($obj_l);
+               if(isEmpty($obj_id_l)){
+                       my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ('$obj_l', '$speciesIdForParalog') ON DUPLICATE KEY UPDATE object_accession='$obj_l'";
+                       executeDbQuery($query);
+                       $obj_id_l = getObjectIdFromAccession($obj_l);
+               }
+
+               #get object id from Object for the gene accession in the second column. If it doesn't exist, insert into the table
+               my $obj_id_r = getObjectIdFromAccession($obj_r);
+               if(isEmpty($obj_id_r)){
+                       my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ('$obj_r', '$speciesIdForParalog') ON DUPLICATE KEY UPDATE object_accession='$obj_r'";
+                       executeDbQuery($query);
+                       $obj_id_r = getObjectIdFromAccession($obj_r);
+               }
+
+               #check if the homology between the accessions is already recorded. Write a row when no
+               #relation exists yet or when the new score is higher than the recorded one.
+               # NOTE(review): a higher score inserts a new row rather than updating the
+               # existing one - confirm this is what the Homology schema expects.
+               my ($homology_id,$origScore) = getHomologyIdAndScore($obj_id_l,$obj_id_r,$score);
+               if(isEmpty($homology_id) || $origScore < $score){
+                       my $query = "INSERT INTO Homology(`object_id_source`,`object_id_projection`,`score`,`homology_type_id`) VALUES('$obj_id_l','$obj_id_r','$score','2')";
+                       executeDbQuery($query);
+                       print "$obj_id_l\t$obj_id_r\t$score\n";
+               }
+       }
+       close($paralog_fh);
+       print "Paralog data import completed\n";
+
+}
+
+# Read the ortholog file named by the file-level $fileName and import it into
+# the Object and Homology tables. Each usable line holds three tab-separated
+# columns: left accession, right accession and score. A left accession that
+# getObjectIdFromAccession cannot find is inserted into Object with
+# $speciesId1ForOrtholog, a right accession with $speciesId2ForOrtholog;
+# homology rows are written with homology_type_id 1.
+# Takes no arguments and returns nothing useful; dies if the file cannot be
+# opened.
+#
+# NOTE(review): values from the file are interpolated straight into the SQL
+# text. Move to placeholders once executeDbQuery can accept bind values.
+sub importOrthologData
+{
+       print "reading ortholog data\n";
+       # Three-argument open with a lexical handle: the file name can no longer
+       # be interpreted as an open mode, and the handle is private to this sub.
+       open(my $ortholog_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+       while(my $entry = <$ortholog_fh>){
+               # strip off newline characters
+               $entry =~ s/\r//g;
+               $entry =~ s/\n//g;
+               # split the columns into 3 separate variables
+               my ($obj_l, $obj_r, $score) = split("\t", $entry);
+               # skip lines whose score column is missing, empty or unusable
+               next if(!defined($score));
+               # NOTE(review): this matches a literal 'D'. If the intent was to
+               # skip non-numeric scores the pattern would be /\D/ - confirm.
+               next if($score =~ /D/);
+               next if($score eq "");
+               next if($score =~ /\%/);
+
+               #get object id from Object for the gene accession in the first column. If it doesn't exist, insert into the table
+               my $obj_id_l = getObjectIdFromAccession($obj_l);
+               if(isEmpty($obj_id_l)){
+                       my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ('$obj_l', '$speciesId1ForOrtholog') ON DUPLICATE KEY UPDATE object_accession='$obj_l'";
+                       executeDbQuery($query);
+                       $obj_id_l = getObjectIdFromAccession($obj_l);
+               }
+
+               #get object id from Object for the gene accession in the second column. If it doesn't exist, insert into the table
+               my $obj_id_r = getObjectIdFromAccession($obj_r);
+               if(isEmpty($obj_id_r)){
+                       my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ('$obj_r', '$speciesId2ForOrtholog') ON DUPLICATE KEY UPDATE object_accession='$obj_r'";
+                       executeDbQuery($query);
+                       $obj_id_r = getObjectIdFromAccession($obj_r);
+               }
+
+               #check if the homology between the accessions is already recorded. Write a row when no
+               #relation exists yet or when the new score is higher than the recorded one.
+               # NOTE(review): a higher score inserts a new row rather than updating the
+               # existing one - confirm this is what the Homology schema expects.
+               my ($homology_id,$origScore) = getHomologyIdAndScore($obj_id_l,$obj_id_r);
+               if(isEmpty($homology_id) || $origScore < $score){
+                       my $query = "INSERT INTO Homology(`object_id_source`,`object_id_projection`,`score`,`homology_type_id`) VALUES('$obj_id_l','$obj_id_r','$score','1')";
+                       executeDbQuery($query);
+                       print "$obj_id_l\t$obj_id_r\t$score\n";
+               }
+
+       }
+       close($ortholog_fh);
+       print "Ortholog data import completed\n";
+
+}
+
+# Import the seed interaction data into the database. The file named by the
+# file-level $fileName is in sif format: each usable line holds three
+# tab-separated columns - left accession, abbreviated interaction type and
+# right accession. An Interaction row is inserted, tagged with the file-level
+# $curatorId, for every line whose interaction getInteractionId does not
+# already report.
+# Takes no arguments and returns nothing useful; dies if the file cannot be
+# opened or if an interaction type abbreviation is unknown.
+#
+# NOTE(review): values from the file are interpolated straight into the SQL
+# text. Move to placeholders once executeDbQuery can accept bind values.
+sub importSifData
+{
+       print "reading seed interaction information from $fileName\n";
+
+       # Three-argument open with a lexical handle: the file name can no longer
+       # be interpreted as an open mode, and the handle is private to this sub.
+       open(my $interactions_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+       print "Species id: ".$speciesIdForSif."\n";
+       while(my $entry = <$interactions_fh>){
+               # strip off newline characters
+               $entry =~ s/\r//g;
+               $entry =~ s/\n//g;
+               # split the columns into separate variables
+               my ($accession_left,$interaction_type,$accession_right) = split("\t", $entry);
+               # skip if any of the variables are empty
+               next if(isEmpty($accession_left));
+               next if(isEmpty($interaction_type));
+               next if(isEmpty($accession_right));
+
+               #get object_id for object in the first column
+               my $accession_left_id = getObjectIdFromAccessionLeft($accession_left);
+               #get interaction_type_id for the abbreviation in the second column
+               my $interaction_type_id = getInteractionTypeForSifFormat($interaction_type);
+               #get object_id for object in the third column
+               # (this was assigned to a misspelled variable, which left the id that is
+               # tested below permanently empty, so every line was skipped)
+               my $accession_right_id = getObjectIdFromAccessionRight($accession_right);
+               print "$accession_left\t$accession_right\n";
+               print "$accession_left_id\t$interaction_type_id\t$accession_right_id\n";
+               next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
+               #check if the interaction is already recorded. Write to database only if it doesn't exist already.
+               # $curatorId is the variable the main script fills in; use it for the
+               # lookup, the insert and the log line alike.
+               my $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curatorId);
+               if(isEmpty($interactionId)){
+                       # $interaction_type_id needs its sigil here; without it the literal
+                       # text 'interaction_type_id' was sent as the column value.
+                       my $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`curator_id`) VALUES('$accession_left_id','$accession_right_id','$interaction_type_id','$curatorId')";
+                       executeDbQuery($query);
+                       print "$accession_left_id\t$interaction_type_id\t$accession_right_id\t$curatorId\n";
+               }
+       }
+       close($interactions_fh);
+       print "Seed interaction data import completed\n";
+}
+
+
+# ---- main script --------------------------------------------------------
+# Usage: perl interactionPathsFromSif.pl <option> [<fileName>]
+# Option "0" adds a species and takes no file; options "1", "2" and "3"
+# import ortholog, paralog and seed interaction data from <fileName>.
+$optionId = $ARGV[0];
+# Check for a missing option first so that it is never compared while
+# undefined; every option other than "0" needs exactly two arguments.
+if(!defined($optionId) || ($optionId ne "0" && $#ARGV != 1)){
+       showUsage();
+}
+
+
+# These stay package globals (no "my") because the import subs defined
+# earlier in the file read them by name.
+$fileName = $ARGV[1];
+$speciesIdForSif="";
+$speciesIdForParalog="";
+$speciesId1ForOrtholog="";
+$speciesId2ForOrtholog="";
+$curatorId = "";
+%interaction_types=(pp=>"protein-protein binding",up=>"up regulation",down=>"down regulation");
+$dbh = setUpDBConnection();
+
+
+# Dispatch on the option with plain string comparisons; this does not depend
+# on the Switch source filter to rewrite the script.
+#add a new species to our database.
+if($optionId eq "0")
+{
+       $speciesId = addNewSpecies(getSpeciesProperties(1));
+}
+#import ortholog information
+elsif($optionId eq "1")
+{
+       print "\n\nYou are importing a file containing ortholog genes.\nMake sure you have information for both species\n";
+
+       print "Species - 1. Species to which gene ids in the first column belong to";
+       $speciesId1ForOrtholog = getSpeciesId(getSpeciesProperties(0));
+       $speciesId1ForOrtholog ne "" || die "species not found in our tables.\n";
+
+       print "\nSpecies - 2. Species to which gene ids in the second column belong to";
+       $speciesId2ForOrtholog = getSpeciesId(getSpeciesProperties(0));
+       $speciesId2ForOrtholog ne "" || die "species not found in our tables.\n";
+
+       print "$speciesId1ForOrtholog"."\t"."$speciesId2ForOrtholog"."\n";
+       importOrthologData();
+}
+#import paralog information
+elsif($optionId eq "2")
+{
+       print "\n\nYou are importing a file containing paralog genes.\nMake sure you have the species information";
+       $speciesIdForParalog = getSpeciesId(getSpeciesProperties(0));
+       # Same message as the ortholog branch instead of an empty die.
+       $speciesIdForParalog ne "" || die "species not found in our tables.\n";
+       importParalogData();
+}
+#import seed interaction information from a sif file
+elsif($optionId eq "3")
+{
+       print "Make sure you have the following information:\n";
+       print "\tspecies to which the gene accessions in the .sif file belong to\n";
+       print "\tncbi id of the species\n";
+       $curatorId = getCuratorId(getCuratorProperties());
+       $speciesIdForSif = getSpeciesId(getSpeciesProperties(0));
+       # Same message as the ortholog branch instead of an empty die.
+       $speciesIdForSif ne "" || die "species not found in our tables.\n";
+       importSifData();
+}
+else
+{
+       print "Invalid inputs\n";
+       showUsage();
+}
diff --git a/Personnel/athreyab/interactions/interactionPathsFromsif.pl b/Personnel/athreyab/interactions/interactionPathsFromsif.pl
deleted file mode 100644 (file)
index 9f46874..0000000
+++ /dev/null
@@ -1,209 +0,0 @@
-#!/usr/bin/perl -w
-
-#use strict;
-
-use DBI;
-use Config::IniFiles;
-use Switch;
-
-
-#use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
-use constant SITE_ADJECTIVE => 'Development';
-
-
-sub executeDbQuery{
-       ($query, $dbh) = @_;
-       $stmnt = $dbh->prepare("$query");
-       $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr;
-}
-
-sub executeDbQueryAndFetchRowArray{
-       ($query, $dbh) = @_;
-       #print $query."\n"; 
-       $stmnt = $dbh->prepare("$query");
-       $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr;
-       return $stmnt->fetchrow_array();
-}
-
-sub getObjectId{
-       ($obj_abbr, $dbh) = @_;
-       $query = "SELECT object_id FROM Object WHERE object_abbreviation = '$obj_abbr'"; 
-       $obj_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $obj_id ne "" or die "$obj_abbr was not found in Object table";
-       return $obj_id;
-}
-
-
-sub getInteractorTypeId{
-       ($int_type, $dbh) = @_;
-       $query = "SELECT interactor_type_id FROM Interactor_type WHERE interactor_type = '$int_type'"; 
-       $int_type_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $int_type_id ne "" or die "$int_type was not found in Interactor_type table";
-       return $int_type_id;
-}
-
-
-sub getInteractionTypeId{
-       ($interaction_type, $dbh) = @_;
-       $query = "SELECT interaction_type_id FROM Interaction_type WHERE interaction_type = '$interaction_type'"; 
-       $interaction_type_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $interaction_type_id ne "" or die "$interaction_type was not found in Interaction_type table";
-       return $interaction_type_id;
-}
-
-
-sub getModeOfActionId{
-       ($mode_of_action, $dbh) = @_;
-       $query = "SELECT mode_of_action_id FROM Mode_of_action WHERE mode_of_action = '$mode_of_action'"; 
-       $mode_of_action_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $mode_of_action_id ne "" or die "$mode_of_action was not found in ModeofAction table";
-       return $mode_of_action_id;
-}
-
-sub getEvidenceId{
-       ($evidence, $dbh) = @_;
-       @evidenceArray = split(/:/,$evidence);
-       scalar(@evidenceArray) == 2 or die "Incorrect evidence information. Evidence source name or id missing";
-       $query = "SELECT evidence_id FROM Evidence WHERE source_name = '$evidenceArray[0]' and source_id='$evidenceArray[1]'"; 
-       $evidence_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       if($evidence_id eq ""){
-               $query = "INSERT INTO Evidence(`source_name`,`source_id`) VALUES('$evidenceArray[0]','$evidenceArray[1]')"; 
-               executeDbQuery($query,$dbh);
-               return getEvidenceId($evidence,$dbh);
-       }
-       return $evidence_id;
-}
-
-sub getEvidenceCodeId{
-       ($evidence_code, $dbh) = @_;
-       $query = "SELECT evidence_code_id FROM Evidence_code WHERE evidence_code = '$evidence_code'"; 
-       $evidence_code_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $evidence_code_id ne "" or die "$evidence_code was not found in Evidence table";
-       return $evidence_code_id;
-}
-
-
-sub getExperimentId{
-       ($experiment, $dbh) = @_;
-       $query = "SELECT experiment_id FROM Experiment WHERE experiment_name = '$experiment'"; 
-       $experiment_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       $experiment_id ne "" or die "$experiment was not found in Experiment table";
-       return $experiment_id;
-}
-
-sub getCuratorId{
-       ($labName,$userName) = @_;
-
-       $query = "SELECT curator_id FROM Curator WHERE lab_name = '$labName' and user_name = '$userName'"; 
-       $curator_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       if($curator_id ne ""){
-               return $curator_id;     
-       }
-       else{
-               $query = "INSERT INTO Curator(`lab_name`,`user_name`) VALUES('$labName','$userName')";
-               executeDbQuery($query,$dbh);
-               return getCuratorId($labName,$userName);
-       }
-}
-
-sub setUpDBConnection
-{
-       #read config file for db config values
-       $ini = Config::IniFiles->new( -file => "/home/balaji/Documents/code/interactions/dbConfig.ini");
-       $dbUser=$ini->val(SITE_ADJECTIVE, 'dbUser');
-       $dbPw=$ini->val(SITE_ADJECTIVE, 'dbPw');
-       $host=$ini->val(SITE_ADJECTIVE, 'host');
-       $dbName=$ini->val(SITE_ADJECTIVE, 'dbName');
-       $dbh = DBI->connect("DBI:mysql:$dbName;host=$host",$dbUser,$dbPw,{RaiseError=> 1}) or die("Failed to connect to database - $DBI::errstr");
-       return $dbh;
-}
-
-sub getCuratorProperties(){
-       print "Enter curator lab name or press 'enter'. Default is Jaiswal Lab.\n";
-       $labName = <STDIN>;
-       if($labName eq "\n"){
-               $labName = "Jaiswal lab";
-       }
-       print "Enter curator name or press 'enter'. Default is ''.\n";
-       $curatorName = <STDIN>;
-       if($curatorName eq "\n"){
-               $curatorName = "";
-       }
-}
-
-sub getSpeciesId(){
-       $query = "SELECT species_id from Species WHERE LOWER(`species`) = LOWER('$speciesName')";
-       $species_id = executeDbQueryAndFetchRowArray($query,$dbh);
-       if($species_id eq ""){
-               $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$speciesName','$ncbi_id') ON DUPLICATE KEY UPDATE species='$species_name'";
-               executeDbQuery($query,$dbh);
-               return getSpeciesId();
-       }
-       else{
-               return $species_id;     
-       }               
-               
-}
-sub showUsage(){
-       print "Usage: \n\tperl interactionPathsFromsif <sifFileName>\n";        
-}
-
-sub getSpeciesProperties(){
-       print "Enter the name of the species\n";
-       $speciesName = <STDIN>;
-       $speciesName ne "\n" || die "Species name can not be empty";
-       
-       print "Enter the ncbi taxonomy id of the species\n";
-       $ncbi_id = <STDIN>;
-       if($ncbi_id eq "\n"){
-               $ncbi_id = "";
-               }       
-}
-sub importSifData(){
-       print "reading seed interaction information from $fileName\n";
-
-       open(interactions_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
-       $species_id = getSpeciesId();
-       print "Species id: ".$species_id;
-       while(<interactions_file>){
-               my $entry = $_;
-               # strip off newline characters
-               $entry =~ s/\r//g;
-      $entry =~ s/\n//g;
-      # split the columns into separate variables
-      my ($accession_left,$interaction_type,$accession_right) = split("\t", $entry);
-      # skip if any of the variables are empty
-      next if(!defined($accession_left) || $accession_left eq "");
-               next if(!defined($interaction_type) || $interaction_type eq "");
-               next if(!defined($accession_right) || $accession_right eq "");
-               
-               print $accession_left."\t".$interaction_type."\t".$accession_right."\n";
-       }
-}
-
-if($#ARGV != 0){
-       print "Invalid inputs\n";
-       showUsage();
-}
-
-$fileName = $ARGV[0];
-$labName="";
-$userName="";
-$speciesName="";
-$ncbi_id = "";
-$dbh = "";
-if(index($fileName,".sif") != -1){
-       print "Make sure you have the following information:\n";
-       print "\tspecies to which the gene accessions in the .sif file belong to\n";
-       print "\tncbi id of the species\n";
-       getCuratorProperties();
-       getSpeciesProperties();
-       $dbh = setUpDBConnection();
-       importSifData();
-}
-else{
-       print "File entered is not a .sif file\n";
-       showUsage();
-}
diff --git a/Personnel/athreyab/interactions/interaction_final.png b/Personnel/athreyab/interactions/interaction_final.png
deleted file mode 100644 (file)
index b21858b..0000000
Binary files a/Personnel/athreyab/interactions/interaction_final.png and /dev/null differ