--- /dev/null
+#!/usr/bin/perl -w
+
+#use strict;
+
+use DBI;
+use Config::IniFiles;
+use Switch;
+require "interactionPath.pl";
+
+
+#use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
+use constant SITE_ADJECTIVE => 'Development';
+
+# Print the command-line synopsis together with the list of numeric options,
+# then terminate the script by dying with an empty message.
+sub showUsage
+{
+    # One entry per selectable option, in the order they are shown.
+    my @option_lines = (
+        "0 - to add a new species",
+        "1 - to import ortholog data",
+        "2 - to import paralog data",
+        "3 - to import seed interactions data",
+    );
+
+    my $usage_text = "\n\nUsage: \n\tperl interactionsPathsFromSif <option> [<fileName>]\n"
+                   . "options:\n\t"
+                   . join("\n\t", @option_lines)
+                   . "\n\n";
+    print $usage_text;
+
+    die "";
+}
+
+# Interaction types in .sif files are abbreviated ('pp', 'up', 'down', ...).
+# This function expands the abbreviation through the file-level
+# %interaction_types map and returns whatever getInteractionTypeId() returns
+# for the full name.
+#
+#   $abbreviated_interaction_type - abbreviation read from the .sif file
+#
+# Dies when the abbreviation is not a key of %interaction_types (or maps to
+# an empty name).
+sub getInteractionTypeForSifFormat
+{
+    my ($abbreviated_interaction_type) = @_;
+
+    # Direct lookup into a lexical. The previous implementation scanned the
+    # hash with each() and kept the match in a package global, so a value
+    # found for an earlier line was silently reused when a later line carried
+    # an unknown abbreviation.
+    my $interaction_type = defined($abbreviated_interaction_type)
+        ? $interaction_types{$abbreviated_interaction_type}
+        : undef;
+
+    die "$abbreviated_interaction_type may not be present in the db"
+        unless defined($interaction_type) && $interaction_type ne "";
+
+    return getInteractionTypeId($interaction_type);
+}
+
+# Read the paralog file named by the global $fileName and import its rows into
+# the Object and Homology tables.
+#
+# Each line is split on tabs into: left accession, right accession, score.
+# Lines with a missing or empty score, or a score containing 'D' or '%', are
+# skipped. Accessions that getObjectIdFromAccession() does not know yet are
+# inserted into Object with the global $speciesIdForParalog. A Homology row
+# (homology_type_id 2) is inserted when no homology is recorded yet or when the
+# recorded score is lower than the one in the file.
+#
+# Uses the globals $fileName, $speciesIdForParalog and $dbh (for quoting).
+# Dies when the file cannot be opened.
+sub importParalogData
+{
+    print "reading paralog data\n";
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode/pipe and the handle cannot collide with other subs.
+    open(my $paralog_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+
+    # Return the object id for an accession, inserting the accession first
+    # when it is not known yet.
+    my $object_id_for = sub {
+        my ($accession) = @_;
+        my $object_id = getObjectIdFromAccession($accession);
+        if(isEmpty($object_id)){
+            # Values come straight from the input file, so quote them instead
+            # of interpolating them raw into the statement.
+            my $quoted_accession = $dbh->quote($accession);
+            my $quoted_species   = $dbh->quote($speciesIdForParalog);
+            my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ($quoted_accession, $quoted_species) ON DUPLICATE KEY UPDATE object_accession=$quoted_accession";
+            executeDbQuery($query);
+            $object_id = getObjectIdFromAccession($accession);
+        }
+        return $object_id;
+    };
+
+    while(my $entry = <$paralog_fh>){
+        # strip off newline characters
+        $entry =~ s/[\r\n]//g;
+        # split the columns into 3 separate variables
+        my ($obj_l, $obj_r, $score) = split(/\t/, $entry);
+        # skip rows without a usable score
+        next if(!defined($score));
+        next if($score eq "");
+        # NOTE(review): this matches a literal capital 'D'; if the intent was
+        # "contains a non-digit" the pattern would be \D - confirm.
+        next if($score =~ /D/);
+        next if($score =~ /\%/);
+
+        my $obj_id_l = $object_id_for->($obj_l);
+        my $obj_id_r = $object_id_for->($obj_r);
+
+        # Check whether the homology between the accessions is already
+        # recorded and, if so, with which score.
+        my ($homology_id, $origScore) = getHomologyIdAndScore($obj_id_l, $obj_id_r, $score);
+        if(isEmpty($homology_id) || $origScore < $score){
+            # NOTE(review): when a lower score already exists this inserts a
+            # second row rather than updating the existing one - confirm that
+            # is intended.
+            my $values = join(',', map { $dbh->quote($_) } $obj_id_l, $obj_id_r, $score, '2');
+            my $query = "INSERT INTO Homology(`object_id_source`,`object_id_projection`,`score`,`homology_type_id`) VALUES($values)";
+            executeDbQuery($query);
+            print "$obj_id_l\t$obj_id_r\t$score\n";
+        }
+    }
+    close($paralog_fh);
+    print "Paralog data import completed\n";
+
+}
+
+# Read the ortholog file named by the global $fileName and import its rows
+# into the Object and Homology tables.
+#
+# Each line is split on tabs into: left accession, right accession, score.
+# Lines with a missing or empty score, or a score containing 'D' or '%', are
+# skipped. Unknown left-column accessions are inserted into Object with the
+# global $speciesId1ForOrtholog, unknown right-column accessions with
+# $speciesId2ForOrtholog. A Homology row (homology_type_id 1) is inserted when
+# no homology is recorded yet or when the recorded score is lower than the one
+# in the file.
+#
+# Uses the globals $fileName, $speciesId1ForOrtholog, $speciesId2ForOrtholog
+# and $dbh (for quoting). Dies when the file cannot be opened.
+sub importOrthologData
+{
+    print "reading ortholog data\n";
+    # Three-argument open with a lexical handle (the old bareword handle was
+    # shared by name with the paralog importer and never closed).
+    open(my $ortholog_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+
+    # Return the object id for an accession, inserting the accession for the
+    # given species first when it is not known yet.
+    my $object_id_for = sub {
+        my ($accession, $species_id) = @_;
+        my $object_id = getObjectIdFromAccession($accession);
+        if(isEmpty($object_id)){
+            # Values come straight from the input file, so quote them instead
+            # of interpolating them raw into the statement.
+            my $quoted_accession = $dbh->quote($accession);
+            my $quoted_species   = $dbh->quote($species_id);
+            my $query = "INSERT INTO Object(`object_accession`, `species_id`) VALUES ($quoted_accession, $quoted_species) ON DUPLICATE KEY UPDATE object_accession=$quoted_accession";
+            executeDbQuery($query);
+            $object_id = getObjectIdFromAccession($accession);
+        }
+        return $object_id;
+    };
+
+    while(my $entry = <$ortholog_fh>){
+        # strip off newline characters
+        $entry =~ s/[\r\n]//g;
+        # split the columns into 3 separate variables
+        my ($obj_l, $obj_r, $score) = split(/\t/, $entry);
+        # skip rows without a usable score
+        next if(!defined($score));
+        next if($score eq "");
+        # NOTE(review): this matches a literal capital 'D'; if the intent was
+        # "contains a non-digit" the pattern would be \D - confirm.
+        next if($score =~ /D/);
+        next if($score =~ /\%/);
+
+        my $obj_id_l = $object_id_for->($obj_l, $speciesId1ForOrtholog);
+        my $obj_id_r = $object_id_for->($obj_r, $speciesId2ForOrtholog);
+
+        # Check whether the homology between the accessions is already
+        # recorded and, if so, with which score.
+        my ($homology_id, $origScore) = getHomologyIdAndScore($obj_id_l, $obj_id_r);
+        if(isEmpty($homology_id) || $origScore < $score){
+            # NOTE(review): when a lower score already exists this inserts a
+            # second row rather than updating the existing one - confirm that
+            # is intended.
+            my $values = join(',', map { $dbh->quote($_) } $obj_id_l, $obj_id_r, $score, '1');
+            my $query = "INSERT INTO Homology(`object_id_source`,`object_id_projection`,`score`,`homology_type_id`) VALUES($values)";
+            executeDbQuery($query);
+            print "$obj_id_l\t$obj_id_r\t$score\n";
+        }
+
+    }
+    close($ortholog_fh);
+    print "Ortholog data import completed\n";
+
+}
+
+# Import the seed interaction data into the database. The file named by the
+# global $fileName is in sif format: left accession, abbreviated interaction
+# type and right accession, separated by tabs.
+#
+# Lines where any of the three columns is empty are skipped, as are lines for
+# which an object id or the interaction type id cannot be resolved. An
+# Interaction row is inserted only when getInteractionId() reports none for
+# the same (left, type, right, curator) combination.
+#
+# Uses the globals $fileName, $speciesIdForSif and $curatorId.
+# Dies when the file cannot be opened.
+sub importSifData
+{
+    print "reading seed interaction information from $fileName\n";
+
+    # Three-argument open with a lexical handle; closed once the loop is done.
+    open(my $interactions_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
+    print "Species id: ".$speciesIdForSif."\n";
+    while(my $entry = <$interactions_fh>){
+        # strip off newline characters
+        $entry =~ s/[\r\n]//g;
+        # split the columns into separate variables
+        my ($accession_left,$interaction_type,$accession_right) = split(/\t/, $entry);
+        # skip if any of the variables are empty
+        next if(isEmpty($accession_left));
+        next if(isEmpty($interaction_type));
+        next if(isEmpty($accession_right));
+
+        #get object_id for object in the first column
+        my $accession_left_id = getObjectIdFromAccessionLeft($accession_left);
+        #get interaction_type_id for the abbreviation in the second column
+        my $interaction_type_id = getInteractionTypeForSifFormat($interaction_type);
+        #get object_id for object in the third column
+        # (was assigned to the misspelled $accesion_right_id, so the id that
+        # is tested and inserted below was never set)
+        my $accession_right_id = getObjectIdFromAccessionRight($accession_right);
+        print "$accession_left\t$accession_right\n";
+        print "$accession_left_id\t$interaction_type_id\t$accession_right_id\n";
+        next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
+
+        #check if the interaction is already recorded. Write to database only if it doesn't exist already.
+        # $curatorId is the variable the main script fills in; the former
+        # $curator_id is never assigned in this file.
+        my $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curatorId);
+        if(isEmpty($interactionId)){
+            # '$interaction_type_id' previously lacked its sigil, which stored
+            # the literal text 'interaction_type_id' instead of the id.
+            my $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`curator_id`) "
+                      . "VALUES('$accession_left_id','$accession_right_id','$interaction_type_id','$curatorId')";
+            executeDbQuery($query);
+            print "$accession_left_id\t$interaction_type_id\t$accession_right_id\t$curatorId\n";
+        }
+    }
+    close($interactions_fh);
+    print "Seed interaction data import completed\n";
+}
+
+
+# ---------------------------------------------------------------------------
+# Script entry point.
+#
+# Command line: <option> [<fileName>]
+#   0 - add a new species (no file needed)
+#   1 - import ortholog data from <fileName>
+#   2 - import paralog data from <fileName>
+#   3 - import seed interactions (.sif) from <fileName>
+#
+# The variables below are deliberately package globals (no `my`): the import
+# subs above read them by name.
+# ---------------------------------------------------------------------------
+$optionId = $ARGV[0];
+# A file name is mandatory for every option except "0".
+if(!defined($optionId) || ($optionId ne "0" && $#ARGV != 1)){
+    showUsage();
+}
+# Reject unknown options before a database connection is opened.
+if($optionId !~ /\A[0-3]\z/){
+    print "Invalid inputs\n";
+    showUsage();
+}
+
+
+$fileName = $ARGV[1];
+$speciesIdForSif="";
+$speciesIdForParalog="";
+$speciesId1ForOrtholog="";
+$speciesId2ForOrtholog="";
+$curatorId = "";
+%interaction_types=(pp=>"protein-protein binding",up=>"up regulation",down=>"down regulation");
+$dbh = setUpDBConnection();
+
+
+# Plain if/elsif dispatch instead of the Switch source filter, which rewrites
+# the program text before compilation and is no longer shipped with Perl.
+if($optionId eq "0"){
+    #add a new species to our database.
+    $speciesId = addNewSpecies(getSpeciesProperties(1));
+}
+elsif($optionId eq "1"){
+    #import ortholog information
+    print "\n\nYou are importing a file containing ortholog genes.\nMake sure you have information for both species\n";
+
+    print "Species - 1. Species to which gene ids in the first column belong to";
+    $speciesId1ForOrtholog = getSpeciesId(getSpeciesProperties(0));
+    $speciesId1ForOrtholog ne "" || die "species not found in our tables.\n";
+
+    print "\nSpecies - 2. Species to which gene ids in the second column belong to";
+    $speciesId2ForOrtholog = getSpeciesId(getSpeciesProperties(0));
+    $speciesId2ForOrtholog ne "" || die "species not found in our tables.\n";
+
+    print "$speciesId1ForOrtholog"."\t"."$speciesId2ForOrtholog"."\n";
+    importOrthologData();
+}
+elsif($optionId eq "2"){
+    #import paralog information
+    print "\n\nYou are importing a file containing paralog genes.\nMake sure you have the species information";
+    $speciesIdForParalog = getSpeciesId(getSpeciesProperties(0));
+    # Same message as option 1 instead of an empty die ("Died at ...").
+    $speciesIdForParalog ne "" || die "species not found in our tables.\n";
+    importParalogData();
+}
+else{
+    #import seed interactions; only "3" can reach this branch after the
+    #validation above
+    print "Make sure you have the following information:\n";
+    print "\tspecies to which the gene accessions in the .sif file belong to\n";
+    print "\tncbi id of the species\n";
+    $curatorId = getCuratorId(getCuratorProperties());
+    $speciesIdForSif = getSpeciesId(getSpeciesProperties(0));
+    $speciesIdForSif ne "" || die "species not found in our tables.\n";
+    importSifData();
+}
+++ /dev/null
-#!/usr/bin/perl -w
-
-#use strict;
-
-use DBI;
-use Config::IniFiles;
-use Switch;
-
-
-#use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
-use constant SITE_ADJECTIVE => 'Development';
-
-
-# Prepare and execute one SQL statement whose result rows are not needed.
-#
-#   $query  - SQL text to run
-#   $handle - database handle (optional); when omitted the package-level $dbh
-#             is used
-#
-# Returns the value returned by execute(). Dies when prepare() or execute()
-# fails.
-sub executeDbQuery{
-    # Lexicals on purpose: assigning @_ to the package globals $query/$dbh
-    # overwrote the shared connection handle on every call.
-    my ($query, $handle) = @_;
-    $handle = $dbh unless defined($handle);
-
-    my $stmnt = $handle->prepare("$query") or die "Couldn't prepare statement: " . $handle->errstr;
-    my $result = $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr;
-    return $result;
-}
-
-# Prepare and execute one SQL statement and return its first result row.
-#
-#   $query  - SQL text to run
-#   $handle - database handle (optional); when omitted the package-level $dbh
-#             is used
-#
-# In list context returns the columns of the first row (empty list when there
-# is none). In scalar context returns the first column of that row, or undef.
-# Dies when prepare() or execute() fails.
-sub executeDbQueryAndFetchRowArray{
-    # Lexicals on purpose: assigning @_ to the package globals $query/$dbh
-    # overwrote the shared connection handle on every call.
-    my ($query, $handle) = @_;
-    $handle = $dbh unless defined($handle);
-    #print $query."\n";
-
-    my $stmnt = $handle->prepare("$query") or die "Couldn't prepare statement: " . $handle->errstr;
-    $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr;
-
-    # Fetch in list context so the scalar-context result does not depend on
-    # the driver, then release the statement because only one row is read.
-    my @row = $stmnt->fetchrow_array();
-    $stmnt->finish();
-    return wantarray ? @row : $row[0];
-}
-
-# Look up the object_id of the Object row whose object_abbreviation equals
-# the given abbreviation.
-#
-#   $obj_abbr - abbreviation to search for
-#   $dbh      - database handle
-#
-# Returns the object_id. Dies when no matching row is found.
-sub getObjectId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($obj_abbr, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT object_id FROM Object WHERE object_abbreviation = " . $dbh->quote($obj_abbr);
-    my $obj_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    defined($obj_id) && $obj_id ne "" or die "$obj_abbr was not found in Object table";
-    return $obj_id;
-}
-
-
-# Look up the interactor_type_id of the Interactor_type row whose
-# interactor_type equals the given name.
-#
-#   $int_type - interactor type name to search for
-#   $dbh      - database handle
-#
-# Returns the interactor_type_id. Dies when no matching row is found.
-sub getInteractorTypeId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($int_type, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT interactor_type_id FROM Interactor_type WHERE interactor_type = " . $dbh->quote($int_type);
-    my $int_type_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    defined($int_type_id) && $int_type_id ne "" or die "$int_type was not found in Interactor_type table";
-    return $int_type_id;
-}
-
-
-# Look up the interaction_type_id of the Interaction_type row whose
-# interaction_type equals the given name.
-#
-#   $interaction_type - interaction type name to search for
-#   $dbh              - database handle
-#
-# Returns the interaction_type_id. Dies when no matching row is found.
-sub getInteractionTypeId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($interaction_type, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT interaction_type_id FROM Interaction_type WHERE interaction_type = " . $dbh->quote($interaction_type);
-    my $interaction_type_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    defined($interaction_type_id) && $interaction_type_id ne "" or die "$interaction_type was not found in Interaction_type table";
-    return $interaction_type_id;
-}
-
-
-# Look up the mode_of_action_id of the Mode_of_action row whose
-# mode_of_action equals the given name.
-#
-#   $mode_of_action - mode of action name to search for
-#   $dbh            - database handle
-#
-# Returns the mode_of_action_id. Dies when no matching row is found.
-sub getModeOfActionId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($mode_of_action, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT mode_of_action_id FROM Mode_of_action WHERE mode_of_action = " . $dbh->quote($mode_of_action);
-    my $mode_of_action_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    # The message now names the table that is actually queried.
-    defined($mode_of_action_id) && $mode_of_action_id ne "" or die "$mode_of_action was not found in Mode_of_action table";
-    return $mode_of_action_id;
-}
-
-# Return the evidence_id for an evidence reference, creating the Evidence row
-# when it does not exist yet.
-#
-#   $evidence - reference of the form "<source name>:<source id>"
-#   $dbh      - database handle
-#
-# Dies when $evidence does not split into exactly two parts on ':' or when the
-# row cannot be read back after the insert.
-sub getEvidenceId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($evidence, $dbh) = @_;
-    my @evidenceArray = split(/:/,$evidence);
-    scalar(@evidenceArray) == 2 or die "Incorrect evidence information. Evidence source name or id missing";
-
-    # quote() escapes the values so an apostrophe cannot break the statements.
-    my ($source_name, $source_id) = map { $dbh->quote($_) } @evidenceArray;
-    my $select = "SELECT evidence_id FROM Evidence WHERE source_name = $source_name and source_id=$source_id";
-
-    my $evidence_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    return $evidence_id if(defined($evidence_id) && $evidence_id ne "");
-
-    # Not recorded yet: insert, then look the row up exactly once more. The
-    # earlier self-recursion never terminated if the row stayed invisible.
-    my $insert = "INSERT INTO Evidence(`source_name`,`source_id`) VALUES($source_name,$source_id)";
-    executeDbQuery($insert,$dbh);
-    $evidence_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    defined($evidence_id) && $evidence_id ne "" or die "$evidence could not be stored in Evidence table";
-    return $evidence_id;
-}
-
-# Look up the evidence_code_id of the Evidence_code row whose evidence_code
-# equals the given code.
-#
-#   $evidence_code - evidence code to search for
-#   $dbh           - database handle
-#
-# Returns the evidence_code_id. Dies when no matching row is found.
-sub getEvidenceCodeId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($evidence_code, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT evidence_code_id FROM Evidence_code WHERE evidence_code = " . $dbh->quote($evidence_code);
-    my $evidence_code_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    # The message now names the table that is actually queried.
-    defined($evidence_code_id) && $evidence_code_id ne "" or die "$evidence_code was not found in Evidence_code table";
-    return $evidence_code_id;
-}
-
-
-# Look up the experiment_id of the Experiment row whose experiment_name equals
-# the given name.
-#
-#   $experiment - experiment name to search for
-#   $dbh        - database handle
-#
-# Returns the experiment_id. Dies when no matching row is found.
-sub getExperimentId{
-    # Lexicals: the former global assignment overwrote the shared $dbh/$query.
-    my ($experiment, $dbh) = @_;
-    # quote() escapes the value so an apostrophe cannot break the statement.
-    my $query = "SELECT experiment_id FROM Experiment WHERE experiment_name = " . $dbh->quote($experiment);
-    my $experiment_id = executeDbQueryAndFetchRowArray($query,$dbh);
-    # No row yields undef, which must be tested before the string comparison.
-    defined($experiment_id) && $experiment_id ne "" or die "$experiment was not found in Experiment table";
-    return $experiment_id;
-}
-
-# Return the curator_id for a lab/user pair, creating the Curator row when it
-# does not exist yet. Uses the package-level database handle $dbh.
-#
-#   first argument  - lab name
-#   second argument - user name
-#
-# Dies when the row cannot be read back after the insert.
-sub getCuratorId{
-    # Lexicals: assigning @_ to the globals $labName/$userName changed the
-    # caller-visible state on every call.
-    my ($lab, $user) = @_;
-
-    # quote() escapes the values so an apostrophe cannot break the statements.
-    my ($quoted_lab, $quoted_user) = map { $dbh->quote($_) } ($lab, $user);
-    my $select = "SELECT curator_id FROM Curator WHERE lab_name = $quoted_lab and user_name = $quoted_user";
-
-    my $curator_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    return $curator_id if(defined($curator_id) && $curator_id ne "");
-
-    # Not recorded yet: insert, then look the row up exactly once more. The
-    # earlier self-recursion never terminated if the row stayed invisible.
-    my $insert = "INSERT INTO Curator(`lab_name`,`user_name`) VALUES($quoted_lab,$quoted_user)";
-    executeDbQuery($insert,$dbh);
-    $curator_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    defined($curator_id) && $curator_id ne "" or die "curator '$lab'/'$user' could not be stored in Curator table";
-    return $curator_id;
-}
-
-
-# Open the MySQL connection described in the ini file section named by
-# SITE_ADJECTIVE (keys: dbUser, dbPw, host, dbName).
-#
-#   $configFile - path of the ini file (optional); defaults to the path that
-#                 used to be hard-coded here
-#
-# Stores the handle in the package-level $dbh, as before, and returns it.
-# Dies when the ini file cannot be loaded or the connection fails.
-sub setUpDBConnection
-{
-    my ($configFile) = @_;
-    $configFile = "/home/balaji/Documents/code/interactions/dbConfig.ini" unless defined($configFile);
-
-    #read config file for db config values
-    # new() returns undef on failure; without this check the first val() call
-    # died with an unrelated "method on undefined value" error.
-    my $ini = Config::IniFiles->new( -file => $configFile )
-        or die "Failed to read db config '$configFile': " . join("; ", @Config::IniFiles::errors);
-    my $dbUser = $ini->val(SITE_ADJECTIVE, 'dbUser');
-    my $dbPw   = $ini->val(SITE_ADJECTIVE, 'dbPw');
-    my $host   = $ini->val(SITE_ADJECTIVE, 'host');
-    my $dbName = $ini->val(SITE_ADJECTIVE, 'dbName');
-
-    # $dbh stays a package global: other subs in this file use it by name.
-    $dbh = DBI->connect("DBI:mysql:$dbName;host=$host",$dbUser,$dbPw,{RaiseError=> 1}) or die("Failed to connect to database - $DBI::errstr");
-    return $dbh;
-}
-
-# Ask on STDIN for the curator's lab name and the curator's name.
-#
-# An empty answer (or end of input) selects the default: "Jaiswal lab" for the
-# lab name, "" for the curator name. The answers are stored in the globals
-# $labName and $curatorName; the curator name is also copied to $userName,
-# the variable the rest of this file uses for it.
-#
-# Returns the list (lab name, curator name).
-sub getCuratorProperties{
-    print "Enter curator lab name or press 'enter'. Default is Jaiswal Lab.\n";
-    $labName = <STDIN>;
-    $labName = "" unless defined($labName);      # end of input
-    # Drop the line terminator; it used to end up inside the stored name.
-    $labName =~ s/[\r\n]+\z//;
-    if($labName eq ""){
-        $labName = "Jaiswal lab";
-    }
-
-    print "Enter curator name or press 'enter'. Default is ''.\n";
-    $curatorName = <STDIN>;
-    $curatorName = "" unless defined($curatorName);   # end of input
-    $curatorName =~ s/[\r\n]+\z//;
-    $userName = $curatorName;
-
-    return ($labName, $curatorName);
-}
-
-# Return the species_id of the species named by the global $speciesName
-# (compared case-insensitively), creating the Species row with the global
-# $ncbi_id when it does not exist yet. Uses the package-level handle $dbh.
-#
-# Dies when the row cannot be read back after the insert.
-sub getSpeciesId{
-    # quote() escapes the values so an apostrophe cannot break the statements.
-    my $quoted_name = $dbh->quote($speciesName);
-    my $quoted_ncbi = $dbh->quote($ncbi_id);
-    my $select = "SELECT species_id from Species WHERE LOWER(`species`) = LOWER($quoted_name)";
-
-    my $species_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    return $species_id if(defined($species_id) && $species_id ne "");
-
-    # The UPDATE clause used the never-assigned $species_name, which turned a
-    # duplicate-key hit into "species=''"; it now uses $speciesName.
-    my $insert = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ($quoted_name,$quoted_ncbi) ON DUPLICATE KEY UPDATE species=$quoted_name";
-    executeDbQuery($insert,$dbh);
-
-    # Look the row up exactly once more; the earlier self-recursion never
-    # terminated if the row stayed invisible.
-    $species_id = executeDbQueryAndFetchRowArray($select,$dbh);
-    defined($species_id) && $species_id ne "" or die "species '$speciesName' could not be stored in Species table";
-    return $species_id;
-
-}
-# Print the one-argument command-line synopsis of this script.
-sub showUsage(){
-    # Assemble the text line by line; the trailing empty element supplies the
-    # final newline.
-    my @usage_lines = ("Usage: ", "\tperl interactionPathsFromsif <sifFileName>", "");
-    my $usage_text = join("\n", @usage_lines);
-    print $usage_text;
-}
-
-
-# Ask on STDIN for the species name and its NCBI taxonomy id and store them
-# in the globals $speciesName and $ncbi_id.
-#
-# The species name is mandatory; an empty taxonomy id is stored as "".
-# Returns the list (species name, taxonomy id).
-# Dies when no species name is entered.
-sub getSpeciesProperties{
-    print "Enter the name of the species\n";
-    $speciesName = <STDIN>;
-    $speciesName = "" unless defined($speciesName);   # end of input
-    # Drop the line terminator; it used to end up inside the stored name, and
-    # the emptiness test below only recognised a bare "\n".
-    $speciesName =~ s/[\r\n]+\z//;
-    $speciesName ne "" || die "Species name can not be empty";
-
-    print "Enter the ncbi taxonomy id of the species\n";
-    $ncbi_id = <STDIN>;
-    $ncbi_id = "" unless defined($ncbi_id);           # end of input
-    $ncbi_id =~ s/[\r\n]+\z//;
-
-    return ($speciesName, $ncbi_id);
-}
-# Read the .sif file named by the global $fileName and echo every complete
-# interaction line (left accession, interaction type, right accession) to
-# STDOUT. The species id from getSpeciesId() is printed first.
-#
-# Lines where any of the three tab-separated columns is missing or empty are
-# skipped. Dies when the file cannot be opened.
-sub importSifData{
-    print "reading seed interaction information from $fileName\n";
-
-    # Three-argument open with a lexical handle: the file name is never
-    # interpreted as a mode/pipe, and the handle is closed when done.
-    open(my $interactions_fh, '<', $fileName) || die "Error: file '$fileName' can not be opened: $!\n";
-    my $species_id = getSpeciesId();
-    # Newline added so the first interaction does not share this line.
-    print "Species id: ".$species_id."\n";
-    while(my $entry = <$interactions_fh>){
-        # strip off newline characters
-        $entry =~ s/[\r\n]//g;
-        # split the columns into separate variables
-        my ($accession_left,$interaction_type,$accession_right) = split(/\t/, $entry);
-        # skip if any of the variables are empty
-        next if(!defined($accession_left) || $accession_left eq "");
-        next if(!defined($interaction_type) || $interaction_type eq "");
-        next if(!defined($accession_right) || $accession_right eq "");
-
-        print $accession_left."\t".$interaction_type."\t".$accession_right."\n";
-    }
-    close($interactions_fh);
-}
-
-# ---------------------------------------------------------------------------
-# Script entry point: expects exactly one argument, the .sif file to import.
-#
-# The variables below are deliberately package globals (no `my`): the subs
-# above read and write them by name.
-# ---------------------------------------------------------------------------
-if($#ARGV != 0){
-    print "Invalid inputs\n";
-    showUsage();
-    # Stop here; previously the script carried on with an undefined file name.
-    exit 1;
-}
-
-$fileName = $ARGV[0];
-$labName="";
-$userName="";
-$speciesName="";
-$ncbi_id = "";
-$dbh = "";
-# NOTE(review): this accepts ".sif" anywhere in the name, not only as the
-# extension - confirm whether a suffix match is wanted.
-if(index($fileName,".sif") != -1){
-    print "Make sure you have the following information:\n";
-    print "\tspecies to which the gene accessions in the .sif file belong to\n";
-    print "\tncbi id of the species\n";
-    getCuratorProperties();
-    getSpeciesProperties();
-    $dbh = setUpDBConnection();
-    importSifData();
-}
-else{
-    print "File entered is not a .sif file\n";
-    showUsage();
-    # Report the failure through the exit status as well.
-    exit 1;
-}