From 161a33fbf7bdf069a3c291a63e9d7ae0d8362cfc Mon Sep 17 00:00:00 2001 From: athreyab Date: Tue, 28 Feb 2012 22:44:18 +0000 Subject: [PATCH] committing missed files and minor changes to the sif script svn path=/; revision=303 --- .../athreyab/interactions/interactionPath.pl | 268 ++++++++++++++++++ .../interactions/interactionPathsFromSif.pl | 11 +- .../athreyab/interactions/species_ncbi.pl | 36 +++ 3 files changed, 314 insertions(+), 1 deletion(-) create mode 100644 Personnel/athreyab/interactions/interactionPath.pl create mode 100644 Personnel/athreyab/interactions/species_ncbi.pl diff --git a/Personnel/athreyab/interactions/interactionPath.pl b/Personnel/athreyab/interactions/interactionPath.pl new file mode 100644 index 0000000..c3000d4 --- /dev/null +++ b/Personnel/athreyab/interactions/interactionPath.pl @@ -0,0 +1,268 @@ + +use DBI; +use Config::IniFiles; + + +$dbh = setUpDBConnection(); +$species_ncbi_hash={}; +loadSpeciesNcbiHash(); +#check of empty or undefined string +sub isEmpty +{ + ($arg) = @_; + return (!defined($arg) || $arg eq ""); +} + +#executes a db query. doesn't return anything - for insert/update +sub executeDbQuery{ + ($query) = @_; + $stmnt = $dbh->prepare("$query"); + #print $query; + $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr; +} + +#executes a db query and returns the first row as an array. +sub executeDbQueryAndFetchRowArray{ + ($query) = @_; + $stmnt = $dbh->prepare("$query"); + $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr; + return $stmnt->fetchrow_array(); +} + +#get object_id from Object table if object_abbreviation is given +sub getObjectIdFromAbbreviation{ + ($obj_abbr) = @_; + $query = "SELECT object_id FROM Object WHERE object_abbreviation = '$obj_abbr'"; + return executeDbQueryAndFetchRowArray($query); +} + +#get object_id from Object table if accession id is given +sub getObjectIdFromAccession{ + ($obj_acc) = @_; + $query = "SELECT object_id FROM Object WHERE object_accession = '$obj_acc'"; + $obj_id = executeDbQueryAndFetchRowArray($query); + return $obj_id; +} + +#get interactor type ID from Interactor_type table. +#possible interactor types - DNA, RNA, gene, metabolite, protein, NA +sub getInteractorTypeId{ + ($int_type) = @_; + $query = "SELECT interactor_type_id FROM Interactor_type WHERE interactor_type = '$int_type'"; + $int_type_id = executeDbQueryAndFetchRowArray($query); + return $int_type_id; +} + +#get interaction type ID from Interaction_type table. +#possible interactor types - protein-protein binding, down regulation, up regulation, co-expression +sub getInteractionTypeId{ + ($interaction_type) = @_; + $query = "SELECT interaction_type_id FROM Interaction_type WHERE interaction_type = '$interaction_type'"; + $interaction_type_id = executeDbQueryAndFetchRowArray($query); + return $interaction_type_id; +} + +#get synonym id from Synonym table. If not present already, insert it. +sub getSynonymId{ + ($synonym) = @_; + $query = "SELECT synonym_id FROM Synonym WHERE synonym = '$synonym'"; + $synonym_id = executeDbQueryAndFetchRowArray($query); + if(isEmpty($synonym_id)){ + $query = "INSERT INTO Synonym(`synonym`) VALUES('$synonym')"; + executeDbQuery($query); + return getSynonymId($synonym); + } + + return $synonym_id; +} + +sub getModeOfActionId{ + ($mode_of_action) = @_; + $query = "SELECT mode_of_action_id FROM Mode_of_action WHERE mode_of_action = '$mode_of_action'"; + $mode_of_action_id = executeDbQueryAndFetchRowArray($query); + return $mode_of_action_id; +} + +#get evidence id. If it doesn't exist already, insert into the table +sub getEvidenceId{ + ($evidence) = @_; + @evidenceArray = split(/:/,$evidence); + $query = "SELECT evidence_id FROM Evidence WHERE source_name = '$evidenceArray[0]' and source_id='$evidenceArray[1]'"; + $evidence_id = executeDbQueryAndFetchRowArray($query); + if($evidence_id eq ""){ + $query = "INSERT INTO Evidence(`source_name`,`source_id`) VALUES('$evidenceArray[0]','$evidenceArray[1]')"; + executeDbQuery($query); + return getEvidenceId($evidence); + } + return $evidence_id; +} + +sub getEvidenceCodeId{ + ($evidence_code) = @_; + $query = "SELECT evidence_code_id FROM Evidence_code WHERE evidence_code = '$evidence_code'"; + $evidence_code_id = executeDbQueryAndFetchRowArray($query); + return $evidence_code_id; +} + + +sub getExperimentId{ + ($experiment) = @_; + $query = "SELECT experiment_id FROM Experiment WHERE experiment_name = '$experiment'"; + $experiment_id = executeDbQueryAndFetchRowArray($query); + return $experiment_id; +} + +#get interaction id from Interaction Table. +sub getInteractionId{ + ($accession_left_id,$interaction_type_id,$accession_right_id,$curatorId) = @_; + $query = "SELECT interaction_id from Interaction where `object_id_left` = '$accession_left_id' and `object_id_right` = '$accession_right_id' and `interaction_type_id` = '$interaction_type_id' and `curator_id` = '$curatorId'"; + $interaction_id = executeDbQueryAndFetchRowArray($query); + if(!isEmpty($interaction_id)){ + return $interaction_id; + } + #if the interaction type is neither up-regulation nor down-regulation, the interaction is not + #direction specific.try flipping the objects and see if the interaction exists.if yes, return that interaction_id + elsif($interaction_type_id ne 2 || $interaction_type_id ne 3){ + $query = "SELECT interaction_id from Interaction where `object_id_left` = '$accession_right_id' and `object_id_right` = '$accession_left_id' and `interaction_type_id` = '$interaction_type_id' and `curator_id` = '$curatorId'"; + $interaction_id = executeDbQueryAndFetchRowArray($query); + if(!isEmpty($interaction_id)){ + return $interaction_id; + } + } + return ""; +} + +sub getCuratorId{ + ($labName,$userName) = @_; + $query = "SELECT curator_id FROM Curator WHERE lab_name = '$labName' and user_name = '$userName'"; + $curator_id = executeDbQueryAndFetchRowArray($query); + if(!isEmpty($curator_id)){ + return $curator_id; + } + else{ + $query = "INSERT INTO Curator(`lab_name`,`user_name`) VALUES('$labName','$userName')"; + executeDbQuery($query); + return getCuratorId($labName,$userName); + } +} + + +sub setUpDBConnection +{ + #read config file for db config values + $ini = Config::IniFiles->new( -file => "dbConfig.ini"); + $dbUser=$ini->val(SITE_ADJECTIVE, 'dbUser'); + $dbPw=$ini->val(SITE_ADJECTIVE, 'dbPw'); + $host=$ini->val(SITE_ADJECTIVE, 'host'); + $dbName=$ini->val(SITE_ADJECTIVE, 'dbName'); + $dbh = DBI->connect("DBI:mysql:$dbName;host=$host",$dbUser,$dbPw,{RaiseError=> 1}) or die("Failed to connect to database - $DBI::errstr"); + return $dbh; +} + +sub getCuratorProperties +{ + print "Enter curator lab name or press 'enter'. Default is Jaiswal Lab.\n"; + $labName = ; + if($labName eq "\n"){ + $labName = "Jaiswal lab"; + } + print "Enter curator name or press 'enter'. Default is ''.\n"; + $curatorName = ; + if($curatorName eq "\n"){ + $curatorName = ""; + } + return ($labName,$curatorName); +} + + +sub getSpeciesProperties +{ + ($askForBoth) = @_; + print "\nEnter the name of the species\n"; + $speciesName = ; + $speciesName =~ s/\n//g; + if(isEmpty($speciesName) || $askForBoth == 1){ + print "Enter the ncbi taxonomy id of the species\n"; + $ncbi_id = ; + $ncbi_id =~ s/\n//g; + } + #just to suppress warnings + if(isEmpty($speciesName)){ + $speciesName = ""; + } + #just to suppress warnings + if(isEmpty($ncbi_id)){ + $ncbi_id = ""; + } + return ("$speciesName","$ncbi_id"); +} + +sub getSpeciesId +{ + ($speciesName,$ncbi_id) = @_; + $query = "SELECT species_id from Species WHERE LOWER(`species`) = LOWER('$speciesName')"; + $species_id = executeDbQueryAndFetchRowArray($query); + if(isEmpty($species_id) && !isEmpty($ncbi_id)){ + $query = "SELECT species_id from Species WHERE `NCBI_taxonomy_id` = '$ncbi_id'"; + $species_id = executeDbQueryAndFetchRowArray($query); + } + !isEmpty($species_id) || print "Your entry was not found. Choose from the list or check for spelling mistakes \n"; + + return $species_id; +} +sub addNewSpecies +{ + ($speciesName,$ncbi_id) = @_; + $speciesId = getSpeciesId(($speciesName,$ncbi_id)); + if(!isEmpty($speciesId)){ + print "$speciesName or $ncbi_id already exists\n"; + return $speciesId; + } + else{ + $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES('$speciesName','$ncbi_id')"; + executeDbQuery($query); + print "New species successfully added\n"; + return getSpeciesId($speciesName,$ncbi_id); + } +} + +sub getHomologyIdAndScore +{ + ($obj_id_l,$obj_id_r,$score) = @_; + $query = "SELECT homology_id,score from Homology where `object_id_source` = '$obj_id_l' and `object_id_projection` = '$obj_id_r'"; + ($homologyId,$score) = executeDbQueryAndFetchRowArray($query); + return ($homologyId,$score); +} + +sub loadSpeciesNcbiHash +{ + $query = 'SELECT NCBI_taxonomy_id,species FROM Species'; + $stmnt = $dbh->prepare("$query"); + $stmnt->execute() or die "Couldn't execute statement: " . $stmnt->errstr; + while(($ncbi_id,$species) = $stmnt->fetchrow_array()){ + + # skip + next if(!defined($species) || !defined($species)); + $species_ncbi_hash{"$ncbi_id"}="$species"; + } +} + +sub showAvailableSpecies +{ + print "Existing ncbi ids and species names:\n"; + $out = ""; + $i=0; + while (($key, $value) = each(%species_ncbi_hash)){ + $out = $out."\t$key:$value\t"; + #just some formatting - 3 species in a line + if($i == 1){ + $i=0; + $out=$out."\n" + } + else{ + $i=$i+1; + } + } + print $out."\n"; +} +return 1; diff --git a/Personnel/athreyab/interactions/interactionPathsFromSif.pl b/Personnel/athreyab/interactions/interactionPathsFromSif.pl index cbf4df7..2784a76 100644 --- a/Personnel/athreyab/interactions/interactionPathsFromSif.pl +++ b/Personnel/athreyab/interactions/interactionPathsFromSif.pl @@ -201,8 +201,17 @@ sub importSifData #but in the future, we may allow people other than the devs to upload the data themselves. $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curatorId); if(isEmpty($interactionId)){ - $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`curator_id`,`date`) + # if the interaction type is pp, automatically fill interactor_type_id_left, interactor_type_id_right and mode_of_action_id values appropriately. i.e - protein, protein, binds respectively + if($interaction_type == "pp"){ + $interactor_type_id = getInteractorTypeId("protein"); + $mode_of_action_id = getModeOfActionId("binds"); + $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`, `interactor_type_id_left`, `interactor_type_id_left`, `mode_of_action_id`, `curator_id`,`date`) + VALUES('$accession_left_id','$accession_right_id','$interaction_type_id','$interactor_type_id','$interactor_type_id','$mode_of_action_id','$curatorId', '".strftime("%Y-%m-%d", localtime)."')"; + } + else{ + $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`curator_id`,`date`) VALUES('$accession_left_id','$accession_right_id','$interaction_type_id','$curatorId', '".strftime("%Y-%m-%d", localtime)."')"; + } executeDbQuery($query); } } diff --git a/Personnel/athreyab/interactions/species_ncbi.pl b/Personnel/athreyab/interactions/species_ncbi.pl new file mode 100644 index 0000000..1317f3f --- /dev/null +++ b/Personnel/athreyab/interactions/species_ncbi.pl @@ -0,0 +1,36 @@ +use FreezeThaw qw(freeze thaw); # Import freeze() and thaw() +# Create sample object +open(species_ncbi_list, "species_ncbi_list") || die "Error: file 'species_ncbi_list' can not be opened\n"; +%species_ncbi_hash; +while(){ + $entry = $_; + # strip off newline characters + chomp $entry; + $entry =~ s/\r//g; + $entry =~ s/\n//g; + # split the columns into 3 separate variables + ($ncbi_id, $species) = split("\t", $entry); + + # skip + next if(!defined($species) || !defined($species)); + $species_ncbi_hash{"$ncbi_id"}="$species"; +} +$msg = freeze(%species_ncbi_hash); +open(File,"> frozenHash") || die "couldn't open file"; +syswrite (File, $msg, length($msg)); +print "successfully written into the file\n"; +open(frozenHash, "frozenHash") || die "Error: file frozenHash can not be opened\n"; +while(){ + $entry = $_; +} + %deserialized_data = %{ thaw($entry) }; +print length(%deserialized_data); +while(($key, $value) = each(%deserialized_data)) { + print "something \n"; + # do something with $key and $value + print "$key : $value \n"; +} + + + + -- 2.34.1