From: athreyab Date: Mon, 23 Jan 2012 01:29:52 +0000 (+0000) Subject: modified interaction.pl to suit both sif and tsv format X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=8683398a6b24e8a6253a012783e409f6f9367bf7;p=old-jaiswallab-svn%2F.git modified interaction.pl to suit both sif and tsv format svn path=/; revision=270 --- diff --git a/Personnel/athreyab/interactions/gene_IDs.tsv b/Personnel/athreyab/interactions/gene_IDs.tsv index a2b0c1d..3ba6215 100644 --- a/Personnel/athreyab/interactions/gene_IDs.tsv +++ b/Personnel/athreyab/interactions/gene_IDs.tsv @@ -20,7 +20,3 @@ LOC_Os08g34580 yes no TPS8 trehalose-6-phosphate synthase 8 OsTPS8 Oryza sativa LOC_Os09g25890 yes no TPS9 trehalose-6-phosphate synthase 9 OsTPS9 Oryza sativa 4530 LOC_Os09g23350 yes no TPS10 trehalose-6-phosphate synthase 10 OsTPS10 Oryza sativa 4530 LOC_Os09g20990 yes no TPS11 trehalose-6-phosphate synthase 11 OsTPS11 Oryza sativa 4530 - - - -* value = yes/no diff --git a/Personnel/athreyab/interactions/interactionPathsFromSif.pl b/Personnel/athreyab/interactions/interactionPathsFromSif.pl index 37709de..90406ec 100644 --- a/Personnel/athreyab/interactions/interactionPathsFromSif.pl +++ b/Personnel/athreyab/interactions/interactionPathsFromSif.pl @@ -155,14 +155,14 @@ sub importSifData next if(isEmpty($accession_right)); #get object_id for object in the first column - $accession_left_id = getObjectIdFromAccessionLeft($accession_left); + $accession_left_id = getObjectIdFromAccession($accession_left); #get interaction_type_id for object in the first column $interaction_type_id = getInteractionTypeForSifFormat($interaction_type); #get object_id for object in the second column - $accesion_right_id = getObjectIdFromAccessionRight($accession_right); - print "$accession_left\t$accession_right\n"; - print "$accession_left_id\t$interaction_type_id\t$accession_right_id\n"; + $accesion_right_id = getObjectIdFromAccession($accession_right); + next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id)); + #check if the interaction is already recorded. Write to database only if it doesn't exist already. $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curator_id); if(isEmpty($interactionId)){ diff --git a/Personnel/athreyab/interactions/interactionPathsFromTsv.pl b/Personnel/athreyab/interactions/interactionPathsFromTsv.pl index a70da8b..7bb8152 100644 --- a/Personnel/athreyab/interactions/interactionPathsFromTsv.pl +++ b/Personnel/athreyab/interactions/interactionPathsFromTsv.pl @@ -30,30 +30,34 @@ sub importSeedInteractionData(){ $entry =~ s/\n//g; # split the columns into separate variables my ($obj_abbr_left,$int_type_left,$mode_of_action,$obj_abbr_right,$int_type_right,$interaction_type,$evidence,$evidence_code,$experiment,$comments) = split("\t", $entry); - # skip if any of the variables are empty - next if(!defined($obj_abbr_left) || $obj_abbr_left eq ""); - next if(!defined($obj_abbr_right) || $obj_abbr_right eq ""); - next if(!defined($int_type_left) || $int_type_left eq ""); - next if(!defined($int_type_right) || $int_type_right eq ""); - next if(!defined($mode_of_action) || $mode_of_action eq ""); - next if(!defined($interaction_type) || $interaction_type eq ""); - next if(!defined($evidence) || $evidence eq ""); - next if(!defined($evidence_code) || $evidence_code eq ""); - next if(!defined($experiment) || $experiment eq ""); + #we need all the 3 values before we can insert data into the interaction table. + next if(isEmpty($obj_abbr_left)); + next if(isEmpty($obj_abbr_right)); + next if(isEmpty($interaction_type)); #get ids(primary keys) from other tables $obj_id_left = getObjectIdFromAbbreviation($obj_abbr_left); $obj_id_right = getObjectIdFromAbbreviation($obj_abbr_right); $interaction_type_id = getInteractionTypeId($interaction_type); - #we need a minimum of the above 3 values before we can insert data into the interaction table. - next if(!defined($obj_id_left) || $obj_id_left eq "" || !defined($obj_id_right) || $obj_id_right eq "" || !defined($interaction_type_id) || $interaction_type_id eq ""); + #if any of the above values are empty, it means they are empty. print that message and move to the next line + if(isEmpty($obj_id_left)){ + print "$obj_abbr_left was not found in the db. Skipping this line\n"; + next; + } + elsif(isEmpty($obj_id_right)){ + print "$obj_abbr_right was not found in the db. Skipping this line\n"; + next; + } + elsif(isEmpty($interaction_type_id)){ + print "$interaction_type was not found in the db. Skipping this line\n"; + next; + } $int_type_id_left = getInteractorTypeId($int_type_left); $int_type_id_right = getInteractorTypeId($int_type_right); $mode_of_action_id = getModeOfActionId($mode_of_action); $evidence_id = getEvidenceId($evidence); $evidence_code_id = getEvidenceCodeId($evidence_code); $experiment_id = getExperimentId($experiment); - $curator_id = getCuratorId(); print $obj_id_left."\t".$obj_id_right."\t".$int_type_id_left."\t".$int_type_id_right."\t".$interaction_type_id. "\t".$evidence_id."\t".$evidence_code_id."\t".$experiment_id.$curator_id."\n"; @@ -78,48 +82,45 @@ sub importGeneInformation(){ $entry =~ s/\n//g; my ($accession, $is_gene,$is_metabolite,$symbol,$full_name,$synonym,$species_name,$ncbi_id) = split("\t", $entry); - - next if(!defined($accession) || $accession eq ""); - next if((!defined($is_gene) || $is_gene eq "") && (!defined($is_metabolite) || $is_metabolite eq "")); - next if(!defined($symbol) || $symbol eq ""); - next if(!defined($full_name) || $full_name eq ""); - next if(!defined($synonym) || $synonym eq ""); - next if(!defined($species_name) || $species_name eq ""); - next if(!defined($ncbi_id) || $ncbi_id eq ""); - - $object_type=1; + #skip the line if accession value is empty + next if(isEmpty($accession)); + $object_type=1; if($is_metabolite eq "yes"){ $object_type=2; } - - #first, insert the species name into the species table. - $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$species_name','$ncbi_id') ON DUPLICATE KEY UPDATE species='$species_name'"; - executeDbQuery($query); - #get the species_id - $query = "SELECT species_id FROM Species WHERE species = '$species_name'"; - $species_id = executeDbQueryAndFetchRowArray($query); + $species_id = getSpeciesId($species_name,$ncbi_id); + #if the species is not present already, insert it + if(isEmpty($species_id)){ + if(!isEmpty($ncbi_id)){ + $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$species_name','$ncbi_id')"; + } + else{ + $query = "INSERT INTO Species(`species`) VALUES ('$species_name')"; + } + executeDbQuery($query); + $species_id = getSpeciesId($species_name,$ncbi_id); + } - #insert synonym into the synonym table. - $query = "INSERT INTO Synonym(`synonym`) VALUES('$synonym') ON DUPLICATE KEY UPDATE synonym='$synonym'"; - executeDbQuery($query); - - #get the synonym_id - $query = "SELECT synonym_id FROM Synonym WHERE synonym = '$synonym'"; - $synonym_id = executeDbQueryAndFetchRowArray($query); + #gets synonym_id if exists already. else, inserts synonym and returns the id. + $synonym_id = getSynonymId($synonym); - #insert gene information into the object table - $query = "INSERT INTO Object(`object_accession`,`species_id`,`object_full_name`,`object_abbreviation`,`object_type_id`) VALUES('$accession','$species_id','$full_name','$symbol','$object_type') ON DUPLICATE KEY UPDATE object_accession='$accession'"; + $query = "INSERT INTO Object(`object_accession`,`species_id`,`object_full_name`,`object_abbreviation`,`object_type_id`) VALUES('$accession','$species_id','$full_name','$symbol','$object_type') ON DUPLICATE KEY UPDATE object_accession='$accession' "; + if(!isEmpty($full_name)){ + $query = $query." , object_full_name='$full_name'"; + } + if(!isEmpty($full_name)){ + $query = $query." , object_abbreviation='$symbol'"; + } + if(!isEmpty($object_type)){ + $query = $query." , object_type_id='$object_type'"; + } executeDbQuery($query); - - #get the object_id - $query = "SELECT object_id FROM Object WHERE object_accession = '$accession'"; - $object_id = executeDbQueryAndFetchRowArray($query); + $object_id = getObjectIdFromAccession($accession); #next, insert object_id and synonym_id into object_synonym table. - $query = "INSERT INTO Object_synonym(`object_id`,`synonym_id`) VALUES ('$object_id','$synonym_id') ON DUPLICATE KEY UPDATE object_id='$object_id'"; + $query = "INSERT INTO Object_synonym(`object_id`,`synonym_id`) VALUES ('$object_id','$synonym_id')"; executeDbQuery($query); - } } @@ -128,10 +129,12 @@ sub importGeneInformation(){ if($#ARGV != 1){ print "Invalid inputs\n"; showUsage(); + die; } $optionId = $ARGV[0]; $fileName = $ARGV[1]; +$curatorId = ""; $labName = ""; $userName = ""; $dbh = setUpDBConnection(); @@ -142,7 +145,7 @@ switch($optionId){ } case "2" { - getCuratorProperties(); + $curatorId = getCuratorId(getCuratorProperties()); importSeedInteractionData(); } else{