Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
modified interaction.pl to suit both sif and tsv format
author: athreyab <athreyab@localhost>
Mon, 23 Jan 2012 01:29:52 +0000 (01:29 +0000)
committer: athreyab <athreyab@localhost>
Mon, 23 Jan 2012 01:29:52 +0000 (01:29 +0000)
svn path=/; revision=270

Personnel/athreyab/interactions/gene_IDs.tsv
Personnel/athreyab/interactions/interactionPathsFromSif.pl
Personnel/athreyab/interactions/interactionPathsFromTsv.pl

index a2b0c1d43711882a1c253b472b9a152172ec7907..3ba6215e52800291b9de1731dcd5f8b625ddd700 100644 (file)
@@ -20,7 +20,3 @@ LOC_Os08g34580        yes     no      TPS8    trehalose-6-phosphate synthase 8        OsTPS8  Oryza sativa
 LOC_Os09g25890 yes     no      TPS9    trehalose-6-phosphate synthase 9        OsTPS9  Oryza sativa    4530
 LOC_Os09g23350 yes     no      TPS10   trehalose-6-phosphate synthase 10       OsTPS10 Oryza sativa    4530
 LOC_Os09g20990 yes     no      TPS11   trehalose-6-phosphate synthase 11       OsTPS11 Oryza sativa    4530
-                                                       
-                                                       
-                                                       
-* value = yes/no                                                       
index 37709de1855b4abaf4c33f036d4b6ba564c7cefb..90406ecbdc5445a57fb61c76089aecc868a029e9 100644 (file)
@@ -155,14 +155,14 @@ sub importSifData
                next if(isEmpty($accession_right));
       
                #get object_id for object in the first column
-               $accession_left_id = getObjectIdFromAccessionLeft($accession_left);
+               $accession_left_id = getObjectIdFromAccession($accession_left);
                #get interaction_type_id for object in the first column
                $interaction_type_id = getInteractionTypeForSifFormat($interaction_type);
                #get object_id for object in the second column
-               $accesion_right_id = getObjectIdFromAccessionRight($accession_right);
-               print "$accession_left\t$accession_right\n";
-               print "$accession_left_id\t$interaction_type_id\t$accession_right_id\n";
+               $accesion_right_id = getObjectIdFromAccession($accession_right);
+               
                next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
+
                #check if the interaction is already recorded. Write to database only if it doesn't exist already.
                $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curator_id);
                if(isEmpty($interactionId)){
index a70da8bb98b3fd30edbffd250cd66b177b19cfba..7bb8152fe1ceecb93c85abc515bbd61c77154898 100644 (file)
@@ -30,30 +30,34 @@ sub importSeedInteractionData(){
       $entry =~ s/\n//g;
       # split the columns into separate variables
       my ($obj_abbr_left,$int_type_left,$mode_of_action,$obj_abbr_right,$int_type_right,$interaction_type,$evidence,$evidence_code,$experiment,$comments) = split("\t", $entry);
-      # skip if any of the variables are empty
-      next if(!defined($obj_abbr_left) || $obj_abbr_left eq "");
-      next if(!defined($obj_abbr_right) || $obj_abbr_right eq "");
-      next if(!defined($int_type_left) || $int_type_left eq "");
-               next if(!defined($int_type_right) || $int_type_right eq "");
-               next if(!defined($mode_of_action) || $mode_of_action eq "");
-               next if(!defined($interaction_type) || $interaction_type eq "");
-               next if(!defined($evidence) || $evidence eq "");
-               next if(!defined($evidence_code) || $evidence_code eq "");
-               next if(!defined($experiment) || $experiment eq "");
+      #we need all the 3 values before we can insert data into the interaction table.
+      next if(isEmpty($obj_abbr_left));
+      next if(isEmpty($obj_abbr_right));
+               next if(isEmpty($interaction_type));
       
       #get ids(primary keys) from other tables
       $obj_id_left = getObjectIdFromAbbreviation($obj_abbr_left);
       $obj_id_right = getObjectIdFromAbbreviation($obj_abbr_right);
       $interaction_type_id = getInteractionTypeId($interaction_type);
-      #we need a minimum of the above 3 values before we can insert data into the interaction table.
-      next if(!defined($obj_id_left) || $obj_id_left eq "" || !defined($obj_id_right) || $obj_id_right eq "" || !defined($interaction_type_id) || $interaction_type_id eq "");
+       #if any of the above ids are empty, the corresponding value was not found in the db. Print a message and move to the next line
+               if(isEmpty($obj_id_left)){
+                       print "$obj_abbr_left was not found in the db. Skipping this line\n";
+                       next;
+               }
+               elsif(isEmpty($obj_id_right)){
+                       print "$obj_abbr_right was not found in the db. Skipping this line\n";
+                       next;
+               }
+               elsif(isEmpty($interaction_type_id)){
+                       print "$interaction_type was not found in the db. Skipping this line\n";
+                       next;
+               }
       $int_type_id_left = getInteractorTypeId($int_type_left);
       $int_type_id_right = getInteractorTypeId($int_type_right);
       $mode_of_action_id = getModeOfActionId($mode_of_action);
       $evidence_id = getEvidenceId($evidence);
       $evidence_code_id = getEvidenceCodeId($evidence_code);
       $experiment_id = getExperimentId($experiment);
-               $curator_id = getCuratorId();
       
       print $obj_id_left."\t".$obj_id_right."\t".$int_type_id_left."\t".$int_type_id_right."\t".$interaction_type_id.
       "\t".$evidence_id."\t".$evidence_code_id."\t".$experiment_id.$curator_id."\n";
@@ -78,48 +82,45 @@ sub importGeneInformation(){
       $entry =~ s/\n//g;
       
       my ($accession, $is_gene,$is_metabolite,$symbol,$full_name,$synonym,$species_name,$ncbi_id) = split("\t", $entry);
-      
-      next if(!defined($accession) || $accession eq "");
-               next if((!defined($is_gene) || $is_gene eq "") && (!defined($is_metabolite) || $is_metabolite eq ""));
-               next if(!defined($symbol) || $symbol eq "");
-               next if(!defined($full_name) || $full_name eq "");
-               next if(!defined($synonym) || $synonym eq "");
-               next if(!defined($species_name) || $species_name eq "");
-               next if(!defined($ncbi_id) || $ncbi_id eq "");
-               
-               $object_type=1;
+      #skip the line if accession value is empty
+      next if(isEmpty($accession));
+      $object_type=1;
                if($is_metabolite eq "yes"){
                        $object_type=2;
                        }
-
-               #first, insert the species name into the species table.         
-               $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$species_name','$ncbi_id') ON DUPLICATE KEY UPDATE species='$species_name'";
-               executeDbQuery($query);         
                
-               #get the species_id 
-               $query = "SELECT species_id FROM Species WHERE species = '$species_name'"; 
-               $species_id = executeDbQueryAndFetchRowArray($query);   
+               $species_id = getSpeciesId($species_name,$ncbi_id);
+               #if the species is not present already, insert it               
+               if(isEmpty($species_id)){
+                       if(!isEmpty($ncbi_id)){
+                               $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$species_name','$ncbi_id')";
+                       }
+                       else{
+                               $query = "INSERT INTO Species(`species`) VALUES ('$species_name')";
+                       }
+                       executeDbQuery($query);
+                       $species_id = getSpeciesId($species_name,$ncbi_id);
+               }
                
-               #insert synonym into the synonym table.
-               $query = "INSERT INTO Synonym(`synonym`) VALUES('$synonym') ON DUPLICATE KEY UPDATE synonym='$synonym'";
-               executeDbQuery($query); 
-                               
-               #get the synonym_id 
-               $query = "SELECT synonym_id FROM Synonym WHERE synonym = '$synonym'"; 
-               $synonym_id = executeDbQueryAndFetchRowArray($query);
+               #gets synonym_id if exists already. else, inserts synonym and returns the id.
+               $synonym_id = getSynonymId($synonym);
                
-               #insert gene information into the object table                                                                                                                                                                                                                  
-               $query = "INSERT INTO Object(`object_accession`,`species_id`,`object_full_name`,`object_abbreviation`,`object_type_id`) VALUES('$accession','$species_id','$full_name','$symbol','$object_type') ON DUPLICATE KEY UPDATE object_accession='$accession'";
+               $query = "INSERT INTO Object(`object_accession`,`species_id`,`object_full_name`,`object_abbreviation`,`object_type_id`) VALUES('$accession','$species_id','$full_name','$symbol','$object_type') ON DUPLICATE KEY UPDATE object_accession='$accession' ";
+               if(!isEmpty($full_name)){
+                       $query = $query." , object_full_name='$full_name'";     
+               }
+               if(!isEmpty($full_name)){
+                       $query = $query." , object_abbreviation='$symbol'";
+               }
+               if(!isEmpty($object_type)){
+                       $query = $query." , object_type_id='$object_type'";     
+               }
                executeDbQuery($query);
-               
-               #get the object_id 
-               $query = "SELECT object_id FROM Object WHERE object_accession = '$accession'";
-               $object_id = executeDbQueryAndFetchRowArray($query);
+               $object_id = getObjectIdFromAccession($accession);
        
                #next, insert object_id and synonym_id into object_synonym table.
-               $query = "INSERT INTO Object_synonym(`object_id`,`synonym_id`) VALUES ('$object_id','$synonym_id') ON DUPLICATE KEY UPDATE object_id='$object_id'";
+               $query = "INSERT INTO Object_synonym(`object_id`,`synonym_id`) VALUES ('$object_id','$synonym_id')";
                executeDbQuery($query);
-       
        }
 
 }
@@ -128,10 +129,12 @@ sub importGeneInformation(){
 if($#ARGV != 1){
        print "Invalid inputs\n";
        showUsage();
+       die;
 }
 
 $optionId = $ARGV[0];
 $fileName = $ARGV[1];
+$curatorId = "";
 $labName = "";
 $userName = "";
 $dbh = setUpDBConnection();
@@ -142,7 +145,7 @@ switch($optionId){
        }
        case "2"
        {
-               getCuratorProperties();
+               $curatorId = getCuratorId(getCuratorProperties());
                importSeedInteractionData();
        }
        else{