Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added a few comments, and added support in the tsv script for checking whether the interaction already exists
author: athreyab <athreyab@localhost>
Tue, 24 Jan 2012 19:51:05 +0000 (19:51 +0000)
committer: athreyab <athreyab@localhost>
Tue, 24 Jan 2012 19:51:05 +0000 (19:51 +0000)
svn path=/; revision=274

Personnel/athreyab/interactions/interactionPathsFromSif.pl
Personnel/athreyab/interactions/interactionPathsFromTsv.pl

index c4cb9ce058018633f213c509659c1065f1abeeca..3a32d0ac809584115c9213cb0f3a2488df1045ac 100644 (file)
@@ -30,8 +30,11 @@ sub showUsage
 
 #interaction type in sif files is in abbreviated format - 'pp','up',''down' etc. This function retrieves the
 #full format.
+#accepts abbreviated interaction type as argument
+#returns interaction type id from db
 sub getInteractionTypeForSifFormat
 {
+       # %interaction_types = map containing abbreviated and full forms of interaction_type as a key-value pair 
        ($abbreviated_interaction_type) = @_;
        while ( ($key, $value) = each %interaction_types )
        {
@@ -42,6 +45,7 @@ sub getInteractionTypeForSifFormat
        }
        $interaction_type ne "" || die "$abbreviated_interaction_type may not be present in the db";
        
+       #get interaction type id from the db
        return getInteractionTypeId($interaction_type); 
 }
 
@@ -57,8 +61,10 @@ sub importParalogData
                chomp $entry;
       $entry =~ s/\r//g;
       $entry =~ s/\n//g;
+      
       # split the columns into 3 separate variables
       my ($obj_l, $obj_r, $score) = split("\t", $entry);
+      
       # skip
       next if(!defined($score));
       next if($score =~ /D/);
@@ -81,7 +87,7 @@ sub importParalogData
                        $obj_id_r = getObjectIdFromAccession($obj_r);
                }
 
-               #check if the homology between the accessions is already recorded. If it is already, check the scores and 
+               #check if the homology between the accessions is already recorded. If it is recorded already, check the scores and 
                #store the highest score. If the relation doesn't exist, insert into the homology table.
                ($homology_id,$origScore) = getHomologyIdAndScore($obj_id_l,$obj_id_r,$score);
                if(isEmpty($homology_id) || $origScore < $score){
@@ -101,12 +107,15 @@ sub importOrthologData
        open(paralog_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
        while(<paralog_file>){
                my $entry = $_;
+               
                # strip off newline characters
                chomp $entry;
       $entry =~ s/\r//g;
       $entry =~ s/\n//g;
+      
       # split the columns into 3 separate variables
       my ($obj_l, $obj_r, $score) = split("\t", $entry);
+      
       # skip
       next if(!defined($score));
       next if($score =~ /D/);
@@ -149,13 +158,15 @@ sub importSifData
        print "reading seed interaction information from $fileName\n";
 
        open(interactions_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
-       print "Species id: ".$speciesIdForSif."\n";
+       
        while(<interactions_file>){
                my $entry = $_;
+               
                # strip off newline characters
                chomp $entry;
                $entry =~ s/\r//g;
       $entry =~ s/\n//g;
+      
       # split the columns into separate variables
       my ($accession_left,$interaction_type,$accession_right) = split("\t", $entry);
       
@@ -175,7 +186,11 @@ sub importSifData
 
                next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
                
-               #check if the interaction is already recorded. Write to database only if it doesn't exist already.
+               #check if the interaction is already recorded. Write to database only if it doesn't exist already.e
+               #an interaction is considered new under two conditions: 
+                       #a. interaction has not been recorded previously
+                       #b. interaction is already recorded, but by a different curator - this is not something that we need right now
+                               #but in the future, we may allow people other than the devs to upload the data themselves.
                $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curator_id);
                print "interaction id is: $interactionId\n";
                if(isEmpty($interactionId)){
@@ -192,7 +207,7 @@ if($optionId ne "0" && $#ARGV != 1){
        showUsage();
 }
 
-
+#some initializations
 $fileName = $ARGV[1];
 $speciesIdForSif="";
 $speciesIdForParalog="";
index 291b4cf94b65ae6f2630bac4269bfb4f503f7e33..99ea7a5a792c0e0a4abb49cc14ce4419009a4b5b 100644 (file)
@@ -21,8 +21,7 @@ sub showUsage(){
        print "Usage: \n\tperl interactionPathsFromTsv <option> <fileName>\n";
        print "options:\n\t";
        print "1 - to import gene id data\n\t";
-       print "2 - to import seed interaction data\n\t";
-       
+       print "2 - to import seed interaction data\n\t";        
 }
 
 #imports the tab separated file containing seed interaction 
@@ -32,6 +31,7 @@ sub importSeedInteractionData(){
        open(interactions_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
        while(<interactions_file>){
                my $entry = $_;
+               
                # strip off newline characters
                chomp $entry;
                $entry =~ s/\r//g;
@@ -50,7 +50,7 @@ sub importSeedInteractionData(){
       $obj_id_right = getObjectIdFromAbbreviation($obj_abbr_right);
       $interaction_type_id = getInteractionTypeId($interaction_type);
        
-       #if any of the above values are empty, it means they are empty. print that message and move to the next line
+       #if any of the above values are empty, print a message and move to the next line
                if(isEmpty($obj_id_left)){
                        print "$obj_abbr_left was not found in the db. Skipping this line\n";
                        next;
@@ -72,13 +72,17 @@ sub importSeedInteractionData(){
       $evidence_code_id = getEvidenceCodeId($evidence_code);
       $experiment_id = getExperimentId($experiment);
       
-     
-      $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`mode_of_action_id`,
-      `interactor_type_id_left`,`interactor_type_id_right`,`evidence_id`,`evidence_code_id`,`experiment_id`,`comments`,`curator_id`) 
-      VALUES ('$obj_id_left','$obj_id_right','$interaction_type_id','$mode_of_action_id','$int_type_id_left','$int_type_id_right'
-      ,'$evidence_id','$evidence_code_id','$experiment_id','$comments','$curator_id')";      
-      executeDbQuery($query);
-                               
+      #get interaction_id to see if already exists
+       $interaction_id = getInteractionId($obj_id_left,$interaction_type_id,$obj_id_right,$curator_id)
+       
+       #if not, insert data into to the database
+       if(isEmpty($interaction_id)){
+       $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`mode_of_action_id`,
+       `interactor_type_id_left`,`interactor_type_id_right`,`evidence_id`,`evidence_code_id`,`experiment_id`,`comments`,`curator_id`) 
+       VALUES ('$obj_id_left','$obj_id_right','$interaction_type_id','$mode_of_action_id','$int_type_id_left','$int_type_id_right'
+       ,'$evidence_id','$evidence_code_id','$experiment_id','$comments','$curator_id')";      
+       executeDbQuery($query);
+      }                                
        }
 }
 
@@ -117,7 +121,7 @@ sub importGeneInformation(){
                        print "New species $species_name added to the db\n";
                }
                
-               #gets synonym_id if exists already. else, inserts synonym and returns the id.
+               #gets synonym_id. The function returns the id if exists already. else, inserts synonym and returns the id.
                $synonym_id = getSynonymId($synonym);
                
                #construct the query and insert object information to the table.
@@ -155,10 +159,12 @@ $labName = "";
 $userName = "";
 $dbh = setUpDBConnection();
 switch($optionId){
+       #import gene information - accession id, synonym etc
    case "1"
        {
           importGeneInformation();
        }
+       #import interaction data
        case "2"
        {
                $curatorId = getCuratorId(getCuratorProperties());