Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
fixed the accession_right_id issue.
author athreyab <athreyab@localhost>
Tue, 24 Jan 2012 19:04:36 +0000 (19:04 +0000)
committer athreyab <athreyab@localhost>
Tue, 24 Jan 2012 19:04:36 +0000 (19:04 +0000)
svn path=/; revision=273

Personnel/athreyab/interactions/interactionPathsFromSif.pl
Personnel/athreyab/interactions/interactionPathsFromTsv.pl

index 90406ecbdc5445a57fb61c76089aecc868a029e9..c4cb9ce058018633f213c509659c1065f1abeeca 100644 (file)
@@ -11,6 +11,11 @@ require "interactionPath.pl";
 #use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
 use constant SITE_ADJECTIVE => 'Development';
 
+
+############First various sub routines are defined.############ 
+############Following it the main subroutine code is at the bottom of the file############
+
+
 sub showUsage
 {
        print "\n\nUsage: \n\tperl interactionsPathsFromSif <option> [<fileName>]\n";
@@ -47,12 +52,14 @@ sub importParalogData
        open(paralog_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
        while(<paralog_file>){
                my $entry = $_;
+               
                # strip off newline characters
+               chomp $entry;
       $entry =~ s/\r//g;
       $entry =~ s/\n//g;
       # split the columns into 3 separate variables
       my ($obj_l, $obj_r, $score) = split("\t", $entry);
-      # skip if $ident is empty or Nan
+      # skip
       next if(!defined($score));
       next if($score =~ /D/);
       next if($score eq "");
@@ -95,11 +102,12 @@ sub importOrthologData
        while(<paralog_file>){
                my $entry = $_;
                # strip off newline characters
+               chomp $entry;
       $entry =~ s/\r//g;
       $entry =~ s/\n//g;
       # split the columns into 3 separate variables
       my ($obj_l, $obj_r, $score) = split("\t", $entry);
-      # skip if $ident is empty or Nan
+      # skip
       next if(!defined($score));
       next if($score =~ /D/);
       next if($score eq "");
@@ -145,37 +153,40 @@ sub importSifData
        while(<interactions_file>){
                my $entry = $_;
                # strip off newline characters
+               chomp $entry;
                $entry =~ s/\r//g;
       $entry =~ s/\n//g;
       # split the columns into separate variables
       my ($accession_left,$interaction_type,$accession_right) = split("\t", $entry);
-      # skip if any of the variables are empty
+      
+      # skip if any of the variables are empty      
       next if(isEmpty($accession_left));
                next if(isEmpty($interaction_type));
                next if(isEmpty($accession_right));
       
-               #get object_id for object in the first column
+      #get object_id for object in the first column
                $accession_left_id = getObjectIdFromAccession($accession_left);
+
                #get interaction_type_id for object in the first column
                $interaction_type_id = getInteractionTypeForSifFormat($interaction_type);
+
                #get object_id for object in the second column
-               $accesion_right_id = getObjectIdFromAccession($accession_right);
-               
-               next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
+               $accession_right_id = getObjectIdFromAccession($accession_right);
 
+               next if(isEmpty($accession_left_id) || isEmpty($accession_right_id) || isEmpty($interaction_type_id));
+               
                #check if the interaction is already recorded. Write to database only if it doesn't exist already.
                $interactionId = getInteractionId($accession_left_id,$interaction_type_id,$accession_right_id,$curator_id);
+               print "interaction id is: $interactionId\n";
                if(isEmpty($interactionId)){
                        $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`curator_id`)
-                        VALUES('$accession_left_id','$accession_right_id','interaction_type_id','$curatorId')";
-                        executeDbQuery($query);
-                        print "$accession_left_id\t$interaction_type_id\t$accession_right_id\t$curator_id\n";                                  
+                        VALUES('$accession_left_id','$accession_right_id','$interaction_type_id','$curatorId')";
+                        executeDbQuery($query);                                        
                }
        }
-       print "Seed interaction data import completed";
+       print "Seed interaction data import completed\n";
 }
 
-
 $optionId = $ARGV[0];
 if($optionId ne "0" && $#ARGV != 1){
        showUsage();
@@ -229,7 +240,7 @@ switch($optionId){
                print "\tncbi id of the species\n";
                $curatorId = getCuratorId(getCuratorProperties());
                $speciesIdForSif = getSpeciesId(getSpeciesProperties(0));       
-               $speciesIdForSif ne "" || die "";                       
+               $speciesIdForSif ne "" || die "Species id was not found for ".getSpeciesProperties(0)."\n";                     
                importSifData();
        }
                else
index 7bb8152fe1ceecb93c85abc515bbd61c77154898..291b4cf94b65ae6f2630bac4269bfb4f503f7e33 100644 (file)
@@ -5,11 +5,18 @@
 use DBI;
 use Config::IniFiles;
 use Switch;
+
+#functions common to both interactionPathsFromSif and interactionPathsFromTsv are in the perl script file required below
 require "interactionPath.pl";
 
 #use constant SITE_ADJECTIVE => ($ENV{'DevelopmentSite'} ? 'Development': 'Live');
 use constant SITE_ADJECTIVE => 'Development';
 
+
+############First various sub routines are defined.############ 
+############Following it the main subroutine code is at the bottom of the file############
+
+
 sub showUsage(){
        print "Usage: \n\tperl interactionPathsFromTsv <option> <fileName>\n";
        print "options:\n\t";
@@ -26,10 +33,13 @@ sub importSeedInteractionData(){
        while(<interactions_file>){
                my $entry = $_;
                # strip off newline characters
+               chomp $entry;
                $entry =~ s/\r//g;
       $entry =~ s/\n//g;
+      
       # split the columns into separate variables
       my ($obj_abbr_left,$int_type_left,$mode_of_action,$obj_abbr_right,$int_type_right,$interaction_type,$evidence,$evidence_code,$experiment,$comments) = split("\t", $entry);
+      
       #we need all the 3 values before we can insert data into the interaction table.
       next if(isEmpty($obj_abbr_left));
       next if(isEmpty($obj_abbr_right));
@@ -39,6 +49,7 @@ sub importSeedInteractionData(){
       $obj_id_left = getObjectIdFromAbbreviation($obj_abbr_left);
       $obj_id_right = getObjectIdFromAbbreviation($obj_abbr_right);
       $interaction_type_id = getInteractionTypeId($interaction_type);
+       
        #if any of the above values are empty, it means they are empty. print that message and move to the next line
                if(isEmpty($obj_id_left)){
                        print "$obj_abbr_left was not found in the db. Skipping this line\n";
@@ -52,6 +63,8 @@ sub importSeedInteractionData(){
                        print "$interaction_type was not found in the db. Skipping this line\n";
                        next;
                }
+               
+               #get various primary ids from the table.      
       $int_type_id_left = getInteractorTypeId($int_type_left);
       $int_type_id_right = getInteractorTypeId($int_type_right);
       $mode_of_action_id = getModeOfActionId($mode_of_action);
@@ -59,14 +72,11 @@ sub importSeedInteractionData(){
       $evidence_code_id = getEvidenceCodeId($evidence_code);
       $experiment_id = getExperimentId($experiment);
       
-      print $obj_id_left."\t".$obj_id_right."\t".$int_type_id_left."\t".$int_type_id_right."\t".$interaction_type_id.
-      "\t".$evidence_id."\t".$evidence_code_id."\t".$experiment_id.$curator_id."\n";
-      
+     
       $query = "INSERT INTO Interaction(`object_id_left`,`object_id_right`,`interaction_type_id`,`mode_of_action_id`,
       `interactor_type_id_left`,`interactor_type_id_right`,`evidence_id`,`evidence_code_id`,`experiment_id`,`comments`,`curator_id`) 
       VALUES ('$obj_id_left','$obj_id_right','$interaction_type_id','$mode_of_action_id','$int_type_id_left','$int_type_id_right'
-      ,'$evidence_id','$evidence_code_id','$experiment_id','$comments','$curator_id')";
-      
+      ,'$evidence_id','$evidence_code_id','$experiment_id','$comments','$curator_id')";      
       executeDbQuery($query);
                                
        }
@@ -77,20 +87,24 @@ sub importGeneInformation(){
        open(gene_file, "$fileName") || die "Error: file '$fileName' can not be opened\n";
        while(<gene_file>){
                my $entry = $_;
+               
                # strip off newline characters
+               chomp $entry;
                $entry =~ s/\r//g;
       $entry =~ s/\n//g;
       
       my ($accession, $is_gene,$is_metabolite,$symbol,$full_name,$synonym,$species_name,$ncbi_id) = split("\t", $entry);
+      
       #skip the line if accession value is empty
       next if(isEmpty($accession));
+      
       $object_type=1;
                if($is_metabolite eq "yes"){
                        $object_type=2;
                        }
                
+               #get species id, if the species is not present already, insert it
                $species_id = getSpeciesId($species_name,$ncbi_id);
-               #if the species is not present already, insert it               
                if(isEmpty($species_id)){
                        if(!isEmpty($ncbi_id)){
                                $query = "INSERT INTO Species(`species`,`NCBI_taxonomy_id`) VALUES ('$species_name','$ncbi_id')";
@@ -100,11 +114,13 @@ sub importGeneInformation(){
                        }
                        executeDbQuery($query);
                        $species_id = getSpeciesId($species_name,$ncbi_id);
+                       print "New species $species_name added to the db\n";
                }
                
                #gets synonym_id if exists already. else, inserts synonym and returns the id.
                $synonym_id = getSynonymId($synonym);
                
+               #construct the query and insert object information to the table.
                $query = "INSERT INTO Object(`object_accession`,`species_id`,`object_full_name`,`object_abbreviation`,`object_type_id`) VALUES('$accession','$species_id','$full_name','$symbol','$object_type') ON DUPLICATE KEY UPDATE object_accession='$accession' ";
                if(!isEmpty($full_name)){
                        $query = $query." , object_full_name='$full_name'";