works now, at least for Spanish

author elserj <elserj@localhost>

Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)

committer elserj <elserj@localhost>

Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)
author elserj <elserj@localhost>
Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)
committer elserj <elserj@localhost>
Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)
diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl

index 9d295abf2b7b63eaa75a0b48eed262ca5d3b8182..38f2df8163c1f76923bab13fe614bdb55498fc24 100755 (executable)
--- a/interactome_scripts/po_insert_translations.pl
+++ b/interactome_scripts/po_insert_translations.pl
@@ -22,16 +22,16 @@ if($lang eq "SP") {
  
  my %lang_hash;
  
-open(language_file,"$trans_file");
-while(<language_file>) {
+open(language_File,"$trans_file");
+while(<language_File>) {
         my $line = $_;
         chomp $line;
         my ($term, $name, $translation, $defn) = split("\t", $line);
         $lang_hash{$term} = $translation;
  }
  
-open(in_file,"$obo_file");
-open(output_file,">$out_file");
+open(in_File,"$obo_file");
+open(output_File,">$out_file");
  
  my $line_prev = "";
  my $po_id;
@@ -40,43 +40,61 @@ my $found_synonyms =0;
  my @before_ids = ("id", "is_anonymous", "name", "namespace", "alt_id", "def", "comment", "subset");
  my @after_ids = ("xref", "is_a", "intersection_of", "union_of", "disjoint_from", "relationship", "is_obsolete", "replaced_by", "consider");
  
-while(<in_file>) {
+while(<in_File>) {
         my $line_curr = $_;
         chomp $line_curr;
         
-#      if($line_curr =~ m/Term/) {
-#              $found_synonyms = 0;
-#      }
+       if($line_curr =~ m/^\[Term\]/) {
+               $found_synonyms = 0;
+               print output_File "$line_curr\n";
+               next;
+               
+       }
+       
+       if($line_curr =~ m/^&/) {
+               print output_File "$line_curr\n";
+               next;
+       }
         
         if($line_curr =~ m/^id:\s+(PO:\d+)/) {
                 $po_id = $1;
         }
         
-       if ($line_curr =~ m/^synonym/) {
+       if ($line_curr =~ m/^synonym:/) {
                 $found_synonyms = 1;
                 if(defined($lang_hash{$po_id})) {
                         my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                       if ($new_synonym lt $line_curr && $new_synonym gt $line_prev) {
-                               print output_file "$new_synonym\n";
+                       if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) {  # compare case-insensitively: in ASCII every uppercase letter sorts before every lowercase one, so a raw lt/gt would misplace mixed-case synonyms
+                               print output_File "$new_synonym\n";
+                       }elsif( $new_synonym eq $line_curr) {
+                               # translation already in file
+                               next;
                         }
                 }else{
                         print "po id not found\t$po_id\n";
                 }
         }
         
-       $line_curr =~ m/^(\w+)/;
-       my $line_curr_identifier = $1;
+       my $line_curr_identifier = "no match";
+       $line_curr =~ m/^(\w+)\:/;
+       if(defined($1)) {
+               $line_curr_identifier = $1;
+       }
+       
+       my $line_prev_identifier = "no match";
+       $line_prev =~ m/^(\w+)\:/;
+       if(defined($1)) {
+               $line_prev_identifier = $1;
+       }
         
-       $line_prev =~ m/^(\w+)/;
-       my $line_prev_identifier = $1;
-       my $count_before = grep /$line_prev_identifier/, @before_ids;
-       my $count_after = grep /$line_curr_identifier/, @after_ids;
+       my $count_before = grep /^$line_prev_identifier$/, @before_ids;
+       my $count_after = grep /^$line_curr_identifier$/, @after_ids;
         
         if($count_before && $count_after) {
                 if(defined($lang_hash{$po_id})) {
                         if(!$found_synonyms) {
                                 my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                               print output_file "$new_synonym\n";
+                               print output_File "$new_synonym\n";
                         }
                 }else{
                         print "po id not found\t$po_id\n";
@@ -84,7 +102,7 @@ while(<in_file>) {
         }
         
         
-       print output_file "$line_curr\n";
+       print output_File "$line_curr\n";
         $line_prev=$line_curr;
         
  }
author	elserj <elserj@localhost>
	Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)
committer	elserj <elserj@localhost>
	Wed, 6 Jul 2011 23:58:36 +0000 (23:58 +0000)