Update to spelled-out names, and fix the case where the translation is already in the file

author elserj <elserj@localhost>

Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)

committer elserj <elserj@localhost>

Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
author elserj <elserj@localhost>
Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
committer elserj <elserj@localhost>
Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl

index 38f2df8163c1f76923bab13fe614bdb55498fc24..4c542e4ffa02306fa63593614d18145eb37446e3 100755 (executable)
--- a/interactome_scripts/po_insert_translations.pl
+++ b/interactome_scripts/po_insert_translations.pl
@@ -1,7 +1,9 @@
  #!/usr/bin/perl
  
+use utf8;
  use strict;
  use warnings;
+use Encode qw(encode_utf8);
  
  if ($#ARGV !=3) {
         print "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
@@ -15,9 +17,9 @@ my $out_file = $ARGV[3];
  
  my $line_end;
  if($lang eq "SP") {
-       $line_end = "EXACT Spanish [POC:mag]";
+       $line_end = "(Spanish)\" EXACT Spanish [POC:Maria_Alejandra_Gandolfo]";
  }elsif($lang eq "JP") {
-       $line_end = "EXACT Japanese [NIG:yy]";
+       $line_end = "(Japanese)\" EXACT Japanese [NIG:Yukiko_Yamazaki]";
  }
  
  my %lang_hash;
@@ -27,8 +29,25 @@ while(<language_File>) {
         my $line = $_;
         chomp $line;
         my ($term, $name, $translation, $defn) = split("\t", $line);
-       $lang_hash{$term} = $translation;
+       $translation =~ s/\"//g;
+       if(defined($lang_hash{$term})) {
+               $lang_hash{$term} = "$lang_hash{$term}\t$translation";
+       }else{
+               $lang_hash{$term} = $translation;
+       }
+}
+
+# need to make sure that the translations are in alphabetical order
+foreach my $key (keys %lang_hash) {
+       my @translations = split("\t", $lang_hash{$key});
+       my @sorted_translations = sort {lc($a) cmp lc($b)} @translations;
+       my $length = @sorted_translations;
+       $lang_hash{$key} = "$sorted_translations[0]";
+       for (my $i=1; $i<$length; $i++) {
+               $lang_hash{$key} = "$lang_hash{$key}\t$sorted_translations[$i]";
+       }
  }
+       
  
  open(in_File,"$obo_file");
  open(output_File,">$out_file");
@@ -63,18 +82,22 @@ while(<in_File>) {
         if ($line_curr =~ m/^synonym:/) {
                 $found_synonyms = 1;
                 if(defined($lang_hash{$po_id})) {
-                       my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                       if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
-                               print output_File "$new_synonym\n";
-                       }elsif( $new_synonym eq $line_curr) {
-                               # translation already in file
-                               next;
+                       foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                               my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                               if (lc($new_synonym_line) lt lc($line_curr) && lc($new_synonym_line) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
+                                       print output_File "$new_synonym_line\n";
+                               }elsif( $new_synonym_line eq $line_curr) {
+                                       # translation already in file
+                                       next;
+                               }
                         }
                 }else{
-                       print "po id not found\t$po_id\n";
+                       #print "po id not found\t$po_id\n";
                 }
         }
         
+
+       
         my $line_curr_identifier = "no match";
         $line_curr =~ m/^(\w+)\:/;
         if(defined($1)) {
@@ -93,11 +116,24 @@ while(<in_File>) {
         if($count_before && $count_after) {
                 if(defined($lang_hash{$po_id})) {
                         if(!$found_synonyms) {
-                               my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                               print output_File "$new_synonym\n";
+                               foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                                       my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                                       print output_File "$new_synonym_line\n";
+                               }
                         }
                 }else{
-                       print "po id not found\t$po_id\n";
+                       #print "po id not found\t$po_id\n";
+               }
+       }
+
+       # Section to deal with Japanese characters.
+       #  Japanese characters are UTF-8, and should come after the ASCII characters of Spanish
+       if ($line_prev =~ m/^synonym:/ && $count_after) {
+               if(defined($lang_hash{$po_id})) {
+                       foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                               my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                               print output_File "$new_synonym_line\n";
+                       }
                 }
         }
author	elserj <elserj@localhost>
	Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
committer	elserj <elserj@localhost>
	Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)