Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be seen immediately.
Update to spelled out names, and fix if translation already in file
author elserj <elserj@localhost>
Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
committer elserj <elserj@localhost>
Fri, 6 Jan 2012 18:14:16 +0000 (18:14 +0000)
svn path=/; revision=254

interactome_scripts/po_insert_translations.pl

index 38f2df8163c1f76923bab13fe614bdb55498fc24..4c542e4ffa02306fa63593614d18145eb37446e3 100755 (executable)
@@ -1,7 +1,9 @@
 #!/usr/bin/perl
 
+use utf8;
 use strict;
 use warnings;
+use Encode qw(encode_utf8);
 
 if ($#ARGV !=3) {
        print "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
@@ -15,9 +17,9 @@ my $out_file = $ARGV[3];
 
 my $line_end;
 if($lang eq "SP") {
-       $line_end = "EXACT Spanish [POC:mag]";
+       $line_end = "(Spanish)\" EXACT Spanish [POC:Maria_Alejandra_Gandolfo]";
 }elsif($lang eq "JP") {
-       $line_end = "EXACT Japanese [NIG:yy]";
+       $line_end = "(Japanese)\" EXACT Japanese [NIG:Yukiko_Yamazaki]";
 }
 
 my %lang_hash;
@@ -27,8 +29,25 @@ while(<language_File>) {
        my $line = $_;
        chomp $line;
        my ($term, $name, $translation, $defn) = split("\t", $line);
-       $lang_hash{$term} = $translation;
+       $translation =~ s/\"//g;
+       if(defined($lang_hash{$term})) {
+               $lang_hash{$term} = "$lang_hash{$term}\t$translation";
+       }else{
+               $lang_hash{$term} = $translation;
+       }
+}
+
+# need to make sure that the translations are in alphabetical order
+foreach my $key (keys %lang_hash) {
+       my @translations = split("\t", $lang_hash{$key});
+       my @sorted_translations = sort {lc($a) cmp lc($b)} @translations;
+       my $length = @sorted_translations;
+       $lang_hash{$key} = "$sorted_translations[0]";
+       for (my $i=1; $i<$length; $i++) {
+               $lang_hash{$key} = "$lang_hash{$key}\t$sorted_translations[$i]";
+       }
 }
+       
 
 open(in_File,"$obo_file");
 open(output_File,">$out_file");
@@ -63,18 +82,22 @@ while(<in_File>) {
        if ($line_curr =~ m/^synonym:/) {
                $found_synonyms = 1;
                if(defined($lang_hash{$po_id})) {
-                       my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                       if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
-                               print output_File "$new_synonym\n";
-                       }elsif( $new_synonym eq $line_curr) {
-                               # translation already in file
-                               next;
+                       foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                               my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                               if (lc($new_synonym_line) lt lc($line_curr) && lc($new_synonym_line) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
+                                       print output_File "$new_synonym_line\n";
+                               }elsif( $new_synonym_line eq $line_curr) {
+                                       # translation already in file
+                                       next;
+                               }
                        }
                }else{
-                       print "po id not found\t$po_id\n";
+                       #print "po id not found\t$po_id\n";
                }
        }
        
+
+       
        my $line_curr_identifier = "no match";
        $line_curr =~ m/^(\w+)\:/;
        if(defined($1)) {
@@ -93,11 +116,24 @@ while(<in_File>) {
        if($count_before && $count_after) {
                if(defined($lang_hash{$po_id})) {
                        if(!$found_synonyms) {
-                               my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-                               print output_File "$new_synonym\n";
+                               foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                                       my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                                       print output_File "$new_synonym_line\n";
+                               }
                        }
                }else{
-                       print "po id not found\t$po_id\n";
+                       #print "po id not found\t$po_id\n";
+               }
+       }
+
+       # put in section to deal with Japanese chars.
+       #  Japanese characters are utf-8, and should come after the ascii chars of spanish
+       if ($line_prev =~ m/^synonym:/ && $count_after) {
+               if(defined($lang_hash{$po_id})) {
+                       foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+                               my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+                               print output_File "$new_synonym_line\n";
+                       }
                }
        }