#!/usr/bin/perl
+use utf8;
use strict;
use warnings;
+use Encode qw(encode_utf8);
# Require exactly four command-line arguments: $#ARGV is the last index of
# @ARGV, so a value of 3 means four entries. Otherwise print the usage line.
# NOTE(review): the remainder of this `if` body (and its closing brace) lies
# outside this excerpt.
if ($#ARGV !=3) {
print "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
# Tail that is appended to every generated synonym line, chosen from $lang.
# In the added (+) lines the tail itself carries the "(Language)" tag and the
# closing double quote of the synonym text, so the code that builds a synonym
# line must not add its own closing quote. The removed (-) lines held only
# the part after the quote and used initials in the bracketed source.
# NOTE(review): there is no else branch, so $line_end stays undef for any
# $lang other than "SP" or "JP".
my $line_end;
if($lang eq "SP") {
- $line_end = "EXACT Spanish [POC:mag]";
+ $line_end = "(Spanish)\" EXACT Spanish [POC:Maria_Alejandra_Gandolfo]";
}elsif($lang eq "JP") {
- $line_end = "EXACT Japanese [NIG:yy]";
+ $line_end = "(Japanese)\" EXACT Japanese [NIG:Yukiko_Yamazaki]";
}
# Translations keyed by the first input column ($term); the same keys are
# later looked up with $po_id. In the added (+) version one value may hold
# several translations, separated by tab characters.
my %lang_hash;
# The header of the enclosing read loop is outside this excerpt; $_ holds one
# input line (presumably from the translation file -- confirm against the
# full script).
my $line = $_;
chomp $line;
# Tab-separated columns: term, name, translation, definition. Only $term and
# $translation are used in the code visible here.
my ($term, $name, $translation, $defn) = split("\t", $line);
- $lang_hash{$term} = $translation;
# Remove every double quote from the translation; the text is later wrapped
# in double quotes when the synonym line is built.
# NOTE(review): a line with fewer than three columns leaves $translation
# undef, which makes this substitution warn under `use warnings`.
+ $translation =~ s/\"//g;
# Append to an existing entry for this term, or start a new one.
+ if(defined($lang_hash{$term})) {
+ $lang_hash{$term} = "$lang_hash{$term}\t$translation";
+ }else{
+ $lang_hash{$term} = $translation;
+ }
+}
+
+# need to make sure that the translations are in alphabetical order
# For each term: split the stored string on tabs, sort the pieces
# case-insensitively, and rebuild the tab-separated string in sorted order.
+foreach my $key (keys %lang_hash) {
+ my @translations = split("\t", $lang_hash{$key});
+ my @sorted_translations = sort {lc($a) cmp lc($b)} @translations;
+ my $length = @sorted_translations;
# Start with the first element, then append the rest one by one; the result
# is the same string that join("\t", @sorted_translations) would produce.
+ $lang_hash{$key} = "$sorted_translations[0]";
+ for (my $i=1; $i<$length; $i++) {
+ $lang_hash{$key} = "$lang_hash{$key}\t$sorted_translations[$i]";
+ }
}
+
# Open the OBO file for reading and the output file for writing (truncating).
# NOTE(review): both calls use the two-argument form of open with bareword
# handles, and neither return value is checked, so a failed open is not
# reported at this point.
open(in_File,"$obo_file");
open(output_File,">$out_file");
# The current line is a synonym line: remember that this term has synonyms
# and try to slot the translation line(s) into the existing list.
if ($line_curr =~ m/^synonym:/) {
$found_synonyms = 1;
if(defined($lang_hash{$po_id})) {
- my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
- if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) { # need to lowercase the string otherwise the ascii string compare will not work right
- print output_File "$new_synonym\n";
- }elsif( $new_synonym eq $line_curr) {
- # translation already in file
- next;
# Added version: handle each tab-separated translation separately. The
# closing quote of the synonym text comes from $line_end, so only the opening
# quote is written here.
+ foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+ my $new_synonym_line = "synonym: \"$new_synonym $line_end";
# Emit the new line only when it sorts strictly between the previous line and
# the current line (compared as lowercased strings).
+ if (lc($new_synonym_line) lt lc($line_curr) && lc($new_synonym_line) gt lc($line_prev)) { # need to lowercase the string otherwise the ascii string compare will not work right
+ print output_File "$new_synonym_line\n";
+ }elsif( $new_synonym_line eq $line_curr) {
+ # translation already in file
# NOTE(review): inside the foreach, `next` advances to the next translation
# rather than affecting any outer loop; as the last statement of the loop
# body it has no further effect. In the removed version the same `next`
# applied to whatever loop encloses this code -- confirm this change in
# behaviour is intended.
+ next;
+ }
}
}else{
- print "po id not found\t$po_id\n";
# The diagnostic for terms without a translation is disabled in the added
# version, leaving this branch empty.
+ #print "po id not found\t$po_id\n";
}
}
+
+
# Capture the leading run of word characters that is immediately followed by
# a colon at the start of the current line.
my $line_curr_identifier = "no match";
$line_curr =~ m/^(\w+)\:/;
# NOTE(review): the success of the match is not tested. After a failed match
# $1 keeps the value from the last successful match in scope, so this check
# may see a stale capture -- confirm against the full script.
if(defined($1)) {
# Both $count_before and $count_after must be true here; their meaning is
# established outside this excerpt.
if($count_before && $count_after) {
if(defined($lang_hash{$po_id})) {
# No synonym line was seen for this term, so write the translation line(s)
# out unconditionally.
if(!$found_synonyms) {
- my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
- print output_File "$new_synonym\n";
# Added version: one output line per tab-separated translation; the closing
# quote of the synonym text is supplied by $line_end.
+ foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+ my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+ print output_File "$new_synonym_line\n";
+ }
}
}else{
- print "po id not found\t$po_id\n";
# Diagnostic disabled in the added version; the branch is now empty.
+ #print "po id not found\t$po_id\n";
+ }
+ }
+
+ # put in section to deal with Japanese chars.
+ # Japanese characters are utf-8, and should come after the ascii chars of spanish
# Runs when the previous line was a synonym line and $count_after is true:
# writes every translation for $po_id at this point in the output.
# NOTE(review): the condition visible here does not test $lang, nor whether
# the same lines were already written by the insertion logic elsewhere --
# confirm that Spanish runs and already-inserted translations cannot produce
# duplicate synonym lines.
+ if ($line_prev =~ m/^synonym:/ && $count_after) {
+ if(defined($lang_hash{$po_id})) {
+ foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+ my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+ print output_File "$new_synonym_line\n";
+ }
}
}