From df36e150eff904ff59713e275cd1132838bdea8c Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 6 Jan 2012 18:14:16 +0000 Subject: [PATCH] Update to spelled out names, and fix if translation already in file svn path=/; revision=254 --- interactome_scripts/po_insert_translations.pl | 62 +++++++++++++++---- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl index 38f2df8..4c542e4 100755 --- a/interactome_scripts/po_insert_translations.pl +++ b/interactome_scripts/po_insert_translations.pl @@ -1,7 +1,9 @@ #!/usr/bin/perl +use utf8; use strict; use warnings; +use Encode qw(encode_utf8); if ($#ARGV !=3) { print "usage: po_insert_translations.pl language translation_file OBO_file output_file\n"; @@ -15,9 +17,9 @@ my $out_file = $ARGV[3]; my $line_end; if($lang eq "SP") { - $line_end = "EXACT Spanish [POC:mag]"; + $line_end = "(Spanish)\" EXACT Spanish [POC:Maria_Alejandra_Gandolfo]"; }elsif($lang eq "JP") { - $line_end = "EXACT Japanese [NIG:yy]"; + $line_end = "(Japanese)\" EXACT Japanese [NIG:Yukiko_Yamazaki]"; } my %lang_hash; @@ -27,8 +29,25 @@ while() { my $line = $_; chomp $line; my ($term, $name, $translation, $defn) = split("\t", $line); - $lang_hash{$term} = $translation; + $translation =~ s/\"//g; + if(defined($lang_hash{$term})) { + $lang_hash{$term} = "$lang_hash{$term}\t$translation"; + }else{ + $lang_hash{$term} = $translation; + } +} + +# need to make sure that the translations are in alphabetical order +foreach my $key (keys %lang_hash) { + my @translations = split("\t", $lang_hash{$key}); + my @sorted_translations = sort {lc($a) cmp lc($b)} @translations; + my $length = @sorted_translations; + $lang_hash{$key} = "$sorted_translations[0]"; + for (my $i=1; $i<$length; $i++) { + $lang_hash{$key} = "$lang_hash{$key}\t$sorted_translations[$i]"; + } } + open(in_File,"$obo_file"); open(output_File,">$out_file"); @@ -63,18 +82,22 @@ while() { if ($line_curr =~ m/^synonym:/) { $found_synonyms = 1; if(defined($lang_hash{$po_id})) { - my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end"; - if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) { # need to lowercase the string otherwise the ascii string compare will not work right - print output_File "$new_synonym\n"; - }elsif( $new_synonym eq $line_curr) { - # translation already in file - next; + foreach my $new_synonym(split("\t",$lang_hash{$po_id})) { + my $new_synonym_line = "synonym: \"$new_synonym $line_end"; + if (lc($new_synonym_line) lt lc($line_curr) && lc($new_synonym_line) gt lc($line_prev)) { # need to lowercase the string otherwise the ascii string compare will not work right + print output_File "$new_synonym_line\n"; + }elsif( $new_synonym_line eq $line_curr) { + # translation already in file + next; + } } }else{ - print "po id not found\t$po_id\n"; + #print "po id not found\t$po_id\n"; } } + + my $line_curr_identifier = "no match"; $line_curr =~ m/^(\w+)\:/; if(defined($1)) { @@ -93,11 +116,24 @@ while() { if($count_before && $count_after) { if(defined($lang_hash{$po_id})) { if(!$found_synonyms) { - my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end"; - print output_File "$new_synonym\n"; + foreach my $new_synonym(split("\t",$lang_hash{$po_id})) { + my $new_synonym_line = "synonym: \"$new_synonym $line_end"; + print output_File "$new_synonym_line\n"; + } } }else{ - print "po id not found\t$po_id\n"; + #print "po id not found\t$po_id\n"; + } + } + + # put in section to deal with Japanese chars. + # Japanese characters are utf-8, and should come after the ascii chars of spanish + if ($line_prev =~ m/^synonym:/ && $count_after) { + if(defined($lang_hash{$po_id})) { + foreach my $new_synonym(split("\t",$lang_hash{$po_id})) { + my $new_synonym_line = "synonym: \"$new_synonym $line_end"; + print output_File "$new_synonym_line\n"; + } } } -- 2.34.1