From df36e150eff904ff59713e275cd1132838bdea8c Mon Sep 17 00:00:00 2001
From: elserj <elserj@localhost>
Date: Fri, 6 Jan 2012 18:14:16 +0000
Subject: [PATCH] Update to spelled out names, and fix if translation already
 in file

svn path=/; revision=254
---
 interactome_scripts/po_insert_translations.pl | 62 +++++++++++++++----
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl
index 38f2df8..4c542e4 100755
--- a/interactome_scripts/po_insert_translations.pl
+++ b/interactome_scripts/po_insert_translations.pl
@@ -1,7 +1,9 @@
 #!/usr/bin/perl
 
+use utf8;
 use strict;
 use warnings;
+use Encode qw(encode_utf8);
 
 if ($#ARGV !=3) {
 	print "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
@@ -15,9 +17,9 @@ my $out_file = $ARGV[3];
 
 my $line_end;
 if($lang eq "SP") {
-	$line_end = "EXACT Spanish [POC:mag]";
+	$line_end = "(Spanish)\" EXACT Spanish [POC:Maria_Alejandra_Gandolfo]";
 }elsif($lang eq "JP") {
-	$line_end = "EXACT Japanese [NIG:yy]";
+	$line_end = "(Japanese)\" EXACT Japanese [NIG:Yukiko_Yamazaki]";
 }
 
 my %lang_hash;
@@ -27,8 +29,25 @@ while(<language_File>) {
 	my $line = $_;
 	chomp $line;
 	my ($term, $name, $translation, $defn) = split("\t", $line);
-	$lang_hash{$term} = $translation;
+	$translation =~ s/\"//g;
+	if(defined($lang_hash{$term})) {
+		$lang_hash{$term} = "$lang_hash{$term}\t$translation";
+	}else{
+		$lang_hash{$term} = $translation;
+	}
+}
+
+# Sort each term's tab-separated translations case-insensitively so they are stored in alphabetical order.
+foreach my $key (keys %lang_hash) {
+	my @translations = split("\t", $lang_hash{$key});
+	my @sorted_translations = sort {lc($a) cmp lc($b)} @translations;
+	my $length = @sorted_translations;
+	$lang_hash{$key} = "$sorted_translations[0]";
+	for (my $i=1; $i<$length; $i++) {
+		$lang_hash{$key} = "$lang_hash{$key}\t$sorted_translations[$i]";
+	}
 }
+	
 
 open(in_File,"$obo_file");
 open(output_File,">$out_file");
@@ -63,18 +82,22 @@ while(<in_File>) {
 	if ($line_curr =~ m/^synonym:/) {
 		$found_synonyms = 1;
 		if(defined($lang_hash{$po_id})) {
-			my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-			if (lc($new_synonym) lt lc($line_curr) && lc($new_synonym) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
-				print output_File "$new_synonym\n";
-			}elsif( $new_synonym eq $line_curr) {
-				# translation already in file
-				next;
+			foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+				my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+				if (lc($new_synonym_line) lt lc($line_curr) && lc($new_synonym_line) gt lc($line_prev)) {  # need to lowercase the string otherwise the ascii string compare will not work right
+					print output_File "$new_synonym_line\n";
+				}elsif( $new_synonym_line eq $line_curr) {
+					# translation already in file; this unlabeled "next" only advances the enclosing foreach over translations
+					next;
+				}
 			}
 		}else{
-			print "po id not found\t$po_id\n";
+			#print "po id not found\t$po_id\n";
 		}
 	}
 	
+
+	
 	my $line_curr_identifier = "no match";
 	$line_curr =~ m/^(\w+)\:/;
 	if(defined($1)) {
@@ -93,11 +116,24 @@ while(<in_File>) {
 	if($count_before && $count_after) {
 		if(defined($lang_hash{$po_id})) {
 			if(!$found_synonyms) {
-				my $new_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
-				print output_File "$new_synonym\n";
+				foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+					my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+					print output_File "$new_synonym_line\n";
+				}
 			}
 		}else{
-			print "po id not found\t$po_id\n";
+			#print "po id not found\t$po_id\n";
+		}
+	}
+
+	# Section to deal with Japanese characters.
+	#  Japanese characters are UTF-8 and should come after the ASCII characters of the Spanish synonyms.
+	if ($line_prev =~ m/^synonym:/ && $count_after) {
+		if(defined($lang_hash{$po_id})) {
+			foreach my $new_synonym(split("\t",$lang_hash{$po_id})) {
+				my $new_synonym_line = "synonym: \"$new_synonym $line_end";
+				print output_File "$new_synonym_line\n";
+			}
 		}
 	}
 	
-- 
2.34.1