First version of the translation insertion script; not working correctly yet

author elserj <elserj@localhost>

Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)

committer elserj <elserj@localhost>

Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
author elserj <elserj@localhost>
Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
committer elserj <elserj@localhost>
Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl

new file mode 100755 (executable)

index 0000000..9d295ab
--- /dev/null
+++ b/interactome_scripts/po_insert_translations.pl
@@ -0,0 +1,115 @@
+#!/usr/bin/perl
+#
+# po_insert_translations.pl
+#
+# Copies an OBO file to a new file, adding one translated synonym line to each
+# term stanza whose "id: PO:..." value has an entry in the translation file.
+#
+# usage: po_insert_translations.pl language translation_file OBO_file output_file
+#
+#   language          SP or JP; selects the trailer appended to the synonym
+#   translation_file  tab-separated: term id, name, translation, definition
+#   OBO_file          ontology file to read
+#   output_file       annotated copy to write (overwritten if it exists)
+#
+# The new line is placed among the stanza's existing synonym lines in string
+# order, or, when there are none, directly after the tags listed in
+# %is_before_tag.  PO ids without a translation are reported on STDOUT.
+
+use strict;
+use warnings;
+
+# Synonym trailer for each supported language code.
+my %line_end_for = (
+    SP => "EXACT Spanish [POC:mag]",
+    JP => "EXACT Japanese [NIG:yy]",
+);
+
+# Tags that stay above the synonym lines of a stanza.  Any other tag, a blank
+# line or a "[...]" stanza header marks the point where a synonym that has not
+# been written yet must go.
+my %is_before_tag = map { $_ => 1 }
+    qw(id is_anonymous name namespace alt_id def comment subset);
+
+if (@ARGV != 4) {
+    print STDERR "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
+    exit 1;
+}
+
+my ($lang, $trans_file, $obo_file, $out_file) = @ARGV;
+
+# Fail early on an unknown language instead of writing synonyms that lack
+# their trailer.
+my $line_end = $line_end_for{$lang};
+if (!defined $line_end) {
+    die "unsupported language '$lang' (expected one of: "
+        . join(", ", sort keys %line_end_for) . ")\n";
+}
+
+# Load the translations, keyed by term id.
+my %lang_hash;
+open(my $trans_fh, '<', $trans_file)
+    or die "cannot read translation file '$trans_file': $!\n";
+while (my $line = <$trans_fh>) {
+    $line =~ s/\r?\n\z//;    # also drop a DOS carriage return
+    my ($term, undef, $translation) = split(/\t/, $line);
+    # Skip blank lines and rows that carry no translation.
+    next unless defined $term && length $term;
+    next unless defined $translation && length $translation;
+    $lang_hash{$term} = $translation;
+}
+close($trans_fh);
+
+open(my $obo_fh, '<', $obo_file)
+    or die "cannot read OBO file '$obo_file': $!\n";
+open(my $out_fh, '>', $out_file)
+    or die "cannot write output file '$out_file': $!\n";
+
+# Synonym line still owed to the current stanza; undef when nothing is owed.
+my $pending;
+
+while (my $line_curr = <$obo_fh>) {
+    chomp $line_curr;
+
+    # "!" lines are skipped here so a comment never triggers the insertion.
+    if (defined $pending && $line_curr !~ m/^!/) {
+        my ($tag) = $line_curr =~ m/^(\w+):/;
+        $tag = "" unless defined $tag;
+
+        if ($tag eq "synonym") {
+            if ($pending eq $line_curr) {
+                # Already present: do not add a duplicate.
+                $pending = undef;
+            }
+            elsif ($pending lt $line_curr) {
+                print {$out_fh} "$pending\n";
+                $pending = undef;
+            }
+        }
+        elsif (!$is_before_tag{$tag}) {
+            # Past the synonym position (or at the end of the stanza).
+            print {$out_fh} "$pending\n";
+            $pending = undef;
+        }
+    }
+
+    if ($line_curr =~ m/^id:\s+(PO:\d+)/) {
+        my $po_id = $1;
+        if (defined $lang_hash{$po_id}) {
+            $pending = "synonym: \"$lang_hash{$po_id}\" $line_end";
+        }
+        else {
+            print "po id not found\t$po_id\n";
+        }
+    }
+
+    print {$out_fh} "$line_curr\n";
+}
+
+# The input ended inside a stanza that never reached an insertion point.
+if (defined $pending) {
+    print {$out_fh} "$pending\n";
+}
+
+close($obo_fh);
+close($out_fh) or die "cannot finish writing '$out_file': $!\n";
author	elserj <elserj@localhost>
	Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
committer	elserj <elserj@localhost>
	Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)