Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
First version of translation insertion script, not working right yet
author elserj <elserj@localhost>
Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
committer elserj <elserj@localhost>
Sun, 5 Jun 2011 16:47:11 +0000 (16:47 +0000)
svn path=/; revision=106

interactome_scripts/po_insert_translations.pl [new file with mode: 0755]

diff --git a/interactome_scripts/po_insert_translations.pl b/interactome_scripts/po_insert_translations.pl
new file mode 100755 (executable)
index 0000000..9d295ab
--- /dev/null
@@ -0,0 +1,114 @@
+#!/usr/bin/perl
+
+# po_insert_translations.pl
+#
+# Copies an OBO file and adds translated term names to it as synonym lines.
+#
+# usage: po_insert_translations.pl language translation_file OBO_file output_file
+#
+#   language          SP (Spanish) or JP (Japanese); selects the synonym suffix
+#   translation_file  tab-separated; column 1 is the term id and column 3 the
+#                     translation (columns 2 and 4 are ignored)
+#   OBO_file          ontology to read
+#   output_file       copy of OBO_file with the translation synonyms added
+#
+# For every stanza whose "id:" is a PO id that has a translation, one line
+#     synonym: "<translation>" <suffix>
+# is written after the tags listed in @before_ids, in string order among the
+# stanza's existing synonym lines, and ahead of everything else. An identical
+# line met before that point suppresses the insertion. PO ids that have no
+# translation are reported on STDOUT.
+
+use strict;
+use warnings;
+
+if (@ARGV != 4) {
+    print STDERR "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
+    exit 1;
+}
+
+my ($lang, $trans_file, $obo_file, $out_file) = @ARGV;
+
+# Text that follows the quoted translation on a synonym line, per language.
+my %line_end_for = (
+    SP => 'EXACT Spanish [POC:mag]',
+    JP => 'EXACT Japanese [NIG:yy]',
+);
+my $line_end = $line_end_for{$lang};
+die "unknown language '$lang' (expected SP or JP)\n" unless defined $line_end;
+
+# Tags that stay ahead of synonym lines within a stanza.
+my @before_ids = ("id", "is_anonymous", "name", "namespace", "alt_id", "def", "comment", "subset");
+my %is_before_id = map { $_ => 1 } @before_ids;
+
+# Term id => translation, loaded from the translation file.
+my %lang_hash;
+
+open(my $trans_fh, '<', $trans_file)
+    or die "cannot open translation file '$trans_file': $!\n";
+while (my $line = <$trans_fh>) {
+    $line =~ s/\r?\n\z//;    # tolerate CRLF so "\r" never ends up in a synonym
+    my ($term, undef, $translation) = split /\t/, $line;
+
+    # Skip blank lines and rows that lack a term id or a translation.
+    next unless defined $translation && length $term && length $translation;
+    $lang_hash{$term} = $translation;
+}
+close $trans_fh;
+
+# Returns 1 when $line must stay ahead of $new_synonym in the output ("!"
+# comments, @before_ids tags, synonyms that sort before it), otherwise 0.
+sub stays_before {
+    my ($line, $new_synonym) = @_;
+
+    return 1 if $line =~ /^!/;
+
+    # Blank lines, stanza headers and other untagged lines end the search.
+    my ($tag) = $line =~ /^([\w-]+):/;
+    return 0 unless defined $tag;
+
+    return 1 if $is_before_id{$tag};
+    return 0 if $tag ne 'synonym';
+    return $line lt $new_synonym ? 1 : 0;
+}
+
+open(my $obo_fh, '<', $obo_file)
+    or die "cannot open OBO file '$obo_file': $!\n";
+open(my $out_fh, '>', $out_file)
+    or die "cannot open output file '$out_file': $!\n";
+
+# Synonym line still owed to the stanza being read; undef when nothing is
+# waiting to be written.
+my $pending_synonym;
+
+while (my $line_curr = <$obo_fh>) {
+    chomp $line_curr;
+
+    if ($line_curr =~ m/^id:\s+(PO:\d+)/) {
+        my $po_id = $1;
+        if (defined $lang_hash{$po_id}) {
+            $pending_synonym = "synonym: \"$lang_hash{$po_id}\" $line_end";
+        }
+        else {
+            print "po id not found\t$po_id\n";
+        }
+    }
+    elsif (defined $pending_synonym) {
+        if ($line_curr eq $pending_synonym) {
+            # The translation is already present; do not write it twice.
+            $pending_synonym = undef;
+        }
+        elsif (!stays_before($line_curr, $pending_synonym)) {
+            print {$out_fh} "$pending_synonym\n";
+            $pending_synonym = undef;
+        }
+    }
+
+    print {$out_fh} "$line_curr\n";
+}
+
+# The last stanza may reach end of file without a closing blank line.
+print {$out_fh} "$pending_synonym\n" if defined $pending_synonym;
+
+close $obo_fh;
+close $out_fh or die "cannot write output file '$out_file': $!\n";