#!/usr/bin/perl

# po_insert_translations.pl
#
# Inserts translated synonyms into the PO term stanzas of an OBO file.
#
# Usage:
#   po_insert_translations.pl language translation_file OBO_file output_file
#
#   language          "SP" (Spanish) or "JP" (Japanese); selects the text
#                     appended after the quoted translation.
#   translation_file  tab-separated file; column 1 is the term id and
#                     column 3 is the translation (other columns are ignored).
#   OBO_file          input ontology file.
#   output_file       copy of OBO_file with the new synonym lines added.
#
# One line "po id not found<TAB><id>" is printed to STDOUT for every PO id
# in the OBO file that has no translation.
#
# Files are opened without encoding layers, so bytes pass through unchanged.
#
# Provenance: interactome_scripts/po_insert_translations.pl, first committed
# by elserj on 2011-06-05 (svn revision 106) as "First version of translation
# insertion script, not working right yet".

use strict;
use warnings;

# Text appended after the quoted translation, keyed by language code.
my %SYNONYM_SUFFIX_FOR = (
    SP => 'EXACT Spanish [POC:mag]',
    JP => 'EXACT Japanese [NIG:yy]',
);

# Tags that may appear between a stanza's id line and its synonym lines.
# A pending synonym is held back while only these tags (or synonyms that
# sort before it) are seen, and is written before the first other line.
my %IS_BEFORE_TAG = map { $_ => 1 }
    qw(id is_anonymous name namespace alt_id def comment subset);

# load_translations($path)
#
# Reads the tab-separated translation file at $path and returns a hash
# reference mapping term id (column 1) to translation (column 3).  Rows
# without a term id or without a translation are skipped.  Dies if the
# file cannot be opened.
sub load_translations {
    my ($path) = @_;

    open my $fh, '<', $path
        or die "Cannot open translation file '$path': $!\n";

    my %translation_for;
    while (my $row = <$fh>) {
        chomp $row;
        my ($term, undef, $translation) = split /\t/, $row;
        next unless defined $term        && length $term;
        next unless defined $translation && length $translation;
        $translation_for{$term} = $translation;
    }
    close $fh;

    return \%translation_for;
}

# insert_translations($in_fh, $out_fh, $translation_for, $line_end)
#
# Copies every line from $in_fh to $out_fh.  After each "id: PO:<digits>"
# line whose id has an entry in %$translation_for, one line
#
#     synonym: "<translation>" <line_end>
#
# is scheduled and written at the first of these points:
#   * immediately before an existing synonym line that sorts after it
#     (string comparison), so sorted synonym runs stay sorted;
#   * immediately before the first line that is neither a synonym nor one
#     of the tags in %IS_BEFORE_TAG (another tag, a blank line, a stanza
#     header);
#   * at end of input.
# If an identical synonym line is already present before that point,
# nothing is added.  The state is per id line, so every term is handled
# independently of the terms before it.
#
# Returns the list of PO ids (in file order) that had no translation.
sub insert_translations {
    my ($in_fh, $out_fh, $translation_for, $line_end) = @_;

    my @missing_ids;
    my $pending;    # synonym line still to be written for the current term

    while (my $line = <$in_fh>) {
        chomp $line;

        # Capture the tag from this line only; a failed match must not
        # fall back to a capture left over from an earlier line.
        my $tag = $line =~ m/\A(\w+):/ ? $1 : '';

        if (defined $pending) {
            if ($tag eq 'synonym') {
                if ($line eq $pending) {
                    undef $pending;    # translation is already in the file
                }
                elsif ($pending lt $line) {
                    print {$out_fh} "$pending\n";
                    undef $pending;
                }
            }
            elsif (!$IS_BEFORE_TAG{$tag}) {
                # Past the place where synonyms belong: write it now.
                print {$out_fh} "$pending\n";
                undef $pending;
            }
        }

        print {$out_fh} "$line\n";

        if ($line =~ m/\Aid:\s+(PO:\d+)/) {
            my $po_id = $1;
            if (defined $translation_for->{$po_id}) {
                $pending = "synonym: \"$translation_for->{$po_id}\" $line_end";
            }
            else {
                push @missing_ids, $po_id;
            }
        }
    }

    # Input ended while a synonym was still waiting to be written.
    print {$out_fh} "$pending\n" if defined $pending;

    return @missing_ids;
}

# main(@args)
#
# Command-line entry point; returns the process exit status (0 on success,
# 2 on a usage error).  Dies on an unsupported language or on I/O failure.
sub main {
    my @args = @_;

    if (@args != 4) {
        print STDERR "usage: po_insert_translations.pl language translation_file OBO_file output_file\n";
        return 2;
    }
    my ($lang, $trans_file, $obo_file, $out_file) = @args;

    my $line_end = $SYNONYM_SUFFIX_FOR{$lang};
    if (!defined $line_end) {
        my $supported = join ', ', sort keys %SYNONYM_SUFFIX_FOR;
        die "Unsupported language '$lang' (supported: $supported)\n";
    }

    my $translation_for = load_translations($trans_file);

    open my $in_fh, '<', $obo_file
        or die "Cannot open OBO file '$obo_file': $!\n";
    open my $out_fh, '>', $out_file
        or die "Cannot open output file '$out_file': $!\n";

    my @missing_ids
        = insert_translations($in_fh, $out_fh, $translation_for, $line_end);

    close $in_fh;
    # Buffered write errors only surface when the handle is closed.
    close $out_fh
        or die "Cannot close output file '$out_file': $!\n";

    print "po id not found\t$_\n" for @missing_ids;

    return 0;
}

# Run only when executed as a script, so the subs can be loaded and tested.
exit main(@ARGV) unless caller;

1;