Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, commits may not be seen immediately.
Script to be used for detection of obsolete terms in PO, to be used as part of the...
author elserj <elserj@localhost>
Sat, 8 Jan 2011 01:29:38 +0000 (01:29 +0000)
committer elserj <elserj@localhost>
Sat, 8 Jan 2011 01:29:38 +0000 (01:29 +0000)
svn path=/; revision=82

interactome_scripts/po_obsolete_check.pl [new file with mode: 0755]

diff --git a/interactome_scripts/po_obsolete_check.pl b/interactome_scripts/po_obsolete_check.pl
new file mode 100755 (executable)
index 0000000..a9712ad
--- /dev/null
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+##########################################################################
+#   po_obsolete_check.pl                                                 #
+#    Version 0.1                                                         #
+#                                                                        #
+#  Written by Justin Elser, Dept. of BPP, OSU 2011                       #
+#                                                                        #
+#  Program will check the current po obo file for terms that have        #
+#    been obsoleted.  It will then check all association files for       #
+#    annotations linked with those terms and create an output file to    #
+#    be used for manual intervention by annotation curators.             #
+#                                                                        #
+#  Program should be made part of the SOP when loading new DBs           #
+#                                                                        #
+##########################################################################
+
+#  Use Chris Mungall's GO::Parser to do the searching for is_obsoletes
+use GO::Parser;
+
+# Require exactly three arguments; misuse is reported on STDERR with a
+# non-zero exit status so that calling scripts can detect the failure.
+if (@ARGV != 3) {
+       print STDERR "usage: po_obsolete_check.pl OBO_file directory_with_association_files output_file\n";
+       exit 1;
+}
+my ($obo_file, $assoc_dir, $out_file) = @ARGV;
+
+# Add the path separator only when the directory argument lacks one
+my $dir_sep = ($assoc_dir eq '' || $assoc_dir =~ m{/\z}) ? '' : '/';
+my @assoc_files = glob("${assoc_dir}${dir_sep}*.assoc");
+
+# Parse the OBO file with GO::Parser's 'obj' handler, then fetch the graph
+# that the handler built so the ontology's terms can be walked.
+my $parser = GO::Parser->new({handler=>'obj'});
+$parser->parse($obo_file);
+
+my $ont = $parser->handler->graph;
+
+# All nodes (terms) held by the ontology graph
+my $all_terms = $ont->get_all_nodes;
+
+# Obsolete terms only: accession => name
+my %obs_terms_hash;
+
+for my $node (@$all_terms) {
+       # skip terms that are still current
+       next unless $node->is_obsolete;
+       my $acc = $node->acc;
+       $obs_terms_hash{$acc} = $node->name;
+}
+
+# Scan every association file for obsolete term IDs.  Each hit is written to
+# the output file as: "match found!!!", file name, term ID, line number; a
+# per-term count of matching lines is then printed to STDOUT.
+my %bad_term_hash;
+my @obs_ids = sort keys %obs_terms_hash;   # hoisted: fixed, reproducible order
+
+# Three-argument opens with lexical handles; every open/close is checked so
+# an unreadable input or a failed write cannot pass unnoticed.
+open(my $out_fh, '>', $out_file) or die "Cannot open '$out_file' for writing: $!";
+
+foreach my $file (@assoc_files) {
+       open(my $assoc_fh, '<', $file) or die "Cannot open '$file' for reading: $!";
+       while (my $line = <$assoc_fh>) {
+               foreach my $term (@obs_ids) {
+                       # literal substring test: the ID is data, not a regex
+                       next if index($line, $term) < 0;
+                       # $. is the current line number of the handle just read
+                       print {$out_fh} "match found!!!\t$file\t$term\t$.\n";
+                       $bad_term_hash{$term}++;
+               }
+       }
+       close($assoc_fh) or die "Error reading '$file': $!";
+}
+close($out_fh) or die "Error writing '$out_file': $!";
+
+foreach my $bad_term (sort keys %bad_term_hash) {
+       print "$bad_term\t$bad_term_hash{$bad_term}\n";
+}
+
+