From: elserj <elserj@localhost>
Date: Sat, 8 Jan 2011 01:29:38 +0000 (+0000)
Subject: Script to be used for detection of obsolete terms in PO, to be used as part of the... 
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=58fb81ed7f67151d5b37b4fa6239d987bcdbd771;p=old-jaiswallab-svn%2F.git

Script to be used for detection of obsolete terms in PO, to be used as part of the pipeline when loading the DB

svn path=/; revision=82
---

diff --git a/interactome_scripts/po_obsolete_check.pl b/interactome_scripts/po_obsolete_check.pl
new file mode 100755
index 0000000..a9712ad
--- /dev/null
+++ b/interactome_scripts/po_obsolete_check.pl
@@ -0,0 +1,85 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+##########################################################################
+#   po_obsolete_check.pl                                                 #
+#    Version 0.1                                                         #
+#                                                                        #
+#  Written by Justin Elser, Dept. of BPP, OSU 2011                       #
+#                                                                        #
+#  Program will check the current po obo file for terms that have        #
+#    been obsoleted.  It will then check all association files for       #
+#    annotations linked with those terms and create an output file to    #
+#    be used for manual intervention by annotation curators.             #
+#                                                                        #
+#  Program should be made part of the SOP when loading new DBs           #
+#                                                                        #
+##########################################################################
+
+#  Use Chris Mungall's GO::Parser to do the searching for is_obsoletes
+use GO::Parser;
+
+# Exactly three arguments are required; otherwise print usage to STDERR and
+# exit non-zero so a calling pipeline can detect the failure.
+if (@ARGV != 3) {
+	print STDERR "usage: po_obsolete_check.pl OBO_file directory_with_association_files output_file\n";
+	exit 1;
+}
+my ($obo_file, $assoc_dir, $out_file) = @ARGV;
+# The glob below is "<dir>*.assoc", so add the "/" a bare "dir" would lack
+$assoc_dir .= '/' if $assoc_dir =~ m{[^/]\z};
+my @assoc_files = glob("$assoc_dir*.assoc");
+warn "no .assoc files found in $assoc_dir\n" unless @assoc_files;
+
+# Parse the OBO file through GO::Parser's 'obj' handler and take the
+# ontology graph it exposes.
+my $parser = GO::Parser->new({ handler => 'obj' });
+$parser->parse($obo_file);
+
+my $ont       = $parser->handler->graph;
+my $obo_terms = $ont->get_all_nodes;
+
+# Obsolete terms, keyed by accession with the term name as value.
+my %obs_terms_hash;
+
+foreach my $node (@{$obo_terms}) {
+	# Skip everything the ontology does not flag as obsolete
+	next unless $node->is_obsolete;
+
+	# The hash subscript and the element assignment both impose scalar
+	# context on the method calls, just as the former temporaries did.
+	$obs_terms_hash{ $node->acc } = $node->name;
+}
+
+# Count of matching lines per obsolete term, summed over all files
+my %bad_term_hash;
+# Sorted once, outside the loops, so report order is stable between runs
+my @obs_terms = sort keys %obs_terms_hash;
+
+# Three-arg opens on lexical handles; die on failure instead of silently
+# scanning nothing or losing the report.
+open(my $out_fh, '>', $out_file) or die "Cannot write $out_file: $!\n";
+
+foreach my $file (@assoc_files) {
+	open(my $assoc_fh, '<', $file) or die "Cannot read $file: $!\n";
+	while (my $line = <$assoc_fh>) {
+		foreach my $term (@obs_terms) {
+			# Literal substring test: the accession is an ID, not a regex
+			next if index($line, $term) < 0;
+			# $. is the line number in $assoc_fh (close resets it per file)
+			print {$out_fh} "match found!!!\t$file\t$term\t$.\n";
+			$bad_term_hash{$term}++;
+		}
+	}
+	close($assoc_fh);
+}
+# Buffered write errors only surface at close, so check it
+close($out_fh) or die "Cannot finish writing $out_file: $!\n";
+		
+# Summary on STDOUT: "term<TAB>match count" per matched term, sorted by ID
+print "$_\t$bad_term_hash{$_}\n"
+	foreach sort keys %bad_term_hash;
+
+