#!/usr/bin/perl

use strict;
use warnings;

##########################################################################
#  po_obsolete_check.pl                                                  #
#  Version 0.1                                                           #
#                                                                        #
#  Written by Justin Elser, Dept. of BPP, OSU  2011                      #
#                                                                        #
#  Program will check the current po obo file for terms that have        #
#  been obsoleted.  It will then check all association files for         #
#  annotations linked with those terms and create output files to be     #
#  used for manual intervention by annotation curators.                  #
#                                                                        #
#  Program should be made part of the SOP when loading new DBs           #
#                                                                        #
##########################################################################

# Provenance: first committed by elserj on Sat, 8 Jan 2011 01:29:38 +0000
# as interactome_scripts/po_obsolete_check.pl (svn revision 82, git commit
# 58fb81ed7f67151d5b37b4fa6239d987bcdbd771): "Script to be used for
# detection of obsolete terms in PO, to be used as part of the pipeline
# when loading the DB".
#
# Usage:
#   po_obsolete_check.pl OBO_file directory_with_association_files output_file
#
# Output:
#   output_file  one tab-separated line per (association line, obsolete
#                term) hit: "match found!!!", file, term id, line number.
#   STDOUT       one tab-separated line per obsolete term that was hit:
#                term id, number of matching association lines.

use File::Glob qw(bsd_glob);
use File::Spec;

# Use Chris Mungall's GO::Parser to do the searching for is_obsoletes
use GO::Parser;

# Parse the OBO file and return a hash (as a flat list) mapping the
# accession of every obsolete term to its name.
# Dies if the parser does not hand back a graph.
sub load_obsolete_terms {
    my ($obo_path) = @_;

    my $parser = GO::Parser->new({ handler => 'obj' });
    $parser->parse($obo_path);

    my $ont = $parser->handler->graph
        or die "No ontology graph could be built from '$obo_path'\n";

    my %name_of;
    for my $term (@{ $ont->get_all_nodes }) {
        next unless $term->is_obsolete;

        my $id = $term->acc;

        # An empty id would be a substring of every line; never store one.
        next unless defined $id && length $id;

        $name_of{$id} = $term->name;
    }
    return %name_of;
}

# Return the sorted list of *.assoc files for the given location.
# If the location is a directory, a trailing slash is optional.  Otherwise
# it is used as a literal path prefix, which is how the argument was
# always treated before, so existing invocations keep working.
sub find_assoc_files {
    my ($location) = @_;

    my $pattern = -d $location
        ? File::Spec->catfile($location, '*.assoc')
        : "$location*.assoc";

    # bsd_glob does not split the pattern on whitespace, so paths that
    # contain spaces are handled correctly.
    my @files = bsd_glob($pattern);
    return sort @files;
}

# check for arguments and explain usage
if (@ARGV != 3) {
    print STDERR "usage: po_obsolete_check.pl OBO_file directory_with_association_files output_file\n";

    # Non-zero status so a calling pipeline notices the bad invocation.
    exit 1;
}

my ($obo_file, $assoc_dir, $out_file) = @ARGV;

# hash to store obsolete terms (accession => name)
my %obs_terms_hash = load_obsolete_terms($obo_file);

# Sorted once up front so report lines come out in a reproducible order.
my @obsolete_ids = sort keys %obs_terms_hash;

my @assoc_files = find_assoc_files($assoc_dir);
warn "warning: no .assoc files found for '$assoc_dir'\n" unless @assoc_files;

# obsolete accession => number of association lines that mention it
my %bad_term_hash;

open my $out_fh, '>', $out_file
    or die "Cannot open '$out_file' for writing: $!\n";

for my $file (@assoc_files) {
    open my $assoc_fh, '<', $file
        or die "Cannot open '$file' for reading: $!\n";

    my $line_counter = 0;
    while (my $line = <$assoc_fh>) {
        $line_counter++;

        for my $term (@obsolete_ids) {

            # Literal substring test: the accession is data, not a regex.
            next if index($line, $term) < 0;

            print {$out_fh} "match found!!!\t$file\t$term\t$line_counter\n";
            $bad_term_hash{$term}++;
        }
    }

    close $assoc_fh or die "Error while reading '$file': $!\n";
}

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Error while writing '$out_file': $!\n";

for my $bad_term (sort keys %bad_term_hash) {
    print "$bad_term\t$bad_term_hash{$bad_term}\n";
}