use strict;
use warnings;
+# use this to get filename, without the rest of the directory structure
+use File::Basename;
+
##########################################################################
# po_obsolete_check.pl #
# Version 0.1 #
my $out_file = $ARGV[2];
my @assoc_files = glob("$assoc_dir*.assoc");
+# If no .assoc files were found, the files may have a different extension,
+# like .txt, so look for those instead.
+if (!@assoc_files) {
+ # No "my" here: assign to the file-level @assoc_files declared above,
+ # otherwise the result is thrown away when this block ends.
+ @assoc_files = glob("$assoc_dir*.txt");
+}
# init GO parser
my $parser = GO::Parser->new({handler=>'obj'});
}
}
+my @source_array = ('sgn','tair','gramene','maizegdb','nasc');
+
my %bad_term_hash;
open(output_file, ">$out_file");
foreach my $file (@assoc_files) {
open (assoc_file, "$file");
+
+ # Keep only the filename, without the rest of the directory structure.
+ $file = fileparse($file);
+
+ # Work out which source database this file belongs to by looking for a
+ # known source name (case-insensitive) in the filename. When several
+ # names match, the last one listed in @source_array wins.
+ # Start from an empty string so the checks below never read undef.
+ my $source_db = "";
+ foreach my $source (@source_array) {
+     if ($file =~ /$source/i) {
+         $source_db = $source;
+     }
+ }
+
+ if ($source_db eq "") {
+     # Processing continues: matches for such a file are appended to a
+     # report named just "_obsoletes.txt".
+     print "source not found for file $file\n";
+ }
+
+ # Per-source report, opened in append mode. Stop here if it cannot be
+ # opened rather than silently dropping every match written to it.
+ my $output_source_file = "$source_db" . "_obsoletes.txt";
+ open (output_source, '>>', $output_source_file)
+     or die "cannot open $output_source_file for appending: $!\n";
my $line_counter = 0;
while(<assoc_file>) {
my $line = $_;
$line_counter++;
foreach my $term (keys %obs_terms_hash) {
if ($line =~ $term) {
- print output_file "match found!!!\t$file\t$term\t$line_counter\n";
+ # Combined report: one line per obsolete term found, with file, term and line number.
+ print output_file "match found!!!\t$file\t$term\tline number:$line_counter\n";
+ # Per-source report: same "line number:" label as the combined report so both outputs share one format.
+ print output_source "$file\t$term\tline number:$line_counter\n";
if(!defined($bad_term_hash{$term})) {
$bad_term_hash{$term} = 1;
}else{
}
}
}
+ close (assoc_file);
+ close (output_source);
}
+close (output_file);
foreach my $bad_term (keys %bad_term_hash) {
print "$bad_term\t$bad_term_hash{$bad_term}\n";