Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Add check for .txt assoc files
author elserj <elserj@localhost>
Wed, 4 May 2011 17:55:20 +0000 (17:55 +0000)
committer elserj <elserj@localhost>
Wed, 4 May 2011 17:55:20 +0000 (17:55 +0000)
svn path=/; revision=93

interactome_scripts/po_obsolete_check.pl

index a9712ad277ea8e91badc7c16766a912157c01648..bcdc9bc001bc59549d8e8d77acddb92f48ddf539 100755 (executable)
@@ -3,6 +3,9 @@
 use strict;
 use warnings;
 
+# use this to get filename, without the rest of the directory structure
+use File::Basename;
+
 ##########################################################################
 #   po_obsolete_check.pl                                                 #
 #    Version 0.1                                                         #
@@ -32,6 +35,10 @@ my $assoc_dir = $ARGV[1];
 my $out_file = $ARGV[2];
 
 my @assoc_files = glob("$assoc_dir*.assoc");
+# if the files had a different extension, like .txt, check here
+if ($#assoc_files == 0) {
+       my @assoc_files = glob("$assoc_dir*.txt");
+}
 
 # init GO parser
 my $parser = GO::Parser->new({handler=>'obj'});
@@ -53,19 +60,38 @@ foreach my $term (@$obo_terms) {
        }
 }
 
+my @source_array = ('sgn','tair','gramene','maizegdb','nasc');
+
 my %bad_term_hash;
 
 open(output_file, ">$out_file");
 
 foreach my $file (@assoc_files) {
        open (assoc_file, "$file");
+       
+       $file = fileparse($file);
+       
+       my $source_db;
+       foreach my $source (@source_array) {
+               if ($file =~ /$source/i) {
+                       $source_db = $source;
+               }
+       }
+       
+       if ($source_db eq "") {
+               print "source not found for file $file\n";
+       }
+       
+       my $output_source_file = "$source_db" . "_obsoletes.txt";
+       open (output_source, ">>$output_source_file");
        my $line_counter = 0;
        while(<assoc_file>) {
                my $line = $_;
                $line_counter++;
                foreach my $term (keys %obs_terms_hash) {
                        if ($line =~ $term) {
-                               print  output_file "match found!!!\t$file\t$term\t$line_counter\n";
+                               print  output_file "match found!!!\t$file\t$term\tline number:$line_counter\n";
+                               print  output_source "$file\t$term\tline:number:$line_counter\n";
                                if(!defined($bad_term_hash{$term})) {
                                        $bad_term_hash{$term} = 1;
                                }else{
@@ -76,7 +102,10 @@ foreach my $file (@assoc_files) {
                        }
                }
        }
+       close (assoc_file);
+       close (output_source);
 }
+close (output_file);
                
 foreach my $bad_term (keys %bad_term_hash) {
        print "$bad_term\t$bad_term_hash{$bad_term}\n";