Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Add fasta split by header script
author elserj <elserj@localhost>
Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
committer elserj <elserj@localhost>
Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
svn path=/; revision=686

interactome_scripts/fasta_split_by_header_regex.pl [new file with mode: 0755]
interactome_scripts/po_obsolete_check.pl

diff --git a/interactome_scripts/fasta_split_by_header_regex.pl b/interactome_scripts/fasta_split_by_header_regex.pl
new file mode 100755 (executable)
index 0000000..954c2df
--- /dev/null
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+
+# fasta_split_by_header_regex.pl
+#
+# Copy selected records from a FASTA file into a new file.  A record is a
+# header line (one starting with ">") together with every line that follows
+# it up to the next header.  A record is kept when its header line matches
+# the regular expression given on the command line; lines that appear
+# before the first header are never written.
+#
+# usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern
+#
+# Exits non-zero if the arguments are wrong, the pattern does not compile,
+# or either file cannot be opened or written.
+
+use strict;
+use warnings;
+
+if (@ARGV != 3) {
+    print STDERR "usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern\n";
+    exit 1;
+}
+
+my ($infile, $outfile, $regex) = @ARGV;
+
+# Compile the pattern once, up front: a malformed pattern is reported
+# before the output file is created, and the match is not recompiled for
+# every header line.
+my $header_re = eval { qr/$regex/ }
+    or die "Invalid regex pattern '$regex': $@";
+
+# Three-argument open with lexical handles, so characters such as ">" or
+# "|" in a file name are never interpreted as an open mode.
+open(my $in_fh, '<', $infile)
+    or die "Error opening input file '$infile': $!\n";
+open(my $out_fh, '>', $outfile)
+    or die "Error opening output file '$outfile': $!\n";
+
+# True while the lines being read belong to a record whose header matched.
+my $header_matches_regex = 0;
+
+while (my $line = <$in_fh>) {
+    chomp $line;
+
+    # A header line starts a new record and decides whether it is kept.
+    if ($line =~ /^>/) {
+        $header_matches_regex = ($line =~ $header_re) ? 1 : 0;
+    }
+
+    # chomp + "\n" guarantees the final line ends with a newline.
+    print {$out_fh} "$line\n" if $header_matches_regex;
+}
+
+close($in_fh);
+# Buffered write errors (e.g. a full disk) only surface at close.
+close($out_fh)
+    or die "Error closing output file '$outfile': $!\n";
\ No newline at end of file
index 666a7dc3d531eef7fc533df915115507c1a4b2cb..50ff5ea07b60d6f9316e552e273e566c5b6eb8ba 100755 (executable)
@@ -60,7 +60,6 @@ foreach my $term (@$obo_terms) {
        }
 }
 
-my @source_array = ('sgn','tair','gramene','maizegdb','nasc', 'jaiswallab', 'cosmoss', 'AgBase', 'poc');
 
 my %bad_term_hash;
 
@@ -73,27 +72,15 @@ foreach my $file (@assoc_files) {
        
        print "checking file: $file\n";
        
-       my $source_db;
-       foreach my $source (@source_array) {
-               if ($file =~ /$source/i) {
-                       $source_db = $source;
-               }
-       }
-       
-       if ($source_db eq "") {
-               print "source not found for file $file\n";
-       }
-       
-       my $output_source_file = "$source_db" . "_obsoletes.txt";
-       open (output_source, ">>$output_source_file");
        my $line_counter = 0;
        while(<assoc_file>) {
                my $line = $_;
                $line_counter++;
+               # ignore commented lines
+               next if ($line =~ /^!/);
                foreach my $term (keys %obs_terms_hash) {
                        if ($line =~ $term) {
                                print  output_file "match found!!!\t$file\t$term\tline number:$line_counter\n";
-                               print  output_source "$file\t$term\tline:number:$line_counter\n";
                                if(!defined($bad_term_hash{$term})) {
                                        $bad_term_hash{$term} = 1;
                                }else{
@@ -105,7 +92,6 @@ foreach my $file (@assoc_files) {
                }
        }
        close (assoc_file);
-       close (output_source);
 }
 close (output_file);