Add fasta split by header script

author elserj <elserj@localhost>

Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)

committer elserj <elserj@localhost>

Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
author elserj <elserj@localhost>
Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
committer elserj <elserj@localhost>
Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
diff --git a/interactome_scripts/fasta_split_by_header_regex.pl b/interactome_scripts/fasta_split_by_header_regex.pl

new file mode 100755 (executable)

index 0000000..954c2df
--- /dev/null
+++ b/interactome_scripts/fasta_split_by_header_regex.pl
@@ -0,0 +1,57 @@
+#!/usr/bin/perl
+
+# fasta_split_by_header_regex.pl
+#
+# Copies every FASTA record whose header line (a line starting with ">")
+# matches a regular expression from input_fasta into output_file. A record
+# is the header line plus all following lines up to the next header. Lines
+# that appear before the first header are never copied.
+#
+# usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern
+
+use strict;
+use warnings;
+
+if (@ARGV != 3) {
+    # Report misuse on STDERR with a non-zero status so callers can detect it.
+    print STDERR "usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern\n";
+    exit 1;
+}
+
+my ($infile, $outfile, $regex) = @ARGV;
+
+# Compile the pattern once, before any file is touched, so an invalid
+# pattern is reported up front instead of after the output file has
+# already been truncated.
+my $pattern = eval { qr/$regex/ };
+die "Invalid regex pattern '$regex': $@" if !defined $pattern;
+
+# Three-argument open with lexical handles: the file names are used
+# literally, so characters such as ">" or "|" in a name cannot change the
+# open mode.
+open(my $in_fh, '<', $infile)
+    or die "Error opening input file '$infile': $!\n";
+open(my $out_fh, '>', $outfile)
+    or die "Error opening output file '$outfile': $!\n";
+
+# True while the most recently seen header matched the pattern.
+my $header_matches_regex = 0;
+
+while (my $line = <$in_fh>) {
+    chomp $line;
+
+    # Each header starts a new record, so the match is re-evaluated here.
+    if ($line =~ /^>/) {
+        $header_matches_regex = ($line =~ $pattern) ? 1 : 0;
+    }
+
+    # Header and sequence lines alike are copied only for matching records.
+    if ($header_matches_regex) {
+        print {$out_fh} "$line\n"
+            or die "Error writing to output file '$outfile': $!\n";
+    }
+}
+
+close($in_fh);
+# Buffered write errors may only surface at close, so check it.
+close($out_fh) or die "Error closing output file '$outfile': $!\n";
\ No newline at end of file
diff --git a/interactome_scripts/po_obsolete_check.pl b/interactome_scripts/po_obsolete_check.pl

index 666a7dc3d531eef7fc533df915115507c1a4b2cb..50ff5ea07b60d6f9316e552e273e566c5b6eb8ba 100755 (executable)
--- a/interactome_scripts/po_obsolete_check.pl
+++ b/interactome_scripts/po_obsolete_check.pl
@@ -60,7 +60,6 @@ foreach my $term (@$obo_terms) {
         }
  }
  
-my @source_array = ('sgn','tair','gramene','maizegdb','nasc', 'jaiswallab', 'cosmoss', 'AgBase', 'poc');
  
  my %bad_term_hash;
  
@@ -73,27 +72,15 @@ foreach my $file (@assoc_files) {
         
         print "checking file: $file\n";
         
-       my $source_db;
-       foreach my $source (@source_array) {
-               if ($file =~ /$source/i) {
-                       $source_db = $source;
-               }
-       }
-       
-       if ($source_db eq "") {
-               print "source not found for file $file\n";
-       }
-       
-       my $output_source_file = "$source_db" . "_obsoletes.txt";
-       open (output_source, ">>$output_source_file");
         my $line_counter = 0;
         while(<assoc_file>) {
                 my $line = $_;
                 $line_counter++;
+               # ignore commented lines
+               next if ($line =~ /^!/);
                 foreach my $term (keys %obs_terms_hash) {
                         if ($line =~ $term) {
                                 print  output_file "match found!!!\t$file\t$term\tline number:$line_counter\n";
-                               print  output_source "$file\t$term\tline:number:$line_counter\n";
                                 if(!defined($bad_term_hash{$term})) {
                                         $bad_term_hash{$term} = 1;
                                 }else{
@@ -105,7 +92,6 @@ foreach my $file (@assoc_files) {
                 }
         }
         close (assoc_file);
-       close (output_source);
  }
  close (output_file);
author	elserj <elserj@localhost>
	Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
committer	elserj <elserj@localhost>
	Fri, 23 Feb 2024 21:29:56 +0000 (21:29 +0000)
interactome_scripts/fasta_split_by_header_regex.pl	[new file with mode: 0755]	patch \| blob
interactome_scripts/po_obsolete_check.pl		patch \| blob \| history