--- /dev/null
+#!/usr/bin/perl
+
+# fasta_split_by_header_regex.pl
+#
+# Usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern
+#
+# Copies FASTA records from input_fasta to output_file, keeping only those
+# records whose header line (a line beginning with ">") matches
+# regex_pattern.  A kept record consists of its header plus every following
+# line up to, but not including, the next header.  Lines that appear before
+# the first header are never written.
+#
+# regex_pattern is interpreted as a Perl regular expression and is matched
+# against the whole header line, including the leading ">".
+
+use strict;
+use warnings;
+
+if (@ARGV != 3) {
+    print "usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern\n";
+    exit;
+}
+
+my ($infile, $outfile, $regex) = @ARGV;
+
+# Compile the pattern once, before any file is touched, so that an invalid
+# pattern is reported immediately instead of aborting mid-run after the
+# output file has already been truncated.
+my $header_re = eval { qr/$regex/ }
+    or die "Invalid regex pattern '$regex': $@";
+
+# Three-argument open keeps characters such as ">" or "|" in a filename from
+# being interpreted as an open mode or a command to run.
+open(my $in_fh, '<', $infile)
+    or die "Error opening input file '$infile': $!\n";
+open(my $out_fh, '>', $outfile)
+    or die "Error opening output file '$outfile': $!\n";
+
+# True while the record currently being read has a matching header.
+my $header_matches_regex = 0;
+
+while (my $line = <$in_fh>) {
+    chomp $line;
+
+    # Each header starts a new record, so re-evaluate the match there; the
+    # result then applies to the header itself and the lines that follow it.
+    if ($line =~ /^>/) {
+        $header_matches_regex = ($line =~ $header_re) ? 1 : 0;
+    }
+
+    print {$out_fh} "$line\n" if $header_matches_regex;
+}
+
+close($in_fh);
+
+# Buffered write errors (e.g. a full disk) only surface when the handle is
+# closed, so the result of close must be checked for the output file.
+close($out_fh)
+    or die "Error closing output file '$outfile': $!\n";
\ No newline at end of file
}
}
-my @source_array = ('sgn','tair','gramene','maizegdb','nasc', 'jaiswallab', 'cosmoss', 'AgBase', 'poc');
my %bad_term_hash;
print "checking file: $file\n";
- my $source_db;
- foreach my $source (@source_array) {
- if ($file =~ /$source/i) {
- $source_db = $source;
- }
- }
-
- if ($source_db eq "") {
- print "source not found for file $file\n";
- }
-
- my $output_source_file = "$source_db" . "_obsoletes.txt";
- open (output_source, ">>$output_source_file");
my $line_counter = 0;
while(<assoc_file>) {
my $line = $_;
$line_counter++;
+ # ignore commented lines
+ next if ($line =~ /^!/);
foreach my $term (keys %obs_terms_hash) {
if ($line =~ $term) {
print output_file "match found!!!\t$file\t$term\tline number:$line_counter\n";
- print output_source "$file\t$term\tline:number:$line_counter\n";
if(!defined($bad_term_hash{$term})) {
$bad_term_hash{$term} = 1;
}else{
}
}
close (assoc_file);
- close (output_source);
}
close (output_file);