From d66ca4774aad77e947452d9ddb7eb9626027188b Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 23 Feb 2024 21:29:56 +0000 Subject: [PATCH] Add fasta split by header script svn path=/; revision=686 --- .../fasta_split_by_header_regex.pl | 41 +++++++++++++++++++ interactome_scripts/po_obsolete_check.pl | 18 +------- 2 files changed, 43 insertions(+), 16 deletions(-) create mode 100755 interactome_scripts/fasta_split_by_header_regex.pl diff --git a/interactome_scripts/fasta_split_by_header_regex.pl b/interactome_scripts/fasta_split_by_header_regex.pl new file mode 100755 index 0000000..954c2df --- /dev/null +++ b/interactome_scripts/fasta_split_by_header_regex.pl @@ -0,0 +1,41 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +if ($#ARGV != 2) { + print "usage: fasta_split_by_header_regex.pl input_fasta output_file regex_pattern\n"; + exit; +} + +my $infile = $ARGV[0]; +my $outfile = $ARGV[1]; +my $regex = $ARGV[2]; + +my $line_is_header = 0; +my $header_matches_regex = 0; + +open(INFILE, "$infile") or die "Error opening input file!\n"; + +open(OUTFILE, ">$outfile") or die "Error opening output file!\n"; + +while(my $line = <INFILE>) { + chomp $line; + + if ($line =~ /^>/) { + $line_is_header = 1; + $header_matches_regex = 0; # reset for new header line + if ($line =~ /$regex/) { + $header_matches_regex = 1; + print OUTFILE "$line\n"; + } + }else{ + $line_is_header = 0; + if ($header_matches_regex) { + print OUTFILE "$line\n"; + } + } +} + +close(INFILE); +close(OUTFILE); \ No newline at end of file diff --git a/interactome_scripts/po_obsolete_check.pl b/interactome_scripts/po_obsolete_check.pl index 666a7dc..50ff5ea 100755 --- a/interactome_scripts/po_obsolete_check.pl +++ b/interactome_scripts/po_obsolete_check.pl @@ -60,7 +60,6 @@ foreach my $term (@$obo_terms) { } } -my @source_array = ('sgn','tair','gramene','maizegdb','nasc', 'jaiswallab', 'cosmoss', 'AgBase', 'poc'); my %bad_term_hash; @@ -73,27 +72,15 @@ foreach my $file (@assoc_files) { print "checking file: 
$file\n"; - my $source_db; - foreach my $source (@source_array) { - if ($file =~ /$source/i) { - $source_db = $source; - } - } - - if ($source_db eq "") { - print "source not found for file $file\n"; - } - - my $output_source_file = "$source_db" . "_obsoletes.txt"; - open (output_source, ">>$output_source_file"); my $line_counter = 0; while(<assoc_file>) { my $line = $_; $line_counter++; + # ignore commented lines + next if ($line =~ /^!/); foreach my $term (keys %obs_terms_hash) { if ($line =~ $term) { print output_file "match found!!!\t$file\t$term\tline number:$line_counter\n"; - print output_source "$file\t$term\tline:number:$line_counter\n"; if(!defined($bad_term_hash{$term})) { $bad_term_hash{$term} = 1; }else{ @@ -105,7 +92,6 @@ foreach my $file (@assoc_files) { } } close (assoc_file); - close (output_source); } close (output_file); -- 2.34.1