#!/usr/bin/perl
#
# fasta_verify.pl - test and verify that a FASTA file is ready for InParanoid.
#
# Provenance: added by elserj on Tue, 7 Sep 2010 22:18:52 +0000 as
# interactome_scripts/fasta_verify.pl (svn revision 29), commit subject
# "Script to test and verify fasta files are ready for InParanoid".
#
# Usage: fasta_verify.pl fasta_file
#
# Reads the file line by line and prints one message to STDOUT, including the
# 1-based line number, for each problem found:
#   * a header line (starting with '>') that directly follows another header,
#     i.e. a record with no sequence data;
#   * a line directly after a header that does not start with a letter or '*'
#     (typically a blank line);
#   * a header line containing a tab character;
#   * a header on the last line of the file (no sequence data follows it).
#
# Exits with status 1 on a usage error and dies if the file cannot be opened.

use strict;
use warnings;

# Exactly one argument (the FASTA file) is required.
if (@ARGV != 1) {
    print STDERR "usage: fasta_verify.pl fasta_file\n";
    exit 1;
}

my $in_file = $ARGV[0];

# Three-argument open with a lexical handle: the file name can never be
# interpreted as a mode or a pipe, and a failure is reported instead of being
# silently ignored.
open(my $in_fh, '<', $in_file)
    or die "Cannot open $in_file for reading: $!\n";

my $prev_line_is_header = 0;    # was the previous line a '>' header?
my $counter             = 0;    # 1-based number of the line being checked

while (my $line = <$in_fh>) {
    chomp $line;
    $counter++;

    my $curr_line_is_header = ($line =~ /^>/) ? 1 : 0;

    if ($prev_line_is_header) {
        if ($curr_line_is_header) {
            # Two headers in a row: the previous record has no sequence.
            print "Error: Header found with no sequence data on line $counter in file $in_file\n";
        }
        elsif ($line !~ /^[a-zA-Z*]/) {
            # Only checked for non-header lines, so that two consecutive
            # headers are not additionally reported as a "blank line".
            print "Error: Blank line found on line $counter in file $in_file\n";
        }
    }

    # Tabs in a header line are reported together with a suggested fix.
    if ($curr_line_is_header && $line =~ /\t/) {
        print "Error: Tab character in header on line $counter in file $in_file\n";
        print "Recommend using sed 's/\\t/\|/g' to replace tabs with pipe symbols\n";
    }

    # Remember the header state for the next iteration.
    $prev_line_is_header = $curr_line_is_header;
}

# A header on the very last line has no sequence data after it either; the
# loop above can only notice a missing sequence when another line follows.
if ($prev_line_is_header) {
    print "Error: Header found with no sequence data at end of file $in_file (line $counter)\n";
}

close($in_fh)
    or die "Cannot close $in_file: $!\n";