From: elserj Date: Mon, 27 Sep 2010 23:25:38 +0000 (+0000) Subject: Added detection of non-unique gene-ids X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=4a14b9dec3ba4929bd5bc502f5d08e518c059259;p=old-jaiswallab-svn%2F.git Added detection of non-unique gene-ids svn path=/; revision=52 --- diff --git a/interactome_scripts/fasta_verify.pl b/interactome_scripts/fasta_verify.pl index 6f12a38..ac6eb84 100755 --- a/interactome_scripts/fasta_verify.pl +++ b/interactome_scripts/fasta_verify.pl @@ -3,20 +3,26 @@ use strict; use warnings; +require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl"; + if($#ARGV != 0) { - print "usage: fasta_verify.pl fast_file\n"; + print "usage: fasta_verify.pl fasta_file\n"; exit; } my $in_file = $ARGV[0]; +my $species = find_species($in_file); + open(in_file, "$in_file"); my $prev_line_is_header = 0; my $counter = 0; +my %gene_id_hash; + while(<in_file>) { my $line = $_; chomp $line; @@ -46,6 +52,13 @@ while(<in_file>) { # set prev_line_header info for next time through loop if($curr_line_is_header) { $prev_line_is_header = 1; + #my $gene = find_gene($line,$species); + my $gene = $line; + if(!defined($gene_id_hash{$gene})) { + $gene_id_hash{$gene} = $line; + }else{ + print "Error: gene $gene has multiple sequences associated with it in file $in_file\n" + } }else{ $prev_line_is_header = 0; }