Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added detection of non-unique gene-ids
author elserj <elserj@localhost>
Mon, 27 Sep 2010 23:25:38 +0000 (23:25 +0000)
committer elserj <elserj@localhost>
Mon, 27 Sep 2010 23:25:38 +0000 (23:25 +0000)
svn path=/; revision=52

interactome_scripts/fasta_verify.pl

index 6f12a3826589b22027241561090e7144f841ee43..ac6eb842959e9e02e00d155628ccbc17e080ac74 100755 (executable)
@@ -3,20 +3,26 @@
 use strict;
 use warnings;
 
+require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+
 
 if($#ARGV != 0) {
-       print "usage: fasta_verify.pl fast_file\n";
+       print "usage: fasta_verify.pl fasta_file\n";
        exit;
 }
 
 my $in_file = $ARGV[0];
 
+my $species = find_species($in_file);
+
 open(in_file, "$in_file");
 
 my $prev_line_is_header = 0;
 
 my $counter = 0;
 
+my %gene_id_hash;
+
 while(<in_file>) {
        my $line = $_;
        chomp $line;
@@ -46,6 +52,13 @@ while(<in_file>) {
        # set prev_line_header info for next time through loop
        if($curr_line_is_header) {
                $prev_line_is_header = 1;
+               #my $gene = find_gene($line,$species);
+               my $gene = $line;
+               if(!defined($gene_id_hash{$gene})) {
+                       $gene_id_hash{$gene} = $line;
+               }else{
+                       print "Error: gene $gene has multiple sequences associated with it in file $in_file\n"
+               }
        }else{
                $prev_line_is_header = 0;
        }