--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+## Written by Justin Elser
+# 10/20/14
+#
+# Takes a fasta file and creates a new fasta file that keeps a single
+# isoform per gene. Isoforms are chosen by the suffix of the gene ID
+# (e.g. ".2" and above are dropped), not by comparing sequence lengths.
+
+# Load the shared helper script from the first checkout location that
+# exists under the user's home directory. The find_species() and
+# find_gene() routines called further down are presumably defined there.
+{
+    # An unset HOME is treated as an empty prefix instead of interpolating undef.
+    my $home_dir = defined $ENV{HOME} ? $ENV{HOME} : "";
+    my @helper_candidates = (
+        "$home_dir/scripts/jaiswallab/interactome_scripts/find_species.pl",
+        "$home_dir/jaiswallab_svn/interactome_scripts/find_species.pl",
+    );
+
+    foreach my $helper_path (@helper_candidates) {
+        if (-e $helper_path) {
+            require $helper_path;
+            last;
+        }
+    }
+
+    # Fail early with a readable message rather than with a later
+    # "Undefined subroutine" error when no checkout provided the helpers.
+    unless (defined &find_species && defined &find_gene) {
+        die "Could not load find_species.pl (find_species/find_gene are undefined); looked in:\n"
+            . join("", map { "  $_\n" } @helper_candidates);
+    }
+}
+
+# Exactly two arguments are required: the input and the output fasta path.
+# A wrong invocation is an error, so the usage text goes to STDERR and the
+# exit status is non-zero; calling scripts can then detect the failure.
+if (@ARGV != 2) {
+    print STDERR "usage: make_fasta_from_fasta_with_no_isoforms.pl input_fasta output_fasta\n";
+    exit 1;
+}
+
+# Positional arguments: input fasta path first, output fasta path second.
+my ($infile, $outfile) = @ARGV;
+
+# Species label for the input file, as reported by the external
+# find_species() helper; it is used for filtering and in output headers.
+my $species = find_species($infile);
+
+
+# Read the fasta file and collect the sequence text of every retained
+# record in %gene_hash, keyed by the gene ID that the external find_gene()
+# helper extracts from the record's header line.
+my %gene_hash;
+
+my $gene_id = "";    # ID of the record being read ("" means none yet)
+my $seq     = "";    # sequence lines (newlines kept) gathered for $gene_id
+
+# Returns true when $id carries a suffix that marks a record to drop:
+#   - any species:   ".2" to ".9", or ".10" to ".99"
+#   - Zea_mays only: "_T" plus two digits whose second digit is not 1
+# NOTE(review): the Zea_mays pattern keeps "_T11", "_T21", ... as well as
+# "_T01" -- confirm that this is intended.
+sub _is_dropped_isoform {
+    my ($id, $species_name) = @_;
+    return 1 if $id =~ /\.[1-9][0-9]$/ || $id =~ /\.[2-9]$/;
+    return 1 if defined $species_name
+        && $species_name eq "Zea_mays"
+        && $id =~ /_T[0-9][02-9]$/;
+    return 0;
+}
+
+# Stores one finished record in the hash referenced by $records, unless the
+# record has no usable ID or its ID is rejected by _is_dropped_isoform().
+sub _store_fasta_record {
+    my ($records, $id, $sequence, $species_name) = @_;
+    return if !defined $id || $id eq "";
+    return if _is_dropped_isoform($id, $species_name);
+    $records->{$id} = $sequence;
+    return;
+}
+
+open(my $in_fh, '<', $infile)
+    or die "Error opening input file '$infile': $!\n";
+
+while (my $line = <$in_fh>) {
+    # match the > at beginning of lines
+    if ($line =~ /^>/) {
+        # A header closes the previous record, so file that one first.
+        chomp($line);
+        _store_fasta_record(\%gene_hash, $gene_id, $seq, $species);
+
+        # IMPORTANT: always reset $seq for the next gene. Resetting it only
+        # after a successful store would let text seen before the first
+        # header, or the sequence of a record without an ID, leak into the
+        # following gene.
+        $seq     = "";
+        $gene_id = find_gene($line, $species);
+    }
+    else {
+        $seq .= $line;
+    }
+}
+
+# The last record has no following header to trigger its storage, so it is
+# filed here through the same filter as every other record. Nothing is
+# stored when the input contained no header at all.
+_store_fasta_record(\%gene_hash, $gene_id, $seq, $species);
+
+close($in_fh);
+
+# Write every retained record as a ">species|gene_id" header followed by
+# its sequence text. Keys are sorted so that the output order is the same
+# on every run instead of following Perl's randomized hash order.
+open(my $out_fh, '>', $outfile)
+    or die "Error opening output file '$outfile': $!\n";
+
+foreach my $gene (sort keys %gene_hash) {
+    my $sequence = defined $gene_hash{$gene} ? $gene_hash{$gene} : "";
+
+    # A sequence taken from the end of a file without a final newline would
+    # otherwise run straight into the next record's header line.
+    $sequence .= "\n" if $sequence ne "" && $sequence !~ /\n\z/;
+
+    print {$out_fh} ">$species|$gene\n$sequence";
+}
+
+# Buffered write errors (e.g. a full disk) only surface when closing.
+close($out_fh)
+    or die "Error closing output file '$outfile': $!\n";