From bf85a36724ee52470bac86c598b03c2b90132658 Mon Sep 17 00:00:00 2001
From: elserj
Date: Fri, 17 Sep 2010 23:09:10 +0000
Subject: [PATCH] Fixes the gene headers in the fasta files so that they are
 correctly parsed by blast. Changes it so the only info in the header is the
 protein/gene id.

svn path=/; revision=37
---
 interactome_scripts/fasta_header_fix.pl | 53 +++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100755 interactome_scripts/fasta_header_fix.pl

diff --git a/interactome_scripts/fasta_header_fix.pl b/interactome_scripts/fasta_header_fix.pl
new file mode 100755
index 0000000..eaaa1a8
--- /dev/null
+++ b/interactome_scripts/fasta_header_fix.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+
+# Rewrites the header lines of FASTA files: every line starting with ">" is
+# replaced by ">" followed by the id returned by find_gene(), so that the
+# header carries only the protein/gene id and is parsed correctly by blast.
+#
+# Usage: fasta_header_fix.pl [file.fa]
+#   With exactly one argument only that file is processed; otherwise every
+#   "*.fa" file in the current directory is processed.  The result for
+#   <input> is written to "<input>_fixed.fa".
+#
+# NOTE(review): because outputs also end in ".fa", a second run without
+# arguments will pick up earlier "*_fixed.fa" outputs as inputs.
+
+use strict;
+use warnings;
+
+# Presumably defines find_species() and find_gene(), which are called below
+# but not defined in this script.
+require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+
+my @files = (@ARGV == 1) ? ($ARGV[0]) : glob("*.fa");
+
+foreach my $input_file (@files) {
+    my $output_file = $input_file . "_fixed.fa";
+
+    # Three-argument open with lexical handles: the file name can never be
+    # interpreted as an open mode, and the handles are scoped to this loop.
+    open(my $in_fh, '<', $input_file)
+        or die "Error: File $input_file can not be opened: $!";
+    open(my $out_fh, '>', $output_file)
+        or die "Error: File $output_file can not be opened for writing: $!";
+
+    my $species = find_species($input_file);
+
+    # Read the input one line at a time.
+    while (my $line = <$in_fh>) {
+        chomp $line;
+
+        # Header lines start with ">"; all other lines are copied unchanged.
+        if ($line =~ /^>/) {
+            my $gene = find_gene($line, $species);
+            $line = ">" . $gene;
+        }
+        print {$out_fh} "$line\n";
+    }
+
+    close($in_fh);
+
+    # Buffered write errors only surface at close, so check it.
+    close($out_fh)
+        or die "Error: File $output_file could not be written: $!";
+}
-- 
2.34.1