Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added script that will take the list of protein coding genes and repeated genes from...
author elserj <elserj@localhost>
Thu, 10 Mar 2011 22:52:55 +0000 (22:52 +0000)
committer elserj <elserj@localhost>
Thu, 10 Mar 2011 22:52:55 +0000 (22:52 +0000)
svn path=/; revision=89

interactome_scripts/remove_transposable_elements.pl [new file with mode: 0755]

diff --git a/interactome_scripts/remove_transposable_elements.pl b/interactome_scripts/remove_transposable_elements.pl
new file mode 100755 (executable)
index 0000000..9a4f51a
--- /dev/null
@@ -0,0 +1,66 @@
+#!/usr/bin/perl
+
+# Filter a FASTA file: keep only records whose ID appears in IPR_genes and
+# does not appear in genes_to_remove, then write them to output_file.
+# Each line of the two list files is treated as one gene ID.
+
+use strict;
+use warnings;
+
+if (@ARGV != 4) {
+    print STDERR "usage: remove_transposable_elements.pl IPR_genes genes_to_remove fasta_file output_file\n";
+    exit 1;
+}
+
+my ($ipr_file, $remove_file, $fasta_file, $out_file) = @ARGV;
+
+# Read a one-ID-per-line file and return a hash ref whose keys are the IDs.
+sub read_id_set {
+    my ($path) = @_;
+    my %seen;
+    open(my $fh, '<', $path) or die "cannot open '$path' for reading: $!\n";
+    while (my $id = <$fh>) {
+        chomp $id;
+        $seen{$id} = 1;
+    }
+    close($fh);
+    return \%seen;
+}
+
+my $keep_ids   = read_id_set($ipr_file);
+my $remove_ids = read_id_set($remove_file);
+
+# Collect the sequence text belonging to each (normalized) FASTA header.
+my %fasta_hash;
+my $gene_prev;
+open(my $fasta_fh, '<', $fasta_file)
+    or die "cannot open '$fasta_file' for reading: $!\n";
+while (my $line = <$fasta_fh>) {
+    if ($line =~ /^>/) {
+        chomp $line;
+        $line =~ s/^>//;
+        # NOTE(review): this strips every "_1" in the header, not just a
+        # trailing suffix; kept as-is because the ID format is not visible here.
+        $line =~ s/_1//g;
+        $gene_prev = $line;
+        next;
+    }
+    # Ignore stray text that appears before the first header line.
+    next unless defined $gene_prev;
+    # Guarantee a trailing newline so records never run together on output.
+    $line .= "\n" unless $line =~ /\n\z/;
+    $fasta_hash{$gene_prev} .= $line;
+}
+close($fasta_fh);
+
+# Sorted keys make the output order reproducible between runs.
+open(my $out_fh, '>', $out_file)
+    or die "cannot open '$out_file' for writing: $!\n";
+foreach my $gene (sort keys %fasta_hash) {
+    next unless exists $keep_ids->{$gene};
+    next if exists $remove_ids->{$gene};
+    print {$out_fh} ">$gene\n", $fasta_hash{$gene};
+}
+
+# Buffered write errors (e.g. a full disk) only surface when closing.
+close($out_fh) or die "error closing '$out_file': $!\n";