#!/usr/bin/perl

#######################################################
# Written by Justin Elser 9/30/10                     #
# takes the genome_map.tab and tree files from hal    #
# and writes a new tree file with 2 char id           #
# replaced by full genus + species                    #
#                                                     #
#######################################################
#
# Usage:
#   species_tree_names_fix.pl genome_map.tab tree_file output_tree_file
#
# genome_map.tab   : one "id<TAB>name" pair per line; any ".fasta" in the
#                    name is removed before use.
# tree_file        : tree text in which labels appear as "id:".
# output_tree_file : copy of tree_file with every "id:" label rewritten to
#                    "name:".
#
# NOTE: output_tree_file is truncated before tree_file is read, so the two
# must not be the same path.

use strict;
use warnings;

# Run as a script, but stay loadable (require/do) without side effects.
main(@ARGV) unless caller;

# main(@args)
# Entry point. Expects exactly three arguments (map file, input tree,
# output tree). Prints the usage line to STDERR and exits with status 1
# otherwise. Dies with a descriptive message on any I/O failure.
sub main {
    my @args = @_;

    if (@args != 3) {
        print STDERR "usage: species_tree_names_fix.pl genome_map.tab tree_file output_tree_file\n";
        exit 1;
    }
    my ($tab_file, $tree_file, $out_file) = @args;

    my $name_for   = read_species_map($tab_file);
    my $id_pattern = build_id_pattern($name_for);

    open(my $tree_fh, '<', $tree_file)
        or die "cannot open tree file '$tree_file': $!\n";
    open(my $out_fh, '>', $out_file)
        or die "cannot open output file '$out_file' for writing: $!\n";

    while (my $line = <$tree_fh>) {
        chomp $line;
        print {$out_fh} rename_species($line, $name_for, $id_pattern), "\n"
            or die "cannot write to '$out_file': $!\n";
    }

    close($tree_fh);

    # Buffered write errors (e.g. disk full) only surface at close time.
    close($out_fh)
        or die "cannot close output file '$out_file': $!\n";

    return;
}

# read_species_map($tab_file)
# Reads the tab-separated id/name table and returns a hash reference
# mapping each id to its name with every ".fasta" removed. Only the first
# two tab-separated columns are used. Blank lines are skipped. Dies if the
# file cannot be opened, if a line lacks an id or a name, or if an id
# occurs more than once.
sub read_species_map {
    my ($tab_file) = @_;

    open(my $tab_fh, '<', $tab_file)
        or die "cannot open genome map '$tab_file': $!\n";

    my %name_for;
    while (my $line = <$tab_fh>) {
        # Strip the line ending, tolerating CRLF so that a stray "\r"
        # never ends up inside a species name.
        $line =~ s/\r?\n\z//;
        next if $line !~ /\S/;

        my ($spec_id, $spec_name) = split /\t/, $line;
        if (!defined $spec_id || $spec_id eq '' || !defined $spec_name) {
            die "malformed line $. in '$tab_file': expected \"id<TAB>name\"\n";
        }
        $spec_name =~ s/\.fasta//g;

        if (exists $name_for{$spec_id}) {
            die "duplicate entries in hash!!! (id '$spec_id', line $. of '$tab_file')\n";
        }
        $name_for{$spec_id} = $spec_name;
    }

    close($tab_fh);
    return \%name_for;
}

# build_id_pattern($name_for)
# Returns a compiled regex matching any known id immediately followed by
# ":" and not preceded by a word character, capturing the id in $1.
# Returns undef when the map is empty (an empty alternation would match
# every ":" in the tree).
sub build_id_pattern {
    my ($name_for) = @_;

    # Longest first, then alphabetical, so the pattern is deterministic
    # regardless of hash ordering.
    my @ids = sort { length($b) <=> length($a) || $a cmp $b } keys %{$name_for};
    return if !@ids;

    # quotemeta: ids are data, not regex source.
    my $alternation = join '|', map { quotemeta } @ids;

    # The look-behind keeps id "At" from matching the tail of a longer
    # label such as "XAt:".
    return qr/(?<!\w)($alternation):/;
}

# rename_species($line, $name_for, $id_pattern)
# Returns $line with every "id:" label replaced by "name:". All ids are
# handled in a single pass, so text inserted for one id is never rescanned
# and rewritten by another id. With an undefined pattern the line is
# returned unchanged.
sub rename_species {
    my ($line, $name_for, $id_pattern) = @_;

    return $line if !defined $id_pattern;

    $line =~ s/$id_pattern/$name_for->{$1}:/g;
    return $line;
}