#!/usr/bin/perl -w
#
# zea_Maize_PO_CoGe_name_swap.pl
#
# Provenance (from the commit this script was added in):
#   Author:   preecej
#   Date:     Wed, 5 Oct 2011 00:19:18 +0000
#   Subject:  Added maize classical gene name and symbol mapping script for POC
#   svn:      revision=183
#   Path:     preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl
#   Source:   http://gitweb.planteome.org/?a=commitdiff_plain;h=5912438b5a10a5d97dd6f9d9b5d1a3ee59f329cf;p=old-jaiswallab-svn%2F.git
#
# Purpose:
#   Reads a comma-separated gene file (arg 0) and builds a lookup from gene
#   model id to [ gene symbol, gene name ].  Then reads a tab-separated
#   association file (arg 1) and, for every row whose second column is a known
#   gene model id, overwrites column index 2 with the gene symbol and column
#   index 9 with the gene name.  All non-empty rows are written to a new file
#   named after the association file: the part of its file name before the
#   first '.', plus "_named.assoc", in the same directory as the input.
#
# Usage:
#   zea_Maize_PO_CoGe_name_swap.pl <CoGe_genes.csv> <annotations.assoc>
#
# Notes:
#   * The gene file is split on every ',' -- quoted fields that themselves
#     contain commas are NOT handled and will shift the column positions.
#   * NOTE(review): the association file is presumably GAF-style; only the
#     column positions used below are established by this script -- confirm.

use strict;
use warnings;
use Data::Dumper;

die "Usage: $0 <CoGe_genes_file> <assoc_file>\n" unless @ARGV >= 2;

my $CoGe_genes_file_name = $ARGV[0];
my $assoc_file_name      = $ARGV[1];

# ---------------------------------------------------------------------------
# Read in CoGe file (arg 0), build hash of gene model ids to symbols/gene names
# ---------------------------------------------------------------------------

open(my $coge_fh, '<', $CoGe_genes_file_name)
    or die "Cannot open '$CoGe_genes_file_name' for reading: $!\n";

# gene model id => [ gene symbol, gene name ]
my %classical_genes_by_gene_model;

# The first line is read and discarded (treated as a header row).
my $header_line = <$coge_fh>;

while (my $line = <$coge_fh>) {
    # Strip LF and any CR so a CRLF file cannot leave "\r" glued to the last
    # column (which would corrupt the gene model id used as the hash key).
    $line =~ s/[\r\n]+\z//;

    # LIMIT -1 keeps trailing empty fields so column positions stay stable.
    my @fields = split(/,/, $line, -1);

    # Rows without a gene model id column cannot be mapped; skip them rather
    # than operating on undefined values.
    next if @fields < 9;

    my ($gene_symbol, $gene_name, $gene_model_id) = @fields[0, 2, 8];

    # Remove the double quotes wrapping (or embedded in) each value.
    tr/"//d for $gene_symbol, $gene_name, $gene_model_id;

    #print $gene_symbol . "\|" . $gene_name . "\|" . $gene_model_id . "\n";

    # Only ids that start with "GRMZM" are recorded.
    if ($gene_model_id =~ /^(GRMZM)/) {
        $classical_genes_by_gene_model{$gene_model_id} = [ $gene_symbol, $gene_name ];
    }
}

close($coge_fh);

#print Dumper(\%classical_genes_by_gene_model) . "\n\n";

# ---------------------------------------------------------------------------
# Read in assoc file (arg 1), write the renamed copy
# ---------------------------------------------------------------------------

# Build the output path from the file name only, so a '.' in a directory
# component (e.g. "./data/x.assoc") does not truncate the path.
my ($dir_part, $file_part) = $assoc_file_name =~ m{\A(.*/)?([^/]*)\z}s;
$dir_part = '' unless defined $dir_part;

my ($stem) = split(/\./, $file_part);
$stem = $file_part unless defined $stem && length $stem;

my $out_file_name = $dir_part . $stem . "_named.assoc";

open(my $assoc_fh, '<', $assoc_file_name)
    or die "Cannot open '$assoc_file_name' for reading: $!\n";

open(my $out_fh, '>', $out_file_name)
    or die "Cannot open '$out_file_name' for writing: $!\n";

while (my $line = <$assoc_fh>) {
    chomp $line;

    # Empty lines are dropped from the output.
    next unless length($line) > 0;

    #print $line . "\n";

    # LIMIT -1 preserves trailing empty columns; without it they would be
    # silently removed from every row written back out.
    my @curr_line = split(/\t/, $line, -1);

    # look for each annotation's hashed gene model id
    my $gene_model_id = $curr_line[1];
    if (defined $gene_model_id
        && defined $classical_genes_by_gene_model{$gene_model_id})
    {
        my ($gene_symbol, $gene_name) =
            @{ $classical_genes_by_gene_model{$gene_model_id} };

        # add/replace the appropriate cols
        $curr_line[2] = $gene_symbol;
        $curr_line[9] = $gene_name;

        # Assigning index 9 on a short row creates undef gaps; fill them so
        # the joined output has explicit empty columns and no warnings.
        for my $col (@curr_line) {
            $col = '' unless defined $col;
        }
    }

    # output to new assoc file with appended name
    #print join("\t", @curr_line) . "\n";
    print {$out_fh} join("\t", @curr_line) . "\n";
}

close($assoc_fh);

# Buffered write errors surface at close time, so check it.
close($out_fh)
    or die "Error closing '$out_file_name': $!\n";

exit;