Added maize classical gene name and symbol mapping script for POC

author preecej <preecej@localhost>

Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)

committer preecej <preecej@localhost>

Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
author preecej <preecej@localhost>
Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
committer preecej <preecej@localhost>
Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
diff --git a/preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl b/preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl

new file mode 100644 (file)

index 0000000..e76fc58
--- /dev/null
+++ b/preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl
@@ -0,0 +1,97 @@
+#!/usr/bin/perl -w
+# Replace the symbol and name columns of an association file with maize
+# classical gene symbols/names taken from a CoGe gene export.
+#
+# Usage: zea_Maize_PO_CoGe_name_swap.pl <CoGe_genes_file> <assoc_file>
+#
+# arg 0: comma-separated CoGe file; the first line is skipped, and each later
+#        row supplies the gene symbol (column 0), the gene name (column 2)
+#        and the gene model id (column 8), all 0-based.
+# arg 1: tab-separated assoc file; column 1 is looked up as a gene model id,
+#        and on a hit columns 2 and 9 are overwritten with symbol and name.
+# Output goes to "<assoc file name up to its first dot>_named.assoc", in the
+# same directory as the assoc file.
+use strict;
+use warnings;
+use Data::Dumper;
+
+die "Usage: $0 <CoGe_genes_file> <assoc_file>\n" unless @ARGV >= 2;
+
+my $CoGe_genes_file_name = $ARGV[0];
+my $assoc_file_name = $ARGV[1];
+
+# read in CoGe file (arg 0), build hash of gene model ids to symbols/gene names
+
+open(my $coge_fh, '<', $CoGe_genes_file_name)
+    or die "Cannot open CoGe genes file '$CoGe_genes_file_name': $!\n";
+
+my %classical_genes_by_gene_model;
+
+# discard the first line (presumably a header row; confirm against the export)
+my $header = <$coge_fh>;
+
+while (my $line = <$coge_fh>)
+{
+    chomp $line;
+
+    # NOTE: plain comma split; a quoted field that itself contains a comma
+    # would shift the columns read below.
+    my @curr_line = split(',', $line);
+
+    # rows too short to carry a gene model id (column 8) cannot be mapped
+    next if @curr_line < 9;
+
+    my ($gene_symbol, $gene_name, $gene_model_id) = @curr_line[0, 2, 8];
+    tr/"//d for ($gene_symbol, $gene_name, $gene_model_id);
+
+    # only ids starting with "GRMZM" are kept
+    if ($gene_model_id =~ /\AGRMZM/) {
+        $classical_genes_by_gene_model{$gene_model_id} = [ $gene_symbol, $gene_name ];
+    }
+}
+
+close($coge_fh);
+
+#print Dumper(\%classical_genes_by_gene_model) . "\n\n";
+
+# read in assoc file (arg 1)
+
+open(my $assoc_fh, '<', $assoc_file_name)
+    or die "Cannot open assoc file '$assoc_file_name': $!\n";
+
+# Cut at the first dot of the file name itself (never at a dot inside a
+# directory part such as "./" or "../"), then append "_named.assoc".
+(my $out_file_name = $assoc_file_name) =~ s{\.[^/]*\z}{};
+$out_file_name .= "_named.assoc";
+
+open(my $out_fh, '>', $out_file_name)
+    or die "Cannot write '$out_file_name': $!\n";
+
+while (my $line = <$assoc_fh>)
+{
+    chomp $line;
+
+    next if length($line) == 0;
+
+    # limit -1 keeps trailing empty columns so rows are written back intact
+    my @curr_line = split(/\t/, $line, -1);
+
+    # look for each annotation's hashed gene model id
+    if (defined $curr_line[1] && exists $classical_genes_by_gene_model{$curr_line[1]}) {
+        # add/replace the appropriate cols
+        my ($gene_symbol, $gene_name) = @{ $classical_genes_by_gene_model{$curr_line[1]} };
+        $curr_line[2] = $gene_symbol;
+        $curr_line[9] = $gene_name;
+
+        # assigning column 9 on a short row creates undef gaps; emit them as
+        # empty columns instead of warning inside join
+        defined $_ or $_ = '' for @curr_line;
+    }
+
+    # output to new assoc file with appended name
+    print {$out_fh} join("\t", @curr_line) . "\n"
+        or die "Cannot write to '$out_file_name': $!\n";
+}
+
+close($assoc_fh);
+close($out_fh) or die "Cannot close '$out_file_name': $!\n";
+exit;
author	preecej <preecej@localhost>
	Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
committer	preecej <preecej@localhost>
	Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)