Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added maize classical gene name and symbol mapping script for POC
author: preecej <preecej@localhost>
Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
committer: preecej <preecej@localhost>
Wed, 5 Oct 2011 00:19:18 +0000 (00:19 +0000)
svn path=/; revision=183

preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl [new file with mode: 0644]

diff --git a/preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl b/preecej/perl_singletons/zea_Maize_PO_CoGe_name_swap.pl
new file mode 100644 (file)
index 0000000..e76fc58
--- /dev/null
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+# Swap maize gene-model ids for classical gene symbols/names in an assoc file.
+#
+# Usage: zea_Maize_PO_CoGe_name_swap.pl <CoGe_genes.csv> <annotations.assoc>
+#
+# Arg 0 is a comma-separated CoGe file; its first line is skipped (presumably a
+# header). Field 0 is the gene symbol, field 2 the gene name, field 8 the model id.
+# Arg 1 is a tab-separated association file whose field 1 holds a gene model
+# id. Output goes to "<assoc basename up to its first dot>_named.assoc", in
+# the same directory as the assoc file. Dies if a file cannot be opened.
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Basename qw(fileparse);
+
+die "Usage: $0 <CoGe_genes.csv> <assoc_file>\n" unless @ARGV == 2;
+my ($CoGe_genes_file_name, $assoc_file_name) = @ARGV;
+
+# read in CoGe file (arg 0), build hash of gene model ids to symbols/gene names
+open(my $coge_fh, '<', $CoGe_genes_file_name)
+    or die "Cannot open '$CoGe_genes_file_name' for reading: $!\n";
+
+my %classical_genes_by_gene_model;
+
+my $header = <$coge_fh>;    # first line is never parsed as data
+while (my $line = <$coge_fh>) {
+    chomp $line;
+    # Naive split: a quoted field that itself contains a comma will shift
+    # the columns. Rows too short to carry a model id are skipped.
+    my @fields = split(/,/, $line);
+    next if @fields < 9;
+
+    my ($gene_symbol, $gene_name, $gene_model_id) = @fields[0, 2, 8];
+    tr/"//d for $gene_symbol, $gene_name, $gene_model_id;
+
+    # only GRMZM gene models are mapped; a repeated id keeps its last row
+    if ($gene_model_id =~ /^GRMZM/) {
+        $classical_genes_by_gene_model{$gene_model_id} = [ $gene_symbol, $gene_name ];
+    }
+}
+close($coge_fh);
+
+#print Dumper(\%classical_genes_by_gene_model) . "\n\n";
+
+# read in assoc file (arg 1); the output name is built from the basename only,
+# so a dot in the directory part (e.g. "./in.assoc") cannot truncate the path
+open(my $assoc_fh, '<', $assoc_file_name)
+    or die "Cannot open '$assoc_file_name' for reading: $!\n";
+
+my ($base_name, $dir_name) = fileparse($assoc_file_name, qr/\..*/);
+my $out_file_name = $dir_name . $base_name . "_named.assoc";
+open(my $out_fh, '>', $out_file_name)
+    or die "Cannot open '$out_file_name' for writing: $!\n";
+
+while (my $line = <$assoc_fh>) {
+    chomp $line;
+    next unless length $line;
+
+    # limit -1 keeps trailing empty columns so they survive the round trip
+    my @cols = split(/\t/, $line, -1);
+
+    # look for each annotation's hashed gene model id (absent on 1-column lines)
+    my $names = defined $cols[1] ? $classical_genes_by_gene_model{ $cols[1] } : undef;
+    if ($names) {
+        # pad short rows so join() never sees an undefined column
+        push @cols, ('') x (10 - @cols) if @cols < 10;
+        # add/replace the appropriate cols: [2] <- symbol, [9] <- gene name
+        @cols[2, 9] = @{$names};
+    }
+    print {$out_fh} join("\t", @cols) . "\n";
+}
+close($assoc_fh);
+close($out_fh) or die "Cannot close '$out_file_name': $!\n";
+exit;