Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
First draft
author preecej <preecej@localhost>
Wed, 23 Jan 2013 22:58:26 +0000 (22:58 +0000)
committer preecej <preecej@localhost>
Wed, 23 Jan 2013 22:58:26 +0000 (22:58 +0000)
svn path=/; revision=423

Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl

index 88cd1a76c6382cb68344953d9b8c8d0de6887cc4..f5cece53c9f2173d936b9ddab86427cdbd829fcd 100644 (file)
@@ -4,23 +4,23 @@ use strict;
 use Switch;
 use Data::Dumper;
 
-#perl script for ssr data
+# perl script for ssr data, specialized to monococcum
+# note: this script could easily be generalized to n comparative cases, using
+# recursion
 
 $Data::Dumper::Pad = "... "; 
 
 #file handles
-my $esp_ssr = "./brasy-esp.ssr";
-my $gre_ssr = "./brasy-gre.ssr";
-my $cor_ssr = "./brasy-cor.ssr";;
-my $ortho = "./sylvaticum_orthologs.txt";
+my $DV_ssr = "./TmDV92_ssr.txt";
+my $G3_ssr = "./TmG3116_ssr.txt";
+my $ortho = "./monococcum_orthologs.txt";
 
 # hashes
-my %esp;
-my %gre;
-my %cor;
+my %DV;
+my %G3;
 
-#load ssr data into hash keyed by spain gene id   
-#add ssr_words hash to the spain_id_gene_key
+#load ssr data into hash keyed by DV gene id   
+#add ssr_words hash to the DV_id_gene_key
 #add first ssr word as key to ssr_words hash with ssr as key and # as value
 #add additional ssr words for this gene as encountered (if Gene exists)
 sub pop_locus_hashes($)
@@ -52,27 +52,18 @@ sub pop_locus_hashes($)
     return %local_hash
 }
 
-
-    #Does this locus already exist?
-#    if (!exists $esp{$locus_id})
+# Does this locus already exist?
+#    if (!exists $DV{$locus_id})
 #    {
 
-
-
-
-#Read blastall results
-#foreach line splits on tab get 1st gene identifier
-    #look in spain hash for gene id
+# Read blastall results
+# For each line, split on tab and get the 1st gene identifier
+    #look in DV hash for gene id
         #look at each ssr_words key
 
-#look in greece hash for greece (2nd gene identifier)
-                #look in its ssr_words for current spain ssr_word key
-                #if present
-
-        #look in corvallis hash for corvallis (3rd gene identifier)
-                    #look in its ssr_words for current spain ssr_word key
-                    #if present
-                        #print 1st 2nd 3rd gene ids current spain ssr_word key current spain greece corvallis ssr_word{key} values (one for each separate key - i.e. corvallis_key)
+        # look in G3 hash for the G3 locus (2nd gene identifier)
+                #look in its ssr_words for current DV ssr_word key
+                #if present...
                         
 sub stitch_ssr_to_ortho()
 {
@@ -85,42 +76,30 @@ sub stitch_ssr_to_ortho()
         $line = $_;
         chomp $line;
         my @line_ary = split('\t',$line);
-        my $cor_locus = $line_ary[0];
-        my $esp_locus = $line_ary[1];
-        my $gre_locus = $line_ary[2];
+        my $DV_locus = $line_ary[0];
+        my $G3_locus = $line_ary[1];
         
-        #print "$cor_locus\t$esp_locus\t$gre_locus\n";
+        #print "$DV_locus\t$G3_locus\n";
         
-        if (exists $esp{$esp_locus})
+        if (exists $DV{$DV_locus})
         {
-            foreach my $curr_esp_ssr_word (keys $esp{$esp_locus}{ssr_words}) {
-                #print "$esp_locus : $curr_esp_ssr_word: " . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} . "\n";
-                if (exists $gre{$gre_locus})
+            foreach my $curr_DV_ssr_word (keys $DV{$DV_locus}{ssr_words}) {
+                #print "$DV_locus : $curr_DV_ssr_word: " . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} . "\n";
+                if (exists $G3{$G3_locus})
                 {
-                    foreach my $curr_gre_ssr_word (keys $gre{$gre_locus}{ssr_words}) {
-                        if ($curr_esp_ssr_word eq $curr_gre_ssr_word) {
-                            #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}
-                            #    . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} . "\n";
-                            if (exists $cor{$cor_locus})
+                    foreach my $curr_G3_ssr_word (keys $G3{$G3_locus}{ssr_words}) {
+                        if ($curr_DV_ssr_word eq $curr_G3_ssr_word) {
+                            #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}
+                            #    . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n";
+                            if ($DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} != $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word}) 
                             {
-                                foreach my $curr_cor_ssr_word (keys $cor{$cor_locus}{ssr_words}) {
-                                    if ($curr_esp_ssr_word eq $curr_cor_ssr_word) {
-                                        if (($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}) 
-                                            || ($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word})
-                                            || ($gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}))
-                                        {
-                                            if ((length($curr_esp_ssr_word) > 2)) { # dimer exclusion
-                                                print "$curr_esp_ssr_word"
-                                                    . "\t$esp_locus\t$esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}"
-                                                    . "\t$gre_locus\t$gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}"
-                                                    . "\t$cor_locus\t$cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}\n";
-                                            }
-                                            #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}
-                                            #    . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}
-                                            #    . "\t$cor_locus:$curr_cor_ssr_word:" . $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word} . "\n";
-                                        }
-                                    }
+                                if ((length($curr_DV_ssr_word) > 2)) { # dimer exclusion
+                                    print "$curr_DV_ssr_word"
+                                        . "\t$DV_locus\t$DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}"
+                                        . "\t$G3_locus\t$G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word}\n";
                                 }
+                                #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}
+                                #    . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n";
                             }
                         }
                     }
@@ -134,9 +113,8 @@ sub stitch_ssr_to_ortho()
                         
                         
 # main
-%esp = pop_locus_hashes($esp_ssr);
-%gre = pop_locus_hashes($gre_ssr);
-%cor = pop_locus_hashes($cor_ssr);
+%DV = pop_locus_hashes($DV_ssr);
+%G3 = pop_locus_hashes($G3_ssr);
 
 stitch_ssr_to_ortho();