use Switch;
use Data::Dumper;
-#perl script for ssr data
+# perl script for ssr data, specialized to monococcum
+# note: this script could easily be generalized to n comparative cases, using
+# recursion
$Data::Dumper::Pad = "... ";
#file handles
-my $esp_ssr = "./brasy-esp.ssr";
-my $gre_ssr = "./brasy-gre.ssr";
-my $cor_ssr = "./brasy-cor.ssr";;
-my $ortho = "./sylvaticum_orthologs.txt";
+my $DV_ssr = "./TmDV92_ssr.txt";
+my $G3_ssr = "./TmG3116_ssr.txt";
+my $ortho = "./monococcum_orthologs.txt";
# hashes
-my %esp;
-my %gre;
-my %cor;
+my %DV;
+my %G3;
-#load ssr data into hash keyed by spain gene id
-#add ssr_words hash to the spain_id_gene_key
+#load ssr data into hash keyed by DV gene id
+#add ssr_words hash to the DV_id_gene_key
#add first ssr word as key to ssr_words hash with ssr as key and # as value
#add additional ssr words for this gene as encountered (if Gene exists)
sub pop_locus_hashes($)
return %local_hash
}
-
- #Does this locus already exist?
-# if (!exists $esp{$locus_id})
+# Does this locus already exist?
+# if (!exists $DV{$locus_id})
# {
-
-
-
-#Read blastall results
-#foreach line splits on tab get 1st gene identifier
- #look in spain hash for gene id
+# Read blastall results
+# For each line, split on tab and get the 1st gene identifier
+ #look in DV hash for gene id
#look at each ssr_words key
-#look in greece hash for greece (2nd gene identifier)
- #look in its ssr_words for current spain ssr_word key
- #if present
-
- #look in corvallis hash for corvallis (3rd gene identifier)
- #look in its ssr_words for current spain ssr_word key
- #if present
- #print 1st 2nd 3rd gene ids current spain ssr_word key current spain greece corvallis ssr_word{key} values (one for each separate key - i.e. corvallis_key)
+ # look in G3 hash for the G3 locus (2nd gene identifier)
+ #look in its ssr_words for current DV ssr_word key
+ #if present...
sub stitch_ssr_to_ortho()
{
$line = $_;
chomp $line;
my @line_ary = split('\t',$line);
- my $cor_locus = $line_ary[0];
- my $esp_locus = $line_ary[1];
- my $gre_locus = $line_ary[2];
+ my $DV_locus = $line_ary[0];
+ my $G3_locus = $line_ary[1];
- #print "$cor_locus\t$esp_locus\t$gre_locus\n";
+ #print "$DV_locus\t$G3_locus\n";
- if (exists $esp{$esp_locus})
+ if (exists $DV{$DV_locus})
{
- foreach my $curr_esp_ssr_word (keys $esp{$esp_locus}{ssr_words}) {
- #print "$esp_locus : $curr_esp_ssr_word: " . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} . "\n";
- if (exists $gre{$gre_locus})
+ foreach my $curr_DV_ssr_word (keys $DV{$DV_locus}{ssr_words}) {
+ #print "$DV_locus : $curr_DV_ssr_word: " . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} . "\n";
+ if (exists $G3{$G3_locus})
{
- foreach my $curr_gre_ssr_word (keys $gre{$gre_locus}{ssr_words}) {
- if ($curr_esp_ssr_word eq $curr_gre_ssr_word) {
- #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}
- # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} . "\n";
- if (exists $cor{$cor_locus})
+ foreach my $curr_G3_ssr_word (keys $G3{$G3_locus}{ssr_words}) {
+ if ($curr_DV_ssr_word eq $curr_G3_ssr_word) {
+ #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}
+ # . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n";
+ if ($DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} != $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word})
{
- foreach my $curr_cor_ssr_word (keys $cor{$cor_locus}{ssr_words}) {
- if ($curr_esp_ssr_word eq $curr_cor_ssr_word) {
- if (($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word})
- || ($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word})
- || ($gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}))
- {
- if ((length($curr_esp_ssr_word) > 2)) { # dimer exclusion
- print "$curr_esp_ssr_word"
- . "\t$esp_locus\t$esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}"
- . "\t$gre_locus\t$gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}"
- . "\t$cor_locus\t$cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}\n";
- }
- #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}
- # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}
- # . "\t$cor_locus:$curr_cor_ssr_word:" . $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word} . "\n";
- }
- }
+ if ((length($curr_DV_ssr_word) > 2)) { # dimer exclusion
+ print "$curr_DV_ssr_word"
+ . "\t$DV_locus\t$DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}"
+ . "\t$G3_locus\t$G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word}\n";
}
+ #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}
+ # . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n";
}
}
}
# main
-%esp = pop_locus_hashes($esp_ssr);
-%gre = pop_locus_hashes($gre_ssr);
-%cor = pop_locus_hashes($cor_ssr);
+%DV = pop_locus_hashes($DV_ssr);
+%G3 = pop_locus_hashes($G3_ssr);
stitch_ssr_to_ortho();