From: preecej Date: Wed, 23 Jan 2013 22:58:26 +0000 (+0000) Subject: First draft X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=379f5209588daae28bbfa4f576a9b2e6b1faa61c;p=old-jaiswallab-svn%2F.git First draft svn path=/; revision=423 --- diff --git a/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl b/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl index 88cd1a7..f5cece5 100644 --- a/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl +++ b/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl @@ -4,23 +4,23 @@ use strict; use Switch; use Data::Dumper; -#perl script for ssr data +# perl script for ssr data, specialized to monococcum +# note: this script could easily be generalized to n comparitive cases, using +# recursion $Data::Dumper::Pad = "... "; #file handles -my $esp_ssr = "./brasy-esp.ssr"; -my $gre_ssr = "./brasy-gre.ssr"; -my $cor_ssr = "./brasy-cor.ssr";; -my $ortho = "./sylvaticum_orthologs.txt"; +my $DV_ssr = "./TmDV92_ssr.txt"; +my $G3_ssr = "./TmG3116_ssr.txt"; +my $ortho = "./monococcum_orthologs.txt"; # hashes -my %esp; -my %gre; -my %cor; +my %DV; +my %G3; -#load ssr data into hash keyed by spain gene id -#add ssr_words hash to the spain_id_gene_key +#load ssr data into hash keyed by DV gene id +#add ssr_words hash to the DV_id_gene_key #add first ssr word as key to ssr_words hash with ssr as key and # as value #add additional ssr words for this gene as encountered (if Gene exists) sub pop_locus_hashes($) @@ -52,27 +52,18 @@ sub pop_locus_hashes($) return %local_hash } - - #Does this locus already exist? -# if (!exists $esp{$locus_id}) +# Does this locus already exist? +# if (!exists $DV{$locus_id}) # { - - - -#Read blastall results -#foreach line splits on tab get 1st gene identifier - #look in spain hash for gene id +# Read blastall results +# Foreach line splits on tab get 1st gene identifier + #look in DV hash for gene id #look at each ssr_words key -#look in greece hash for greece (2nd gene identifier) - #look in its ssr_words for current spain ssr_word key - #if present - - #look in corvallis hash for corvallis (3rd gene identifier) - #look in its ssr_words for current spain ssr_word key - #if present - #print 1st 2nd 3rd gene ids current spain ssr_word key current spain greece corvallis ssr_word{key} values (one for each separate key - i.e. corvallis_key) + # look in G3 hash for G3ece (2nd gene identifier) + #look in its ssr_words for current DV ssr_word key + #if present... sub stitch_ssr_to_ortho() { @@ -85,42 +76,30 @@ sub stitch_ssr_to_ortho() $line = $_; chomp $line; my @line_ary = split('\t',$line); - my $cor_locus = $line_ary[0]; - my $esp_locus = $line_ary[1]; - my $gre_locus = $line_ary[2]; + my $DV_locus = $line_ary[0]; + my $G3_locus = $line_ary[1]; - #print "$cor_locus\t$esp_locus\t$gre_locus\n"; + #print "$DV_locus\t$G3_locus\n"; - if (exists $esp{$esp_locus}) + if (exists $DV{$DV_locus}) { - foreach my $curr_esp_ssr_word (keys $esp{$esp_locus}{ssr_words}) { - #print "$esp_locus : $curr_esp_ssr_word: " . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} . "\n"; - if (exists $gre{$gre_locus}) + foreach my $curr_DV_ssr_word (keys $DV{$DV_locus}{ssr_words}) { + #print "$DV_locus : $curr_DV_ssr_word: " . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} . "\n"; + if (exists $G3{$G3_locus}) { - foreach my $curr_gre_ssr_word (keys $gre{$gre_locus}{ssr_words}) { - if ($curr_esp_ssr_word eq $curr_gre_ssr_word) { - #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} - # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} . "\n"; - if (exists $cor{$cor_locus}) + foreach my $curr_G3_ssr_word (keys $G3{$G3_locus}{ssr_words}) { + if ($curr_DV_ssr_word eq $curr_G3_ssr_word) { + #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} + # . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n"; + if ($DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} != $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word}) { - foreach my $curr_cor_ssr_word (keys $cor{$cor_locus}{ssr_words}) { - if ($curr_esp_ssr_word eq $curr_cor_ssr_word) { - if (($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}) - || ($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}) - || ($gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word})) - { - if ((length($curr_esp_ssr_word) > 2)) { # dimer exclusion - print "$curr_esp_ssr_word" - . "\t$esp_locus\t$esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}" - . "\t$gre_locus\t$gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}" - . "\t$cor_locus\t$cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}\n"; - } - #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} - # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} - # . "\t$cor_locus:$curr_cor_ssr_word:" . $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word} . "\n"; - } - } + if ((length($curr_DV_ssr_word) > 2)) { # dimer exclusion + print "$curr_DV_ssr_word" + . "\t$DV_locus\t$DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word}" + . "\t$G3_locus\t$G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word}\n"; } + #print "$DV_locus:$curr_DV_ssr_word:" . $DV{$DV_locus}{ssr_words}{$curr_DV_ssr_word} + # . "\t$G3_locus:$curr_G3_ssr_word:" . $G3{$G3_locus}{ssr_words}{$curr_G3_ssr_word} . "\n"; } } } @@ -134,9 +113,8 @@ sub stitch_ssr_to_ortho() # main -%esp = pop_locus_hashes($esp_ssr); -%gre = pop_locus_hashes($gre_ssr); -%cor = pop_locus_hashes($cor_ssr); +%DV = pop_locus_hashes($DV_ssr); +%G3 = pop_locus_hashes($G3_ssr); stitch_ssr_to_ortho();