From daad3b6da9688afa0932203c90b66abf649ca400 Mon Sep 17 00:00:00 2001 From: preecej Date: Wed, 23 Jan 2013 22:30:11 +0000 Subject: [PATCH] Initial copy from B.sylvaticum version, unmodified. svn path=/; revision=422 --- .../perl_singletons/monococcum_ssr_mash.pl | 143 ++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl diff --git a/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl b/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl new file mode 100644 index 0000000..88cd1a7 --- /dev/null +++ b/Personnel/preecej/perl_singletons/monococcum_ssr_mash.pl @@ -0,0 +1,143 @@ +#!/usr/bin/perl -w +use strict; + +use Switch; +use Data::Dumper; + +#perl script for ssr data + +$Data::Dumper::Pad = "... "; + +#file handles +my $esp_ssr = "./brasy-esp.ssr"; +my $gre_ssr = "./brasy-gre.ssr"; +my $cor_ssr = "./brasy-cor.ssr";; +my $ortho = "./sylvaticum_orthologs.txt"; + +# hashes +my %esp; +my %gre; +my %cor; + +#load ssr data into hash keyed by spain gene id +#add ssr_words hash to the spain_id_gene_key +#add first ssr word as key to ssr_words hash with ssr as key and # as value +#add additional ssr words for this gene as encountered (if Gene exists) +sub pop_locus_hashes($) +{ + my $line; + my %local_hash; + + open(SSR_FILE, $_[0]) or die("Could not open $_[0]"); + + while () + { + $line = $_; + chomp $line; + my @line_ary = split('\t',$line); + my $locus_id = $line_ary[0]; + my $ssr_word = $line_ary[3]; + my $ssr_word_count = $line_ary[4]; + + # add new gene to hash value (array) for old gene hash key + $local_hash{$locus_id}{"ssr_words"}{$ssr_word} = $ssr_word_count; + } + + close SSR_FILE; + + #print "[Current Hash: " . $_[0] . "]\n"; + #print Dumper(\%local_hash) . "\n\n"; + #print "\n"; + + return %local_hash +} + + + #Does this locus already exist? +# if (!exists $esp{$locus_id}) +# { + + + + +#Read blastall results +#foreach line splits on tab get 1st gene identifier + #look in spain hash for gene id + #look at each ssr_words key + +#look in greece hash for greece (2nd gene identifier) + #look in its ssr_words for current spain ssr_word key + #if present + + #look in corvallis hash for corvallis (3rd gene identifier) + #look in its ssr_words for current spain ssr_word key + #if present + #print 1st 2nd 3rd gene ids current spain ssr_word key current spain greece corvallis ssr_word{key} values (one for each separate key - i.e. corvallis_key) + +sub stitch_ssr_to_ortho() +{ + my $line; + + open(ORTHO_FILE, $ortho) or die("Could not open $ortho"); + + while () + { + $line = $_; + chomp $line; + my @line_ary = split('\t',$line); + my $cor_locus = $line_ary[0]; + my $esp_locus = $line_ary[1]; + my $gre_locus = $line_ary[2]; + + #print "$cor_locus\t$esp_locus\t$gre_locus\n"; + + if (exists $esp{$esp_locus}) + { + foreach my $curr_esp_ssr_word (keys $esp{$esp_locus}{ssr_words}) { + #print "$esp_locus : $curr_esp_ssr_word: " . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} . "\n"; + if (exists $gre{$gre_locus}) + { + foreach my $curr_gre_ssr_word (keys $gre{$gre_locus}{ssr_words}) { + if ($curr_esp_ssr_word eq $curr_gre_ssr_word) { + #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} + # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} . "\n"; + if (exists $cor{$cor_locus}) + { + foreach my $curr_cor_ssr_word (keys $cor{$cor_locus}{ssr_words}) { + if ($curr_esp_ssr_word eq $curr_cor_ssr_word) { + if (($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}) + || ($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}) + || ($gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word})) + { + if ((length($curr_esp_ssr_word) > 2)) { # dimer exclusion + print "$curr_esp_ssr_word" + . "\t$esp_locus\t$esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}" + . "\t$gre_locus\t$gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}" + . "\t$cor_locus\t$cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}\n"; + } + #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} + # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} + # . "\t$cor_locus:$curr_cor_ssr_word:" . $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word} . "\n"; + } + } + } + } + } + } + } + } + } + #print "\n"; + } + close ORTHO_FILE; +} + + +# main +%esp = pop_locus_hashes($esp_ssr); +%gre = pop_locus_hashes($gre_ssr); +%cor = pop_locus_hashes($cor_ssr); + +stitch_ssr_to_ortho(); + +exit; -- 2.34.1