From: preecej Date: Thu, 7 Jun 2012 22:05:21 +0000 (+0000) Subject: sew together SSR data from 3 different sample transcriptome assemblies X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=dcf554d919c6fe02fa832dd01f7e2d2513c10c82;p=old-jaiswallab-svn%2F.git sew together SSR data from 3 different sample transcriptome assemblies , correlate via BLAST, output SSR count differentials amongst resulting orthologs > dimer svn path=/; revision=345 --- diff --git a/Personnel/preecej/perl_singletons/brasy_ssr_mash.pl b/Personnel/preecej/perl_singletons/brasy_ssr_mash.pl new file mode 100644 index 0000000..88cd1a7 --- /dev/null +++ b/Personnel/preecej/perl_singletons/brasy_ssr_mash.pl @@ -0,0 +1,143 @@ +#!/usr/bin/perl -w +use strict; + +use Switch; +use Data::Dumper; + +#perl script for ssr data + +$Data::Dumper::Pad = "... "; + +#file handles +my $esp_ssr = "./brasy-esp.ssr"; +my $gre_ssr = "./brasy-gre.ssr"; +my $cor_ssr = "./brasy-cor.ssr";; +my $ortho = "./sylvaticum_orthologs.txt"; + +# hashes +my %esp; +my %gre; +my %cor; + +#load ssr data into hash keyed by spain gene id +#add ssr_words hash to the spain_id_gene_key +#add first ssr word as key to ssr_words hash with ssr as key and # as value +#add additional ssr words for this gene as encountered (if Gene exists) +sub pop_locus_hashes($) +{ + my $line; + my %local_hash; + + open(SSR_FILE, $_[0]) or die("Could not open $_[0]"); + + while () + { + $line = $_; + chomp $line; + my @line_ary = split('\t',$line); + my $locus_id = $line_ary[0]; + my $ssr_word = $line_ary[3]; + my $ssr_word_count = $line_ary[4]; + + # add new gene to hash value (array) for old gene hash key + $local_hash{$locus_id}{"ssr_words"}{$ssr_word} = $ssr_word_count; + } + + close SSR_FILE; + + #print "[Current Hash: " . $_[0] . "]\n"; + #print Dumper(\%local_hash) . "\n\n"; + #print "\n"; + + return %local_hash +} + + + #Does this locus already exist? +# if (!exists $esp{$locus_id}) +# { + + + + +#Read blastall results +#foreach line splits on tab get 1st gene identifier + #look in spain hash for gene id + #look at each ssr_words key + +#look in greece hash for greece (2nd gene identifier) + #look in its ssr_words for current spain ssr_word key + #if present + + #look in corvallis hash for corvallis (3rd gene identifier) + #look in its ssr_words for current spain ssr_word key + #if present + #print 1st 2nd 3rd gene ids current spain ssr_word key current spain greece corvallis ssr_word{key} values (one for each separate key - i.e. corvallis_key) + +sub stitch_ssr_to_ortho() +{ + my $line; + + open(ORTHO_FILE, $ortho) or die("Could not open $ortho"); + + while () + { + $line = $_; + chomp $line; + my @line_ary = split('\t',$line); + my $cor_locus = $line_ary[0]; + my $esp_locus = $line_ary[1]; + my $gre_locus = $line_ary[2]; + + #print "$cor_locus\t$esp_locus\t$gre_locus\n"; + + if (exists $esp{$esp_locus}) + { + foreach my $curr_esp_ssr_word (keys $esp{$esp_locus}{ssr_words}) { + #print "$esp_locus : $curr_esp_ssr_word: " . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} . "\n"; + if (exists $gre{$gre_locus}) + { + foreach my $curr_gre_ssr_word (keys $gre{$gre_locus}{ssr_words}) { + if ($curr_esp_ssr_word eq $curr_gre_ssr_word) { + #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} + # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} . "\n"; + if (exists $cor{$cor_locus}) + { + foreach my $curr_cor_ssr_word (keys $cor{$cor_locus}{ssr_words}) { + if ($curr_esp_ssr_word eq $curr_cor_ssr_word) { + if (($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}) + || ($esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}) + || ($gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} != $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word})) + { + if ((length($curr_esp_ssr_word) > 2)) { # dimer exclusion + print "$curr_esp_ssr_word" + . "\t$esp_locus\t$esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word}" + . "\t$gre_locus\t$gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word}" + . "\t$cor_locus\t$cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word}\n"; + } + #print "$esp_locus:$curr_esp_ssr_word:" . $esp{$esp_locus}{ssr_words}{$curr_esp_ssr_word} + # . "\t$gre_locus:$curr_gre_ssr_word:" . $gre{$gre_locus}{ssr_words}{$curr_gre_ssr_word} + # . "\t$cor_locus:$curr_cor_ssr_word:" . $cor{$cor_locus}{ssr_words}{$curr_cor_ssr_word} . "\n"; + } + } + } + } + } + } + } + } + } + #print "\n"; + } + close ORTHO_FILE; +} + + +# main +%esp = pop_locus_hashes($esp_ssr); +%gre = pop_locus_hashes($gre_ssr); +%cor = pop_locus_hashes($cor_ssr); + +stitch_ssr_to_ortho(); + +exit;