From da87c9a28dd1e8fbc564c2dfad0100f4218c1411 Mon Sep 17 00:00:00 2001 From: elserj Date: Thu, 3 Dec 2015 01:20:45 +0000 Subject: [PATCH] Fixed all bugs and also now outputs the lines to a set of files. Probably should find a better way to name the output files, but can be changed later. svn path=/; revision=641 --- interactome_scripts/SNP_venn_stats.pl | 67 ++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/interactome_scripts/SNP_venn_stats.pl b/interactome_scripts/SNP_venn_stats.pl index 9ef9d98..cc8a97b 100755 --- a/interactome_scripts/SNP_venn_stats.pl +++ b/interactome_scripts/SNP_venn_stats.pl @@ -18,8 +18,8 @@ my %file1hash; my %file2hash; my %file3hash; -# This will change the way the hash is keyed. A "0" will key with only the position, a "1" will include the snp value -my $include_snp = 0; +# This will change the way the hash is keyed. A "0" will key with only the position, a "1" will include the reference and snp value +my $include_snp = 1; open(FILE1, "$file_1"); while() { @@ -28,10 +28,10 @@ while() { my ($chrom,$pos,$ref,$snp) = split("\t", $line); my $key; - if($include_snp) { - $key = "$chrom" . "x" . "$pos"; + if($include_snp == 0) { + $key = "$chrom" . "xxx" . "$pos"; }else{ - $key = "$chrom" . "x" . "$pos" . "y" . "$snp"; + $key = "$chrom" . "xxx" . "$pos" . "xxx" . "$ref". "xxx" . "$snp"; } $file1hash{$key} = $snp; } @@ -45,10 +45,10 @@ while() { my ($chrom,$pos,$ref,$snp) = split("\t", $line); my $key; - if($include_snp) { - $key = "$chrom" . "x" . "$pos"; + if($include_snp == 0) { + $key = "$chrom" . "xxx" . "$pos"; }else{ - $key = "$chrom" . "x" . "$pos" . "y" . "$snp"; + $key = "$chrom" . "xxx" . "$pos" . "xxx" . "$ref". "xxx" . "$snp"; } $file2hash{$key} = $snp; } @@ -61,10 +61,10 @@ while() { my ($chrom,$pos,$ref,$snp) = split("\t", $line); my $key; - if($include_snp) { - $key = "$chrom" . "x" . "$pos"; + if($include_snp == 0) { + $key = "$chrom" . "xxx" . "$pos"; }else{ - $key = "$chrom" . "x" . "$pos" . "y" . "$snp"; + $key = "$chrom" . "xxx" . "$pos" . "xxx" . "$ref". "xxx" . "$snp"; } $file3hash{$key} = $snp; } @@ -80,20 +80,38 @@ my $count_file_23 = 0; my $count_file_123 = 0; +# add the output files to put out each line +open(OUT1, ">file_1_only.txt"); +open(OUT2, ">file_2_only.txt"); +open(OUT3, ">file_3_only.txt"); +open(OUT12, ">file_1_and_2.txt"); +open(OUT13, ">file_1_and_3.txt"); +open(OUT23, ">file_2_and_3.txt"); +open(OUT123, ">file_1__and_2_and_3.txt"); + foreach my $key (keys %file1hash) { #check if also in file2 if(defined($file2hash{$key})) { # and file3 if(defined($file3hash{$key})) { $count_file_123++; + my $output = split_key($key); + print OUT123 "$output\n"; + }else{# it's only in file1 and file2 $count_file_12++; + my $output = split_key($key); + print OUT12 "$output\n"; } #check if in file3 }elsif(defined($file3hash{$key})) { $count_file_13++; + my $output = split_key($key); + print OUT13 "$output\n"; #it's only in file1 }else{$count_file_1++; + my $output = split_key($key); + print OUT1 "$output\n"; } } @@ -101,9 +119,28 @@ foreach my $key (keys %file2hash) { #check if in file3 if(defined($file3hash{$key}) && !defined($file1hash{$key})) { $count_file_23++; + my $output = split_key($key); + print OUT23 "$output\n"; + }elsif(!defined($file3hash{$key}) && !defined($file1hash{$key})) { + my $output = split_key($key); + print OUT2 "$output\n"; + } +} + +foreach my $key (keys %file3hash) { + if(!defined($file1hash{$key}) && !defined($file2hash{$key})) { + my $output = split_key($key); + print OUT3 "$output\n"; } } +close(OUT1); +close(OUT2); +close(OUT3); +close(OUT12); +close(OUT13); +close(OUT23); +close(OUT123); # Get total counts in each file/hash my $total_count_file_1 = scalar(keys %file1hash); @@ -129,3 +166,11 @@ print OUTFILE "In $file_2 and $file_3\t=\t$count_file_23\n"; print OUTFILE "In all three files\t=\t$count_file_123\n"; close(OUTFILE); + +sub split_key{ + my $key = $_[0]; + my ($chrom, $pos, $ref, $snp) = split("xxx", $key); + my $string = "$chrom\t$pos\t$ref\t$snp"; + return $string; +} + -- 2.34.1