#!/usr/bin/perl

# SNP_venn_stats.pl - three-way Venn statistics for tab-separated SNP lists.
#
# Provenance: interactome_scripts/SNP_venn_stats.pl, "Initial version of SNP
# venn statistics script" (elserj, Wed, 2 Dec 2015 22:29:33 +0000;
# commit 92b0ea999162dcc642df14905c616a4df4421f87, svn revision 638).
#
# usage: SNP_venn_stats.pl file_1 file_2 file_3 output_file
#
# Every input line is split on tabs into the columns chrom, pos, ref and snp.
# Each distinct record key is counted once per file, and the report written
# to output_file gives:
#   * the number of distinct keys in each file,
#   * the number of keys found in exactly one file,
#   * the number of keys found in exactly two files (one line per pair),
#   * the number of keys found in all three files.
# The seven region counts are mutually exclusive, so for every file the
# regions that involve it add up to that file's total.

use strict;
use warnings;

# Keying mode.  0 keys a record by chromosome and position only; 1 also
# includes the snp column, so the same position carrying a different snp
# value is treated as a different record.
my $include_snp = 0;

# Build the hash key for one record.
#   $chrom, $pos - always part of the key
#   $snp         - appended only when $include_snp is true
sub record_key {
    my ($chrom, $pos, $snp) = @_;

    my $key = $chrom . "x" . $pos;
    $key .= "y" . $snp if $include_snp;
    return $key;
}

# Read one input file and return a hash reference whose keys are the distinct
# record keys found in it.  Dies if the file cannot be opened.
sub load_snp_keys {
    my ($path) = @_;
    my %keys;

    open(my $fh, '<', $path)
        or die "Cannot open '$path' for reading: $!\n";

    while (my $line = <$fh>) {
        chomp $line;
        next if $line eq '';    # a blank line carries no record

        # The third column (ref) is not needed to build the key.
        my ($chrom, $pos, undef, $snp) = split(/\t/, $line);

        # A record that lacks a column the key needs cannot be compared
        # across files; report it instead of keying on an undefined value.
        if (!defined $pos || ($include_snp && !defined $snp)) {
            warn "Skipping malformed line $. of '$path'\n";
            next;
        }

        $keys{ record_key($chrom, $pos, $snp) } = 1;
    }

    close($fh);
    return \%keys;
}

# Given three key sets (hash references), return an array reference of eight
# counters indexed by membership mask: bit 0 = first set, bit 1 = second set,
# bit 2 = third set.  Index 7 is therefore "in all three", index 3 is "in the
# first and second but not the third", and so on.  Index 0 is always 0.
sub count_venn_regions {
    my @key_sets = @_;
    my %mask_of;

    for my $i (0 .. $#key_sets) {
        my $bit = 1 << $i;
        for my $key (keys %{ $key_sets[$i] }) {
            $mask_of{$key} = ($mask_of{$key} || 0) | $bit;
        }
    }

    my @region = (0) x 8;
    $region[$_]++ for values %mask_of;
    return \@region;
}

# Entry point.  Takes the four command-line arguments and returns the process
# exit status: 0 on success, 1 when the argument count is wrong.  Dies when a
# file cannot be opened or the report cannot be written.
sub main {
    my @args = @_;

    # check for arguments and explain usage
    if (@args != 4) {
        print STDERR "usage: SNP_venn_stats.pl file_1 file_2 file_3 output_file\n";
        return 1;
    }

    my ($file_1, $file_2, $file_3, $output_file) = @args;

    my @key_sets = map { load_snp_keys($_) } ($file_1, $file_2, $file_3);
    my ($total_count_file_1, $total_count_file_2, $total_count_file_3)
        = map { scalar keys %{$_} } @key_sets;
    my $region = count_venn_regions(@key_sets);

    # Label/value pairs in report order.  The pairwise rows count keys that
    # are in exactly those two files; keys in all three have their own row.
    my @report = (
        [ "Total count in $file_1",  $total_count_file_1 ],
        [ "Total count in $file_2",  $total_count_file_2 ],
        [ "Total count in $file_3",  $total_count_file_3 ],
        [ "In $file_1 only",         $region->[1] ],
        [ "In $file_2 only",         $region->[2] ],
        [ "In $file_3 only",         $region->[4] ],
        [ "In $file_1 and $file_2",  $region->[3] ],
        [ "In $file_1 and $file_3",  $region->[5] ],
        [ "In $file_2 and $file_3",  $region->[6] ],
        [ "In all three files",      $region->[7] ],
    );

    open(my $out, '>', $output_file)
        or die "Cannot open '$output_file' for writing: $!\n";
    for my $row (@report) {
        print {$out} "$row->[0]\t=\t$row->[1]\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "Cannot finish writing '$output_file': $!\n";

    return 0;
}

# Run only when executed as a script, so the subs can be loaded for testing.
exit(main(@ARGV)) unless caller();

1;