Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Initial version of SNP venn statistics script
author elserj <elserj@localhost>
Wed, 2 Dec 2015 22:29:33 +0000 (22:29 +0000)
committer elserj <elserj@localhost>
Wed, 2 Dec 2015 22:29:33 +0000 (22:29 +0000)
svn path=/; revision=638

interactome_scripts/SNP_venn_stats.pl [new file with mode: 0755]

diff --git a/interactome_scripts/SNP_venn_stats.pl b/interactome_scripts/SNP_venn_stats.pl
new file mode 100755 (executable)
index 0000000..ade720e
--- /dev/null
@@ -0,0 +1,135 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# Validate the command line: exactly three input files and one output file
+# are required.  On misuse the usage text is written to STDERR and the script
+# exits with a non-zero status so that a calling shell or pipeline can tell
+# that nothing was produced.
+if (@ARGV != 4) {
+    print STDERR "usage: SNP_venn_stats.pl file_1 file_2 file_3 output_file\n";
+    exit 1;
+}
+
+# Positional arguments: the three files to compare, then the report path.
+my ($file_1, $file_2, $file_3, $output_file) = @ARGV;
+
+# One lookup hash per input file.  Each maps a key built from the
+# chromosome and position columns (optionally the SNP column too) to the
+# SNP value read from that line.
+my %file1hash;
+my %file2hash;
+my %file3hash;
+
+# This will change the way the hash is keyed.  A "0" will key with only the position, a "1" will include the snp value
+my $include_snp = 0;
+
+# Read one tab-separated file into the hash referenced by $hash_ref.
+#
+# Each non-blank line is split on tabs into chromosome, position, reference
+# and SNP columns (the reference column is not used).  The SNP value is
+# stored under a key built from the other columns.
+#
+# Parameters:
+#   $path        - name of the file to read
+#   $hash_ref    - reference to the hash that receives key => snp entries
+#   $include_snp - false: key is "<chrom>x<pos>"
+#                  true:  key is "<chrom>x<pos>y<snp>"
+#
+# Dies with the system error message if the file cannot be opened.
+sub load_snp_hash {
+    my ($path, $hash_ref, $include_snp) = @_;
+
+    # Three-argument open with a lexical handle: the file name can never be
+    # interpreted as an open mode, and a missing file is reported instead of
+    # silently yielding an empty hash.
+    open(my $in_fh, '<', $path)
+        or die "Cannot open '$path' for reading: $!\n";
+
+    while (my $line = <$in_fh>) {
+        chomp $line;
+
+        # A blank line (typically a trailing one) is not a record and must
+        # not be turned into a key.
+        next if $line =~ /^\s*$/;
+
+        my ($chrom, $pos, undef, $snp) = split("\t", $line);
+
+        # Rows that stop before a column still get an entry; missing
+        # columns are treated as empty strings so the stored value is
+        # always defined.
+        $pos = '' unless defined $pos;
+        $snp = '' unless defined $snp;
+
+        # Key on the position alone unless $include_snp is set, which is
+        # what the flag's name and its description say it does.
+        my $key = $include_snp
+            ? $chrom . "x" . $pos . "y" . $snp
+            : $chrom . "x" . $pos;
+
+        $hash_ref->{$key} = $snp;
+    }
+
+    close($in_fh);
+    return;
+}
+
+# Load the three input files with the same keying rule.
+load_snp_hash($file_1, \%file1hash, $include_snp);
+load_snp_hash($file_2, \%file2hash, $include_snp);
+load_snp_hash($file_3, \%file3hash, $include_snp);
+
+# Venn-region counters.  Every counter is exclusive: a key is counted in
+# exactly one of the seven regions (1, 2, 3, 1+2, 1+3, 2+3, 1+2+3).
+my $count_file_1 = 0;
+my $count_file_2 = 0;
+my $count_file_3 = 0;
+
+my $count_file_12 = 0;
+my $count_file_13 = 0;
+my $count_file_23 = 0;
+
+my $count_file_123 = 0;
+
+# Classify every key of file1 by which of the other two hashes also hold it.
+# "exists" is used rather than "defined" because membership is about the key
+# being present, not about the stored value.
+foreach my $key (keys %file1hash) {
+    my $in_file_2 = exists $file2hash{$key};
+    my $in_file_3 = exists $file3hash{$key};
+
+    if ($in_file_2 && $in_file_3) {
+        $count_file_123++;
+    }
+    elsif ($in_file_2) {
+        # in file1 and file2, not in file3
+        $count_file_12++;
+    }
+    elsif ($in_file_3) {
+        # in file1 and file3, not in file2
+        $count_file_13++;
+    }
+    else {
+        # in file1 only
+        $count_file_1++;
+    }
+}
+
+# Count the keys shared by file2 and file3 but absent from file1.  Keys that
+# are also in file1 were already counted in $count_file_123 above; counting
+# them again here would subtract the triple overlap twice below.
+foreach my $key (keys %file2hash) {
+    next if exists $file1hash{$key};
+    $count_file_23++ if exists $file3hash{$key};
+}
+
+# Get total counts in each file/hash
+my $total_count_file_1 = scalar(keys %file1hash);
+my $total_count_file_2 = scalar(keys %file2hash);
+my $total_count_file_3 = scalar(keys %file3hash);
+
+# Whatever remains of a file's total after removing its three (disjoint)
+# overlap regions is the set of keys found only in that file.
+$count_file_2 = $total_count_file_2 - $count_file_123 - $count_file_12 - $count_file_23;
+$count_file_3 = $total_count_file_3 - $count_file_123 - $count_file_13 - $count_file_23;
+                               
+                               
+# Write the report: one "label<TAB>=<TAB>count" line per statistic, in the
+# order totals, single-file counts, pairwise counts, three-way count.
+# The three-argument open keeps the output name from being parsed as a mode,
+# and a failure to create the file stops the script with the system error.
+open(my $out_fh, '>', $output_file)
+    or die "Cannot open '$output_file' for writing: $!\n";
+
+print {$out_fh} "Total count in $file_1\t=\t$total_count_file_1\n";
+print {$out_fh} "Total count in $file_2\t=\t$total_count_file_2\n";
+print {$out_fh} "Total count in $file_3\t=\t$total_count_file_3\n";
+print {$out_fh} "In $file_1 only\t=\t$count_file_1\n";
+print {$out_fh} "In $file_2 only\t=\t$count_file_2\n";
+print {$out_fh} "In $file_3 only\t=\t$count_file_3\n";
+print {$out_fh} "In $file_1 and $file_2\t=\t$count_file_12\n";
+print {$out_fh} "In $file_1 and $file_3\t=\t$count_file_13\n";
+print {$out_fh} "In $file_2 and $file_3\t=\t$count_file_23\n";
+print {$out_fh} "In all three files\t=\t$count_file_123\n";
+
+# Buffered write errors (for example a full disk) only surface at close, so
+# the result of close is checked as well.
+close($out_fh)
+    or die "Cannot close '$output_file': $!\n";