#!/usr/bin/perl

# association_duplicate_with_checker.pl
#
# Merge association rows that are identical in every column except the
# "with" column (column 8) into a single row. The merged row's "with" column
# is the pipe-separated list of the distinct values seen, each prefixed with
# "AGI_LocusCode:".
#
# Usage: association_duplicate_with_checker.pl input_file output_file
#
# Input:  tab-separated rows of up to 17 columns (db, db_id, db_symbol, qual,
#         ont_id, db_ref, ev, with, aspect, db_obj_name, db_obj_syn,
#         db_obj_type, taxon, date, assigned_by, annot_ext, gp_form_id).
#         NOTE(review): this looks like the GAF 2.x column layout - confirm.
# Output: every line starting with "!" is copied through as soon as it is
#         read; the merged data rows follow, in the order each distinct row
#         was first seen. Rows are always written with 17 columns.
#
# Provenance: commit 14a96fe747cf217206b2e3d8b84eb7262a134f39 by elserj,
# Wed, 19 Feb 2020 21:09:11 +0000, "Add duplicate association checker"
# (svn path=/; revision=671), file
# interactome_scripts/association_duplicate_with_checker.pl.

use strict;
use warnings;

use Digest::MD5 qw(md5_hex);

# Number of columns a row is padded to before it is split into parts.
my $FIELD_COUNT = 17;

# Zero-based index of the "with" column inside a row.
my $WITH_INDEX = 7;

# Prefix put in front of every "with" value that is written out.
# NOTE(review): the prefix is applied unconditionally, so the input "with"
# column is presumably a bare locus code - verify against the data source.
my $WITH_PREFIX = 'AGI_LocusCode:';

# Check for arguments and explain usage. A usage error is reported on STDERR
# with a non-zero exit status so that calling shells/pipelines can detect it.
if (@ARGV != 2) {
    print STDERR "usage: association_duplicate_with_checker.pl input_file output_file\n";
    exit 1;
}

my ($infile, $outfile) = @ARGV;

# Three-argument open with lexical handles: the file names are never parsed
# for a mode, and a failure stops the script instead of being ignored.
open(my $in_fh, '<', $infile)
    or die "Cannot open '$infile' for reading: $!\n";
open(my $out_fh, '>', $outfile)
    or die "Cannot open '$outfile' for writing: $!\n";

# Merged rows, keyed by the MD5 of everything except the "with" column.
# Each value is { before => ..., rest => ..., withs => [...], seen => {...} }.
my %record_for;

# Keys of %record_for in first-seen order, so the output is deterministic.
my @key_order;

while (my $line = <$in_fh>) {
    chomp $line;

    # A blank line has no columns at all; skip it rather than emit a row of
    # empty columns.
    next if $line eq '';

    # Header/comment lines are passed through untouched.
    if ($line =~ /^!/) {
        print {$out_fh} "$line\n"
            or die "Cannot write to '$outfile': $!\n";
        next;
    }

    # LIMIT -1 keeps trailing empty columns; short rows are padded so every
    # column is defined. Columns beyond the 17th are ignored.
    my @fields = split /\t/, $line, -1;
    push @fields, ('') x ($FIELD_COUNT - @fields) if @fields < $FIELD_COUNT;

    my $with   = $fields[$WITH_INDEX];
    my $before = join "\t", @fields[ 0 .. $WITH_INDEX - 1 ];
    my $rest   = join "\t", @fields[ $WITH_INDEX + 1 .. $FIELD_COUNT - 1 ];

    # Two rows are duplicates when all columns other than "with" are equal.
    my $key = md5_hex("$before\t$rest");

    my $record = $record_for{$key};
    if (!$record) {
        $record = $record_for{$key} = {
            before => $before,
            rest   => $rest,
            withs  => [],
            seen   => {},
        };
        push @key_order, $key;
    }

    # List each distinct "with" value once, in first-seen order, so that
    # exact duplicate rows do not repeat the same value.
    my $with_entry = $WITH_PREFIX . $with;
    next if $record->{seen}{$with_entry}++;
    push @{ $record->{withs} }, $with_entry;
}
close($in_fh);

for my $key (@key_order) {
    my $record = $record_for{$key};
    my $merged = join "\t",
        $record->{before},
        join('|', @{ $record->{withs} }),
        $record->{rest};
    print {$out_fh} "$merged\n"
        or die "Cannot write to '$outfile': $!\n";
}

# Buffered write errors (e.g. a full disk) only surface at close.
close($out_fh)
    or die "Cannot close '$outfile': $!\n";