--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+use Digest::MD5 qw(md5_hex);
+
+
+# Merge association rows that differ only in their "with" column.
+#
+# Usage: association_duplicate_with_checker.pl input_file output_file
+#
+# Lines beginning with "!" are copied to the output as soon as they are read.
+# Every other non-blank line is split on tabs into 17 columns. Rows that
+# agree on every column except "with" (column 8) are collapsed into a single
+# row whose "with" column is the "|"-joined list of "AGI_LocusCode:<with>"
+# values, in the order they were encountered. Merged rows are written after
+# the whole input has been read, in order of first appearance.
+
+# check for arguments and explain usage
+if (@ARGV != 2) {
+    print STDERR "usage: association_duplicate_with_checker.pl input_file output_file\n";
+    exit 1;
+}
+
+my ($infile, $outfile) = @ARGV;
+
+# Number of tab-separated columns a data row is normalised to.
+my $num_columns = 17;
+
+# Index of the "with" column within a data row (0-based).
+my $with_index = 7;
+
+# md5 of all columns except "with" => { before => ..., rest => ..., with => [...] }
+my %line_hash;
+
+# Keys of %line_hash in order of first appearance. Perl's hash iteration
+# order is not stable, so this list is what keeps the output reproducible.
+my @key_order;
+
+# Three-argument open with lexical handles: the file names are taken
+# literally (no mode or pipe characters are interpreted) and a failure to
+# open either file stops the script instead of silently producing nothing.
+open(my $in_fh, '<', $infile)
+    or die "Cannot open '$infile' for reading: $!\n";
+open(my $out_fh, '>', $outfile)
+    or die "Cannot open '$outfile' for writing: $!\n";
+
+while (my $line = <$in_fh>) {
+    chomp $line;
+
+    # "!" lines are passed straight through, ahead of the merged rows.
+    if ($line =~ m/^!/) {
+        print {$out_fh} "$line\n";
+        next;
+    }
+
+    # A blank line holds no record; skipping it avoids emitting a row that
+    # consists only of tabs and an empty "AGI_LocusCode:" entry.
+    next unless $line =~ m/\S/;
+
+    # Limit -1 keeps trailing empty columns. Short rows are padded with empty
+    # strings so every row has exactly $num_columns defined fields; anything
+    # beyond $num_columns is ignored.
+    my @fields = split(/\t/, $line, -1);
+    push @fields, ('') x ($num_columns - @fields) if @fields < $num_columns;
+
+    # Columns: db, db_id, db_symbol, qual, ont_id, db_ref, ev | with |
+    # aspect, db_obj_name, db_obj_syn, db_obj_type, taxon, date,
+    # assigned_by, annot_ext, gp_form_id
+    my $before       = join("\t", @fields[0 .. $with_index - 1]);
+    my $with         = $fields[$with_index];
+    my $rest_of_line = join("\t", @fields[$with_index + 1 .. $num_columns - 1]);
+
+    # Rows are duplicates when everything except "with" is identical.
+    my $key = md5_hex("$before\t$rest_of_line");
+
+    if (!exists $line_hash{$key}) {
+        $line_hash{$key} = {
+            'before' => $before,
+            'rest'   => $rest_of_line,
+            'with'   => [],
+        };
+        push @key_order, $key;
+    }
+
+    # NOTE(review): the prefix is added even when the "with" column is empty,
+    # as in the original script -- confirm that is intended.
+    push @{ $line_hash{$key}->{'with'} }, "AGI_LocusCode:$with";
+}
+close($in_fh);
+
+foreach my $key (@key_order) {
+    my $entry = $line_hash{$key};
+    my $with  = join('|', @{ $entry->{'with'} });
+    print {$out_fh} "$entry->{'before'}\t$with\t$entry->{'rest'}\n";
+}
+
+# Buffered write errors (e.g. a full disk) only surface when the handle is closed.
+close($out_fh) or die "Cannot close '$outfile': $!\n";