Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Add duplicate association checker
author elserj <elserj@localhost>
Wed, 19 Feb 2020 21:09:11 +0000 (21:09 +0000)
committer elserj <elserj@localhost>
Wed, 19 Feb 2020 21:09:11 +0000 (21:09 +0000)
svn path=/; revision=671

interactome_scripts/association_duplicate_with_checker.pl [new file with mode: 0755]

diff --git a/interactome_scripts/association_duplicate_with_checker.pl b/interactome_scripts/association_duplicate_with_checker.pl
new file mode 100755 (executable)
index 0000000..7b41fb7
--- /dev/null
+++ b/interactome_scripts/association_duplicate_with_checker.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl
+# Merge tab-separated association lines (17 columns; layout appears to be
+# GAF 2.x) that differ only in their "with" column (column 8).  The merged
+# line lists every "with" value, each prefixed "AGI_LocusCode:", joined by "|".
+# Lines starting with "!" are copied through as they are read.
+use strict;
+use warnings;
+use Digest::MD5 qw(md5_hex);
+
+# check for arguments and explain usage
+if (@ARGV != 2) {
+    print STDERR "usage: association_duplicate_with_checker.pl input_file output_file\n";
+    exit 1;
+}
+my ($infile, $outfile) = @ARGV;
+
+my %line_hash;    # md5 key => { with => merged "with" value, line => output line }
+my @key_order;    # keys in first-seen order, so output order is reproducible
+
+# Three-argument open with lexical handles; stop at once if a file is unusable.
+open(my $in_fh,  '<', $infile)  or die "Cannot read '$infile': $!\n";
+open(my $out_fh, '>', $outfile) or die "Cannot write '$outfile': $!\n";
+while (my $line = <$in_fh>) {
+    chomp $line;
+    next if $line eq '';    # a blank line is not an association record
+    if ($line =~ m/^!/) {   # header/comment lines pass straight through
+        print {$out_fh} "$line\n";
+        next;
+    }
+
+    # Limit -1 keeps trailing empty fields; padding/slicing to exactly 17
+    # columns means short lines never yield undef ("uninitialized" warnings).
+    my @cols = (split(/\t/, $line, -1), ('') x 17)[0 .. 16];
+    my $with = splice(@cols, 7, 1);    # remove column 8; 16 columns remain
+    my $before       = join("\t", @cols[0 .. 6]);
+    my $rest_of_line = join("\t", @cols[7 .. 15]);
+
+    # Everything except "with" identifies a record; md5 keeps the key short.
+    my $key = md5_hex("$before\t$rest_of_line");
+    if (exists $line_hash{$key}) {
+        $line_hash{$key}{with} .= "|AGI_LocusCode:$with";
+    } else {
+        push @key_order, $key;
+        $line_hash{$key}{with} = "AGI_LocusCode:$with";
+    }
+    $line_hash{$key}{line} = "$before\t$line_hash{$key}{with}\t$rest_of_line";
+}
+close($in_fh);
+
+for my $key (@key_order) {
+    print {$out_fh} "$line_hash{$key}{line}\n";
+}
+close($out_fh) or die "Cannot finish writing '$outfile': $!\n";