--- /dev/null
+#!/usr/bin/perl -w
+use strict;
+
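+# Data-munging script for UniProt gene mappings. Takes a tab-separated UniProt
+# annotation file for Arabidopsis thaliana (A.th.) and prints, for each record,
+# the UniProt ID, the AT# locus identifiers found in its gene names, and the
+# value of the seventh column (read by the script as the sequence length).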
+
+# Main script body.
+#
+# Input : path to a tab-separated annotation file, given as the first
+#         command-line argument. The first line of the file is skipped.
+# Output: one line on STDOUT per remaining input line, of the form
+#           <column 1> TAB <space-separated loci> TAB <column 7>
+#         where the loci are extracted from column 5 (the gene names).
+# Dies  : if no file argument is given or the file cannot be opened.
+my $input_path = $ARGV[0];
+die("Usage: $0 <uniprot_annotation_file>\n") unless defined $input_path;
+
+# Three-argument open with a lexical handle: the file name is never
+# interpreted as a mode or pipe specification, and the handle is scoped to
+# this script instead of being a package-global bareword.
+open(my $uniprot_fh, '<', $input_path)
+    or die("Could not open $input_path: $!");
+
+# Read and discard the first line (presumably a column-header row).
+my $header_line = <$uniprot_fh>;
+
+while (my $line = <$uniprot_fh>) {
+    chomp $line;
+    my @fields = split(/\t/, $line);
+
+    # Columns used (0-based): 0 = UniProt ID, 4 = gene names, 6 = sequence
+    # length. split() drops trailing empty fields, so short or blank rows
+    # can leave these undefined; default them to '' so the row is still
+    # emitted in the same shape without "uninitialized value" warnings.
+    my ($uniprot_id, $gene_names, $seq_len) = @fields[0, 4, 6];
+    for ($uniprot_id, $gene_names, $seq_len) {
+        $_ = '' unless defined $_;
+    }
+
+    # Gene names are separated by whitespace or '/'. Keep at most one locus
+    # per name. Both the 'At1g01010' and the all-uppercase 'AT1G01010'
+    # spellings are accepted.
+    my @loci = map { /(A[Tt][0-9][Gg][0-9]{5})/ ? $1 : () }
+               split(m{\s|/}, $gene_names);
+
+    # "@loci" interpolates with single spaces between the loci.
+    print "$uniprot_id\t@loci\t$seq_len\n";
+}
+
+close $uniprot_fh;
+
+exit;
+