Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
new script for teasing out UniProt::locus mappings for A.th.
author: preecej <preecej@localhost>
Wed, 12 Jun 2013 22:08:33 +0000 (22:08 +0000)
committer: preecej <preecej@localhost>
Wed, 12 Jun 2013 22:08:33 +0000 (22:08 +0000)
svn path=/; revision=487

Personnel/preecej/perl_singletons/uniprot_edits/create_uniprot_ath_mappings.pl [new file with mode: 0644]

diff --git a/Personnel/preecej/perl_singletons/uniprot_edits/create_uniprot_ath_mappings.pl b/Personnel/preecej/perl_singletons/uniprot_edits/create_uniprot_ath_mappings.pl
new file mode 100644 (file)
index 0000000..955e23b
--- /dev/null
@@ -0,0 +1,35 @@
+#!/usr/bin/perl -w
+use strict;
+
+# Data-munging script for UniProt gene mappings. Takes a tab-delimited UniProt
+# annotation file for A.th. (path given as the first argument), discards its
+# first line, and prints one line per remaining record to STDOUT:
+#   <column 0: UniProt ID> TAB <AT# loci, space-separated> TAB <column 6>
+# Loci are matched in column 4 (gene names) as AT/At + digit + "g" + 5 digits.
+# Missing columns print as empty strings; whitespace-only lines are skipped.
+
+my $uniprot_path = shift @ARGV;
+die("Usage: $0 <uniprot_annotation_file>\n") unless defined $uniprot_path;
+
+# Three-argument open: the file name can never be mistaken for an open mode.
+open(my $uniprot_fh, '<', $uniprot_path)
+    or die("Could not open $uniprot_path: $!");
+
+my $first_line = <$uniprot_fh>;    # read and discarded (presumably a header)
+
+while (my $line = <$uniprot_fh>) {
+    chomp $line;
+    next unless $line =~ /\S/;
+    # Default absent columns to '' so a short line does not trigger warnings.
+    my @fields = split(/\t/, $line);
+    my ($uniprot_id, $gene_names, $seq_len) =
+        map { defined $_ ? $_ : '' } @fields[0, 4, 6];
+
+    # Gene names are separated by whitespace or '/'; keep only the AT# loci.
+    my @loci =
+        map { /(A[Tt][0-9]g[0-9]{5})/ ? $1 : () } split(/\s|\//, $gene_names);
+
+    print "$uniprot_id\t@loci\t$seq_len\n";
+}
+
+close($uniprot_fh);