#!/usr/bin/perl -w
use strict;
use warnings;

# Provenance: added by preecej on Wed, 12 Jun 2013 22:08:33 +0000 (svn revision 487)
# as Personnel/preecej/perl_singletons/uniprot_edits/create_uniprot_ath_mappings.pl
# ("new script for teasing out UniProt::locus mappings for A.th.").

# Data-munching program for UniProt gene mappings. Takes a UniProt annotation
# file for A.th. and picks out only the UniProt IDs and AT# loci numbers.
#
# Usage: create_uniprot_ath_mappings.pl <uniprot_annotation_file>
#
# Input:  tab-separated text. Column 0 is used as the UniProt ID, column 4 as
#         the gene-names field and column 6 as the sequence length.
# Output: one line per data row on STDOUT, in the form
#         <uniprot_id> TAB <space-separated loci> TAB <seq_len>

# extract_loci($gene_names_field)
# Splits the gene-names field on whitespace or '/' and returns, in order, the
# locus identifier (e.g. At1g01010) found in each token. Tokens without a
# locus are dropped. Returns an empty list for an undefined or empty field.
sub extract_loci {
    my ($gene_names_field) = @_;
    return () unless defined $gene_names_field;

    my @loci;
    for my $gene_name (split /\s|\//, $gene_names_field) {
        # Only the first locus-shaped substring of each token is kept.
        if ($gene_name =~ /(A[Tt][0-9]g[0-9]{5})/) {
            push @loci, $1;
        }
    }
    return @loci;
}

# format_mapping($line)
# Turns one chomped, tab-separated annotation row into the output record
# "<uniprot_id>\t<loci>\t<seq_len>\n". Columns that are missing from a short
# row are emitted as empty strings rather than raising undef warnings.
sub format_mapping {
    my ($line) = @_;
    my @line_ary = split /\t/, $line;

    my $uniprot_id = defined $line_ary[0] ? $line_ary[0] : '';
    my $seq_len    = defined $line_ary[6] ? $line_ary[6] : '';
    my @loci       = extract_loci($line_ary[4]);

    return join("\t", $uniprot_id, join(' ', @loci), $seq_len) . "\n";
}

# main($path)
# Reads the annotation file at $path and prints one mapping line per data row.
# Dies with a usage message when no path is given, or with the OS error when
# the file cannot be opened.
sub main {
    my ($path) = @_;
    die "Usage: $0 <uniprot_annotation_file>\n" unless defined $path;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode or a pipe.
    open(my $uniprot_fh, '<', $path) or die("Could not open $path: $!");

    # The first line is read and discarded before the loop, as in the original
    # script -- presumably a column-header row; confirm against the input files.
    my $header = <$uniprot_fh>;

    while (my $line = <$uniprot_fh>) {
        chomp $line;
        next unless $line =~ /\S/;    # skip blank rows
        print format_mapping($line);
    }

    close $uniprot_fh;
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by a test) without touching @ARGV or STDOUT.
unless (caller) {
    main(@ARGV);
    exit;
}