From 761fad0ab5f5c68f3b462d72ba8fc9676fdf0354 Mon Sep 17 00:00:00 2001
From: preecej
Date: Wed, 23 May 2012 20:43:06 +0000
Subject: [PATCH] Simple gene and expression data mapping scripts for the Rice Seed Dev project

svn path=/; revision=334
---
 .../preecej/perl_singletons/affy_mapping.pl   | 100 ++++++++++++++++++
 .../preecej/perl_singletons/ath_mapping.pl    |  88 +++++++++++++++
 2 files changed, 188 insertions(+)
 create mode 100644 Personnel/preecej/perl_singletons/affy_mapping.pl
 create mode 100644 Personnel/preecej/perl_singletons/ath_mapping.pl

diff --git a/Personnel/preecej/perl_singletons/affy_mapping.pl b/Personnel/preecej/perl_singletons/affy_mapping.pl
new file mode 100644
index 0000000..da2423e
--- /dev/null
+++ b/Personnel/preecej/perl_singletons/affy_mapping.pl
@@ -0,0 +1,100 @@
+#!/usr/bin/perl
+# Maps Affymetrix chip expression data to gene loci, writes the mapping to a
+# file, and sorts it into ../mapping_output/sorted_mapped_<EXPR_FILE name>.
+#
+# Usage: affy_mapping.pl GENE_FILE EXPR_FILE
+#   GENE_FILE  tab-delimited with one header line; column 1 is the gene id,
+#              column 2 is the key looked up in EXPR_FILE (presumably the
+#              Affymetrix probe id -- confirm against the data).
+#   EXPR_FILE  tab-delimited with one header line; column 1 is the key,
+#              columns 2-4 are stored as value, abs_call and p_value.
+
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Basename qw(basename);
+
+die "Usage: $0 GENE_FILE EXPR_FILE\n" unless @ARGV == 2;
+my ($gene_file, $expr_file) = @ARGV;
+
+# Returns $text without leading/trailing whitespace ('' when undefined).
+sub trim {
+    my ($text) = @_;
+    return '' unless defined $text;
+    $text =~ s/^\s+//;
+    $text =~ s/\s+$//;
+    return $text;
+}
+
+my %gene_list;    # key => { gene_id, value, abs_call, p_value }
+
+open my $genes_fh, '<', $gene_file
+    or die "Cannot open $gene_file: $!\n";
+readline $genes_fh;    # discard the header line
+
+while (my $line = <$genes_fh>) {
+    chomp $line;
+    my @fields = split /\t/, $line;
+    my $gene_id = trim($fields[0]);
+    my $key     = trim($fields[1]);
+
+    # A blank or single-column line has no key to map, so skip it.
+    next if $key eq '';
+
+    $gene_list{$key}{gene_id} = $gene_id;
+}
+
+close $genes_fh;
+
+#print Dumper(\%gene_list) . "\n\n";
+
+open my $expr_fh, '<', $expr_file
+    or die "Cannot open $expr_file: $!\n";
+readline $expr_fh;    # discard the header line
+
+while (my $line = <$expr_fh>) {
+    chomp $line;
+    my @fields = split /\t/, $line;
+    my $key    = $fields[0];
+    next unless defined $key && exists $gene_list{$key};
+
+    $gene_list{$key}{value}    = $fields[1];
+    $gene_list{$key}{abs_call} = $fields[2];
+    $gene_list{$key}{p_value}  = $fields[3];
+}
+
+close $expr_fh;
+
+#print Dumper(\%gene_list);
+
+# Use only the file name so an EXPR_FILE given with a directory still yields
+# valid output paths.
+my $mapped_file = 'mapped_' . basename($expr_file);
+my $sorted_file = "../mapping_output/sorted_$mapped_file";
+
+open my $out_fh, '>', $mapped_file
+    or die "Cannot write $mapped_file: $!\n";
+
+foreach my $key (keys %gene_list) {
+    my $entry = $gene_list{$key};
+    my @columns = ('none') x 3;    # genes with no expression record
+
+    if (exists $entry->{value}) {
+        # Fields missing from a short expression line are written as empty.
+        @columns = map { defined $_ ? $_ : '' }
+            @{$entry}{qw(value abs_call p_value)};
+    }
+
+    print {$out_fh} join("\t", $entry->{gene_id}, $key, @columns), "\n";
+}
+
+close $out_fh or die "Cannot close $mapped_file: $!\n";
+
+# Every line carries its own unique hash key, so a separate uniq pass is
+# unnecessary. The list form of system() keeps the file names away from the
+# shell.
+system('sort', '-o', $sorted_file, $mapped_file) == 0
+    or die "sort failed for $mapped_file (status $?)\n";
+unlink $mapped_file or warn "Cannot remove $mapped_file: $!\n";
+
+exit(0);
diff --git a/Personnel/preecej/perl_singletons/ath_mapping.pl b/Personnel/preecej/perl_singletons/ath_mapping.pl
new file mode 100644
index 0000000..b68a284
--- /dev/null
+++ b/Personnel/preecej/perl_singletons/ath_mapping.pl
@@ -0,0 +1,88 @@
+#!/usr/bin/perl
+# Maps A.th. expression data to gene loci, writes the mapping to a file, and
+# sorts it into ./mapping_output/sorted_mapped_<EXPR_FILE name>.
+#
+# Usage: ath_mapping.pl GENE_FILE EXPR_FILE
+#   GENE_FILE  one header line, then one gene locus per line.
+#   EXPR_FILE  tab-delimited with one header line; column 1 is the gene
+#              locus, column 2 is the value reported for it.
+
+use strict;
+use warnings;
+use Data::Dumper;
+use File::Basename qw(basename);
+
+die "Usage: $0 GENE_FILE EXPR_FILE\n" unless @ARGV == 2;
+my ($gene_file, $expr_file) = @ARGV;
+
+# Returns $text without leading/trailing whitespace ('' when undefined).
+sub trim {
+    my ($text) = @_;
+    return '' unless defined $text;
+    $text =~ s/^\s+//;
+    $text =~ s/\s+$//;
+    return $text;
+}
+
+my %gene_list;    # gene locus => value ('' until EXPR_FILE supplies one)
+
+open my $genes_fh, '<', $gene_file
+    or die "Cannot open $gene_file: $!\n";
+readline $genes_fh;    # discard the header line
+
+while (my $line = <$genes_fh>) {
+    chomp $line;
+    my $locus = trim($line);
+    next if $locus eq '';    # ignore blank lines
+
+    # NOTE(review): loci are stored as written but looked up upper-cased
+    # below, so lower-case loci in GENE_FILE never match -- confirm intent.
+    $gene_list{$locus} = '';
+}
+
+close $genes_fh;
+
+#print Dumper(\%gene_list) . "\n\n";
+
+open my $expr_fh, '<', $expr_file
+    or die "Cannot open $expr_file: $!\n";
+readline $expr_fh;    # discard the header line
+
+while (my $line = <$expr_fh>) {
+    chomp $line;
+    my @fields = split /\t/, $line;
+    my $locus  = uc(trim($fields[0]));
+    my $value  = trim($fields[1]);
+
+    $gene_list{$locus} = $value if exists $gene_list{$locus};
+}
+
+close $expr_fh;
+
+#print Dumper(\%gene_list);
+
+# Use only the file name so an EXPR_FILE given with a directory still yields
+# valid output paths.
+my $mapped_file = 'mapped_' . basename($expr_file);
+# NOTE(review): affy_mapping.pl writes to ../mapping_output instead --
+# confirm which directory is intended.
+my $sorted_file = "./mapping_output/sorted_$mapped_file";
+
+open my $out_fh, '>', $mapped_file
+    or die "Cannot write $mapped_file: $!\n";
+
+foreach my $locus (keys %gene_list) {
+    next if $gene_list{$locus} eq '';    # no expression value was found
+    print {$out_fh} "$locus\t$gene_list{$locus}\n";
+}
+
+close $out_fh or die "Cannot close $mapped_file: $!\n";
+
+# Every line starts with its own unique hash key, so a separate uniq pass is
+# unnecessary. The list form of system() keeps the file names away from the
+# shell.
+system('sort', '-o', $sorted_file, $mapped_file) == 0
+    or die "sort failed for $mapped_file (status $?)\n";
+unlink $mapped_file or warn "Cannot remove $mapped_file: $!\n";
+
+exit(0);
-- 
2.34.1