From 39c009cf01bb41917ffa42ad93b7b241ab76c426 Mon Sep 17 00:00:00 2001 From: preecej Date: Sat, 15 Oct 2011 01:14:27 +0000 Subject: [PATCH] Completed gene symbol feature; working on heat map feature svn path=/; revision=185 --- .../perl_singletons/pathway_gene_swapper.pl | 53 ++++++++++++------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index 82d9812..7f74b3a 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -25,6 +25,10 @@ a new group will be created. If an original gene had multiple instances displayed on the pathway diagram, each instance will be subjected to the replacement process. +There is also an option to read in an extra column of gene symbols, if the +user wishes to provide their own. Otherwise, the application will +continue to use the label prefix and auto-numbering suffix settings. + The replacement gene symbols can be prefixed to separate them from the original, and an ordinal suffix ('-#') will be added to a group of replacement genes. @@ -35,14 +39,12 @@ of cards). =head1 FUTURE CHANGES -Add the option to read in an extra column of gene symbols, if the -user wishes to provide their own. Otherwise, continue to use the label -prefix and auto-numbering suffix settings. - Add a comment containing the NCBI species id of the new homolog (for the purpose of multi-species pathway comparison or host-pathogen interaction diagrams). +Add a "heat-map" effect by making large paralog sets "hotter". + =head1 USAGE pathway_gene_swapper.pl -i INPUT_FILE -g GENE_FILE -c CONFIG_FILE -o OUTPUT_FILE -L -v -G -d @@ -56,6 +58,8 @@ pathway_gene_swapper.pl -i INPUT_FILE -g GENE_FILE -c CONFIG_FILE -o OUTPUT_FILE (NOTE: if no path supplied for input files, current working directory is assumed) -s use provided gene symbols instead of config file's LabelPrefix + -h apply a heat-map to any multi-mapped set of paralogs + (NOTE: precludes custom box-coloring via the config file) -L Remove literature references. -v View verbose information -G Display GPML input/output documents @@ -72,6 +76,10 @@ pathway_gene_swapper.pl -i INPUT_FILE -g GENE_FILE -c CONFIG_FILE -o OUTPUT_FILE The second column may have one and only one gene variant or homolog -- the "replacement" gene(s). + + An optional "gene symbol" column may be placed between the + first two columns, if the user would prefer to use a list of + symbols for the new homologous genes. - The config file may have any or all of the following entries, in addition to the required fields (in any order): @@ -115,6 +123,7 @@ use Getopt::Std; use Data::Dumper; # specific +use Graphics::ColorUtils qw( :gradients ); use XML::DOM; # --------------------------------------------------------------------------- @@ -127,6 +136,7 @@ my $input_gpml_file; my $input_gene_file; my $input_config_file; my $output_file; +my $apply_paralog_heat = 0; my $remove_lit = 0; # flag to remove literature and lit references my $use_symbols = 0; # flag to indicate use of provided gene symbols my $verbose = 0; # flag for verbose output @@ -219,7 +229,7 @@ screen output. sub init { # read and set options - getopts('i:g:c:o:LsvGd', \%opts); + getopts('i:g:c:o:hLsvGd', \%opts); foreach my $key (keys %opts) { my $value = $opts{$key}; @@ -253,6 +263,7 @@ sub init } } case "s" { $use_symbols = 1; } + case "h" { $apply_paralog_heat = 1; } case "L" { $remove_lit = 1; } case "v" { $verbose = 1; } case "G" { $doc_mode = 1; } @@ -274,6 +285,7 @@ sub init . "Output File: $output_file\n" . "\n" . "Use provided gene symbols? " . ($use_symbols ? "Yes" : "No") . "\n" + . "Provide paralog heat-map? " . ($apply_paralog_heat ? "Yes" : "No") . "\n" . "Remove literature references? " . ($remove_lit ? "Yes" : "No") . "\n" . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n" . "Running in document mode? " . ($doc_mode ? "Yes" : "No") . "\n" @@ -650,20 +662,17 @@ sub swap_genes } my $curr_new_node = $$_[0]; - my $curr_symbol = ""; - - if ($use_symbols) - { - $curr_symbol = ${$$_[1]}[0]; - } - - my $curr_homolog = ${$$_[1]}[1]; + my $curr_symbol = ${$$_[1]}{"symbol"}; + my $curr_homolog = ${$$_[1]}{"new_item"}; #print "$_: $curr_new_node, $curr_symbol, $curr_homolog\n"; # TEST #print "[Curr New Node before editing...]\n" . $curr_new_node->toString . "\n\n"; # TEST # update all new nodes w/ attributes... - + + # grab original text label + my $old_label = $curr_new_node->getAttributeNode("TextLabel")->getValue; + # rename TextLabel... if ($use_symbols && length($curr_symbol) > 0) # apply the provided gene symbol { @@ -687,7 +696,7 @@ sub swap_genes # NOTE: order is important in GPML; the tags are first my $new_comment = $gpml_doc->createElement("Comment"); $new_comment->setAttribute("Source",$configs{"Source"}); - $new_comment->addText($configs{"CommentPrefix"} . " $old_gene."); + $new_comment->addText($configs{"CommentPrefix"} . " $old_gene ($old_label)."); $curr_new_node->insertBefore($new_comment,$curr_new_node->getFirstChild); # assumes other child nodes # edit @@ -700,8 +709,17 @@ sub swap_genes $curr_graphics->setAttribute("Width",$configs{"BoxWidth"}); if ($configs{"BoxBorder"}) { $curr_graphics->setAttribute("Color",$configs{"BoxBorder"}); } - if ($configs{"BoxColor"}) { - $curr_graphics->setAttribute("FillColor",$configs{"BoxColor"}); } + + # add "heat" to genes with multiple paralogs + if ($apply_paralog_heat && ($gene_suffix_counter > 0)) + { + $curr_graphics->setAttribute("FillColor","cc0000"); + } + else + { + if ($configs{"BoxColor"}) { + $curr_graphics->setAttribute("FillColor",$configs{"BoxColor"}); } + } if ($is_first_homolog) { @@ -813,7 +831,6 @@ init; import_data; show_input; -exit swap_genes(); show_output; export_data; -- 2.34.1