From 49695d43bfc0a278090df0e4c164933958982fa0 Mon Sep 17 00:00:00 2001 From: preecej Date: Fri, 12 Aug 2011 17:44:05 +0000 Subject: [PATCH] Changed refs from "paralog" to "homolog"; edited perldoc svn path=/; revision=151 --- .../perl_singletons/pathway_gene_swapper.pl | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index 1e05a49..c01ec1b 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -10,8 +10,8 @@ Pathway Gene Swapper =head1 DESCRIPTION -Swap out one set of genes for another in an existing PathVisio GPML -file. Optionally removes literature references. +Swap out one set of genes (or gene representation) for another in an +existing PathVisio GPML file. Optionally removes literature references. If multiple replacement genes map to a single original gene, multiple PathVisio boxes will be drawn in place of the @@ -56,8 +56,8 @@ pathway_gene_swapper.pl -i INPUT_FILE -g GENE_FILE -c CONFIG_FILE -o OUTPUT_FILE The first column must have one and only one gene -- the "original" gene. - The second column may have one and only one gene variants or - paralog -- the "replacement" gene(s). + The second column may have one and only one gene variant or + homolog -- the "replacement" gene(s). - The config file may have any or all of the following entries, in addition to the required fields (in any order): @@ -282,7 +282,7 @@ sub import_data # used to generate total counts of each species' gene list; sanity check my $original_gene_count = 0; - my $replacement_paralog_count = 0; + my $replacement_homolog_count = 0; my $line = ; @@ -299,19 +299,19 @@ sub import_data { $original_gene_count++; } - $replacement_paralog_count++; # count this every time + $replacement_homolog_count++; # count this every time # add new gene to hash value (array) for old gene hash key push @{$swap_genes{$data_field}},$data_val; } - if ($verbose) # give add'l stats on gene and paralog counts + if ($verbose) # give add'l stats on gene and homolog counts { - print "[Total number of original genes and paralogs]\n" + print "[Total number of original genes and homologs]\n" . "Original gene count: " . $original_gene_count . "\n" - . "Replacement paralog count: $replacement_paralog_count\n\n"; + . "Replacement homolog count: $replacement_homolog_count\n\n"; - print "[Number of paralogs per original gene]\n"; + print "[Number of homologs per original gene]\n"; foreach my $orig_gene_key (keys %swap_genes) { print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . "\n"; @@ -367,7 +367,7 @@ Returns a string containing the new hex id. sub create_unique_hex_id($$) { # NOTE: This algorithm breaks down at VERY large scale (100K genes+). The - # larger the number of original genes, groups, and new paralogs you need to + # larger the number of original genes, groups, and new homologs you need to # create, the more inefficient it becomes to make sure your "random" 5-digit # hex number is not already present in your "existing ids" list via # recursion. However, for a few hundred or thousand genes, it should be ok. @@ -532,7 +532,7 @@ sub swap_genes } # print "@new_nodes_map\n"; # TEST - # if more than one new paralog exists, and the old gene doesn't + # if more than one new homolog exists, and the old gene doesn't # already belong to a group, you'll need a new Group for multiple # gene boxes my $new_GroupId; @@ -566,8 +566,8 @@ sub swap_genes } } - # flag for determining if there are one or many replacement paralogs - my $is_first_paralog = 1; + # flag for determining if there are one or many replacement homologs + my $is_first_homolog = 1; # makes sure each box is increasingly offset from the original # (in all three dimensions) @@ -583,14 +583,14 @@ sub swap_genes } my $curr_new_node = $$_[0]; - my $curr_paralog = $$_[1]; - # print "$_: $curr_new_node, $curr_paralog\n"; # TEST + my $curr_homolog = $$_[1]; + # print "$_: $curr_new_node, $curr_homolog\n"; # TEST # print "[Curr New Node before editing...]\n" . $curr_new_node->toString . "\n\n"; # TEST # update all new nodes w/ attributes... # rename TextLabel... - # prefix (from config), suffix: new '-#' for multiple paralogs + # prefix (from config), suffix: new '-#' for multiple homologs $curr_new_node->setAttribute("TextLabel", (($configs{"LabelPrefix"}) ? $configs{"LabelPrefix"} : "") . $curr_new_node->getAttributeNode("TextLabel")->getValue @@ -606,13 +606,13 @@ sub swap_genes # NOTE: order is important in GPML; the tags are first my $new_comment = $gpml_doc->createElement("Comment"); $new_comment->setAttribute("Source",$configs{"Source"}); - $new_comment->addText($configs{"CommentPrefix"} . $old_gene . "."); + $new_comment->addText($configs{"CommentPrefix"} . " $old_gene."); $curr_new_node->insertBefore($new_comment,$curr_new_node->getFirstChild); # assumes other child nodes # edit my $curr_xref = ($curr_new_node->getElementsByTagName("Xref"))[0]; $curr_xref->setAttribute("Database",$configs{"Database"}); - $curr_xref->setAttribute("ID",$curr_paralog); + $curr_xref->setAttribute("ID",$curr_homolog); # change box width and colors () my $curr_graphics = ($curr_new_node->getElementsByTagName("Graphics"))[0]; @@ -622,16 +622,16 @@ sub swap_genes if ($configs{"BoxColor"}) { $curr_graphics->setAttribute("FillColor",$configs{"BoxColor"}); } - if ($is_first_paralog) + if ($is_first_homolog) { - # print "that was the first paralog...\n"; # TEST - $is_first_paralog = 0; # first paralog complete + # print "that was the first homolog...\n"; # TEST + $is_first_homolog = 0; # first homolog complete } - else # add'l paralogs required + else # add'l homologs required { $offset_multiplier++; - # print "that was an add'l paralog, change more attrs...\n"; # TEST + # print "that was an add'l homolog, change more attrs...\n"; # TEST # update add'l nodes w/ special attributes... # generate a new DataNode GraphId not already in use -- 2.34.1