=head1 DESCRIPTION
-Swap out one set of genes for another in an existing PathVisio GPML
-file. Optionally removes literature references.
+Swap out one set of genes (or gene representation) for another in an
+existing PathVisio GPML file. Optionally removes literature references.
If multiple replacement genes map to a single original gene,
multiple PathVisio boxes will be drawn in place of the
The first column must have one and only one gene -- the
"original" gene.
- The second column may have one and only one gene variants or
- paralog -- the "replacement" gene(s).
+ The second column may have one and only one gene variant or
+ homolog per line -- the "replacement" gene.
- The config file may have any or all of the following entries,
in addition to the required fields (in any order):
# used to generate total counts of each species' gene list; sanity check
my $original_gene_count = 0;
- my $replacement_paralog_count = 0;
+ my $replacement_homolog_count = 0;
my $line = <GENE_FILE>;
{
$original_gene_count++;
}
- $replacement_paralog_count++; # count this every time
+ $replacement_homolog_count++; # count this every time
# add new gene to hash value (array) for old gene hash key
push @{$swap_genes{$data_field}},$data_val;
}
- if ($verbose) # give add'l stats on gene and paralog counts
+ if ($verbose) # give add'l stats on gene and homolog counts
{
- print "[Total number of original genes and paralogs]\n"
+ print "[Total number of original genes and homologs]\n"
. "Original gene count: " . $original_gene_count . "\n"
- . "Replacement paralog count: $replacement_paralog_count\n\n";
+ . "Replacement homolog count: $replacement_homolog_count\n\n";
- print "[Number of paralogs per original gene]\n";
+ print "[Number of homologs per original gene]\n";
foreach my $orig_gene_key (keys %swap_genes)
{
print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . "\n";
sub create_unique_hex_id($$)
{
# NOTE: This algorithm breaks down at VERY large scale (100K genes+). The
- # larger the number of original genes, groups, and new paralogs you need to
+ # larger the number of original genes, groups, and new homologs you need to
# create, the more inefficient it becomes to make sure your "random" 5-digit
# hex number is not already present in your "existing ids" list via
# recursion. However, for a few hundred or thousand genes, it should be ok.
}
# print "@new_nodes_map\n"; # TEST
- # if more than one new paralog exists, and the old gene doesn't
+ # if more than one new homolog exists, and the old gene doesn't
# already belong to a group, you'll need a new Group for multiple
# gene boxes
my $new_GroupId;
}
}
- # flag for determining if there are one or many replacement paralogs
- my $is_first_paralog = 1;
+ # flag for determining if there are one or many replacement homologs
+ my $is_first_homolog = 1;
# makes sure each box is increasingly offset from the original
# (in all three dimensions)
}
my $curr_new_node = $$_[0];
- my $curr_paralog = $$_[1];
- # print "$_: $curr_new_node, $curr_paralog\n"; # TEST
+ my $curr_homolog = $$_[1];
+ # print "$_: $curr_new_node, $curr_homolog\n"; # TEST
# print "[Curr New Node before editing...]\n" . $curr_new_node->toString . "\n\n"; # TEST
# update all new nodes w/ attributes...
# rename TextLabel...
- # prefix (from config), suffix: new '-#' for multiple paralogs
+ # prefix (from config), suffix: new '-#' for multiple homologs
$curr_new_node->setAttribute("TextLabel",
(($configs{"LabelPrefix"}) ? $configs{"LabelPrefix"} : "")
. $curr_new_node->getAttributeNode("TextLabel")->getValue
# NOTE: order is important in GPML; the <Comment> tags are first
my $new_comment = $gpml_doc->createElement("Comment");
$new_comment->setAttribute("Source",$configs{"Source"});
- $new_comment->addText($configs{"CommentPrefix"} . $old_gene . ".");
+ $new_comment->addText($configs{"CommentPrefix"} . " $old_gene.");
$curr_new_node->insertBefore($new_comment,$curr_new_node->getFirstChild); # assumes other child nodes
# edit <Xref Database="JGI" ID="Egrandis..." />
my $curr_xref = ($curr_new_node->getElementsByTagName("Xref"))[0];
$curr_xref->setAttribute("Database",$configs{"Database"});
- $curr_xref->setAttribute("ID",$curr_paralog);
+ $curr_xref->setAttribute("ID",$curr_homolog);
# change box width and colors (<Graphics...Color="4488ff" ... />)
my $curr_graphics = ($curr_new_node->getElementsByTagName("Graphics"))[0];
if ($configs{"BoxColor"}) {
$curr_graphics->setAttribute("FillColor",$configs{"BoxColor"}); }
- if ($is_first_paralog)
+ if ($is_first_homolog)
{
- # print "that was the first paralog...\n"; # TEST
- $is_first_paralog = 0; # first paralog complete
+ # print "that was the first homolog...\n"; # TEST
+ $is_first_homolog = 0; # first homolog complete
}
- else # add'l paralogs required
+ else # add'l homologs required
{
$offset_multiplier++;
- # print "that was an add'l paralog, change more attrs...\n"; # TEST
+ # print "that was an add'l homolog, change more attrs...\n"; # TEST
# update add'l nodes w/ special attributes...
# generate a new DataNode GraphId not already in use