# print "\n";
}
+# ---------------------------------------------------------------------------
+=item B<string create_unique_hex_id()>
+Generates a "random" 5-digit hexadecimal id, checks to see if it
+already exists in the hex list, and if not, adds it to the list
+of hex ids already present in the GPML doc. Otherwise, generates
+another "random" id and repeats the process until a new unique id
+is identified.
+Returns a string containing the new hex id.
+=cut
+# ---------------------------------------------------------------------------
+sub create_unique_hex_id($$)
+{
+ # NOTE: This algorithm breaks down at VERY large scale (100K genes+). The
+ # larger the number of original genes, groups, and new paralogs you need to
+ # create, the more inefficient it becomes to make sure your "random" 5-digit
+ # hex number is not already present in your "existing ids" list via
+ # recursion. However, for a few hundred or thousand genes, it should be ok.
+
+ my $first_digit; # limited to a..f
+ my $last_four_digits; # 0..f
+
+ $first_digit = (('a'..'f')[rand(6)]);
+ $last_four_digits .= ((0..9,'a'..'f')[rand(16)]) for 1..4;
+
+ my $candidate_id = $first_digit . $last_four_digits;
+
+ if (exists ${$_[0]}{$candidate_id})
+ {
+ # print "not unique...\n"; # TEST
+ $candidate_id = create_unique_hex_id($_[0]);
+ }
+ else
+ {
+ # print "unique!\n"; # TEST
+ ${$_[0]}{$candidate_id} = $_[1];
+ }
+
+ return $candidate_id;
+}
+
# ---------------------------------------------------------------------------
=item B<void swap_genes()>
Substitutes gene data.
my $data_nodes = $pathway_node->getElementsByTagName("DataNode");
- # TEST: print $data_nodes->getLength . "\n";
+ # print $data_nodes->getLength . "\n"; # TEST
for (@$data_nodes)
{
my $biopaxref_nodes = $curr_datanode->getElementsByTagName("BiopaxRef");
for (@$biopaxref_nodes)
{
- # TEST: print $_->getTagName . "\n";
+ # print $_->getTagName . "\n"; # TEST
$curr_datanode->removeChild($_);
}
}
my %existing_hex_ids;
for (@$data_nodes)
{
- # TEST: print $_ . "\n";
+ # print $_ . "\n"; # TEST
if ($_->getAttributeNode("GraphId"))
{
$existing_hex_ids{$_->getAttributeNode("GraphId")->getValue}
# iterate through gene mappings from csv file
foreach my $old_gene (keys %swap_genes)
{
- # TEST: print $old_gene . "\n";
+ # print $old_gene . "\n"; # TEST
# find curr old gene node in doc
if (exists $data_nodes_by_gene_id{$old_gene})
{
my $curr_old_gene_node = $data_nodes_by_gene_id{$old_gene};
- # TEST: print $curr_old_gene_node . "\n";
+ # print $curr_old_gene_node . "\n"; # TEST
# holds list of newly-created nodes, used to replace old node
my @new_nodes_map;
# copy the curr old gene node
my $new_node = $curr_old_gene_node->cloneNode("deep");
- # TEST: print "[$_]\n$new_node->toString\n\n";
+ # print "[$_]\n$new_node->toString\n\n"; # TEST
# add to new nodes ary
- # TODO: if this errs, perhaps catch and skip?
push @new_nodes_map, [$new_node, $_];
}
- # TEST: print "@new_nodes_map\n";
+ # print "@new_nodes_map\n"; # TEST
# if more than one new gene maps, you'll need a group for multiple gene boxes
if (scalar(@new_nodes_map) > 1)
# flag for determining if there are one or many replacement paralogs
my $is_first_paralog = 1;
+ # makes sure each box is increasingly offset from the original
+ # (in all three dimensions)
+ my $offset_multiplier = 0;
+ my $gene_suffix_counter = 0; # used to affix numbers to multiple new gene symbols
+
# for new nodes ary
for (@new_nodes_map)
{
- # TODO: check for new group ref to use
- # TODO: integrate attr settings into the if..else below
-
+ if (scalar(@new_nodes_map) > 1)
+ {
+ $gene_suffix_counter++;
+ # TODO: check for new group ref to use
+ }
+
my $curr_new_node = $$_[0];
my $curr_paralog = $$_[1];
- # TEST: print "$_: $curr_new_node, $curr_paralog\n";
+ # print "$_: $curr_new_node, $curr_paralog\n"; # TEST
+ # print "[Curr New Node before editing...]\n" . $curr_new_node->toString . "\n\n"; # TEST
- if ($is_first_paralog)
- {
- # print "first paralog...\n";
- $is_first_paralog = 0; # first paralog complete
- }
- else # add'l paralogs required
- {
- # print "add'l paralog...\n";
- # SUB: generate a new DataNode hex id not already in use
- # update add'l nodes w/ special attributes:
- # decrement the Z-order
- # decrement CenterX and CenterY by 10px each
- }
# update all new nodes w/ attributes...
- # rename TextLabel (prefix (config), TODO: suffix: -alpha or numeric, based on regex)
+ # rename TextLabel...
+ # prefix (from config), suffix: new '-#' for multiple paralogs
$curr_new_node->setAttribute("TextLabel",
$configs{"LabelPrefix"}
- . $curr_new_node->getAttributeNode("TextLabel")->getValue);
+ . $curr_new_node->getAttributeNode("TextLabel")->getValue
+ . (($gene_suffix_counter > 0) ? "-$gene_suffix_counter" : ""));
# add Comment back-referencing TAIR locus id (use "source" attribute)
# NOTE: order is important in GPML; the <Comment> tags are first
$curr_graphics->setAttribute("Width",$configs{"BoxWidth"});
$curr_graphics->setAttribute("Color",$configs{"BoxBorder"});
$curr_graphics->setAttribute("FillColor",$configs{"BoxColor"});
+
+ if ($is_first_paralog)
+ {
+ # print "that was the first paralog...\n"; # TEST
+ $is_first_paralog = 0; # first paralog complete
+ }
+ else # add'l paralogs required
+ {
+ $offset_multiplier++;
+
+ # print "that was an add'l paralog, change more attrs...\n"; # TEST
+ # update add'l nodes w/ special attributes...
+
+ # generate a new DataNode GraphId not already in use
+ my $new_GraphId =
+ create_unique_hex_id(\%existing_hex_ids,"DataNode.GraphId");
+ # print $new_GraphId . "\n"; # TEST
+ $curr_new_node->setAttribute("GraphId",$new_GraphId);
+
+ # decrement the Z-order
+ $curr_graphics->setAttribute("ZOrder",
+ $curr_graphics->getAttributeNode("ZOrder")->getValue
+ - $offset_multiplier);
+ # stagger the extra boxes by decrementing the coords
+ $curr_graphics->setAttribute("CenterX",
+ $curr_graphics->getAttributeNode("CenterX")->getValue
+ - ($configs{"X-Offset"} * $offset_multiplier));
+ $curr_graphics->setAttribute("CenterY",
+ $curr_graphics->getAttributeNode("CenterY")->getValue
+ - ($configs{"Y-Offset"} * $offset_multiplier));
+ }
}
- # TODO: roll these node statments into the loop above
- # replace old node w/ new node(s); do this in place
+ # replace old node w/ new node(s)
for (@new_nodes_map) {
- # replace one...
- $pathway_node->replaceChild($$_[0],$curr_old_gene_node);
- # ...and add the others immediately preceding
- #insertBefore();
+ # add all the new nodes...
+ $pathway_node->insertBefore($$_[0],$curr_old_gene_node);
}
+ # ...and remove the original node
+ $pathway_node->removeChild($curr_old_gene_node);
}
else
{