. "Original gene count: " . $original_gene_count . "\n"
. "Replacement paralog count: $replacement_paralog_count\n\n";
+ print "[Number of paralogs per original gene]\n";
foreach my $orig_gene_key (keys %swap_genes)
{
print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . "\n";
print Dumper(\%swap_genes) . "\n\n";
print "\n";
- print "[Source GPML]\n";
- print $gpml_doc->toString;
- print "\n";
+ # print "[Source GPML]\n";
+ # print $gpml_doc->toString;
+ # print "\n";
}
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
sub swap_genes
{
- print "Swapping gene data and making other modifications...\n\n";
-
- # remove all <BiopaxRef> and <bp:PublicationXref> elements and children
+ print "Swapping gene data and making other document modifications...\n\n";
+
my $pathway_node = ($gpml_doc->getElementsByTagName("Pathway"))[0];
+ # change Pathway header info to config settings
+ $pathway_node->setAttribute("Name",$configs{"Title"});
+ $pathway_node->setAttribute("Author",$configs{"Author"});
+ $pathway_node->setAttribute("Maintainer",$configs{"Maintainer"});
+ $pathway_node->setAttribute("Version",$configs{"Date"});
+ $pathway_node->setAttribute("Organism",$configs{"Organism"});
+
+ # remove all <BiopaxRef> and <bp:PublicationXref> elements and children
my $biopax_node = ($pathway_node->getElementsByTagName("Biopax"))[0];
$pathway_node->removeChild($biopax_node);
# also build a data node hash to make lookup easier in the next section
my $curr_xref_id = ($_->getElementsByTagName("Xref"))[0]
->getAttributeNode("ID")->getValue;
- $curr_xref_id =~ s/\s+$//; # rtrim whitespace
- $data_nodes_by_gene_id{$curr_xref_id} = $_;
+
+ $curr_xref_id =~ s/\s+$//; # rtrim whitespace
+ #$curr_xref_id =~ s/^("|')("|')$//; # strip quotes - may not be necessary
+
+ if (length($curr_xref_id) > 0)
+ {
+ $data_nodes_by_gene_id{$curr_xref_id} = $_;
+ }
+ else
+ {
+ if ($verbose) {
+ print "WARNING: Found DataNode (TextLabel: "
+ . $_->getAttributeNode('TextLabel')->getValue . ") "
+ . "with missing Xref ID.\n"
+ }
+ }
}
+
- # if ($debug) { print "...<DEBUG: \%data_nodes_by_gene_id>\n"
- # . Dumper(\%data_nodes_by_gene_id) . "\n\n"; }
+ if ($debug) {
+ print "...<DEBUG: \%data_nodes_by_gene_id>\n";
+ foreach my $tmp_node (keys %data_nodes_by_gene_id) {
+ print "... $tmp_node => $data_nodes_by_gene_id{$tmp_node}\n";
+ }
+ print "\n";
+ }
my $group_nodes = $pathway_node->getElementsByTagName("Group");
for (@$group_nodes)
# holds list of newly-created nodes, used to replace old node
my @new_nodes;
- # copy the curr old gene node
- my $new_node = $curr_old_gene_node->cloneNode("deep");
-
# testing...
- $new_node->setAttribute("GraphId","zzzzz");
- #print $new_node->getAttributeNode("GraphId")->getValue . "\n";
- print $new_node->toString;
+ # $new_node->setAttribute("GraphId","zzzzz");
+ # #print $new_node->getAttributeNode("GraphId")->getValue . "\n";
+ # print $new_node->toString;
# iterate through new gene replacements
- # for (@{$swap_genes{$old_gene}})
- # {
- # # add to new nodes ary
- push @new_nodes, $new_node;
- # }
+ for (@{$swap_genes{$old_gene}})
+ {
+ # copy the curr old gene node
+ my $new_node = $curr_old_gene_node->cloneNode("deep");
+
+ # testing...
+ # print "[$_]\n";
+ # print $new_node->toString;
+ # print "\n\n";
+
+ # add to new nodes ary
+ # TODO: if this errs, perhaps catch and skip?
+ push @new_nodes, $new_node;
+ }
# print "@new_nodes\n";
# if more than one new gene maps, you'll need a group for multiple gene boxes
-
+ if (scalar(@new_nodes) > 1)
+ {
# if curr old gene does not belong to a group
# create a new group node
# SUB: generate a new groupRef hex id not already in use
# for each new gene box
# use new or existing group id for new boxes
# add to new nodes ary
-
+ }
+
+ # flag for determining if there are one or many replacement paralogs
+ my $is_first_paralog = 1;
+
# for new nodes ary
- # SUB: generate a new DataNode hex id not already in use
+ for (@new_nodes)
+ {
+ if ($is_first_paralog)
+ {
+ # print "first paralog...\n";
+ $is_first_paralog = 0; # first paralog complete
+ }
+ else # add'l paralogs required
+ {
+ # print "add'l paralog...\n";
+ # SUB: generate a new DataNode hex id not already in use
+ }
# update new nodes w/ attributes:
- # rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex)
- # add Comment back-referencing TAIR locus id (use "source" attribute)
- # edit <Xref Database="JGI" ID="Egrandis..." />
- # decrement the Z-order
- # decrement CenterX and CenterY by 10px each
- # change box Width if needed
- # color the box (<Graphics...Color="4488ff" />)
-
+ # rename TextLabel (prefix (config), suffix: -alpha or numeric, based on regex)
+ $_->setAttribute("TextLabel",
+ $configs{"LabelPrefix"}
+ . $_->getAttributeNode("TextLabel")->getValue);
+
+ # add Comment back-referencing TAIR locus id (use "source" attribute)
+ # NOTE: order is important in GPML; the <Comment> tags are first
+ my $new_comment = $gpml_doc->createElement("Comment");
+ $new_comment->setAttribute("Source",$configs{"Source"});
+ $new_comment->addText($configs{"CommentPrefix"} . $old_gene . ".");
+ $_->insertBefore($new_comment,$_->getFirstChild); # assumes other child nodes
+
+ # edit <Xref Database="JGI" ID="Egrandis..." />
+ # decrement the Z-order
+ # decrement CenterX and CenterY by 10px each
+ # change box Width if needed
+ # color the box (<Graphics...Color="4488ff" />)
+ }
+
# replace old node w/ new node(s); do this in place
for (@new_nodes) {
# replace one
# ---------------------------------------------------------------------------
sub show_output
{
- print "[Modified GPML Output]\n";
- print $gpml_doc->toString;
- print "\n";
+ # print "[Modified GPML Output]\n";
+ # print $gpml_doc->toString;
+ # print "\n";
}
# ---------------------------------------------------------------------------