my $biopax_node = ($pathway_node->getElementsByTagName("Biopax"))[0]; # first element returned for tag "Biopax" (list slice); undef when the list is empty
$pathway_node->removeChild($biopax_node); # detach it via the pathway node. NOTE(review): no guard for a missing <Biopax>, and this presumably relies on it being a direct child of the pathway node -- confirm
- # for (@$biopax_nodes)
- # {
- # # print $_->getTagName . "\n";
- # $pathway_node->removeChild($_);
- # }
-
my $data_nodes = $pathway_node->getElementsByTagName("DataNode"); # node list for tag "DataNode" under the pathway node; iterated again further down
+
#print $data_nodes->getLength . "\n";
+ # if ($debug) { print "...<DEBUG: \@$data_nodes>\n"
+ # . Dumper(@$data_nodes) . "\n\n"; }
+
for (@$data_nodes) # strip the <BiopaxRef> elements out of every data node
{
my $curr_datanode = $_; # name the outer node, because the inner loop reuses $_
my $biopaxref_nodes = $curr_datanode->getElementsByTagName("BiopaxRef"); # elements returned for tag "BiopaxRef" within this data node
for (@$biopaxref_nodes)
{
+ # print $_->getTagName . "\n";
$curr_datanode->removeChild($_); # NOTE(review): removal goes through $curr_datanode, so each <BiopaxRef> is presumably a direct child of its <DataNode> -- confirm
}
}
- # my $biopaxref_nodes = $gpml_doc->getElementsByTagName("BiopaxRef");
- # print $biopaxref_nodes->getLength . "\n";
- # for (@$biopaxref_nodes)
- # {
- # @$pathway_nodes[0]->removeChild($_);
- # }
+ # Lookup table of data node references from the gpml doc, keyed by the
+ # gene id read from the ID attribute of each node's first <Xref> element
+ # (with trailing whitespace trimmed from the id).
+ my %data_nodes_by_gene_id;
+ # Hash of all 5-digit hex ids already used in the gpml doc (this is the black list):
+ # one combined list of DataNode.GraphId, Group.GroupId, and Group.GraphId; each value names the id's origin.
+ my %existing_hex_ids;
+ for (@$data_nodes)
+ {
+ #print $_ . "\n";
+ if ($_->getAttributeNode("GraphId")) # record the id only when the node carries a GraphId attribute
+ {
+ $existing_hex_ids{$_->getAttributeNode("GraphId")->getValue}
+ = $_->getTagName . ".GraphId"; # value is the tag name plus ".GraphId"
+ }
+ # also build a data node hash to make lookup easier in the next section. NOTE(review): no guard for a node lacking an <Xref> or its ID attribute -- confirm every data node has both
+ my $curr_xref_id = ($_->getElementsByTagName("Xref"))[0]
+ ->getAttributeNode("ID")->getValue;
+ $curr_xref_id =~ s/\s+$//; # strip trailing whitespace from the id
+ $data_nodes_by_gene_id{$curr_xref_id} = $_; # plain assignment: a later node with the same id replaces the earlier entry
+ }
+
+ # if ($debug) { print "...<DEBUG: \%data_nodes_by_gene_id>\n"
+ # . Dumper(\%data_nodes_by_gene_id) . "\n\n"; }
+
+ my $group_nodes = $pathway_node->getElementsByTagName("Group"); # node list for tag "Group" under the pathway node
+ for (@$group_nodes) # add each group's GroupId and GraphId, when present, to %existing_hex_ids
+ {
+ if ($_->getAttributeNode("GroupId")) # skip groups without a GroupId attribute
+ {
+ $existing_hex_ids{$_->getAttributeNode("GroupId")->getValue}
+ = $_->getTagName . ".GroupId"; # value is the tag name plus ".GroupId"
+ }
+ if ($_->getAttributeNode("GraphId")) # skip groups without a GraphId attribute
+ {
+ $existing_hex_ids{$_->getAttributeNode("GraphId")->getValue}
+ = $_->getTagName . ".GraphId"; # value is the tag name plus ".GraphId"
+ }
+ }
+
+ # if ($debug) { print "...<DEBUG: \%existing_hex_ids>\n"
+ # . Dumper(\%existing_hex_ids) . "\n\n"; }
+
+ # iterate through the old-gene keys of %swap_genes (per the original note, the gene mappings from the csv file)
+ foreach my $old_gene (keys %swap_genes)
+ {
+ #print $old_gene . "\n";
+
+ # find curr old gene node in doc, using the data node hash keyed by Xref ID
+ if (exists $data_nodes_by_gene_id{$old_gene})
+ {
+ my $curr_old_gene_node = $data_nodes_by_gene_id{$old_gene};
+ # print $curr_old_gene_node . "\n";
+
+ # holds list of newly-created nodes, used to replace old node
+ my @new_nodes;
+
+ # copy the curr old gene node; "deep" is just a true argument (presumably requesting a deep clone -- confirm against the DOM library)
+ my $new_node = $curr_old_gene_node->cloneNode("deep");
+
+ # testing only: "zzzzz" is a placeholder GraphId, and the print below emits the clone's string form
+ $new_node->setAttribute("GraphId","zzzzz");
+ #print $new_node->getAttributeNode("GraphId")->getValue . "\n";
+ print $new_node->toString;
+
+ # iterate through new gene replacements (loop is still commented out, so exactly one clone is pushed per old gene)
+ # for (@{$swap_genes{$old_gene}})
+ # {
+ # # add to new nodes ary
+ push @new_nodes, $new_node;
+ # }
+ # print "@new_nodes\n";
+
+ # if more than one new gene maps, you'll need a group for multiple gene boxes
+
+ # if curr old gene does not belong to a group
+ # create a new group node
+ # SUB: generate a new groupRef hex id not already in use
+ # add to end of group nodes
+ # for each new gene box
+ # use new or existing group id for new boxes
+ # add to new nodes ary
+
+ # for new nodes ary
+ # SUB: generate a new DataNode hex id not already in use
+ # update new nodes w/ attributes:
+ # rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex)
+ # add Comment back-referencing TAIR locus id (use "source" attribute)
+ # edit <Xref Database="JGI" ID="Egrandis..." />
+ # decrement the Z-order
+ # decrement CenterX and CenterY by 10px each
+ # change box Width if needed
+ # color the box (<Graphics...Color="4488ff" />)
+
+ # replace old node w/ new node(s); do this in place
+ for (@new_nodes) {
+ # replace one. NOTE(review): every pass hands the same $curr_old_gene_node to replaceChild; fine while @new_nodes holds a single node, revisit once several are pushed
+ $pathway_node->replaceChild($_,$curr_old_gene_node);
+ # ...and add the others immediately preceding (not implemented yet)
+ #insertBefore();
+ }
+ }
+ else # the gene id has no entry in %data_nodes_by_gene_id: report it and move on to the next key
+ {
+ print "ALERT: Gene identifier $old_gene is not present in this "
+ . "PathVisio GPML document.\n";
+ }
+ }
- # .identify mapped genes
- # .if mapped gene did not belong to a group, create new group node
- # .duplicate <DataNode> for each mapping
- # .create new 5-digit hex code "GraphId"
- # .if needed, add new group node
- # .create new, random 5-digit hex code
- # .make sure it doesn't already exist
- # .add "GroupRef" on each added node, including the orig.
- # .rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex)
- # .add Comment back-referencing TAIR locus id (use "source" attribute)
- # .edit <Xref Database="JGI" ID="Egrandis..." />
- # .decrement the Z-order
- # .decrement CenterX and CenterY by 10px each
- # .change box Width if needed
- # .color the box (<Graphics...Color="4488ff" />)
- #
- # [hold]
+ # [later]
# .After PathVisio sanity check, before communication:
# .remove back-ref TAIR comments