From: preecej Date: Tue, 9 Aug 2011 00:10:22 +0000 (+0000) Subject: Added GPML hex id hash, nodes_by_gene_id hash, and new nodes array. Revised gene... X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=467c07e667e8b6d6423bd399275292b9e53397b4;p=old-jaiswallab-svn%2F.git Added GPML hex id hash, nodes_by_gene_id hash, and new nodes array. Revised gene swapping algorithm and successfully tested node-swapping DOM approach all the way to file. svn path=/; revision=143 --- diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index 8a490c0..6a9e03c 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -310,49 +310,134 @@ sub swap_genes my $biopax_node = ($pathway_node->getElementsByTagName("Biopax"))[0]; $pathway_node->removeChild($biopax_node); - # for (@$biopax_nodes) - # { - # # print $_->getTagName . "\n"; - # $pathway_node->removeChild($_); - # } - my $data_nodes = $pathway_node->getElementsByTagName("DataNode"); + #print $data_nodes->getLength . "\n"; + # if ($debug) { print "...\n" + # . Dumper(@$data_nodes) . "\n\n"; } + for (@$data_nodes) { my $curr_datanode = $_; my $biopaxref_nodes = $curr_datanode->getElementsByTagName("BiopaxRef"); for (@$biopaxref_nodes) { + # print $_->getTagName . "\n"; $curr_datanode->removeChild($_); } } - # my $biopaxref_nodes = $gpml_doc->getElementsByTagName("BiopaxRef"); - # print $biopaxref_nodes->getLength . "\n"; - # for (@$biopaxref_nodes) - # { - # @$pathway_nodes[0]->removeChild($_); - # } + # will hold a convenient list of data node references in the gpml doc, + # indexed by the id of the gene located in the element for each + # node + my %data_nodes_by_gene_id; + # create a hash of all 5-digit hex ids in the gpml doc (this is the black list) + # one list of DataNode.GraphId, Group.GroupId, and Group.GraphId + my %existing_hex_ids; + for (@$data_nodes) + { + #print $_ . "\n"; + if ($_->getAttributeNode("GraphId")) + { + $existing_hex_ids{$_->getAttributeNode("GraphId")->getValue} + = $_->getTagName . ".GraphId"; + } + # also build a data node hash to make lookup easier in the next section + my $curr_xref_id = ($_->getElementsByTagName("Xref"))[0] + ->getAttributeNode("ID")->getValue; + $curr_xref_id =~ s/\s+$//; # rtrim whitespace + $data_nodes_by_gene_id{$curr_xref_id} = $_; + } + + # if ($debug) { print "...\n" + # . Dumper(\%data_nodes_by_gene_id) . "\n\n"; } + + my $group_nodes = $pathway_node->getElementsByTagName("Group"); + for (@$group_nodes) + { + if ($_->getAttributeNode("GroupId")) + { + $existing_hex_ids{$_->getAttributeNode("GroupId")->getValue} + = $_->getTagName . ".GroupId"; + } + if ($_->getAttributeNode("GraphId")) + { + $existing_hex_ids{$_->getAttributeNode("GraphId")->getValue} + = $_->getTagName . ".GraphId"; + } + } + + # if ($debug) { print "...\n" + # . Dumper(\%existing_hex_ids) . "\n\n"; } + + # iterate through gene mappings from csv file + foreach my $old_gene (keys %swap_genes) + { + #print $old_gene . "\n"; + + # find curr old gene node in doc + if (exists $data_nodes_by_gene_id{$old_gene}) + { + my $curr_old_gene_node = $data_nodes_by_gene_id{$old_gene}; + # print $curr_old_gene_node . "\n"; + + # holds list of newly-created nodes, used to replace old node + my @new_nodes; + + # copy the curr old gene node + my $new_node = $curr_old_gene_node->cloneNode("deep"); + + # testing... + $new_node->setAttribute("GraphId","zzzzz"); + #print $new_node->getAttributeNode("GraphId")->getValue . "\n"; + print $new_node->toString; + + # iterate through new gene replacements + # for (@{$swap_genes{$old_gene}}) + # { + # # add to new nodes ary + push @new_nodes, $new_node; + # } + # print "@new_nodes\n"; + + # if more than one new gene maps, you'll need a group for multiple gene boxes + + # if curr old gene does not belong to a group + # create a new group node + # SUB: generate a new groupRef hex id not already in use + # add to end of group nodes + # for each new gene box + # use new or existing group id for new boxes + # add to new nodes ary + + # for new nodes ary + # SUB: generate a new DataNode hex id not already in use + # update new nodes w/ attributes: + # rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex) + # add Comment back-referencing TAIR locus id (use "source" attribute) + # edit + # decrement the Z-order + # decrement CenterX and CenterY by 10px each + # change box Width if needed + # color the box () + + # replace old node w/ new node(s); do this in place + for (@new_nodes) { + # replace one + $pathway_node->replaceChild($_,$curr_old_gene_node); + # ...and add the others immediately preceding + #insertBefore(); + } + } + else + { + print "ALERT: Gene identifier $old_gene is not present in this " + . "PathVisio GPML document.\n"; + } + } - # .identify mapped genes - # .if mapped gene did not belong to a group, create new group node - # .duplicate for each mapping - # .create new 5-digit hex code "GraphId" - # .if needed, add new group node - # .create new, random 5-digit hex code - # .make sure it doesn't already exist - # .add "GroupRef" on each added node, including the orig. - # .rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex) - # .add Comment back-referencing TAIR locus id (use "source" attribute) - # .edit - # .decrement the Z-order - # .decrement CenterX and CenterY by 10px each - # .change box Width if needed - # .color the box () - # - # [hold] + # [later] # .After PathVisio sanity check, before communication: # .remove back-ref TAIR comments