From 77d38e0c6495243b8df8a131a69003a88c149e2e Mon Sep 17 00:00:00 2001 From: preecej Date: Wed, 10 Aug 2011 00:17:11 +0000 Subject: [PATCH] Began modifying DataNodes for 1-to-1: added label prefix, added Comment svn path=/; revision=145 --- .../perl_singletons/pathway_gene_swapper.pl | 131 +++++++++++++----- 1 file changed, 96 insertions(+), 35 deletions(-) diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index 5066f39..c205efa 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -258,6 +258,7 @@ sub import_data . "Original gene count: " . $original_gene_count . "\n" . "Replacement paralog count: $replacement_paralog_count\n\n"; + print "[Number of paralogs per original gene]\n"; foreach my $orig_gene_key (keys %swap_genes) { print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . "\n"; @@ -290,9 +291,9 @@ sub show_input print Dumper(\%swap_genes) . "\n\n"; print "\n"; - print "[Source GPML]\n"; - print $gpml_doc->toString; - print "\n"; + # print "[Source GPML]\n"; + # print $gpml_doc->toString; + # print "\n"; } # --------------------------------------------------------------------------- @@ -302,11 +303,18 @@ Substitutes gene data. # --------------------------------------------------------------------------- sub swap_genes { - print "Swapping gene data and making other modifications...\n\n"; - - # remove all and elements and children + print "Swapping gene data and making other document modifications...\n\n"; + my $pathway_node = ($gpml_doc->getElementsByTagName("Pathway"))[0]; + # change Pathway header info to config settings + $pathway_node->setAttribute("Name",$configs{"Title"}); + $pathway_node->setAttribute("Author",$configs{"Author"}); + $pathway_node->setAttribute("Maintainer",$configs{"Maintainer"}); + $pathway_node->setAttribute("Version",$configs{"Date"}); + $pathway_node->setAttribute("Organism",$configs{"Organism"}); + + # remove all and elements and children my $biopax_node = ($pathway_node->getElementsByTagName("Biopax"))[0]; $pathway_node->removeChild($biopax_node); @@ -346,12 +354,32 @@ sub swap_genes # also build a data node hash to make lookup easier in the next section my $curr_xref_id = ($_->getElementsByTagName("Xref"))[0] ->getAttributeNode("ID")->getValue; - $curr_xref_id =~ s/\s+$//; # rtrim whitespace - $data_nodes_by_gene_id{$curr_xref_id} = $_; + + $curr_xref_id =~ s/\s+$//; # rtrim whitespace + #$curr_xref_id =~ s/^("|')("|')$//; # strip quotes - may not be necessary + + if (length($curr_xref_id) > 0) + { + $data_nodes_by_gene_id{$curr_xref_id} = $_; + } + else + { + if ($verbose) { + print "WARNING: Found DataNode (TextLabel: " + . $_->getAttributeNode('TextLabel')->getValue . ") " + . "with missing Xref ID.\n" + } + } } + - # if ($debug) { print "...\n" - # . Dumper(\%data_nodes_by_gene_id) . "\n\n"; } + if ($debug) { + print "...\n"; + foreach my $tmp_node (keys %data_nodes_by_gene_id) { + print "... $tmp_node => $data_nodes_by_gene_id{$tmp_node}\n"; + } + print "\n"; + } my $group_nodes = $pathway_node->getElementsByTagName("Group"); for (@$group_nodes) @@ -385,24 +413,31 @@ sub swap_genes # holds list of newly-created nodes, used to replace old node my @new_nodes; - # copy the curr old gene node - my $new_node = $curr_old_gene_node->cloneNode("deep"); - # testing... - $new_node->setAttribute("GraphId","zzzzz"); - #print $new_node->getAttributeNode("GraphId")->getValue . "\n"; - print $new_node->toString; + # $new_node->setAttribute("GraphId","zzzzz"); + # #print $new_node->getAttributeNode("GraphId")->getValue . "\n"; + # print $new_node->toString; # iterate through new gene replacements - # for (@{$swap_genes{$old_gene}}) - # { - # # add to new nodes ary - push @new_nodes, $new_node; - # } + for (@{$swap_genes{$old_gene}}) + { + # copy the curr old gene node + my $new_node = $curr_old_gene_node->cloneNode("deep"); + + # testing... + # print "[$_]\n"; + # print $new_node->toString; + # print "\n\n"; + + # add to new nodes ary + # TODO: if this errs, perhaps catch and skip? + push @new_nodes, $new_node; + } # print "@new_nodes\n"; # if more than one new gene maps, you'll need a group for multiple gene boxes - + if (scalar(@new_nodes) > 1) + { # if curr old gene does not belong to a group # create a new group node # SUB: generate a new groupRef hex id not already in use @@ -410,18 +445,44 @@ sub swap_genes # for each new gene box # use new or existing group id for new boxes # add to new nodes ary - + } + + # flag for determining if there are one or many replacement paralogs + my $is_first_paralog = 1; + # for new nodes ary - # SUB: generate a new DataNode hex id not already in use + for (@new_nodes) + { + if ($is_first_paralog) + { + # print "first paralog...\n"; + $is_first_paralog = 0; # first paralog complete + } + else # add'l paralogs required + { + # print "add'l paralog...\n"; + # SUB: generate a new DataNode hex id not already in use + } # update new nodes w/ attributes: - # rename TextLabel (prefix: Eu-, suffix: -alpha or numeric, based on regex) - # add Comment back-referencing TAIR locus id (use "source" attribute) - # edit - # decrement the Z-order - # decrement CenterX and CenterY by 10px each - # change box Width if needed - # color the box () - + # rename TextLabel (prefix (config), suffix: -alpha or numeric, based on regex) + $_->setAttribute("TextLabel", + $configs{"LabelPrefix"} + . $_->getAttributeNode("TextLabel")->getValue); + + # add Comment back-referencing TAIR locus id (use "source" attribute) + # NOTE: order is important in GPML; the tags are first + my $new_comment = $gpml_doc->createElement("Comment"); + $new_comment->setAttribute("Source",$configs{"Source"}); + $new_comment->addText($configs{"CommentPrefix"} . $old_gene . "."); + $_->insertBefore($new_comment,$_->getFirstChild); # assumes other child nodes + + # edit + # decrement the Z-order + # decrement CenterX and CenterY by 10px each + # change box Width if needed + # color the box () + } + # replace old node w/ new node(s); do this in place for (@new_nodes) { # replace one @@ -460,9 +521,9 @@ Displays the transformed data. Verbose only. # --------------------------------------------------------------------------- sub show_output { - print "[Modified GPML Output]\n"; - print $gpml_doc->toString; - print "\n"; + # print "[Modified GPML Output]\n"; + # print $gpml_doc->toString; + # print "\n"; } # --------------------------------------------------------------------------- -- 2.34.1