# ---------------------------------------------------------------------------
# Reads the configuration file whose path is given as the only argument
# ($_[0]) and returns the collected settings as a hash (flattened to a list
# on return). Dies with "Could not open <path>" if the file cannot be opened.
# NOTE(review): this is a patch excerpt -- the loop that reads CONFIG_FILE and
# sets $data_field / $data_val is not shown; only its closing brace appears.
sub config($)
{
- if ($debug) { print "...Config file path: $_[0]\n"; }
-
+
+ print "Opening configuration file and reading data...\n\n";
my %local_config_hash;
open(CONFIG_FILE, $_[0]) or die("Could not open $_[0]");
$local_config_hash{$data_field} = $data_val;
}
+ close(CONFIG_FILE);
+
return %local_config_hash;
}
. "\n";
%configs = config($input_config_file);
-
- if ($debug) { print "...<DEBUG: \%configs>\n"
- . Dumper(\%configs) . "\n\n"; }
}
# ---------------------------------------------------------------------------
=item B<void import_data()>
-Reads, parses, and stores source GPML.
+Reads, parses, and stores gene mapping file and source GPML.
=cut
# ---------------------------------------------------------------------------
sub import_data
{
+ # Loads both inputs: the gene mapping file named by $input_gene_file is read
+ # into %swap_genes, and the GPML file named by $input_gpml_file is parsed
+ # into $gpml_doc with XML::DOM. Takes no arguments; dies if the mapping
+ # file cannot be opened.
+ # NOTE(review): the statements that split $line into $data_field and
+ # $data_val are not part of this excerpt.
- print "Opening input file and reading header info...\n\n";
- #open the file
- #read a line
- #skip header line
- #split line on comma
- #get original gene and swapped gene
- #add genes to %swap_genes
- open(CONFIG_FILE, $input_gene_file) or die("Could not open $input_gene_file");
+ print "Opening gene mapping file and reading data...\n\n";
+
+ # three-argument open: the path is treated purely as a file name and is
+ # never scanned for a mode prefix; $! reports why the open failed
+ open(GENE_FILE, '<', $input_gene_file) or die("Could not open $input_gene_file: $!");
- my $line = <CONFIG_FILE>;
- while (<CONFIG_FILE>)
+ # this first read consumes the header line so the loop starts at the data
+ my $line = <GENE_FILE>;
+ while (<GENE_FILE>)
{
$line = $_;
chomp $line;
$swap_genes{$data_field} = $data_val;
}
- #test:display %swap_genes
- if ($debug) { print "...<DEBUG: \%swap_genes>\n"
- . Dumper(\%swap_genes) . "\n\n"; }
-
-
-
+
+ close(GENE_FILE);
+
+ print "Opening GPML pathway file and reading data...\n\n";
+
+ # direct class-method call instead of the indirect-object "new Class" form
+ my $parser = XML::DOM::Parser->new;
+ $gpml_doc = $parser->parsefile($input_gpml_file);
}
# ---------------------------------------------------------------------------
# Prints the loaded inputs for inspection under three headings: %configs and
# %swap_genes as Dumper output, then the source GPML document serialized
# with toString. Takes no arguments.
sub show_input
{
+ print "[Configuration Settings]\n";
+ print Dumper(\%configs) . "\n\n";
+ print "\n";
+
+ print "[Gene Mappings]\n";
+ print Dumper(\%swap_genes) . "\n\n";
+ print "\n";
+
print "[Source GPML]\n";
+ print $gpml_doc->toString;
print "\n";
}
# ---------------------------------------------------------------------------
# Placeholder for the gene-swapping step. As shown here the only executable
# statement is a progress message; the rest of the body is commented-out
# planning notes and example XML::DOM calls. Takes no arguments.
sub swap_genes
{
- print "Swapping gene data...\n\n";
+ print "Swapping gene data and making other modifications...\n\n";
# -------------------------------------------------------------------------
# [PathVisio Perl Pseudo-Script]
# -------------------------------------------------------------------------
# .remove all <BiopaxRef> and <bp:PublicationXref> elements and children
+ # my $pub_nodes = $gpml_doc->getElementsByTagName("bp:PublicationXref");
+ # for my $pub_node (\$pub_nodes)
+ # {
+ # $pub_node->removeChild();
+ # }
# NOTE(review): if the sketch above is enabled as written, (\$pub_nodes)
# loops once over a reference instead of over the matched nodes, and
# removeChild() is called without the child node to remove -- confirm
# against the XML::DOM documentation before use.
+
# .identify mapped genes
# .if mapped gene did not belong to a group, create new group node
# .duplicate <DataNode> for each mapping
# .create new 5-digit hex code "GraphId"
- # .if needed, add new group node (create new 5-digit hex code)
+ # .if needed, add new group node
+ # .create new, random 5-digit hex code
+ # .make sure it doesn't already exist
# .add "GroupRef" on each added node, including the orig.
# .rename TextLabel (prefix: Eu-, suffix: -#?)
# .add Comment back-referencing TAIR locus id (use "source" attribute)
# .decrement CenterX and CenterY by 10px each
# .change box Width if needed
# .color the box (<Graphics...Color="4488ff" />)
- # .After PathVisio sanity check:
+ # .After PathVisio sanity check, before communication:
# .remove back-ref TAIR comments
- # -------------------------------------------------------------------------
- # [XML::DOM]
- # -------------------------------------------------------------------------
- # my $parser = new XML::DOM::Parser;
- # my $doc = $parser->parsefile ("file.xml");
- # # print all HREF attributes of all CODEBASE elements
- # my $nodes = $doc->getElementsByTagName ("CODEBASE");
+ # [usage example]
+ # my $nodes = $gpml_doc->getElementsByTagName("DataNode");
# my $n = $nodes->getLength;
# for (my $i = 0; $i < $n; $i++)
# {
- # my $node = $nodes->item ($i);
- # my $href = $node->getAttributeNode ("HREF");
- # print $href->getValue . "\n";
+ # my $node = $nodes->item ($i);
+ # my $gene_label = $node->getAttributeNode("TextLabel");
+ # print $gene_label->getValue . "\n";
# }
-
- # # Print doc file
- # $doc->printToFile ("out.xml");
-
- # # Print to string
- # print $doc->toString;
-
- # # Avoid memory leaks - cleanup circular references for garbage collection
- # $doc->dispose;
-
}
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Prints a heading followed by the current contents of $gpml_doc serialized
# with toString, then a trailing newline. Takes no arguments.
sub show_output
{
- print "[Modified GPML]\n";
+ print "[Modified GPML Output]\n";
+ print $gpml_doc->toString;
print "\n";
}
# ---------------------------------------------------------------------------
sub export_data
{
+ # Serializes $gpml_doc to the file named by $output_file. Takes no
+ # arguments; dies if the file cannot be opened or closed.
- print "Writing GPML to new output file...\n";
- print "\n";
+ print "Writing GPML to output file...\n\n";
+
+ # ensures utf-8 encoding (for accent marks, etc.)
+ open my $out_file_handle, ">:utf8", $output_file or die "Could not open $output_file for writing: $!";
+
+ $gpml_doc->print($out_file_handle);
+
+ # buffered write errors (disk full, etc.) only surface when the handle is
+ # closed, so close explicitly and check the result
+ close($out_file_handle) or die "Could not close $output_file: $!";
}
=back
# Final stage of the script as visible in this excerpt: apply the gene swap,
# optionally echo the result when $verbose is set, write the output file,
# then release the DOM document and exit.
swap_genes();
if ($verbose) { show_output; }
export_data;
+$gpml_doc->dispose; # cleanup
exit;