From: preecej Date: Mon, 1 Aug 2011 20:31:23 +0000 (+0000) Subject: Added GPML read-in, STDOUT/file out/manipulation tests. X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=1c2ce1fd24cad1ec254846a691fcf6712d7c7011;p=old-jaiswallab-svn%2F.git Added GPML read-in, STDOUT/file out/manipulation tests. svn path=/; revision=136 --- diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index b6134a0..f907f45 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -119,8 +119,8 @@ Returns a hash with config values set. # --------------------------------------------------------------------------- sub config($) { - if ($debug) { print "...Config file path: $_[0]\n"; } - + + print "Opening configuration file and reading data...\n\n"; my %local_config_hash; open(CONFIG_FILE, $_[0]) or die("Could not open $_[0]"); @@ -136,6 +136,8 @@ sub config($) $local_config_hash{$data_field} = $data_val; } + close(CONFIG_FILE); + return %local_config_hash; } @@ -210,31 +212,23 @@ sub init . "\n"; %configs = config($input_config_file); - - if ($debug) { print "...\n" - . Dumper(\%configs) . "\n\n"; } } # --------------------------------------------------------------------------- =item B<import_data> -Reads, parses, and stores source GPML. +Reads, parses, and stores gene mapping file and source GPML. 
=cut # --------------------------------------------------------------------------- sub import_data { - print "Opening input file and reading header info...\n\n"; - #open the file - #read a line - #skip header line - #split line on comma - #get original gene and swapped gene - #add genes to %swap_genes - open(CONFIG_FILE, $input_gene_file) or die("Could not open $input_gene_file"); + print "Opening gene mapping file and reading data...\n\n"; + + open(GENE_FILE, $input_gene_file) or die("Could not open $input_gene_file"); - my $line = <CONFIG_FILE>; - while (<CONFIG_FILE>) + my $line = <GENE_FILE>; + while (<GENE_FILE>) { $line = $_; chomp $line; @@ -244,12 +238,13 @@ sub import_data $swap_genes{$data_field} = $data_val; } - #test:display %swap_genes - if ($debug) { print "...\n" - . Dumper(\%swap_genes) . "\n\n"; } - - - + + close(GENE_FILE); + + print "Opening GPML pathway file and reading data...\n\n"; + + my $parser = new XML::DOM::Parser; + $gpml_doc = $parser->parsefile($input_gpml_file); } @@ -261,7 +256,16 @@ Verbose only. # --------------------------------------------------------------------------- sub show_input { + print "[Configuration Settings]\n"; + print Dumper(\%configs) . "\n\n"; + print "\n"; + + print "[Gene Mappings]\n"; + print Dumper(\%swap_genes) . "\n\n"; + print "\n"; + print "[Source GPML]\n"; + print $gpml_doc->toString; print "\n"; } @@ -272,17 +276,25 @@ Substitutes gene data. 
# --------------------------------------------------------------------------- sub swap_genes { - print "Swapping gene data...\n\n"; + print "Swapping gene data and making other modifications...\n\n"; # ------------------------------------------------------------------------- # [PathVisio Perl Pseudo-Script] # ------------------------------------------------------------------------- # .remove all and elements and children + # my $pub_nodes = $gpml_doc->getElementsByTagName("bp:PublicationXref"); + # for my $pub_node (\$pub_nodes) + # { + # $pub_node->removeChild(); + # } + # .identify mapped genes # .if mapped gene did not belong to a group, create new group node # .duplicate for each mapping # .create new 5-digit hex code "GraphId" - # .if needed, add new group node (create new 5-digit hex code) + # .if needed, add new group node + # .create new, random 5-digit hex code + # .make sure it doesn't already exist # .add "GroupRef" on each added node, including the orig. # .rename TextLabel (prefix: Eu-, suffix: -#?) # .add Comment back-referencing TAIR locus id (use "source" attribute) @@ -291,36 +303,21 @@ sub swap_genes # .decrement CenterX and CenterY by 10px each # .change box Width if needed # .color the box () - # .After PathVisio sanity check: + # .After PathVisio sanity check, before communication: # .remove back-ref TAIR comments - # ------------------------------------------------------------------------- - # [XML::DOM] - # ------------------------------------------------------------------------- - # my $parser = new XML::DOM::Parser; - # my $doc = $parser->parsefile ("file.xml"); - # # print all HREF attributes of all CODEBASE elements - # my $nodes = $doc->getElementsByTagName ("CODEBASE"); + # [usage example] + # my $nodes = $gpml_doc->getElementsByTagName("DataNode"); # my $n = $nodes->getLength; # for (my $i = 0; $i < $n; $i++) # { - # my $node = $nodes->item ($i); - # my $href = $node->getAttributeNode ("HREF"); - # print $href->getValue . 
"\n"; + # my $node = $nodes->item ($i); + # my $gene_label = $node->getAttributeNode("TextLabel"); + # print $gene_label->getValue . "\n"; # } - - # # Print doc file - # $doc->printToFile ("out.xml"); - - # # Print to string - # print $doc->toString; - - # # Avoid memory leaks - cleanup circular references for garbage collection - # $doc->dispose; - } # --------------------------------------------------------------------------- @@ -330,7 +327,8 @@ Displays the transformed data. Verbose only. # --------------------------------------------------------------------------- sub show_output { - print "[Modified GPML]\n"; + print "[Modified GPML Output]\n"; + print $gpml_doc->toString; print "\n"; } @@ -341,8 +339,12 @@ Writes the transformed GPML doc out to the specified output file. # --------------------------------------------------------------------------- sub export_data { - print "Writing GPML to new output file...\n"; - print "\n"; + print "Writing GPML to output file...\n\n"; + + # ensures utf-8 encoding (for accent marks, etc.) + open my $out_file_handle, ">:utf8", "$output_file" or die $!; + + $gpml_doc->print($out_file_handle); } =back @@ -358,5 +360,6 @@ if ($verbose) { show_input; } swap_genes(); if ($verbose) { show_output; } export_data; +$gpml_doc->dispose; # cleanup exit;