From c9993a35c89e7498a980b56595d232df528d6f8e Mon Sep 17 00:00:00 2001 From: preecej Date: Fri, 5 Aug 2011 23:14:12 +0000 Subject: [PATCH] Added MH's gene counting code, changed gene hash to accept 1-to-many mappings, removed Biopax publication nodes in output file using XML::DOM svn path=/; revision=141 --- .../perl_singletons/pathway_gene_swapper.pl | 49 +++++++++++++++---- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl index f907f45..49e65c7 100644 --- a/preecej/perl_singletons/pathway_gene_swapper.pl +++ b/preecej/perl_singletons/pathway_gene_swapper.pl @@ -227,7 +227,12 @@ sub import_data open(GENE_FILE, $input_gene_file) or die("Could not open $input_gene_file"); + # used to generate total counts of each species' gene list; sanity check + my $original_gene_count = 0; + my $replacement_paralog_count = 0; + my $line = ; + while () { $line = $_; @@ -235,10 +240,31 @@ sub import_data my @line_ary = split(',',$line); my $data_field = $line_ary[0]; my $data_val = $line_ary[1]; - - $swap_genes{$data_field} = $data_val; + + #Does ath_gene exist? + if (!exists $swap_genes{$data_field}) + { + $original_gene_count++; + } + $replacement_paralog_count++; # count this every time + + # add new gene to hash value (array) for old gene hash key + push @{$swap_genes{$data_field}},$data_val; } - + + if ($verbose) # give add'l stats on gene and paralog counts + { + print "[Total number of original genes and paralogs]\n" + . "Original gene count: " . $original_gene_count . "\n" + . "Replacement paralog count: $replacement_paralog_count\n\n"; + + foreach my $orig_gene_key (keys %swap_genes) + { + print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . 
"\n"; + } + print "\n"; + } + close(GENE_FILE); print "Opening GPML pathway file and reading data...\n\n"; @@ -282,11 +308,16 @@ sub swap_genes # [PathVisio Perl Pseudo-Script] # ------------------------------------------------------------------------- # .remove all and elements and children - # my $pub_nodes = $gpml_doc->getElementsByTagName("bp:PublicationXref"); - # for my $pub_node (\$pub_nodes) - # { - # $pub_node->removeChild(); - # } + my $pathway_nodes = $gpml_doc->getElementsByTagName("Pathway"); + print @$pathway_nodes[0] . "\n"; + my $biopax_nodes = $gpml_doc->getElementsByTagName("Biopax"); + print $biopax_nodes . "\n"; + for (@$biopax_nodes) + { + # print $_->getTagName . "\n"; + print $_ . "\n"; + @$pathway_nodes[0]->removeChild($_); + } # .identify mapped genes # .if mapped gene did not belong to a group, create new group node @@ -305,8 +336,6 @@ sub swap_genes # .color the box () # .After PathVisio sanity check, before communication: # .remove back-ref TAIR comments - - # [usage example] # my $nodes = $gpml_doc->getElementsByTagName("DataNode"); -- 2.34.1