open(GENE_FILE, $input_gene_file) or die("Could not open $input_gene_file");
+ # counters used to total each species' gene list as a sanity check: distinct original genes vs. replacement paralogs (one per data row)
+ my $original_gene_count = 0;
+ my $replacement_paralog_count = 0;
+
my $line = <GENE_FILE>;
+
while (<GENE_FILE>)
{
$line = $_;
my @line_ary = split(',',$line);
my $data_field = $line_ary[0];
my $data_val = $line_ary[1];
-
- $swap_genes{$data_field} = $data_val;
+
+ # Is this the first time the original (ath) gene has been seen as a hash key?
+ if (!exists $swap_genes{$data_field})
+ {
+ $original_gene_count++;
+ }
+ $replacement_paralog_count++; # count this every time
+
+ # add new gene to hash value (array) for old gene hash key
+ push @{$swap_genes{$data_field}},$data_val;
}
-
+
+ if ($verbose) # give add'l stats on gene and paralog counts
+ {
+ print "[Total number of original genes and paralogs]\n"
+ . "Original gene count: " . $original_gene_count . "\n"
+ . "Replacement paralog count: $replacement_paralog_count\n\n";
+
+ foreach my $orig_gene_key (keys %swap_genes)
+ {
+ print "$orig_gene_key: " . scalar(@{$swap_genes{$orig_gene_key}}) . "\n";
+ }
+ print "\n";
+ }
+
close(GENE_FILE);
print "Opening GPML pathway file and reading data...\n\n";
# [PathVisio Perl Pseudo-Script]
# -------------------------------------------------------------------------
# .remove all <BiopaxRef> and <bp:PublicationXref> elements and children
- # my $pub_nodes = $gpml_doc->getElementsByTagName("bp:PublicationXref");
- # for my $pub_node (\$pub_nodes)
- # {
- # $pub_node->removeChild();
- # }
+ my $pathway_nodes = $gpml_doc->getElementsByTagName("Pathway");
+ print @$pathway_nodes[0] . "\n";
+ my $biopax_nodes = $gpml_doc->getElementsByTagName("Biopax");
+ print $biopax_nodes . "\n";
+ for (@$biopax_nodes)
+ {
+ # print $_->getTagName . "\n";
+ print $_ . "\n";
+ @$pathway_nodes[0]->removeChild($_);
+ }
# .identify mapped genes
# .if mapped gene did not belong to a group, create new group node
# .color the box (<Graphics...Color="4488ff" />)
# .After PathVisio sanity check, before communication:
# .remove back-ref TAIR comments
-
-
# [usage example]
# my $nodes = $gpml_doc->getElementsByTagName("DataNode");