From 4e4fa5b00027f7139f7084cc21e8b51dfb44ee95 Mon Sep 17 00:00:00 2001
From: preecej
Date: Tue, 26 Jul 2011 17:39:25 +0000
Subject: [PATCH] New script for directly modifying PathVisio GPML files -
 swaps one species' genes for another (after projection)

svn path=/; revision=125
---
 .../perl_singletons/pathway_gene_swapper.pl   | 242 ++++++++++++++++++
 .../reactome_chebi_mapping.pl                 |   8 +-
 2 files changed, 246 insertions(+), 4 deletions(-)
 create mode 100644 preecej/perl_singletons/pathway_gene_swapper.pl

diff --git a/preecej/perl_singletons/pathway_gene_swapper.pl b/preecej/perl_singletons/pathway_gene_swapper.pl
new file mode 100644
index 0000000..d472432
--- /dev/null
+++ b/preecej/perl_singletons/pathway_gene_swapper.pl
@@ -0,0 +1,242 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+Pathway Gene Swapper
+
+=head1 VERSION
+
+0.1
+
+=head1 DESCRIPTION
+
+Swap out one set of genes for another in an existing PathVisio GPML
+file.
+
+=head1 USAGE
+
+pathway_gene_swapper.pl -i INPUT_FILE -g GENE_FILE -c CONFIG_FILE -o OUTPUT_FILE -v -d
+
+=head1 OPTIONS
+
+    -i  Name of input GPML file.
+    -g  CSV file containing the genes to swap
+    -c  config file containing color, label, and placement preferences
+    -o  Name of output GPML file.
+        (NOTE: if no path supplied for input files,
+        current working directory is assumed)
+    -v  View verbose information
+    -d  View debugging information
+
+=head1 DEPENDENCIES and PREREQUISITES
+
+ - Non-standard Perl modules: Switch, XML::DOM
+ - The input file must be a valid GPML file
+ - The CSV file must have a single-line column header
+   If the second column contains
+ - The config file may have any or all of the following entries:
+   Title=
+   MaintainedBy=
+   Organism=
+   BoxBorder= (RRGGBB hex, default: black)
+   BoxColor= (RRGGBB hex, default: white)
+   BoxWidth= (integer, in px)
+   CommentPrefix= (will precede back-reference to prior source)
+   LabelPrefix= (precedes current gene label)
+   X-Offset= (integer, in px)
+   Y-Offset= (integer, in px)
+
+=head1 AUTHORS
+
+Justin Preece and Mamatha Hanumappa
+  Faculty Research Assistants
+  Jaiswal Lab, Botany & Plant Pathology
+  Oregon State University
+  L
+  L
+
+=cut
+
+# ---------------------------------------------------------------------------
+# modules
+# ---------------------------------------------------------------------------
+
+# general
+use strict;
+use Cwd;
+use Switch;
+use Getopt::Std;
+
+# specific
+use XML::DOM;
+
+# ---------------------------------------------------------------------------
+# declarations
+# ---------------------------------------------------------------------------
+
+my %opts; # arg options
+my $input_file;  # source GPML file (-i)
+my $gene_file;   # CSV file of genes to swap (-g)
+my $config_file; # display preferences file (-c)
+my $output_file; # destination GPML file (-o)
+my $verbose = 0; # flag for verbose output
+my $debug = 0; # debugging switch
+
+my $gpml_doc;
+
+# ---------------------------------------------------------------------------
+# functions
+# ---------------------------------------------------------------------------
+
+
+# turn a file argument into a full path: a value containing "/" is used as
+# given; anything else is assumed to live in the current working directory
+# ---------------------------------------------------------------------------
+sub resolve_path
+{
+    my ($value) = @_;
+    return $value if (index($value, "/") >= 0); # assume path
+    return getcwd() . "/$value";
+}
+
+
+# parse the command-line options into the file-level settings and print a
+# banner summarizing the run
+# ---------------------------------------------------------------------------
+sub init
+{
+    # read and set options; -g and -c have to be declared here as well,
+    # because Getopt::Std stops parsing at the argument of an undeclared
+    # option and everything after it (e.g. -o) would be left unset
+    getopts('i:g:c:o:vd', \%opts);
+
+    foreach my $key (keys %opts) {
+        my $value = $opts{$key};
+        switch ($key) {
+            case "i" { $input_file = resolve_path($value); }
+            case "g" { $gene_file = resolve_path($value); }
+            case "c" { $config_file = resolve_path($value); }
+            case "o" { $output_file = resolve_path($value); }
+            case "v" { $verbose = 1; }
+            case "d" { $debug = 1; }
+        }
+    }
+
+    # show a placeholder instead of interpolating undef for omitted files
+    my @shown = ($input_file, $gene_file, $config_file, $output_file);
+    foreach my $file (@shown) {
+        $file = "(not specified)" unless defined $file;
+    }
+    my ($in, $genes, $config, $out) = @shown;
+
+    system "clear";
+    print "\n"
+        . "------------------------------------------------------------\n"
+        . "------------------ Pathway Gene Swapper --------------------\n"
+        . "------------------------------------------------------------\n"
+        . "\n"
+        . "Input File: $in\n"
+        . "Gene File: $genes\n"
+        . "Config File: $config\n"
+        . "Output File: $out\n"
+        . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+        . "Running in debug mode? " . ($debug ? "Yes" : "No") . "\n"
+        . "\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "\n";
+}
+
+
+# read, parse, and store source GPML
+# ---------------------------------------------------------------------------
+sub import_data
+{
+    print "Opening input file and reading header info...\n\n";
+}
+
+
+# spit out the data to make sure you've read in the files correctly
+# ---------------------------------------------------------------------------
+sub show_input
+{
+    print "[Source GPML]\n";
+    print "\n";
+}
+
+# substitute gene data
+# ---------------------------------------------------------------------------
+sub swap_genes
+{
+    print "Swapping gene data...\n";
+
+    # -------------------------------------------------------------------------
+    # [PathVisio Perl Pseudo-Script]
+    # -------------------------------------------------------------------------
+    # .remove all and elements and children
+    # .identify mapped genes
+    # .if mapped gene did not belong to a group, create new group node
+    # .duplicate for each mapping
+    # .create new 5-digit hex code "GraphId"
+    # .if needed, add new group node (create new 5-digit hex code)
+    # .add "GroupRef" on each added node, including the orig.
+    # .rename TextLabel (prefix: Eu-, suffix: -#?)
+    # .add Comment back-referencing TAIR locus id (use "source" attribute)
+    # .edit
+    # .decrement the Z-order
+    # .decrement CenterX and CenterY by 10px each
+    # .change box Width if needed
+    # .color the box ()
+    # .After PathVisio sanity check:
+    # .remove back-ref TAIR comments
+
+
+    # -------------------------------------------------------------------------
+    # [XML::DOM]
+    # -------------------------------------------------------------------------
+    # my $parser = new XML::DOM::Parser;
+    # my $doc = $parser->parsefile ("file.xml");
+
+    # # print all HREF attributes of all CODEBASE elements
+    # my $nodes = $doc->getElementsByTagName ("CODEBASE");
+    # my $n = $nodes->getLength;
+
+    # for (my $i = 0; $i < $n; $i++)
+    # {
+    # my $node = $nodes->item ($i);
+    # my $href = $node->getAttributeNode ("HREF");
+    # print $href->getValue . "\n";
+    # }
+
+    # # Print doc file
+    # $doc->printToFile ("out.xml");
+
+    # # Print to string
+    # print $doc->toString;
+
+    # # Avoid memory leaks - cleanup circular references for garbage collection
+    # $doc->dispose;
+
+}
+
+# display the transformed data
+# ---------------------------------------------------------------------------
+sub show_output
+{
+    print "Writing GPML to new output file...\n";
+    print "\n";
+}
+
+# ---------------------------------------------------------------------------
+# main
+# ---------------------------------------------------------------------------
+
+init;
+import_data;
+if ($verbose) { show_input; }
+swap_genes();
+if ($verbose) { show_output; }
+
+exit;
+
diff --git a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
index 2daedfd..cf6f4c9 100755
--- a/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
+++ b/preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl
@@ -7,7 +7,7 @@ use strict;
 # Justin Preece, 10/06/10
 # v1.0: 10/13/10 (svn rev. 66)
 # v1.1: 10/20/10 (svn rev. 70)
-# v1.2: 02/07/11 (svn rev. ?)
+# v1.2: 02/07/11 (svn rev. 86)
 #
 # Purpose: Map CHEBI ontology terms onto the Reactome database.
 #
@@ -42,8 +42,8 @@ use GO::Parser;
 # ---------------------------------------------------------------------------
 
 # set paths to data files
-my $data_path = "/home/preecej/Documents/projects/reactome/reactome_to_chebi_mapping/AraCyc/";
-my $chebi_obo_file = "chebi_v75.obo";
+my $data_path = "/home/preecej/Documents/projects/reactome/reactome_to_chebi_mapping/AraCyc/gk_central_041811/no_synonyms/";
+my $chebi_obo_file = "../../chebi_v78.obo";
 my $reactome_file = "AraReferenceMolecules.txt";
 my $mapped_output_file = "1.2_reactome_chebi_mapping_complete.txt";
 my $sorted_output_file = "1.2_reactome_chebi_mapping_complete_sorted.txt";
@@ -53,7 +53,7 @@ my $sorted_no_match_file = "1.2_reactome_entries_with_no_chebi_match.txt";
 # options
 my $allow_obsolete_terms = 1;
 my $allow_cyc = 0;
-my $allow_synonyms = 1;
+my $allow_synonyms = 0;
 
 my $ont; # chebi ontology
 
-- 
2.34.1