From b9e6436f83dd8e0278200b7693f4b015b4c44282 Mon Sep 17 00:00:00 2001 From: preecej Date: Sat, 23 Jul 2011 00:47:37 +0000 Subject: [PATCH] Started adding GAF format input option to Rev 0.2 svn path=/; revision=123 --- .../semantic_wiki/paw_TransformForImport.pl | 81 +++++++++++-------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/preecej/semantic_wiki/paw_TransformForImport.pl b/preecej/semantic_wiki/paw_TransformForImport.pl index b1b0b38..a6fbb62 100644 --- a/preecej/semantic_wiki/paw_TransformForImport.pl +++ b/preecej/semantic_wiki/paw_TransformForImport.pl @@ -2,7 +2,7 @@ =head1 NAME -Plant Semantics Import Transformation Script +Planteome Annotation Wiki - Data Import Script =head1 VERSION @@ -11,33 +11,35 @@ Plant Semantics Import Transformation Script =head1 DESCRIPTION Transform external gene annotation data into an XML document readable -by the MediaWiki extension DataTransfer (Special:ImportXML) feature, -including subpages. Also generates appropriate provenance of data based on header of -import file. +by the MediaWiki extension DataTransfer (Special:ImportXML) feature. + +Also generates appropriate provenance of data based on a prefixed +Source header in the import file. + +New to this revision: Added a GAF file type option for import; +accommodates ontologically-focused annotations. =head1 USAGE -PS_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE -v -d +paw_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE -v -d =head1 OPTIONS - -i Name of input CSV or tab-del file. - -t Specifies input type of file ('csv' or 'tab') - -o Name of output file. - -v View verbose information + -i Name of input file + (if no path supplied, assumes current working directory) + -t Specifies input type of file + (allowed values: 'csv', 'tab', 'gaf') + -o Name of output XML file. 
+ (if no path supplied, assumes current working directory) + -v View verbose information -d View debugging information =head1 DEPENDENCIES Requires that the input file contain at least two headers: the first will hold the provenance information associated with the imported -data, and all following headers will specify the templates and field -names for the annotation data. - -The main templates pertain to Annotations, Sources, and Publications. -Other templates are to be used for subsidiary reference data -(i.e. gene synonyms, external references, ontology terms, -and sequence data). +data, and all following headers will specify the annotation data to +be imported in accordance with the input type. =head2 Source Header Format Example @@ -70,8 +72,8 @@ and sequence data). " " - NOTE: One of the fields in each format MUST be named "Accession ID" for the - tracking of provenance. + NOTE: One of the fields in each format MUST be named "Accession + ID" for the tracking of provenance. =head1 AUTHOR @@ -95,22 +97,19 @@ use Getopt::Std; # specific use XML::Smart; +use XML::DOM; # --------------------------------------------------------------------------- # declarations # --------------------------------------------------------------------------- my %opts; # arg options -my $file_type; # tab or csv -my $file_del; +my $file_type; # tab, csv, gaf +my $input_file; +my $output_file; my $verbose = 0; # flag for verbose output my $debug = 0; # debugging switch -# set paths for data files -my $path = getcwd() . "/"; -my $input_file = $path; -my $output_file = $path; - # data storage my %source; my $template_name; @@ -119,7 +118,7 @@ my @field_data; my $xml; my $output_data; -my $prov_title_seed = 2; +my $source_title_seed = 3; my $annot_title_seed = 3; # --------------------------------------------------------------------------- @@ -136,17 +135,33 @@ sub init foreach my $key (keys %opts) { my $value = $opts{$key}; switch ($key) { - case "i" { $input_file = $input_file . 
$value; } + case "i" { + if ($value =~ /\//) { # assume path + $input_file = $value; + } else { + $input_file = getcwd() . "\/$value"; + } + } case "t" { $file_type = $value; } - case "o" { $output_file = $output_file . $value; } + + case "o" { + if ($value =~ /\//) { # assume path + $output_file = $value; + } else { + $output_file = getcwd() . "\/$value"; + } + } case "v" { $verbose = 1; } + case "d" { $debug = 1; } } } - # split data on either commas or tabs, dependent on format - $file_del = ($file_type eq "csv") ? ',' : '\t'; - + # split data on either commas or tabs, if file type is generic + if ($file_type eq "csv" || $file_type eq "tab") { + my $file_del = ($file_type eq "csv") ? ',' : '\t'; # FIXME: lexical to this if-block, so nothing after the closing brace can read it + } + system "clear"; print "\n" . "------------------------------------------------------------\n" @@ -257,7 +272,7 @@ sub write_xml # set root element, source page and elements # (temp set of page title until moved to import extension) - $xml->{Pages}{Page} = {Title => "Source:$prov_title_seed"}; + $xml->{Pages}{Page} = {Title => "Source:$source_title_seed"}; $curr_node = $xml->{Pages}{Page}; $curr_node->{Template} = {Name => 'Source'}; @@ -343,7 +358,7 @@ sub write_xml # assign the relevant provenance field data $curr_prov_node->{Field}[0] = {Name => 'Source'}; - $curr_prov_node->{Field}[0]->content(0,"Source:$prov_title_seed"); + $curr_prov_node->{Field}[0]->content(0,"Source:$source_title_seed"); $curr_prov_node->{Field}[1] = {Name => 'Source Accession ID'}; $curr_prov_node->{Field}[1]->content(0,$curr_accession_id); $curr_prov_node->{Field}[2] = {Name => 'Source Template'}; -- 2.34.1