From b9e6436f83dd8e0278200b7693f4b015b4c44282 Mon Sep 17 00:00:00 2001
From: preecej <preecej@localhost>
Date: Sat, 23 Jul 2011 00:47:37 +0000
Subject: [PATCH] Started adding GAF format input option to Rev 0.2

svn path=/; revision=123
---
 .../semantic_wiki/paw_TransformForImport.pl   | 81 +++++++++++--------
 1 file changed, 48 insertions(+), 33 deletions(-)

diff --git a/preecej/semantic_wiki/paw_TransformForImport.pl b/preecej/semantic_wiki/paw_TransformForImport.pl
index b1b0b38..a6fbb62 100644
--- a/preecej/semantic_wiki/paw_TransformForImport.pl
+++ b/preecej/semantic_wiki/paw_TransformForImport.pl
@@ -2,7 +2,7 @@
 
 =head1 NAME
 
-Plant Semantics Import Transformation Script
+Planteome Annotation Wiki - Data Import Script
 
 =head1 VERSION
 
@@ -11,33 +11,35 @@ Plant Semantics Import Transformation Script
 =head1 DESCRIPTION
 
 Transform external gene annotation data into an XML document readable
-by the MediaWiki extension DataTransfer (Special:ImportXML) feature,
-including subpages. Also generates appropriate provenance of data based on header of 
-import file.
+by the MediaWiki extension DataTransfer (Special:ImportXML) feature.
+
+Also generates appropriate provenance of data based on a prefixed
+Source header in the import file.
+
+New to this revision: Added a GAF file type option for import; 
+accommodates ontologically-focused annotations.
 
 =head1 USAGE
 
-PS_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE -v -d
+paw_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE -v -d
 
 =head1 OPTIONS
 
-    -i  Name of input CSV or tab-del file.
-    -t  Specifies input type of file ('csv' or 'tab')
-    -o  Name of output file.
-    -v  View verbose information 
+    -i  Name of input file
+        (if no path supplied, assumes current working directory)
+    -t  Specifies the type of the input file
+        (allowed values: 'csv', 'tab', 'gaf')
+    -o  Name of output XML file.
+        (if no path supplied, assumes current working directory)
+    -v  View verbose information
     -d  View debugging information
 
 =head1 DEPENDENCIES
 
 Requires that the input file contain at least two headers: the first 
 will hold the provenance information associated with the imported 
-data, and all following headers will specify the templates and field
-names for the annotation data.
-
-The main templates pertain to Annotations, Sources, and Publications.
-Other templates are to be used for subsidiary reference data 
-(i.e. gene synonyms, external references, ontology terms, 
-and sequence data).
+data, and all following headers will specify the annotation data to
+be imported in accordance with the input type.
 
 =head2 Source Header Format Example
 
@@ -70,8 +72,8 @@ and sequence data).
     "
     "
 
-    NOTE: One of the fields in each format MUST be named "Accession ID" for the
-    tracking of provenance.
+    NOTE: One of the fields in each format MUST be named
+    "Accession ID" for the tracking of provenance.
 
 =head1 AUTHOR
 
@@ -95,22 +97,19 @@ use Getopt::Std;
 
 # specific
 use XML::Smart;
+use XML::DOM;
 
 # ---------------------------------------------------------------------------
 # declarations
 # ---------------------------------------------------------------------------
 
 my %opts; # arg options
-my $file_type; # tab or csv
-my $file_del;
+my $file_type; # tab, csv, gaf
+my $input_file;
+my $output_file;
 my $verbose = 0; # flag for verbose output
 my $debug = 0; # debugging switch
 
-# set paths for data files
-my $path = getcwd() . "/";
-my $input_file = $path;
-my $output_file = $path;
-
 # data storage
 my %source;
 my $template_name;
@@ -119,7 +118,7 @@ my @field_data;
 my $xml;
 my $output_data;
 
-my $prov_title_seed = 2;
+my $source_title_seed = 3;
 my $annot_title_seed = 3;
 
 # ---------------------------------------------------------------------------
@@ -136,17 +135,33 @@ sub init
     foreach my $key (keys %opts) { 
         my $value = $opts{$key};
         switch ($key) {
-            case "i" { $input_file = $input_file . $value; }
+            case "i" { 
+                if ($value =~ /\//) { # assume path
+                    $input_file = $value;
+                } else {
+                    $input_file = getcwd() . "\/$value";
+                }
+            }
             case "t" { $file_type = $value; }
-            case "o" { $output_file = $output_file . $value; }
+            
+            case "o" {
+                if ($value =~ /\//) { # assume path
+                    $output_file = $value;
+                } else {
+                    $output_file = getcwd() . "\/$value";
+                }
+            }
             case "v" { $verbose = 1; }
+            
             case "d" { $debug = 1; }
         }
     }
     
-    # split data on either commas or tabs, dependent on format
-    $file_del = ($file_type eq "csv") ? ',' : '\t';
-    
+    # choose the field delimiter (comma or tab) for the generic file types only
+    if ($file_type eq "csv" || $file_type eq "tab") {
+        my $file_del = ($file_type eq "csv") ? ',' : '\t'; # FIXME: lexical to this block; not visible after the closing brace
+    }
+        
     system "clear";
     print "\n"
         . "------------------------------------------------------------\n"
@@ -257,7 +272,7 @@ sub write_xml
 
     # set root element, source page and elements
     # (temp set of page title until moved to import extension) 
-    $xml->{Pages}{Page} = {Title => "Source:$prov_title_seed"};
+    $xml->{Pages}{Page} = {Title => "Source:$source_title_seed"};
 
     $curr_node = $xml->{Pages}{Page};
     $curr_node->{Template} = {Name => 'Source'};
@@ -343,7 +358,7 @@ sub write_xml
 
             # assign the relevant provenance field data            
             $curr_prov_node->{Field}[0] = {Name => 'Source'};
-            $curr_prov_node->{Field}[0]->content(0,"Source:$prov_title_seed");
+            $curr_prov_node->{Field}[0]->content(0,"Source:$source_title_seed");
             $curr_prov_node->{Field}[1] = {Name => 'Source Accession ID'};
             $curr_prov_node->{Field}[1]->content(0,$curr_accession_id);
             $curr_prov_node->{Field}[2] = {Name => 'Source Template'};
-- 
2.34.1