From: preecej
Date: Sat, 28 May 2011 00:59:18 +0000 (+0000)
Subject: Added input data viewing, started xml doc construction
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=c2df23ef6c3b03c2bb327f15400e496ee8e46e6b;p=old-jaiswallab-svn%2F.git

Added input data viewing, started xml doc construction

svn path=/; revision=103
---

diff --git a/preecej/semantic_wiki/PS_TransformForImport.pl b/preecej/semantic_wiki/PS_TransformForImport.pl
index d58cc7d..9667f2b 100644
--- a/preecej/semantic_wiki/PS_TransformForImport.pl
+++ b/preecej/semantic_wiki/PS_TransformForImport.pl
@@ -36,7 +36,7 @@ data.
 
     [Provenance]
     Source Date Time Stamp=Apr 2 2008
-    Source Data=Oryzabase
+    Source Database=Oryzabase
     Source Version=rel. 10
     Source URI=http://www.shigen.nig.ac.jp/rice/oryzabase/
     Source File=http://www.shigen.nig.ac.jp/rice/oryzabase/genes/...
@@ -97,11 +97,15 @@ my %provenance;
 my $template_name;
 my @field_names;
 my @field_data;
+my $xml;
+my $output_data;
 
 # ---------------------------------------------------------------------------
 # functions
 # ---------------------------------------------------------------------------
 
+
+# ---------------------------------------------------------------------------
 sub init
 {
     # read and set options
@@ -120,29 +124,30 @@ sub init
 
     $file_del = ($file_type eq "csv") ? ',' : '\t';
 
-    if ($verbose) {
-        system "clear";
-        print "\n"
-            . "------------------------------------------------------------\n"
-            . "------- Plant Semantics Import Transformation Script -------\n"
-            . "------------------------------------------------------------\n"
-            . "\n\n"
-            . "Input File: $input_file\n"
-            . "Designated input file type: $file_type\n"
-            . "Output File: $output_file\n"
-            . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
-            . "Running in debug mode? " . ($verbose ? "Yes" : "No") . "\n"
-            . "\n"
-            . "------------------------------------------------------------\n"
-            . "------------------------------------------------------------\n"
-            . "------------------------------------------------------------\n"
-            . "\n";
-    }
+    system "clear";
+    print "\n"
+        . "------------------------------------------------------------\n"
+        . "------- Plant Semantics Import Transformation Script -------\n"
+        . "------------------------------------------------------------\n"
+        . "\n"
+        . "Input File: $input_file\n"
+        . "Designated input file type: $file_type\n"
+        . "Output File: $output_file\n"
+        . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+        . "Running in debug mode? " . ($debug ? "Yes" : "No") . "\n"
+        . "\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "\n";
 }
 
+
+# read, parse, and store provenance, template, and annotations
+# ---------------------------------------------------------------------------
 sub import_data
 {
-    if ($verbose) { print "Opening file and reading header info...\n\n"; }
+    print "Opening file and reading header info...\n\n";
 
     # open file
     open(INPUT_FILE,$input_file) or die("Could not open input file.");
@@ -157,7 +162,7 @@ sub import_data
         my $data_val = (split('=',$line))[1];
         switch ($count) {
             case 2 { $provenance{'SourceDateTimeStamp'} = $data_val; }
-            case 3 { $provenance{'SourceDate'} = $data_val; }
+            case 3 { $provenance{'SourceDatabase'} = $data_val; }
             case 4 { $provenance{'SourceVersion'} = $data_val; }
             case 5 { $provenance{'SourceURI'} = $data_val; }
             case 6 { $provenance{'SourceFile'} = $data_val; }
@@ -168,7 +173,7 @@ sub import_data
         if ($count == 10) { last; }
     }
 
-    if ($verbose) { print "Reading data...\n\n"; }
+    print "Reading data...\n\n";
 
     # loop through data rows and add all data fields to an array of hashes
     while ()
@@ -185,9 +190,9 @@ sub import_data
             if ($debug) { print "$i: $field_names[$i]: $tmp_data_ary[$i]\n"; }
             $tmp_hash{$field_names[$i]} = $tmp_data_ary[$i];
         }
-        push @field_data, %tmp_hash;
-        %tmp_hash = (); # clear your hash as a precaution
+        push @field_data, \%tmp_hash;
     }
+
     close INPUT_FILE;
 }
 
@@ -200,28 +205,61 @@ sub show_input
         print "$key: $provenance{$key}\n";
     }
     print "\n";
-    print "Template: $template_name\n";
-    print "Fields: " . join(', ',@field_names) . "\n";
+    print "[Template]\n$template_name\n\n";
+    print "[Fields]\n" . join(', ',@field_names) . "\n\n";
 
     print "[Data]\n";
     foreach my $row (@field_data) {
         foreach my $key (keys %$row) {
-            print "$key: (%$row){$key}\n";
+            print "$key => " . $row->{$key} . "\n";
         }
+        print "\n";
     }
+    print "\n";
 }
 
 
+# loop through the hash and build annotation data and provenance xml doc
 # ---------------------------------------------------------------------------
 sub write_xml
 {
-    # loop through the hash and build annotation data and provenance xml docs
-    # simultaneously
+    print "Transforming data...\n\n";
 
+    my $current_node; # placeholder for node cursor
+
+    $xml = XML::Smart->new($output_data, 'XML::Smart::Parser');
+
+    # set root element, provenance page and elements
+    $xml->{Pages}{Page} = {Title => 'PROV:3'};
+
+    $current_node = $xml->{Pages}{Page};
+    $current_node->{Template} = {Name => 'Provenance'};
+
+    $current_node = $current_node->{Template};
+    $current_node->{Field} = {Name => 'Source Date Time Stamp'};
+    $current_node = $current_node->{Field};
+    $current_node->content(0,$provenance{'SourceDateTimeStamp'});
+
+    # iterate through the data
 
-    # write xml docs to a single ImportXML file
+    # build the annotation page
+
+    # add a template to the annotation provenance page
+
+    # write out xml doc to a single ImportXML file
+    $xml->save($output_file);
 }
 
+# print the transformed data (as xml)
+# ---------------------------------------------------------------------------
+sub show_output
+{
+    print "[XML]\n";
+    $output_data = $xml->data;
+    print $output_data;
+    print "\n";
+}
+
 # ---------------------------------------------------------------------------
 # main
 # ---------------------------------------------------------------------------
@@ -230,6 +268,7 @@ init;
 import_data;
 if ($verbose) { show_input; }
 write_xml;
+if ($verbose) { show_output; }
 
 exit;
 