[Provenance]
Source Date Time Stamp=Apr 2 2008
- Source Data=Oryzabase
+ Source Database=Oryzabase
Source Version=rel. 10
Source URI=http://www.shigen.nig.ac.jp/rice/oryzabase/
Source File=http://www.shigen.nig.ac.jp/rice/oryzabase/genes/...
my $template_name;
my @field_names;
my @field_data;
+my $xml;
+my $output_data;
# ---------------------------------------------------------------------------
# functions
# ---------------------------------------------------------------------------
+
+# ---------------------------------------------------------------------------
sub init
{
# read and set options
+ # NOTE(review): '\t' is single-quoted, so $file_del holds a backslash
+ # followed by "t"; that only acts as a tab where $file_del is used as a
+ # regex pattern (e.g. in split) -- confirm against its uses.
$file_del = ($file_type eq "csv") ? ',' : '\t';
- if ($verbose) {
- system "clear";
- print "\n"
- . "------------------------------------------------------------\n"
- . "------- Plant Semantics Import Transformation Script -------\n"
- . "------------------------------------------------------------\n"
- . "\n\n"
- . "Input File: $input_file\n"
- . "Designated input file type: $file_type\n"
- . "Output File: $output_file\n"
- . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
- . "Running in debug mode? " . ($verbose ? "Yes" : "No") . "\n"
- . "\n"
- . "------------------------------------------------------------\n"
- . "------------------------------------------------------------\n"
- . "------------------------------------------------------------\n"
- . "\n";
- }
+ # Clear the terminal and print the run banner unconditionally.
+ # The debug line reports $debug; echoing $verbose there made the two
+ # status lines always identical.
+ system "clear";
+ print "\n"
+ . "------------------------------------------------------------\n"
+ . "------- Plant Semantics Import Transformation Script -------\n"
+ . "------------------------------------------------------------\n"
+ . "\n"
+ . "Input File: $input_file\n"
+ . "Designated input file type: $file_type\n"
+ . "Output File: $output_file\n"
+ . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+ . "Running in debug mode? " . ($debug ? "Yes" : "No") . "\n"
+ . "\n"
+ . "------------------------------------------------------------\n"
+ . "------------------------------------------------------------\n"
+ . "------------------------------------------------------------\n"
+ . "\n";
}
+
+# read, parse, and store provenance, template, and annotations
+# ---------------------------------------------------------------------------
sub import_data
{
- if ($verbose) { print "Opening file and reading header info...\n\n"; }
+ print "Opening file and reading header info...\n\n";
# open file
open(INPUT_FILE,$input_file) or die("Could not open input file.");
my $data_val = (split('=',$line))[1];
switch ($count) {
case 2 { $provenance{'SourceDateTimeStamp'} = $data_val; }
- case 3 { $provenance{'SourceDate'} = $data_val; }
+ case 3 { $provenance{'SourceDatabase'} = $data_val; }
case 4 { $provenance{'SourceVersion'} = $data_val; }
case 5 { $provenance{'SourceURI'} = $data_val; }
case 6 { $provenance{'SourceFile'} = $data_val; }
if ($count == 10) { last; }
}
- if ($verbose) { print "Reading data...\n\n"; }
+ print "Reading data...\n\n";
# loop through data rows and add all data fields to an array of hashes
while (<INPUT_FILE>)
if ($debug) { print "$i: $field_names[$i]: $tmp_data_ary[$i]\n"; }
$tmp_hash{$field_names[$i]} = $tmp_data_ary[$i];
}
- push @field_data, %tmp_hash;
- %tmp_hash = (); # clear your hash as a precaution
+ push @field_data, \%tmp_hash;
}
+ close INPUT_FILE;
}
print "$key: $provenance{$key}\n";
}
print "\n";
- print "Template: $template_name\n";
- print "Fields: " . join(', ',@field_names) . "\n";
+ print "[Template]\n$template_name\n\n";
+ print "[Fields]\n" . join(', ',@field_names) . "\n\n";
print "[Data]\n";
foreach my $row (@field_data) {
foreach my $key (keys %$row) {
- print "$key: (%$row){$key}\n";
+ print "$key => " . $row->{$key} . "\n";
}
+ print "\n";
}
+ print "\n";
}
+# loop through the hash and build annotation data and provenance xml doc
# ---------------------------------------------------------------------------
sub write_xml
{
- # loop through the hash and build annotation data and provenance xml docs
- # simultaneously
+ # Builds the ImportXML document in the file-level $xml object and saves
+ # it to $output_file. Only the provenance page skeleton is populated so
+ # far; the annotation steps below are still placeholders.
+ print "Transforming data...\n\n";
+ my $current_node; # placeholder for node cursor
+
+ # Direct class-method call rather than the indirect-object form
+ # "new XML::Smart(...)", which perl can misparse; arguments unchanged.
+ $xml = XML::Smart->new($output_data, 'XML::Smart::Parser');
+
+ # set root element, provenance page and elements
+ $xml->{Pages}{Page} = {Title => 'PROV:3'};
+
+ $current_node = $xml->{Pages}{Page};
+ $current_node->{Template} = {Name => 'Provenance'};
+
+ # NOTE(review): the field is named 'Source Data Time Stamp' while the
+ # input header reads "Source Date Time Stamp" -- confirm which spelling
+ # the Provenance template expects.
+ $current_node = $current_node->{Template};
+ $current_node->{Field} = {Name => 'Source Data Time Stamp'};
+ $current_node = $current_node->{Field};
+ $current_node->content(0,$provenance{'SourceDateTimeStamp'});
+
+ # iterate through the data
- # write xml docs to a single ImportXML file
+ # build the annotation page
+
+ # add a template to the annotation provenance page
+
+ # write out xml doc to a single ImportXML file
+ $xml->save($output_file);
}
+# print the transformed data (as xml)
+# ---------------------------------------------------------------------------
+sub show_output
+{
+ # Section header goes out first, then the serialized document is cached
+ # in $output_data and dumped with a trailing newline.
+ print "[XML]\n";
+ $output_data = $xml->data;
+ print $output_data . "\n";
+}
+
# ---------------------------------------------------------------------------
# main
# ---------------------------------------------------------------------------
import_data;
if ($verbose) { show_input; }
write_xml;
+if ($verbose) { show_output; }
exit;