From ea63b45895d6da500e994e670527316efd865f2d Mon Sep 17 00:00:00 2001
From: preecej
Date: Fri, 27 May 2011 00:46:15 +0000
Subject: [PATCH] Added input reads and tests

svn path=/; revision=102
---
 .../semantic_wiki/PS_TransformForImport.pl | 141 ++++++++++++++----
 1 file changed, 113 insertions(+), 28 deletions(-)

diff --git a/preecej/semantic_wiki/PS_TransformForImport.pl b/preecej/semantic_wiki/PS_TransformForImport.pl
index 9878e5b..d58cc7d 100644
--- a/preecej/semantic_wiki/PS_TransformForImport.pl
+++ b/preecej/semantic_wiki/PS_TransformForImport.pl
@@ -6,7 +6,7 @@ Plant Semantics Import Transformation Script

 =head1 VERSION

-0.1 (svn r100)
+0.1

 =head1 DESCRIPTION

@@ -51,6 +51,9 @@ data.
     " "

+    NOTE: One of the fields must be named Accession ID for the
+    tracking of provenance.
+
 =head1 AUTHOR

 Justin Preece
@@ -78,53 +81,134 @@ use XML::Smart;
 # declarations
 # ---------------------------------------------------------------------------

-# read and set options
 my %opts; # arg options
-getopts('i:t:o:', \%opts);
+my $file_type; # tab or csv
+my $file_del;
+my $verbose = 0; # flag for verbose output
+my $debug = 0; # debugging switch

-# set paths to data files
+# set paths for data files
 my $path = getcwd() . "/";
 my $input_file = $path;
 my $output_file = $path;
-my $file_type;
-
-foreach my $key (%opts) {
-    my $value = $opts{$key};
-    switch ($key) {
-        case "i" { $input_file = $input_file . $value; }
-        case "t" { $file_type = $value; }
-        case "o" { $output_file = $output_file . $value; }
-    }
-}

-print "$input_file\n";
-print "$output_file\n";
-print "$file_type\n";
+# data storage
+my %provenance;
+my $template_name;
+my @field_names;
+my @field_data;

 # ---------------------------------------------------------------------------
 # functions
 # ---------------------------------------------------------------------------

-sub import_data
+sub init    # fills the option globals declared above from the command line
 {
-    # open file
+    # read and set options
+    getopts('i:t:o:vd', \%opts);

-    # read in provenance header
-
-    # read in data template
+    foreach my $key (keys %opts) {
+        my $value = $opts{$key};
+        switch ($key) {
+            case "i" { $input_file = $input_file . $value; }
+            case "t" { $file_type = $value; }
+            case "o" { $output_file = $output_file . $value; }
+            case "v" { $verbose = 1; }
+            case "d" { $debug = 1; }
+        }
+    }

-    # read in data fields (columns)
+    $file_del = ($file_type eq "csv") ? ',' : '\t'; # '\t' is handed to split() as a pattern, where it matches a tab

-    # loop through data rows and add all fields to a named hash
+    if ($verbose) {
+        system "clear";
+        print "\n"
+            . "------------------------------------------------------------\n"
+            . "------- Plant Semantics Import Transformation Script -------\n"
+            . "------------------------------------------------------------\n"
+            . "\n\n"
+            . "Input File: $input_file\n"
+            . "Designated input file type: $file_type\n"
+            . "Output File: $output_file\n"
+            . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+            . "Running in debug mode? " . ($debug ? "Yes" : "No") . "\n" # keyed on the -d switch
+            . "\n"
+            . "------------------------------------------------------------\n"
+            . "------------------------------------------------------------\n"
+            . "------------------------------------------------------------\n"
+            . "\n";
+    }
+}
+
+sub import_data
+{
+    if ($verbose) { print "Opening file and reading header info...\n\n"; }
+
+    # open file
+    open(INPUT_FILE,$input_file) or die("Could not open input file.");
+
+    # read in the provenance data
+    my $count = 0;
+    while (<INPUT_FILE>)
+    {
+        $count++;
+        my $line = $_;
+        chomp $line;
+        my $data_val = (split('=',$line))[1];
+        switch ($count) {
+            case 2 { $provenance{'SourceDateTimeStamp'} = $data_val; }
+            case 3 { $provenance{'SourceDate'} = $data_val; }
+            case 4 { $provenance{'SourceVersion'} = $data_val; }
+            case 5 { $provenance{'SourceURI'} = $data_val; }
+            case 6 { $provenance{'SourceFile'} = $data_val; }
+            case 8 { $template_name = $data_val; }
+            case 9 { @field_names = split($file_del,$data_val); }
+            else {;}
+        }
+        if ($count == 10) { last; }
+    }
+
+    if ($verbose) { print "Reading data...\n\n"; }
+    # loop through data rows and add all data fields to an array of hashes
+    while (<INPUT_FILE>)
+    {
+        my $line = $_;
+        chomp $line;
+
+        my @tmp_data_ary = split($file_del, $line);
+        my %tmp_hash;
+
+        if ($debug) { print join(',',@tmp_data_ary) . "\n"; }
+
+        for (my $i=0; $i