Added input reads and tests

author preecej <preecej@localhost>

Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)

committer preecej <preecej@localhost>

Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)
author preecej <preecej@localhost>
Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)
committer preecej <preecej@localhost>
Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)
diff --git a/preecej/semantic_wiki/PS_TransformForImport.pl b/preecej/semantic_wiki/PS_TransformForImport.pl

index 9878e5b69dc2589c06897d54c7311d57e2c11525..d58cc7da4ad1881270e62454119adb8648e530e3 100644 (file)
--- a/preecej/semantic_wiki/PS_TransformForImport.pl
+++ b/preecej/semantic_wiki/PS_TransformForImport.pl
@@ -6,7 +6,7 @@ Plant Semantics Import Transformation Script
  
  =head1 VERSION
  
-0.1 (svn r100)
+0.1
  
  =head1 DESCRIPTION
  
@@ -51,6 +51,9 @@ data.
      "
      "
  
+    NOTE: One of the fields must be named Accession ID for the
+    tracking of provenance.
+
  =head1 AUTHOR
  
  Justin Preece
@@ -78,53 +81,134 @@ use XML::Smart;
  # declarations
  # ---------------------------------------------------------------------------
  
-# read and set options
  my %opts; # arg options
-getopts('i:t:o:', \%opts);
+my $file_type; # tab or csv
+my $file_del;
+my $verbose = 0; # flag for verbose output
+my $debug = 0; # debugging switch
  
-# set paths to data files
+# set paths for data files
  my $path = getcwd() . "/";
  my $input_file = $path;
  my $output_file = $path;
-my $file_type;
-
-foreach my $key (%opts) { 
-    my $value = $opts{$key};
-    switch ($key) {
-        case "i" { $input_file = $input_file . $value; }
-        case "t" { $file_type = $value; }
-        case "o" { $output_file = $output_file . $value; }
-    }
-}
  
-print "$input_file\n";
-print "$output_file\n";
-print "$file_type\n";
+# data storage
+my %provenance;
+my $template_name;
+my @field_names;
+my @field_data;
  
  # ---------------------------------------------------------------------------
  # functions
  # ---------------------------------------------------------------------------
  
-sub import_data
+sub init
  {
-    # open file
+    # read and set options
+    getopts('i:t:o:vd', \%opts);
      
-    # read in provenance header
-    
-    # read in data template
+    foreach my $key (keys %opts) { 
+        my $value = $opts{$key};
+        switch ($key) {
+            case "i" { $input_file = $input_file . $value; }
+            case "t" { $file_type = $value; }
+            case "o" { $output_file = $output_file . $value; }
+            case "v" { $verbose = 1; }
+            case "d" { $debug = 1; }
+        }
+    }
      
-    # read in data fields (columns)
+    $file_del = ($file_type eq "csv") ? ',' : '\t';
      
-    # loop through data rows and add all fields to a named hash
+    if ($verbose) {
+        system "clear";
+        print "\n"
+            . "------------------------------------------------------------\n"
+            . "------- Plant Semantics Import Transformation Script -------\n"
+            . "------------------------------------------------------------\n"
+            . "\n\n"
+            . "Input File: $input_file\n"
+            . "Designated input file type: $file_type\n"
+            . "Output File: $output_file\n"
+            . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+            . "Running in debug mode? " . ($verbose ? "Yes" : "No") . "\n"
+            . "\n"
+            . "------------------------------------------------------------\n"
+            . "------------------------------------------------------------\n"
+            . "------------------------------------------------------------\n"
+            . "\n";
+    }
+}
+
+sub import_data
+{
+    if ($verbose) { print "Opening file and reading header info...\n\n"; }
+
+    # open file
+    open(INPUT_FILE,$input_file) or die("Could not open input file.");
+
+    # read in the provenance data
+    my $count = 0;
+    while (<INPUT_FILE>)
+    {
+       $count++;
+       my $line = $_;
+       chomp $line;
+       my $data_val = (split('=',$line))[1];
+       switch ($count) {
+           case 2 { $provenance{'SourceDateTimeStamp'} = $data_val; }
+           case 3 { $provenance{'SourceDate'} = $data_val; }
+           case 4 { $provenance{'SourceVersion'} = $data_val; }
+           case 5 { $provenance{'SourceURI'} = $data_val; }
+           case 6 { $provenance{'SourceFile'} = $data_val; }
+           case 8 { $template_name = $data_val; }
+           case 9 { @field_names = split($file_del,$data_val); }
+           else {;}
+       }
+       if ($count == 10) { last; }
+    }
+
+    if ($verbose) { print "Reading data...\n\n"; }
      
+    # loop through data rows and add all data fields to an array of hashes
+    while (<INPUT_FILE>)
+    {
+        my $line = $_;
+        chomp $line;
+        
+        my @tmp_data_ary = split($file_del, $line);
+        my %tmp_hash;
+        
+        if ($debug) { print join(',',@tmp_data_ary) . "\n"; }
+        
+        for (my $i=0; $i<scalar(@field_names); $i++) {
+            if ($debug) { print "$i: $field_names[$i]: $tmp_data_ary[$i]\n"; }           
+            $tmp_hash{$field_names[$i]} = $tmp_data_ary[$i];
+        }
+        push @field_data, %tmp_hash;
+        %tmp_hash = (); # clear your hash as a precaution
+    }
  }
  
  
-# spit out some data to make sure you've read in the files correctly
+# spit out the data to make sure you've read in the files correctly
  # ---------------------------------------------------------------------------
-sub test_inputs
+sub show_input
  {
-    # print out your hash
+    print "[Provenance]\n";
+    foreach my $key (keys %provenance) {
+        print "$key: $provenance{$key}\n";
+    }
+    print "\n";
+    print "Template: $template_name\n";
+    print "Fields: " . join(', ',@field_names) . "\n";
+    
+    print "[Data]\n";
+    foreach my $row (@field_data) {
+        foreach my $key (keys %$row) {
+            print "$key: (%$row){$key}\n";
+        }
+    }
  }
  
  
@@ -142,8 +226,9 @@ sub write_xml
  # main
  # ---------------------------------------------------------------------------
  
+init;
  import_data;
-test_inputs;
+if ($verbose) { show_input; }
  write_xml;
  
  exit;
author	preecej <preecej@localhost>
	Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)
committer	preecej <preecej@localhost>
	Fri, 27 May 2011 00:46:15 +0000 (00:46 +0000)