Began breaking out GAF from generic CSV, tab format.

author preecej <preecej@localhost>

Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)

committer preecej <preecej@localhost>

Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)
author preecej <preecej@localhost>
Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)
committer preecej <preecej@localhost>
Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)
diff --git a/preecej/semantic_wiki/paw_TransformForImport.pl b/preecej/semantic_wiki/paw_TransformForImport.pl

index a6fbb62e67cdc9ac4cd5fc3b8c1a177a9a751667..92a47133f7615632533183db0e3eaf46d1467cbc 100644 (file)
--- a/preecej/semantic_wiki/paw_TransformForImport.pl
+++ b/preecej/semantic_wiki/paw_TransformForImport.pl
@@ -105,6 +105,7 @@ use XML::DOM;
  
  my %opts; # arg options
  my $file_type; # tab, csv, gaf
+my $file_del; # data delimiter
  my $input_file;
  my $output_file;
  my $verbose = 0; # flag for verbose output
@@ -157,9 +158,15 @@ sub init
          }
      }
      
-    # split data on either commas or tabs, if file type is generic
-    if ($file_type = "csv" || $file_type = "tab") {
-        my $file_del = ($file_type eq "csv") ? ',' : '\t';
+    # split data on either commas or tabs
+    switch ($file_type) {
+        case "csv" { $file_del = ','; }
+        case "tab" { $file_del = '\t'; }
+        case "gaf" { $file_del = '\t'; }
+        else {
+            die(uc($file_type) . " is not a valid file type. Please supply "
+                . "a tab, csv, or gaf file.\n");
+        }
      }
          
      system "clear";
@@ -190,12 +197,14 @@ sub import_data
      # open file
      open(INPUT_FILE,$input_file) or die("Could not open input file.");
  
+    my $line; # all-purpose buffer for the current input line
+    
      # read in the source data
      my $count = 0;
      while (<INPUT_FILE>)
      {
         $count++;
-       my $line = $_;
+       $line = $_;
         chomp $line;
         my $data_val = (split('=',$line))[1];
         switch ($count) {
@@ -204,19 +213,38 @@ sub import_data
             case 4 { $source{'SourceVersion'} = $data_val; }
             case 5 { $source{'SourceURI'} = $data_val; }
             case 6 { $source{'SourceFile'} = $data_val; }
-           case 8 { $template_name = $data_val; }
-           case 9 { @field_names = split($file_del,$data_val); }
             else {;}
         }
-       if ($count == 10) { last; }
+       if ($count == 6) { last; }
      }
  
+    # read in "[Format]" section if file type is tab or csv
+    $count++;
+    if ($file_type =~ /(csv)|(tab)/) { 
+
+        $count = 0;
+        while (<INPUT_FILE>)
+        {
+           $count++;
+           $line = $_;
+           chomp $line;
+           my $data_val = (split('=',$line))[1];
+           switch ($count) {
+               case 2 { $template_name = $data_val; }
+               case 3 { @field_names = split($file_del,$data_val); }
+               else {;}
+           }
+           if ($count == 3) { last; }
+        }
+    }
+   
      print "Reading data...\n\n";
+    $line = <INPUT_FILE>; # skip "[Data]"
      
      # loop through data rows and add all data fields to an array of hashes
      while (<INPUT_FILE>)
      {
-        my $line = $_;
+        $line = $_;
          chomp $line;
          
          my @tmp_data_ary = split($file_del, $line);
@@ -238,14 +266,17 @@ sub import_data
  # ---------------------------------------------------------------------------
  sub show_input
  {
-    print "[Source]\n";
+    print "\n[Source]\n";
      foreach my $key (keys %source) {
          print "$key: $source{$key}\n";
      }
      print "\n";
-    print "[Template]\n$template_name\n\n";
-    print "[Fields]\n" . join(', ',@field_names) . "\n\n";
-    
+
+    if ($file_type =~ /(csv)|(tab)/) { 
+        print "[Template]\n$template_name\n\n";
+        print "[Fields]\n" . join(', ',@field_names) . "\n\n";
+    }
+        
      print "[Data]\n";
      foreach my $row (@field_data) {
          foreach my $key (keys %$row) {
@@ -256,12 +287,11 @@ sub show_input
      print "\n";
  }
  
-
-# loop through the hash and build annotation data and source xml doc
+# xml transformation for generic tab or CSV-templated data
+# (currently uses XML::Smart)
  # ---------------------------------------------------------------------------
-sub write_xml
+sub transform_generic
  {
-    print "Transforming data...\n\n";
      my $curr_node; # placeholder for general node cursor
      my $curr_prov_node; # placeholder for node cursor in provenance pages
      my $curr_annot_node; # placeholder for node cursor in annotation pages
@@ -311,7 +341,7 @@ sub write_xml
          $curr_annot_node = $curr_annot_node->{Template};    
        
          # set up next provenance page
-        my $next_page = { Title => "Annotation:$next_page_title_id/Provenance" };
+        $next_page = { Title => "Annotation:$next_page_title_id/Provenance" };
          push(@{$curr_node->{Page}}, $next_page);
  
          $curr_prov_node = $curr_node->{Page}(
@@ -374,6 +404,28 @@ sub write_xml
      # write out xml doc to a single ImportXML file
      print "Writing data to output file...\n\n";
      $xml->save($output_file);
+    $output_data = $xml->data;
+}
+
+# xml transformation for GAF data
+# (stub: stores a placeholder string for now; XML::DOM output is still TODO)
+# ---------------------------------------------------------------------------
+sub transform_gaf
+{
+    $output_data = "hullo, gaf";
+}
+
+
+# dispatch to the XML transformation that matches the input file type
+# ---------------------------------------------------------------------------
+sub write_xml
+{
+    print "Transforming " . uc($file_type) . " data to SMW/SF XML...\n\n";
+
+    # compare each type explicitly: ('csv' || 'tab') evaluates to just 'csv',
+    # which would silently skip the transformation for tab files
+    if ($file_type eq 'csv' || $file_type eq 'tab') { transform_generic(); }
+    elsif ($file_type eq 'gaf') { transform_gaf(); }
+}
  
  
@@ -382,7 +434,6 @@ sub write_xml
  sub show_output
  {
      print "[XML]\n";
-    $output_data = $xml->data;
      print $output_data;
      print "\n";
  }
@@ -394,7 +445,7 @@ sub show_output
  init;
  import_data;
  if ($verbose) { show_input; }
-write_xml;
+write_xml();
  if ($verbose) { show_output; }
  
  exit;
author	preecej <preecej@localhost>
	Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)
committer	preecej <preecej@localhost>
	Mon, 25 Jul 2011 23:46:47 +0000 (23:46 +0000)