From: preecej Date: Mon, 25 Jul 2011 23:46:47 +0000 (+0000) Subject: Began breaking out GAF from generic CSV, tab format. X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=5eb66b93d0a37c6b0c2aa2a5df03ffd241a09b4c;p=old-jaiswallab-svn%2F.git Began breaking out GAF from generic CSV, tab format. svn path=/; revision=124 --- diff --git a/preecej/semantic_wiki/paw_TransformForImport.pl b/preecej/semantic_wiki/paw_TransformForImport.pl index a6fbb62..92a4713 100644 --- a/preecej/semantic_wiki/paw_TransformForImport.pl +++ b/preecej/semantic_wiki/paw_TransformForImport.pl @@ -105,6 +105,7 @@ use XML::DOM; my %opts; # arg options my $file_type; # tab, csv, gaf +my $file_del; # data delimeter my $input_file; my $output_file; my $verbose = 0; # flag for verbose output @@ -157,9 +158,15 @@ sub init } } - # split data on either commas or tabs, if file type is generic - if ($file_type = "csv" || $file_type = "tab") { - my $file_del = ($file_type eq "csv") ? ',' : '\t'; + # split data on either commas or tabs + switch ($file_type) { + case "csv" { $file_del = ','; } + case "tab" { $file_del = '\t'; } + case "gaf" { $file_del = '\t'; } + else { + die(uc($file_type) . " is not a valid file type. Please supply " + . "a tab, csv, or gaf file.\n"); + } } system "clear"; @@ -190,12 +197,14 @@ sub import_data # open file open(INPUT_FILE,$input_file) or die("Could not open input file."); + my $line; # all-purpose line counter + # read in the source data my $count = 0; while () { $count++; - my $line = $_; + $line = $_; chomp $line; my $data_val = (split('=',$line))[1]; switch ($count) { @@ -204,19 +213,38 @@ sub import_data case 4 { $source{'SourceVersion'} = $data_val; } case 5 { $source{'SourceURI'} = $data_val; } case 6 { $source{'SourceFile'} = $data_val; } - case 8 { $template_name = $data_val; } - case 9 { @field_names = split($file_del,$data_val); } else {;} } - if ($count == 10) { last; } + if ($count == 6) { last; } } + # read in "[Format] section if filetype is tab or csv" + $count++; + if ($file_type =~ /(csv)|(tab)/) { + + $count = 0; + while () + { + $count++; + $line = $_; + chomp $line; + my $data_val = (split('=',$line))[1]; + switch ($count) { + case 2 { $template_name = $data_val; } + case 3 { @field_names = split($file_del,$data_val); } + else {;} + } + if ($count == 3) { last; } + } + } + print "Reading data...\n\n"; + $line = ; # skip "[Data]" # loop through data rows and add all data fields to an array of hashes while () { - my $line = $_; + $line = $_; chomp $line; my @tmp_data_ary = split($file_del, $line); @@ -238,14 +266,17 @@ sub import_data # --------------------------------------------------------------------------- sub show_input { - print "[Source]\n"; + print "\n[Source]\n"; foreach my $key (keys %source) { print "$key: $source{$key}\n"; } print "\n"; - print "[Template]\n$template_name\n\n"; - print "[Fields]\n" . join(', ',@field_names) . "\n\n"; - + + if ($file_type =~ /(csv)|(tab)/) { + print "[Template]\n$template_name\n\n"; + print "[Fields]\n" . join(', ',@field_names) . "\n\n"; + } + print "[Data]\n"; foreach my $row (@field_data) { foreach my $key (keys %$row) { @@ -256,12 +287,11 @@ sub show_input print "\n"; } - -# loop through the hash and build annotation data and source xml doc +# xml transformation for generic tab or CSV-templated data +# (currently uses XML::Smart) # --------------------------------------------------------------------------- -sub write_xml +sub transform_generic { - print "Transforming data...\n\n"; my $curr_node; # placeholder for general node cursor my $curr_prov_node; # placeholder for node cursor in provenance pages my $curr_annot_node; # placeholder for node cursor in annotation pages @@ -311,7 +341,7 @@ sub write_xml $curr_annot_node = $curr_annot_node->{Template}; # set up next provenance page - my $next_page = { Title => "Annotation:$next_page_title_id/Provenance" }; + $next_page = { Title => "Annotation:$next_page_title_id/Provenance" }; push(@{$curr_node->{Page}}, $next_page); $curr_prov_node = $curr_node->{Page}( @@ -374,6 +404,28 @@ sub write_xml # write out xml doc to a single ImportXML file print "Writing data to output file...\n\n"; $xml->save($output_file); + $output_data = $xml->data; +} + +# xml transformation for GAF data +# (currently uses XML::DOM) +# --------------------------------------------------------------------------- +sub transform_gaf +{ + $output_data = "hullo, gaf"; +} + + +# loop through the hash and build annotation data and source xml doc +# --------------------------------------------------------------------------- +sub write_xml +{ + print "Transforming " . uc($file_type) . " data to SMW/SF XML...\n\n"; + + switch ($file_type) { + case ('csv' || 'tab') { transform_generic(); } + case 'gaf' { transform_gaf(); } + } } @@ -382,7 +434,6 @@ sub write_xml sub show_output { print "[XML]\n"; - $output_data = $xml->data; print $output_data; print "\n"; } @@ -394,7 +445,7 @@ sub show_output init; import_data; if ($verbose) { show_input; } -write_xml; +write_xml(); if ($verbose) { show_output; } exit;