+++ /dev/null
-#!/usr/bin/perl -w
-
-=head1 NAME
-
-Plant Semantics Import Transformation Script
-
-=head1 VERSION
-
-0.2
-
-=head1 DESCRIPTION
-
-Transform external gene annotation data into an XML document readable
-by the MediaWiki extension DataTransfer (Special:ImportXML) feature.
-Also generates appropriate provenance of data based on header of
-import file.
-
-=head1 USAGE
-
-PS_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE
-
-=head1 OPTIONS
-
- -i Name of input CSV or tab-del file.
- -t Specifies input type of file ('csv' or 'tab')
- -o Name of output file.
- -v View verbose information
- -d View debugging information
-
-=head1 DEPENDENCIES
-
-Requires that the input files contain two headers: the first will hold
-the provenance information associated with the imported data, and the
-second will specify the template and field names for the annotation
-data.
-
-=head2 Provenance Header Format Example
-
- [Provenance]
- Source Date Time Stamp=Apr 2 2008
- Source Database=Oryzabase
- Source Version=rel. 10
- Source URI=http://www.shigen.nig.ac.jp/rice/oryzabase/
- Source File=http://www.shigen.nig.ac.jp/rice/oryzabase/genes/...
-
-=head2 Data Header Format Example (field separator may also be a tab)
-
- [Format]
- Template=Gene
- Fields=Species Name,Gene Symbol,Gene Name,Chromosome,Gene Type,Accession ID
- [Data]
- val1,val2,val3,...
- "
- "
- "
-
- NOTE: One of the fields MUST be named "Accession ID" for the
- tracking of provenance.
-
-=head1 AUTHOR
-
-Justin Preece
- Faculty Research Assistant, Bioinformatics
- Jaiswal Lab, Botany & Plant Pathology
- Oregon State University
- L<mailto:preecej@science.oregonstate.edu>
-
-=cut
-
-# ---------------------------------------------------------------------------
-# modules
-# ---------------------------------------------------------------------------
-
-# general
-use strict;
-use Cwd;
-use Switch;
-use Getopt::Std;
-
-# specific
-use XML::Smart;
-
-# ---------------------------------------------------------------------------
-# declarations
-# ---------------------------------------------------------------------------
-
-my %opts; # arg options
-my $file_type; # tab or csv
-my $file_del;
-my $verbose = 0; # flag for verbose output
-my $debug = 0; # debugging switch
-
-# set paths for data files
-my $path = getcwd() . "/";
-my $input_file = $path;
-my $output_file = $path;
-
-# data storage
-my %provenance;
-my $template_name;
-my @field_names;
-my @field_data;
-my $xml;
-my $output_data;
-
-my $prov_title_seed = 3;
-my $annot_title_seed = 1563;
-
-# ---------------------------------------------------------------------------
-# functions
-# ---------------------------------------------------------------------------
-
-
-# ---------------------------------------------------------------------------
-sub init
-{
- # read and set options
- getopts('i:t:o:vd', \%opts);
-
- foreach my $key (keys %opts) {
- my $value = $opts{$key};
- switch ($key) {
- case "i" { $input_file = $input_file . $value; }
- case "t" { $file_type = $value; }
- case "o" { $output_file = $output_file . $value; }
- case "v" { $verbose = 1; }
- case "d" { $debug = 1; }
- }
- }
-
- # split data on either commas or tabs, dependent on format
- $file_del = ($file_type eq "csv") ? ',' : '\t';
-
- system "clear";
- print "\n"
- . "------------------------------------------------------------\n"
- . "------- Plant Semantics Import Transformation Script -------\n"
- . "------------------------------------------------------------\n"
- . "\n"
- . "Input File: $input_file\n"
- . "Designated input file type: $file_type\n"
- . "Output File: $output_file\n"
- . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
- . "Running in debug mode? " . ($verbose ? "Yes" : "No") . "\n"
- . "\n"
- . "------------------------------------------------------------\n"
- . "------------------------------------------------------------\n"
- . "------------------------------------------------------------\n"
- . "\n";
-}
-
-
-# read, parse, and store provenance, template, and annotations
-# ---------------------------------------------------------------------------
-sub import_data
-{
- print "Opening input file and reading header info...\n\n";
-
- # open file
- open(INPUT_FILE,$input_file) or die("Could not open input file.");
-
- # read in the provenance data
- my $count = 0;
- while (<INPUT_FILE>)
- {
- $count++;
- my $line = $_;
- chomp $line;
- my $data_val = (split('=',$line))[1];
- switch ($count) {
- case 2 { $provenance{'SourceDateTimeStamp'} = $data_val; }
- case 3 { $provenance{'SourceDatabase'} = $data_val; }
- case 4 { $provenance{'SourceVersion'} = $data_val; }
- case 5 { $provenance{'SourceURI'} = $data_val; }
- case 6 { $provenance{'SourceFile'} = $data_val; }
- case 8 { $template_name = $data_val; }
- case 9 { @field_names = split($file_del,$data_val); }
- else {;}
- }
- if ($count == 10) { last; }
- }
-
- print "Reading data...\n\n";
-
- # loop through data rows and add all data fields to an array of hashes
- while (<INPUT_FILE>)
- {
- my $line = $_;
- chomp $line;
-
- my @tmp_data_ary = split($file_del, $line);
- my %tmp_hash;
-
- if ($debug) { print join(',',@tmp_data_ary) . "\n"; }
-
- for (my $i=0; $i<scalar(@field_names); $i++) {
- if ($debug) { print "$i: $field_names[$i]: $tmp_data_ary[$i]\n"; }
- $tmp_hash{$field_names[$i]} = $tmp_data_ary[$i];
- }
- push @field_data, \%tmp_hash;
- }
- close INPUT_FILE;
-}
-
-
-# spit out the data to make sure you've read in the files correctly
-# ---------------------------------------------------------------------------
-sub show_input
-{
- print "[Provenance]\n";
- foreach my $key (keys %provenance) {
- print "$key: $provenance{$key}\n";
- }
- print "\n";
- print "[Template]\n$template_name\n\n";
- print "[Fields]\n" . join(', ',@field_names) . "\n\n";
-
- print "[Data]\n";
- foreach my $row (@field_data) {
- foreach my $key (keys %$row) {
- print "$key => " . $row->{$key} . "\n";
- }
- print "\n";
- }
- print "\n";
-}
-
-
-# loop through the hash and build annotation data and provenance xml doc
-# ---------------------------------------------------------------------------
-sub write_xml
-{
- print "Transforming data...\n\n";
- my $curr_node; # placeholder for general node cursor
- my $curr_prov_node; # placeholder for node cursor in provenance pages
- my $curr_annot_node; # placeholder for node cursor in annotation pages
- my $curr_accession_id; # holds each rows accession id for provenance marking
-
-
- $xml = new XML::Smart($output_data, 'XML::Smart::Parser');
-
- # set root element, provenance page and elements
- # (temp set of page title until moved to import extension)
- $xml->{Pages}{Page} = {Title => "PROV:$prov_title_seed"};
-
- $curr_node = $xml->{Pages}{Page};
- $curr_node->{Template} = {Name => 'Provenance'};
- $curr_node = $curr_node->{Template};
-
- $curr_node->{Field}[0] = {Name => 'Source Data Time Stamp'};
- $curr_node->{Field}[0]->content(0,$provenance{'SourceDateTimeStamp'});
- $curr_node->{Field}[1] = {Name => 'Source Database'};
- $curr_node->{Field}[1]->content(0,$provenance{'SourceDatabase'});
- $curr_node->{Field}[2] = {Name => 'Source Version'};
- $curr_node->{Field}[2]->content(0,$provenance{'SourceVersion'});
- $curr_node->{Field}[3] = {Name => 'Source URI'};
- $curr_node->{Field}[3]->content(0,$provenance{'SourceURI'});
- $curr_node->{Field}[4] = {Name => 'Source File'};
- $curr_node->{Field}[4]->content(0,$provenance{'SourceFile'});
-
- $curr_node = $curr_node->back->back; # return to <Pages> node
-
- if ($debug) { print "Current node: " . $curr_node->path . "\n"; }
-
- my $next_page_title_id = $annot_title_seed;
-
- # iterate through the data
- foreach my $row (@field_data) {
-
- # set up next annotation page
- my $next_page = { Title => "PS:$next_page_title_id" };
- push(@{$curr_node->{Page}}, $next_page);
-
- $curr_annot_node = $curr_node->{Page}(
- "Title","eq","PS:$next_page_title_id");
-
- if ($debug) { print "Curr annot node: ".$curr_annot_node->path."\n";}
-
- $curr_annot_node->{Template} = {Name => "$template_name"};
- $curr_annot_node = $curr_annot_node->{Template};
-
- # set up next provenance page
- my $next_page = { Title => "PS:$next_page_title_id/PROV" };
- push(@{$curr_node->{Page}}, $next_page);
-
- $curr_prov_node = $curr_node->{Page}(
- "Title","eq","PS:$next_page_title_id/PROV");
-
- if ($debug) {print "Curr prov node: " . $curr_prov_node->path . "\n"; }
-
- $curr_prov_node->{Template} = {Name => 'Provenance_Reference_Data'};
- $curr_prov_node = $curr_prov_node->{Template};
- $curr_prov_node->{Field} = {Name => 'Annotation Page'};
- $curr_prov_node->{Field}->content(0,"PS:$next_page_title_id");
- $curr_prov_node = $curr_prov_node->back;
-
- my $field_ct = 0; # counter for field position in pages
-
- # grab the Accession ID for the current row of data
- foreach my $key (keys %$row) {
- if ($key eq "Accession ID") {
- $curr_accession_id = $row->{$key};
- if ($debug) {
- print "* Found Accession ID: $curr_accession_id *\n";
- }
- }
- }
- if (!(defined $curr_accession_id)) {
- die "Error: No Accession ID available. Ending program.\n";
- }
-
- # iterate through the annotation data and assign to <Field> elements
- foreach my $key (keys %$row) {
- if ($debug) { print "$key => " . $row->{$key} . "\n"; }
-
- # build the annotation page
- $curr_annot_node->{Field}[$field_ct] = {Name => $key};
- $curr_annot_node->{Field}[$field_ct]->content(0,$row->{$key});
- $field_ct++;
-
- # add a corresponding template to the annotation provenance page
- my $next_prov_node = {Name => 'Provenance_Reference_Data_Repeater'};
- push(@{$curr_prov_node->{Template}}, $next_prov_node);
-
- # grab the last template you added
- $curr_prov_node = @{$curr_prov_node->{Template}}[-1];
-
- # assign the relevant provenance field data
- $curr_prov_node->{Field}[0] = {Name => 'Provenance Page'};
- $curr_prov_node->{Field}[0]->content(0,"PROV:$prov_title_seed");
- $curr_prov_node->{Field}[1] = {Name => 'Accession ID'};
- $curr_prov_node->{Field}[1]->content(0,$curr_accession_id);
- $curr_prov_node->{Field}[2] = {Name => 'Source Field'};
- $curr_prov_node->{Field}[2]->content(0,$key);
- $curr_prov_node->{Field}[3] = {Name => 'Source Template'};
- $curr_prov_node->{Field}[3]->content(0,$template_name);
-
- $curr_prov_node = $curr_prov_node->back;
- }
- $next_page_title_id++;
- }
-
- # write out xml doc to a single ImportXML file
- print "Writing data to output file...\n\n";
- $xml->save($output_file);
-}
-
-
-# print the transformed data (as xml)
-# ---------------------------------------------------------------------------
-sub show_output
-{
- print "[XML]\n";
- $output_data = $xml->data;
- print $output_data;
- print "\n";
-}
-
-# ---------------------------------------------------------------------------
-# main
-# ---------------------------------------------------------------------------
-
-init;
-import_data;
-if ($verbose) { show_input; }
-write_xml;
-if ($verbose) { show_output; }
-
-exit;
-
{{{for template|Annotation}}}
{| class="formtable"
-! Accession ID:
-| {{{field|Accession ID}}}
-|-
! Species Name:
| {{{field|Species Name}}}
|-
| {{{field|Species ID}}}
|-
! Gene Symbol:
-| {{#if:
- {{#ask:[[Category:Provenance]][[Is Associated With Annotation::~{{PAGENAME}}/Provenance]]
+| {{#if: {{#ask: [[Is Provenance::~Annotation:1/Provenance*]]
[[Is Associated With Field::Gene Symbol]]
[[Is Associated With Template::Annotation]]
- |?is_Provenance
- }}
- |{{{field|Gene Symbol|restricted}}}
- |{{{field|Gene Symbol}}}
- }}
+ |?has_Provenance
+ }}
+ |{{{field|Gene Symbol|restricted}}}
+ |{{{field|Gene Symbol}}}
+ }}
|-
! Gene Name:
| {{{field|Gene Name}}}
|-
! Gene Synonyms:
-| synonym code here...
+| {{#if: {{#ask:[[Category:Gene_Synonyms]][[Is Associated With Annotation::{{PAGENAME}} ]] }}
+ | {{#ask:[[Is Gene Synonym Pair::~{{PAGENAME}}/Gene_Synonyms]]
+ | ?is_Gene_Synonym
+ | headers=hide
+ | mainlabel=-
+ | format=list
+ }} {{#formlink:form=Gene_Synonyms|link text=Edit|link type=button|target={{PAGENAME}}/Gene_Synonyms }}
+ | {{#formlink:form=Gene_Synonyms|link text=Add Synonyms|link type=button|query string=Gene_Synonyms[Annotation Page]={{PAGENAME}}&super_page={{PAGENAME}} }}
+}}
|-
! Gene Type:
| {{{field|Gene Type}}}
| {{{field|EC Numbers|list|delimiter=,}}}
|-
! Chromosome:
-| {{#if:
- {{#ask:[[Category:Provenance]][[Is Associated With Annotation::~{{PAGENAME}}/Provenance]]
+| {{#if: {{#ask: [[Is Provenance::~Annotation:1/Provenance*]]
[[Is Associated With Field::Chromosome]]
[[Is Associated With Template::Annotation]]
- |?is_Provenance
- }}
- |{{{field|Chromosome|restricted}}}
- |{{{field|Chromosome}}}
- }}
+ |?has_Provenance
+ }}
+ |{{{field|Chromosome|restricted}}}
+ |{{{field|Chromosome}}}
+ }}
|-
! Has Phenotype:
| {{{field|Has Phenotype}}}
| Add, edit, or remove ontology references {{#formlink:form=Ontological_References|link text=here|target={{PAGENAME}}/Ontologies }}.
{{#ask:[[Is Ontological Reference::~{{PAGENAME}}/Ontologies*]]
| mainlabel=-
+ |? from_Ontology
|? has_Term_ID
|? has_Term_Name
|? has_Aspect
--- /dev/null
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+Plant Semantics Import Transformation Script
+
+=head1 VERSION
+
+0.2
+
+=head1 DESCRIPTION
+
+Transform external gene annotation data into an XML document readable
+by the MediaWiki extension DataTransfer (Special:ImportXML) feature,
+including subpages. Also generates the appropriate provenance data,
+based on the header of the import file.
+
+=head1 USAGE
+
+PS_TransformForImport.pl -i INPUT_FILE -t TYPE -o OUTPUT_FILE [-v] [-d]
+
+=head1 OPTIONS
+
+ -i Name of input CSV or tab-del file.
+ -t Specifies input type of file ('csv' or 'tab')
+ -o Name of output file.
+ -v View verbose information
+ -d View debugging information
+
+=head1 DEPENDENCIES
+
+Requires that the input file contain at least two headers: the first
+will hold the provenance information associated with the imported
+data, and all following headers will specify the templates and field
+names for the annotation data.
+
+The main templates pertain to Annotations, Sources, and Publications.
+Other templates are to be used for subsidiary reference data
+(i.e. gene synonyms, external references, ontology terms,
+and sequence data).
+
+=head2 Source Header Format Example
+
+ [Source]
+ Source Date Time Stamp=Apr 2 2008
+ Source Database=Oryzabase
+ Source Version=rel. 10
+ Source URI=http://www.shigen.nig.ac.jp/rice/oryzabase/
+ Source File=http://www.shigen.nig.ac.jp/rice/oryzabase/genes/...
+
+=head2 Data Header Format Examples
+
+ NOTE: The field separator may be a tab instead of a comma, but the
+ same separator must be used consistently across the entire file.
+
+ [Format]
+ Template=Annotation
+ Fields=Species Name,Gene Symbol,Gene Name,Chromosome,Gene Type,Accession ID
+ [Data]
+ val1,val2,val3,...
+ "
+ "
+ "
+ [Format]
+ Template=Publication
+ Fields=Publication,Publication ID,Publication Title,...,Accession ID
+ [Data]
+ val1,val2,val3,...
+ "
+ "
+ "
+
+ NOTE: One of the fields in each format MUST be named "Accession ID" for the
+ tracking of provenance.
+
+=head1 AUTHOR
+
+Justin Preece
+ Faculty Research Assistant, Bioinformatics
+ Jaiswal Lab, Botany & Plant Pathology
+ Oregon State University
+ L<mailto:preecej@science.oregonstate.edu>
+
+=cut
+
+# ---------------------------------------------------------------------------
+# modules
+# ---------------------------------------------------------------------------
+
+# general
+use strict;
+use Cwd;
+use Switch;
+use Getopt::Std;
+
+# specific
+use XML::Smart;
+
+# ---------------------------------------------------------------------------
+# declarations
+# ---------------------------------------------------------------------------
+
+my %opts; # command-line switches, filled by getopts() in init
+my $file_type; # value of -t: 'csv' or 'tab'
+my $file_del; # pattern handed to split(); chosen in init from $file_type
+my $verbose = 0; # set to 1 by -v; enables show_input / show_output
+my $debug = 0; # set to 1 by -d; enables per-row and per-node trace prints
+
+# set paths for data files
+my $path = getcwd() . "/"; # current working directory with a trailing slash
+my $input_file = $path; # init appends the -i value, so -i is taken relative to the cwd
+my $output_file = $path; # init appends the -o value, so -o is taken relative to the cwd
+
+# data storage
+my %source; # header values keyed SourceDateStamp, SourceDatabase, SourceVersion, SourceURI, SourceFile
+my $template_name; # value read from the header's Template= line
+my @field_names; # column names read from the header's Fields= line
+my @field_data; # one hash ref per data row, mapping field name to value
+my $xml; # XML::Smart document built in write_xml
+my $output_data; # passed to the XML::Smart constructor; later holds the XML text in show_output
+
+my $prov_title_seed = 2; # number used in the "Source:N" page title; presumably a placeholder id -- confirm
+my $annot_title_seed = 3; # number of the first "Annotation:N" page; incremented once per data row
+
+# ---------------------------------------------------------------------------
+# functions
+# ---------------------------------------------------------------------------
+
+
+# ---------------------------------------------------------------------------
+sub init
+{
+    # Reads the command-line switches (-i, -t, -o, -v, -d) into the
+    # file-level settings, selects the field delimiter, clears the
+    # terminal and prints a banner summarising the run.
+    # Takes no arguments.  Dies with a usage message when a switch is
+    # not recognised or when one of -i, -t or -o has no value.
+    my $usage = "Usage: PS_TransformForImport.pl"
+        . " -i INPUT_FILE -t TYPE -o OUTPUT_FILE [-v] [-d]\n";
+
+    # getopts() returns false when it meets an unknown switch or a
+    # switch that is missing its argument.
+    getopts('i:t:o:vd', \%opts) or die $usage;
+
+    foreach my $required (qw(i t o)) {
+        die "Missing required option -$required\n$usage"
+            unless defined $opts{$required};
+    }
+
+    # $input_file and $output_file already hold the working directory,
+    # so the -i and -o values are appended to it.
+    $input_file  = $input_file . $opts{i};
+    $output_file = $output_file . $opts{o};
+    $file_type   = $opts{t};
+    $verbose     = 1 if $opts{v};
+    $debug       = 1 if $opts{d};
+
+    # split() compiles its first argument as a pattern, so the
+    # single-quoted '\t' (backslash followed by t) matches a tab.
+    # Any type other than "csv" is treated as tab-delimited.
+    $file_del = ($file_type eq "csv") ? ',' : '\t';
+
+    system "clear";
+    print "\n"
+        . "------------------------------------------------------------\n"
+        . "-- Planteome Annotation Wiki Import Transformation Script --\n"
+        . "------------------------------------------------------------\n"
+        . "\n"
+        . "Input File: $input_file\n"
+        . "Designated input file type: $file_type\n"
+        . "Output File: $output_file\n"
+        . "Running in verbose mode? " . ($verbose ? "Yes" : "No") . "\n"
+        . "Running in debug mode? " . ($debug ? "Yes" : "No") . "\n"
+        . "\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "------------------------------------------------------------\n"
+        . "\n";
+}
+
+
+# read, parse, and store source, template, and annotations
+# ---------------------------------------------------------------------------
+sub import_data
+{
+    # Reads the file named by $input_file.  The first ten lines are the
+    # header: their values fill %source, $template_name and @field_names.
+    # Every following non-blank line is split on $file_del and stored in
+    # @field_data as a hash ref mapping field name to value.
+    # Takes no arguments.  Dies if the input file cannot be opened.
+    print "Opening input file and reading header info...\n\n";
+
+    # open file
+    open(my $in_fh, '<', $input_file)
+        or die("Could not open input file '$input_file': $!\n");
+
+    # The header is positional.  Per the layout shown in this script's
+    # POD, lines 1, 7 and 10 are the [Source], [Format] and [Data]
+    # markers and carry no value.
+    my %source_key_for_line = (
+        2 => 'SourceDateStamp',
+        3 => 'SourceDatabase',
+        4 => 'SourceVersion',
+        5 => 'SourceURI',
+        6 => 'SourceFile',
+    );
+
+    # read in the source data
+    my $count = 0;
+    while (my $line = <$in_fh>) {
+        $count++;
+        chomp $line;
+
+        # A limit of 2 keeps any further '=' characters (for example in
+        # a URI query string) inside the value instead of cutting it.
+        my $data_val = (split('=', $line, 2))[1];
+
+        if (exists $source_key_for_line{$count}) {
+            $source{ $source_key_for_line{$count} } = $data_val;
+        }
+        elsif ($count == 8) {
+            $template_name = $data_val;
+        }
+        elsif ($count == 9) {
+            @field_names = split($file_del, $data_val);
+        }
+
+        last if $count == 10;
+    }
+
+    print "Reading data...\n\n";
+
+    # loop through data rows and add all data fields to an array of hashes
+    while (my $line = <$in_fh>) {
+        chomp $line;
+
+        # A blank line would become a row whose every field is undef,
+        # which write_xml rejects for lacking an Accession ID.
+        next unless $line =~ /\S/;
+
+        my @row_values = split($file_del, $line);
+        my %value_for;
+
+        if ($debug) { print join(',', @row_values) . "\n"; }
+
+        for my $i (0 .. $#field_names) {
+            if ($debug) { print "$i: $field_names[$i]: $row_values[$i]\n"; }
+            $value_for{ $field_names[$i] } = $row_values[$i];
+        }
+        push @field_data, \%value_for;
+    }
+    close $in_fh;
+}
+
+
+# spit out the data to make sure you've read in the files correctly
+# ---------------------------------------------------------------------------
+sub show_input
+{
+    # Dumps what import_data stored -- the source header values, the
+    # template name, the field names and every data row -- to STDOUT so
+    # the parse can be checked by eye.  Takes no arguments.
+    print "[Source]\n";
+    print "$_: $source{$_}\n" for keys %source;
+
+    print "\n[Template]\n$template_name\n\n";
+    print "[Fields]\n", join(', ', @field_names), "\n\n";
+
+    print "[Data]\n";
+    for my $record (@field_data) {
+        # one "name => value" line per field, then a blank separator line
+        print map { "$_ => " . $record->{$_} . "\n" } keys %{$record};
+        print "\n";
+    }
+    print "\n";
+}
+
+
+# loop through the hash and build annotation data and source xml doc
+# ---------------------------------------------------------------------------
+sub write_xml
+{
+    # Builds the ImportXML document and saves it to $output_file.
+    # The document holds one Source page filled from %source, then, for
+    # every row of @field_data, an "Annotation:N" page using
+    # $template_name and an "Annotation:N/Provenance" page carrying one
+    # Provenance_Repeater template per field.
+    # Takes no arguments.  Dies when a row has no "Accession ID" value.
+    print "Transforming data...\n\n";
+    my $curr_node;       # placeholder for general node cursor
+    my $curr_prov_node;  # placeholder for node cursor in provenance pages
+    my $curr_annot_node; # placeholder for node cursor in annotation pages
+
+    # Title of the single Source page.  Every provenance repeater below
+    # refers back to it, so both uses share this one value.
+    my $source_page_title = "Source:$prov_title_seed";
+
+    $xml = XML::Smart->new($output_data, 'XML::Smart::Parser');
+
+    # set root element, source page and elements
+    # (temp set of page title until moved to import extension)
+    $xml->{Pages}{Page} = {Title => $source_page_title};
+
+    $curr_node = $xml->{Pages}{Page};
+    $curr_node->{Template} = {Name => 'Source'};
+    $curr_node = $curr_node->{Template};
+
+    # [ field name written to the XML, key in %source ]
+    my @source_fields = (
+        [ 'Source Date Stamp', 'SourceDateStamp' ],
+        [ 'Source Database',   'SourceDatabase'  ],
+        [ 'Source Version',    'SourceVersion'   ],
+        [ 'Source URI',        'SourceURI'       ],
+        [ 'Source File',       'SourceFile'      ],
+    );
+    for my $i (0 .. $#source_fields) {
+        my ($field_name, $source_key) = @{ $source_fields[$i] };
+        $curr_node->{Field}[$i] = {Name => $field_name};
+        $curr_node->{Field}[$i]->content(0, $source{$source_key});
+    }
+
+    $curr_node = $curr_node->back->back; # return to <Pages> node
+
+    if ($debug) { print "Current node: " . $curr_node->path . "\n"; }
+
+    my $next_page_title_id = $annot_title_seed;
+
+    # iterate through the data
+    foreach my $row (@field_data) {
+        my $annot_title = "Annotation:$next_page_title_id";
+        my $prov_title  = "$annot_title/Provenance";
+
+        # set up next annotation page
+        push(@{$curr_node->{Page}}, { Title => $annot_title });
+
+        $curr_annot_node = $curr_node->{Page}("Title", "eq", $annot_title);
+
+        if ($debug) { print "Curr annot node: ".$curr_annot_node->path."\n";}
+
+        $curr_annot_node->{Template} = {Name => "$template_name"};
+        $curr_annot_node = $curr_annot_node->{Template};
+
+        # set up next provenance page
+        push(@{$curr_node->{Page}}, { Title => $prov_title });
+
+        $curr_prov_node = $curr_node->{Page}("Title", "eq", $prov_title);
+
+        if ($debug) {print "Curr prov node: " . $curr_prov_node->path . "\n"; }
+
+        $curr_prov_node->{Template} = {Name => 'Provenance'};
+        $curr_prov_node = $curr_prov_node->{Template};
+        $curr_prov_node->{Field} = {Name => 'Annotation Page'};
+        $curr_prov_node->{Field}->content(0, $annot_title);
+        $curr_prov_node = $curr_prov_node->back;
+
+        my $field_ct = 0; # counter for field position in pages
+
+        # The accession id is scoped to this row so that a value from an
+        # earlier row can never stand in for a missing one.
+        my $curr_accession_id = $row->{'Accession ID'};
+        if (!(defined $curr_accession_id)) {
+            die "Error: No Accession ID available. Ending program.\n";
+        }
+        if ($debug) {
+            print "* Found Accession ID: $curr_accession_id *\n";
+        }
+
+        # iterate through the annotation data and assign to <Field> elements
+        foreach my $key (keys %$row) {
+            if ($debug) { print "$key => " . $row->{$key} . "\n"; }
+
+            # build the annotation page
+            $curr_annot_node->{Field}[$field_ct] = {Name => $key};
+            $curr_annot_node->{Field}[$field_ct]->content(0, $row->{$key});
+            $field_ct++;
+
+            # add a corresponding template to the annotation provenance page
+            push(@{$curr_prov_node->{Template}},
+                {Name => 'Provenance_Repeater'});
+
+            # grab the last template you added (a single element, not a slice)
+            $curr_prov_node = ${ $curr_prov_node->{Template} }[-1];
+
+            # assign the relevant provenance field data; 'Source' names
+            # the Source page created at the top of this document
+            $curr_prov_node->{Field}[0] = {Name => 'Source'};
+            $curr_prov_node->{Field}[0]->content(0, $source_page_title);
+            $curr_prov_node->{Field}[1] = {Name => 'Source Accession ID'};
+            $curr_prov_node->{Field}[1]->content(0, $curr_accession_id);
+            $curr_prov_node->{Field}[2] = {Name => 'Source Template'};
+            $curr_prov_node->{Field}[2]->content(0, $template_name);
+            $curr_prov_node->{Field}[3] = {Name => 'Source Field'};
+            $curr_prov_node->{Field}[3]->content(0, $key);
+
+            # step back up to the provenance page for the next field
+            $curr_prov_node = $curr_prov_node->back;
+        }
+        $next_page_title_id++;
+    }
+
+    # write out xml doc to a single ImportXML file
+    print "Writing data to output file...\n\n";
+    $xml->save($output_file);
+}
+
+
+# print the transformed data (as xml)
+# ---------------------------------------------------------------------------
+sub show_output
+{
+    # Prints the document built by write_xml as XML text under an
+    # "[XML]" heading, and keeps that text in $output_data.
+    print "[XML]\n";
+    $output_data = $xml->data;
+    print "$output_data\n";
+}
+
+# ---------------------------------------------------------------------------
+# main
+# ---------------------------------------------------------------------------
+
+# Run the pipeline: parse options, load the input, build and save the
+# XML.  The two dumps are shown only in verbose mode.
+init();
+import_data();
+show_input() if $verbose;
+write_xml();
+show_output() if $verbose;
+
+exit;
+
It should be called in the following format:
<pre>
{{Annotation
-|Accession ID=
|Species Name=
|Species ID=
|Gene Symbol=
= Annotation =
{| class="wikitable"
-! Accession ID
-| [[Has Accession ID::{{{Accession ID|}}}]]
-|-
! Species Name
| [[Has Species Name::{{{Species Name|}}}]]
|-
| [[Has Gene Name::{{{Gene Name|}}}]]
|-
! Gene Synonyms
-| coming soon...
+| {{#if: {{#ask:[[Category:Gene_Synonyms]][[Is Associated With Annotation::{{PAGENAME}} ]] }}
+ | {{#ask:[[Is Gene Synonym Pair::~{{PAGENAME}}/Gene_Synonyms]]
+ | ?is_Gene_Synonym
+ | headers=hide
+ | mainlabel=-
+ | format=list
+ }}
+ | None available
+}}
|-
! Gene Type
| [[Has Gene Type::{{{Gene Type|}}}]]
{{#if: {{#ask:[[Category:Ontological_References]][[Is Associated With Annotation::{{PAGENAME}} ]] }}
| {{#ask:[[Is Ontological Reference::~{{PAGENAME}}/Ontologies]]
| mainlabel=-
+ |? from_Ontology
|? has_Term_ID
|? has_Term_Name
|? has_Aspect