# universal
my %source;
my $output_data;
-my %aspects;
# tab, csv
my $template_name;
my $xml; # represents the xml doc
# gaf
+my %aspects; # aspect code => aspect name (e.g. C => "Cellular Component"); set from the term_id prefix
+my $ontology_name; # "Gene Ontology" or "Plant Ontology"; set alongside %aspects, written to the "Ontology" field
my %annotation_universals; # holds values assumed not to vary across the file
my %annotations; # keyed on Gene Symbol
. Dumper(\%annotation_universals) . "\n\n"; }
# identify what kind of ontology term we're dealing with and set our
- # aspect names accordingly
+ # aspects and ontology name accordingly
switch ((split(':',$curr_line_hash{"term_id"}))[0]) {
case 'GO' { # Gene Ontology
%aspects = (
C => "Cellular Component",
F => "Molecular Function"
);
+ $ontology_name = "Gene Ontology";
}
case 'PO' { # Plant Ontology
%aspects = (
A => "Plant Anatomy",
G => "Plant Growth and Development Stage"
);
+ $ontology_name = "Plant Ontology";
}
}
}
# ---------------------------------------------------------------------------
sub transform_gaf
{
+ # define templates and their fields for Provenance-generation
+ my $template_field_map = {
+ Annotation => [
+ 'Species Name',
+ 'Species ID',
+ 'Gene Symbol',
+ 'Gene Name',
+ 'Gene Locus',
+ 'Gene Type',
+ 'Chromosome',
+ 'Has Phenotype'
+ ],
+ Gene_Synonym_Repeater => [
+ 'Gene Synonym'
+ ],
+ Ontological_Reference_Repeater => [
+ 'Ontology',
+ 'Term ID',
+ 'Aspect',
+ 'Evidence Code'
+ ]
+ };
+
# create new xml doc, write to string
my $writer = new XML::Writer(
OUTPUT => \$output_data,
for ($element) {
@pretty_elements = /[A-Z](?:[A-Z]+|[a-z]*)(?=$|[A-Z])/g;
}
- $writer->startTag("Field",Name=>"@pretty_elements");
- $writer->characters($source{$element});
- $writer->endTag("Field");
+ $writer->dataElement("Field", $source{$element}, Name=>"@pretty_elements");
}
$writer->endTag("Template");
foreach my $annotation (keys %annotations)
{
- #my %curr_annot = \$annotation; # ?
-
# create annotation page
$writer->startTag("Page",Title=>"Annotation:$annot_title_count");
$writer->startTag("Template",Name=>"Annotation");
-
- $writer->startTag("Field",Name=>"Species Name");
- $writer->characters($annotation_universals{'Species Name'});
- $writer->endTag("Field");
- $writer->startTag("Field",Name=>"Species ID");
- $writer->characters($annotation_universals{'Species ID'});
- $writer->endTag("Field");
- $writer->startTag("Field",Name=>"Gene Symbol");
- $writer->characters($annotation);
- $writer->endTag("Field");
- # $writer->startTag("Field",Name=>"Gene Name");
- # $writer->characters();
- # $writer->endTag("Field");
- # $writer->startTag("Field",Name=>"Gene Locus");
- # $writer->characters();
- # $writer->endTag("Field");
- $writer->startTag("Field",Name=>"Gene Type");
- $writer->characters($annotation_universals{'Gene Type'});
- $writer->endTag("Field");
- # $writer->startTag("Field",Name=>"Chromosome");
- # $writer->characters();
- # $writer->endTag("Field");
- $writer->startTag("Field",Name=>"Has Phenotype");
- $writer->characters("No");
- $writer->endTag("Field");
-
+ $writer->dataElement("Field", $annotation_universals{"Species Name"}, Name=>"Species Name");
+ $writer->dataElement("Field", $annotation_universals{"Species ID"}, Name=>"Species ID");
+ $writer->dataElement("Field", $annotation, Name=>"Gene Symbol");
+ $writer->dataElement("Field", $annotations{$annotation}{"Gene Name"}, Name=>"Gene Name");
+ $writer->dataElement("Field", $annotations{$annotation}{"Gene Locus"}, Name=>"Gene Locus");
+ $writer->dataElement("Field", ucfirst($annotation_universals{"Gene Type"}), Name=>"Gene Type");
+ $writer->dataElement("Field", $annotations{$annotation}{"Chromosome"}, Name=>"Chromosome");
+ $writer->dataElement("Field", "No", Name=>"Has Phenotype");
$writer->endTag("Template");
$writer->endTag("Page");
- # start provenance page (separate node and attach at the end?)
+ # create gene synonyms page
+ if (length($annotations{$annotation}{'Gene Synonyms'}) > 0)
+ {
+ $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Gene Synonyms");
+ $writer->startTag("Template",Name=>"Gene_Synonyms");
+ $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+ $writer->endTag("Template");
+
+ foreach (split('\|',$annotations{$annotation}{'Gene Synonyms'}))
+ {
+ $writer->startTag("Template",Name=>"Gene_Synonym_Repeater");
+ $writer->dataElement("Field", $_, Name=>"Gene Synonym");
+ $writer->endTag("Template");
+ }
+ $writer->endTag("Page");
+ }
+
+ # create ont refs page
+ if (scalar(@{$annotations{$annotation}{"Ontological Reference"}}) > 0)
+ {
+ $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Ontologies");
+ $writer->startTag("Template",Name=>"Ontological_References");
+ $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+ $writer->endTag("Template");
+
+ foreach (@{$annotations{$annotation}{"Ontological Reference"}})
+ {
+ $writer->startTag("Template",Name=>"Ontological_Reference_Repeater");
+ $writer->dataElement("Field", $ontology_name, Name=>"Ontology");
+ $writer->dataElement("Field", $$_{"Term ID"}, Name=>"Term ID");
+ $writer->dataElement("Field", $$_{"Aspect"}, Name=>"Aspect");
+ $writer->dataElement("Field", $$_{"Evidence Code"}, Name=>"Evidence Code");
+ $writer->endTag("Template");
+ }
+ $writer->endTag("Page");
+ }
- # iterate synonyms
- # create gene synonyms page
- # add to provenance
+ # create provenance page
+ $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Provenance");
+ $writer->startTag("Template",Name=>"Provenance");
+ $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+ $writer->endTag("Template");
+
+ foreach (@{@$template_field_map{"Annotation"}})
+ {
+ $writer->startTag("Template",Name=>"Provenance_Repeater");
+ $writer->dataElement("Field", $_, Name=>"Source Field");
+ $writer->dataElement("Field", "Annotation", Name=>"Source Template");
+ $writer->dataElement("Field", $annotations{$annotation}{"Gene Locus"}, Name=>"Source Accession ID");
+ $writer->dataElement("Field", "Source:$SOURCE_TITLE_SEED", Name=>"Source");
+ $writer->endTag("Template");
+ }
+
+ $writer->endTag("Page");
- # iterate @%ont refs
- # create ont ref
- # add to provenance
-
$annot_title_count++;
}