Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Completed GAF import transformation script for scale testing.
author preecej <preecej@localhost>
Fri, 29 Jul 2011 22:36:33 +0000 (22:36 +0000)
committer preecej <preecej@localhost>
Fri, 29 Jul 2011 22:36:33 +0000 (22:36 +0000)
svn path=/; revision=130

preecej/semantic_wiki/paw_TransformForImport.pl

index cdbf52083c7739ef7689e0caa0caacabfe4f4cd2..f1f3dd77397450fda550e9b3bb17b1b206a64e52 100644 (file)
@@ -118,7 +118,6 @@ my $debug = 0; # debugging switch
 # universal 
 my %source;
 my $output_data;
-my %aspects;
 
 # tab, csv
 my $template_name;
@@ -127,6 +126,8 @@ my @field_data;
 my $xml; # represents the xml doc
 
 # gaf
+my %aspects;
+my $ontology_name;
 my %annotation_universals; # holds values assumed not to vary across the file
 my %annotations; # keyed on Gene Symbol
 
@@ -319,7 +320,7 @@ sub import_gaf
                 . Dumper(\%annotation_universals) . "\n\n"; }
     
             # identify what kind of ontology term we're dealing with and set our 
-            # aspect names accordingly
+            # aspects and ontology name accordingly
             switch ((split(':',$curr_line_hash{"term_id"}))[0]) {
                 case 'GO' { # Gene Ontology
                     %aspects = (
@@ -327,12 +328,14 @@ sub import_gaf
                         C => "Cellular Component",
                         F => "Molecular Function"
                     );
+                    $ontology_name = "Gene Ontology";
                 }
                 case 'PO' { # Plant Ontology
                     %aspects = (
                         A => "Plant Anatomy",
                         G => "Plant Growth and Development Stage"
                     );
+                    $ontology_name = "Plant Ontology";
                 }
             }
         }
@@ -607,6 +610,29 @@ sub transform_generic
 # ---------------------------------------------------------------------------
 sub transform_gaf
 {
+    # define templates and their fields for Provenance-generation
+    my $template_field_map = {
+        Annotation => [
+            'Species Name',
+            'Species ID',
+            'Gene Symbol',
+            'Gene Name',
+            'Gene Locus',
+            'Gene Type',
+            'Chromosome',
+            'Has Phenotype'
+            ],
+        Gene_Synonym_Repeater => [
+            'Gene Synonym'
+            ],
+        Ontological_Reference_Repeater => [
+            'Ontology',
+            'Term ID',
+            'Aspect',
+            'Evidence Code'
+            ]
+        };
+    
     # create new xml doc, write to string
     my $writer = new XML::Writer(
         OUTPUT      => \$output_data,
@@ -631,9 +657,7 @@ sub transform_gaf
                 for ($element) {
                     @pretty_elements = /[A-Z](?:[A-Z]+|[a-z]*)(?=$|[A-Z])/g;
                 }
-                $writer->startTag("Field",Name=>"@pretty_elements");
-                    $writer->characters($source{$element});
-                $writer->endTag("Field");
+                $writer->dataElement("Field", $source{$element}, Name=>"@pretty_elements");
             }
 
             $writer->endTag("Template");
@@ -644,50 +668,75 @@ sub transform_gaf
         
         foreach my $annotation (keys %annotations)
         {
-            #my %curr_annot = \$annotation; # ?
-
             # create annotation page
             $writer->startTag("Page",Title=>"Annotation:$annot_title_count");
                 $writer->startTag("Template",Name=>"Annotation");
-
-                $writer->startTag("Field",Name=>"Species Name");
-                $writer->characters($annotation_universals{'Species Name'});
-                $writer->endTag("Field");
-                $writer->startTag("Field",Name=>"Species ID");
-                    $writer->characters($annotation_universals{'Species ID'});
-                $writer->endTag("Field");
-                $writer->startTag("Field",Name=>"Gene Symbol");
-                    $writer->characters($annotation);
-                $writer->endTag("Field");
-                # $writer->startTag("Field",Name=>"Gene Name");
-                #     $writer->characters();
-                # $writer->endTag("Field");
-                # $writer->startTag("Field",Name=>"Gene Locus");
-                #     $writer->characters();
-                # $writer->endTag("Field");
-                $writer->startTag("Field",Name=>"Gene Type");
-                    $writer->characters($annotation_universals{'Gene Type'});
-                $writer->endTag("Field");
-                # $writer->startTag("Field",Name=>"Chromosome");
-                #     $writer->characters();
-                # $writer->endTag("Field");
-                $writer->startTag("Field",Name=>"Has Phenotype");
-                    $writer->characters("No");
-                $writer->endTag("Field");
-
+                    $writer->dataElement("Field", $annotation_universals{"Species Name"}, Name=>"Species Name");
+                    $writer->dataElement("Field", $annotation_universals{"Species ID"}, Name=>"Species ID");
+                    $writer->dataElement("Field", $annotation, Name=>"Gene Symbol");
+                    $writer->dataElement("Field", $annotations{$annotation}{"Gene Name"}, Name=>"Gene Name");
+                    $writer->dataElement("Field", $annotations{$annotation}{"Gene Locus"}, Name=>"Gene Locus");
+                    $writer->dataElement("Field", ucfirst($annotation_universals{"Gene Type"}), Name=>"Gene Type");
+                    $writer->dataElement("Field", $annotations{$annotation}{"Chromosome"}, Name=>"Chromosome");
+                    $writer->dataElement("Field", "No", Name=>"Has Phenotype");
                 $writer->endTag("Template");
             $writer->endTag("Page");
             
-            # start provenance page (separate node and attach at the end?)
+            # create gene synonyms page
+            if (length($annotations{$annotation}{'Gene Synonyms'}) > 0)
+            {
+                $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Gene Synonyms");
+                    $writer->startTag("Template",Name=>"Gene_Synonyms");
+                        $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+                    $writer->endTag("Template");
+    
+                    foreach (split('\|',$annotations{$annotation}{'Gene Synonyms'}))
+                    {
+                        $writer->startTag("Template",Name=>"Gene_Synonym_Repeater");
+                            $writer->dataElement("Field", $_, Name=>"Gene Synonym");    
+                        $writer->endTag("Template");
+                    }
+                $writer->endTag("Page");
+            }
+            
+            # create ont refs page
+            if (scalar(@{$annotations{$annotation}{"Ontological Reference"}}) > 0)
+            {
+                $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Ontologies");
+                    $writer->startTag("Template",Name=>"Ontological_References");
+                        $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+                    $writer->endTag("Template");
+    
+                    foreach (@{$annotations{$annotation}{"Ontological Reference"}})
+                    {
+                        $writer->startTag("Template",Name=>"Ontological_Reference_Repeater");
+                            $writer->dataElement("Field", $ontology_name, Name=>"Ontology");
+                            $writer->dataElement("Field", $$_{"Term ID"}, Name=>"Term ID");
+                            $writer->dataElement("Field", $$_{"Aspect"}, Name=>"Aspect");
+                            $writer->dataElement("Field", $$_{"Evidence Code"}, Name=>"Evidence Code");
+                        $writer->endTag("Template");
+                    }
+                $writer->endTag("Page");
+            }
             
-            # iterate synonyms 
-                # create gene synonyms page
-                # add to provenance
+            # create provenance page
+            $writer->startTag("Page",Title=>"Annotation:$annot_title_count/Provenance");
+                $writer->startTag("Template",Name=>"Provenance");
+                    $writer->dataElement("Field", "Annotation:$annot_title_count", Name=>"Annotation Page");
+                $writer->endTag("Template");
+                
+                    foreach (@{@$template_field_map{"Annotation"}})
+                    {
+                        $writer->startTag("Template",Name=>"Provenance_Repeater");
+                            $writer->dataElement("Field", $_, Name=>"Source Field");
+                            $writer->dataElement("Field", "Annotation", Name=>"Source Template");
+                            $writer->dataElement("Field", $annotations{$annotation}{"Gene Locus"}, Name=>"Source Accession ID");
+                            $writer->dataElement("Field", "Source:$SOURCE_TITLE_SEED", Name=>"Source");
+                        $writer->endTag("Template");
+                    }
+                
+            $writer->endTag("Page");
             
-            # iterate @%ont refs
-                # create ont ref
-                # add to provenance
-
             $annot_title_count++;
         }