Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be seen immediately.
Added Reference Publication page generation; modified ontology
author preecej <preecej@localhost>
Fri, 2 Sep 2011 21:51:09 +0000 (21:51 +0000)
committer preecej <preecej@localhost>
Fri, 2 Sep 2011 21:51:09 +0000 (21:51 +0000)
provenance format

svn path=/; revision=174

preecej/semantic_wiki/paw_TransformForImport.pl

index 32864e056eb3afef7f4d72ba3b4af251b83416c7..ce7e82c855c1f309497abc1c2d12c7cca3def30a 100644 (file)
@@ -140,8 +140,7 @@ my %annotations; # keyed on Gene Symbol
 # script
 my $SOURCE_TITLE_SEED = 1;
 my $ANNOT_TITLE_SEED = 1;
-
-my $species_name = "Arabidopsis thaliana"; # obviously temporary; need an NCBI lookup
+my $PUB_TITLE_SEED = 1;
 
 $Data::Dumper::Pad = "... "; 
 
@@ -407,21 +406,25 @@ sub import_gaf
             %annotation_universals = (
                 "Source"       => $curr_line_hash{"db"}, # currently not in use
                 "Gene Type"    => $curr_line_hash{"db_object_type"},
-                "Species ID"   => "NCBI:" . (split(':',$curr_line_hash{"taxon"}))[1],
-                "Species Name" => $species_name # TODO: get this from NCBI
+                "Species ID"   => "NCBI:" . (split(':',$curr_line_hash{"taxon"}))[1]#,
+                #"Species Name" => $species_name # TODO: get this from NCBI
                 );
             
-            if ($debug) { print "...<DEBUG: \%annotation_universals>\n"
-                . Dumper(\%annotation_universals) . "\n\n"; }
-
             # set species-specific values
             switch ($annotation_universals{"Species ID"}) {
-                case "NCBI:3702" { $locus_finder_expr = "/[Aa][Tt].[Gg]/"; }
+                case "NCBI:3702" {
+                    $locus_finder_expr = "/[Aa][Tt].[Gg]/";
+                    $annotation_universals{"Species Name"} = "Arabidopsis thaliana"; # temp; need an NCBI lookup
+                }
                 else {
                     die($annotation_universals{"Species ID"} 
                         . " is not a valid NCBI taxon ID.\n");
                 }
             }
+
+            if ($debug) { print "...<DEBUG: \%annotation_universals>\n"
+                . Dumper(\%annotation_universals) . "\n\n"; }
+
         }
         
         # set the ontology for the current line
@@ -733,6 +736,8 @@ sub transform_gaf
             ]
         };
     
+    my %ref_pubs; # list of non-duplicate publication references
+        
     # create new xml doc, write to string
     my $writer = new XML::Writer(
         OUTPUT      => \$output_data,
@@ -809,8 +814,19 @@ sub transform_gaf
     
                     foreach my $ont_term (keys %{$annotations{$annotation}{"Ontological References"}})
                     {
-
-                        # TODO: gather PMID's for separate Ref Publication page creation (avoid dupes)
+                        # gather PMID's for separate Ref Publication page creation (avoid dupes)
+                        if (scalar keys (%{$annotations{$annotation}{"Ontological References"}{$ont_term}{"Publications"}}) > 0) {
+                            foreach my $pub_term (keys %{$annotations{$annotation}{"Ontological References"}{$ont_term}{"Publications"}})
+                            {
+                                if (!exists $ref_pubs{$pub_term}) {
+                                    $ref_pubs{$pub_term} = { "Annotation:$annot_title_count" => "" };
+                                }
+                                 else
+                                 {
+                                     $ref_pubs{$pub_term}{"Annotation:$annot_title_count"} = "";
+                                 }
+                            }
+                        }
                         
                         $writer->startTag("Template",Name=>"$ontology_info{$curr_ontology_type}{name} Reference Repeater");
                             $writer->dataElement("Field", $ontology_info{$curr_ontology_type}{"name"}, Name=>"Ontology");
@@ -868,13 +884,10 @@ sub transform_gaf
                 # items on the Ontologies subpage
                 if (scalar keys (%{$annotations{$annotation}{"Ontological References"}}) > 0)
                 {
-                    my $ont_count = 0;
                     foreach my $ont_term (keys %{$annotations{$annotation}{"Ontological References"}})
                     {
-                        $ont_count++;
-                        
                         $writer->startTag("Template",Name=>"Provenance_Repeater");
-                            $writer->dataElement("Field", "Ontologies#$ont_count", Name=>"Source Field or Object");    
+                            $writer->dataElement("Field", $ont_term, Name=>"Source Field or Object");    
                             $writer->dataElement("Field", "Ontological References", Name=>"Source Category");
                             $writer->dataElement("Field", $annotations{$annotation}{"Accession ID"}, Name=>"Source Accession ID");
                             $writer->dataElement("Field", "Source:$SOURCE_TITLE_SEED", Name=>"Source");
@@ -887,6 +900,32 @@ sub transform_gaf
             
             $annot_title_count++;
         }
+
+    if ($debug) { print "...<DEBUG: \%ref_pubs>\n"
+        . Dumper(\%ref_pubs) . "\n\n"; }
+        
+    # create Reference Publication pages
+    my $pub_title_count = $PUB_TITLE_SEED;
+
+    if (scalar keys (%ref_pubs) > 0) {
+    
+        foreach my $ref_pub (keys %ref_pubs)
+        {
+            $writer->startTag("Page",Title=>"Publication:$pub_title_count");
+                $writer->startTag("Template",Name=>"Reference_Publication");
+                    $writer->dataElement("Field", $ref_pub, Name=>"Publication ID");
+                    my @annot_refs; # holds com-del string of annotation page references
+                    foreach my $annot_ref (keys %{$ref_pubs{$ref_pub}})
+                    {
+                        push @annot_refs, $annot_ref;
+                    }
+                    $writer->dataElement("Field", join(',',@annot_refs), Name=>"Annotation References");
+                $writer->endTag("Template");
+            $writer->endTag("Page");
+            
+            $pub_title_count++;
+        }
+    }
         
     # close doc
     $writer->endTag("Pages");