From: preecej Date: Fri, 2 Sep 2011 21:51:09 +0000 (+0000) Subject: Added Reference Publication page generation; modified ontology X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=851fe8731a804697d66667dd69b6602a3d3242f6;p=old-jaiswallab-svn%2F.git Added Reference Publication page generation; modified ontology provenance format svn path=/; revision=174 --- diff --git a/preecej/semantic_wiki/paw_TransformForImport.pl b/preecej/semantic_wiki/paw_TransformForImport.pl index 32864e0..ce7e82c 100644 --- a/preecej/semantic_wiki/paw_TransformForImport.pl +++ b/preecej/semantic_wiki/paw_TransformForImport.pl @@ -140,8 +140,7 @@ my %annotations; # keyed on Gene Symbol # script my $SOURCE_TITLE_SEED = 1; my $ANNOT_TITLE_SEED = 1; - -my $species_name = "Arabidopsis thaliana"; # obviously temporary; need an NCBI lookup +my $PUB_TITLE_SEED = 1; $Data::Dumper::Pad = "... "; @@ -407,21 +406,25 @@ sub import_gaf %annotation_universals = ( "Source" => $curr_line_hash{"db"}, # currently not in use "Gene Type" => $curr_line_hash{"db_object_type"}, - "Species ID" => "NCBI:" . (split(':',$curr_line_hash{"taxon"}))[1], - "Species Name" => $species_name # TODO: get this from NCBI + "Species ID" => "NCBI:" . (split(':',$curr_line_hash{"taxon"}))[1]#, + #"Species Name" => $species_name # TODO: get this from NCBI ); - if ($debug) { print "...\n" - . Dumper(\%annotation_universals) . "\n\n"; } - # set species-specific values switch ($annotation_universals{"Species ID"}) { - case "NCBI:3702" { $locus_finder_expr = "/[Aa][Tt].[Gg]/"; } + case "NCBI:3702" { + $locus_finder_expr = "/[Aa][Tt].[Gg]/"; + $annotation_universals{"Species Name"} = "Arabidopsis thaliana"; # temp; need an NCBI lookup + } else { die($annotation_universals{"Species ID"} . " is not a valid NCBI taxon ID.\n"); } } + + if ($debug) { print "...\n" + . Dumper(\%annotation_universals) . "\n\n"; } + } # set the ontology for the current line @@ -733,6 +736,8 @@ sub transform_gaf ] }; + my %ref_pubs; # list of non-duplicate publication references + # create new xml doc, write to string my $writer = new XML::Writer( OUTPUT => \$output_data, @@ -809,8 +814,19 @@ sub transform_gaf foreach my $ont_term (keys %{$annotations{$annotation}{"Ontological References"}}) { - - # TODO: gather PMID's for separate Ref Publication page creation (avoid dupes) + # gather PMID's for separate Ref Publication page creation (avoid dupes) + if (scalar keys (%{$annotations{$annotation}{"Ontological References"}{$ont_term}{"Publications"}}) > 0) { + foreach my $pub_term (keys %{$annotations{$annotation}{"Ontological References"}{$ont_term}{"Publications"}}) + { + if (!exists $ref_pubs{$pub_term}) { + $ref_pubs{$pub_term} = { "Annotation:$annot_title_count" => "" }; + } + else + { + $ref_pubs{$pub_term}{"Annotation:$annot_title_count"} = ""; + } + } + } $writer->startTag("Template",Name=>"$ontology_info{$curr_ontology_type}{name} Reference Repeater"); $writer->dataElement("Field", $ontology_info{$curr_ontology_type}{"name"}, Name=>"Ontology"); @@ -868,13 +884,10 @@ sub transform_gaf # items on the Ontologies subpage if (scalar keys (%{$annotations{$annotation}{"Ontological References"}}) > 0) { - my $ont_count = 0; foreach my $ont_term (keys %{$annotations{$annotation}{"Ontological References"}}) { - $ont_count++; - $writer->startTag("Template",Name=>"Provenance_Repeater"); - $writer->dataElement("Field", "Ontologies#$ont_count", Name=>"Source Field or Object"); + $writer->dataElement("Field", $ont_term, Name=>"Source Field or Object"); $writer->dataElement("Field", "Ontological References", Name=>"Source Category"); $writer->dataElement("Field", $annotations{$annotation}{"Accession ID"}, Name=>"Source Accession ID"); $writer->dataElement("Field", "Source:$SOURCE_TITLE_SEED", Name=>"Source"); @@ -887,6 +900,32 @@ sub transform_gaf $annot_title_count++; } + + if ($debug) { print "...\n" + . Dumper(\%ref_pubs) . "\n\n"; } + + # create Reference Publication pages + my $pub_title_count = $PUB_TITLE_SEED; + + if (scalar keys (%ref_pubs) > 0) { + + foreach my $ref_pub (keys %ref_pubs) + { + $writer->startTag("Page",Title=>"Publication:$pub_title_count"); + $writer->startTag("Template",Name=>"Reference_Publication"); + $writer->dataElement("Field", $ref_pub, Name=>"Publication ID"); + my @annot_refs; # holds com-del string of annotation page references + foreach my $annot_ref (keys %{$ref_pubs{$ref_pub}}) + { + push @annot_refs, $annot_ref; + } + $writer->dataElement("Field", join(',',@annot_refs), Name=>"Annotation References"); + $writer->endTag("Template"); + $writer->endTag("Page"); + + $pub_title_count++; + } + } # close doc $writer->endTag("Pages");