From 8ec483e8dd4c203ec3415ff5a29ae3c9486f2ffb Mon Sep 17 00:00:00 2001
From: elserj
Date: Thu, 10 Nov 2016 00:18:45 +0000
Subject: [PATCH] Add aiso xml parsing script. Runs over zip files in current
 directory and adds them to floret/image_annotation mysql.

svn path=/; revision=646
---
 interactome_scripts/aiso_xml_parser.pl | 180 +++++++++++++++++++++++++
 1 file changed, 180 insertions(+)
 create mode 100755 interactome_scripts/aiso_xml_parser.pl

diff --git a/interactome_scripts/aiso_xml_parser.pl b/interactome_scripts/aiso_xml_parser.pl
new file mode 100755
index 0000000..858f770
--- /dev/null
+++ b/interactome_scripts/aiso_xml_parser.pl
@@ -0,0 +1,180 @@
+#!/usr/bin/perl
+
+# aiso_xml_parser.pl
+#
+# Loads aiso zip archives into the floret/image_annotation MySQL database.
+# Every *.zip file in the current directory is read: metadata.xml supplies
+# the curator, species and annotated segments, and the archive member whose
+# name contains "image" is stored as the annotated image.
+#
+# usage: run without arguments from the directory that holds the zip files.
+# An archive that cannot be loaded is reported on STDERR and the run continues.
+
+use strict;
+use warnings;
+
+# Search both known checkout locations so DbiFloret resolves from either.
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts",
+        "$ENV{HOME}/jaiswallab_svn/interactome_scripts";
+
+use XML::LibXML;
+use Archive::Zip;
+use Bio::DB::Taxonomy;
+use DbiFloret;
+
+# find_species.pl lives in one of two checkout locations; load the first one
+# that exists (nothing is loaded when neither is present).
+for my $helper (
+    "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl",
+    "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl",
+) {
+    next unless -e $helper;
+    require $helper;
+    last;
+}
+
+my $dbh = DbiFloret::dbconnect_local()
+    or die "could not connect to the floret database\n";
+
+# Make failed statements throw: every archive is loaded inside an eval, so a
+# database error is reported for that archive instead of passing unnoticed.
+$dbh->{RaiseError} = 1;
+
+# The Entrez taxonomy client and the statement handles do not depend on the
+# archive being processed, so they are created once and reused.
+my $taxonomy = Bio::DB::Taxonomy->new(-source => 'entrez');
+
+# NOTE(review): GeomFromText() was removed in MySQL 8.0 in favour of
+# ST_GeomFromText() - confirm the server version before upgrading.
+my %sql = (
+    check_curator       => "select curator_id from curator where firstname = ?",
+    insert_curator      => "insert into curator (firstname) values (?)",
+    check_taxon         => "select taxon_id from taxon where species_id = ?",
+    insert_taxon        => "insert into taxon (species_id, species_name) values (?,?)",
+    insert_image        => "insert into annotated_image(curator_id, taxon_id, annotated_image) values (?,?,?)",
+    insert_segment      => "insert into segment(annotated_image_id,coordinates) values (?,GeomFromText(?))",
+    check_term          => "select annotated_term_id from annotated_term where ontology_term_id = ?",
+    insert_term         => "insert into annotated_term(ontology_term_id, ontology_term_name) values (?,?)",
+    insert_term_image   => "insert into annotated_term_image(annotated_term_id, annotated_image_id) values (?,?)",
+    insert_term_segment => "insert into annotated_term_segment(segment_id, annotated_term_id) values (?,?)",
+);
+my %sth = map { $_ => $dbh->prepare($sql{$_}) } keys %sql;
+
+for my $infile (glob("*.zip")) {
+    print "working on file $infile\n";
+    eval { load_archive($infile); 1 }
+        or warn "could not load $infile: $@";
+}
+
+# Load one zip archive: validate its contents, then store curator, taxon,
+# image, segments and ontology terms.  Dies with a message on any failure.
+# The archive-level checks all run before the first insert, so an archive
+# rejected by them leaves no rows behind.
+sub load_archive {
+    my ($infile) = @_;
+
+    my $zip = Archive::Zip->new($infile)
+        or die "not a readable zip archive\n";
+
+    my $xml_content = $zip->contents('metadata.xml');
+    die "metadata.xml is missing or empty\n"
+        unless defined $xml_content && length $xml_content;
+    my $dom = XML::LibXML->load_xml(string => $xml_content);
+
+    my $species = $dom->findvalue('//species/@name');
+    my $curator = $dom->findvalue('//curator_name');
+
+    # Get the ncbi taxon id; without one the taxon lookup below could never
+    # match and a taxon row with a NULL species_id would be added per archive.
+    my $ncbi_id = $taxonomy->get_taxonid($species);
+    die "no NCBI taxon id found for species '$species'\n"
+        unless defined $ncbi_id;
+
+    # The image is the member whose name contains "image"; when several
+    # members match, the last one listed is used.
+    my @image_members = grep { /image/ } $zip->memberNames();
+    die "archive contains no image member\n" unless @image_members;
+    my $image = $zip->contents($image_members[-1]);
+
+    my $curator_id = find_or_insert(
+        $sth{check_curator},  [$curator],
+        $sth{insert_curator}, [$curator],
+    );
+    my $taxon_id = find_or_insert(
+        $sth{check_taxon},  [$ncbi_id],
+        $sth{insert_taxon}, [ $ncbi_id, $species ],
+    );
+
+    $sth{insert_image}->execute($curator_id, $taxon_id, $image);
+    my $annotated_image_id = $sth{insert_image}->{mysql_insertid};
+
+    # Several segments may carry the same term; the term is linked to the
+    # image only once.
+    my %term_linked;
+
+    for my $segment ($dom->findnodes('/image_data/segments/segment')) {
+        my $term    = $segment->findvalue('./annotation_term');
+        my $term_id = $segment->findvalue('./annotation_id');
+
+        my $wkt = coords_to_wkt($segment->findvalue('./polygon_coords'));
+        if (!defined $wkt) {
+            warn "skipping segment '$term_id' in $infile: bad polygon_coords\n";
+            next;
+        }
+
+        $sth{insert_segment}->execute($annotated_image_id, $wkt);
+        my $segment_id = $sth{insert_segment}->{mysql_insertid};
+
+        # only need to insert if term isn't already in db
+        my $annotated_term_id = find_or_insert(
+            $sth{check_term},  [$term_id],
+            $sth{insert_term}, [ $term_id, $term ],
+        );
+
+        $sth{insert_term_image}->execute($annotated_term_id, $annotated_image_id)
+            unless $term_linked{$annotated_term_id}++;
+        $sth{insert_term_segment}->execute($segment_id, $annotated_term_id);
+    }
+
+    return;
+}
+
+# Return the id selected by $sth_check, or run $sth_insert and return the new
+# row's auto-increment id when the lookup finds nothing.  The bind values for
+# each statement are passed as array references.
+sub find_or_insert {
+    my ($sth_check, $check_args, $sth_insert, $insert_args) = @_;
+
+    my ($id) = $dbh->selectrow_array($sth_check, undef, @$check_args);
+    return $id if defined $id;
+
+    $sth_insert->execute(@$insert_args);
+    return $sth_insert->{mysql_insertid};
+}
+
+# Convert a flat "x1,y1,x2,y2,..." coordinate list into WKT (well known text)
+# of the form "LineString(x1 y1, x2 y2, ...)".  Surrounding whitespace and a
+# trailing comma are ignored.  Returns undef (in scalar context) unless the
+# list holds at least two complete x/y pairs.
+sub coords_to_wkt {
+    my ($coords) = @_;
+    return unless defined $coords;
+
+    my @values = grep { length } split /[\s,]+/, $coords;
+    return if @values < 4 || @values % 2;
+
+    my @points;
+    while (my ($x, $y) = splice @values, 0, 2) {
+        push @points, "$x $y";
+    }
+    return 'LineString(' . join(', ', @points) . ')';
+}
+
+# The following will retrieve the image from the database
+# my $sth_get_image = $dbh->prepare("select annotated_image from annotated_image where annotated_image_id=?");
+# $sth_get_image->execute($annotated_image_id);
+# my $retrieved_image = $sth_get_image->fetch();
+#
+# open IMAGE, ">retrieved_image.png" or die $!;
+# print IMAGE @$retrieved_image;
+# close IMAGE;
-- 
2.34.1