--- /dev/null
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+if(-e "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl") {
+ require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+}elsif(-e "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl") {
+ require "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl";
+}
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use XML::LibXML;
+use Archive::Zip;
+use Bio::DB::Taxonomy;
+use DbiFloret;
+
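+# Disabled argument check: the script no longer takes an input file argument;
+# it processes every *.zip file found in the current directory instead.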
+# if ($#ARGV !=0) {
+ # print "usage: aiso_xml_parser.pl input_zip_file\n";
+ # exit;
+# }
+# Batch loader: for every *.zip archive in the current working directory, read
+# its metadata.xml and its image member, then store curator, taxon, image,
+# segment and annotation-term rows through the DbiFloret connection.
+#
+# An archive that cannot be opened, has no parsable metadata.xml, or has no
+# member whose name matches /image/ is reported on STDERR and skipped, so one
+# bad file does not abort the rest of the batch.
+my @files = glob("*.zip");
+
+my $dbh = DbiFloret::dbconnect_local;
+
+# The taxonomy client and all statement handles are loop-invariant, so they
+# are created once here rather than once per archive.
+my $taxonomy_db = Bio::DB::Taxonomy->new(-source => 'entrez');
+
+# species name => NCBI taxon id (possibly undef); avoids querying Entrez again
+# for a species that an earlier archive already looked up.
+my %ncbi_id_for;
+
+# curator table
+my $sth_check_curator  = $dbh->prepare("select curator_id from curator where firstname = ?");
+my $sth_insert_curator = $dbh->prepare("insert into curator (firstname) values (?)");
+
+# taxon table
+my $sth_check_taxon  = $dbh->prepare("select taxon_id from taxon where species_id = ?");
+my $sth_insert_taxon = $dbh->prepare("insert into taxon (species_id, species_name) values (?,?)");
+
+# annotated_image table
+my $sth_insert_image = $dbh->prepare("insert into annotated_image(curator_id, taxon_id, annotated_image) values (?,?,?)");
+
+# segment table; the coordinates are handed over as WKT text
+my $sth_insert_segment = $dbh->prepare("insert into segment(annotated_image_id,coordinates) values (?,GeomFromText(?))");
+
+# annotated_term table
+my $sth_insert_annotated_term = $dbh->prepare("insert into annotated_term(ontology_term_id, ontology_term_name) values (?,?)");
+my $sth_check_annotated_term  = $dbh->prepare("select annotated_term_id from annotated_term where ontology_term_id = ?");
+
+# annotated_term_image table
+my $sth_insert_annotated_term_image = $dbh->prepare("insert into annotated_term_image(annotated_term_id, annotated_image_id) values (?,?)");
+
+# annotated_term_segment table
+my $sth_insert_annotated_term_segment = $dbh->prepare("insert into annotated_term_segment(segment_id, annotated_term_id) values (?,?)");
+
+ARCHIVE:
+for my $infile (@files) {
+    print "working on file $infile\n";
+
+    # Archive::Zip->new returns undef when the file cannot be read as a zip.
+    my $zip = Archive::Zip->new($infile);
+    if (!defined $zip) {
+        warn "skipping $infile: cannot read zip archive\n";
+        next ARCHIVE;
+    }
+
+    my $xml_content = $zip->contents('metadata.xml');
+    if (!defined $xml_content || $xml_content eq '') {
+        warn "skipping $infile: archive has no metadata.xml content\n";
+        next ARCHIVE;
+    }
+
+    # load_xml throws on malformed XML; trap it so only this archive is skipped.
+    my $dom = eval { XML::LibXML->load_xml(string => $xml_content) };
+    if (!defined $dom) {
+        my $reason = $@ || "unknown parser error\n";
+        warn "skipping $infile: cannot parse metadata.xml: $reason";
+        next ARCHIVE;
+    }
+
+    # The image is the content of the last archive member whose name matches
+    # /image/. It is located before any row is written so that an archive
+    # without an image leaves no partial data behind.
+    my $image;
+    for my $member_name ($zip->memberNames()) {
+        if ($member_name =~ /image/) {
+            $image = $zip->contents($member_name);
+        }
+    }
+    if (!defined $image) {
+        warn "skipping $infile: no image member found in archive\n";
+        next ARCHIVE;
+    }
+
+    # Only the species name and the curator name are stored; the other
+    # metadata fields of the document are not used by this loader.
+    my $species = $dom->findvalue('//species/@name');
+    my $curator = $dom->findvalue('//curator_name');
+
+    # Get the NCBI taxon id for the species (one Entrez lookup per species).
+    if (!exists $ncbi_id_for{$species}) {
+        $ncbi_id_for{$species} = $taxonomy_db->get_taxonid($species);
+    }
+    my $ncbi_id = $ncbi_id_for{$species};
+    if (!defined $ncbi_id) {
+        # An undef id is bound as NULL, which "species_id = ?" never matches,
+        # so a taxon row without species_id will be inserted for this archive.
+        warn "warning: no NCBI taxon id found for species '$species' ($infile)\n";
+    }
+
+    # Reuse the curator / taxon rows when they already exist, else insert them.
+    my $curator_id = find_or_insert_id(
+        $sth_check_curator,  [$curator],
+        $sth_insert_curator, [$curator],
+    );
+    my $taxon_id = find_or_insert_id(
+        $sth_check_taxon,  [$ncbi_id],
+        $sth_insert_taxon, [$ncbi_id, $species],
+    );
+
+    # Insert the image and its owner/taxon references.
+    $sth_insert_image->execute($curator_id, $taxon_id, $image);
+    my $annotated_image_id = $sth_insert_image->{mysql_insertid};
+
+    for my $segment ($dom->findnodes('/image_data/segments/segment')) {
+        my $term    = $segment->findvalue('./annotation_term');
+        my $term_id = $segment->findvalue('./annotation_id');
+        my $coords  = polygon_coords_to_wkt($segment->findvalue('./polygon_coords'));
+
+        $sth_insert_segment->execute($annotated_image_id, $coords);
+        my $segment_id = $sth_insert_segment->{mysql_insertid};
+
+        # Only insert the term if it is not already in the database.
+        my $annotated_term_id = find_or_insert_id(
+            $sth_check_annotated_term,  [$term_id],
+            $sth_insert_annotated_term, [$term_id, $term],
+        );
+
+        # NOTE(review): this link row is written once per segment, so a term
+        # used by several segments of one image is linked to the image several
+        # times - confirm that this is intended by the schema.
+        $sth_insert_annotated_term_image->execute($annotated_term_id, $annotated_image_id);
+
+        $sth_insert_annotated_term_segment->execute($segment_id, $annotated_term_id);
+    }
+}
+
+# Return the id selected by $sth_select, or insert a new row and return its id.
+#
+#   $sth_select   - prepared select returning a single id column
+#   $select_binds - array ref of bind values for the select
+#   $sth_insert   - prepared insert used when the select finds no row
+#   $insert_binds - array ref of bind values for the insert
+#
+# The id of a freshly inserted row is read from the statement handle's
+# mysql_insertid attribute.
+sub find_or_insert_id {
+    my ($sth_select, $select_binds, $sth_insert, $insert_binds) = @_;
+
+    $sth_select->execute(@{$select_binds});
+    # List assignment takes the first column of the first row (undef if none);
+    # finish is then always called so the handle is free for its next execute.
+    my ($id) = $sth_select->fetchrow_array;
+    $sth_select->finish;
+    return $id if defined $id;
+
+    $sth_insert->execute(@{$insert_binds});
+    return $sth_insert->{mysql_insertid};
+}
+
+# Convert a flat comma-separated coordinate list "x1,y1,x2,y2,..." into the
+# WKT (well-known text) form "LineString(x1 y1, x2 y2, ...)".
+sub polygon_coords_to_wkt {
+    my ($coords) = @_;
+
+    $coords =~ s/(\d+),(\d+)/$1 $2/;   # remove the first comma
+    $coords =~ s/(.*?,.*?),/$1 /mgs;   # put the points in (x1 y1,x2 y2,...) format
+    $coords =~ s/,$//;                 # remove the last comma
+    $coords =~ s/,/, /g;               # add a space after each comma
+
+    # WKT needs the LineString keyword with the points in parentheses.
+    return "LineString($coords)";
+}
+# The following will retrieve the image from the database
+# my $sth_get_image = $dbh->prepare("select annotated_image from annotated_image where annotated_image_id=?");
+# $sth_get_image->execute($annotated_image_id);
+# my $retrieved_image = $sth_get_image->fetch();
+#
+# open IMAGE, ">retrieved_image.png" or die $!;
+# print IMAGE @$retrieved_image;
+# close IMAGE;