Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Add aiso xml parsing script. Runs over zip files in current directory and adds them...
author elserj <elserj@localhost>
Thu, 10 Nov 2016 00:18:45 +0000 (00:18 +0000)
committer elserj <elserj@localhost>
Thu, 10 Nov 2016 00:18:45 +0000 (00:18 +0000)
svn path=/; revision=646

interactome_scripts/aiso_xml_parser.pl [new file with mode: 0755]

diff --git a/interactome_scripts/aiso_xml_parser.pl b/interactome_scripts/aiso_xml_parser.pl
new file mode 100755 (executable)
index 0000000..858f770
--- /dev/null
@@ -0,0 +1,153 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+if(-e "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl") {  # first choice: copy under ~/scripts/jaiswallab
+       require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+}elsif(-e "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl") {  # fallback: copy under ~/jaiswallab_svn
+       require "$ENV{HOME}/jaiswallab_svn/interactome_scripts/find_species.pl";
+}  # NOTE(review): when neither file exists nothing is loaded and no error is raised
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use XML::LibXML;
+use Archive::Zip;
+use Bio::DB::Taxonomy;
+use DbiFloret;
+
+# check for arguments and explain usage (disabled: input files come from the glob below, not from @ARGV)
+# if ($#ARGV !=0) {
+       # print "usage: aiso_xml_parser.pl input_zip_file\n";
+       # exit;
+# }
+my @files = glob("*.zip");  # every *.zip file in the current working directory
+# NOTE(review): dbconnect_local is defined in DbiFloret (not shown); presumably returns a DBI handle to a local MySQL database - confirm
+my $dbh = DbiFloret::dbconnect_local;
+
+# Import every zip archive collected above. Each archive is expected to hold a
+# metadata.xml member plus one member whose name contains "image". For each
+# archive the curator and taxon are looked up (inserted when missing), the image
+# is stored, and every segment is stored with its ontology term and link rows.
+# Archives that cannot be read or parsed are reported with warn() and skipped.
+
+# Loop-invariant resources: created once rather than once per archive.
+my $taxonomy_db = Bio::DB::Taxonomy->new(-source => 'entrez');
+
+my $sth_check_curator = $dbh->prepare("select curator_id from curator where firstname = ?");
+my $sth_insert_curator = $dbh->prepare("insert into curator (firstname) values (?)");
+my $sth_check_taxon = $dbh->prepare("select taxon_id from taxon where species_id = ?");
+my $sth_insert_taxon = $dbh->prepare("insert into taxon (species_id, species_name) values (?,?)");
+my $sth_insert_image = $dbh->prepare("insert into annotated_image(curator_id, taxon_id, annotated_image) values (?,?,?)");
+# coordinates are handed to the database as WKT text and converted by GeomFromText
+my $sth_insert_segment = $dbh->prepare("insert into segment(annotated_image_id,coordinates) values (?,GeomFromText(?))");
+
+#annotated_term table
+my $sth_insert_annotated_term = $dbh->prepare("insert into annotated_term(ontology_term_id, ontology_term_name) values (?,?)");
+my $sth_check_annotated_term = $dbh->prepare("select annotated_term_id from annotated_term where ontology_term_id = ?");
+
+#annotated_term_image table
+my $sth_insert_annotated_term_image = $dbh->prepare("insert into annotated_term_image(annotated_term_id, annotated_image_id) values (?,?)");
+#annotated_term_segment table
+my $sth_insert_annotated_term_segment = $dbh->prepare("insert into annotated_term_segment(segment_id, annotated_term_id) values (?,?)");
+
+for my $infile (@files){
+    print "working on file $infile\n";
+
+    # Archive::Zip->new returns undef when the archive cannot be read
+    my $zip = Archive::Zip->new($infile);
+    if(!defined($zip)) {
+        warn "skipping $infile: cannot read zip archive\n";
+        next;
+    }
+
+    # contents() returns undef when the named member is absent
+    my $xml_content = $zip->contents('metadata.xml');
+    if(!defined($xml_content)) {
+        warn "skipping $infile: no metadata.xml member\n";
+        next;
+    }
+
+    # load_xml dies on malformed XML; trap it so one bad archive does not end the run
+    my $dom = eval { XML::LibXML->load_xml(string => $xml_content) };
+    if(!defined($dom)) {
+        warn "skipping $infile: cannot parse metadata.xml: $@";
+        next;
+    }
+
+    # the image is the member whose name contains "image" (the last one if several match)
+    my $image;
+    for my $member_name ($zip->memberNames()) {
+        if($member_name =~ /image/) {
+            $image = $zip->contents($member_name);
+        }
+    }
+    # checked before any insert so a broken archive leaves no partial rows behind
+    if(!defined($image)) {
+        warn "skipping $infile: no image member\n";
+        next;
+    }
+
+    my $species = $dom->findvalue('//species/@name');
+    my $curator = $dom->findvalue('//curator_name');
+
+    # Get the ncbi taxon id
+    my $ncbi_id = $taxonomy_db->get_taxonid($species);
+
+    # insert the curator info into the curator table unless already in db
+    $sth_check_curator->execute($curator);
+    my ($curator_id) = $sth_check_curator->fetchrow_array;
+    $sth_check_curator->finish;
+    if(!defined($curator_id)) {
+        $sth_insert_curator->execute($curator);
+        $curator_id = $sth_insert_curator->{mysql_insertid};
+    }
+
+    # insert the taxon info into the taxon table unless already in db
+    $sth_check_taxon->execute($ncbi_id);
+    my ($taxon_id) = $sth_check_taxon->fetchrow_array;
+    $sth_check_taxon->finish;
+    if(!defined($taxon_id)) {
+        $sth_insert_taxon->execute($ncbi_id, $species);
+        $taxon_id = $sth_insert_taxon->{mysql_insertid};
+    }
+
+    # insert the image and data into annotated_image table
+    $sth_insert_image->execute($curator_id,$taxon_id,$image);
+    my $annotated_image_id = $sth_insert_image->{mysql_insertid};
+
+    foreach my $segment ($dom->findnodes('/image_data/segments/segment')) {
+        my $term = $segment->findvalue('./annotation_term');
+        my $term_id = $segment->findvalue('./annotation_id');
+        my $coords = $segment->findvalue('./polygon_coords');
+
+        # Need to get the coordinates in WKT (Well known text) format for input
+        $coords =~ s/(\d+),(\d+)/$1 $2/; # remove the first comma
+        $coords =~ s/(.*?,.*?),/$1 /mgs; # put the points in (x1 y1, x2 y2,...) format
+        $coords =~ s/,$//; # remove the last comma
+        $coords =~ s/,/, /g; # add a space after each comma
+        $coords = "LineString($coords)"; # WKT text: LineString with the points in parentheses
+
+        $sth_insert_segment->execute($annotated_image_id,$coords);
+        my $segment_id = $sth_insert_segment->{mysql_insertid};
+
+        # only need to insert if term isn't already in db
+        $sth_check_annotated_term->execute($term_id);
+        my ($annotated_term_id) = $sth_check_annotated_term->fetchrow_array;
+        $sth_check_annotated_term->finish;
+        if(!defined($annotated_term_id)){
+            $sth_insert_annotated_term->execute($term_id,$term);
+            $annotated_term_id = $sth_insert_annotated_term->{mysql_insertid};
+        }
+
+        $sth_insert_annotated_term_image->execute($annotated_term_id,$annotated_image_id);
+        $sth_insert_annotated_term_segment->execute($segment_id,$annotated_term_id);
+    }
+}
+# The following will retrieve the image from the database
+# my $sth_get_image = $dbh->prepare("select annotated_image from annotated_image where annotated_image_id=?");
+# $sth_get_image->execute($annotated_image_id);
+# my $retrieved_image = $sth_get_image->fetch();
+# 
+# open IMAGE, ">retrieved_image.png" or die $!;
+# print IMAGE @$retrieved_image;
+# close IMAGE;