From: lingutln Date: Tue, 7 May 2013 20:34:47 +0000 (+0000) Subject: Latest commits. X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=bafd3b3effd799844d53c3bcc9fc85a9acd3a1a0;p=old-jaiswallab-svn%2F.git Latest commits. svn path=/; revision=471 --- diff --git a/image_annotation_db/ia_upload/annotation_data_importer.pl b/image_annotation_db/ia_upload/annotation_data_importer.pl index debbbef..6b8a151 100644 --- a/image_annotation_db/ia_upload/annotation_data_importer.pl +++ b/image_annotation_db/ia_upload/annotation_data_importer.pl @@ -24,6 +24,7 @@ use strict; use English; use DBI; +use String::Util qw(:all); # configurations @@ -36,38 +37,146 @@ sub establish_db_connection ); } -sub insert_curator_data(my $dbh, my @image_data_fields) -{ - my($curator_first, $curator_last) = split(/ /, $image_data_fields[6], 2); - my $curator_email = $image_data_fields[7]; - my $curator_affiliation = $image_data_fields[8]; +sub insert_image_data_into_database +{ + + my($dbh, $image_data) = @_; + my @image_data_fields = split(/\t/, $image_data); + + + + # Creating curator data and storing the respective id + my($curator_first_name, $curator_last_name) = split(/ /, $image_data_fields[6], 2); + my $curator_email = $image_data_fields[7]; + my $curator_affiliation = $image_data_fields[8]; + + $dbh->do('INSERT INTO curator (firstname, lastname, primary_email, affiliation) VALUES(?, ?, ?, ?)', undef, $curator_first_name, $curator_last_name, $curator_email, $curator_affiliation); + my $curator_id = $dbh->{'mysql_insertid'}; + + + + # Creating taxon data and storing the respective id + my $species = $image_data_fields[4]; + my ($genus, $species_name) = split(/\s/, $species, 2); + my $species_id = trim($image_data_fields[5]); + $dbh->do('INSERT INTO taxon (species_id, species_name, genus) VALUES(?, ?, ?)', undef, $species_id, $species_name, $genus); + my $taxon_id = $dbh->{'mysql_insertid'}; + + # Forming image_path by concatinating import_location and filename + my $image_path = trim($image_data_fields[1]) . trim($image_data_fields[0]); + + + # preprocessing ip_comment data + my $ip_comment = trim($image_data_fields[12]); + + if(undef $ip_comment){ + $ip_comment = "" + } + + my $source_db = trim($image_data_fields[14]); + my $source_db_name = ""; + my $source_db_id = ""; + if(defined $source_db) + { + ($source_db_name, $source_db_id) = split(/:/, $source_db, 2); + } + my $doi = $image_data_fields[13]; + my $collection_location = $image_data_fields[9]; + my $collection_date = $image_data_fields[11]; + my $comments = $image_data_fields[15]; + + $dbh->do('INSERT INTO annotated_image (curator_id, taxon_id, image_path, collection_location, collection_date, ip_comment, doi, source_db_name, source_db_id, comments) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', + undef, $curator_id, $taxon_id, $image_path, $collection_location, $collection_date, $ip_comment, $doi, $source_db_name, $source_db_id, $comments); + my $annotated_image_id = $dbh->{'mysql_insertid'}; + + my @keywords = split(/\|/, trim($image_data_fields[2])); + my @ontology_term_ids = split(/\|/, trim($image_data_fields[3])); + + foreach (0..(scalar(@keywords)-1)) + { + + #creating annotated_term + $dbh->do('INSERT INTO annotated_term (keyword, ontology_term_id) VALUES(?, ?)', undef, $keywords[$_], $ontology_term_ids[$_]); + my $annotated_term_id = $dbh->{'mysql_insertid'}; - $dbh->do('INSERT INTO curator (firstname, lastname, primary_email, alternate_email, affiliation) VALUES(?, ?, ?, ?, ?)', undef, $curator_first, $curator_last , $curator_email ,'', $curator_affiliation); + + #creating annotated_term_image + $dbh->do('INSERT INTO annotated_term_image (annotated_term_id, annotated_image_id) VALUES(?, ?)', undef, $annotated_term_id, $annotated_image_id); + my $annotated_term_image_id = $dbh->{'mysql_insertid'}; + + } } -sub insert_record_into_database -{ - my $dbh = establish_db_connection; - my @image_data_fields = split(/\t/); +sub insert_collection_data_into_database +{ + my($dbh, $collection_data) = @_; + my @collection_data_fields = split(/\t/, $collection_data); - insert_curator_data($dbh, @image_data_fields); - + #creating image_source + my($source_name) = trim($collection_data_fields[0]); + my($url) = trim($collection_data_fields[1]); + my($contact_email) = trim($collection_data_fields[3]); + my($contributor_name) = trim($collection_data_fields[2]); + + $dbh->do('INSERT INTO image_source (source_name, url, contact_email, contributor_name) VALUES(?, ?, ?, ?)', undef, $source_name, $url, $contact_email, $contributor_name); + my $image_source_id = $dbh->{'mysql_insertid'}; + + + + #creating image_source_version + my($source_version) = trim($collection_data_fields[6]); + my($contribution_date) = trim($collection_data_fields[4]); + my($publication_id) = trim($collection_data_fields[5]); + + $dbh->do('INSERT INTO image_source_version (image_source_id, source_version, contribution_date, publication_id) VALUES(?, ?, ?, ?)', undef, $image_source_id, $source_version, $contribution_date, $publication_id); + my $image_source_version_id = $dbh->{'mysql_insertid'}; } + sub import_image_data { - print "Opening each image data(*.tsv) file and reading data...\n\n"; + print "Connecting to the Database...\n\n"; + my $dbh = establish_db_connection; + + + print "Reading content of each collection specification data(IADB_coll_spec_*.tsv) file and reading data...\n\n"; + + foreach (glob(IMAGE_DATA_DIR."/IADB_coll_spec_*.tsv")) + { + + my $content = do { + local $/ = undef; + open (my $file, "<", $_) or die("could not open $_: $!"); + <$file>; + }; + + chomp; - foreach (glob(IMAGE_DATA_DIR."/*.tsv")) + my @collection_spec_records = split(/\n/, $content); + + shift @collection_spec_records; + + foreach (@collection_spec_records) + { + insert_collection_data_into_database($dbh, $_); + } + + } + + + print "Reading content of each image data(IADB_img_data_*.tsv) file and reading data...\n\n"; + + foreach (glob(IMAGE_DATA_DIR."/IADB_img_data_*.tsv")) { my $content = do { local $/ = undef; + print "file is " . $_ . "\n"; open (my $file, "<", $_) or die("could not open $_: $!"); <$file>; }; @@ -76,16 +185,15 @@ sub import_image_data my @image_data_records = split(/\n/, $content); - splice @image_data_records, 0, 1; + shift @image_data_records; foreach (@image_data_records) { - insert_record_into_database($_); + insert_image_data_into_database($dbh, $_); } - close $_ or die("Could not close '$_': $OS_ERROR"); - } + } # --------------------------------------------------------------------------- diff --git a/image_annotation_db/ia_upload/image_data/IADB_coll_spec_NYBG_20130404.tab - Sheet1.tsv b/image_annotation_db/ia_upload/image_data/IADB_coll_spec_NYBG_20130404.tab - Sheet1.tsv new file mode 100644 index 0000000..2468817 --- /dev/null +++ b/image_annotation_db/ia_upload/image_data/IADB_coll_spec_NYBG_20130404.tab - Sheet1.tsv @@ -0,0 +1,2 @@ +source_name url contributor_name contact_email contribution_date publication_id source_version +New York Botanical Garden http://www.plantsystematics.org/ Dennis Stevenson dws@nybg.org 20130403 1 \ No newline at end of file diff --git a/image_annotation_db/ia_upload/image_data/IADB_img_data_DWS_NYBG_20130404.tab - Sheet1.tsv b/image_annotation_db/ia_upload/image_data/IADB_img_data_DWS_NYBG_20130404.tab - Sheet1.tsv index 9a7ce3e..02c447e 100644 --- a/image_annotation_db/ia_upload/image_data/IADB_img_data_DWS_NYBG_20130404.tab - Sheet1.tsv +++ b/image_annotation_db/ia_upload/image_data/IADB_img_data_DWS_NYBG_20130404.tab - Sheet1.tsv @@ -1,17 +1,23 @@ filename import_location keywords ontology_term_ids species species_id curator_name curator_email curator_affiliation collection_location geo_coords collection_date ip_comment doi source_db comments -Saurauia xs Ovary.jpg / locule|ovule|pericarp|septum PO:0025266|PO:0020003|PO:0009084|PO:0025262 Saurauia sp Dennis Stevenson dws@nybg.org NYBG COLOMBIA: Boyaca: Villa de Lleva, Iguaque National Park 2500-2800 M 20070429 PlantSystematics:DOL27827 -Helleborus_argutifolius_Carpels.jpg / carpel|style PO:0009030|PO:0009074 Helleborus argutifolius Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: South Australia: Botanic Gardens of Adelaide Accession G880864 20110810 PlantSystematics:DOL40035 -Yucca_schidigera_flower.jpg / anther|filament|flower|ovary|petal|sepal|stigma PO:0009066|PO:0009067|PO:0009046|PO:0009072|PO:0009032|PO:0009031|PO:0009073 Yucca schidigera Lawrence M. Kelly lkelly@nybg.org NYBG USA: California:: San Diego Co. Near Miramar NAS 20040320 PlantSystematics:DOL11866 -Gunnera_tinctoria Prickle.jpg / prickle PO:0025169 Gunnera tinctoria Dennis Stevenson dws@nybg.org NYBG DENMARK: University of Copenhagen, Botanical Garden E6139 C001 A 20080813 PlantSystematics:DOL33711 - Acacia cornigera Stipules as Spines.jpg / stipule|stipule spine PO:0020041|PO:0025174 Acacia cornigera Lawrence M. Kelly lkelly@nybg.org NYBG COSTA RICA: Guanacaste: Palo Verde 2003 PlantSystematics:DOL4821 -Bougainvillea_spectabilis Thorn.jpg / thorn PO:0025172 Bougainvillea spectabilis Dennis Stevenson dws@nybg.org NYBG CHINA: Shenzhen, Fairylake Botanical Garden 20111025 -Parmentiera_cerifera Style & Stigma.jpg / stigma|style PO:0009073|PO:0009074 Parmentiera cerifera Dennis Stevenson dws@nybg.org NYBG USA: Florida:: Dade Co. Montgomery Botanical Center  20070404 PlantSystematics:DOL27681 -Leucojum_aestivum Flower.jpg / collective tepal structure|free tepal|plant ovary|stamen|tepal PO:0025021|PO:0025136PO:0009072|PO:0009029|PO:0009033 Leucojum aestivum Dennis Stevenson dws@nybg.org NYBG USA: NY:: Tompkins Co. Mins Garden, Cornell University 20050428 PlantSystematics:DOL12340|DOL12342 -Ticodendron incognitum.jpg / shoot node|petiole|shoot internode|stipule PO:0005004|PO:0020038|PO:0005005|PO:0020041| Ticodendron incognitum Lawrence M. Kelly lkelly@nybg.org NYBG COSTA RICA: Alajuela:: San Ramon Estacion Biologica Alberto M. Brenes 2003 PlantSystematics:DOL4844 -Agave sebastiana Labelled.jpg / locule|ovule|septum PO:0025266|PO:0020003|PO:0025262 Agave sebastiana Dennis Stevenson dws@nybg.org NYBG USA: California:: Santa Barbara Co. Santa Barbara Botanic Garden 20061208 PlantSystematics:DOL26528 -Acacia mangium Phyllode.JPG / inflorescence|phyllode leaf PO:0009049|PO:0025335 Acacia mangium Dennis Stevenson dws@nybg.org NYBG VIETNAM: Vinh Phuc Province: Me Linh District, Me Linh Station of the Institute of Ecology and Biological Resources, Vietnamese Academy of Science and Technology 21 23.309 N 105 42.837 E 20051210 PlantSystematics:DOL17750 -Acacia sp Phyllode.JPG / axillary inflorescence bud|phyllode leaf PO:0004711|PO:0025335 Acacia sp Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39936 -EucalyptusKingsmillii6.JPG / calyptra corolla PO:0025330 Eucalyptus kingsmillii Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39933 -EucalyptusKingsmillii2.JPG / calyptra corolla PO:0025330 Eucalyptus kingsmillii Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39929 -GymnocalyciumMarsoneri1.JPG / areole bud|spine leaf PO:0025353|PO:0025173 Gymnocalycium marsoneri Dennis Stevenson dws@nybg.org NYBG USA: Arizona:: Maricopa Co. Desert Botanical Garden, Phoenix 20070114 PlantSystematics:DOL26949 -Anchomanes giganteus.JPG / prickle PO:0025169 Anchomanes giganteus Dennis Stevenson dws@nybg.org NYBG GERMANY: Botanical Garden, University of Bonn. Accession No. 02608 20060924 \ No newline at end of file +Saurauia xs Ovary.jpg / locule|ovule|pericarp|septum PO:0025266|PO:0020003|PO:0009084|PO:0025262 Saurauia sp Dennis Stevenson dws@nybg.org NYBG COLOMBIA: Boyaca: Villa de Lleva, Iguaque National Park 2500-2800 M 20070429 PlantSystematics:DOL27827 +Helleborus argutifolius Carpels.jpg / carpel|style PO:0009030|PO:0009074 Helleborus argutifolius Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: South Australia: Botanic Gardens of Adelaide Accession G880864 20110810 PlantSystematics:DOL40035 +Yucca_schidigera_flower.jpg / anther|filament|flower|ovary|petal|sepal|stigma PO:0009066|PO:0009067|PO:0009046|PO:0009072|PO:0009032|PO:0009031|PO:0009073 Yucca schidigera Lawrence M. Kelly lkelly@nybg.org NYBG USA: California:: San Diego Co. Near Miramar NAS 20040320 PlantSystematics:DOL11866 +Gunnera_tinctoria Prickle.jpg / prickle PO:0025169 Gunnera tinctoria Dennis Stevenson dws@nybg.org NYBG DENMARK: University of Copenhagen, Botanical Garden E6139 C001 A 20080813 PlantSystematics:DOL33711 + Acacia cornigera Stipules as Spines.jpg / stipule|stipule spine PO:0020041|PO:0025174 Acacia cornigera Lawrence M. Kelly lkelly@nybg.org NYBG COSTA RICA: Guanacaste: Palo Verde 2003 PlantSystematics:DOL4821 +Bougainvillea_spectabilis Thorn.jpg / thorn PO:0025172 Bougainvillea spectabilis Dennis Stevenson dws@nybg.org NYBG CHINA: Shenzhen, Fairylake Botanical Garden 20111025 +Parmentiera_cerifera Style & Stigma.jpg / stigma|style PO:0009073|PO:0009074 Parmentiera cerifera Dennis Stevenson dws@nybg.org NYBG USA: Florida:: Dade Co. Montgomery Botanical Center  20070404 PlantSystematics:DOL27681 +Leucojum_aestivum Flower.jpg / collective tepal structure|free tepal|plant ovary|stamen|tepal PO:0025021|PO:0025136|PO:0009072|PO:0009029|PO:0009033 Leucojum aestivum Dennis Stevenson dws@nybg.org NYBG USA: NY:: Tompkins Co. Mins Garden, Cornell University 20050428 PlantSystematics:DOL12340|DOL12342 +Ticodendron incognitum.jpg / shoot node|petiole|shoot internode|stipule PO:0005004|PO:0020038|PO:0005005|PO:0020041| Ticodendron incognitum Lawrence M. Kelly lkelly@nybg.org NYBG COSTA RICA: Alajuela:: San Ramon Estacion Biologica Alberto M. Brenes 2003 PlantSystematics:DOL4844 +Agave sebastiana Labelled.jpg / locule|ovule|septum PO:0025266|PO:0020003|PO:0025262 Agave sebastiana Dennis Stevenson dws@nybg.org NYBG USA: California:: Santa Barbara Co. Santa Barbara Botanic Garden 20061208 PlantSystematics:DOL26528 +Acacia mangium Phyllode.JPG / inflorescence|phyllode leaf PO:0009049|PO:0025335 Acacia mangium Dennis Stevenson dws@nybg.org NYBG VIETNAM: Vinh Phuc Province: Me Linh District, Me Linh Station of the Institute of Ecology and Biological Resources, Vietnamese Academy of Science and Technology 21 23.309 N 105 42.837 E 20051210 PlantSystematics:DOL17750 +Acacia sp Phyllode.JPG / axillary inflorescence bud|phyllode leaf PO:0004711|PO:0025335 Acacia sp Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39936 +EucalyptusKingsmillii6.JPG / calyptra corolla PO:0025330 Eucalyptus kingsmillii Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39933 +EucalyptusKingsmillii2.JPG / calyptra corolla PO:0025330 Eucalyptus kingsmillii Dennis Stevenson dws@nybg.org NYBG AUSTRALIA: Western Australia: Kings Park, Perth 20110805 PlantSystematics:DOL39929 +GymnocalyciumMarsoneri1.JPG / areole bud|spine leaf PO:0025353|PO:0025173 Gymnocalycium marsoneri Dennis Stevenson dws@nybg.org NYBG USA: Arizona:: Maricopa Co. Desert Botanical Garden, Phoenix 20070114 PlantSystematics:DOL26949 +Anchomanes giganteus.JPG / prickle PO:0025169 Anchomanes giganteus Dennis Stevenson dws@nybg.org NYBG GERMANY: Botanical Garden, University of Bonn. Accession No. 02608 20060924 +Mickelia_nicotianifolia Diarch.jpg protoxylem PO:0000272 Mickelia nicotianifolia Dennis Stevenson dws@nybg.org NYBG USA: New York:: Bronx Co. New York Botanical Garden 20130412 +Angiopteris_evecta_root_xs Polyarch.jpg protoxylem PO:0000272 Angiopteris evecta Dennis Stevenson dws@nybg.org NYBG USA: New York:: Bronx Co. New York Botanical Garden 20130412 +Angiopteris_evecta_root_xs3 Exarch.JPG protoxylem PO:0000272 Angiopteris evecta Dennis Stevenson dws@nybg.org NYBG USA: New York:: Bronx Co. New York Botanical Garden 20130412 +PunicaGranatumThorn.jpg thorn PO:0025172 Punicata granatum Dennis Stevenson dws@nybg.org NYBG USA: Arizona:: Pima Co. Tucson Botanical Garden, Tucson 20130423 +Sciaphyllum_amoenum5 Adaxial.jpg leaf adaxial epidermis|leaf lamina adaxial epidermis PO:0006018|PO:0000050 Sciaphyllum amoenum Dennis Stevenson dws@nybg.org NYBG USA: New York:: Bronx Co. Conservatory, New York Botanical Garden 20070309 PlantSystematics DOL27401 +Sciaphyllum_amoenum4 Abaxial.jpg leaf abaxial epidermis|leaf lamina abaxial epidermis PO:0006019|PO:0000049 Sciaphyllum amoenum Dennis Stevenson dws@nybg.org NYBG USA: New York:: Bronx Co. Conservatory, New York Botanical Garden 20070309 PlantSystematics DOL21104