From 18c3a522dfe3d9238d2533857065b886763f6e7e Mon Sep 17 00:00:00 2001
From: elserj
Date: Fri, 6 Nov 2009 18:36:24 +0000
Subject: [PATCH] updated versions of flowering_interaction and
 inparanoid_output_parse, added supercluster and inparanoid_orthologs scripts

svn path=/; revision=11
---
Notes (text between "---" and the diffstat is ignored by git-am):

  flowering_interaction.pl
    Takes five arguments (gene list, interaction network, ortholog table,
    paralog table, output prefix) and writes <prefix>.sif,
    <prefix>_inter.sif, <prefix>_ortho.sif and <prefix>_para.sif.
    Each input is read line by line with the readline operator on its
    bareword handle.  <prefix>.sif is de-duplicated at the end by
    shelling out to `sort | uniq`.
  inparanoid_orthologs.pl
    For every pair of the five listed species, reads the table named
    <species_1>_<species_2> (names ordered with `lt`) and prints, for
    each id present for both species, the space-joined genes of the
    two species separated by a tab.
  inparanoid_output_parse.pl
    split("\|", ...) becomes split(/\|/, ...): the double-quoted string
    is just "|", which split() treats as a pattern (an alternation of
    two empty branches) rather than as a literal pipe.  Adds Fragaria to
    find_species/find_find_gene handling, and skips gene ids that end in
    .2 through .9.
  supercluster.pl
    Drops and recreates table super_clust, then gives every gene of each
    pairwise cluster a super_id, reusing the id of any gene in that
    cluster that has already been assigned one.

 interactome_scripts/flowering_interaction.pl  | 278 ++++++++++++++++++
 interactome_scripts/inparanoid_orthologs.pl   |  99 +++++++
 .../inparanoid_output_parse.pl                |  44 ++-
 interactome_scripts/supercluster.pl           | 180 ++++++++++++
 4 files changed, 590 insertions(+), 11 deletions(-)
 create mode 100755 interactome_scripts/inparanoid_orthologs.pl
 create mode 100755 interactome_scripts/supercluster.pl

diff --git a/interactome_scripts/flowering_interaction.pl b/interactome_scripts/flowering_interaction.pl
index 6b1d643..703efec 100755
--- a/interactome_scripts/flowering_interaction.pl
+++ b/interactome_scripts/flowering_interaction.pl
@@ -1,3 +1,281 @@
 #!/usr/bin/perl
+#########################################################
+# Written by Justin Elser 4/29/2009                     #
+#                                                       #
+# This program takes the genes known to interact        #
+# in the long day flowering process in                  #
+# Arabidopsis and predicts the similar short day        #
+# interaction in rice                                   #
+#                                                       #
+# Uses the orthologs first, then paralogs               #
+#                                                       #
+#########################################################
+
+
+use strict;
+use warnings;
+
+# print usage
+if ($#ARGV !=4) {
+    print "usage: flowering_interaction.pl input_file interaction_network_file ortholog_file paralog_file output_file_prefix\n";
+    exit;
+}
+
+my $in_file = $ARGV[0];
+my $inter_file = $ARGV[1];
+my $ortho_file = $ARGV[2];
+my $para_file = $ARGV[3];
+my $out_file = $ARGV[4] . ".sif";
+my $out_file_inter = $ARGV[4] . "_inter.sif";
+my $out_file_ortho = $ARGV[4] . "_ortho.sif";
+my $out_file_para = $ARGV[4] . "_para.sif";
+
+my %in_hash;
+
+
+open (input_file, "$in_file") || die "Error: file '$in_file' can not be opened \n";
+while(<input_file>) {
+    my $entry = $_;
+    # strip off newline characters
+    $entry =~ s/\r//g;
+    $entry =~ s/\n//g;
+    # split the columns into 2 separate variables (using either multiple congruent spaces or a tab)
+    my ($gene_name, $gene_id) = split /\s{2,}|\t/, $entry;
+    $in_hash{$gene_id} = $gene_name;
+}
+close (input_file);
+
+open (interaction_file, "$inter_file") || die "Error: file '$inter_file' can not be opened\n";
+my %inter_hash;
+while(<interaction_file>) {
+    my $entry = $_;
+    # strip off newline characters
+    $entry =~ s/\r//g;
+    $entry =~ s/\n//g;
+    # split the columns into 3 separate variables
+    my ($gene_1, $inter_type, $gene_2) = split ("\t", $entry);
+    # change gene ids to all caps (for matching)
+    $gene_1 =~ tr/a-z/A-Z/;
+    $gene_2 =~ tr/a-z/A-Z/;
+
+    # because it doesn't matter which way the interaction goes, need to do the interaction in both directions
+    # ie. as gene_1 as the key then as gene_2 as the key
+    if(defined($inter_hash{$gene_1})){
+        # if gene already in hash, just add to the list
+        my $oldhash = $inter_hash{$gene_1};
+        $oldhash->{'type'} = "$oldhash->{'type'}\t$inter_type";
+        $oldhash->{'inter_gene'} = "$oldhash->{'inter_gene'}\t$gene_2";
+        $inter_hash{$gene_1} = $oldhash;
+        # if new gene, create hash element
+    }else{
+        my %hash;
+        $hash{'map_gene'} = $gene_1;
+        $hash{'type'} = $inter_type;
+        $hash{'inter_gene'} = $gene_2;
+        $inter_hash{$gene_1} = \%hash;
+    }
+
+    if(defined($inter_hash{$gene_2})){
+        # if new gene in the hash, just add to the list
+        my $oldhash = $inter_hash{$gene_2};
+        $oldhash->{'type'} = "$oldhash->{'type'}\t$inter_type";
+        $oldhash->{'inter_gene'} = "$oldhash->{'inter_gene'}\t$gene_1";
+        $inter_hash{$gene_2} = $oldhash;
+        # if new gene, create hash element
+    }else{
+        my %hash;
+        $hash{'map_gene'} = $gene_2;
+        $hash{'type'} = $inter_type;
+        $hash{'inter_gene'} = $gene_1;
+        $inter_hash{$gene_2} = \%hash;
+    }
+}
+close(interaction_file);
+
+
+
+my $min_ident=0;
+
+my %ortho_hash;
+#my @ortho_array;
+
+open (ortholog_file, "$ortho_file") || die "Error: file '$ortho_file' can not be opened\n";
+while(<ortholog_file>) {
+    my $entry = $_;
+    # strip off newline characters
+    $entry =~ s/\r//g;
+    $entry =~ s/\n//g;
+    # split the columns into 3 separate variables
+    my ($gene_id, $ortho_id, $ident_id) = split("\t", $entry);
+    # change gene ids to all caps
+    $gene_id =~ tr/a-z/A-Z/;
+    $ortho_id =~ tr/a-z/A-Z/;
+    # strip off the TAIR-G suffix on ids
+    $gene_id =~ s/\-TAIR-G//g;
+
+    # check to make sure the %identity is above the minimum defined above
+    if($ident_id =~ /\d/ && $ident_id>=$min_ident){
+        # if gene_id already found, add the new ortholog to hash
+        if(defined($ortho_hash{$gene_id})){
+            my $oldhash = $ortho_hash{$gene_id};
+            $oldhash->{'ortho'} = "$oldhash->{'ortho'}\t$ortho_id";;
+            $ortho_hash{$gene_id} = $oldhash;
+            # if new gene_id, create hash element
+        }else{
+            my %hash;
+            $hash{'ortho'} = $ortho_id;
+            $hash{'gene'} = $gene_id;
+            $ortho_hash{$gene_id} = \%hash;
+            #push @ortho_array, $gene_id;
+        }
+    }
+}
+close (ortholog_file);
+
+my %para_hash;
+open(paralog_file, "$para_file") || die "Error: file '$para_file' can not be opened\n";
+
+    my $min_ident_para = 0; # set the threshold for paralog identities
+    while(<paralog_file>) {
+        my $entry = $_;
+        # strip off newline characters
+        $entry =~ s/\r//g;
+        $entry =~ s/\n//g;
+        # split the columns into 3 separate variables
+        my ($gene_1, $gene_2, $ident) = split("\t", $entry);
+        # skip if $ident is empty or Nan
+        next if(!defined($ident));
+        next if($ident =~ /D/);
+        next if($ident eq "");
+        next if($ident =~ /\%/);
+        # change gene ids to all caps
+        $gene_1 =~ tr/a-z/A-Z/;
+        $gene_2 =~ tr/a-z/A-Z/;
+
+        # put the genes in ASCII order to help remove dupes
+        if($gene_1 gt $gene_2) {
+            my $temp = $gene_1;
+            $gene_1 = $gene_2;
+            $gene_2 = $temp;
+        }
+
+        # only use paralogs with high confidence values
+        if($ident>=$min_ident_para) {
+            # most genes will show up many times, so push values for hash key
+            if(defined($para_hash{$gene_1})) {
+                my $oldhash = $para_hash{$gene_1};
+                $oldhash->{'gene2'} = "$oldhash->{'gene2'}\t$gene_2";;
+                $para_hash{$gene_1} = $oldhash;
+                #print "$oldhash\n";
+            }else{
+                my %hash;
+                $hash{'gene1'} = $gene_1;
+                $hash{'gene2'} = $gene_2;
+                $para_hash{$gene_1} = \%hash;
+            }
+        }
+
+    }
+
+    close(paralog_file);
+
+# open output files for writing
+open(output_file, ">$out_file");
+open(output_file_inter, ">$out_file_inter");
+open(output_file_ortho, ">$out_file_ortho");
+open(output_file_para, ">$out_file_para");
+
+
+
+# initialize the counters
+my $Ath_inter_count = 0;
+my $rice_ortho_count = 0;
+my $rice_para_count = 0;
+
+foreach my $key (keys %in_hash) {
+    my $name = $in_hash{$key};
+
+    # find all interactions with the genes in %in_hash
+    if(defined($inter_hash{$key})) {
+
+        my @inter_array = split ("\t", $inter_hash{$key}->{'inter_gene'});
+        my @inter_type_array = split ("\t", $inter_hash{$key}->{'type'});
+        my $inter_index = @inter_type_array;
+        for (my $i=0; $i<$inter_index; $i++) {
+            my $in_gene_1 = $key;
+            my $in_gene_2 = $inter_array[$i];
+            # add one to the number of Ath interactions counted
+            $Ath_inter_count++;
+            # output the interactions to file
+            print output_file_inter "$in_gene_1\t$inter_type_array[$i]\t$in_gene_2\n";
+
+            # make the genes be listed in ASCII order to make sorting out dupes easier
+            if ($in_gene_1 gt $in_gene_2) { # note that this should not be used if order matters for interaction type
+                my $temp = $in_gene_1;
+                $in_gene_1 = $in_gene_2;
+                $in_gene_2 = $temp;
+            }
+
+            # next step is to find all the orthologs for both genes in the interaction
+            if(exists($ortho_hash{$in_gene_2})) {
+                my @ortho_gene_2_array = split ("\t", $ortho_hash{$in_gene_2}->{'ortho'});
+                foreach my $ortho_gene_2 (@ortho_gene_2_array) {
+                    # output orthologs to file
+                    print output_file_ortho "$in_gene_2\tortho\t$ortho_gene_2\n";
+
+                    if(exists($ortho_hash{$in_gene_1})) {
+                        my @ortho_gene_1_array = split ("\t", $ortho_hash{$in_gene_1}->{'ortho'});
+                        foreach my $ortho_gene_1 (@ortho_gene_1_array) {
+                            #print output_file "$ortho_gene_1\t$inter_type_array[$i]\t$ortho_gene_2\n";
+                            print output_file "$ortho_gene_1\tortho\t$ortho_gene_2\n";
+                            # add one to the number of orthologs counted
+                            $rice_ortho_count++;
+                            # output the orthologs to file
+                            print output_file_ortho "$in_gene_1\tortho\t$ortho_gene_1\n";
+
+                            # now find all paralogs to the orthologs found
+                            if(exists($para_hash{$ortho_gene_1})) {
+                                my @para_gene_1_array = split ("\t", $para_hash{$ortho_gene_1}->{'gene2'});
+                                foreach my $para_gene_1 (@para_gene_1_array) {
+                                    # output the paralogs to file
+                                    print output_file_para "$ortho_gene_1\tpara\t$para_gene_1\n";
+
+                                    if(exists($para_hash{$ortho_gene_2})) {
+                                        my @para_gene_2_array = split ("\t", $para_hash{$ortho_gene_2}->{'gene2'});
+                                        foreach my $para_gene_2 (@para_gene_2_array) {
+                                            # add one to the number of paralogs counted
+                                            $rice_para_count++;
+                                            #print output_file "$para_gene_1\t$inter_type_array[$i]\t$para_gene_2\n";
+                                            print output_file "$para_gene_1\tpara\t$para_gene_2\n";
+                                            # output the paralogs to file
+                                            print output_file_para "$ortho_gene_2\tpara\t$para_gene_2\n";
+
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+        }
+    }
+}
+
+close(output_file);
+print "Ath_inter_count = $Ath_inter_count\nrice_ortho_count = $rice_ortho_count\nrice_para_count = $rice_para_count\n";
+print "Now removing duplicates\n";
+# sort the file and get rid of duplicates
+# Note: this requires replacing the file, hence the mv command
+system "sort $out_file | uniq > $out_file.tmp; mv $out_file.tmp $out_file";
+
+
+close(output_file_inter);
+close(output_file_ortho);
+close(output_file_para);
+
+
+
 
 
diff --git a/interactome_scripts/inparanoid_orthologs.pl b/interactome_scripts/inparanoid_orthologs.pl
new file mode 100755
index 0000000..194a427
--- /dev/null
+++ b/interactome_scripts/inparanoid_orthologs.pl
@@ -0,0 +1,99 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+if ($#ARGV != 0) {
+    print "usage: inparanoid_orthologs.pl output_file\n";
+    exit;
+}
+open(outfile,">$ARGV[0]") || die "Error: file '$ARGV[0]' can not be opened\n";
+
+# define the database handle to be used
+
+my $screen = Term::Screen::ReadLine->new();
+    # clear the screen
+    $screen->clrscr;
+    # ask for username
+    $screen->at(0,0)->puts("Username: ");
+    my $username = $screen->readline(ROW => 0, COL=>11);
+
+    # ask for password, replace character presses with stars
+    $screen->at(1,0)->puts("Password: ");
+    my $password = $screen->readline(ROW => 1, COL => 11, PASSWORD => 1);
+
+    $screen->at(2,0);
+    undef $screen;
+
+my $dbh = DBI->connect('DBI:mysql:inparanoid_data;host=floret.cgrb.oregonstate.edu', $username, $password,
+    { RaiseError=> 1, AutoCommit=>1 }
+    ) or die "Failed to connect to database: $DBI::errstr";
+
+my @species = ("Maize", "Oryza_sativa", "Ath", "Sorghum", "Brachy");
+my $num_species = @species;
+
+for (my $i=0; $i<$num_species-1; $i++) {
+    for (my $j=$i+1; $j<$num_species; $j++) {
+        # hashes to store the paralogs and orthologs
+        my (%species_1_gene_hash, %species_2_gene_hash);
+
+        # make sure the species are listed in alphabetical order to get correct table names
+        my $species_1 = $species[$i];
+        my $species_2 = $species[$j];
+        if ($species_1 lt $species_2) {
+            $species_1 = $species_1;
+            $species_2 = $species_2;
+        } else {
+            my $spec_temp = $species_1;
+            $species_1 = $species_2;
+            $species_2 = $spec_temp;
+        }
+
+        my $spec_table = "$species_1" . "_" . "$species_2";
+        my $safe_species_table = $dbh->quote_identifier($spec_table);
+        my $sth = $dbh->prepare("select * from $safe_species_table");
+
+        my $id_prev = "";
+
+        $sth->execute();
+
+        while (my @line = $sth->fetchrow_array()) {
+            my ($id, $bit_score, $spec, $score, $gene) = @line;
+            if ($id ne $id_prev) {
+                if ($spec eq $species_1) {
+                    $species_1_gene_hash{$id} = $gene;
+                }else {
+                    $species_2_gene_hash{$id} = $gene;
+                }
+            }else {
+                if ($spec eq $species_1) {
+                    if(defined($species_1_gene_hash{$id})) {
+                        $species_1_gene_hash{$id} = "$species_1_gene_hash{$id} $gene";
+                    } else {
+                        $species_1_gene_hash{$id} = $gene;
+                    }
+                }else {
+                    if(defined($species_2_gene_hash{$id})) {
+                        $species_2_gene_hash{$id} = "$species_2_gene_hash{$id} $gene";
+                    } else {
+                        $species_2_gene_hash{$id} = $gene;
+                    }
+                }
+            }
+            $id_prev = $id;
+        }
+
+        foreach my $key (keys %species_1_gene_hash) {
+            if(defined($species_2_gene_hash{$key})) {
+                print outfile "$species_1_gene_hash{$key}\t$species_2_gene_hash{$key}\n";
+            }
+        }
+        $sth->finish();
+    }
+}
+
+close(outfile);
+
diff --git a/interactome_scripts/inparanoid_output_parse.pl b/interactome_scripts/inparanoid_output_parse.pl
index 3528d97..8f64b9a 100755
--- a/interactome_scripts/inparanoid_output_parse.pl
+++ b/interactome_scripts/inparanoid_output_parse.pl
@@ -1,5 +1,18 @@
 #!/usr/bin/perl
 
+###############################################################
+# Justin Elser (elserj@science.oregonstate.edu)               #
+# Parses the output from an inparanoid run and                #
+# enters it into a database                                   #
+#                                                             #
+# Version 1.00 - September 2009                               #
+# Seems to work fine                                          #
+# Version 1.01 - Nov 4 '09                                    #
+# Added support for strawberry                                #
+#                                                             #
+###############################################################
+
+
 use strict;
 use warnings;
 
@@ -90,6 +103,10 @@ foreach my $input_file (@files) {
 
         $clust_id = "$species_1"."___".$species_2."___".$clust_id;
         $species = find_species($species);
+        my $gene = find_gene($gene_header,$species);
+
+        # skip isomers that are not .1
+        next if ($gene =~ /\.[2-9]$/);
 
         if(!defined($id_hash{$clust_id})) {
             if ($clust_id ne $clust_id_prev) {
@@ -104,7 +121,7 @@ foreach my $input_file (@files) {
             $id = $id_hash{$clust_id};
         }
 
-        my $gene = find_gene($gene_header,$species);
+
         $sth->execute($id, $bit_score, $species, $score, $gene);
         $clust_id_prev = $clust_id;
 
@@ -132,6 +149,8 @@ sub find_species {
         $temp = "Danio";
     }elsif ($temp =~ /E\_coli/) {
         $temp = "E_coli";
+    }elsif ($temp =~ /Fragaria/) {
+        $temp = "Fragaria";
     }elsif ($temp =~ /Glycine/) {
         $temp = "Soy";
     }elsif ($temp =~ /Homo\_sapiens/) {
@@ -171,22 +190,25 @@ sub find_gene {
     my $species = $_[1];
     my $gene;
     if ($species eq "Ath") {
-        my ($name,$gene_id,$chrom,$isomer) = split("\|", $gene_header);
+        my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header);
         $gene = $isomer;
     }elsif ($species eq "Brachy") {
         $gene = $gene_header;
     }elsif ($species eq "C_elegans") {
-        my ($gene_id,$temp) = split("\|", $gene_header);
+        my ($gene_id,$temp) = split(/\|/, $gene_header);
         $gene = $gene_id; #???
     }elsif ($species eq "Chlamy") {
-        my ($name,$locus_id,$scaff_id,$temp) = split("\|",$gene_header);
+        my ($name,$locus_id,$scaff_id,$temp) = split(/\|/,$gene_header);
         $gene = $locus_id; #???
     }elsif ($species eq "Danio") {
         $gene = $gene_header;
     }elsif ($species eq "E_coli") {
         $gene = $gene_header; #???
+    }elsif ($species eq "Fragaria") {
+        my ($gene_id, $mrna_id, $method, $length) = split(/\|/, $gene_header);
+        $gene = $gene_id;
     }elsif ($species eq "Soy") {
-        my ($name,$locus_id,$scaff_id,$isomer) = split("\|",$gene_header);
+        my ($name,$locus_id,$scaff_id,$isomer) = split(/\|/,$gene_header);
         $gene = $isomer;
     }elsif ($species eq "Human") {
         $gene = $gene_header;
@@ -197,29 +219,29 @@ sub find_gene {
     }elsif ($species eq "Neurospora") {
         $gene = $gene_header;
     }elsif ($species eq "Oryza_sativa") {
-        my ($isomer,$temp,$type) = split("\|",$gene_header);
+        my ($isomer,$temp,$type) = split(/\|/,$gene_header);
         $gene = $isomer;
     }elsif ($species eq "Physcomitreall") {
-        my ($name,$locus_id,$chrom_id,$prot_id) = split("\|",$gene_header);
+        my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
         $gene = $prot_id; #???
     }elsif ($species eq "Poplar") {
-        my ($name,$locus_id,$chrom_id,$prot_id) = split("\|",$gene_header);
+        my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
         $gene = $prot_id; #???
     }elsif ($species eq "Sacc_cerevisiae") {
         $gene = $gene_header;
     }elsif ($species eq "Sacc_pombe") {
         $gene = $gene_header;
     }elsif ($species eq "Selaginella") {
-        my ($name,$locus_id,$chrom_id,$prot_id) = split("\|",$gene_header);
+        my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
         $gene = $prot_id; #???
     }elsif ($species eq "Sorghum") {
-        my ($name,$locus_id,$scaff_id,$prot_id) = split("\|",$gene_header);
+        my ($name,$locus_id,$scaff_id,$prot_id) = split(/\|/,$gene_header);
         $gene = $prot_id; #???
     }elsif ($species eq "Synechosystis") {
         my ($gene_id,$type,$temp) = split(" ",$gene_header);
         $gene = $gene_id; #???
     }elsif ($species eq "Grape") {
-        my ($name,$gene_id,$chrom_id,$id) = split("\|",$gene_header);
+        my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
         $gene = $gene_id; #???
     }else {
         die "Error: Gene id can not be found!";
diff --git a/interactome_scripts/supercluster.pl b/interactome_scripts/supercluster.pl
new file mode 100755
index 0000000..5a5a1f8
--- /dev/null
+++ b/interactome_scripts/supercluster.pl
@@ -0,0 +1,180 @@
+#!/usr/bin/perl
+
+use warnings;
+use strict;
+
+use DBI;
+use Term::Screen::ReadLine;
+
+
+# define the database handle to be used
+
+my $screen = Term::Screen::ReadLine->new();
+    # clear the screen
+    $screen->clrscr;
+    # ask for username
+    $screen->at(0,0)->puts("Username: ");
+    my $username = $screen->readline(ROW => 0, COL=>11);
+
+    # ask for password, replace character presses with stars
+    $screen->at(1,0)->puts("Password: ");
+    my $password = $screen->readline(ROW => 1, COL => 11, PASSWORD => 1);
+
+    $screen->at(2,0);
+    undef $screen;
+
+my $dbh = DBI->connect('DBI:mysql:inparanoid_data;host=floret.cgrb.oregonstate.edu', $username, $password,
+    { RaiseError=> 1, AutoCommit=>1 }
+    ) or die "Failed to connect to database: $DBI::errstr";
+
+
+#### Note to self ####
+## To not have multiple values in mysql table, use insert ignore instead of insert. Or use replace.
+### also note that for insert ignore to work, must have a "unique" field
+
+# make the new table to hold the super clusters
+my $super_table = "super_clust";
+my $safe_super_table = $dbh->quote_identifier($super_table);
+
+$dbh->do("drop table if exists $safe_super_table");
+$dbh->do("CREATE TABLE $safe_super_table (
+    `super_id` INT( 11 ) NOT NULL ,
+    `species` VARCHAR( 255 ) NOT NULL ,
+    `gene` VARCHAR( 255 ) NOT NULL ,
+    UNIQUE ( `gene` )
+    ) TYPE = MYISAM");
+my $insert_sth = $dbh->prepare("insert ignore into $safe_super_table (super_id, species, gene) values (?,?,?)");
+
+my $super_id = 0; #initialize the super cluster id
+my %super_hash;
+
+#my @species = ("Maize", "Oryza_sativa", "Ath", "Sorghum", "Brachy");
+#my @species = ("Maize", "Oryza_sativa", "Ath");
+my @species = ("Ath", "Brachy", "C_elegans", "Chlamy", "Danio", "E_coli", "Fragaria", "Glycine", "Human", "Maize", "Mouse", "Neurospora", "Oryza_sativa", "Physcomitreall", "Poplar", "Sacc_cerevisiae", "Sacc_pombe", "Selaginella", "Sorghum", "Synechosystis", "Vitis_vinifera");
+my $num_species = @species;
+
+my %species_hash;
+
+for (my $i=0; $i<$num_species-1; $i++) {
+    for (my $j=$i+1; $j<$num_species; $j++) {
+        # hashes to store the paralogs and orthologs
+        my (%species_1_gene_hash, %species_2_gene_hash);
+
+        # make sure the species are listed in alphabetical order to get correct table names
+        my $species_1 = $species[$i];
+        my $species_2 = $species[$j];
+        if ($species_1 lt $species_2) {
+            $species_1 = $species_1;
+            $species_2 = $species_2;
+        } else {
+            my $spec_temp = $species_1;
+            $species_1 = $species_2;
+            $species_2 = $spec_temp;
+        }
+
+        if ($species_1 eq "Glycine") {
+            $species_1 = "Soy";
+        }
+
+        if ($species_2 eq "Glycine") {
+            $species_2 = "Soy";
+        }
+
+        if ($species_1 eq "Vitis_vinifera") {
+            $species_1 = "Grape";
+        }
+
+        if ($species_2 eq "Vitis_vinifera") {
+            $species_2 = "Grape";
+        }
+
+
+        my $spec_table = "$species_1" . "_" . "$species_2";
+        my $safe_species_table = $dbh->quote_identifier($spec_table);
+        my $sth = $dbh->prepare("select * from $safe_species_table");
+
+        my $id_prev = "";
+
+        my $rv = $sth->execute();
+
+        # error handling, make sure the table exists
+        if (!$rv) {
+            next;
+        }
+
+        while (my @line = $sth->fetchrow_array()) {
+            my ($id, $bit_score, $spec, $score, $gene) = @line;
+            if ($id ne $id_prev) {
+                if ($spec eq $species_1) {
+                    $species_1_gene_hash{$id} = $gene;
+                }else {
+                    $species_2_gene_hash{$id} = $gene;
+                }
+            }else {
+                if ($spec eq $species_1) {
+                    if(defined($species_1_gene_hash{$id})) {
+                        $species_1_gene_hash{$id} = "$species_1_gene_hash{$id} $gene";
+                    } else {
+                        $species_1_gene_hash{$id} = $gene;
+                    }
+                }else {
+                    if(defined($species_2_gene_hash{$id})) {
+                        $species_2_gene_hash{$id} = "$species_2_gene_hash{$id} $gene";
+                    } else {
+                        $species_2_gene_hash{$id} = $gene;
+                    }
+                }
+            }
+            $id_prev = $id;
+        }
+        $sth->finish();
+
+        # Each key defines a species pair cluster
+        foreach my $key (keys %species_1_gene_hash) {
+            if(defined($species_2_gene_hash{$key})) {
+                my (@spec_1_array, @spec_2_array);
+                @spec_1_array = split " ", $species_1_gene_hash{$key};
+                @spec_2_array = split " ", $species_2_gene_hash{$key};
+
+                # if the gene is already in a cluster, use its id #
+                my $super_temp_id;
+
+                foreach my $super_gene (@spec_1_array) {
+                    if(defined($super_hash{$super_gene})) {
+                        $super_temp_id = $super_hash{$super_gene};
+                        last;
+                    }
+                }
+
+                if(!defined($super_temp_id)) {
+                    foreach my $super_gene (@spec_2_array) {
+                        if(defined($super_hash{$super_gene})) {
+                            $super_temp_id = $super_hash{$super_gene};
+                            last;
+                        }
+                    }
+                }
+
+                # if none of the genes are in a cluster already, get a new id #
+                if(!defined($super_temp_id)) {
+                    ++$super_id;
+                    $super_temp_id = $super_id;
+                }
+
+                # build the hash and put the entries in the database
+                foreach my $super_gene (@spec_1_array) {
+                    $super_hash{$super_gene} = $super_temp_id;
+                    $insert_sth->execute($super_temp_id,$species_1,$super_gene);
+                }
+                foreach my $super_gene (@spec_2_array) {
+                    $super_hash{$super_gene} = $super_temp_id;
+                    $insert_sth->execute($super_temp_id,$species_2,$super_gene);
+                }
+
+            }
+        }
+
+    }
+}
+
+
-- 
2.34.1