--- /dev/null
+#!/usr/bin/perl
+
+###########################################################################
+# Written by Justin Elser 4/14/10 #
+# #
+# This program takes an input file with a list of genes and #
+# finds the orthologs (and paralogs if the score in sth_get_ortho #
+# is changed) from the supercluster table #
+# #
+###########################################################################
+
+use strict;
+use warnings;
+
+# Exactly two arguments are required: the gene-list file and the prefix used
+# to name the per-species output files.  A usage error is reported on STDERR
+# with a non-zero exit status so that calling scripts can detect it.
+if (@ARGV != 2) {
+    print STDERR "usage: find_ortho_super.pl input_gene_list output_file_prefix\n";
+    exit 1;
+}
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+# Connect through the lab's DbiFloret helper.  The returned handle has
+# prepare() called on it further down (presumably a DBI database handle --
+# confirm against DbiFloret).
+my $dbh = DbiFloret::dbconnect();
+
+# Echo the output-file prefix on standard output (no trailing newline).
+my $out_prefix_echo = $ARGV[1];
+print $out_prefix_echo;
+
+# Species names.  Each one is used both as the value matched against the
+# "species" column of the supercluster table and as part of the name of the
+# output file written for that species.
+#
+# NOTE(review): element 0 is never queried -- the per-species loop later in
+# this script skips it -- presumably because it is the species the input
+# genes belong to; confirm.
+#
+# Commented-out alternatives for element 0:
+#   "Oryza_sativa"
+#   "Oryza_sativa.japonica.MSU"
+my @species_array = (
+    "Oryza_sativa.japonica.IRGSP",
+
+    # use db inparanoid_data_plantreactome
+    "Arachis_duramensis",    # typo: should be "duranensis", notify JE
+    "Arachis_ipaensis",
+    "Capsicum_annuum",
+    "Cajanus_cajan",         # still pulling from inparanoid while Ensembl decides when to bring it back
+    "Cicer_arietinum",
+    "Citrus_sinensis",
+    "Coffea_canephora",
+    "Fragaria_vesca",
+    # "Gossypium_hirsutum",  # on hold
+    "Malus_domestica",
+    "Oryza_australiensis",
+    "Oryza_granulata",
+    "Oryza_kasalath",
+    "Oryza_minuta",
+    "Oryza_officinalis",
+    "Phoenix_dactylifera",
+    "Picea_abies",
+    "Pinus_taeda",
+    "Triticum_turgidum",
+
+    # use slice14 data, not db inparanoid_data_bigset - too big, not in sync with selections from inparanoid_data_plantreactome
+    #"Eucalyptus_grandis",
+    #"Jatropha_curcas",
+    #"Mimulus_guttatus"
+    #
+    # use db inparanoid_data_eucalyptus
+    #"Synechocystis_pcc6803"
+);
+
+# Other commented-out definitions of the whole list:
+#my @species_array = ("Oryza_sativa.japonica.IRGSP", $ARGV[1]);
+#my @species_array = ("Ath", "Maize");
+
+# Number of entries in the list above.
+my $spec_array_size = scalar @species_array;
+
+# Read the gene list named by the first command-line argument: one
+# identifier per line, with all whitespace stripped from each line.
+my $in_file = $ARGV[0];
+
+# Three-argument open with a lexical handle: the file name can never be
+# interpreted as an open mode, and a missing or unreadable file stops the
+# run instead of silently producing empty output files.
+open(my $in_fh, '<', $in_file)
+    or die "Cannot open gene list '$in_file': $!\n";
+
+my @in_gene_array;
+
+while (my $in_gene = <$in_fh>) {
+    chomp $in_gene;
+    $in_gene =~ s/\s//g;
+
+    # A blank line would otherwise be looked up as an empty gene name.
+    next if $in_gene eq '';
+
+    # Identifiers that do not already end in ".<digit>" get a suffix added.
+    if ($in_gene !~ /\.\d$/) {
+        if ($in_gene =~ /^LOC/) {
+            $in_gene .= ".1";       # add the suffix back so that the gene matches the db
+        }
+        if ($in_gene =~ /^OS/) {
+            $in_gene .= "-01";      # for IRGSP
+            # Every "G" in the identifier becomes "T" (presumably turning a
+            # gene ID into the matching transcript ID -- confirm).
+            $in_gene =~ s/G/T/g;
+        }
+    }
+    push(@in_gene_array, $in_gene);
+    #print "$in_gene\n";
+}
+close($in_fh);
+
+# For every species after the first entry of @species_array, write the file
+# "<output prefix>_<species>.txt" holding one "gene<TAB>ortholog" line for
+# each ortholog found through the supercluster table.
+#
+# NOTE(review): index 0 is skipped on purpose, presumably because it is the
+# species the input genes come from -- confirm.
+
+my $table = "super_clust";
+
+# Both statements are loop-invariant, so they are prepared once.  The species
+# is bound through a placeholder instead of being interpolated into the SQL.
+
+# look up the supercluster id(s) of a gene
+my $sth_get_id = $dbh->prepare("select super_id from $table where gene = ?");
+
+# get the members of a supercluster that belong to one species
+my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = ?");
+#my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?");
+
+for my $species (@species_array[1 .. $#species_array]) {
+
+    my $out_file = $ARGV[1] . "_$species.txt";
+    open(my $out_fh, '>', $out_file)
+        or die "Cannot open output file '$out_file': $!\n";
+
+    foreach my $gene (@in_gene_array) {
+
+        # Build the printed form of the identifier on a COPY.  $gene is an
+        # alias for the element of @in_gene_array, so editing it in place
+        # would change the key used for the database lookup on every later
+        # species.
+        # NOTE(review): the T->G replacement is applied to every identifier,
+        # not only to the ones rewritten on input -- confirm that is wanted.
+        (my $gene_label = $gene) =~ s/T/G/g;
+        $gene_label =~ s/-0\d$//;
+
+        # A failed execute skips this gene, leaving the run going.
+        $sth_get_id->execute($gene) or next;
+
+        # List assignment keeps the loop going for a false id (0 or ''),
+        # which a scalar assignment would mistake for "no more rows".
+        while (my ($id) = $sth_get_id->fetchrow_array()) {
+
+            $sth_get_ortho->execute($id, $species) or next;
+
+            while (my ($ortho) = $sth_get_ortho->fetchrow_array()) {
+                print {$out_fh} "$gene_label\t$ortho\n";
+            }
+        }
+    }
+
+    # Buffered write errors only surface at close time.
+    close($out_fh)
+        or die "Cannot close output file '$out_file': $!\n";
+}
+