From 30b0063c68fa40277d2e183dc26f727f2a8855a3 Mon Sep 17 00:00:00 2001
From: preecej <preecej@localhost>
Date: Wed, 28 Aug 2019 22:37:01 +0000
Subject: [PATCH] still used as of slice 19

svn path=/; revision=667
---
 .../find_ortho_super_PR_current.pl            | 128 ++++++++++++++++++
 1 file changed, 128 insertions(+)
 create mode 100755 interactome_scripts/find_ortho_super_PR_current.pl

diff --git a/interactome_scripts/find_ortho_super_PR_current.pl b/interactome_scripts/find_ortho_super_PR_current.pl
new file mode 100755
index 0000000..409b634
--- /dev/null
+++ b/interactome_scripts/find_ortho_super_PR_current.pl
@@ -0,0 +1,128 @@
+#!/usr/bin/perl
+
+###########################################################################
+#    Written by Justin Elser 4/14/10                                      #
+#                                                                         #
+#    This program takes an input file with a list of genes and            #
+#      finds the orthologs (and paralogs if the score in sth_get_ortho    #
+#      is changed) from the supercluster table                            #
+#                                                                         #
+###########################################################################
+
+use strict;
+use warnings;
+
+# Exactly two arguments are required (input gene list, output file prefix).
+# On misuse, report on STDERR and exit non-zero so wrappers can detect it.
+die "usage: $0 input_gene_list output_file_prefix\n"
+	unless @ARGV == 2;
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+my $dbh = DbiFloret::dbconnect();    # handle used for every query further down
+# Echo the output-file prefix to STDOUT (no newline is appended).
+print STDOUT $ARGV[1];
+
+# Species to process. The ortholog loop starts at index 1, so element 0 is never
+# queried; values tried there before: "Oryza_sativa", "Oryza_sativa.japonica.MSU".
+my @species_array = (
+	'Oryza_sativa.japonica.IRGSP',
+	# use db inparanoid_data_plantreactome
+	'Arachis_duramensis', # typo: should be "duranensis", notify JE
+	'Arachis_ipaensis',
+	'Capsicum_annuum',
+	'Cajanus_cajan', # still pulling from inparanoid while Ensembl decides when to bring it back
+	'Cicer_arietinum',
+	'Citrus_sinensis',
+	'Coffea_canephora',
+	'Fragaria_vesca',
+	# 'Gossypium_hirsutum', # on hold
+	'Malus_domestica',
+	'Oryza_australiensis',
+	'Oryza_granulata',
+	'Oryza_kasalath',
+	'Oryza_minuta',
+	'Oryza_officinalis',
+	'Phoenix_dactylifera',
+	'Picea_abies',
+	'Pinus_taeda',
+	'Triticum_turgidum',
+
+	# use slice14 data, not db inparanoid_data_bigset - too big, not in sync with selections from inparanoid_data_plantreactome
+	# 'Eucalyptus_grandis',
+	# 'Jatropha_curcas',
+	# 'Mimulus_guttatus'
+	#
+	# use db inparanoid_data_eucalyptus
+	# 'Synechocystis_pcc6803'
+);
+# Other sets used previously: ("Oryza_sativa.japonica.IRGSP", $ARGV[1]) and ("Ath", "Maize").
+my $spec_array_size = scalar @species_array;
+
+# Read the gene list (one ID per line) from the file named by the first
+# argument and normalise every ID to the form used for the DB lookup.
+my $in_file = $ARGV[0];
+
+# 3-arg open with a lexical handle; fail loudly instead of reading nothing.
+open(my $in_fh, '<', $in_file) or die "cannot open '$in_file' for reading: $!\n";
+
+my @in_gene_array;
+
+while (my $in_gene = <$in_fh>) {
+	$in_gene =~ s/\s//g;       # drops the line ending and any embedded whitespace
+	next if $in_gene eq '';    # blank line: there is no gene to look up
+	# IDs that do not already end in ".<digit>" get a suffix appended
+	if ($in_gene !~ /\.\d$/) {
+		if ($in_gene =~ /^LOC/) {
+			$in_gene .= ".1"; # add the suffix back so that the gene matches the db
+		}
+		if ($in_gene =~ /^OS/) {
+			$in_gene .= "-01"; # for IRGSP
+			$in_gene =~ s/G/T/g;
+		}
+	}
+	push(@in_gene_array, $in_gene);
+}
+close($in_fh);
+
+# For every species after the first, write "<prefix>_<species>.txt" containing
+# one "input_gene<TAB>ortholog" line for each gene of that species that shares
+# a super_id with an input gene.
+my $table = "super_clust";
+
+# Both statements are loop-invariant, so they are prepared once; the species is
+# bound as a placeholder instead of being interpolated into the SQL text.
+my $sth_get_id    = $dbh->prepare("select super_id from $table where gene = ?");
+my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = ?");
+
+for my $species (@species_array[1 .. $#species_array]) {
+
+	my $out_file = $ARGV[1] . "_$species.txt";
+	open(my $out_fh, '>', $out_file) or die "cannot open '$out_file' for writing: $!\n";
+
+	foreach my $gene (@in_gene_array) {
+
+		# $gene aliases the array element: build the output label on a copy so
+		# the lookup form of the ID is still intact for the next species.
+		my $label = $gene;
+		$label =~ s/T/G/g;
+		$label =~ s/-0\d$//g;
+
+		$sth_get_id->execute($gene) or next;
+
+		# list assignment: a false-valued super_id (e.g. 0) must not end the loop
+		while (my ($id) = $sth_get_id->fetchrow_array()) {
+
+			$sth_get_ortho->execute($id, $species) or next;
+
+			while (my ($ortho) = $sth_get_ortho->fetchrow_array()) {
+				print $out_fh "$label\t$ortho\n";
+			}
+		}
+	}
+	# buffered write errors only surface when the handle is closed
+	close($out_fh) or die "error closing '$out_file': $!\n";
+}
+
-- 
2.34.1