still used as of slice 19

author preecej <preecej@localhost>

Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)

committer preecej <preecej@localhost>

Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)
author preecej <preecej@localhost>
Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)
committer preecej <preecej@localhost>
Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)
diff --git a/interactome_scripts/find_ortho_super_PR_current.pl b/interactome_scripts/find_ortho_super_PR_current.pl

new file mode 100755 (executable)

index 0000000..409b634
--- /dev/null
+++ b/interactome_scripts/find_ortho_super_PR_current.pl
@@ -0,0 +1,128 @@
+#!/usr/bin/perl
+
+###########################################################################
+#    Written by Justin Elser 4/14/10                                      #
+#                                                                         #
+#    This program takes an input file with a list of genes and            #
+#      finds the orthologs (and paralogs if the score in sth_get_ortho    #
+#      is changed) from the supercluster table                            #
+#                                                                         #
+###########################################################################
+
+use strict;
+use warnings;
+
+if (@ARGV != 2) { # exactly two arguments are required: the gene list and the output prefix
+       print STDERR "usage: $0 input_gene_list output_file_prefix\n"; # $0 reports the name the script was run as
+       exit 1; # non-zero status lets a calling shell detect the bad invocation
+}
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+my $dbh = DbiFloret::dbconnect();
+# Echo the output-file prefix on the default output handle (no trailing newline).
+print($ARGV[1]);
+
+#my @species_array = ("Oryza_sativa",
+my @species_array = ("Oryza_sativa.japonica.IRGSP", # index 0: not an output target; the per-species loop starts at index 1
+#my @species_array = ("Oryza_sativa.japonica.MSU",
+       # use db inparanoid_data_plantreactome
+       "Arachis_duramensis", # typo: should be "duranensis", notify JE
+       "Arachis_ipaensis",
+       "Capsicum_annuum",
+       "Cajanus_cajan", # still pulling from inparanoid while Ensembl decides when to bring it back
+       "Cicer_arietinum",
+       "Citrus_sinensis",
+       "Coffea_canephora",
+       "Fragaria_vesca",
+       # "Gossypium_hirsutum", # on hold
+       "Malus_domestica",
+       "Oryza_australiensis",
+       "Oryza_granulata",
+       "Oryza_kasalath",
+       "Oryza_minuta",
+       "Oryza_officinalis",
+       "Phoenix_dactylifera",
+       "Picea_abies",
+       "Pinus_taeda",
+       "Triticum_turgidum"
+       
+       # use slice14 data, not db inparanoid_data_bigset - too big, not in sync with selections from inparanoid_data_plantreactome
+       #"Eucalyptus_grandis",
+       #"Jatropha_curcas",
+       #"Mimulus_guttatus"
+       #
+       # use db inparanoid_data_eucalyptus
+       #"Synechocystis_pcc6803"
+       );
+#my @species_array = ("Oryza_sativa.japonica.IRGSP", $ARGV[1]);
+#my @species_array = ("Ath", "Maize");
+my $spec_array_size = @species_array; # scalar context: number of entries, used as the per-species loop bound
+
+# Read the gene list named by the first argument, one identifier per line,
+# and rewrite each identifier into the form used for the database lookup.
+my $in_file = $ARGV[0];
+
+# three-arg open on a lexical handle; abort instead of silently reading nothing
+open(my $in_fh, '<', $in_file) or die "Cannot open gene list '$in_file': $!\n";
+
+my @in_gene_array;
+
+while (my $in_gene = <$in_fh>) {
+       $in_gene =~ s/\s//g; # strip all whitespace, including the line ending
+       next if $in_gene eq ''; # a blank line is not a gene; do not query for it
+       # identifiers lacking a ".<digit>" suffix get a prefix-specific suffix appended
+       if ($in_gene !~ /\.\d$/) {
+               if ($in_gene =~ /^LOC/) {
+                       $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+               }
+               if ($in_gene =~ /^OS/) {
+                       $in_gene .= "-01"; # for IRGSP
+                       $in_gene =~ s/G/T/g; # every G becomes T; the output loop maps T back to G
+               }
+       }
+       push(@in_gene_array, $in_gene);
+}
+close($in_fh);
+
+# For each species after index 0, write "<prefix>_<species>.txt" with one
+# "input_gene<TAB>ortholog" line per gene of that species that shares a
+# super_id with an input gene.
+my $table = "super_clust";
+
+# Both statements are prepared once and re-executed; the species is bound as a
+# placeholder instead of being interpolated into the SQL text.
+my $sth_get_id = $dbh->prepare("select super_id from $table where gene = ?");
+my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = ?");
+#my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?");
+
+for my $i (1 .. $#species_array) {
+       my $species_name = $species_array[$i];
+       my $out_file = $ARGV[1] . "_$species_name.txt";
+       # fail loudly if the output file cannot be created
+       open(my $out_fh, '>', $out_file) or die "Cannot write '$out_file': $!\n";
+
+       foreach my $gene (@in_gene_array) {
+               # $gene aliases the array element, so the output form is built on a copy;
+               # editing $gene in place would change the id used to query later species.
+               # NOTE(review): T->G is applied to every gene, not only ids rewritten at load time.
+               (my $out_gene = $gene) =~ s/T/G/g;
+               $out_gene =~ s/-0\d$//;
+
+               $sth_get_id->execute($gene) or next;
+
+               # List assignment: the loop ends when the rows run out, not when a
+               # fetched super_id happens to be false (e.g. 0).
+               while (my ($id) = $sth_get_id->fetchrow_array()) {
+                       $sth_get_ortho->execute($id, $species_name) or next;
+
+                       while (my ($ortho) = $sth_get_ortho->fetchrow_array()) {
+                               print {$out_fh} "$out_gene\t$ortho\n";
+                       }
+               }
+       }
+       close($out_fh) or die "Cannot close '$out_file': $!\n";
+}
+
author	preecej <preecej@localhost>
	Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)
committer	preecej <preecej@localhost>
	Wed, 28 Aug 2019 22:37:01 +0000 (22:37 +0000)