From 30b0063c68fa40277d2e183dc26f727f2a8855a3 Mon Sep 17 00:00:00 2001
From: preecej
Date: Wed, 28 Aug 2019 22:37:01 +0000
Subject: [PATCH] still used as of slice 19

svn path=/; revision=667
---
 .../find_ortho_super_PR_current.pl            | 140 ++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100755 interactome_scripts/find_ortho_super_PR_current.pl

diff --git a/interactome_scripts/find_ortho_super_PR_current.pl b/interactome_scripts/find_ortho_super_PR_current.pl
new file mode 100755
index 0000000..409b634
--- /dev/null
+++ b/interactome_scripts/find_ortho_super_PR_current.pl
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+
+###########################################################################
+# Written by Justin Elser 4/14/10
+#
+# This program takes an input file with a list of genes and finds the
+# orthologs (and paralogs if the score in sth_get_ortho is changed) from
+# the supercluster table.
+#
+# Usage: find_ortho_super.pl input_gene_list output_file_prefix
+#
+# One tab-separated file named <output_file_prefix>_<species>.txt is
+# written for every entry of @species_array after the first; each line
+# holds an input gene and one gene from the same supercluster.
+###########################################################################
+
+use strict;
+use warnings;
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+if ($#ARGV != 1) {
+    print "usage: find_ortho_super.pl input_gene_list output_file_prefix\n";
+    exit;
+}
+
+my $dbh = DbiFloret::dbconnect;
+
+print $ARGV[1];
+
+# The first entry is skipped by the main loop below: only the remaining
+# species get an output file.
+#my @species_array = ("Oryza_sativa",
+my @species_array = ("Oryza_sativa.japonica.IRGSP",
+#my @species_array = ("Oryza_sativa.japonica.MSU",
+    # use db inparanoid_data_plantreactome
+    "Arachis_duramensis", # typo: should be "duranensis", notify JE
+    "Arachis_ipaensis",
+    "Capsicum_annuum",
+    "Cajanus_cajan", # still pulling from inparanoid while Ensembl decides when to bring it back
+    "Cicer_arietinum",
+    "Citrus_sinensis",
+    "Coffea_canephora",
+    "Fragaria_vesca",
+    # "Gossypium_hirsutum", # on hold
+    "Malus_domestica",
+    "Oryza_australiensis",
+    "Oryza_granulata",
+    "Oryza_kasalath",
+    "Oryza_minuta",
+    "Oryza_officinalis",
+    "Phoenix_dactylifera",
+    "Picea_abies",
+    "Pinus_taeda",
+    "Triticum_turgidum"
+
+    # use slice14 data, not db inparanoid_data_bigset - too big, not in sync with selections from inparanoid_data_plantreactome
+    #"Eucalyptus_grandis",
+    #"Jatropha_curcas",
+    #"Mimulus_guttatus"
+    #
+    # use db inparanoid_data_eucalyptus
+    #"Synechocystis_pcc6803"
+    );
+#my @species_array = ("Oryza_sativa.japonica.IRGSP", $ARGV[1]);
+#my @species_array = ("Ath", "Maize");
+my $spec_array_size = @species_array;
+
+# read in list of genes from csv file given as argument
+my $in_file = $ARGV[0];
+
+open(my $in_fh, '<', $in_file) or die "Cannot open $in_file for reading: $!\n";
+
+my @in_gene_array;
+
+while (my $in_gene = <$in_fh>) {
+    chomp $in_gene;
+    $in_gene =~ s/\s//g;
+    next if $in_gene eq '';    # skip blank lines; they can never match a gene
+    if ($in_gene !~ /\.\d$/) {
+        if ($in_gene =~ /^LOC/) {
+            $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+        }
+        if ($in_gene =~ /^OS/) {
+            $in_gene .= "-01"; # for IRGSP
+            $in_gene =~ s/G/T/g;
+        }
+    }
+    push(@in_gene_array, $in_gene);
+}
+close($in_fh);
+
+my $table = "super_clust";
+
+# Both statements are the same for every species, so prepare them once.
+# look up the supercluster id(s) of a gene
+my $sth_get_id = $dbh->prepare("select super_id from $table where gene = ?");
+
+# get the orthologs in one species using the cluster id
+my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = ?");
+#my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?");
+
+# index 0 is skipped on purpose, see @species_array above
+for (my $i = 1; $i < $spec_array_size; $i++) {
+
+    my $species = $species_array[$i];
+    my $out_file = $ARGV[1] . "_$species.txt";
+    open(my $out_fh, '>', $out_file) or die "Cannot open $out_file for writing: $!\n";
+
+    foreach my $gene (@in_gene_array) {
+
+        # Build the reported name on a copy: $gene aliases the element of
+        # @in_gene_array, and the db-style id must survive for the next species.
+        my $gene_label = $gene;
+        $gene_label =~ s/T/G/g;
+        $gene_label =~ s/-0\d$//g;
+
+        my $rv1 = $sth_get_id->execute($gene);
+        if (!$rv1) {
+            next;
+        }
+
+        # list-context fetch so a false-looking id (e.g. 0) does not end the loop
+        while (my ($id) = $sth_get_id->fetchrow_array()) {
+
+            my $rv2 = $sth_get_ortho->execute($id, $species);
+            if (!$rv2) {
+                next;
+            }
+
+            while (my ($ortho) = $sth_get_ortho->fetchrow_array()) {
+                print {$out_fh} "$gene_label\t$ortho\n";
+            }
+        }
+    }
+    close($out_fh) or die "Cannot close $out_file: $!\n";
+}
+
-- 
2.34.1