From 36058bce23f8c3bbd84f4dddd424c23943b51f88 Mon Sep 17 00:00:00 2001
From: elserj
Date: Wed, 14 Apr 2010 21:19:14 +0000
Subject: [PATCH] Split find_ortho scripts to do it from the super_clust table instead of inparanoid

svn path=/; revision=16
---
NOTE(review): this patch was recovered from a copy in which all line breaks
were collapsed and angle-bracketed text was stripped.  Reconstructed here:
  * line breaks, indentation, comment-box padding and blank context lines
    (placement inferred from the hunk line counts; if the context whitespace
    does not match the target tree, apply with
    `git apply --ignore-whitespace`);
  * `while(<in_file>)` in both scripts -- the stripped copy read `while()`,
    which never reads from the handle opened just above the loop and closed
    just below it;
  * the author e-mail address on the From: line could not be recovered.
The blob ids on the index lines describe the original bytes -- TODO confirm
against the repository before relying on them.

 interactome_scripts/find_ortho_inpara.pl | 11 ++--
 interactome_scripts/find_ortho_super.pl  | 76 ++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100755 interactome_scripts/find_ortho_super.pl

diff --git a/interactome_scripts/find_ortho_inpara.pl b/interactome_scripts/find_ortho_inpara.pl
index 51d9504..08fca80 100755
--- a/interactome_scripts/find_ortho_inpara.pl
+++ b/interactome_scripts/find_ortho_inpara.pl
@@ -32,13 +32,15 @@ my @in_gene_array;
 while(<in_file>) {
     my $in_gene = $_;
     chomp $in_gene;
+    $in_gene =~ s/\s//g;
     $in_gene .= ".1"; # add the suffix back so that the gene matches the db
     push(@in_gene_array, $in_gene);
+    #print "$in_gene\n";
 
 }
 close(in_file);
 
-open(out_file, ">flower_ortho.txt");
+open(out_file, ">Ath_ortho.txt");
 
 for (my $i = 1; $i<$spec_array_size; $i++) {
 
@@ -49,7 +51,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
     my $sth_get_id = $dbh->prepare("select id from $table where gene = ?");
 
     # set up db query statement to get the orthologs using the cluster id
-    my $sth_get_ortho = $dbh->prepare("select gene from $table where id = ? and species = '$species_array[$i]' and score = '1'");
+    my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '0.0'");
 
     foreach my $gene (@in_gene_array) {
 
@@ -64,8 +66,9 @@ for (my $i = 1; $i<$spec_array_size; $i++) {
                 next;
             }
 
-            while (my $ortho = $sth_get_ortho->fetchrow_array()) {
-                print out_file "$gene\t$ortho\n";
+            while (my @line = $sth_get_ortho->fetchrow_array()) {
+                my ($ortho,$score) = @line;
+                print out_file "$gene\t$ortho\t$score\n";
             }
         }
     }
diff --git a/interactome_scripts/find_ortho_super.pl b/interactome_scripts/find_ortho_super.pl
new file mode 100755
index 0000000..45cbfa9
--- /dev/null
+++ b/interactome_scripts/find_ortho_super.pl
@@ -0,0 +1,76 @@
+#!/usr/bin/perl
+
+#####################################################################
+#  Written by Justin Elser 4/14/10                                  #
+#                                                                   #
+#  This program takes an input file with a list of genes and        #
+#  finds the orthologs (and paralogs if the score in sth_get_ortho  #
+#  is changed).                                                     #
+#                                                                   #
+#####################################################################
+
+use strict;
+use warnings;
+
+use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts";
+
+use DbiFloret;
+
+my $dbh = DbiFloret::dbconnect;
+
+#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine");
+my @species_array = ("Ath", "Maize");
+my $spec_array_size = @species_array;
+
+# read in list of genes from csv file given as argument
+my $in_file = $ARGV[0];
+
+open (in_file, "$in_file");
+
+my @in_gene_array;
+
+while(<in_file>) {
+    my $in_gene = $_;
+    chomp $in_gene;
+    $in_gene =~ s/\s//g;
+    $in_gene .= ".1"; # add the suffix back so that the gene matches the db
+    push(@in_gene_array, $in_gene);
+    #print "$in_gene\n";
+
+}
+close(in_file);
+
+open(out_file, ">Ath_ortho_super.txt");
+
+for (my $i = 1; $i<$spec_array_size; $i++) {
+
+    my $table = "super_clust";
+    #print out_file "$species_array[$i]\t$table\n";
+
+    # set up the db query statement
+    my $sth_get_id = $dbh->prepare("select super_id from $table where gene = ?");
+
+    # set up db query statement to get the orthologs using the cluster id
+    my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = '$species_array[$i]'");
+
+    foreach my $gene (@in_gene_array) {
+
+        my $rv1 = $sth_get_id->execute($gene);
+        if (!$rv1) {
+            next;
+        }
+
+        while (my $id = $sth_get_id->fetchrow_array()) {
+            my $rv2 = $sth_get_ortho->execute($id);
+            if (!$rv2) {
+                next;
+            }
+
+            while (my $ortho = $sth_get_ortho->fetchrow_array()) {
+                print out_file "$gene\t$ortho\n";
+            }
+        }
+    }
+}
+
+close(out_file);
-- 
2.34.1