From d6bc0a0131e3897cef2ab457722c127e1e369c87 Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 3 Sep 2010 00:26:16 +0000 Subject: [PATCH] Fixed scripts for Mamathas needs, and added better use of vars svn path=/; revision=25 --- interactome_scripts/find_ortho_inpara.pl | 28 +++++++++--- interactome_scripts/find_ortho_super.pl | 57 +++++++++++------------- 2 files changed, 47 insertions(+), 38 deletions(-) diff --git a/interactome_scripts/find_ortho_inpara.pl b/interactome_scripts/find_ortho_inpara.pl index 08fca80..a7876e4 100755 --- a/interactome_scripts/find_ortho_inpara.pl +++ b/interactome_scripts/find_ortho_inpara.pl @@ -12,16 +12,25 @@ use strict; use warnings; +if($#ARGV != 1) { + print "usage: find_ortho_inpara.pl input_gene_list output_file_prefix\n"; + exit; +} + use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts"; use DbiFloret; my $dbh = DbiFloret::dbconnect; -#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine"); -my @species_array = ("Ath", "Maize"); +my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy", "Poplar"); +#my @species_array = ("Ath", "Maize"); my $spec_array_size = @species_array; +# similarity score to determine if we should keep the paralogs. +# 0.0 would keep all paralogs, 1.0 would restrict to strict orthologs +my $sim_score = 0.0; + # read in list of genes from csv file given as argument my $in_file = $ARGV[0]; @@ -33,25 +42,29 @@ while() { my $in_gene = $_; chomp $in_gene; $in_gene =~ s/\s//g; - $in_gene .= ".1"; # add the suffix back so that the gene matches the db + if ($in_gene !~ /\.\d$/) { + $in_gene .= ".1"; # add the suffix back so that the gene matches the db + } push(@in_gene_array, $in_gene); #print "$in_gene\n"; } close(in_file); -open(out_file, ">Ath_ortho.txt"); for (my $i = 1; $i<$spec_array_size; $i++) { + my $out_file = $ARGV[1] . "_$species_array[$i].txt"; + open(out_file, ">$out_file"); + my $table = $species_array[0] . "_" . $species_array[$i]; - print out_file "$species_array[$i]\t$table\n"; + print out_file "gene\tortho_gene\tscore\n"; # set up the db query statement my $sth_get_id = $dbh->prepare("select id from $table where gene = ?"); # set up db query statement to get the orthologs using the cluster id - my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '0.0'"); + my $sth_get_ortho = $dbh->prepare("select gene, score from $table where id = ? and species = '$species_array[$i]' and score >= '$sim_score'"); foreach my $gene (@in_gene_array) { @@ -72,6 +85,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) { } } } + close(out_file); } -close(out_file); + diff --git a/interactome_scripts/find_ortho_super.pl b/interactome_scripts/find_ortho_super.pl index ca7af46..d4a3a51 100755 --- a/interactome_scripts/find_ortho_super.pl +++ b/interactome_scripts/find_ortho_super.pl @@ -1,25 +1,30 @@ #!/usr/bin/perl -##################################################################### -# Written by Justin Elser 4/14/10 # -# # -# This program takes an input file with a list of genes and # -# finds the orthologs (and paralogs if the score in sth_get_ortho # -# is changed). # -# # -##################################################################### +########################################################################### +# Written by Justin Elser 4/14/10 # +# # +# This program takes an input file with a list of genes and # +# finds the orthologs (and paralogs if the score in sth_get_ortho # +# is changed) from the supercluster table # +# # +########################################################################### use strict; use warnings; +if($#ARGV != 1) { + print "usage: find_ortho_super.pl input_gene_list output_file_prefix\n"; + exit; +} + use lib "$ENV{HOME}/scripts/jaiswallab/interactome_scripts"; use DbiFloret; my $dbh = DbiFloret::dbconnect; -#my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine"); -my @species_array = ("Ath", "Maize"); +my @species_array = ("Ath", "Oryza_sativa", "Sorghum", "Maize", "Glycine", "Brachy"); +#my @species_array = ("Ath", "Maize"); my $spec_array_size = @species_array; # read in list of genes from csv file given as argument @@ -33,17 +38,20 @@ while() { my $in_gene = $_; chomp $in_gene; $in_gene =~ s/\s//g; - $in_gene .= ".1"; # add the suffix back so that the gene matches the db + if ($in_gene !~ /\.\d$/) { + $in_gene .= ".1"; # add the suffix back so that the gene matches the db + } push(@in_gene_array, $in_gene); #print "$in_gene\n"; } close(in_file); -open(out_file, ">Ath_ortho_super.txt"); - for (my $i = 1; $i<$spec_array_size; $i++) { + my $out_file = $ARGV[1] . "_$species_array[$i].txt"; + open(out_file, ">$out_file"); + my $table = "super_clust"; #print out_file "$species_array[$i]\t$table\n"; @@ -53,10 +61,7 @@ for (my $i = 1; $i<$spec_array_size; $i++) { # set up db query statement to get the orthologs using the cluster id my $sth_get_ortho = $dbh->prepare("select gene from $table where super_id = ? and species = '$species_array[$i]'"); #my $sth_get_ortho = $dbh->prepare("select gene,species from $table where super_id = ?"); - - # setup query to make sure there is an ortholog gene in the cluster - my $sth_check_ortho = $dbh->prepare("select species from $table where super_id = ? and species = '$species_array[0]'"); - + foreach my $gene (@in_gene_array) { my $rv1 = $sth_get_id->execute($gene); @@ -65,19 +70,9 @@ for (my $i = 1; $i<$spec_array_size; $i++) { } while (my $id = $sth_get_id->fetchrow_array()) { - #my $rv2 = $sth_check_ortho->execute($id); - #if (!$rv2) { - # next; - #} - #my $check = $sth_check_ortho->fetchrow_array(); - #print "$id\t$check\n"; - #if ($check ne "$species_array[0]") { - # print "no match\t\t$id\t$check\n"; - # next; - #} - - my $rv3 = $sth_get_ortho->execute($id); - if (!$rv3) { + + my $rv2 = $sth_get_ortho->execute($id); + if (!$rv2) { next; } @@ -86,6 +81,6 @@ for (my $i = 1; $i<$spec_array_size; $i++) { } } } + close(out_file); } -close(out_file); -- 2.34.1