Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Add script to generate ortholog association files from GO Ath file
author elserj <elserj@localhost>
Wed, 19 Feb 2020 20:32:46 +0000 (20:32 +0000)
committer elserj <elserj@localhost>
Wed, 19 Feb 2020 20:32:46 +0000 (20:32 +0000)
svn path=/; revision=668

interactome_scripts/association_file_from_orthologs.pl [new file with mode: 0755]
interactome_scripts/association_file_from_orthologs_simple.pl [new file with mode: 0755]

diff --git a/interactome_scripts/association_file_from_orthologs.pl b/interactome_scripts/association_file_from_orthologs.pl
new file mode 100755 (executable)
index 0000000..66e327c
--- /dev/null
@@ -0,0 +1,96 @@
+#!/usr/bin/perl
+
+# Build a GAF 2.0 association file by projecting the non-IEA/ISS annotations
+# of a source GAF file onto the orthologs of the annotated genes.
+# Arguments: in_assoc_file gene_name_file ortho_file out_file
+#   gene_name_file columns: gene, description, trembl, swissprot, uniprot
+#   ortho_file columns:     id, gene, ortholog, score (lines starting "id" skipped)
+
+use strict;
+use warnings;
+
+use Time::localtime;
+# GAF dates are YYYYMMDD: year() is an offset from 1900 and mon() is 0-based.
+my $tm = localtime;
+my $curr_date = sprintf '%04d%02d%02d', $tm->year + 1900, $tm->mon + 1, $tm->mday;
+
+# check for arguments and explain usage
+if (@ARGV != 4) {
+    print STDERR "usage: association_file_from_orthologs.pl in_assoc_file gene_name_file ortho_file out_file\n";
+    exit 1;
+}
+my ($ingaffile, $ingenefile, $inorthofile, $outfile) = @ARGV;
+
+# Grab the find_species subroutines to get the species and taxon id
+my ($find_species_lib) = grep { -e $_ } (
+    "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl",
+    "$ENV{HOME}/bin/find_species.pl",
+);
+defined $find_species_lib
+    or die "find_species.pl not found under \$HOME/scripts/jaiswallab/interactome_scripts or \$HOME/bin\n";
+require $find_species_lib;
+
+# Get species and taxon
+my $species = find_species($ingaffile);
+my $found_taxon = find_taxon($species);
+
+# accession (swissprot or uniprot column) -> { gene, description }
+my %gene_hash;
+open(my $gene_fh, '<', $ingenefile) or die "Cannot open $ingenefile: $!\n";
+while (my $line = <$gene_fh>) {
+    chomp $line;
+    # limit -1 keeps trailing empty columns; short rows leave fields undefined
+    my ($gene, $description, undef, $swissprot, $uniprot) = split /\t/, $line, -1;
+    for my $accession ($swissprot, $uniprot) {
+        next unless defined $accession && $accession ne "";
+        $gene_hash{$accession} = { gene => $gene, description => $description };
+    }
+}
+close($gene_fh);
+
+# gene -> ortholog, kept only for rows whose score is above 0.05
+my %ortho_hash;
+open(my $ortho_fh, '<', $inorthofile) or die "Cannot open $inorthofile: $!\n";
+while (my $line = <$ortho_fh>) {
+    chomp $line;
+    next if ($line =~ /^id/);
+    my (undef, $gene, $ortho, $score) = split /\t/, $line, -1;
+    # skip blank or short rows instead of comparing an undefined score
+    next unless defined $score && $score ne "";
+    $ortho_hash{$gene} = $ortho if $score > 0.05;
+}
+close($ortho_fh);
+
+open(my $gaf_fh, '<', $ingaffile) or die "Cannot open $ingaffile: $!\n";
+open(my $out_fh, '>', $outfile) or die "Cannot write $outfile: $!\n";
+
+print {$out_fh} "!gaf-version: 2.0\n";
+print {$out_fh} "!Created via ortholog similiarity from Arabidopsis thaliana\n";
+while (my $line = <$gaf_fh>) {
+    chomp $line;
+    #skip comments
+    next if ($line =~ /^!/);
+
+    # Pad to the 17 GAF 2.0 columns so absent trailing columns print as empty
+    # strings instead of raising "uninitialized value" warnings.
+    my @cols = split /\t/, $line, -1;
+    push @cols, "" while @cols < 17;
+    my ($db,$db_id,$db_symbol,$qual,$ont_id,$db_ref,$ev,$with,$aspect,$db_obj_name,$db_obj_syn,$db_obj_type,$taxon,$date,$assigned_by,$annot_ext,$gp_form_id) = @cols;
+
+    #skip with evidence codes IEA and ISS
+    next if ($ev eq "IEA" || $ev eq "ISS");
+
+    # need both a gene for this accession and an ortholog for that gene
+    my $source = $gene_hash{$db_id} or next;
+    my $source_gene = $source->{gene};
+    next unless defined $source_gene && defined $ortho_hash{$source_gene};
+    my $ortho_gene = $ortho_hash{$source_gene};
+
+    print {$out_fh} join("\t",
+        $db, $ortho_gene, $ortho_gene, $qual, $ont_id, $db_ref, $ev,
+        "UniProtKB:$source_gene", $aspect, $source->{description}, $db_obj_syn,
+        $db_obj_type, "taxon:$found_taxon", $curr_date, "Justin_Elser",
+        $annot_ext, $gp_form_id), "\n";
+}
+close($gaf_fh);
+close($out_fh) or die "Error closing $outfile: $!\n";
diff --git a/interactome_scripts/association_file_from_orthologs_simple.pl b/interactome_scripts/association_file_from_orthologs_simple.pl
new file mode 100755 (executable)
index 0000000..16e7dca
--- /dev/null
@@ -0,0 +1,74 @@
+#!/usr/bin/perl
+
+# Build a GAF 2.0 association file by copying each annotation of a source GAF
+# file onto the gene paired with its DB object name (column 10) in ortho_file.
+# Arguments: in_assoc_file ortho_file out_file
+#   ortho_file columns: gene, ortholog (lines starting with "id" are skipped)
+
+use strict;
+use warnings;
+
+use Time::localtime;
+# GAF dates are YYYYMMDD: year() is an offset from 1900 and mon() is 0-based.
+my $tm = localtime;
+my $curr_date = sprintf '%04d%02d%02d', $tm->year + 1900, $tm->mon + 1, $tm->mday;
+print "$curr_date\n";
+
+# check for arguments and explain usage
+if (@ARGV != 3) {
+    print STDERR "usage: association_file_from_orthologs_simple.pl in_assoc_file ortho_file out_file\n";
+    exit 1;
+}
+my ($ingaffile, $inorthofile, $outfile) = @ARGV;
+
+# Grab the find_species subroutines to get the species and taxon id
+my ($find_species_lib) = grep { -e $_ } (
+    "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl",
+    "$ENV{HOME}/bin/find_species.pl",
+);
+defined $find_species_lib
+    or die "find_species.pl not found under \$HOME/scripts/jaiswallab/interactome_scripts or \$HOME/bin\n";
+require $find_species_lib;
+
+# Get species and taxon
+my $species = find_species($ingaffile);
+my $new_taxon = find_taxon($species);
+
+# ortholog (second column) -> gene (first column)
+my %ortho_hash;
+open(my $ortho_fh, '<', $inorthofile) or die "Cannot open $inorthofile: $!\n";
+while (my $line = <$ortho_fh>) {
+    chomp $line;
+    next if ($line =~ /^id/);
+    my ($gene, $ortho) = split /\t/, $line, -1;
+    # skip blank or one-column rows rather than keying on an undefined value
+    next unless defined $ortho && $ortho ne "";
+    $ortho_hash{$ortho} = $gene;
+}
+close($ortho_fh);
+
+open(my $gaf_fh, '<', $ingaffile) or die "Cannot open $ingaffile: $!\n";
+open(my $out_fh, '>', $outfile) or die "Cannot write $outfile: $!\n";
+
+print {$out_fh} "!gaf-version: 2.0\n";
+print {$out_fh} "!Created via ortholog similiarity from Arabidopsis thaliana\n";
+while (my $line = <$gaf_fh>) {
+    chomp $line;
+    #skip comments
+    next if ($line =~ /^!/);
+
+    # Pad to the 17 GAF 2.0 columns so absent trailing columns print as "".
+    my @cols = split /\t/, $line, -1;
+    push @cols, "" while @cols < 17;
+    my ($db,$db_id,$db_symbol,$qual,$ont_id,$db_ref,$ev,$with,$aspect,$db_obj_name,$db_obj_syn,$db_obj_type,$taxon,$date,$assigned_by,$annot_ext,$gp_form_id) = @cols;
+    #check if DB object name found in %ortho_hash
+    my $ortho_gene = $ortho_hash{$db_obj_name};
+    next unless defined $ortho_gene;
+    print {$out_fh} join("\t",
+        "MaizeGDB", $ortho_gene, $ortho_gene, $qual, $ont_id,
+        "PMID:24919147|PMID:21186353", "ISO", "AGI_LocusCode:$db_obj_name",
+        $aspect, $ortho_gene, "", $db_obj_type, "taxon:$new_taxon", $curr_date,
+        "Planteome:Justin_Elser", $annot_ext, $gp_form_id), "\n";
+}
+close($gaf_fh);
+close($out_fh) or die "Error closing $outfile: $!\n";