# Seems to work fine #
# Version 1.01 - Nov 4 '09 #
# Added support for strawberry #
+# Version 1.1 - April 2010 #
+# Changed to use external subs to find species #
# #
###############################################################
use DBI;
use Term::Screen::ReadLine;
+require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+
# open the file to be parsed, assume this is all sqltable* files in directory
my @files = glob("sqltable*");    # every path in the current directory whose name starts with "sqltable"
my $clust_table = "clusters";
my %id_hash;    # keyed by cluster id; probed with defined() further down
+
+ print "Working on $species_table\n";    # $species_table is assigned outside the lines visible here
# skip isomers that are not .1
next if ($gene =~ /\.[2-9]$/);    # single-digit suffixes .2 through .9
+ next if ($gene =~ /\.1[0-9]$/);    # this and the next eight patterns cover two-digit suffixes .10 through .99
+ next if ($gene =~ /\.2[0-9]$/);
+ next if ($gene =~ /\.3[0-9]$/);
+ next if ($gene =~ /\.4[0-9]$/);
+ next if ($gene =~ /\.5[0-9]$/);
+ next if ($gene =~ /\.6[0-9]$/);
+ next if ($gene =~ /\.7[0-9]$/);
+ next if ($gene =~ /\.8[0-9]$/);
+ next if ($gene =~ /\.9[0-9]$/);
+ # assume there are never more than 99 isomers (a suffix of three or more digits is not skipped)
if(!defined($id_hash{$clust_id})) {    # true when this cluster id has no defined entry in %id_hash
if ($clust_id ne $clust_id_prev) {    # string comparison against the previously held cluster id
$dbh->disconnect;    # close the DBI handle; where it is opened is not visible in these lines
-sub find_species {    # Translate a name (first argument) into the short species label; dies if no pattern matches.
- my $temp = $_[0];    # starts as the caller's string and is overwritten with the label on a match
- if ($temp =~ /Arabidopsis/) {    # unanchored, case-sensitive substring tests tried top to bottom; first hit wins
- $temp = "Ath";
- }elsif ($temp =~ /brachypodium/) {    # lower-case pattern: a capitalised "Brachypodium" does not match
- $temp = "Brachy";
- }elsif ($temp =~ /elegans/) {
- $temp = "C_elegans";
- }elsif ($temp =~ /Chlamy/) {
- $temp = "Chlamy";
- }elsif ($temp =~ /Danio/) {
- $temp = "Danio";
- }elsif ($temp =~ /E\_coli/) {    # \_ is just a literal underscore
- $temp = "E_coli";
- }elsif ($temp =~ /Fragaria/) {
- $temp = "Fragaria";
- }elsif ($temp =~ /Glycine/) {
- $temp = "Soy";
- }elsif ($temp =~ /Homo\_sapiens/) {
- $temp = "Human";
- }elsif ($temp =~ /Maize/) {
- $temp = "Maize";
- }elsif ($temp =~ /musculus/) {
- $temp = "Mouse";
- }elsif ($temp =~ /neurospora/) {    # lower-case pattern: a capitalised "Neurospora" does not match
- $temp = "Neurospora";
- }elsif ($temp =~ /Oryza\_sativa/) {
- $temp = "Oryza_sativa";
- }elsif ($temp =~ /Physcomit/) {
- $temp = "Physcomitreall";    # label spelling is what find_gene compares against; keep the two in step
- }elsif ($temp =~ /Populus/) {
- $temp = "Poplar";
- }elsif ($temp =~ /cerevisiae/) {
- $temp = "Sacc_cerevisiae";
- }elsif ($temp =~ /pombe/) {
- $temp = "Sacc_pombe";
- }elsif ($temp =~ /Selaginella/) {
- $temp = "Selaginella";
- }elsif ($temp =~ /Sorghum/) {
- $temp = "Sorghum";
- }elsif ($temp =~ /Synechosystis/) {
- $temp = "Synechosystis";
- }elsif ($temp =~ /Vitis\_vinifera/) {
- $temp = "Grape";
- }else {    # nothing matched: abort the script rather than return the raw name
- die "Error: Species can not be found from file name!";
- }
- return $temp;    # the short label, e.g. "Ath" or "Grape"
-}
-
-sub find_gene {    # Extract the gene identifier from a header string using the per-species field layout; dies on an unknown label.
- my $gene_header = $_[0];    # first argument: the raw header text
- my $species = $_[1];    # second argument: a short label as produced by find_species
- my $gene;    # branches tagged #??? keep the original marker; presumably the field choice is unverified - TODO confirm
- if ($species eq "Ath") {
- my ($name,$gene_id,$chrom,$isomer) = split(/\|/, $gene_header);    # only position matters; unused names just label the fields
- $gene = $isomer;    # fourth '|'-separated field
- }elsif ($species eq "Brachy") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "C_elegans") {
- my ($gene_id,$temp) = split(/\|/, $gene_header);
- $gene = $gene_id; #??? first '|'-separated field
- }elsif ($species eq "Chlamy") {
- my ($name,$locus_id,$scaff_id,$temp) = split(/\|/,$gene_header);
- $gene = $locus_id; #??? second '|'-separated field
- }elsif ($species eq "Danio") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "E_coli") {
- $gene = $gene_header; #??? header is used whole
- }elsif ($species eq "Fragaria") {
- my ($gene_id, $mrna_id, $method, $length) = split(/\|/, $gene_header);
- $gene = $gene_id;    # first '|'-separated field
- }elsif ($species eq "Soy") {
- my ($name,$locus_id,$scaff_id,$isomer) = split(/\|/,$gene_header);
- $gene = $isomer;    # fourth '|'-separated field
- }elsif ($species eq "Human") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Maize") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Mouse") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Neurospora") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Oryza_sativa") {
- my ($isomer,$temp,$type) = split(/\|/,$gene_header);
- $gene = $isomer;    # first '|'-separated field
- }elsif ($species eq "Physcomitreall") {
- my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #??? fourth '|'-separated field
- }elsif ($species eq "Poplar") {
- my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #??? fourth '|'-separated field
- }elsif ($species eq "Sacc_cerevisiae") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Sacc_pombe") {
- $gene = $gene_header;    # header is used whole
- }elsif ($species eq "Selaginella") {
- my ($name,$locus_id,$chrom_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #??? fourth '|'-separated field
- }elsif ($species eq "Sorghum") {
- my ($name,$locus_id,$scaff_id,$prot_id) = split(/\|/,$gene_header);
- $gene = $prot_id; #??? fourth '|'-separated field
- }elsif ($species eq "Synechosystis") {
- my ($gene_id,$type,$temp) = split(" ",$gene_header);    # a " " separator splits on runs of whitespace and ignores leading whitespace
- $gene = $gene_id; #??? first whitespace-separated token
- }elsif ($species eq "Grape") {
- my ($name,$gene_id,$chrom_id,$id) = split(/\|/,$gene_header);
- $gene = $gene_id; #??? second '|'-separated field
- }else {    # label not handled above: abort the script
- die "Error: Gene id can not be found!";
- }
- return $gene;    # undef if the header had fewer fields than the chosen position
-}
-
-
-
-