From 4744191d600850523cb5eaf08bd1a0b175cb2d56 Mon Sep 17 00:00:00 2001 From: elserj Date: Fri, 1 Mar 2019 20:59:22 +0000 Subject: [PATCH] Add new species and subroutines to use for common mart and ensembl downloads svn path=/; revision=659 --- interactome_scripts/find_species.pl | 662 ++++++++++++++-------------- 1 file changed, 340 insertions(+), 322 deletions(-) diff --git a/interactome_scripts/find_species.pl b/interactome_scripts/find_species.pl index 5dcb80d..fd38e36 100755 --- a/interactome_scripts/find_species.pl +++ b/interactome_scripts/find_species.pl @@ -16,6 +16,8 @@ sub find_species { $species = "Aegilops_tauschii"; }elsif ($temp =~ /Amborella\_trichopoda/) { $species = "Amborella_trichopoda"; + }elsif ($temp =~ /Arabidopsis\_halleri/) { + $species = "Arabidopsis_halleri"; }elsif ($temp =~ /Arabidopsis\_lyrata/) { $species = "Arabidopsis_lyrata"; }elsif ($temp =~ /Arabidopsis\_thaliana/) { @@ -24,6 +26,10 @@ sub find_species { $species = "Arachis_duranensis"; }elsif ($temp =~ /Arachis\_ipaensis/) { $species = "Arachis_ipaensis"; + }elsif ($temp =~ /Asparagus\_officinalis/) { + $species = "Asparagus_officinalis"; + }elsif ($temp =~ /Azolla/) { + $species = "Azolla_filiculoides"; }elsif ($temp =~ /Batrachochytrium/) { $species = "Batrachochytrium_dendrobatidis"; }elsif ($temp =~ /Brachypodium\_distachyon/) { @@ -42,7 +48,9 @@ sub find_species { $species = "Caenorhabditis_elegans"; }elsif ($temp =~ /Cajanus\_cajan/) { $species = "Cajanus_cajan"; - }elsif ($temp =~ /Cannabis/) { + }elsif ($temp =~ /Cannabis\_sativa\.CRBRx/) { + $species = "Cannabis_sativa.CRBRx"; + }elsif ($temp =~ /Cannabis\_sativa\.Purple/) { $species = "Cannabis_sativa.Purple.Kush"; }elsif ($temp =~ /Capsella/) { $species = "Capsella_rubella"; @@ -66,6 +74,8 @@ sub find_species { $species = "Cavia_porcellus"; }elsif ($temp =~ /Chlamy/) { $species = "Chlamydomonas_reinhardtii"; + }elsif ($temp =~ /Chondrus/) { + $species = "Chondrus_crispus"; }elsif ($temp =~ /Cicer\_arietinum/) { 
$species = "Cicer_arietinum"; }elsif ($temp =~ /Citrullus\_lanatus/) { @@ -76,6 +86,10 @@ sub find_species { $species = "Citrus_sinensis"; }elsif ($temp =~ /Coffea\_canephora/) {; $species = "Coffea_canephora"; + }elsif ($temp =~ /Corchorus\_capsularis/) { + $species = "Corchorus_capsularis"; + }elsif ($temp =~ /Corchorus\_olitorius/) { + $species = "Corchorus_olitorius"; }elsif ($temp =~ /Cucumis\_sativus/) { $species = "Cucumis_sativus"; }elsif ($temp =~ /Danio/) { @@ -86,6 +100,8 @@ sub find_species { $species = "Dipodomys_ordii"; }elsif ($temp =~ /Drosophila/) { $species = "Drosophila_melanogaster"; + }elsif ($temp =~ /Durio/) { + $species = "Durio_zibethinus"; }elsif ($temp =~ /Echinops_telfairi/) { $species = "Echinops_telfairi"; }elsif ($temp =~ /Ectocarpus/) { @@ -100,6 +116,8 @@ sub find_species { $species = "Epichloe\_festucae"; }elsif ($temp =~ /Erinaceus_europaeus/) { $species = "Erinaceus_europaeus"; + }elsif ($temp =~ /Erythranthe/) { + $species = "Erythranthe_guttata"; }elsif ($temp =~ /Escherichia\_coli/) { $species = "Escherichia_coli"; }elsif ($temp =~ /Eucalyptus\_grandis/) { @@ -126,6 +144,10 @@ sub find_species { $species = "Homo_sapiens"; }elsif ($temp =~ /Hordeum\_vulgare/) { $species = "Hordeum_vulgare"; + }elsif ($temp =~ /Humulus\_lupulus\.haplotig/) { + $species = "Humulus_lupulus.haplotig"; + }elsif ($temp =~ /Humulus\_lupulus\.primary/) { + $species = "Humulus_lupulus.primary"; }elsif ($temp =~ /Ictidomys_tridecemlineatus/) { $species = "Ictidomys_tridecemlineatus"; }elsif ($temp =~ /Jatropha/) { @@ -148,6 +170,8 @@ sub find_species { $species = "Manihot_esculenta"; }elsif ($temp =~ /Medicago/) { $species = "Medicago_truncatula"; + }elsif ($temp =~ /Mentha\_longifolia/) { + $species = "Mentha_longifolia"; }elsif ($temp =~ /Mimulus/) { $species = "Mimulus_guttatus"; }elsif ($temp =~ /Mus\_musculus/) { @@ -162,8 +186,12 @@ sub find_species { $species = "Nelumbo_nucifera"; }elsif ($temp =~ /Neurospora/) { $species = "Neurospora_crassa"; + 
}elsif ($temp =~ /Nicotiana/) { + $species = "Nicotiana_attenuata"; }elsif ($temp =~ /Nostoc/) { - $species = "Nostoc_punctiforme"; + $species = "Nostoc_punctiforme.pcc.73102"; + }elsif ($temp =~ /Ocimum/) { + $species = "Ocimum_tenuiflorum"; }elsif ($temp =~ /Oncorhynchus\_mykiss/) { $species = "Oncorhynchus\_mykiss"; }elsif ($temp =~ /Ornithorhynchus_anatinus/) { @@ -178,8 +206,8 @@ sub find_species { $species = "Oryza_brachyantha"; }elsif ($temp =~ /Oryza\_glaberrima/) { $species = "Oryza_glaberrima"; - }elsif ($temp =~ /Oryza\_glumaepatula/) { - $species = "Oryza_glumaepatula"; + }elsif ($temp =~ /Oryza\_glumipatula/) { + $species = "Oryza_glumipatula"; }elsif ($temp =~ /Oryza\_granulata/) { $species = "Oryza_granulata"; }elsif ($temp =~ /Oryza\_kasalath/) { @@ -198,10 +226,12 @@ sub find_species { $species = "Oryza_punctata"; }elsif ($temp =~ /Oryza\_rufipogon/) { $species = "Oryza_rufipogon"; - }elsif ($temp =~ /Oryza\_sativa.indica\.gramene/) { - $species = "Oryza_sativa.indica.gramene"; - }elsif ($temp =~ /Oryza\_sativa.indica\.iplant/) { - $species = "Oryza_sativa.indica.iplant"; + }elsif ($temp =~ /Oryza\_sativa\.AUS\.N22/) { + $species = "Oryza_sativa.AUS.N22"; + }elsif ($temp =~ /Oryza\_sativa.indica\_/) { + $species = "Oryza_sativa.indica"; + }elsif ($temp =~ /Oryza\_sativa.indica\.9311/) { + $species = "Oryza_sativa.indica.9311.OGE"; }elsif ($temp =~ /Oryza\_sativa.indica.IR29/) { $species = "Oryza_sativa.indica.IR29"; }elsif ($temp =~ /Oryza\_sativa.indica.pokkali/) { @@ -210,6 +240,8 @@ sub find_species { $species = "Oryza_sativa.japonica.IRGSP"; }elsif ($temp =~ /Oryza\_sativa.japonica.MSU/) { $species = "Oryza_sativa.japonica.MSU"; + }elsif ($temp =~ /Oryza\_sativa.japonica.OGE/) { + $species = "Oryza_sativa.japonica.OGE"; }elsif ($temp =~ /Oryza\_sativa.japonica\_iplant/) { $species = "Oryza_sativa.japonica.iplant"; }elsif ($temp =~ /Oryzias\_latipes/) { @@ -236,8 +268,8 @@ sub find_species { $species = "Pinus_taeda"; }elsif ($temp =~ 
/Populus\_trichocarpa\.ver2/) { $species = "Populus_trichocarpa.ver2"; - }elsif ($temp =~ /Populus\_trichocarpa\.ver3/) { - $species = "Populus_trichocarpa.ver3"; + }elsif ($temp =~ /Populus\_trichocarpa/) { + $species = "Populus_trichocarpa"; }elsif ($temp =~ /Procavia_capensis/) { $species = "Procavia_capensis"; }elsif ($temp =~ /Prunus/) { @@ -252,10 +284,12 @@ sub find_species { $species = "Ricinus_communis"; }elsif ($temp =~ /cerevisiae/) { $species = "Saccharomyces_cerevisiae"; - }elsif ($temp =~ /Salvia_hispancia\.salba/) { - $species = "Salvia_hispancia.salba"; + }elsif ($temp =~ /Salvia_hispanica/) { + $species = "Salvia_hispanica"; }elsif ($temp =~ /Salvia_splendens/) { $species = "Salvia_splendens"; + }elsif ($temp =~ /Salvinia\_cucullata/) { + $species = "Salvinia_cucullata"; }elsif ($temp =~ /pombe/) { $species = "Schizosaccharomyces_pombe"; }elsif ($temp =~ /Selaginella/) { @@ -271,7 +305,7 @@ sub find_species { }elsif ($temp =~ /Sorghum/) { $species = "Sorghum_bicolor"; }elsif ($temp =~ /Synechocystis/) { - $species = "Synechocystis_pcc6803"; + $species = "Synechocystis_sp.pcc.6803"; }elsif ($temp =~ /Takifugu/) { $species = "Takifugu\_rubripes"; }elsif ($temp =~ /Tarsius_syrichta/) { @@ -286,6 +320,8 @@ sub find_species { $species = "Trichodesmium_erythraeum"; }elsif ($temp =~ /Triticum\_aestivum/) { $species = "Triticum_aestivum"; + }elsif ($temp =~ /Triticum\_dicoccoides/) { + $species = "Triticum_dicoccoides"; }elsif ($temp =~ /Triticum\_monococcum\.DV92/) { $species = "Triticum_monococcum.DV92"; }elsif ($temp =~ /Triticum\_monococcum\.G3116/) { @@ -294,10 +330,20 @@ sub find_species { $species = "Triticum_turgidum"; }elsif ($temp =~ /Triticum\_urartu/) { $species = "Triticum_urartu"; + }elsif ($temp =~ /Utricularia/) { + $species = "Utricularia_gibba"; + }elsif ($temp =~ /Vigna/) { + $species = "Vigna_radiata"; }elsif ($temp =~ /Vitis\_vinifera/) { $species = "Vitis_vinifera"; }elsif ($temp =~ /Zea\_mays/) { $species = "Zea_mays"; + }elsif 
($temp =~/Zoysia_japonica/) { + $species = "Zoysia_japonica.Nagirizaki"; + }elsif ($temp =~/Zoysia_matrella/) { + $species = "Zoysia_matrella.Wakaba"; + }elsif ($temp =~/Zoysia_pacifica/) { + $species = "Zoysia_pacifica.Zanpa"; }else { die "Error: Species can not be found from file name $temp!"; } @@ -310,28 +356,35 @@ sub find_gene { my $species = $_[1]; my $gene; if ($species eq "Aegilops_tauschii") { - my ($gene_id, $type, $location, $info) = split(/\s/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Amborella_trichopoda") { - $gene = $gene_header; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Arabidopsis_halleri") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Arabidopsis_lyrata") { - my ($name,$gene_id) = split(/\|/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Arabidopsis_thaliana") { - my ($gene_id,$isomer) = split(/\|/, $gene_header); - $gene = $isomer; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Arachis_duranensis") { - my ($gene_id, $desc) = split(/\s/, $gene_header); - $gene = $gene_id; + my ($gene_info, $desc) = split(/\s/, $gene_header); + my ($spec, $random, $meth, $type, $spec2, $id, $transcript_no) = split(/\./, $gene_info); + $gene = "$spec2" . "." . "$id" . "." . "$transcipt_no"; + }elsif ($species eq "Arachis_ipaensis") { + my ($gene_info, $desc) = split(/\s/, $gene_header); + my ($spec, $random, $meth, $type, $spec2, $id, $transcript_no) = split(/\./, $gene_info); + $gene = "$spec2" . "." . "$id" . "." . 
"$transcipt_no"; }elsif ($species eq "Arachis_ipaensis") { my ($gene_id, $desc) = split(/\s/, $gene_header); $gene = $gene_id; + }elsif ($species eq "Asparagus_officinalis") { + $gene = mart_gene($gene_header); + }elsif ($species eq "Azolla_filiculoides") { + $gene = $gene_header; }elsif ($species eq "Batrachochytrium_dendrobatidis") { my ($transcript,$gene_id,$info) = split(/\s\|\s/, $gene_header); $gene = $transcript; }elsif ($species eq "Brachypodium_distachyon") { - my ($gene_id,$isomer) = split(/\|/, $gene_header); - $gene = $isomer; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Brachypodium_sylvaticum.Corv") { my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header); $gene = $gene_id; @@ -341,29 +394,28 @@ sub find_gene { my ($gene_id,$temp,$start,$end) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Brassica_oleracea") { - my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); - $gene = $protein; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Brassica_rapa") { - my ($gene_id,$transcript) = split(/\|/, $gene_header); - $gene = $transcript; #???? + $gene = ensembl_gene($gene_header); }elsif ($species eq "Caenorhabditis_elegans") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Cajanus_cajan") { - my ($gene_id,$type,$locus,$temp) = split(/\s/, $gene_header); - $gene = $gene_id; + my ($gene_info, $desc) = split(/\s/, $gene_header); + my ($spec, $random, $meth, $type, $spec2, $spec_gene, $transcript_no) = split(/\./, $gene_info); + $gene = "$spec2" . "." . "$spec_gene" . "." . 
"$transcript_no"; }elsif ($species eq "Cannabis_sativa.Purple.Kush") { my ($gene_id, $strand, $start, $stop) = split(/\|/, $gene_header); $gene = $gene_id; + }elsif ($species eq "Cannabis_sativa.CRBRx") { + my ($isoform, $gene_id, $tempj) = split(/\s/, $gene_header); + $gene = $isoform; }elsif ($species eq "Capsella_rubella") { - my ($gene_id, $transcript) = split(/\|/, $gene_header); - $gene = $transcript; + $gene = mart_gene($gene_header); }elsif ($species eq "Capsicum_annuum") { - $gene = $gene_header; - }elsif ($species eq "Carica_papaya") { - my ($contig,$gene_id) = split(/\|/, $gene_header); + my ($gene_id, $temp) = split(/\|/, $gene_header); $gene = $gene_id; + }elsif ($species eq "Carica_papaya") { + $gene = mart_gene($gene_header); }elsif ($species eq "Castor_canadensis") { my ($gene_info,$type,$gc,$gene_length) = split(/\s/, $gene_header); my ($gene_number,$gene_id) = split(/::/, $gene_info); @@ -390,30 +442,30 @@ sub find_gene { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Chlamydomonas_reinhardtii") { - my ($locus,$transcript) = split(/\|/,$gene_header); - $gene = $transcript; #??? 
+ $gene = ensembl_gene($gene_header); + }elsif ($species eq "Chondrus_crispus") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Cicer_arietinum") { - my ($gi,$number,$ref,$gene_id,$function) = split(/\|/, $gene_header); - $gene = $gene_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Citrullus_lanatus") { my ($gene_id, $type, $locus, $method) = split(/\s/, $gene_header); $gene = $gene_id; }elsif ($species eq "Citrus_clementina") { - my ($transcript_id,$gene_id) = split(/\|/,$gene_header); - $gene = $gene_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Citrus_sinensis") { - my ($transcript_id,$gene_id) = split(/\|/,$gene_header); - $gene = $gene_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Coffea_canephora") { my ($gene_id, $function) = split(/\s/,$gene_header); $gene = $gene_id; + }elsif ($species eq "Corchorus_capsularis") { + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Corchorus_olitorius") { + my ($type, $gene_id, $gene_id_suffix, $function) = split(/[\|\s]+/, $gene_header); + $gene = $gene_id; }elsif ($species eq "Cucumis_sativus") { - my ($gene_id,$isomer) = split(/\|/,$gene_header); - $gene = $isomer; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Danio_rerio") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Dasypus_novemcinctus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; @@ -421,9 +473,10 @@ sub find_gene { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Drosophila_melanogaster") { - my 
($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Durio_zibethinus") { + my ($gene_id, $type, $idk, $idk2, $quality) = split(/\s/, $gene_header); + $gene = $gene_id; }elsif ($species eq "Echinops_telfairi") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; @@ -442,15 +495,14 @@ sub find_gene { }elsif ($species eq "Erinaceus_europaeus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; + }elsif ($species eq "Erythranthe_guttata") { + $gene = mart_gene($gene_header); }elsif ($species eq "Escherichia_coli") { $gene = $gene_header; #??? }elsif ($species eq "Eucalyptus_grandis") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $gene = $transcript_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Fragaria_vesca") { - my ($gene_id, $mrna_id) = split(/\|/, $gene_header); - $gene_id =~ s/\-v1\.0\-hybrid//; - $gene = $gene_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Fusarium_graminearum") { my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header); $gene = $transcript; @@ -469,20 +521,19 @@ sub find_gene { $transcript =~ s/transcript\://; $gene = $transcript; }elsif ($species eq "Glycine_max") { - my ($locus_id,$isomer) = split(/\|/,$gene_header); - $gene = $isomer; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Gossypium_raimondii") { - my ($locus_id,$isomer) = split(/\|/,$gene_header); - $gene = $isomer; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Gossypium_hirsutum") { $gene = $gene_header; }elsif ($species eq "Homo_sapiens") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, 
$gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $gene = $isoform + $gene = ensembl_gene($gene_header); }elsif ($species eq "Hordeum_vulgare") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Humulus_lupulus.haplotig") { + $gene = $gene_header; + }elsif ($species eq "Humulus_lupulus.primary") { + $gene = $gene_header; }elsif ($species eq "Ictidomys_tridecemlineatus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform @@ -496,11 +547,9 @@ sub find_gene { my ($source,$spec,$gene_id,$scaffold) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Leersia_perrieri") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Linum_usitatissimum") { - my ($transcript,$gene_id) = split(/\|/, $gene_header); - $gene = $gene_id; + $gene = mart_gene($gene_header); }elsif ($species eq "Macropus_eugenii") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform @@ -508,23 +557,20 @@ sub find_gene { my ($transcript,$gene_id,$temp) = split(/\s\|\s/, $gene_header); $gene = $transcript; }elsif ($species eq "Malus_domestica" ) { - my ($gene_id1,$gene_id2) = split(/\|/, $gene_header); - $gene = $gene_id2; + $gene = mart_gene($gene_header); }elsif ($species eq "Manihot_esculenta") { - my ($transcript,$gene_id) = split(/\|/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Medicago_truncatula") { - my ($gene_id,$transcript) = split(/\|/, $gene_header); - $gene = $transcript; + 
$gene = ensembl_gene($gene_header); + }elsif ($species eq "Mentha_longifolia") { + $gene = $gene_header; }elsif ($species eq "Mimulus_guttatus") { my ($gene_id,$transcript) = split(/\|/, $gene_header); $gene = $transcript; }elsif ($species eq "Mus_musculus") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $gene = $isoform + $gene = ensembl_gene($gene_header); }elsif ($species eq "Musa_acuminata") { - my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); - $gene = $protein; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Mustela_putorius.furo") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; @@ -538,9 +584,13 @@ sub find_gene { }elsif ($species eq "Neurospora_crassa") { my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header); $gene = $transcript; - }elsif ($species eq "Nostoc_punctiforme") { - my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header); - $gene = $transcript; + }elsif ($species eq "Nicotiana_attenuata") { + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Nostoc_punctiforme.pcc.73102") { + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Ocimum_tenuiflorum") { + my ($gene_id, $number) = split(/\|/, $gene_header); + $gene = $gene_id; }elsif ($species eq "Oncorhynchus_mykiss") { my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header); $gene = $number; @@ -554,50 +604,40 @@ sub find_gene { my ($gene_id, $strand, $start, $stop) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_barthii") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_brachyantha") { - my ($gene_id,$chrom,$program,$function,$type) 
= split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_glaberrima") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; - }elsif ($species eq "Oryza_glumaepatula") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Oryza_glumipatula") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_granulata") { my ($gene_id, $strand, $start, $stop) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_kasalath") { $gene = $gene_header; }elsif ($species eq "Oryza_longistaminata") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_meridionalis") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_minuta") { my ($gene_id, $strand, $start, $stop) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_nivara") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_officinalis") { my ($gene_id, $strand, $start, $stop) = split(/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_punctata") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_rufipogon") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; - }elsif ($species eq "Oryza_sativa.indica.iplant") { - my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); - $gene = $gene_id; - }elsif ($species eq 
"Oryza_sativa.indica.gramene") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Oryza_sativa.indica") { + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Oryza_sativa.AUS.N22") { + $gene = $gene_header; + }elsif ($species eq "Oryza_sativa.indica.9311.OGE") { + $gene = $gene_header; }elsif ($species eq "Oryza_sativa.indica.IR29") { my ($gene_id,$position, $start, $length) = split(/\|/, $gene_header); $gene = $gene_id; @@ -608,12 +648,12 @@ sub find_gene { my ($gene_id,$chrom,$program,$function,$type) = split(/[\|\s]+/, $gene_header); $gene = $gene_id; }elsif ($species eq "Oryza_sativa.japonica.IRGSP") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Oryza_sativa.japonica.MSU") { - my ($gene_id,$transcript) = split(/\|/,$gene_header); - $gene = $transcript; + my ($gene_id,$function) = split(/\s/,$gene_header); + $gene = $gene_id; + }elsif ($species eq "Oryza_sativa.japonica.OGE") { + $gene = $gene_header; }elsif ($species eq "Oryzias_latipes") { my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); $transcript =~ s/transcript\://; @@ -625,8 +665,7 @@ sub find_gene { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Phaseolus_vulgaris") { - my ($gene_id, $transcript) = split(/\|/,$gene_header); - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Pediculus_humanus") { my ($gene_pa,$func,$func2,$unknown,$gene_id) = split(/[\|\s+]/, $gene_header); $gene = $gene_pa; @@ -637,8 +676,7 @@ sub find_gene 
{ my ($gene_id, $gene_model, $location, $dot, $strand, $dot2, $info) = split (/\|/, $gene_header); $gene = $gene_id; }elsif ($species eq "Physcomitrella_patens") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $gene = $transcript_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Phytophthora_infestans") { my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header); $gene = $transcript; @@ -651,60 +689,48 @@ sub find_gene { }elsif ($species eq "Populus_trichocarpa.ver2") { my ($gene_id,$transcript_id) = split(/\|/,$gene_header); $gene = $transcript_id; - }elsif ($species eq "Populus_trichocarpa.ver3") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $gene = $transcript_id; + }elsif ($species eq "Populus_trichocarpa") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Procavia_capensis") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Prunus_persica") { - my ($transcript,$gene_id) = split(/\|/,$gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Pteropus_vampyrus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Rattus_norvegicus") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $gene = $isoform; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Rhizopus_oryzae") { my ($jgi,$temp,$num,$gene_id) = split(/\|/, $gene_header); $gene = $num; }elsif ($species eq "Ricinus_communis") { - my ($transcript,$model) = split(/\|/, $gene_header); - $gene = $transcript; + $gene = mart_gene($gene_header); }elsif ($species eq "Saccharomyces_cerevisiae") { - my 
($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript - }elsif ($species eq "Salvia_hispancia.salba") { + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Salvia_hispanica") { $gene = $gene_header; }elsif ($species eq "Salvia_splendens") { $gene = $gene_header; + }elsif ($species eq "Salvinia_cucullata") { + $gene = $gene_header; }elsif ($species eq "Schizosaccharomyces_pombe") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript + $gene = ensembl_gene($gene_header); }elsif ($species eq "Selaginella_moellendorffii") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $gene = $locus_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Setaria_italica") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Solanum_lycopersicum") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $gene = $locus_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Solanum_tuberosum") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $gene = $prot_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Sorex_araneus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Sorghum_bicolor") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $gene = $transcript_id; - }elsif ($species eq "Synechocystis_pcc6803") { - my ($gene_id,$temp) = split(" ",$gene_header); - $gene = $gene_id; #??? 
+ $gene = ensembl_gene($gene_header); + }elsif ($species eq "Synechocystis_sp.pcc.6803") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Takifugu_rubripes") { my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); $transcript =~ s/transcript\://; @@ -713,22 +739,17 @@ sub find_gene { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $gene = $isoform; }elsif ($species eq "Tetraodon_nigroviridis") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Theobroma_cacao") { - my ($gene_id,$transcript) = split(/\|/,$gene_header); - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Tolypocladium_inflatum") { $gene = $gene_header; }elsif ($species eq "Trichodesmium_erythraeum") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Triticum_aestivum") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Triticum_dicoccoides") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Triticum_monococcum.DV92") { my ($gene_id,$temp) = split(/\|/,$gene_header); $gene = $gene_id; @@ -739,16 +760,25 @@ sub find_gene { my ($gi,$temp,$gb,$gene_id,$strand,$start,$stop) = split(/[\|\s]+/, $gene_header); $gene = $gene_id; }elsif ($species eq "Triticum_urartu") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ 
s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Utricularia_gibba") { + my ($isoform, $gene_id) = split(/\|/, $gene_header); + $gene = $isoform; + }elsif ($species eq "Vigna_radiata") { + $gene = ensembl_gene($gene_header); }elsif ($species eq "Vitis_vinifera") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $gene = $gene_id; + $gene = ensembl_gene($gene_header); }elsif ($species eq "Zea_mays") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene = $transcript; + $gene = ensembl_gene($gene_header); + }elsif ($species eq "Zoysia_japonica.Nagirizaki") { + my ($gene_id, $location, $strand) = split(/\s/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Zoysia_matrella.Wakaba") { + my ($gene_id, $location, $strand) = split(/\s/, $gene_header); + $gene = $gene_id; + }elsif ($species eq "Zoysia_pacifica.Zanpa") { + my ($gene_id, $location, $strand) = split(/\s/, $gene_header); + $gene = $gene_id; }else { die "Error: Gene id can not be found for species $species!"; @@ -762,30 +792,39 @@ sub find_gene_synonym { $gene_header =~ s/^>//; # strip off the header line identifier, if it isn't already my $species = $_[1]; my $synonym; - if ($species eq "Batrachochytrium_dendrobatidis") { + if ($species eq "Aegilops_tauschii") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Amborella_trichopoda") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Arabidopsis_halleri") { + $gene = ensembl_synonym($gene_header); + }elsif ($species eq "Arabidopsis_lyrata") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Arabidopsis_thaliana") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Asparagus_officinalis") { + $synonym = mart_synonym($gene_header); + }elsif ($species eq "Batrachochytrium_dendrobatidis") { my ($transcript,$gene_id,$info) = 
split(/\s\|\s/, $gene_header); $synonym = $gene_id; + }elsif ($species eq "Brachypodium_distachyon") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Brassica_oleracea") { - my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Brassica_rapa") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Caenorhabditis_elegans") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Cajanus_cajan") { my ($gene_id,$type,$locus,$temp) = split(/\s/, $gene_header); $synonym = $locus; - }elsif ($species eq "Capsella_rubella") { - my ($gene_id, $transcript) = split(/\|/, $gene_header); + }elsif ($species eq "Cannabis_sativa") { + my ($isoform, $gene_id, $tempj) = split(/\s/, $gene_header); $synonym = $gene_id; + }elsif ($species eq "Capsella_rubella") { + $synonym = mart_synonym($gene_header); }elsif ($species eq "Carica_papaya") { - my ($contig,$gene_id) = split(/\|/, $gene_header); - $synonym = $contig; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Castor_canadensis") { my ($gene_info,$type,$gc,$gene_length) = split(/\s/, $gene_header); my ($gene_number,$gene_id) = split(/::/, $gene_info); @@ -809,26 +848,24 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq "Chlamydomonas_reinhardtii") { - my ($locus,$transcript) = split(/\|/,$gene_header); - $synonym = $locus; #??? 
+ $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Chondrus_crispus") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Cicer_arietinum") { - my ($gi,$number,$ref,$gene_id,$function) = split(/\|/, $gene_header); - $synonym = $number; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Citrus_clementina") { - my ($transcript_id,$gene_id) = split(/\|/,$gene_header); - $synonym = $transcript_id; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Citrus_sinensis") { - my ($transcript_id,$gene_id) = split(/\|/,$gene_header); - $synonym = $transcript_id; + $synonym = mart_synonym($gene_header); + }elsif ($species eq "Corchorus_capsularis") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Corchorus_olitorius") { + my ($type, $gene_id, $gene_id_suffix, $function) = split(/[\|\s]+/, $gene_header); + $synonym = $gene_id_suffix; }elsif ($species eq "Cucumis_sativus") { - my ($gene_id,$isomer) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Danio_rerio") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Dasypus_novemcinctus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -842,11 +879,7 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq "Drosophila_melanogaster") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + 
$synonym = ensembl_synonym($gene_header); }elsif ($species eq "Echinops_telfairi") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -859,14 +892,12 @@ sub find_gene_synonym { $transcript =~ s/transcript\://; $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; + }elsif ($species eq "Erythranthe_guttata") { + $synonym = mart_synonym($gene_header); }elsif ($species eq "Eucalyptus_grandis") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Fragaria_vesca") { - my ($gene_id, $mrna_id) = split(/\|/, $gene_header); - $gene_id =~ s/\-v1\.0\-hybrid//; - $mrna_id =~ s/\-v1\.0\-hybrid//; - $synonym = $mrna_id; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Fusarium_graminearum") { my ($transcript, $gene_id, $info) = split(/\s\|\s/, $gene_header); $synonym = $gene_id; @@ -889,23 +920,13 @@ sub find_gene_synonym { $peptide =~ s/\:pep//; $synonym = "$peptide,$gene_id"; }elsif ($species eq "Glycine_max") { - my ($locus_id,$isomer) = split(/\|/,$gene_header); - $synonym = $locus_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Gossypium_raimondii") { - my ($locus_id,$isomer) = split(/\|/,$gene_header); - $synonym = $locus_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Homo_sapiens") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $temp_gene =~ s/gene\://; - $transcript =~ s/transcript\://; - $gene_symbol =~ s/gene_symbol\://; - $synonym = "$temp_gene,$transcript,$gene_symbol"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Hordeum_vulgare") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = 
split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Ictidomys_tridecemlineatus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -918,9 +939,10 @@ sub find_gene_synonym { $locus =~ s/locus\=//; $ID =~ s/ID\=//; $synonym = "$peptide,$locus,$ID"; + }elsif ($species eq "Leersia_perrieri") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Linum_usitatissimum") { - my ($transcript,$gene_id) = split(/\|/, $gene_header); - $synonym = $transcript; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Macropus_eugenii") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -931,29 +953,18 @@ sub find_gene_synonym { my ($transcript,$gene_id,$temp) = split(/\s\|\s/, $gene_header); $synonym = $gene_id; }elsif ($species eq "Malus_domestica" ) { - my ($gene_id1,$gene_id2) = split(/\|/, $gene_header); - $synonym = $gene_id1; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Manihot_esculenta") { - my ($transcript,$gene_id) = split(/\|/, $gene_header); - $synonym = $transcript; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Medicago_truncatula") { - my ($gene_id,$transcript) = split(/\|/, $gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Mimulus_guttatus") { my ($gene_id,$transcript) = split(/\|/, $gene_header); $synonym = $gene_id; }elsif ($species eq "Mus_musculus") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $temp_gene =~ s/gene\://; - $transcript =~ 
s/transcript\://; - $gene_symbol =~ s/gene_symbol\://; - $synonym = "$temp_gene,$transcript,$gene_symbol"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Musa_acuminata") { - my ($protein,$type,$chrom,$gene_id,$transcript) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Mustela_putorius.furo") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -969,9 +980,10 @@ sub find_gene_synonym { }elsif ($species eq "Neurospora_crassa") { my ($transcript,$gene_id,$temp) = split(/\s\|\s/,$gene_header); $synonym = $gene_id; - }elsif ($species eq "Nostoc_punctiforme") { - my ($temp,$transcript,$source,$gene_id,$func) = split(/\|/, $gene_header); - $synonym = $gene_id; + }elsif ($species eq "Nicotiana_attenuata") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Nostoc_punctiforme.pcc.73102") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Oncorhynchus_mykiss") { my ($gi, $number, $type, $gene_id, $function) = split(/\|/,$gene_header); $synonym = $gene_id; @@ -987,18 +999,28 @@ sub find_gene_synonym { $transcript =~ s/transcript\://; $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; - }elsif ($species eq "Oryza_sativa.indica.gramene") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + }elsif ($species eq "Oryza_barthii") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_brachyantha") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_glaberrima") { + $synonym = 
ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_glumipatula") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_longistaminata") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_meridionalis") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_nivara") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_punctata") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_rufipogon") { + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Oryza_sativa.indica") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Oryza_sativa.japonica.IRGSP") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Oryzias_latipes") { my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); $transcript =~ s/transcript\://; @@ -1022,25 +1044,19 @@ sub find_gene_synonym { $gene_id =~ s/gene\://; $synonym = $gene_id; }elsif ($species eq "Phaseolus_vulgaris") { - my ($gene_id, $transcript) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Phoenix_dactylifera") { my ($gene_id, $locus_tag, $product, $strand) = split(/\s\[/, $gene_header); $locus_tag =~ s/locus_tag=//; $locus_tag =~ s/"//g; $synonym = $locus_tag; }elsif ($species eq "Physcomitrella_patens") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Phytophthora_infestans") { my ($transcript, $gene_id, $function) = split(/\s\|\s/,$gene_header); $synonym = $gene_id; - }elsif ($species eq "Populus_trichocarpa.ver2") { - my 
($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $gene_id; - }elsif ($species eq "Populus_trichocarpa.ver3") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $gene_id; + }elsif ($species eq "Populus_trichocarpa") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Procavia_capensis") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -1048,8 +1064,7 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq "Prunus_persica") { - my ($transcript,$gene_id) = split(/\|/,$gene_header); - $synonym = $transcript; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Pteropus_vampyrus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -1057,38 +1072,21 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq "Rattus_norvegicus") { - my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); - $temp_gene =~ s/gene\://; - $transcript =~ s/transcript\://; - $gene_symbol =~ s/gene_symbol\://; - $synonym = "$temp_gene,$transcript,$gene_symbol"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Ricinus_communis") { - my ($transcript,$model) = split(/\|/, $gene_header); - $synonym = $model; + $synonym = mart_synonym($gene_header); }elsif ($species eq "Saccharomyces_cerevisiae") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + 
$synonym = ensembl_synonym($gene_header); }elsif ($species eq "Schizosaccharomyces_pombe") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Selaginella_moellendorffii") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $synonym = $prot_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Setaria_italica") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $synonym = $locus_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Solanum_lycopersicum") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $synonym = $locus_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Solanum_tuberosum") { - my ($locus_id,$prot_id) = split(/\|/,$gene_header); - $synonym = $locus_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Sorex_araneus") { my ($isoform, $type, $scaffold, $temp_gene, $transcript, $gene_biotype, $transcript_biotype, $gene_symbol, $description) = split(/\s/, $gene_header); $temp_gene =~ s/gene\://; @@ -1096,8 +1094,9 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq "Sorghum_bicolor") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Synechocystis_sp.pcc.6803") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Takifugu_rubripes") { my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); $transcript =~ s/transcript\://; @@ -1111,40 +1110,26 @@ sub find_gene_synonym { $gene_symbol =~ s/gene_symbol\://; $synonym = "$temp_gene,$transcript,$gene_symbol"; }elsif ($species eq 
"Tetraodon_nigroviridis") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Theobroma_cacao") { - my ($gene_id,$transcript) = split(/\|/,$gene_header); - $synonym = $gene_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Trichodesmium_erythraeum") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Triticum_aestivum") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Triticum_dicoccoides") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Triticum_urartu") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - $transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); + }elsif ($species eq "Utricularia_gibba") { + my ($isoform, $gene_id) = split(/\|/, $gene_header); + $synonym = $gene_id; + }elsif ($species eq "Vigna_radiata") { + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Vitis_vinifera") { - my ($gene_id,$transcript_id) = split(/\|/,$gene_header); - $synonym = $transcript_id; + $synonym = ensembl_synonym($gene_header); }elsif ($species eq "Zea_mays") { - my ($peptide,$temp,$chrom,$gene_id,$transcript,$gene_type,$transcript_type) = split(/\s/,$gene_header); - 
$transcript =~ s/transcript\://; - $gene_id =~ s/gene\://; - $peptide =~ s/\:pep//; - $synonym = "$peptide,$gene_id"; + $synonym = ensembl_synonym($gene_header); }else { $synonym = "null"; @@ -1178,6 +1163,7 @@ sub all_species_array { push (@spec_array, "Castor_canadensis"); push (@spec_array, "Cavia_porcellus"); push (@spec_array, "Chlamydomonas_reinhardtii"); + push (@spec_array, "Chondrus_crispus"); push (@spec_array, "Cicer_arietinum"); push (@spec_array, "Citrullus_lanatus"); push (@spec_array, "Citrus_clementina");
@@ -1432,4 +1418,51 @@ sub find_taxon {
 	return $taxon;
 }
 
+# Return the transcript identifier from a whitespace-delimited FASTA header.
+# The header is split on single whitespace characters and the fifth field is
+# returned with its literal "transcript:" tag removed (presumably the Ensembl
+# peptide header layout -- confirm against the downloaded files).
+# Returns undef when the header has fewer than five fields.
+sub ensembl_gene {
+	my $gene_header = $_[0];
+	my ($gene, $type, $contig, $gene_alias, $transcript, $biotype, $description) = split(/\s/, $gene_header);
+	# Apply the substitution to $transcript (the field returned below) so the
+	# tag is actually removed; skip it when the field is missing.
+	$transcript =~ s/transcript\:// if defined $transcript;
+	return $transcript;
+}
+
+# Build the synonym string "<field 1>,<field 4>" from the same header layout,
+# with the literal "gene:" tag removed from the fourth field.
+sub ensembl_synonym {
+	my $gene_header = $_[0];
+	my ($gene, $type, $contig, $gene_alias, $transcript, $biotype, $description) = split(/\s/, $gene_header);
+	$gene_alias =~ s/gene\://;
+	my $synonym = "$gene,$gene_alias";
+	return $synonym;
+}
+
+# Return the second field of a pipe-delimited header ("gene|transcript|synonyms"
+# -- presumably the mart export layout; confirm against the downloaded files).
+sub mart_gene {
+	my $gene_header = $_[0];
+	my ($gene, $transcript, $synonyms) = split(/\|/, $gene_header);
+	return $transcript;
+}
+
+# Return the first field of a pipe-delimited header, followed by ",<field 3>"
+# when a third field is present and non-empty.
+sub mart_synonym {
+	my $gene_header = $_[0];
+	my ($gene, $transcript, $synonyms) = split(/\|/, $gene_header);
+	# split() drops trailing empty fields, so $synonyms is undef for
+	# "gene|transcript" headers; test definedness before comparing.
+	if (!defined $synonyms || $synonyms eq "") {
+		my $synonym = "$gene";
+		return $synonym;
+	}else{
+		my $synonym = "$gene,$synonyms";
+		return $synonym;
+	}
+}
 1;
-- 
2.34.1