From: elserj
Date: Tue, 14 May 2019 21:55:22 +0000 (+0000)
Subject: Script to generate fasta_taxon_mapping.txt file for iprscan
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=31742c1b9e91b7ba842cd5317244597980816fce;p=old-jaiswallab-svn%2F.git

Script to generate fasta_taxon_mapping.txt file for iprscan

svn path=/; revision=663
---

diff --git a/interactome_scripts/create_taxon_mapping.pl b/interactome_scripts/create_taxon_mapping.pl
new file mode 100755
index 0000000..45c3884
--- /dev/null
+++ b/interactome_scripts/create_taxon_mapping.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/perl
+
+# Build fasta_taxon_mapping.txt for iprscan: one "<fasta file>\t<taxon id>"
+# line for every *.fa file in the current directory.
+
+use strict;
+use warnings;
+
+# find_species() and find_taxon() are expected to come from find_species.pl.
+# Load the first copy that exists and stop early when there is none, because
+# every call below depends on it.
+if(-e "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl") {
+	require "$ENV{HOME}/scripts/jaiswallab/interactome_scripts/find_species.pl";
+}elsif(-e "$ENV{HOME}/bin/find_species.pl") {
+	require "$ENV{HOME}/bin/find_species.pl";
+}else {
+	die "find_species.pl not found in $ENV{HOME}/scripts/jaiswallab/interactome_scripts or $ENV{HOME}/bin\n";
+}
+
+my $mapping_file = 'fasta_taxon_mapping.txt';
+
+# The pattern has to be a quoted string; a bare *.fa is parsed as Perl code,
+# not as a file pattern.
+my @files = glob('*.fa');
+
+open(my $out, '>', $mapping_file) or die "Cannot write $mapping_file: $!\n";
+
+foreach my $file (@files){
+	my $species = find_species($file);
+	my $taxon = defined $species ? find_taxon($species) : undef;
+
+	# Keep the row even when no taxon id is known, but name the file instead
+	# of letting perl emit an anonymous "uninitialized value" warning.
+	if(!defined $taxon) {
+		warn "No taxon id found for $file\n";
+		$taxon = '';
+	}
+	print {$out} "$file\t$taxon\n";
+}
+
+close($out) or die "Cannot close $mapping_file: $!\n";
diff --git a/interactome_scripts/find_species.pl b/interactome_scripts/find_species.pl
index 10716bc..f7ff442 100755
--- a/interactome_scripts/find_species.pl
+++ b/interactome_scripts/find_species.pl
@@ -1299,10 +1299,13 @@ sub find_taxon {
 	my %taxon_hash = (
 		'Aegilops_tauschii', '37682',
 		'Amborella_trichopoda', '13333',
+		'Arabidopsis_halleri', '81970',
 		'Arabidopsis_lyrata' , '59689',
 		'Arabidopsis_thaliana', '3702',
 		'Arachis_duranensis', '130453',
 		'Arachis_ipaensis', '130454',
+		'Asparagus_officinalis', '4686',
+		'Azolla_filiculoides', '84609',
 		'Batrachochytrium_dendrobatidis', '109871',
 		'Brachypodium_distachyon', '15368',
 		'Brachypodium_sylvaticum.Corv', '29664',
@@ -1317,19 +1320,24 @@ sub find_taxon {
 		'Capsicum_annuum', '4072',
 		'Carica_papaya', '3649',
 		'Chlamydomonas_reinhardtii', '3055',
+		'Chondrus_crispus', '2769',
 		'Cicer_arietinum', '3827',
 		'Citrullus_lanatus', '3654',
 		'Citrus_clementina', '85681',
 		'Citrus_sinensis', '2711',
 		'Coffea_canephora', '49390',
+		'Corchorus_capsularis', '210143',
+		'Corchorus_olitorius', '93759',
 		'Cucumis_sativus', '3659',
 		'Danio_rerio', '7955',
 		'Drosophila_melanogaster', '7227',
+		'Durio_zibethinus', '66656',
 		'Ectocarpus_siliculosus', '2880',
 		'Elaphocordyceps_capitata', '45325',
 		'Elaphocordyceps_ophioglossoides', '71617',
 		'Elaphocordyceps_paradoxa', '94208',
 		'Epichloe_festucae', '35717',
+		'Erythranthe_guttata', '4155',
 		'Escherichia_coli', '562',
 		'Eucalyptus_grandis', '71139',
 		'Fragaria_vesca', '57918',
@@ -1343,6 +1351,8 @@ sub find_taxon {
 		'Gossypium_hirsutum', '3635',
 		'Homo_sapiens', '9606',
 		'Hordeum_vulgare', '4513',
+		'Humulus_lupulus.haplotig', '3486',
+		'Humulus_lupulus.primary', '3486',
 		'Jatropha_curcas', '180498',
 		'Kalanchoe_laxiflora', '1670617',
 		'Laccaria_bicolor', '29883',
@@ -1352,13 +1362,16 @@ sub find_taxon {
 		'Malus_domestica', '3750',
 		'Manihot_esculenta', '3983',
 		'Medicago_truncatula', '3880',
+		'Mentha_longifolia', '38859',
 		'Mimulus_guttatus', '4155',
 		'Mus_musculus', '10090',
 		'Musa_acuminata', '4641',
 		'Nectria_haematococca', '140110',
 		'Nelumbo_nucifera', '4432',
 		'Neurospora_crassa', '5141',
+		'Nicotiana_attenuata', '49451',
 		'Nostoc_punctiforme', '272131',
+		'Ocimum_tenuiflorum', '204149',
 		'Oncorhynchus_mykiss', '8022',
 		'Oryza_australiensis', '4532',
 		'Oryza_barthii', '65489',
@@ -1374,6 +1387,10 @@ sub find_taxon {
 		'Oryza_officinalis', '4535',
 		'Oryza_punctata', '4537',
 		'Oryza_rufipogon', '4529',
+		'Oryza_sativa.AUS.kasalath', '1736659',
+		'Oryza_sativa.AUS.N22', '1736659',
+		'Oryza_sativa.indica', '39946',
+		'Oryza_sativa.indica.9311.OGE', '39946',
 		'Oryza_sativa.indica.gramene', '39946',
 		'Oryza_sativa.indica.iplant', '39946',
 		'Oryza_sativa.indica.IR29', '39946',
@@ -1381,6 +1398,7 @@ sub find_taxon {
 		'Oryza_sativa.japonica.iplant', '39947',
 		'Oryza_sativa.japonica.IRGSP', '39947',
 		'Oryza_sativa.japonica.MSU', '39947',
+		'Oryza_sativa.japonica.OGE', '39947',
 		'Oryzias_latipes', '8090',
 		'Pediculus_humanus', '121225',
 		'Phaseolus_vulgaris', '3885',
@@ -1397,27 +1415,35 @@ sub find_taxon {
 		'Rhizopus_oryzae', '64495',
 		'Ricinus_communis', '3988',
 		'Saccharomyces_cerevisiae', '4932',
+		'Salvia_hispanica', '49212',
 		'Salvia_hispancia.salba', '49212',
 		'Salvia_splendens', '180675',
+		'Salvinia_cucullata', '32188',
 		'Schizosaccharomyces_pombe', '4896',
 		'Selaginella_moellendorffii', '88036',
 		'Setaria_italica', '4555',
 		'Solanum_lycopersicum', '4081',
 		'Solanum_tuberosum', '4113',
 		'Sorghum_bicolor', '4558',
-		'Synechocystis_pcc6803', '1148',
+		'Synechocystis_sp.pcc.6803', '1148',
 		'Takifugu_rubripes', '31033',
 		'Tetraodon_nigroviridis', '99883',
 		'Theobroma_cacao', '3641',
 		'Tolypocladium_inflatum', '29910',
 		'Trichodesmium_erythraeum', '1206',
 		'Triticum_aestivum', '4565',
+		'Triticum_dicoccoides', '85692',
 		'Triticum_monococcum.DV92', '4568',
 		'Triticum_monococcum.G3116', '4568',
 		'Triticum_turgidum', '4571',
 		'Triticum_urartu', '4572',
+		'Utricularia_gibba', '13748',
+		'Vigna_radiata', '157791',
 		'Vitis_vinifera', '29760',
 		'Zea_mays', '4577',
+		'Zoysia_japonica.Nagirizaki', '309978',
+		'Zoysia_matrella.Wakaba', '38722',
+		'Zoysia_pacifica.Zanpa', '1070856',
 	);
 	my $taxon = $taxon_hash{$species};
 	return $taxon;
@@ -1426,7 +1452,7 @@ sub find_taxon {
 sub ensembl_gene {
 	my $gene_header = $_[0];
 	my ($gene, $type, $contig, $gene_alias, $transcript, $biotype, $description) = split(/\s/, $gene_header);
-	$transcipt =~ s/transcript\://;
+	$transcript =~ s/transcript\://;
 	return $transcript;
 }
 