From 73794a540cd4347e162a3550c79d5234f1955989 Mon Sep 17 00:00:00 2001 From: preecej Date: Tue, 15 Apr 2014 19:05:23 +0000 Subject: [PATCH] added percentage coverage comparisons, more captioning on diagrams svn path=/; revision=574 --- .../python_singletons/incomparanoid.py | 46 +++++++++++++------ 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py index 4cb4804..62c0a3e 100755 --- a/Personnel/preecej/python_singletons/incomparanoid.py +++ b/Personnel/preecej/python_singletons/incomparanoid.py @@ -20,6 +20,7 @@ list_stats = [] dict_uniprot_map = {} dict_ens_map = {} dict_inp_map = {} +COUNT_TOTAL_REF_LOCI = 0 # num of TOTAL curated reference loci used to generate orthology projections #---------------------------------------------------------------------------------------------------------------------- # functions @@ -108,6 +109,9 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_ for line in FILTER : loci_filter.add(line.rstrip()) + global COUNT_TOTAL_REF_LOCI + COUNT_TOTAL_REF_LOCI = len(loci_filter) + #for locus in loci_filter : # print locus @@ -149,8 +153,9 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu union_ref_loci = len(set_inp_ref_loci | set_cmp_ref_loci) # all ref loci intersection_ref_loci = len(set_inp_ref_loci & set_cmp_ref_loci) # all shared ref loci - inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci)) - cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci)) + # JP - not needed at this time; coverage over total curated set of reference loci is more useful + #inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci)) + #cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci)) """ projected loci (all non-unique, meaning recounted relative to each ref locus; in other words, @@ -192,9 +197,14 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu inp_exc_prj_loci += len(set(v[1]) - set(v[0])) cmp_exc_prj_loci += len(set(v[0]) - set(v[1])) - inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci)) - cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci)) + # JP - not needed at this time + #inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci)) + #cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci)) + # percent coverage of orig. curated reference loci set + inp_prj_curated_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(COUNT_TOTAL_REF_LOCI)) + cmp_prj_curated_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(COUNT_TOTAL_REF_LOCI)) + # other stats #avg_projections_per_locus #avg_overlapping_projections_per_locus @@ -206,14 +216,17 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu list_stats.extend([{"[Comparison Stats]": ""}, {"\n-- Reference Loci --":""}, + {"Total number of curated reference loci:": str(COUNT_TOTAL_REF_LOCI)}, {"Inparanoid reference loci:": str(inp_ref_loci)}, {"Compara reference loci:": str(cmp_ref_loci)}, {"Exclusive Inparanoid reference loci:": str(inp_exc_ref_loci)}, {"Exclusive Compara reference loci:": str(cmp_exc_ref_loci)}, {"All reference loci (union):": str(union_ref_loci)}, {"All shared reference loci (intersection):": str(intersection_ref_loci)}, - {"Inparanoid reference loci coverage:": inp_ref_loci_coverage}, - {"Compara reference loci coverage:": cmp_ref_loci_coverage}, + #{"Inparanoid reference loci coverage:": inp_ref_loci_coverage}, + #{"Compara reference loci coverage:": cmp_ref_loci_coverage}, + {"Inparanoid coverage of curated reference loci:": inp_prj_curated_loci_coverage}, + {"Compara coverage of curated reference loci:": cmp_prj_curated_loci_coverage}, {"\n-- Projected Loci (all non-unique with respect to reference loci) --":""}, {"Inparanoid projected loci:": str(inp_prj_loci)}, @@ -222,8 +235,8 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu {"Compara exclusive projected loci:": str(cmp_exc_prj_loci)}, {"All projected loci (union):": str(union_prj_loci)}, {"All shared projected loci (intersection):": str(intersection_prj_loci)}, - {"Inparanoid projected loci coverage:": inp_prj_loci_coverage}, - {"Compara projected loci coverage:": cmp_prj_loci_coverage}, + #{"Inparanoid projected loci coverage:": inp_prj_loci_coverage}, + #{"Compara projected loci coverage:": cmp_prj_loci_coverage}, ]) @@ -258,7 +271,7 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu CMP_OUT_FILE.close() CMP_FLAT_OUT_FILE.close() - return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci]] + return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci], [inp_prj_curated_loci_coverage, cmp_prj_curated_loci_coverage]] #---------------------------------------------------------------------------------------------------------------------- def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) : @@ -282,11 +295,11 @@ def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projecti #---------------------------------------------------------------------------------------------------------------------- -def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) : +def generate_venn(venn_data, list_coverage, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) : #---------------------------------------------------------------------------------------------------------------------- """build and display Venn diagrams representing reference loci overlap""" - plt.figure(figsize=(9, 7)) + plt.figure(figsize=(9, 9)) if is_ref : intersection_loci = len(venn_data[0] & venn_data[1]) @@ -333,7 +346,12 @@ def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciproc plt.annotate('Compara', xy = v.get_label_by_id('01').get_position(), xytext = (30,-70), size = 'x-large', ha = 'center', textcoords = 'offset points', bbox = dict(boxstyle = 'round, pad = 0.5', fc = colors[4], alpha = 0.3)) - plt.title(ref_species + ' > ' + proj_species + (' reference ' if is_ref else ' projection ') + 'loci (' + str(union_loci) + ' total) coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else '')) + # show percentage coverage of curated reference loci on reference diagram only + if is_ref : + plt.annotate('[Coverage of curated reference loci]\nInparanoid: ' + list_coverage[0] + '\nCompara: ' + list_coverage[1], xy = v.get_label_by_id('11').get_position(), xytext = (0,-300), size = 'large', + ha = 'center', va = 'bottom', textcoords = 'offset points') + + plt.title('[' + ref_species + ' > ' + proj_species + ']\n' + (' Reference ' if is_ref else ' Projection ') + 'loci (' + str(union_loci) + ' projected' + (' of ' + str(COUNT_TOTAL_REF_LOCI) + ' total' if is_ref else '') + ') coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else '')) if venn_output_path : plt.savefig(venn_output_path + '/incomparanoid_' + ref_species.replace(' ','_') + '_2_' + proj_species.replace(' ','_') + '_recip_' @@ -396,8 +414,8 @@ if args.generate_reactome_output == 'inparanoid' : # NOTE: requires local matplotlib backend configuration if args.venn_diagram : - generate_venn(all_venn_data[0], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path) - generate_venn(all_venn_data[1], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path) + generate_venn(all_venn_data[0], all_venn_data[2], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path) + generate_venn(all_venn_data[1], all_venn_data[2], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path) #---------------------------------------------------------------------------------------------------------------------- # end -- 2.34.1