dict_uniprot_map = {}
dict_ens_map = {}
dict_inp_map = {}
+COUNT_TOTAL_REF_LOCI = 0 # num of TOTAL curated reference loci used to generate orthology projections
#----------------------------------------------------------------------------------------------------------------------
# functions
for line in FILTER :
loci_filter.add(line.rstrip())
+ global COUNT_TOTAL_REF_LOCI
+ COUNT_TOTAL_REF_LOCI = len(loci_filter)
+
#for locus in loci_filter :
# print locus
union_ref_loci = len(set_inp_ref_loci | set_cmp_ref_loci) # all ref loci
intersection_ref_loci = len(set_inp_ref_loci & set_cmp_ref_loci) # all shared ref loci
- inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci))
- cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci))
+ # JP - not needed at this time; coverage over total curated set of reference loci is more useful
+ #inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci))
+ #cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci))
"""
projected loci (all non-unique, meaning recounted relative to each ref locus; in other words,
inp_exc_prj_loci += len(set(v[1]) - set(v[0]))
cmp_exc_prj_loci += len(set(v[0]) - set(v[1]))
- inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci))
- cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci))
+ # JP - not needed at this time
+ #inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci))
+ #cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci))
+ # percent coverage of orig. curated reference loci set
+ inp_prj_curated_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(COUNT_TOTAL_REF_LOCI))
+ cmp_prj_curated_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(COUNT_TOTAL_REF_LOCI))
+
# other stats
#avg_projections_per_locus
#avg_overlapping_projections_per_locus
list_stats.extend([{"[Comparison Stats]": ""},
{"\n-- Reference Loci --":""},
+ {"Total number of curated reference loci:": str(COUNT_TOTAL_REF_LOCI)},
{"Inparanoid reference loci:": str(inp_ref_loci)},
{"Compara reference loci:": str(cmp_ref_loci)},
{"Exclusive Inparanoid reference loci:": str(inp_exc_ref_loci)},
{"Exclusive Compara reference loci:": str(cmp_exc_ref_loci)},
{"All reference loci (union):": str(union_ref_loci)},
{"All shared reference loci (intersection):": str(intersection_ref_loci)},
- {"Inparanoid reference loci coverage:": inp_ref_loci_coverage},
- {"Compara reference loci coverage:": cmp_ref_loci_coverage},
+ #{"Inparanoid reference loci coverage:": inp_ref_loci_coverage},
+ #{"Compara reference loci coverage:": cmp_ref_loci_coverage},
+ {"Inparanoid coverage of curated reference loci:": inp_prj_curated_loci_coverage},
+ {"Compara coverage of curated reference loci:": cmp_prj_curated_loci_coverage},
{"\n-- Projected Loci (all non-unique with respect to reference loci) --":""},
{"Inparanoid projected loci:": str(inp_prj_loci)},
{"Compara exclusive projected loci:": str(cmp_exc_prj_loci)},
{"All projected loci (union):": str(union_prj_loci)},
{"All shared projected loci (intersection):": str(intersection_prj_loci)},
- {"Inparanoid projected loci coverage:": inp_prj_loci_coverage},
- {"Compara projected loci coverage:": cmp_prj_loci_coverage},
+ #{"Inparanoid projected loci coverage:": inp_prj_loci_coverage},
+ #{"Compara projected loci coverage:": cmp_prj_loci_coverage},
])
CMP_OUT_FILE.close()
CMP_FLAT_OUT_FILE.close()
- return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci]]
+ return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci], [inp_prj_curated_loci_coverage, cmp_prj_curated_loci_coverage]]
#----------------------------------------------------------------------------------------------------------------------
def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) :
#----------------------------------------------------------------------------------------------------------------------
-def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) :
+def generate_venn(venn_data, list_coverage, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) :
#----------------------------------------------------------------------------------------------------------------------
"""build and display Venn diagrams representing reference loci overlap"""
- plt.figure(figsize=(9, 7))
+ plt.figure(figsize=(9, 9))
if is_ref :
intersection_loci = len(venn_data[0] & venn_data[1])
plt.annotate('Compara', xy = v.get_label_by_id('01').get_position(), xytext = (30,-70), size = 'x-large',
ha = 'center', textcoords = 'offset points', bbox = dict(boxstyle = 'round, pad = 0.5', fc = colors[4], alpha = 0.3))
- plt.title(ref_species + ' > ' + proj_species + (' reference ' if is_ref else ' projection ') + 'loci (' + str(union_loci) + ' total) coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
+ # show percentage coverage of curated reference loci on reference diagram only
+ if is_ref :
+ plt.annotate('[Coverage of curated reference loci]\nInparanoid: ' + list_coverage[0] + '\nCompara: ' + list_coverage[1], xy = v.get_label_by_id('11').get_position(), xytext = (0,-300), size = 'large',
+ ha = 'center', va = 'bottom', textcoords = 'offset points')
+
+ plt.title('[' + ref_species + ' > ' + proj_species + ']\n' + (' Reference ' if is_ref else ' Projection ') + 'loci (' + str(union_loci) + ' projected' + (' of ' + str(COUNT_TOTAL_REF_LOCI) + ' total' if is_ref else '') + ') coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
if venn_output_path :
plt.savefig(venn_output_path + '/incomparanoid_' + ref_species.replace(' ','_') + '_2_' + proj_species.replace(' ','_') + '_recip_'
# NOTE: requires local matplotlib backend configuration
if args.venn_diagram :
- generate_venn(all_venn_data[0], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
- generate_venn(all_venn_data[1], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
+ generate_venn(all_venn_data[0], all_venn_data[2], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
+ generate_venn(all_venn_data[1], all_venn_data[2], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
#----------------------------------------------------------------------------------------------------------------------
# end