CMP_OUT_FILE.close()
CMP_FLAT_OUT_FILE.close()
- return [set_inp_ref_loci, set_cmp_ref_loci]
+ return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci]]
#----------------------------------------------------------------------------------------------------------------------
def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) :
#----------------------------------------------------------------------------------------------------------------------
-def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence) :
+def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path = None) :
#----------------------------------------------------------------------------------------------------------------------
- """build and display Venn diagrams representing reference loci overlap"""
+ """build, optionally save, and display a two-circle Venn diagram comparing Inparanoid and Compara loci
+
+ venn_data -- if is_ref is true: a pair of sets (Inparanoid loci, Compara loci), combined here with set operators;
+ otherwise: three counts in the order Inparanoid-exclusive, Compara-exclusive, intersection,
+ handed to venn2 unchanged as its subset sizes
+ colors -- list of matplotlib color names; indexes 0, 2 and 4 are read by the lines visible in this function
+ is_ref -- selects the set-based or count-based input shape and the ' reference ' / ' projection ' wording
+ ref_species, proj_species -- species names used in the title and, with spaces replaced by '_', in the file name
+ reciprocal_id -- Compara reciprocal identity percentage, echoed in the title and the file name
+ confidence -- when true, appends ', high-confidence only' to the title and a suffix to the file name
+ venn_output_path -- directory for the PNG; nothing is saved when it is empty or None (the default)
+ """
- intersection_loci = len(venn_data[0] & venn_data[1])
- union_loci = len(venn_data[0] | venn_data[1])
- inp_exc_loci = len(venn_data[0] - venn_data[1])
- cmp_exc_loci = len(venn_data[1] - venn_data[0])
+ plt.figure(figsize=(9, 7))
- plt.figure(figsize=(8, 7))
+ if is_ref :
+ intersection_loci = len(venn_data[0] & venn_data[1])
+ union_loci = len(venn_data[0] | venn_data[1])
+ inp_exc_loci = len(venn_data[0] - venn_data[1])
+ cmp_exc_loci = len(venn_data[1] - venn_data[0])
+
+ v = venn2(venn_data, ('Inparanoid', 'Compara'))
- v = venn2(venn_data, ('Inparanoid', 'Compara'))
+ else :
+ intersection_loci = venn_data[2]
+ inp_exc_loci = venn_data[0]
+ cmp_exc_loci = venn_data[1]
+ union_loci = inp_exc_loci + intersection_loci + cmp_exc_loci
+
+ v = venn2(subsets = venn_data)
v.get_patch_by_id('10').set_alpha(0.5)
v.get_patch_by_id('10').set_color(colors[0])
v.get_patch_by_id('11').set_alpha(0.75)
v.get_patch_by_id('11').set_color(colors[2])
- v.get_label_by_id('10').set_text(str(inp_exc_loci))
- v.get_label_by_id('01').set_text(str(cmp_exc_loci))
- v.get_label_by_id('11').set_text(str(intersection_loci))
+ # blank the built-in region labels; the counts are redrawn below as annotations shifted 10 points down
+ v.get_label_by_id('10').set_text('')
+ v.get_label_by_id('01').set_text('')
+ v.get_label_by_id('11').set_text('')
+
+ plt.annotate(str(inp_exc_loci), xy = v.get_label_by_id('10').get_position(), xytext = (0,-10), size = 'x-large',
+ ha = 'center', textcoords = 'offset points')
+ plt.annotate(str(cmp_exc_loci), xy = v.get_label_by_id('01').get_position(), xytext = (0,-10), size = 'x-large',
+ ha = 'center', textcoords = 'offset points')
+ plt.annotate(str(intersection_loci), xy = v.get_label_by_id('11').get_position(), xytext = (0,-10), size = 'x-large',
+ ha = 'center', textcoords = 'offset points')
v.get_label_by_id('A').set_text('')
v.get_label_by_id('B').set_text('')
plt.annotate('Compara', xy = v.get_label_by_id('01').get_position(), xytext = (30,-70), size = 'x-large',
ha = 'center', textcoords = 'offset points', bbox = dict(boxstyle = 'round, pad = 0.5', fc = colors[4], alpha = 0.3))
-
- plt.title('Overlap of ' + ref_species + (' reference ' if not is_ref else ' projection ') + 'loci (' + str(union_loci) + ' total)\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
- plt.show()
+ plt.title(ref_species + ' > ' + proj_species + (' reference ' if is_ref else ' projection ') + 'loci (' + str(union_loci) + ' total) coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
+
+ if venn_output_path :
+ # NOTE(review): '_all_confidence' is appended in the same case the title labels 'high-confidence only' -- confirm the intended suffix
+ # only non-default savefig options are passed; 'papertype' and 'frameon' are no longer accepted by current matplotlib
+ plt.savefig(venn_output_path + '/incomparanoid_' + ref_species.replace(' ','_') + '_2_' + proj_species.replace(' ','_') + '_recip_'
+ + str(reciprocal_id) + ('_all_confidence' if confidence else '') + ('_reference_loci' if is_ref else '_projection_loci') + '_comparison.png',
+ facecolor='w', edgecolor='w', format='png',
+ transparent=True, pad_inches=0.1)
+ plt.show()
#----------------------------------------------------------------------------------------------------------------------
# main
parser.add_argument('-R', '--reactome_gene_protein_path', help='output file containing gene::protein mappings for Reactome projection inference') # e.g. 'zmay_gene_protein_mapping.txt'
parser.add_argument('-P', '--reactome_projection_path', help='output file containing reference::projection protein mappings for Reactome inference') # e.g. 'osat_zmay_mapping.txt'
parser.add_argument('-V', '--venn_diagram', help='generate Venn diagram', action='store_true')
+parser.add_argument('-v', '--venn_output_path', help='save Venn diagrams to this directory')
args = parser.parse_args()
#print args
dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, 1 if args.confidence_high else 0)
# generate stats and output them
-venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
+all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
if args.generate_reactome_output == 'ensembl' :
write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
# NOTE: requires local matplotlib backend configuration
if args.venn_diagram :
- generate_venn(venn_data, ['red', 'yellow', 'orange', 'purple', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0)
- generate_venn(venn_data, ['green', 'yellow', 'lightgreen', 'purple', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0)
+ # two diagrams: all_venn_data[0] drives the reference-loci plot (is_ref = 1), all_venn_data[1] the projection-loci plot (is_ref = 0)
+ generate_venn(all_venn_data[0], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
+ generate_venn(all_venn_data[1], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
#----------------------------------------------------------------------------------------------------------------------
# end