added percentage coverage comparisons, more captioning on diagrams

author preecej <preecej@localhost>

Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)

committer preecej <preecej@localhost>

Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)
author preecej <preecej@localhost>
Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)
committer preecej <preecej@localhost>
Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)
diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py

index 4cb4804d9c4a9e3c00df8fba5232a202a706c3e5..62c0a3ebfb38b1bba0df20cc6f9c3078e3476345 100755 (executable)
--- a/Personnel/preecej/python_singletons/incomparanoid.py
+++ b/Personnel/preecej/python_singletons/incomparanoid.py
@@ -20,6 +20,7 @@ list_stats = []
  dict_uniprot_map = {}
  dict_ens_map = {}
  dict_inp_map = {}
+COUNT_TOTAL_REF_LOCI = 0 # num of TOTAL curated reference loci used to generate orthology projections  
  
  #----------------------------------------------------------------------------------------------------------------------
  # functions
@@ -108,6 +109,9 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
      for line in FILTER :
          loci_filter.add(line.rstrip())
  
+    global COUNT_TOTAL_REF_LOCI
+    COUNT_TOTAL_REF_LOCI = len(loci_filter)
+
      #for locus in loci_filter :
      #    print locus
  
@@ -149,8 +153,9 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu
      union_ref_loci = len(set_inp_ref_loci | set_cmp_ref_loci)           # all ref loci 
      intersection_ref_loci = len(set_inp_ref_loci & set_cmp_ref_loci)    # all shared ref loci
  
-    inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci))
-    cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci))
+    # JP - not needed at this time; coverage over total curated set of reference loci is more useful
+    #inp_ref_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(union_ref_loci))
+    #cmp_ref_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(union_ref_loci))
  
      """
      projected loci (all non-unique, meaning recounted relative to each ref locus; in other words, 
@@ -192,9 +197,14 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu
          inp_exc_prj_loci += len(set(v[1]) - set(v[0]))
          cmp_exc_prj_loci += len(set(v[0]) - set(v[1]))
  
-    inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci))
-    cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci))
+    # JP - not needed at this time
+    #inp_prj_loci_coverage = '{:.2%}'.format(inp_prj_loci/float(union_prj_loci)) 
+    #cmp_prj_loci_coverage = '{:.2%}'.format(cmp_prj_loci/float(union_prj_loci))
  
+    # percent coverage of orig. curated reference loci set
+    inp_prj_curated_loci_coverage = '{:.2%}'.format(inp_ref_loci/float(COUNT_TOTAL_REF_LOCI))
+    cmp_prj_curated_loci_coverage = '{:.2%}'.format(cmp_ref_loci/float(COUNT_TOTAL_REF_LOCI))
+    
      # other stats
      #avg_projections_per_locus
      #avg_overlapping_projections_per_locus
@@ -206,14 +216,17 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu
      list_stats.extend([{"[Comparison Stats]": ""},
              
          {"\n-- Reference Loci --":""}, 
+        {"Total number of curated reference loci:": str(COUNT_TOTAL_REF_LOCI)},
          {"Inparanoid reference loci:": str(inp_ref_loci)}, 
          {"Compara reference loci:": str(cmp_ref_loci)}, 
          {"Exclusive Inparanoid reference loci:": str(inp_exc_ref_loci)}, 
          {"Exclusive Compara reference loci:": str(cmp_exc_ref_loci)}, 
          {"All reference loci (union):": str(union_ref_loci)},
          {"All shared reference loci (intersection):": str(intersection_ref_loci)},
-        {"Inparanoid reference loci coverage:": inp_ref_loci_coverage},
-        {"Compara reference loci coverage:": cmp_ref_loci_coverage},
+        #{"Inparanoid reference loci coverage:": inp_ref_loci_coverage},
+        #{"Compara reference loci coverage:": cmp_ref_loci_coverage},
+        {"Inparanoid coverage of curated reference loci:": inp_prj_curated_loci_coverage},
+        {"Compara coverage of curated reference loci:": cmp_prj_curated_loci_coverage},
          
          {"\n-- Projected Loci (all non-unique with respect to reference loci) --":""}, 
          {"Inparanoid projected loci:": str(inp_prj_loci)},
@@ -222,8 +235,8 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu
          {"Compara exclusive projected loci:": str(cmp_exc_prj_loci)},
          {"All projected loci (union):": str(union_prj_loci)},
          {"All shared projected loci (intersection):": str(intersection_prj_loci)},
-        {"Inparanoid projected loci coverage:": inp_prj_loci_coverage},
-        {"Compara projected loci coverage:": cmp_prj_loci_coverage},
+        #{"Inparanoid projected loci coverage:": inp_prj_loci_coverage},
+        #{"Compara projected loci coverage:": cmp_prj_loci_coverage},
  
      ])     
  
@@ -258,7 +271,7 @@ def compare_maps(dict_cmp_map, dict_inp_map, comparison_file_path, compara_outpu
          CMP_OUT_FILE.close()
          CMP_FLAT_OUT_FILE.close()
  
-    return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci]]
+    return [[set_inp_ref_loci, set_cmp_ref_loci], [inp_exc_prj_loci, cmp_exc_prj_loci, intersection_prj_loci], [inp_prj_curated_loci_coverage, cmp_prj_curated_loci_coverage]]
  
  #----------------------------------------------------------------------------------------------------------------------
  def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) :
@@ -282,11 +295,11 @@ def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projecti
  
  
  #----------------------------------------------------------------------------------------------------------------------
-def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) :
+def generate_venn(venn_data, list_coverage, colors, is_ref, ref_species, proj_species, reciprocal_id, confidence, venn_output_path) :
  #----------------------------------------------------------------------------------------------------------------------
      """build and display Venn diagrams representing reference loci overlap"""
  
-    plt.figure(figsize=(9, 7))
+    plt.figure(figsize=(9, 9))
  
      if is_ref :
          intersection_loci = len(venn_data[0] & venn_data[1])
@@ -333,7 +346,12 @@ def generate_venn(venn_data, colors, is_ref, ref_species, proj_species, reciproc
      plt.annotate('Compara', xy = v.get_label_by_id('01').get_position(), xytext = (30,-70), size = 'x-large',
                  ha = 'center', textcoords = 'offset points', bbox = dict(boxstyle = 'round, pad = 0.5', fc = colors[4], alpha = 0.3))
  
-    plt.title(ref_species + ' > ' + proj_species + (' reference ' if is_ref else ' projection ') + 'loci (' + str(union_loci) + ' total) coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
+    # show percentage coverage of curated reference loci on reference diagram only
+    if is_ref :
+        plt.annotate('[Coverage of curated reference loci]\nInparanoid: ' + list_coverage[0] + '\nCompara: ' + list_coverage[1], xy = v.get_label_by_id('11').get_position(), xytext = (0,-300), size = 'large',
+                    ha = 'center', va = 'bottom', textcoords = 'offset points')
+
+    plt.title('[' + ref_species + ' > ' + proj_species + ']\n' + (' Reference ' if is_ref else ' Projection ') + 'loci (' + str(union_loci) + ' projected' + (' of ' + str(COUNT_TOTAL_REF_LOCI) + ' total' if is_ref else '') + ') coverage comparison\nbetween Inparanoid super-clusters and Compara orthology data,\ngiven ' + str(reciprocal_id) + '% Compara reciprocal identity' + (', high-confidence only' if confidence else ''))
  
      if venn_output_path :
          plt.savefig(venn_output_path + '/incomparanoid_' + ref_species.replace(' ','_') + '_2_' + proj_species.replace(' ','_') + '_recip_' 
@@ -396,8 +414,8 @@ if args.generate_reactome_output == 'inparanoid' :
  
  # NOTE: requires local matplotlib backend configuration
  if args.venn_diagram :
-    generate_venn(all_venn_data[0], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
-    generate_venn(all_venn_data[1], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
+    generate_venn(all_venn_data[0], all_venn_data[2], ['red', 'yellow', 'orange', 'blue', 'lime'], 1, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
+    generate_venn(all_venn_data[1], all_venn_data[2], ['green', 'yellow', 'lightgreen', 'blue', 'lime'], 0, args.ref_species, args.proj_species, args.reciprocal_id, 1 if args.confidence_high else 0, args.venn_output_path)
  
  #----------------------------------------------------------------------------------------------------------------------
  # end
author	preecej <preecej@localhost>
	Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)
committer	preecej <preecej@localhost>
	Tue, 15 Apr 2014 19:05:23 +0000 (19:05 +0000)