Generation of the MSU-RAP dictionary now accepts LOC ids with any suffix (.2, .3, .4, etc.), rather than only those ending in .1.

author preecej <preecej@localhost>

Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)

committer preecej <preecej@localhost>

Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)
author preecej <preecej@localhost>
Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)
committer preecej <preecej@localhost>
Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)
diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py

index 1292f6b4a1e0dac5d55ad787258882f4746305da..75e485621b185ef30b1c3a5b6209ae42306f4eda 100755 (executable)
--- a/Personnel/preecej/python_singletons/incomparanoid.py
+++ b/Personnel/preecej/python_singletons/incomparanoid.py
@@ -40,6 +40,7 @@ def create_dict_uniprot_map(uniprot_substitution_path) :
          loc = cols[0]
          uniprot = cols[1]
          if loc in dict_uniprot_map :
+            # this should not occur: 1-to-many LOC-to-Uniprot relationships are not allowed (only the inverse, many LOCs to one Uniprot, is)
              dict_uniprot_map[loc].add(uniprot)
          else :
              dict_uniprot_map[loc] = uniprot
@@ -85,7 +86,7 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
      reference loci
      """
      dict_ens_map = {} # local ensembl orthology dict
-    dict_rap_map = {} # local MSU-RAP dict, w/ only filtered canonical LOC loci (orig. ".1")
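+    dict_rap_map = {} # local MSU-RAP dict, using only filtered canonical LOC loci, stored with the ".N" suffix stripped (intended preference is the lowest available suffix: ".1" first, ".2" second, and so on; NOTE(review): the loop below iterates a set, so that order is not guaranteed -- confirm)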
  
      # generate internal MSU-RAP map    
      RAP_MAP = open(rap_map_path)
@@ -95,14 +96,17 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
              rap_id = cols[0].upper()
  
              set_loc_ids = set(cols[1].upper().split(","))
-            # select only the locus w/ a .1 suffix, if it exists
+            # select the first locus that is not "NONE", if one exists, and keep it without its ".N" suffix ("first" is in set iteration order)
              for loc_id in set_loc_ids :
-                curr_canon_locus = re.match('.*\.1', loc_id)
-                if curr_canon_locus :
-                    dict_rap_map[rap_id] = curr_canon_locus.group(0).rstrip("1").rstrip(".")
+                if loc_id != "NONE" :
+                    canonical = loc_id.split(".")
+                    dict_rap_map[rap_id] = canonical[0]
                      break;                    
      RAP_MAP.close()
  
+    #for keys, values in dict_rap_map.items() :
+    #    print(keys + " | " + values)
+
      # generate ref loci filter 
      FILTER = open(filtering_loci_path)
      loci_filter = set()
@@ -401,11 +405,13 @@ if args.uniprot_substitution :
      dict_uniprot_map = create_dict_uniprot_map(args.uniprot_substitution)
  
  # create projection maps
-dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
+if (args.inparanoid_input_path) :
+    dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
  dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, 1 if args.confidence_high else 0)
  
  # generate stats and output them
-all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
+if (args.inparanoid_input_path) :
+    all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
  
  if args.generate_reactome_output == 'ensembl' :
      write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
author	preecej <preecej@localhost>
	Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)
committer	preecej <preecej@localhost>
	Tue, 12 Aug 2014 23:44:10 +0000 (23:44 +0000)