wip: uniprot substitution for ref loci

author preecej <preecej@localhost>

Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)

committer preecej <preecej@localhost>

Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)
author preecej <preecej@localhost>
Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)
committer preecej <preecej@localhost>
Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)
diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py

index 7bf3969942e5c0452a1ba85b9f48edf12c29abf8..6a6f231ea666324f87a1c742cfa2f8d71c33284b 100755 (executable)
--- a/Personnel/preecej/python_singletons/incomparanoid.py
+++ b/Personnel/preecej/python_singletons/incomparanoid.py
@@ -15,6 +15,7 @@ import re
  # globals
  #----------------------------------------------------------------------------------------------------------------------
  list_stats = []
+dict_uniprot_map = {}
  dict_ens_map = {}
  dict_inp_map = {}
  
@@ -46,7 +47,18 @@ class switch(object):
  #----------------------------------------------------------------------------------------------------------------------
  
  #----------------------------------------------------------------------------------------------------------------------
-def create_inp_map(inparanoid_input_path) :
+def create_dict_uniprot_map() :
+#----------------------------------------------------------------------------------------------------------------------
+    """
+    create the reference-locus-to-UniProt mapping dictionary (WIP stub: nothing is read yet, so an empty dict is always returned)
+    """
+    dict_uniprot_map = {} # local; shadows the module-level dict_uniprot_map and is handed back via the return value
+    # TODO: read the map file and populate the dict (note the possibility of one UniProt id mapping to many ref loci)
+    
+    return dict_uniprot_map
+
+#----------------------------------------------------------------------------------------------------------------------
+def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map) :
  #----------------------------------------------------------------------------------------------------------------------
      """
      open the inparanoid file (which is already loci-filtered for curated reference set) and generate a 2-col mapping of PRJ to LOC loci
@@ -80,7 +92,7 @@ def create_inp_map(inparanoid_input_path) :
  
  
  #----------------------------------------------------------------------------------------------------------------------
-def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id) :
+def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, uniprot_substitution, dict_uniprot_map) :
  #----------------------------------------------------------------------------------------------------------------------
      """
      open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where 
@@ -159,7 +171,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu
      ref_dict = {}
      set_ens_os_loci = set()
      set_inp_os_loci = set()
-
+    
      #ref_dict structure: {os locus : [[ens projected locus, ...], [inp projected locus, ...], # of common loci]}
  
      # iterate over both map files and build an overlap map, counting inclusions and exclusions at the reference and projection level
@@ -227,7 +239,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu
      INP_OUT_FILE = open(inparanoid_output_path,'w')
      INP_FLAT_OUT_FILE = open(inparanoid_output_path + ".flat",'w')
      for k, v in sorted(dict_inp_map.iteritems()) :
-        INP_OUT_FILE.write(k + "\t" + ",".join(v) + "\n")
+        INP_OUT_FILE.write(k + "\t" + " ".join(v) + "\n")
          for projection in sorted(v) :
              INP_FLAT_OUT_FILE.write(k + "\t" + projection + "\n")
      INP_OUT_FILE.close()
@@ -236,7 +248,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu
      ENS_OUT_FILE = open(ensembl_output_path,'w')
      ENS_FLAT_OUT_FILE = open(ensembl_output_path + ".flat",'w')
      for k, v in sorted(dict_ens_map.iteritems()) :
-        ENS_OUT_FILE.write(k + "\t" + ",".join(v) + "\n")
+        ENS_OUT_FILE.write(k + "\t" + " ".join(v) + "\n")
          for projection in sorted(v) :
              ENS_FLAT_OUT_FILE.write(k + "\t" + projection + "\n")
      ENS_OUT_FILE.close()
@@ -258,6 +270,7 @@ parser.add_argument('-r', '--reciprocal_id', type=int, help='reciprocal identity
  # TODO: add an "inparanoid super-cluster vs. conventional input" flag
  
  # output settings
+parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for reference loci', action='store_true')
  parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons')
  parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs')
  parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs')
@@ -265,9 +278,13 @@ parser.add_argument('-I', '--inparanoid_output_path', help='output file containi
  args = parser.parse_args()
  #print args
  
+# create ref loci::UniProt map, if specified
+if args.uniprot_substitution :
+    dict_uniprot_map = create_dict_uniprot_map()
+
  # create projection maps
-dict_inp_map = create_inp_map(args.inparanoid_input_path)
-dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id)
+dict_inp_map = create_inp_map(args.inparanoid_input_path, args.uniprot_substitution, dict_uniprot_map)
+dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, args.uniprot_substitution, dict_uniprot_map)
  
  # generate stats and output them
  compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
author	preecej <preecej@localhost>
	Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)
committer	preecej <preecej@localhost>
	Tue, 25 Mar 2014 00:00:54 +0000 (00:00 +0000)