From fac5db24d8fb9eebf75f98f8b656b9091e0dd682 Mon Sep 17 00:00:00 2001 From: preecej Date: Wed, 26 Mar 2014 01:04:47 +0000 Subject: [PATCH] finished optional uniprot mapping svn path=/; revision=561 --- .../python_singletons/incomparanoid.py | 44 ++++++++++++++----- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py index 6a6f231..31465fa 100755 --- a/Personnel/preecej/python_singletons/incomparanoid.py +++ b/Personnel/preecej/python_singletons/incomparanoid.py @@ -47,18 +47,31 @@ class switch(object): #---------------------------------------------------------------------------------------------------------------------- #---------------------------------------------------------------------------------------------------------------------- -def create_dict_uniprot_map() : +def create_dict_uniprot_map(uniprot_substitution_path) : #---------------------------------------------------------------------------------------------------------------------- """ create reference-to-uniprot mapping dictionary """ dict_uniprot_map = {} # local - # read map file, populate dict (note possibility of one uniprot id to many ref loci) + # read map file, populate dict (it is possible to have a many-to-1 LOC-to-Uniprot relationship; this is ok for projection inference) + UNI = open(uniprot_substitution_path) + for line in UNI : + cols = line.rstrip().split() + loc = cols[0] + uniprot = cols[1] + if loc in dict_uniprot_map : + dict_uniprot_map[loc].add(uniprot) + else : + dict_uniprot_map[loc] = uniprot + UNI.close() + + #for k, v in sorted(dict_uniprot_map.iteritems()) : + #print k, v return dict_uniprot_map #---------------------------------------------------------------------------------------------------------------------- -def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map) : +def create_inp_map(inparanoid_input_path, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- """ open the inparanoid file (which is already loci-filtered for curated reference set) and generate a 2-col mapping of PRJ to LOC loci @@ -74,6 +87,9 @@ def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map if int(cols[1][-1]) > 1 : continue os_locus = cols[0].rstrip("1").rstrip(".") + # swap loc for uniprot, if specified + if dict_uniprot_map : + os_locus = dict_uniprot_map[os_locus] prj_locus = cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes (i.e. '.#', '_T0#') if os_locus in dict_inp_map : dict_inp_map[os_locus].add(prj_locus) @@ -92,7 +108,7 @@ def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map #---------------------------------------------------------------------------------------------------------------------- -def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, uniprot_substitution, dict_uniprot_map) : +def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- """ open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where @@ -137,13 +153,17 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_ cols = line.rstrip().split() if len(cols) == 5 : if cols[0] in dict_rap_map : - if dict_rap_map[cols[0]] in loci_filter : + os_locus = dict_rap_map[cols[0]] + if os_locus in loci_filter : + # swap loc for uniprot, if specified + if dict_uniprot_map : + os_locus = dict_uniprot_map[os_locus] # reciprocal identity is >= recip_id%, high confidence if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 : - if dict_rap_map[cols[0]] in dict_ens_map : - dict_ens_map[dict_rap_map[cols[0]]].add(cols[1]) + if os_locus in dict_ens_map : + dict_ens_map[os_locus].add(cols[1]) else : - dict_ens_map[dict_rap_map[cols[0]]] = set([cols[1]]) + dict_ens_map[os_locus] = set([cols[1]]) ENS.close() for k, v in dict_ens_map.iteritems() : @@ -267,10 +287,10 @@ parser.add_argument('-e', '--ensembl_input_path', help='ensembl compara input fi parser.add_argument('-i', '--inparanoid_input_path', help='inparanoid supercluster input file') parser.add_argument('-m', '--rap_map_path', help='MSU-RAP mapping file') parser.add_argument('-r', '--reciprocal_id', type=int, help='reciprocal identity percentage') +parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for reference loci') # TODO: add an "inparanoid super-cluster vs. conventional input" flag # output settings -parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for reference loci', action='store_true') parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons') parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs') parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs') @@ -280,11 +300,11 @@ args = parser.parse_args() # create ref loci::UniProt map, if specified if args.uniprot_substitution : - dict_uniprot_map = create_dict_uniprot_map() + dict_uniprot_map = create_dict_uniprot_map(args.uniprot_substitution) # create projection maps -dict_inp_map = create_inp_map(args.inparanoid_input_path, args.uniprot_substitution, dict_uniprot_map) -dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, args.uniprot_substitution, dict_uniprot_map) +dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map) +dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map) # generate stats and output them compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path) -- 2.34.1