From 073b90ec7f82aa17413de172c148d3b4cc9463ca Mon Sep 17 00:00:00 2001 From: preecej Date: Fri, 28 Mar 2014 00:43:55 +0000 Subject: [PATCH] added projection prefix option svn path=/; revision=562 --- Note: projection_prefix is prepended verbatim (no separator is inserted) to the projected identifiers in both the InParanoid and the Ensembl maps; an empty prefix leaves the identifiers unchanged. Both Ensembl branches use the same `projection_prefix if projection_prefix else ""` guard. .../preecej/python_singletons/incomparanoid.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py index 31465fa..a298804 100755 --- a/Personnel/preecej/python_singletons/incomparanoid.py +++ b/Personnel/preecej/python_singletons/incomparanoid.py @@ -71,7 +71,7 @@ def create_dict_uniprot_map(uniprot_substitution_path) : return dict_uniprot_map #---------------------------------------------------------------------------------------------------------------------- -def create_inp_map(inparanoid_input_path, dict_uniprot_map) : +def create_inp_map(inparanoid_input_path, dict_uniprot_map, projection_prefix) : #---------------------------------------------------------------------------------------------------------------------- """ open the inparanoid file (which is already loci-filtered for curated reference set) and generate a 2-col mapping of PRJ to LOC loci @@ -90,7 +90,7 @@ def create_inp_map(inparanoid_input_path, dict_uniprot_map) : # swap loc for uniprot, if specified if dict_uniprot_map : os_locus = dict_uniprot_map[os_locus] - prj_locus = cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes (i.e. '.#', '_T0#') + prj_locus = (projection_prefix if projection_prefix else "") + cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes (i.e. 
'.#', '_T0#') if os_locus in dict_inp_map : dict_inp_map[os_locus].add(prj_locus) else : @@ -108,7 +108,7 @@ def create_inp_map(inparanoid_input_path, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- -def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, dict_uniprot_map) : +def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, dict_uniprot_map, projection_prefix) : #---------------------------------------------------------------------------------------------------------------------- """ open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where @@ -161,9 +161,9 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_ # reciprocal identity is >= recip_id%, high confidence if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 : if os_locus in dict_ens_map : - dict_ens_map[os_locus].add(cols[1]) + dict_ens_map[os_locus].add((projection_prefix if projection_prefix else "") + cols[1]) else : - dict_ens_map[os_locus] = set([cols[1]]) + dict_ens_map[os_locus] = set([(projection_prefix if projection_prefix else "") + cols[1]]) ENS.close() for k, v in dict_ens_map.iteritems() : @@ -291,6 +291,7 @@ parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for # TODO: add an "inparanoid super-cluster vs. 
conventional input" flag # output settings +parser.add_argument('-p', '--projection_prefix', help='add a platform-specific prefix to the projectied protein identifiers', default='') parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons') parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs') parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs') @@ -303,8 +304,8 @@ if args.uniprot_substitution : dict_uniprot_map = create_dict_uniprot_map(args.uniprot_substitution) # create projection maps -dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map) -dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map) +dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map, args.projection_prefix) +dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, args.projection_prefix) # generate stats and output them compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path) -- 2.34.1