From eac51564dab6eef190e571bfb30c5d93c873173c Mon Sep 17 00:00:00 2001 From: preecej Date: Fri, 4 Apr 2014 01:36:45 +0000 Subject: [PATCH] added ensembl confidence flag svn path=/; revision=568 --- Personnel/preecej/python_singletons/incomparanoid.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py index 256b40d..7841bd3 100755 --- a/Personnel/preecej/python_singletons/incomparanoid.py +++ b/Personnel/preecej/python_singletons/incomparanoid.py @@ -90,7 +90,7 @@ def create_inp_map(inparanoid_input_path, dict_uniprot_map) : # swap loc for uniprot, if specified if dict_uniprot_map : os_locus = dict_uniprot_map[os_locus] - prj_locus = cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes (i.e. '.#', '_T0#') + prj_locus = cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes (i.e. '.#', '_T0#') if os_locus in dict_inp_map : dict_inp_map[os_locus].add(prj_locus) else : @@ -108,7 +108,7 @@ def create_inp_map(inparanoid_input_path, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- -def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, dict_uniprot_map) : +def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, dict_uniprot_map, is_confident) : #---------------------------------------------------------------------------------------------------------------------- """ open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where @@ -158,8 +158,8 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_ # swap loc for uniprot, if specified if dict_uniprot_map : os_locus = dict_uniprot_map[os_locus] - # reciprocal identity is >= recip_id%, high confidence - if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 : + # reciprocal identity is >= recip_id%, optional high confidence + if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) >= is_confident : if os_locus in dict_ens_map : dict_ens_map[os_locus].add(cols[1]) else : @@ -307,6 +307,7 @@ parser.add_argument('-e', '--ensembl_input_path', help='ensembl compara input fi parser.add_argument('-i', '--inparanoid_input_path', help='inparanoid supercluster input file') parser.add_argument('-m', '--rap_map_path', help='MSU-RAP mapping file') parser.add_argument('-r', '--reciprocal_id', type=int, help='reciprocal identity percentage') +parser.add_argument('-C', '--confidence_high', help='only use ensembl projections marked as high-confidence', action='store_true') parser.add_argument('-u', '--uniprot_substitution', help='file path to UniProt substitution data for reference loci') # TODO: add an "inparanoid super-cluster vs. conventional input" flag @@ -328,7 +329,7 @@ if args.uniprot_substitution : # create projection maps dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map) -dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map) +dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, 1 if args.confidence_high else 0) # generate stats and output them compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path) -- 2.34.1