From: preecej Date: Tue, 25 Mar 2014 00:00:54 +0000 (+0000) Subject: wip: uniprot substitution for ref loci X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=0f8532046d0501c40199bb2c34f7b85123cb82e1;p=old-jaiswallab-svn%2F.git wip: uniprot substitution for ref loci svn path=/; revision=560 --- diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py index 7bf3969..6a6f231 100755 --- a/Personnel/preecej/python_singletons/incomparanoid.py +++ b/Personnel/preecej/python_singletons/incomparanoid.py @@ -15,6 +15,7 @@ import re # globals #---------------------------------------------------------------------------------------------------------------------- list_stats = [] +dict_uniprot_map = {} dict_ens_map = {} dict_inp_map = {} @@ -46,7 +47,18 @@ class switch(object): #---------------------------------------------------------------------------------------------------------------------- #---------------------------------------------------------------------------------------------------------------------- -def create_inp_map(inparanoid_input_path) : +def create_dict_uniprot_map() : +#---------------------------------------------------------------------------------------------------------------------- + """ + create reference-to-uniprot mapping dictionary + """ + dict_uniprot_map = {} # local + # read map file, populate dict (note possibility of one uniprot id to many ref loci) + + return dict_uniprot_map + +#---------------------------------------------------------------------------------------------------------------------- +def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- """ open the inparanoid file (which is already loci-filtered for curated reference set) and generate a 2-col mapping of PRJ to LOC loci @@ -80,7 +92,7 @@ def create_inp_map(inparanoid_input_path) : #---------------------------------------------------------------------------------------------------------------------- -def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id) : +def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, uniprot_substitution, dict_uniprot_map) : #---------------------------------------------------------------------------------------------------------------------- """ open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where @@ -159,7 +171,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu ref_dict = {} set_ens_os_loci = set() set_inp_os_loci = set() - + #ref_dict structure: {os locus : [[ens projected locus, ...], [inp projected locus, ...], # of common loci]} # iterate over both map files and build an overlap map, counting inclusions and exclusions at the reference and projection level @@ -227,7 +239,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu INP_OUT_FILE = open(inparanoid_output_path,'w') INP_FLAT_OUT_FILE = open(inparanoid_output_path + ".flat",'w') for k, v in sorted(dict_inp_map.iteritems()) : - INP_OUT_FILE.write(k + "\t" + ",".join(v) + "\n") + INP_OUT_FILE.write(k + "\t" + " ".join(v) + "\n") for projection in sorted(v) : INP_FLAT_OUT_FILE.write(k + "\t" + projection + "\n") INP_OUT_FILE.close() @@ -236,7 +248,7 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu ENS_OUT_FILE = open(ensembl_output_path,'w') ENS_FLAT_OUT_FILE = open(ensembl_output_path + ".flat",'w') for k, v in sorted(dict_ens_map.iteritems()) : - ENS_OUT_FILE.write(k + "\t" + ",".join(v) + "\n") + ENS_OUT_FILE.write(k + "\t" + " ".join(v) + "\n") for projection in sorted(v) : ENS_FLAT_OUT_FILE.write(k + "\t" + projection + "\n") ENS_OUT_FILE.close() @@ -258,6 +270,7 @@ parser.add_argument('-r', '--reciprocal_id', type=int, help='reciprocal identity # TODO: add an "inparanoid super-cluster vs. conventional input" flag # output settings +parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for reference loci', action='store_true') parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons') parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs') parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs') @@ -265,9 +278,13 @@ parser.add_argument('-I', '--inparanoid_output_path', help='output file containi args = parser.parse_args() #print args +# create ref loci::UniProt map, if specified +if args.uniprot_substitution : + dict_uniprot_map = create_dict_uniprot_map() + # create projection maps -dict_inp_map = create_inp_map(args.inparanoid_input_path) -dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id) +dict_inp_map = create_inp_map(args.inparanoid_input_path, args.uniprot_substitution, dict_uniprot_map) +dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, args.uniprot_substitution, dict_uniprot_map) # generate stats and output them compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)