From: preecej
Date: Tue, 12 Aug 2014 23:44:10 +0000 (+0000)
Subject: The generation of the MSU-RAP dictionary now accepts .2, .3, .4, etc.
X-Git-Url: http://gitweb.planteome.org/?a=commitdiff_plain;h=38c32e12e2bf3a3eb96783a863ff7ea9162a46c1;p=old-jaiswallab-svn%2F.git

The generation of the MSU-RAP dictionary now accepts .2, .3, .4, etc. isoforms if no .1 is present. This will ensure the inclusion of more orthology mappings.

svn path=/; revision=581
---

diff --git a/Personnel/preecej/python_singletons/incomparanoid.py b/Personnel/preecej/python_singletons/incomparanoid.py
index 1292f6b..75e4856 100755
--- a/Personnel/preecej/python_singletons/incomparanoid.py
+++ b/Personnel/preecej/python_singletons/incomparanoid.py
@@ -40,6 +40,7 @@ def create_dict_uniprot_map(uniprot_substitution_path) :
         loc = cols[0]
         uniprot = cols[1]
         if loc in dict_uniprot_map :
+            # this should not occur; 1-to-many LOC-to-Uniprot relationships not allowed (only the inverse)
             dict_uniprot_map[loc].add(uniprot)
         else :
             dict_uniprot_map[loc] = uniprot
@@ -85,7 +86,7 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
     reference loci
     """
     dict_ens_map = {} # local ensembl orthology dict
-    dict_rap_map = {} # local MSU-RAP dict, w/ only filtered canonical LOC loci (orig. ".1")
+    dict_rap_map = {} # local MSU-RAP dict, using only filtered canonical LOC loci (lowest available: ".1" is first preference, ".2" second preference, and so on)
 
     # generate internal MSU-RAP map
     RAP_MAP = open(rap_map_path)
@@ -95,14 +96,17 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
         rap_id = cols[0].upper()
         set_loc_ids = set(cols[1].upper().split(","))
 
-        # select only the locus w/ a .1 suffix, if it exists
+        # select only the first locus, if it exists
         for loc_id in set_loc_ids :
-            curr_canon_locus = re.match('.*\.1', loc_id)
-            if curr_canon_locus :
-                dict_rap_map[rap_id] = curr_canon_locus.group(0).rstrip("1").rstrip(".")
+            if loc_id != "NONE" :
+                canonical = loc_id.split(".")
+                dict_rap_map[rap_id] = canonical[0]
                 break;
     RAP_MAP.close()
 
+    #for keys, values in dict_rap_map.items() :
+    #    print(keys + " | " + values)
+
     # generate ref loci filter
     FILTER = open(filtering_loci_path)
     loci_filter = set()
@@ -401,11 +405,13 @@ if args.uniprot_substitution :
     dict_uniprot_map = create_dict_uniprot_map(args.uniprot_substitution)
 
 # create projection maps
-dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
+if (args.inparanoid_input_path) :
+    dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
 dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, 1 if args.confidence_high else 0)
 
 # generate stats and output them
-all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
+if (args.inparanoid_input_path) :
+    all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
 
 if args.generate_reactome_output == 'ensembl' :
     write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)