loc = cols[0]
uniprot = cols[1]
if loc in dict_uniprot_map :
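+ # this should not occur: 1-to-many LOC-to-UniProt relationships are not allowed (only the inverse). NOTE(review): the else branch stores `uniprot` directly, so .add() here only works if that stored value is a set -- confirm the intended container type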
dict_uniprot_map[loc].add(uniprot)
else :
dict_uniprot_map[loc] = uniprot
reference loci
"""
dict_ens_map = {} # local ensembl orthology dict
- dict_rap_map = {} # local MSU-RAP dict, w/ only filtered canonical LOC loci (orig. ".1")
+ dict_rap_map = {} # local MSU-RAP dict, using only filtered canonical LOC loci (lowest available: ".1" is first preference, ".2" second preference, and so on)
# generate internal MSU-RAP map
RAP_MAP = open(rap_map_path)
rap_id = cols[0].upper()
set_loc_ids = set(cols[1].upper().split(","))
- # select only the locus w/ a .1 suffix, if it exists
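+ # keep at most one locus per RAP id: "NONE" placeholders are never stored, and everything after the first "." is dropped; set_loc_ids is a set, so "first" means set iteration order, not the order in the input file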
for loc_id in set_loc_ids :
- curr_canon_locus = re.match('.*\.1', loc_id)
- if curr_canon_locus :
- dict_rap_map[rap_id] = curr_canon_locus.group(0).rstrip("1").rstrip(".")
+ if loc_id != "NONE" :
+ canonical = loc_id.split(".")
+ dict_rap_map[rap_id] = canonical[0]
break;
RAP_MAP.close()
+ #for keys, values in dict_rap_map.items() :
+ # print(keys + " | " + values)
+
# generate ref loci filter
FILTER = open(filtering_loci_path)
loci_filter = set()
dict_uniprot_map = create_dict_uniprot_map(args.uniprot_substitution)
# create projection maps
-dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
+if (args.inparanoid_input_path) :
+ dict_inp_map = create_inp_map(args.inparanoid_input_path, dict_uniprot_map)
dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, dict_uniprot_map, 1 if args.confidence_high else 0)
# generate stats and output them
-all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
+if (args.inparanoid_input_path) :
+ all_venn_data = compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
if args.generate_reactome_output == 'ensembl' :
write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)