# globals
#----------------------------------------------------------------------------------------------------------------------
list_stats = []
+dict_uniprot_map = {}
dict_ens_map = {}
dict_inp_map = {}
#----------------------------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------------------------
-def create_inp_map(inparanoid_input_path) :
+def create_dict_uniprot_map() :
+#----------------------------------------------------------------------------------------------------------------------
+ """
+ create reference-to-uniprot mapping dictionary
+ """
+ dict_uniprot_map = {} # local to this function; the assignment does not touch the module-level dict of the same name
+ # TODO: read map file, populate dict (note possibility of one uniprot id to many ref loci) -- not implemented yet, no file is read here
+
+ return dict_uniprot_map # always empty for now, since nothing above populates it
+
+#----------------------------------------------------------------------------------------------------------------------
+def create_inp_map(inparanoid_input_path, uniprot_substitution, dict_uniprot_map) :
#----------------------------------------------------------------------------------------------------------------------
"""
open the inparanoid file (which is already loci-filtered for curated reference set) and generate a 2-col mapping of PRJ to LOC loci
#----------------------------------------------------------------------------------------------------------------------
-def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id) :
+def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_id, uniprot_substitution, dict_uniprot_map) :
#----------------------------------------------------------------------------------------------------------------------
"""
open the ensemble plants and rap::irgsp mapping files and generate a hash mapping of reference to projected loci where
ref_dict = {}
set_ens_os_loci = set()
set_inp_os_loci = set()
-
+
#ref_dict structure: {os locus : [[ens projected locus, ...], [inp projected locus, ...], # of common loci]}
# iterate over both map files and build an overlap map, counting inclusions and exclusions at the reference and projection level
INP_OUT_FILE = open(inparanoid_output_path,'w')
INP_FLAT_OUT_FILE = open(inparanoid_output_path + ".flat",'w')
for k, v in sorted(dict_inp_map.iteritems()) :
- INP_OUT_FILE.write(k + "\t" + ",".join(v) + "\n")
+ INP_OUT_FILE.write(k + "\t" + " ".join(v) + "\n")
for projection in sorted(v) :
INP_FLAT_OUT_FILE.write(k + "\t" + projection + "\n")
INP_OUT_FILE.close()
ENS_OUT_FILE = open(ensembl_output_path,'w')
ENS_FLAT_OUT_FILE = open(ensembl_output_path + ".flat",'w')
for k, v in sorted(dict_ens_map.iteritems()) :
- ENS_OUT_FILE.write(k + "\t" + ",".join(v) + "\n")
+ ENS_OUT_FILE.write(k + "\t" + " ".join(v) + "\n")
for projection in sorted(v) :
ENS_FLAT_OUT_FILE.write(k + "\t" + projection + "\n")
ENS_OUT_FILE.close()
# TODO: add an "inparanoid super-cluster vs. conventional input" flag
# output settings
+parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for reference loci', action='store_true')
parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons')
parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs')
parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs')
args = parser.parse_args()
#print args
+# create ref loci::UniProt map, if -u/--uniprot_substitution was given; otherwise the module-level dict_uniprot_map (initialized empty) is passed through unchanged
+if args.uniprot_substitution :
+ dict_uniprot_map = create_dict_uniprot_map()
+
# create projection maps
-dict_inp_map = create_inp_map(args.inparanoid_input_path)
-dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id)
+dict_inp_map = create_inp_map(args.inparanoid_input_path, args.uniprot_substitution, dict_uniprot_map)
+dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path, args.rap_map_path, args.reciprocal_id, args.uniprot_substitution, dict_uniprot_map)
# generate stats and output them
compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)