#----------------------------------------------------------------------------------------------------------------------
import os
import sys
-
-#----------------------------------------------------------------------------------------------------------------------
-# globals
-#----------------------------------------------------------------------------------------------------------------------
-path = os.getcwd() + "/"
-input_file = sys.argv[1]
-output_file = sys.argv[2]
-map_source_type = sys.argv[3]
+import argparse
#----------------------------------------------------------------------------------------------------------------------
# classes
#----------------------------------------------------------------------------------------------------------------------
#----------------------------------------------------------------------------------------------------------------------
-def ens_map(input_file, output_file) :
+def ens_map(input_file, output_file, recip_id) :
#----------------------------------------------------------------------------------------------------------------------
"""
open the ensemble plants and rap::irgsp mapping files and generate a 2-col mapping of PRJ to LOC loci where
- reciprocal identity is >= 50% and confidence is high
+ reciprocal identity is >= param% and confidence is high
"""
count_loc = 0
for line in ENS :
cols = line.rstrip().split()
if len(cols) == 5 :
- if int(cols[2]) >= 50 and int(cols[3]) >= 50 and int(cols[4]) == 1 : # reciprocal identity is >= 50%, high confidence
+ if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 : # reciprocal identity is >= recip_id%, high confidence
if cols[0] in dict_ens_ids :
dict_ens_ids[cols[0]].append(cols[1])
else :
#for k, v in dict_ens_ids.iteritems() :
# print k, v
- OS_2_PRJ_MAP = open(path + output_file,'w')
+ #OS_2_PRJ_MAP = open(path + output_file,'w')
- RAP_IRGSP = open(path + "loc_rap_mappings_uniq.txt")
+ RAP_IRGSP = open(path + "RAP-MSU.txt")
RAP_IRGSP.readline();
for line in RAP_IRGSP:
if line.strip() != "" :
cols = line.rstrip().split()
- rap_id = cols[1].upper()
- if rap_id in dict_ens_ids and rap_id != "NONE" :
- #print cols[0] + "\t" + ",".join(dict_ens_ids[rap_id])
- OS_2_PRJ_MAP.write(cols[0].upper() + "\t" + ",".join(dict_ens_ids[rap_id]) + "\n")
- count_prj += len(dict_ens_ids[rap_id])
- count_loc += 1
+ rap_id = cols[0].upper()
+ if rap_id in dict_ens_ids :
+ print cols[1].split(",")[0] + "\t" + ",".join(dict_ens_ids[rap_id])
+ #OS_2_PRJ_MAP.write(cols[1].upper() + "\t" + ",".join(set(dict_ens_ids[rap_id])) + "\n")
+ #count_prj += len(set(dict_ens_ids[rap_id]))
+ #count_loc += 1
RAP_IRGSP.close()
- OS_2_PRJ_MAP.close()
+ #OS_2_PRJ_MAP.close()
return [{"Unique Os loci: ": str(count_loc)}, {"Total projected loci:": str(count_prj)}]
INP.readline();
for line in INP :
cols = line.rstrip().split()
- os_locus = cols[1].rstrip(".1")
- prj_locus = cols[2].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes
+ # ignore non-canonical orthologs
+ if int(cols[1].rsplit(".",1)[1]) > 1 :
+ continue
+ os_locus = cols[0].rstrip(".1")
+ prj_locus = cols[1].rsplit("_",1)[0].rsplit(".",1)[0] # remove any isoform suffixes
if os_locus in dict_inp_ids :
dict_inp_ids[os_locus].append(prj_locus)
else :
OS_2_PRJ_MAP = open(path + output_file,'w')
for k, v in sorted(dict_inp_ids.iteritems()) :
#print k, v
- OS_2_PRJ_MAP.write(k + "\t" + ",".join(v) + "\n")
- count_prj += len(v)
+ OS_2_PRJ_MAP.write(k + "\t" + ",".join(set(v)) + "\n")
+ count_prj += len(set(v))
count_loc += 1
OS_2_PRJ_MAP.close()
#----------------------------------------------------------------------------------------------------------------------
# main
#----------------------------------------------------------------------------------------------------------------------
+
+# process args
+parser = argparse.ArgumentParser(description='Script with different analysis methods used to compare Ensembl and Inparanoid species projections.')
+
+parser.add_argument('-r', '--reciprocal_id', type=int, help='reciprocal identity percentage')
+parser.add_argument('-e', '--ensembl_input', help='ensmbl input file')
+parser.add_argument('-i', '--inparanoid_input', help='inparanoid input file')
+parser.add_argument('-o', '--output', help='destination output file')
+
+args = parser.parse_args()
+print args.accumulate(args.integers)
+
+input_file =
+output_file =
+map_source_type =
+recip_id = sys.argv[4]
+
+
list_counts = []
for case in switch(map_source_type):
if case("ens"):
- list_counts = ens_map(input_file, output_file)
+ list_counts = ens_map(input_file, output_file, recip_id)
break
if case("inp"):
list_counts = inp_map(input_file, output_file)