#!/bin/python
-# open the ensemble plants and rap::irgsp mapping files and generate a 2-col mapping of AT to LOC loci
+# open the Ensembl Plants and rap::irgsp mapping files and generate a 2-col mapping of AT to LOC loci where reciprocal identity is >= 50% and confidence is high
path = "/home/preecej/Documents/projects/plant_reactome/plant_reactome_site/projection/rice_to_arabidopsis/"
dict_ens_ids = {}
-ENS = open(path + "ensemble_plants_40_ath_to_rice_IRGSP.tab")
+ENS = open(path + "ensembl_plants_40_os_2_at_uniq.tab")
ENS.readline();
for line in ENS :
- words = line.rstrip().split()
- dict_ens_ids[words[2]] = words[0]
+ cols = line.rstrip().split()
+ if len(cols) == 5 :
+ if int(cols[2]) >= 50 and int(cols[3]) >= 50 and int(cols[4]) == 1 : # reciprocal identity is >= 50%, high confidence
+ dict_ens_ids[cols[0]] = cols[1]
ENS.close()
-OS_2_AT_MAP = open(path + "ensembl_ortho_os_2_at.tab",'w')
+for k, v in dict_ens_ids.iteritems() :
+ print k, v
-RAP_IRGSP = open(path + "loc_rap_mappings.txt")
-RAP_IRGSP.readline();
-for line in RAP_IRGSP:
- if line.strip() != "" :
- words = line.rstrip().split()
- rap_id = words[1].upper()
- if rap_id in dict_ens_ids and rap_id != "NONE" :
- #print dict_ens_ids[rap_id] + "\t" + words[0]
- OS_2_AT_MAP.write(words[0] + "\t" + dict_ens_ids[rap_id] + "\n")
-RAP_IRGSP.close()
-
-OS_2_AT_MAP.close()
+# OS_2_AT_MAP = open(path + "ensembl_ortho_os_2_at.tab",'w')
+#
+# RAP_IRGSP = open(path + "loc_rap_mappings.txt")
+# RAP_IRGSP.readline();
+# for line in RAP_IRGSP:
+ # if line.strip() != "" :
+ # cols = line.rstrip().split()
+ # rap_id = cols[1].upper()
+ # if rap_id in dict_ens_ids and rap_id != "NONE" :
+ # #print dict_ens_ids[rap_id] + "\t" + cols[0]
+ # OS_2_AT_MAP.write(cols[0] + "\t" + dict_ens_ids[rap_id] + "\n")
+# RAP_IRGSP.close()
+#
+# OS_2_AT_MAP.close()