# reciprocal identity is >= recip_id%, high confidence
if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 :
if os_locus in dict_ens_map :
- dict_ens_map[os_locus].add((projection_prefix if projection_prefix + ":" else "") + cols[1])
+ dict_ens_map[os_locus].add((projection_prefix if projection_prefix else "") + cols[1])
else :
dict_ens_map[os_locus] = set([(projection_prefix if projection_prefix else "") + cols[1]])
ENS.close()
ENS_OUT_FILE.close()
ENS_FLAT_OUT_FILE.close()
+#----------------------------------------------------------------------------------------------------------------------
+def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) :
+#----------------------------------------------------------------------------------------------------------------------
+    """
+    accepts a single projection map source and writes the Reactome gene::protein "orthopair" format file,
+    including a projection prefix as needed
+
+    dict_map                    -- projection map; each value is a set of projected identifiers
+                                   (a bare identifier string as a value is tolerated as well)
+    reactome_gene_protein_path  -- path of the gene::protein mapping file to write
+    reactome_projection_path    -- path reserved for the reference::projection mapping file
+                                   NOTE(review): accepted but not used yet; only the gene::protein file is written
+    projection_prefix           -- optional prefix for the protein column, e.g. 'MaizeGDB:' (empty/None for none)
+
+    each output line is "<identifier>\t<projection_prefix><identifier>\n", one line per distinct identifier,
+    sorted by identifier
+    """
+    prefix = projection_prefix if projection_prefix else ""
+
+    # the map values are collections of identifiers, so flatten them instead of sorting and
+    # concatenating the collections themselves (set + str raises a TypeError)
+    bare_ids = set()
+    for members in dict_map.values() :
+        if not isinstance(members, (set, frozenset, list, tuple)) :
+            members = [members]
+        for member in members :
+            # the ensembl map stores its members with the prefix already applied; strip it so the
+            # first column holds the bare identifier and the prefix is not applied twice
+            if prefix and member.startswith(prefix) :
+                member = member[len(prefix):]
+            bare_ids.add(member)
+
+    # "with" guarantees the file is closed even if a write fails
+    with open(reactome_gene_protein_path, 'w') as gene_protein_out :
+        for bare_id in sorted(bare_ids) :
+            gene_protein_out.write(bare_id + "\t" + prefix + bare_id + "\n")
+
+
#----------------------------------------------------------------------------------------------------------------------
# main
#----------------------------------------------------------------------------------------------------------------------
# TODO: add an "inparanoid super-cluster vs. conventional input" flag
# output settings
-parser.add_argument('-p', '--projection_prefix', help='add a platform-specific prefix to the projectied protein identifiers', default='')
+parser.add_argument('-p', '--projection_prefix', help='add a platform-specific prefix to the projected protein identifiers', default='') # e.g. 'MaizeGDB:'
parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons')
parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs')
parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs')
-parser.add_argument('-g', '--generate_reactome_files', help='produce ortho_pair files required by Reactome projection inference script', action="store_true")
+# Reactome output: -g selects which projection map is exported; -R and -P are output file paths
+# (not species abbreviations) and are handed unchanged to write_reactome_files
+parser.add_argument('-g', '--generate_reactome_output', help='produce ortho_pair files required by Reactome projection inference script for specified projection source', choices=['ensembl', 'inparanoid'])
+parser.add_argument('-R', '--reactome_gene_protein_path', help='output file containing the Reactome gene-to-protein mapping') # e.g. 'zmay_gene_protein_mapping.txt'
+parser.add_argument('-P', '--reactome_projection_path', help='output file containing the Reactome reference-to-projection mapping') # e.g. 'osat_zmay_mapping.txt'
args = parser.parse_args()
#print args
# generate stats and output them
compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
+# optionally write the Reactome ortho_pair files for the selected projection source; the values tested
+# here must match the choices declared for --generate_reactome_output ('ensembl', 'inparanoid'),
+# and nothing is written when the option is omitted (its value is then None)
+if args.generate_reactome_output == 'ensembl' :
+    write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
+elif args.generate_reactome_output == 'inparanoid' :
+    write_reactome_files(dict_inp_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
+
+#----------------------------------------------------------------------------------------------------------------------
+# end
+#----------------------------------------------------------------------------------------------------------------------
+