Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
wip: reactome output files
author preecej <preecej@localhost>
Sat, 29 Mar 2014 02:58:03 +0000 (02:58 +0000)
committer preecej <preecej@localhost>
Sat, 29 Mar 2014 02:58:03 +0000 (02:58 +0000)
svn path=/; revision=564

Personnel/preecej/python_singletons/incomparanoid.py

index 15aca2205f320d2af78e1d580ddbe6efd46bc7b6..b9a3bbc180c5141e341f0657f31a68af1cac38e0 100755 (executable)
@@ -161,7 +161,7 @@ def create_ens_map(filtering_loci_path, ensembl_input_path, rap_map_path, recip_
                     # reciprocal identity is >= recip_id%, high confidence
                     if int(cols[2]) >= recip_id and int(cols[3]) >= recip_id and int(cols[4]) == 1 :
                         if os_locus in dict_ens_map :
-                            dict_ens_map[os_locus].add((projection_prefix if projection_prefix + ":" else "") + cols[1])
+                            dict_ens_map[os_locus].add((projection_prefix if projection_prefix else "") + cols[1])
                         else :
                             dict_ens_map[os_locus] = set([(projection_prefix if projection_prefix else "") + cols[1]])
     ENS.close()
@@ -274,6 +274,19 @@ def compare_maps(dict_ens_map, dict_inp_map, comparison_file_path, ensembl_outpu
     ENS_OUT_FILE.close()
     ENS_FLAT_OUT_FILE.close()
 
+#----------------------------------------------------------------------------------------------------------------------
+def write_reactome_files(dict_map, reactome_gene_protein_path, reactome_projection_path, projection_prefix) :
+#----------------------------------------------------------------------------------------------------------------------
+    """
+    writes the Reactome gene::protein "orthopair" file to reactome_gene_protein_path: one "id<TAB>prefix+id" line per
+    unique id in dict_map (assumed locus -> set of ids); reactome_projection_path is accepted but not written yet (wip)
+    """
+    prefix = projection_prefix if projection_prefix else ""
+    # create_ens_map already prepends the prefix to its ids; strip it first so the output is never double-prefixed
+    ids = set(i[len(prefix):] if prefix and i.startswith(prefix) else i for v in dict_map.values() for i in v)
+    with open(reactome_gene_protein_path, 'w') as REACTOME_GENE_PROTEIN_OUT_FILE :
+        REACTOME_GENE_PROTEIN_OUT_FILE.writelines(i + "\t" + prefix + i + "\n" for i in sorted(ids))
+
 #----------------------------------------------------------------------------------------------------------------------
 # main
 #----------------------------------------------------------------------------------------------------------------------
@@ -291,11 +304,13 @@ parser.add_argument('-u', '--uniprot_substitution', help='substitute UniProt for
 # TODO: add an "inparanoid super-cluster vs. conventional input" flag
 
 # output settings
-parser.add_argument('-p', '--projection_prefix', help='add a platform-specific prefix to the projectied protein identifiers', default='')
+parser.add_argument('-p', '--projection_prefix', help='add a platform-specific prefix to the projected protein identifiers', default='') # e.g. 'MaizeGDB:'
 parser.add_argument('-c', '--comparison_file_path', help='output file containing statistical comparisons')
 parser.add_argument('-E', '--ensembl_output_path', help='output file containing flat (1-to-many) ensemble ortho pairs')
 parser.add_argument('-I', '--inparanoid_output_path', help='output file containing flat (1-to-many) inparanoid ortho pairs')
-parser.add_argument('-g', '--generate_reactome_files', help='produce ortho_pair files required by Reactome projection inference script', action="store_true")
+parser.add_argument('-g', '--generate_reactome_output', help='produce ortho_pair files required by Reactome projection inference script for specified projection source', choices=['ensembl', 'inparanoid'])
+parser.add_argument('-R', '--reactome_gene_protein_path', help='output file containing Reactome gene::protein ortho pairs') # e.g. 'zmay_gene_protein_mapping.txt'
+parser.add_argument('-P', '--reactome_projection_path', help='output file containing Reactome reference::projection protein ortho pairs') # e.g. 'osat_zmay_mapping.txt'
 
 args = parser.parse_args()
 #print args
@@ -311,3 +326,12 @@ dict_ens_map = create_ens_map(args.filtering_loci_path, args.ensembl_input_path,
 # generate stats and output them
 compare_maps(dict_ens_map, dict_inp_map, args.comparison_file_path, args.ensembl_output_path, args.inparanoid_output_path)
 
+if args.generate_reactome_output == 'ensembl' : # must match the -g choices exactly ('ensembl', not 'ensemble')
+    write_reactome_files(dict_ens_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
+elif args.generate_reactome_output == 'inparanoid' :
+    write_reactome_files(dict_inp_map, args.reactome_gene_protein_path, args.reactome_projection_path, args.projection_prefix)
+
+#----------------------------------------------------------------------------------------------------------------------
+# end
+#----------------------------------------------------------------------------------------------------------------------
+