Hello!

To see the file structure, click on "tree".

Note that updates take place every 10 minutes, so commits may not be visible immediately.
Added system call to sort the results file and remove exact dupes.
author preecej <preecej@localhost>
Wed, 13 Oct 2010 00:47:00 +0000 (00:47 +0000)
committer preecej <preecej@localhost>
Wed, 13 Oct 2010 00:47:00 +0000 (00:47 +0000)
svn path=/; revision=66

preecej/perl_singletons/reactome_chebi_mapping/reactome_chebi_mapping.pl

index bb44688064b3b157a5583d338cfdd322a0b2e98b..46b3cf52e15aa186675ae7a614d4a8d8abe551f8 100755 (executable)
@@ -38,9 +38,10 @@ use GO::Parser;
 
 # set paths to data files
 my $data_path = "/home/preecej/Documents/Projects/Reactome/";
-my $chebi_obo_file = "chebi_sample.obo";
-my $reactome_file = "RiceReferenceMolecules_sample.txt";
-my $mapped_output_file = "reactome_chebi_mapping.txt";
+my $chebi_obo_file = "chebi.obo";
+my $reactome_file = "RiceReferenceMolecules.txt";
+my $mapped_output_file = "reactome_chebi_mapping_complete.txt";
+my $sorted_output_file = "reactome_chebi_mapping_complete_sorted.txt";
 
 # options
 my $allow_obsolete_terms = 1;
@@ -317,6 +318,9 @@ sub create_mapfile
         print OUTPUT_FILE "$_\n" foreach @map_results;
         
         close OUTPUT_FILE;
+        
+        # sort on all cols (keep the header at the top), remove exact dupes
+        system "awk 'NR == 1; NR > 1 {print \$0 | \"sort\"}' $data_path$mapped_output_file | uniq > $data_path$sorted_output_file";        
     } else {
         print "\n\nSorry, there are no mapped results.\n\n";
     }