# set paths to data files
# $data_path is the base directory; the output file names below are appended
# to it directly (no separator is inserted), so it must end with a "/".
# NOTE(review): the input files are presumably resolved against $data_path
# the same way -- confirm where they are opened.
my $data_path = "/home/preecej/Documents/Projects/Reactome/";
-my $chebi_obo_file = "chebi_sample.obo";
-my $reactome_file = "RiceReferenceMolecules_sample.txt";
-my $mapped_output_file = "reactome_chebi_mapping.txt";
+my $chebi_obo_file = "chebi.obo";
+my $reactome_file = "RiceReferenceMolecules.txt";
+my $mapped_output_file = "reactome_chebi_mapping_complete.txt";
+my $sorted_output_file = "reactome_chebi_mapping_complete_sorted.txt";
# options
# NOTE(review): $allow_obsolete_terms presumably toggles whether obsolete
# ChEBI terms are accepted during mapping (1 = allow) -- its use is not
# visible in this chunk; confirm.
my $allow_obsolete_terms = 1;
print OUTPUT_FILE "$_\n" foreach @map_results;
close OUTPUT_FILE;
+
+ # sort on all cols (keep the header at the top), remove exact dupes:
+ # awk prints line 1 as-is and pipes every later line through sort; uniq
+ # then drops adjacent identical lines before the result is written out.
+ # Paths are single-quoted so the shell treats each one as a single word.
+ # The status is checked so a failed pipeline is reported instead of being
+ # silently ignored (the shell reports the status of the last command only).
+ system("awk 'NR == 1; NR > 1 {print \$0 | \"sort\"}' '$data_path$mapped_output_file' | uniq > '$data_path$sorted_output_file'") == 0
+     or warn "Could not sort $data_path$mapped_output_file into $data_path$sorted_output_file (status $?)\n";
} else {
print "\n\nSorry, there are no mapped results.\n\n";
}