00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "pcfg_extract.h"
00021
00022 #include <cassert>
00023 #include <cstdlib>
00024 #include <fstream>
00025 #include <iostream>
00026 #include <map>
00027 #include <memory>
00028 #include <set>
00029 #include <string>
00030 #include <vector>
00031
00032 #include <boost/program_options.hpp>
00033
00034 #include "syntax-common/exception.h"
00035 #include "syntax-common/pcfg.h"
00036 #include "syntax-common/vocabulary.h"
00037 #include "syntax-common/xml_tree_parser.h"
00038
00039 #include "SyntaxTree.h"
00040
00041 #include "options.h"
00042 #include "rule_collection.h"
00043 #include "rule_extractor.h"
00044
00045 namespace MosesTraining
00046 {
00047 namespace Syntax
00048 {
00049 namespace PCFG
00050 {
00051
00052 int PcfgExtract::Main(int argc, char *argv[])
00053 {
00054
00055 Options options;
00056 ProcessOptions(argc, argv, options);
00057
00058
00059 Vocabulary non_term_vocab;
00060 RuleExtractor rule_extractor(non_term_vocab);
00061 RuleCollection rule_collection;
00062 XmlTreeParser parser;
00063 std::string line;
00064 std::size_t line_num = 0;
00065 std::auto_ptr<MosesTraining::SyntaxTree> tree;
00066 while (std::getline(std::cin, line)) {
00067 ++line_num;
00068 try {
00069 tree = parser.Parse(line);
00070 } catch (Exception &e) {
00071 std::ostringstream msg;
00072 msg << "line " << line_num << ": " << e.msg();
00073 Error(msg.str());
00074 }
00075 if (!tree.get()) {
00076 std::ostringstream msg;
00077 msg << "no tree at line " << line_num;
00078 Warn(msg.str());
00079 continue;
00080 }
00081 rule_extractor.Extract(*tree, rule_collection);
00082 }
00083
00084
00085 Pcfg pcfg;
00086 rule_collection.CreatePcfg(pcfg);
00087 pcfg.Write(non_term_vocab, std::cout);
00088
00089 return 0;
00090 }
00091
00092 void PcfgExtract::ProcessOptions(int argc, char *argv[],
00093 Options &options) const
00094 {
00095 namespace po = boost::program_options;
00096
00097 std::ostringstream usage_top;
00098 usage_top << "Usage: " << name() << "\n\n" << "Options";
00099
00100
00101 po::options_description visible(usage_top.str());
00102 visible.add_options()
00103 ("help", "print help message and exit")
00104 ;
00105
00106
00107
00108 po::options_description hidden("Hidden options");
00109 hidden.add_options();
00110
00111
00112 po::options_description cmd_line_options;
00113 cmd_line_options.add(visible).add(hidden);
00114
00115
00116 po::positional_options_description p;
00117
00118
00119 po::variables_map vm;
00120 try {
00121 po::store(po::command_line_parser(argc, argv).style(MosesOptionStyle()).
00122 options(cmd_line_options).positional(p).run(), vm);
00123 po::notify(vm);
00124 } catch (const std::exception &e) {
00125 std::ostringstream msg;
00126 msg << e.what() << "\n\n" << visible;
00127 Error(msg.str());
00128 }
00129
00130 if (vm.count("help")) {
00131 std::cout << visible << std::endl;
00132 std::exit(0);
00133 }
00134 }
00135
00136 }
00137 }
00138 }