00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_LanguageModelReloading_h
00023 #define moses_LanguageModelReloading_h
00024
00025 #include <string>
00026
00027 #include "moses/LM/Base.h"
00028 #include "moses/LM/Ken.h"
00029
00030 #include "util/tokenize_piece.hh"
00031 #include "util/string_stream.hh"
00032
00033 #include <iostream>
00034 namespace Moses
00035 {
00036
00037 class FFState;
00038 template <class Model> class ReloadingLanguageModel : public LanguageModelKen<Model>
00039 {
00040 public:
00041
00042 ReloadingLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line, file, factorType, lazy ? util::LAZY : util::POPULATE_OR_READ), m_file(file), m_lazy(lazy) {
00043
00044 VERBOSE(1, "ReloadingLM constructor: " << m_file << std::endl);
00045
00046 }
00047
00048 virtual void InitializeForInput(ttasksptr const& ttask) {
00049 VERBOSE(1, "ReloadingLM InitializeForInput" << std::endl);
00050
00051
00052
00053 boost::shared_ptr<Moses::ContextScope> contextScope = ttask->GetScope();
00054
00055
00056 void const* key = static_cast<void const*>(this);
00057
00058
00059 boost::shared_ptr<string> value = contextScope->get<string>(key);
00060
00061
00062 stringstream strme(*(value.get()));
00063
00064 ofstream tmp;
00065 tmp.open(m_file.c_str());
00066
00067
00068 string line;
00069 while (getline(strme, line)) {
00070
00071 tmp << line << "\n";
00072
00073 }
00074
00075 tmp.close();
00076
00077 LanguageModelKen<Model>::LoadModel(m_file, m_lazy ? util::LAZY : util::POPULATE_OR_READ);
00078 };
00079
00080
00081 protected:
00082
00083 using LanguageModelKen<Model>::m_ngram;
00084 using LanguageModelKen<Model>::m_lmIdLookup;
00085 using LanguageModelKen<Model>::m_beginSentenceFactor;
00086
00087 const std::string m_file;
00088 bool m_lazy;
00089 };
00090
00091
00092 LanguageModel *ConstructReloadingLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
00093 {
00094 lm::ngram::ModelType model_type;
00095 if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
00096 switch(model_type) {
00097 case lm::ngram::PROBING:
00098 return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
00099 case lm::ngram::REST_PROBING:
00100 return new ReloadingLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
00101 case lm::ngram::TRIE:
00102 return new ReloadingLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
00103 case lm::ngram::QUANT_TRIE:
00104 return new ReloadingLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
00105 case lm::ngram::ARRAY_TRIE:
00106 return new ReloadingLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
00107 case lm::ngram::QUANT_ARRAY_TRIE:
00108 return new ReloadingLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
00109 default:
00110 UTIL_THROW2("Unrecognized kenlm model type " << model_type);
00111 }
00112 } else {
00113 return new ReloadingLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
00114 }
00115 }
00116
00117 LanguageModel *ConstructReloadingLM(const std::string &lineOrig)
00118 {
00119 FactorType factorType = 0;
00120 std::string filePath;
00121 bool lazy = false;
00122
00123 util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
00124 ++argument;
00125
00126 util::StringStream line;
00127 line << "KENLM";
00128
00129 for (; argument; ++argument) {
00130 const char *equals = std::find(argument->data(), argument->data() + argument->size(), '=');
00131 UTIL_THROW_IF2(equals == argument->data() + argument->size(),
00132 "Expected = in ReloadingLM argument " << *argument);
00133 StringPiece name(argument->data(), equals - argument->data());
00134 StringPiece value(equals + 1, argument->data() + argument->size() - equals - 1);
00135 if (name == "factor") {
00136 factorType = boost::lexical_cast<FactorType>(value);
00137 } else if (name == "order") {
00138
00139 } else if (name == "path") {
00140 filePath.assign(value.data(), value.size());
00141 } else if (name == "lazyken") {
00142 lazy = boost::lexical_cast<bool>(value);
00143 } else {
00144
00145 line << " " << name << "=" << value;
00146 }
00147 }
00148
00149 return ConstructReloadingLM(line.str(), filePath, factorType, lazy);
00150 }
00151
00152
00153 }
00154
00155 #endif
00156