00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
#include <memory>

#include "lm/binary_format.hh"
#include "lm/enumerate_vocab.hh"
#include "lm/left.hh"
#include "lm/model.hh"

#include "moses/FF/FFState.h"
#include "moses/Hypothesis.h"
#include "moses/Phrase.h"

#include "moses/LM/Backward.h"
#include "moses/LM/Ken.h"

#include "util/exception.hh"
00034
00035
00036
00037
00038
00039 namespace Moses
00040 {
00041
00043
00044 template <class Model> BackwardLanguageModel<Model>::BackwardLanguageModel(const std::string &line, const std::string &file, FactorType factorType, bool lazy) : LanguageModelKen<Model>(line,file,factorType, lazy ? util::LAZY : util::POPULATE_OR_READ)
00045 {
00046
00047
00048
00049 }
00050
00063 template <class Model> const FFState *BackwardLanguageModel<Model>::EmptyHypothesisState(const InputType &) const
00064 {
00065 BackwardLMState *ret = new BackwardLMState();
00066 lm::ngram::RuleScore<Model> ruleScore(*m_ngram, ret->state);
00067 ruleScore.Terminal(m_ngram->GetVocabulary().EndSentence());
00068
00069 ruleScore.Finish();
00070
00071 return ret;
00072 }
00073
00074
00075
00076
00077
00078
00079
00080
/// Score a target phrase with the backward LM, walking it right-to-left.
///
/// Outputs (all reset to zero first):
///   fullScore  - transformed LM score of the whole phrase.
///   ngramScore - transformed score excluding the contribution accumulated
///                before the n-gram boundary was reached (i.e. the part that
///                may still change when more context is attached).
///   oovCount   - number of words mapped to the unknown-word id (index 0).
template <class Model> void BackwardLanguageModel<Model>::CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const
{
  fullScore = 0;
  ngramScore = 0;
  oovCount = 0;

  // Nothing to score for an empty phrase.
  if (!phrase.GetSize()) return;

  // The chart state is required by RuleScore but not needed by callers here.
  lm::ngram::ChartState discarded_sadly;
  lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);

  // <s> at the start of a rule is not supported by the backward LM.
  UTIL_THROW_IF2(m_beginSentenceFactor == phrase.GetWord(0).GetFactor(m_factorType),
                 "BackwardLanguageModel does not currently support rules that include <s>"
                );

  float before_boundary = 0.0f;

  // boundary marks where full-order n-grams begin; for phrases shorter than
  // the LM order the whole phrase is still boundary-sensitive (boundary = 0).
  int lastWord = phrase.GetSize() - 1;
  int ngramBoundary = m_ngram->Order() - 1;
  int boundary = ( lastWord < ngramBoundary ) ? 0 : ngramBoundary;

  int position;
  // Feed words to the scorer from the end of the phrase to the front.
  for (position = lastWord; position >= 0; position-=1) {
    const Word &word = phrase.GetWord(position);
    UTIL_THROW_IF2(word.IsNonTerminal(),
                   "BackwardLanguageModel does not currently support rules that include non-terminals "
                  );

    lm::WordIndex index = TranslateID(word);
    scorer.Terminal(index);
    if (!index) ++oovCount;  // index 0 is KenLM's <unk>

    // Snapshot the running score when the boundary word has been consumed;
    // NOTE(review): this calls Finish() mid-stream and keeps feeding the same
    // scorer afterwards — relies on KenLM RuleScore tolerating that; confirm
    // against lm/left.hh before touching this.
    if (position==boundary) {
      before_boundary = scorer.Finish();
    }

  }

  fullScore = scorer.Finish();

  // ngramScore is the part of the score accumulated after the boundary.
  ngramScore = TransformLMScore(fullScore - before_boundary);
  fullScore = TransformLMScore(fullScore);

}
00167
00219 template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Hypothesis &hypo, const FFState *ps, ScoreComponentCollection *out) const
00220 {
00221
00222
00223 if (!hypo.GetCurrTargetLength()) {
00224
00225
00226 std::auto_ptr<BackwardLMState> ret(new BackwardLMState());
00227 ret->state = static_cast<const BackwardLMState&>(*ps).state;
00228 return ret.release();
00229
00230 } else {
00231
00232 float returnedScore;
00233
00234 FFState *returnedState = this->Evaluate(hypo.GetCurrTargetPhrase(), ps, returnedScore);
00235
00236 out->PlusEquals(this, returnedScore);
00237
00238 return returnedState;
00239
00240 }
00241 }
00242
00243
00244 template <class Model> FFState *BackwardLanguageModel<Model>::Evaluate(const Phrase &phrase, const FFState *ps, float &returnedScore) const
00245 {
00246
00247 returnedScore = 0.0f;
00248
00249 const lm::ngram::ChartState &previous = static_cast<const BackwardLMState&>(*ps).state;
00250
00251 std::auto_ptr<BackwardLMState> ret(new BackwardLMState());
00252
00253 lm::ngram::RuleScore<Model> scorer(*m_ngram, ret->state);
00254
00255 int ngramBoundary = m_ngram->Order() - 1;
00256 int lastWord = phrase.GetSize() - 1;
00257
00258
00259
00260 for (int position=std::min( lastWord, ngramBoundary - 1); position >= 0; position-=1) {
00261 const Word &word = phrase.GetWord(position);
00262 UTIL_THROW_IF2(word.IsNonTerminal(),
00263 "BackwardLanguageModel does not currently support rules that include non-terminals "
00264 );
00265
00266 lm::WordIndex index = TranslateID(word);
00267 scorer.Terminal(index);
00268 }
00269 scorer.NonTerminal(previous);
00270 returnedScore = scorer.Finish();
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281 return ret.release();
00282
00283
00284
00285 }
00286
00287 LanguageModel *ConstructBackwardLM(const std::string &line, const std::string &file, FactorType factorType, bool lazy)
00288 {
00289 lm::ngram::ModelType model_type;
00290 if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
00291 switch(model_type) {
00292 case lm::ngram::PROBING:
00293 return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
00294 case lm::ngram::REST_PROBING:
00295 return new BackwardLanguageModel<lm::ngram::RestProbingModel>(line, file, factorType, lazy);
00296 case lm::ngram::TRIE:
00297 return new BackwardLanguageModel<lm::ngram::TrieModel>(line, file, factorType, lazy);
00298 case lm::ngram::QUANT_TRIE:
00299 return new BackwardLanguageModel<lm::ngram::QuantTrieModel>(line, file, factorType, lazy);
00300 case lm::ngram::ARRAY_TRIE:
00301 return new BackwardLanguageModel<lm::ngram::ArrayTrieModel>(line, file, factorType, lazy);
00302 case lm::ngram::QUANT_ARRAY_TRIE:
00303 return new BackwardLanguageModel<lm::ngram::QuantArrayTrieModel>(line, file, factorType, lazy);
00304 default:
00305 UTIL_THROW2("Unrecognized kenlm model type " << model_type);
00306 }
00307 } else {
00308 return new BackwardLanguageModel<lm::ngram::ProbingModel>(line, file, factorType, lazy);
00309 }
00310 }
00311
00312 }