00001
00002 #include <vector>
00003 #include <string>
00004
00005 #include "moses/FF/FFState.h"
00006 #include "moses/Hypothesis.h"
00007 #include "moses/Range.h"
00008 #include "moses/TranslationOption.h"
00009 #include "moses/Util.h"
00010
00011 #include "LexicalReordering.h"
00012 #include "LexicalReorderingState.h"
00013 #include "ReorderingStack.h"
00014
00015 namespace Moses
00016 {
00017
00018 bool
00019 IsMonotonicStep(Range const& prev,
00020 Range const& cur,
00021 Bitmap const& cov)
00022 {
00023 size_t e = prev.GetEndPos() + 1;
00024 size_t s = cur.GetStartPos();
00025 return (s == e || (s >= e && !cov.GetValue(e)));
00026 }
00027
00028 bool
00029 IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
00030 {
00031 size_t s = prev.GetStartPos();
00032 size_t e = cur.GetEndPos();
00033 return (e+1 == s || (e < s && !cov.GetValue(s-1)));
00034 }
00035
00036 size_t
00037 LRModel::
00038 GetNumberOfTypes() const
00039 {
00040 return ((m_modelType == MSD) ? 3 :
00041 (m_modelType == MSLR) ? 4 : 2);
00042 }
00043
00044 size_t
00045 LRModel::
00046 GetNumScoreComponents() const
00047 {
00048 size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
00049 return ((m_direction == Bidirectional)
00050 ? 2 * score_per_dir + m_additionalScoreComponents
00051 : score_per_dir + m_additionalScoreComponents);
00052 }
00053
00054 void
00055 LRModel::
00056 ConfigureSparse(const std::map<std::string,std::string>& sparseArgs,
00057 const LexicalReordering* producer)
00058 {
00059 if (sparseArgs.size()) {
00060 m_sparse.reset(new SparseReordering(sparseArgs, producer));
00061 }
00062 }
00063
00064 void
00065 LRModel::
00066 SetAdditionalScoreComponents(size_t number)
00067 {
00068 m_additionalScoreComponents = number;
00069 }
00070
00072 LRModel::ReorderingType
00073 LRModel::
00074 GetOrientation(Range const& cur) const
00075 {
00076 UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
00077 return ((m_modelType == LeftRight) ? R :
00078 (cur.GetStartPos() == 0) ? M :
00079 (m_modelType == MSD) ? D :
00080 (m_modelType == MSLR) ? DR : NM);
00081 }
00082
00083 LRModel::ReorderingType
00084 LRModel::
00085 GetOrientation(Range const& prev, Range const& cur) const
00086 {
00087 UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
00088 return ((m_modelType == LeftRight)
00089 ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
00090 : (cur.GetStartPos() == prev.GetEndPos() + 1) ? M
00091 : (m_modelType == Monotonic) ? NM
00092 : (prev.GetStartPos() == cur.GetEndPos() + 1) ? S
00093 : (m_modelType == MSD) ? D
00094 : (cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
00095 }
00096
00097 LRModel::ReorderingType
00098 LRModel::
00099 GetOrientation(int const reoDistance) const
00100 {
00101
00102 return ((m_modelType == LeftRight)
00103 ? (reoDistance >= 1) ? R : L
00104 : (reoDistance == 1) ? M
00105 : (m_modelType == Monotonic) ? NM
00106 : (reoDistance == -1) ? S
00107 : (m_modelType == MSD) ? D
00108 : (reoDistance > 1) ? DR : DL);
00109 }
00110
00111 LRModel::ReorderingType
00112 LRModel::
00113 GetOrientation(Range const& prev, Range const& cur,
00114 Bitmap const& cov) const
00115 {
00116 return ((m_modelType == LeftRight)
00117 ? cur.GetStartPos() > prev.GetEndPos() ? R : L
00118 : IsMonotonicStep(prev,cur,cov) ? M
00119 : (m_modelType == Monotonic) ? NM
00120 : IsSwap(prev,cur,cov) ? S
00121 : (m_modelType == MSD) ? D
00122 : cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
00123 }
00124
00125 LRModel::
00126 LRModel(const std::string &modelType)
00127 : m_modelString(modelType)
00128 , m_scoreProducer(NULL)
00129 , m_modelType(None)
00130 , m_phraseBased(true)
00131 , m_collapseScores(false)
00132 , m_direction(Backward)
00133 , m_additionalScoreComponents(0)
00134 {
00135 std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
00136
00137 for (size_t i=0; i<config.size(); ++i) {
00138 if (config[i] == "hier") {
00139 m_phraseBased = false;
00140 } else if (config[i] == "phrase") {
00141 m_phraseBased = true;
00142 } else if (config[i] == "wbe") {
00143 m_phraseBased = true;
00144 }
00145
00146
00147
00148 else if (config[i] == "msd") {
00149 m_modelType = MSD;
00150 } else if (config[i] == "mslr") {
00151 m_modelType = MSLR;
00152 } else if (config[i] == "monotonicity") {
00153 m_modelType = Monotonic;
00154 } else if (config[i] == "leftright") {
00155 m_modelType = LeftRight;
00156 }
00157
00158
00159 else if (config[i] == "unidirectional") {
00160 m_direction = Backward;
00161 } else if (config[i] == "backward") {
00162 m_direction = Backward;
00163 } else if (config[i] == "forward") {
00164 m_direction = Forward;
00165 } else if (config[i] == "bidirectional") {
00166 m_direction = Bidirectional;
00167 }
00168
00169 else if (config[i] == "f") {
00170 m_condition = F;
00171 } else if (config[i] == "fe") {
00172 m_condition = FE;
00173 }
00174
00175 else if (config[i] == "collapseff") {
00176 m_collapseScores = true;
00177 } else if (config[i] == "allff") {
00178 m_collapseScores = false;
00179 } else {
00180 std::cerr
00181 << "Illegal part in the lexical reordering configuration string: "
00182 << config[i] << std::endl;
00183 exit(1);
00184 }
00185 }
00186
00187 if (m_modelType == None) {
00188 std::cerr
00189 << "You need to specify the type of the reordering model "
00190 << "(msd, monotonicity,...)" << std::endl;
00191 exit(1);
00192 }
00193 }
00194
00195 LRState *
00196 LRModel::
00197 CreateLRState(const InputType &input) const
00198 {
00199 LRState *bwd = NULL, *fwd = NULL;
00200 size_t offset = 0;
00201
00202 switch(m_direction) {
00203 case Backward:
00204 case Bidirectional:
00205 if (m_phraseBased)
00206 bwd = new PhraseBasedReorderingState(*this, Backward, offset);
00207 else
00208 bwd = new HReorderingBackwardState(*this, offset);
00209 offset += m_collapseScores ? 1 : GetNumberOfTypes();
00210 if (m_direction == Backward) return bwd;
00211 case Forward:
00212 if (m_phraseBased)
00213 fwd = new PhraseBasedReorderingState(*this, Forward, offset);
00214 else
00215 fwd = new HReorderingForwardState(*this, input.GetSize(), offset);
00216 offset += m_collapseScores ? 1 : GetNumberOfTypes();
00217 if (m_direction == Forward) return fwd;
00218 }
00219 return new BidirectionalReorderingState(*this, bwd, fwd, 0);
00220 }
00221
00222
00223 void
00224 LRState::
00225 CopyScores(ScoreComponentCollection* accum,
00226 const TranslationOption &topt,
00227 const InputType& input,
00228 ReorderingType reoType) const
00229 {
00230
00231 UTIL_THROW_IF2(m_direction != LRModel::Backward &&
00232 m_direction != LRModel::Forward,
00233 "Unknown direction: " << m_direction);
00234
00235 TranslationOption const* relevantOpt = ((m_direction == LRModel::Backward)
00236 ? &topt : m_prevOption);
00237
00238 LexicalReordering* producer = m_configuration.GetScoreProducer();
00239 Scores const* cached = relevantOpt->GetLexReorderingScores(producer);
00240
00241
00242
00243 size_t off_remote = m_offset + reoType;
00244 size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
00245
00246 UTIL_THROW_IF2(off_local >= producer->GetNumScoreComponents(),
00247 "offset out of vector bounds!");
00248
00249
00250 if(cached) {
00251 UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
00252 Scores scores(producer->GetNumScoreComponents(),0);
00253 scores[off_local ] = (*cached)[off_remote];
00254 accum->PlusEquals(producer, scores);
00255 }
00256
00257
00258 else if (producer->GetHaveDefaultScores()) {
00259 Scores scores(producer->GetNumScoreComponents(),0);
00260 scores[off_local] = producer->GetDefaultScore(off_remote);
00261 accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
00262 }
00263
00264
00265 const SparseReordering* sparse = m_configuration.GetSparseReordering();
00266 if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
00267 m_direction, accum);
00268 }
00269
00270
00271 int
00272 LRState::
00273 ComparePrevScores(const TranslationOption *other) const
00274 {
00275 LexicalReordering* producer = m_configuration.GetScoreProducer();
00276 const Scores* myScores = m_prevOption->GetLexReorderingScores(producer);
00277 const Scores* yrScores = other->GetLexReorderingScores(producer);
00278
00279 if(myScores == yrScores) return 0;
00280
00281
00282 if(yrScores == NULL) return -1;
00283 if(myScores == NULL) return 1;
00284
00285 size_t stop = m_offset + m_configuration.GetNumberOfTypes();
00286 for(size_t i = m_offset; i < stop; i++) {
00287 if((*myScores)[i] < (*yrScores)[i]) return -1;
00288 if((*myScores)[i] > (*yrScores)[i]) return 1;
00289 }
00290 return 0;
00291 }
00292
00293
00294
00295
00296 bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
00297
00298 PhraseBasedReorderingState::
00299 PhraseBasedReorderingState(const PhraseBasedReorderingState *prev,
00300 const TranslationOption &topt)
00301 : LRState(prev, topt)
00302 , m_prevRange(topt.GetSourceWordsRange())
00303 , m_first(false)
00304 { }
00305
00306
00307 PhraseBasedReorderingState::
00308 PhraseBasedReorderingState(const LRModel &config,
00309 LRModel::Direction dir, size_t offset)
00310 : LRState(config, dir, offset)
00311 , m_prevRange(NOT_FOUND,NOT_FOUND)
00312 , m_first(true)
00313 { }
00314
00315
00316 size_t PhraseBasedReorderingState::hash() const
00317 {
00318 size_t ret;
00319 ret = hash_value(m_prevRange);
00320 boost::hash_combine(ret, m_direction);
00321
00322 return ret;
00323 }
00324
00325 bool PhraseBasedReorderingState::operator==(const FFState& o) const
00326 {
00327 if (&o == this) return true;
00328
00329 const PhraseBasedReorderingState &other = static_cast<const PhraseBasedReorderingState&>(o);
00330 if (m_prevRange == other.m_prevRange) {
00331 if (m_direction == LRModel::Forward) {
00332 int compareScore = ComparePrevScores(other.m_prevOption);
00333 return compareScore == 0;
00334 } else {
00335 return true;
00336 }
00337 } else {
00338 return false;
00339 }
00340 }
00341
00342 LRState*
00343 PhraseBasedReorderingState::
00344 Expand(const TranslationOption& topt, const InputType& input,
00345 ScoreComponentCollection* scores) const
00346 {
00347
00348
00349 if ((m_direction != LRModel::Forward && m_useFirstBackwardScore) || !m_first) {
00350 LRModel const& lrmodel = m_configuration;
00351 Range const cur = topt.GetSourceWordsRange();
00352 LRModel::ReorderingType reoType = (m_first ? lrmodel.GetOrientation(cur)
00353 : lrmodel.GetOrientation(m_prevRange,cur));
00354 CopyScores(scores, topt, input, reoType);
00355 }
00356 return new PhraseBasedReorderingState(this, topt);
00357 }
00358
00359
00361
00362
00363 size_t BidirectionalReorderingState::hash() const
00364 {
00365 size_t ret = m_backward->hash();
00366 boost::hash_combine(ret, m_forward->hash());
00367 return ret;
00368 }
00369
00370 bool BidirectionalReorderingState::operator==(const FFState& o) const
00371 {
00372 if (&o == this) return 0;
00373
00374 BidirectionalReorderingState const &other
00375 = static_cast<BidirectionalReorderingState const&>(o);
00376
00377 bool ret = (*m_backward == *other.m_backward) && (*m_forward == *other.m_forward);
00378 return ret;
00379 }
00380
00381 LRState*
00382 BidirectionalReorderingState::
00383 Expand(const TranslationOption& topt, const InputType& input,
00384 ScoreComponentCollection* scores) const
00385 {
00386 LRState *newbwd = m_backward->Expand(topt,input, scores);
00387 LRState *newfwd = m_forward->Expand(topt, input, scores);
00388 return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
00389 }
00390
00392
00393
00394 HReorderingBackwardState::
00395 HReorderingBackwardState(const HReorderingBackwardState *prev,
00396 const TranslationOption &topt,
00397 ReorderingStack reoStack)
00398 : LRState(prev, topt), m_reoStack(reoStack)
00399 { }
00400
00401 HReorderingBackwardState::
00402 HReorderingBackwardState(const LRModel &config, size_t offset)
00403 : LRState(config, LRModel::Backward, offset)
00404 { }
00405
00406 size_t HReorderingBackwardState::hash() const
00407 {
00408 size_t ret = m_reoStack.hash();
00409 return ret;
00410 }
00411
00412 bool HReorderingBackwardState::operator==(const FFState& o) const
00413 {
00414 const HReorderingBackwardState& other
00415 = static_cast<const HReorderingBackwardState&>(o);
00416 bool ret = m_reoStack == other.m_reoStack;
00417 return ret;
00418 }
00419
00420 LRState*
00421 HReorderingBackwardState::
00422 Expand(const TranslationOption& topt, const InputType& input,
00423 ScoreComponentCollection* scores) const
00424 {
00425 HReorderingBackwardState* nextState;
00426 nextState = new HReorderingBackwardState(this, topt, m_reoStack);
00427 Range swrange = topt.GetSourceWordsRange();
00428 int reoDistance = nextState->m_reoStack.ShiftReduce(swrange);
00429 ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
00430 CopyScores(scores, topt, input, reoType);
00431 return nextState;
00432 }
00433
00435
00436
00437 HReorderingForwardState::
00438 HReorderingForwardState(const LRModel &config,
00439 size_t size, size_t offset)
00440 : LRState(config, LRModel::Forward, offset)
00441 , m_first(true)
00442 , m_prevRange(NOT_FOUND,NOT_FOUND)
00443 , m_coverage(size)
00444 { }
00445
00446 HReorderingForwardState::
00447 HReorderingForwardState(const HReorderingForwardState *prev,
00448 const TranslationOption &topt)
00449 : LRState(prev, topt)
00450 , m_first(false)
00451 , m_prevRange(topt.GetSourceWordsRange())
00452 , m_coverage(prev->m_coverage, topt.GetSourceWordsRange())
00453 {
00454 }
00455
00456 size_t HReorderingForwardState::hash() const
00457 {
00458 size_t ret;
00459 ret = hash_value(m_prevRange);
00460 return ret;
00461 }
00462
00463 bool HReorderingForwardState::operator==(const FFState& o) const
00464 {
00465 if (&o == this) return true;
00466
00467 HReorderingForwardState const& other
00468 = static_cast<HReorderingForwardState const&>(o);
00469
00470 int compareScores = ((m_prevRange == other.m_prevRange)
00471 ? ComparePrevScores(other.m_prevOption)
00472 : (m_prevRange < other.m_prevRange) ? -1 : 1);
00473 return compareScores == 0;
00474 }
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490 LRState*
00491 HReorderingForwardState::
00492 Expand(TranslationOption const& topt, InputType const& input,
00493 ScoreComponentCollection* scores) const
00494 {
00495 const Range cur = topt.GetSourceWordsRange();
00496
00497 Bitmap cov(m_coverage, cur);
00498 if (!m_first) {
00499 LRModel::ReorderingType reoType;
00500 reoType = m_configuration.GetOrientation(m_prevRange,cur,cov);
00501 CopyScores(scores, topt, input, reoType);
00502 }
00503 return new HReorderingForwardState(this, topt);
00504 }
00505 }
00506