00001
00002 #include <vector>
00003 #include <string>
00004 #include "util/check.hh"
00005
00006 #include "FFState.h"
00007 #include "Hypothesis.h"
00008 #include "WordsRange.h"
00009 #include "ReorderingStack.h"
00010 #include "TranslationOption.h"
00011
00012 #include "LexicalReordering.h"
00013 #include "LexicalReorderingState.h"
00014
00015 namespace Moses
00016 {
00017
00018 size_t LexicalReorderingConfiguration::GetNumberOfTypes() const
00019 {
00020 switch (m_modelType) {
00021 case LexicalReorderingConfiguration::MSD:
00022 return 3;
00023 break;
00024 case LexicalReorderingConfiguration::MSLR:
00025 return 4;
00026 break;
00027 default:
00028 return 2;
00029 }
00030 }
00031
00032 size_t LexicalReorderingConfiguration::GetNumScoreComponents() const
00033 {
00034 size_t score_per_dir = m_collapseScores ? 1 : GetNumberOfTypes();
00035 if (m_direction == Bidirectional) {
00036 return 2 * score_per_dir;
00037 } else {
00038 return score_per_dir;
00039 }
00040 }
00041
00042 LexicalReorderingConfiguration::LexicalReorderingConfiguration(ScoreProducer *scoreProducer, const std::string &modelType)
00043 : m_scoreProducer(scoreProducer), m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(Backward)
00044 {
00045 std::vector<std::string> config = Tokenize<std::string>(modelType, "-");
00046
00047 for (size_t i=0; i<config.size(); ++i) {
00048 if (config[i] == "hier") {
00049 m_phraseBased = false;
00050 } else if (config[i] == "phrase") {
00051 m_phraseBased = true;
00052 } else if (config[i] == "wbe") {
00053 m_phraseBased = true;
00054
00055
00056 } else if (config[i] == "msd") {
00057 m_modelType = MSD;
00058 } else if (config[i] == "mslr") {
00059 m_modelType = MSLR;
00060 } else if (config[i] == "monotonicity") {
00061 m_modelType = Monotonic;
00062 } else if (config[i] == "leftright") {
00063 m_modelType = LeftRight;
00064 } else if (config[i] == "backward" || config[i] == "unidirectional") {
00065
00066 m_direction = Backward;
00067 } else if (config[i] == "forward") {
00068 m_direction = Forward;
00069 } else if (config[i] == "bidirectional") {
00070 m_direction = Bidirectional;
00071 } else if (config[i] == "f") {
00072 m_condition = F;
00073 } else if (config[i] == "fe") {
00074 m_condition = FE;
00075 } else if (config[i] == "collapseff") {
00076 m_collapseScores = true;
00077 } else if (config[i] == "allff") {
00078 m_collapseScores = false;
00079 } else {
00080 UserMessage::Add("Illegal part in the lexical reordering configuration string: "+config[i]);
00081 exit(1);
00082 }
00083 }
00084
00085 if (m_modelType == None) {
00086 UserMessage::Add("You need to specify the type of the reordering model (msd, monotonicity,...)");
00087 exit(1);
00088 }
00089 }
00090
00091 LexicalReorderingState *LexicalReorderingConfiguration::CreateLexicalReorderingState(const InputType &input) const
00092 {
00093 LexicalReorderingState *bwd = NULL, *fwd = NULL;
00094 size_t offset = 0;
00095
00096 switch(m_direction) {
00097 case Backward:
00098 case Bidirectional:
00099 if (m_phraseBased) {
00100 bwd = new PhraseBasedReorderingState(*this, LexicalReorderingConfiguration::Backward, offset);
00101 } else {
00102 bwd = new HierarchicalReorderingBackwardState(*this, offset);
00103 }
00104 offset += m_collapseScores ? 1 : GetNumberOfTypes();
00105 if (m_direction == Backward)
00106 return bwd;
00107 case Forward:
00108 if (m_phraseBased) {
00109 fwd = new PhraseBasedReorderingState(*this, LexicalReorderingConfiguration::Forward, offset);
00110 } else {
00111 fwd = new HierarchicalReorderingForwardState(*this, input.GetSize(), offset);
00112 }
00113 offset += m_collapseScores ? 1 : GetNumberOfTypes();
00114 if (m_direction == Forward)
00115 return fwd;
00116 }
00117
00118 return new BidirectionalReorderingState(*this, bwd, fwd, 0);
00119 }
00120
00121 void LexicalReorderingState::CopyScores(Scores& scores, const TranslationOption &topt, ReorderingType reoType) const
00122 {
00123
00124 CHECK(m_direction == LexicalReorderingConfiguration::Backward || m_direction == LexicalReorderingConfiguration::Forward);
00125 const Scores *cachedScores = (m_direction == LexicalReorderingConfiguration::Backward) ?
00126 topt.GetCachedScores(m_configuration.GetScoreProducer()) : m_prevScore;
00127
00128
00129 if(cachedScores == NULL)
00130 return;
00131
00132 const Scores &scoreSet = *cachedScores;
00133 if(m_configuration.CollapseScores())
00134 scores[m_offset] = scoreSet[m_offset + reoType];
00135 else {
00136 std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
00137 scores[m_offset + reoType] = scoreSet[m_offset + reoType];
00138 }
00139 }
00140
00141 void LexicalReorderingState::ClearScores(Scores& scores) const
00142 {
00143 if(m_configuration.CollapseScores())
00144 scores[m_offset] = 0;
00145 else
00146 std::fill(scores.begin() + m_offset, scores.begin() + m_offset + m_configuration.GetNumberOfTypes(), 0);
00147 }
00148
00149 int LexicalReorderingState::ComparePrevScores(const Scores *other) const
00150 {
00151 if(m_prevScore == other)
00152 return 0;
00153
00154
00155 if(other == NULL)
00156 return -1;
00157 if(m_prevScore == NULL)
00158 return 1;
00159
00160 const Scores &my = *m_prevScore;
00161 const Scores &their = *other;
00162 for(size_t i = m_offset; i < m_offset + m_configuration.GetNumberOfTypes(); i++)
00163 if(my[i] < their[i])
00164 return -1;
00165 else if(my[i] > their[i])
00166 return 1;
00167
00168 return 0;
00169 }
00170
00171 PhraseBasedReorderingState::PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt)
00172 : LexicalReorderingState(prev, topt), m_prevRange(topt.GetSourceWordsRange()), m_first(false) {}
00173
00174
00175 PhraseBasedReorderingState::PhraseBasedReorderingState(const LexicalReorderingConfiguration &config,
00176 LexicalReorderingConfiguration::Direction dir, size_t offset)
00177 : LexicalReorderingState(config, dir, offset), m_prevRange(NOT_FOUND,NOT_FOUND), m_first(true) {}
00178
00179
00180 int PhraseBasedReorderingState::Compare(const FFState& o) const
00181 {
00182 if (&o == this)
00183 return 0;
00184
00185 const PhraseBasedReorderingState* other = dynamic_cast<const PhraseBasedReorderingState*>(&o);
00186 CHECK(other != NULL);
00187 if (m_prevRange == other->m_prevRange) {
00188 if (m_direction == LexicalReorderingConfiguration::Forward) {
00189 return ComparePrevScores(other->m_prevScore);
00190 } else {
00191 return 0;
00192 }
00193 } else if (m_prevRange < other->m_prevRange) {
00194 return -1;
00195 }
00196 return 1;
00197 }
00198
00199 LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
00200 {
00201 ReorderingType reoType;
00202 const WordsRange currWordsRange = topt.GetSourceWordsRange();
00203 const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
00204
00205 if (m_direction == LexicalReorderingConfiguration::Forward && m_first) {
00206 ClearScores(scores);
00207 } else {
00208 if (modelType == LexicalReorderingConfiguration::MSD) {
00209 reoType = GetOrientationTypeMSD(currWordsRange);
00210 } else if (modelType == LexicalReorderingConfiguration::MSLR) {
00211 reoType = GetOrientationTypeMSLR(currWordsRange);
00212 } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
00213 reoType = GetOrientationTypeMonotonic(currWordsRange);
00214 } else {
00215 reoType = GetOrientationTypeLeftRight(currWordsRange);
00216 }
00217
00218 CopyScores(scores, topt, reoType);
00219 }
00220
00221 return new PhraseBasedReorderingState(this, topt);
00222 }
00223
00224 LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSD(WordsRange currRange) const
00225 {
00226 if (m_first) {
00227 if (currRange.GetStartPos() == 0) {
00228 return M;
00229 } else {
00230 return D;
00231 }
00232 }
00233 if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
00234 return M;
00235 } else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
00236 return S;
00237 }
00238 return D;
00239 }
00240
00241 LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMSLR(WordsRange currRange) const
00242 {
00243 if (m_first) {
00244 if (currRange.GetStartPos() == 0) {
00245 return M;
00246 } else {
00247 return DR;
00248 }
00249 }
00250 if (m_prevRange.GetEndPos() == currRange.GetStartPos()-1) {
00251 return M;
00252 } else if (m_prevRange.GetStartPos() == currRange.GetEndPos()+1) {
00253 return S;
00254 } else if (m_prevRange.GetEndPos() < currRange.GetStartPos()) {
00255 return DR;
00256 }
00257 return DL;
00258 }
00259
00260
00261 LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeMonotonic(WordsRange currRange) const
00262 {
00263 if ((m_first && currRange.GetStartPos() == 0) ||
00264 (m_prevRange.GetEndPos() == currRange.GetStartPos()-1)) {
00265 return M;
00266 }
00267 return NM;
00268 }
00269
00270 LexicalReorderingState::ReorderingType PhraseBasedReorderingState::GetOrientationTypeLeftRight(WordsRange currRange) const
00271 {
00272 if (m_first ||
00273 (m_prevRange.GetEndPos() <= currRange.GetStartPos())) {
00274 return R;
00275 }
00276 return L;
00277 }
00278
00280
00281
00282 int BidirectionalReorderingState::Compare(const FFState& o) const
00283 {
00284 if (&o == this)
00285 return 0;
00286
00287 const BidirectionalReorderingState &other = dynamic_cast<const BidirectionalReorderingState &>(o);
00288 if(m_backward->Compare(*other.m_backward) < 0)
00289 return -1;
00290 else if(m_backward->Compare(*other.m_backward) > 0)
00291 return 1;
00292 else
00293 return m_forward->Compare(*other.m_forward);
00294 }
00295
00296 LexicalReorderingState* BidirectionalReorderingState::Expand(const TranslationOption& topt, Scores& scores) const
00297 {
00298 LexicalReorderingState *newbwd = m_backward->Expand(topt, scores);
00299 LexicalReorderingState *newfwd = m_forward->Expand(topt, scores);
00300 return new BidirectionalReorderingState(m_configuration, newbwd, newfwd, m_offset);
00301 }
00302
00304
00305
00306 HierarchicalReorderingBackwardState::HierarchicalReorderingBackwardState(const HierarchicalReorderingBackwardState *prev,
00307 const TranslationOption &topt, ReorderingStack reoStack)
00308 : LexicalReorderingState(prev, topt), m_reoStack(reoStack) {}
00309
00310 HierarchicalReorderingBackwardState::HierarchicalReorderingBackwardState(const LexicalReorderingConfiguration &config, size_t offset)
00311 : LexicalReorderingState(config, LexicalReorderingConfiguration::Backward, offset) {}
00312
00313
00314 int HierarchicalReorderingBackwardState::Compare(const FFState& o) const
00315 {
00316 const HierarchicalReorderingBackwardState& other = dynamic_cast<const HierarchicalReorderingBackwardState&>(o);
00317 return m_reoStack.Compare(other.m_reoStack);
00318 }
00319
00320 LexicalReorderingState* HierarchicalReorderingBackwardState::Expand(const TranslationOption& topt, Scores& scores) const
00321 {
00322
00323 HierarchicalReorderingBackwardState* nextState = new HierarchicalReorderingBackwardState(this, topt, m_reoStack);
00324 ReorderingType reoType;
00325 const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
00326
00327 int reoDistance = nextState->m_reoStack.ShiftReduce(topt.GetSourceWordsRange());
00328
00329 if (modelType == LexicalReorderingConfiguration::MSD) {
00330 reoType = GetOrientationTypeMSD(reoDistance);
00331 } else if (modelType == LexicalReorderingConfiguration::MSLR) {
00332 reoType = GetOrientationTypeMSLR(reoDistance);
00333 } else if (modelType == LexicalReorderingConfiguration::LeftRight) {
00334 reoType = GetOrientationTypeLeftRight(reoDistance);
00335 } else {
00336 reoType = GetOrientationTypeMonotonic(reoDistance);
00337 }
00338
00339 CopyScores(scores, topt, reoType);
00340 return nextState;
00341 }
00342
00343 LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSD(int reoDistance) const
00344 {
00345 if (reoDistance == 1) {
00346 return M;
00347 } else if (reoDistance == -1) {
00348 return S;
00349 }
00350 return D;
00351 }
00352
00353 LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMSLR(int reoDistance) const
00354 {
00355 if (reoDistance == 1) {
00356 return M;
00357 } else if (reoDistance == -1) {
00358 return S;
00359 } else if (reoDistance > 1) {
00360 return DR;
00361 }
00362 return DL;
00363 }
00364
00365 LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeMonotonic(int reoDistance) const
00366 {
00367 if (reoDistance == 1) {
00368 return M;
00369 }
00370 return NM;
00371 }
00372
00373 LexicalReorderingState::ReorderingType HierarchicalReorderingBackwardState::GetOrientationTypeLeftRight(int reoDistance) const
00374 {
00375 if (reoDistance >= 1) {
00376 return R;
00377 }
00378 return L;
00379 }
00380
00381
00382
00383
00385
00386
00387 HierarchicalReorderingForwardState::HierarchicalReorderingForwardState(const LexicalReorderingConfiguration &config, size_t size, size_t offset)
00388 : LexicalReorderingState(config, LexicalReorderingConfiguration::Forward, offset), m_first(true), m_prevRange(NOT_FOUND,NOT_FOUND), m_coverage(size) {}
00389
00390 HierarchicalReorderingForwardState::HierarchicalReorderingForwardState(const HierarchicalReorderingForwardState *prev, const TranslationOption &topt)
00391 : LexicalReorderingState(prev, topt), m_first(false), m_prevRange(topt.GetSourceWordsRange()), m_coverage(prev->m_coverage)
00392 {
00393 const WordsRange currWordsRange = topt.GetSourceWordsRange();
00394 m_coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);
00395 }
00396
00397 int HierarchicalReorderingForwardState::Compare(const FFState& o) const
00398 {
00399 if (&o == this)
00400 return 0;
00401
00402 const HierarchicalReorderingForwardState* other = dynamic_cast<const HierarchicalReorderingForwardState*>(&o);
00403 CHECK(other != NULL);
00404 if (m_prevRange == other->m_prevRange) {
00405 return ComparePrevScores(other->m_prevScore);
00406 } else if (m_prevRange < other->m_prevRange) {
00407 return -1;
00408 }
00409 return 1;
00410 }
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423 LexicalReorderingState* HierarchicalReorderingForwardState::Expand(const TranslationOption& topt, Scores& scores) const
00424 {
00425 const LexicalReorderingConfiguration::ModelType modelType = m_configuration.GetModelType();
00426 const WordsRange currWordsRange = topt.GetSourceWordsRange();
00427
00428 WordsBitmap coverage = m_coverage;
00429 coverage.SetValue(currWordsRange.GetStartPos(), currWordsRange.GetEndPos(), true);
00430
00431 ReorderingType reoType;
00432
00433 if (m_first) {
00434 ClearScores(scores);
00435 } else {
00436 if (modelType == LexicalReorderingConfiguration::MSD) {
00437 reoType = GetOrientationTypeMSD(currWordsRange, coverage);
00438 } else if (modelType == LexicalReorderingConfiguration::MSLR) {
00439 reoType = GetOrientationTypeMSLR(currWordsRange, coverage);
00440 } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
00441 reoType = GetOrientationTypeMonotonic(currWordsRange, coverage);
00442 } else {
00443 reoType = GetOrientationTypeLeftRight(currWordsRange, coverage);
00444 }
00445
00446 CopyScores(scores, topt, reoType);
00447 }
00448
00449 return new HierarchicalReorderingForwardState(this, topt);
00450 }
00451
00452 LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMSD(WordsRange currRange, WordsBitmap coverage) const
00453 {
00454 if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
00455 (!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
00456 return M;
00457 } else if (currRange.GetEndPos() < m_prevRange.GetStartPos() &&
00458 (!coverage.GetValue(m_prevRange.GetStartPos()-1) || currRange.GetEndPos() == m_prevRange.GetStartPos()-1)) {
00459 return S;
00460 }
00461 return D;
00462 }
00463
00464 LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMSLR(WordsRange currRange, WordsBitmap coverage) const
00465 {
00466 if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
00467 (!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
00468 return M;
00469 } else if (currRange.GetEndPos() < m_prevRange.GetStartPos() &&
00470 (!coverage.GetValue(m_prevRange.GetStartPos()-1) || currRange.GetEndPos() == m_prevRange.GetStartPos()-1)) {
00471 return S;
00472 } else if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
00473 return DR;
00474 }
00475 return DL;
00476 }
00477
00478 LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeMonotonic(WordsRange currRange, WordsBitmap coverage) const
00479 {
00480 if (currRange.GetStartPos() > m_prevRange.GetEndPos() &&
00481 (!coverage.GetValue(m_prevRange.GetEndPos()+1) || currRange.GetStartPos() == m_prevRange.GetEndPos()+1)) {
00482 return M;
00483 }
00484 return NM;
00485 }
00486
00487 LexicalReorderingState::ReorderingType HierarchicalReorderingForwardState::GetOrientationTypeLeftRight(WordsRange currRange, WordsBitmap ) const
00488 {
00489 if (currRange.GetStartPos() > m_prevRange.GetEndPos()) {
00490 return R;
00491 }
00492 return L;
00493 }
00494
00495
00496 }