00001 #include "Manager.h"
00002 #include "Timer.h"
00003 #include "SearchNormal.h"
00004 #include "SentenceStats.h"
00005
00006 #include <boost/foreach.hpp>
00007
00008 using namespace std;
00009
00010 namespace Moses
00011 {
00018 SearchNormal::
00019 SearchNormal(Manager& manager, const TranslationOptionCollection &transOptColl)
00020 : Search(manager)
00021 , m_hypoStackColl(manager.GetSource().GetSize() + 1)
00022 , m_transOptColl(transOptColl)
00023 {
00024 VERBOSE(1, "Translating: " << m_source << endl);
00025
00026
00027 std::vector < HypothesisStackNormal >::iterator iterStack;
00028 for (size_t ind = 0 ; ind < m_hypoStackColl.size() ; ++ind) {
00029 HypothesisStackNormal *sourceHypoColl = new HypothesisStackNormal(m_manager);
00030 sourceHypoColl->SetMaxHypoStackSize(this->m_options.search.stack_size,
00031 this->m_options.search.stack_diversity);
00032 sourceHypoColl->SetBeamWidth(this->m_options.search.beam_width);
00033 m_hypoStackColl[ind] = sourceHypoColl;
00034 }
00035 }
00036
00037 SearchNormal::~SearchNormal()
00038 {
00039 RemoveAllInColl(m_hypoStackColl);
00040 }
00041
00042
00043 bool
00044 SearchNormal::
00045 ProcessOneStack(HypothesisStack* hstack)
00046 {
00047 if (this->out_of_time()) return false;
00048 SentenceStats &stats = m_manager.GetSentenceStats();
00049 HypothesisStackNormal &sourceHypoColl
00050 = *static_cast<HypothesisStackNormal*>(hstack);
00051
00052
00053 VERBOSE(3,"processing hypothesis from next stack");
00054 IFVERBOSE(2) stats.StartTimeStack();
00055 sourceHypoColl.PruneToSize(m_options.search.stack_size);
00056 VERBOSE(3,std::endl);
00057 sourceHypoColl.CleanupArcList();
00058 IFVERBOSE(2) stats.StopTimeStack();
00059
00060
00061
00062 HypothesisStackNormal::const_iterator h;
00063 for (h = sourceHypoColl.begin(); h != sourceHypoColl.end(); ++h)
00064 ProcessOneHypothesis(**h);
00065 return true;
00066 }
00067
00068
00073 void SearchNormal::Decode()
00074 {
00075
00076 const Bitmap &initBitmap = m_bitmaps.GetInitialBitmap();
00077 Hypothesis *hypo = new Hypothesis(m_manager, m_source, m_initialTransOpt, initBitmap, m_manager.GetNextHypoId());
00078
00079 m_hypoStackColl[0]->AddPrune(hypo);
00080
00081
00082 BOOST_FOREACH(HypothesisStack* hstack, m_hypoStackColl) {
00083 if (!ProcessOneStack(hstack)) return;
00084 IFVERBOSE(2) OutputHypoStackSize();
00085 actual_hypoStack = static_cast<HypothesisStackNormal*>(hstack);
00086 }
00087 }
00088
00089
00095 void
00096 SearchNormal::
00097 ProcessOneHypothesis(const Hypothesis &hypothesis)
00098 {
00099
00100 bool isWordLattice = m_source.GetType() == WordLatticeInput;
00101
00102 const Bitmap &hypoBitmap = hypothesis.GetWordsBitmap();
00103 const size_t hypoFirstGapPos = hypoBitmap.GetFirstGapPos();
00104 size_t const sourceSize = m_source.GetSize();
00105
00106 ReorderingConstraint const&
00107 ReoConstraint = m_source.GetReorderingConstraint();
00108
00109
00110 if (m_options.reordering.max_distortion < 0) {
00111
00112 for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
00113 TranslationOptionList const* tol;
00114 size_t endPos = startPos;
00115 for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
00116 tol && endPos < sourceSize;
00117 tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos)) {
00118 if (tol->size() == 0
00119 || hypoBitmap.Overlap(Range(startPos, endPos))
00120 || !ReoConstraint.Check(hypoBitmap, startPos, endPos)) {
00121 continue;
00122 }
00123
00124
00125 ExpandAllHypotheses(hypothesis, startPos, endPos);
00126 }
00127 }
00128 return;
00129 }
00130
00131
00132
00133 Range prevRange = hypothesis.GetCurrSourceWordsRange();
00134 for (size_t startPos = hypoFirstGapPos ; startPos < sourceSize ; ++startPos) {
00135
00136
00137 if(hypoBitmap.GetValue(startPos)) continue;
00138
00139 size_t maxSize = sourceSize - startPos;
00140 size_t maxSizePhrase = m_options.search.max_phrase_length;
00141 maxSize = (maxSize < maxSizePhrase) ? maxSize : maxSizePhrase;
00142 size_t closestLeft = hypoBitmap.GetEdgeToTheLeftOf(startPos);
00143
00144 if (isWordLattice) {
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154 if (closestLeft != 0 && closestLeft != startPos
00155 && !m_source.CanIGetFromAToB(closestLeft, startPos))
00156 continue;
00157
00158 if (prevRange.GetStartPos() != NOT_FOUND &&
00159 prevRange.GetStartPos() > startPos &&
00160 !m_source.CanIGetFromAToB(startPos, prevRange.GetStartPos()))
00161 continue;
00162 }
00163
00164 Range currentStartRange(startPos, startPos);
00165 if(m_source.ComputeDistortionDistance(prevRange, currentStartRange)
00166 > m_options.reordering.max_distortion)
00167 continue;
00168
00169 TranslationOptionList const* tol;
00170 size_t endPos = startPos;
00171 for (tol = m_transOptColl.GetTranslationOptionList(startPos, endPos);
00172 tol && endPos < sourceSize;
00173 tol = m_transOptColl.GetTranslationOptionList(startPos, ++endPos)) {
00174 Range extRange(startPos, endPos);
00175 if (tol->size() == 0
00176 || hypoBitmap.Overlap(extRange)
00177 || !ReoConstraint.Check(hypoBitmap, startPos, endPos)
00178 || (isWordLattice && !m_source.IsCoveragePossible(extRange))) {
00179 continue;
00180 }
00181
00182
00183
00184
00185
00186
00187
00188
00189 bool isLeftMostEdge = (hypoFirstGapPos == startPos);
00190
00191 size_t closestRight = hypoBitmap.GetEdgeToTheRightOf(endPos);
00192 if (isWordLattice) {
00193 if (closestRight != endPos
00194 && ((closestRight + 1) < sourceSize)
00195 && !m_source.CanIGetFromAToB(endPos + 1, closestRight + 1)) {
00196 continue;
00197 }
00198 }
00199
00200 if (isLeftMostEdge) {
00201
00202 ExpandAllHypotheses(hypothesis, startPos, endPos);
00203 } else {
00204
00205
00206
00207
00208
00209
00210
00211
00212 Range bestNextExtension(hypoFirstGapPos, hypoFirstGapPos);
00213
00214 if (m_source.ComputeDistortionDistance(extRange, bestNextExtension)
00215 > m_options.reordering.max_distortion) continue;
00216
00217
00218 ExpandAllHypotheses(hypothesis, startPos, endPos);
00219 }
00220 }
00221 }
00222 }
00223
00224
00232 void
00233 SearchNormal::
00234 ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos)
00235 {
00236
00237
00238 float expectedScore = 0.0f;
00239
00240 const Bitmap &sourceCompleted = hypothesis.GetWordsBitmap();
00241 float estimatedScore = m_transOptColl.GetEstimatedScores().CalcEstimatedScore( sourceCompleted, startPos, endPos );
00242
00243 const Range &hypoRange = hypothesis.GetCurrSourceWordsRange();
00244
00245
00246
00247 if (m_options.search.UseEarlyDiscarding()) {
00248
00249 expectedScore = hypothesis.GetScore();
00250
00251
00252 expectedScore += estimatedScore;
00253 }
00254
00255
00256 const TranslationOptionList* tol
00257 = m_transOptColl.GetTranslationOptionList(startPos, endPos);
00258 if (!tol || tol->size() == 0) return;
00259
00260
00261 const TranslationOption &transOpt = **tol->begin();
00262 const Range &nextRange = transOpt.GetSourceWordsRange();
00263 const Bitmap &nextBitmap = m_bitmaps.GetBitmap(sourceCompleted, nextRange);
00264
00265 TranslationOptionList::const_iterator iter;
00266 for (iter = tol->begin() ; iter != tol->end() ; ++iter) {
00267 const TranslationOption &transOpt = **iter;
00268 ExpandHypothesis(hypothesis, transOpt, expectedScore, estimatedScore, nextBitmap);
00269 }
00270 }
00271
00281 void SearchNormal::ExpandHypothesis(const Hypothesis &hypothesis,
00282 const TranslationOption &transOpt,
00283 float expectedScore,
00284 float estimatedScore,
00285 const Bitmap &bitmap)
00286 {
00287 SentenceStats &stats = m_manager.GetSentenceStats();
00288
00289 Hypothesis *newHypo;
00290 if (! m_options.search.UseEarlyDiscarding()) {
00291
00292 IFVERBOSE(2) {
00293 stats.StartTimeBuildHyp();
00294 }
00295 newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
00296 IFVERBOSE(2) {
00297 stats.StopTimeBuildHyp();
00298 }
00299 if (newHypo==NULL) return;
00300
00301 IFVERBOSE(2) {
00302 m_manager.GetSentenceStats().StartTimeOtherScore();
00303 }
00304 newHypo->EvaluateWhenApplied(estimatedScore);
00305 IFVERBOSE(2) {
00306 m_manager.GetSentenceStats().StopTimeOtherScore();
00307
00308
00309
00310
00311
00312
00313
00314
00315 m_manager.GetSentenceStats().StartTimeEstimateScore();
00316 m_manager.GetSentenceStats().StopTimeEstimateScore();
00317 }
00318 } else
00319
00320 {
00321
00322 size_t wordsTranslated = hypothesis.GetWordsBitmap().GetNumWordsCovered() + transOpt.GetSize();
00323 float allowedScore = m_hypoStackColl[wordsTranslated]->GetWorstScore();
00324 if (m_options.search.stack_diversity) {
00325 WordsBitmapID id = hypothesis.GetWordsBitmap().GetIDPlus(transOpt.GetStartPos(), transOpt.GetEndPos());
00326 float allowedScoreForBitmap = m_hypoStackColl[wordsTranslated]->GetWorstScoreForBitmap( id );
00327 allowedScore = std::min( allowedScore, allowedScoreForBitmap );
00328 }
00329 allowedScore += m_options.search.early_discarding_threshold;
00330
00331
00332 expectedScore += transOpt.GetFutureScore();
00333
00334
00335 if (expectedScore < allowedScore) {
00336 IFVERBOSE(2) {
00337 stats.AddNotBuilt();
00338 }
00339 return;
00340 }
00341
00342
00343 IFVERBOSE(2) {
00344 stats.StartTimeBuildHyp();
00345 }
00346 newHypo = new Hypothesis(hypothesis, transOpt, bitmap, m_manager.GetNextHypoId());
00347 if (newHypo==NULL) return;
00348 IFVERBOSE(2) {
00349 stats.StopTimeBuildHyp();
00350 }
00351
00352
00353 if (expectedScore < allowedScore) {
00354 IFVERBOSE(2) {
00355 stats.AddEarlyDiscarded();
00356 }
00357 delete newHypo;
00358 return;
00359 }
00360
00361 }
00362
00363
00364 IFVERBOSE(3) {
00365 newHypo->PrintHypothesis();
00366 }
00367
00368
00369 size_t wordsTranslated = newHypo->GetWordsBitmap().GetNumWordsCovered();
00370 IFVERBOSE(2) {
00371 stats.StartTimeStack();
00372 }
00373 m_hypoStackColl[wordsTranslated]->AddPrune(newHypo);
00374 IFVERBOSE(2) {
00375 stats.StopTimeStack();
00376 }
00377 }
00378
00379 const std::vector < HypothesisStack* >& SearchNormal::GetHypothesisStacks() const
00380 {
00381 return m_hypoStackColl;
00382 }
00383
00388 const Hypothesis *SearchNormal::GetBestHypothesis() const
00389 {
00390 if (interrupted_flag == 0) {
00391 const HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(m_hypoStackColl.back());
00392 return hypoColl.GetBestHypothesis();
00393 } else {
00394 const HypothesisStackNormal &hypoColl = *actual_hypoStack;
00395 return hypoColl.GetBestHypothesis();
00396 }
00397 }
00398
00402 void SearchNormal::OutputHypoStackSize()
00403 {
00404 std::vector < HypothesisStack* >::const_iterator iterStack = m_hypoStackColl.begin();
00405 TRACE_ERR( "Stack sizes: " << (int)(*iterStack)->size());
00406 for (++iterStack; iterStack != m_hypoStackColl.end() ; ++iterStack) {
00407 TRACE_ERR( ", " << (int)(*iterStack)->size());
00408 }
00409 TRACE_ERR( endl);
00410 }
00411
00412 void SearchNormal::OutputHypoStack()
00413 {
00414
00415 int i = 0;
00416 vector < HypothesisStack* >::iterator iterStack;
00417 for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
00418 HypothesisStackNormal &hypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
00419 TRACE_ERR( "Stack " << i++ << ": " << endl << hypoColl << endl);
00420 }
00421 }
00422
00423 }