00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "util/check.hh"
00023 #include <iostream>
00024 #include <limits>
00025 #include <vector>
00026 #include <algorithm>
00027
00028 #include "FFState.h"
00029 #include "TranslationOption.h"
00030 #include "TranslationOptionCollection.h"
00031 #include "DummyScoreProducers.h"
00032 #include "Hypothesis.h"
00033 #include "Util.h"
00034 #include "SquareMatrix.h"
00035 #include "LexicalReordering.h"
00036 #include "StaticData.h"
00037 #include "InputType.h"
00038 #include "LMList.h"
00039 #include "Manager.h"
00040 #include "hash.h"
00041
00042 using namespace std;
00043
00044 namespace Moses
00045 {
00046
// Storage for the static pool that Hypothesis::Create() draws raw memory from
// (via getPtr() + placement new) when the build defines USE_HYPO_POOL.
// NOTE(review): the second constructor argument (300000) is presumably the
// pool's initial capacity -- confirm against ObjectPool.
#if defined(USE_HYPO_POOL)
ObjectPool<Hypothesis> Hypothesis::s_objectPool("Hypothesis", 300000);
#endif
00050
00051 Hypothesis::Hypothesis(Manager& manager, InputType const& source, const TargetPhrase &emptyTarget)
00052 : m_prevHypo(NULL)
00053 , m_targetPhrase(emptyTarget)
00054 , m_sourcePhrase(0)
00055 , m_sourceCompleted(source.GetSize(), manager.m_source.m_sourceCompleted)
00056 , m_sourceInput(source)
00057 , m_currSourceWordsRange(
00058 m_sourceCompleted.GetFirstGapPos()>0 ? 0 : NOT_FOUND,
00059 m_sourceCompleted.GetFirstGapPos()>0 ? m_sourceCompleted.GetFirstGapPos()-1 : NOT_FOUND)
00060 , m_currTargetWordsRange(0, emptyTarget.GetSize()-1)
00061 , m_wordDeleted(false)
00062 , m_ffStates(manager.GetTranslationSystem()->GetStatefulFeatureFunctions().size())
00063 , m_arcList(NULL)
00064 , m_transOpt(NULL)
00065 , m_manager(manager)
00066
00067 , m_id(m_manager.GetNextHypoId())
00068 {
00069
00070
00071
00072
00073 ResetScore();
00074 const vector<const StatefulFeatureFunction*>& ffs = m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
00075 for (unsigned i = 0; i < ffs.size(); ++i)
00076 m_ffStates[i] = ffs[i]->EmptyHypothesisState(source);
00077 m_manager.GetSentenceStats().AddCreated();
00078 }
00079
00080
00081
00082
00083 Hypothesis::Hypothesis(const Hypothesis &prevHypo, const TranslationOption &transOpt)
00084 : m_prevHypo(&prevHypo)
00085 , m_targetPhrase(transOpt.GetTargetPhrase())
00086 , m_sourcePhrase(transOpt.GetSourcePhrase())
00087 , m_sourceCompleted (prevHypo.m_sourceCompleted )
00088 , m_sourceInput (prevHypo.m_sourceInput)
00089 , m_currSourceWordsRange (transOpt.GetSourceWordsRange())
00090 , m_currTargetWordsRange ( prevHypo.m_currTargetWordsRange.GetEndPos() + 1
00091 ,prevHypo.m_currTargetWordsRange.GetEndPos() + transOpt.GetTargetPhrase().GetSize())
00092 , m_wordDeleted(false)
00093 , m_totalScore(0.0f)
00094 , m_futureScore(0.0f)
00095 , m_scoreBreakdown (prevHypo.m_scoreBreakdown)
00096 , m_ffStates(prevHypo.m_ffStates.size())
00097 , m_arcList(NULL)
00098 , m_transOpt(&transOpt)
00099 , m_manager(prevHypo.GetManager())
00100 , m_id(m_manager.GetNextHypoId())
00101 {
00102
00103
00104 CHECK(!m_sourceCompleted.Overlap(m_currSourceWordsRange));
00105
00106
00107 m_sourceCompleted.SetValue(m_currSourceWordsRange.GetStartPos(), m_currSourceWordsRange.GetEndPos(), true);
00108 m_wordDeleted = transOpt.IsDeletionOption();
00109 m_manager.GetSentenceStats().AddCreated();
00110 }
00111
00112 Hypothesis::~Hypothesis()
00113 {
00114 for (unsigned i = 0; i < m_ffStates.size(); ++i)
00115 delete m_ffStates[i];
00116
00117 if (m_arcList) {
00118 ArcList::iterator iter;
00119 for (iter = m_arcList->begin() ; iter != m_arcList->end() ; ++iter) {
00120 FREEHYPO(*iter);
00121 }
00122 m_arcList->clear();
00123
00124 delete m_arcList;
00125 m_arcList = NULL;
00126 }
00127 }
00128
00129 void Hypothesis::AddArc(Hypothesis *loserHypo)
00130 {
00131 if (!m_arcList) {
00132 if (loserHypo->m_arcList) {
00133 this->m_arcList = loserHypo->m_arcList;
00134 loserHypo->m_arcList = 0;
00135 } else {
00136 this->m_arcList = new ArcList();
00137 }
00138 } else {
00139 if (loserHypo->m_arcList) {
00140 size_t my_size = m_arcList->size();
00141 size_t add_size = loserHypo->m_arcList->size();
00142 this->m_arcList->resize(my_size + add_size, 0);
00143 std::memcpy(&(*m_arcList)[0] + my_size, &(*loserHypo->m_arcList)[0], add_size * sizeof(Hypothesis *));
00144 delete loserHypo->m_arcList;
00145 loserHypo->m_arcList = 0;
00146 } else {
00147
00148 }
00149 }
00150 m_arcList->push_back(loserHypo);
00151 }
00152
00153
00154
00155
00156 Hypothesis* Hypothesis::CreateNext(const TranslationOption &transOpt, const Phrase* constraint) const
00157 {
00158 return Create(*this, transOpt, constraint);
00159 }
00160
00161
00162
00163
00164 Hypothesis* Hypothesis::Create(const Hypothesis &prevHypo, const TranslationOption &transOpt, const Phrase* constrainingPhrase)
00165 {
00166
00167
00168
00169 bool createHypothesis = true;
00170
00171 if (constrainingPhrase != NULL) {
00172
00173 size_t constraintSize = constrainingPhrase->GetSize();
00174
00175 size_t start = 1 + prevHypo.GetCurrTargetWordsRange().GetEndPos();
00176
00177 const Phrase &transOptPhrase = transOpt.GetTargetPhrase();
00178 size_t transOptSize = transOptPhrase.GetSize();
00179
00180 size_t endpoint = start + transOptSize - 1;
00181
00182
00183 if (endpoint < constraintSize) {
00184 WordsRange range(start, endpoint);
00185 Phrase relevantConstraint = constrainingPhrase->GetSubString(range);
00186
00187 if ( ! relevantConstraint.IsCompatible(transOptPhrase) ) {
00188 createHypothesis = false;
00189
00190 }
00191 } else {
00192 createHypothesis = false;
00193 }
00194
00195 }
00196
00197
00198 if (createHypothesis) {
00199
00200 #ifdef USE_HYPO_POOL
00201 Hypothesis *ptr = s_objectPool.getPtr();
00202 return new(ptr) Hypothesis(prevHypo, transOpt);
00203 #else
00204 return new Hypothesis(prevHypo, transOpt);
00205 #endif
00206
00207 } else {
00208
00209
00210
00211 return NULL;
00212 }
00213
00214 }
00215
00216
00217
00218
00219 Hypothesis* Hypothesis::Create(Manager& manager, InputType const& m_source, const TargetPhrase &emptyTarget)
00220 {
00221 #ifdef USE_HYPO_POOL
00222 Hypothesis *ptr = s_objectPool.getPtr();
00223 return new(ptr) Hypothesis(manager, m_source, emptyTarget);
00224 #else
00225 return new Hypothesis(manager, m_source, emptyTarget);
00226 #endif
00227 }
00228
00234 int Hypothesis::RecombineCompare(const Hypothesis &compare) const
00235 {
00236
00237
00238
00239 int comp = m_sourceCompleted.Compare(compare.m_sourceCompleted);
00240 if (comp != 0)
00241 return comp;
00242
00243 for (unsigned i = 0; i < m_ffStates.size(); ++i) {
00244 if (m_ffStates[i] == NULL || compare.m_ffStates[i] == NULL) {
00245 comp = m_ffStates[i] - compare.m_ffStates[i];
00246 } else {
00247 comp = m_ffStates[i]->Compare(*compare.m_ffStates[i]);
00248 }
00249 if (comp != 0) return comp;
00250 }
00251
00252 return 0;
00253 }
00254
00255 void Hypothesis::ResetScore()
00256 {
00257 m_scoreBreakdown.ZeroAll();
00258 m_futureScore = m_totalScore = 0.0f;
00259 }
00260
00261
00262
00263
00264 void Hypothesis::CalcScore(const SquareMatrix &futureScore)
00265 {
00266
00267
00268
00269
00270 m_scoreBreakdown.PlusEquals(m_transOpt->GetScoreBreakdown());
00271
00272 const StaticData &staticData = StaticData::Instance();
00273 clock_t t=0;
00274
00275
00276
00277 const vector<const StatelessFeatureFunction*>& sfs =
00278 m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions();
00279 for (unsigned i = 0; i < sfs.size(); ++i) {
00280 sfs[i]->Evaluate(m_targetPhrase, &m_scoreBreakdown);
00281 }
00282
00283 const vector<const StatefulFeatureFunction*>& ffs =
00284 m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
00285 for (unsigned i = 0; i < ffs.size(); ++i) {
00286 m_ffStates[i] = ffs[i]->Evaluate(
00287 *this,
00288 m_prevHypo ? m_prevHypo->m_ffStates[i] : NULL,
00289 &m_scoreBreakdown);
00290 }
00291
00292 IFVERBOSE(2) {
00293 t = clock();
00294 }
00295
00296
00297 m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
00298
00299
00300 m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
00301
00302 IFVERBOSE(2) {
00303 m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t );
00304 }
00305 }
00306
00313 float Hypothesis::CalcExpectedScore( const SquareMatrix &futureScore )
00314 {
00315 const StaticData &staticData = StaticData::Instance();
00316 clock_t t=0;
00317 IFVERBOSE(2) {
00318 t = clock();
00319 }
00320
00321 CHECK(!"Need to add code to get the distortion scores");
00322
00323
00324
00325 float estimatedLMScore = m_transOpt->GetFutureScore() - m_transOpt->GetScoreBreakdown().InnerProduct(staticData.GetAllWeights());
00326
00327
00328 m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
00329
00330
00331 float total = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore + estimatedLMScore;
00332
00333 IFVERBOSE(2) {
00334 m_manager.GetSentenceStats().AddTimeEstimateScore( clock()-t );
00335 }
00336 return total;
00337 }
00338
00339 void Hypothesis::CalcRemainingScore()
00340 {
00341 const StaticData &staticData = StaticData::Instance();
00342 clock_t t=0;
00343
00344
00345 CHECK(!"Need to add code to get the LM score(s)");
00346
00347
00348 IFVERBOSE(2) {
00349 t = clock();
00350 }
00351
00352
00353 m_scoreBreakdown.PlusEquals(m_manager.GetTranslationSystem()->GetWordPenaltyProducer()
00354 , - (float)m_currTargetWordsRange.GetNumWordsCovered());
00355
00356
00357 m_totalScore = m_scoreBreakdown.InnerProduct(staticData.GetAllWeights()) + m_futureScore;
00358
00359 IFVERBOSE(2) {
00360 m_manager.GetSentenceStats().AddTimeOtherScore( clock()-t );
00361 }
00362 }
00363
00364 const Hypothesis* Hypothesis::GetPrevHypo()const
00365 {
00366 return m_prevHypo;
00367 }
00368
00372 void Hypothesis::PrintHypothesis() const
00373 {
00374 if (!m_prevHypo) {
00375 TRACE_ERR(endl << "NULL hypo" << endl);
00376 return;
00377 }
00378 TRACE_ERR(endl << "creating hypothesis "<< m_id <<" from "<< m_prevHypo->m_id<<" ( ");
00379 int end = (int)(m_prevHypo->m_targetPhrase.GetSize()-1);
00380 int start = end-1;
00381 if ( start < 0 ) start = 0;
00382 if ( m_prevHypo->m_currTargetWordsRange.GetStartPos() == NOT_FOUND ) {
00383 TRACE_ERR( "<s> ");
00384 } else {
00385 TRACE_ERR( "... ");
00386 }
00387 if (end>=0) {
00388 WordsRange range(start, end);
00389 TRACE_ERR( m_prevHypo->m_targetPhrase.GetSubString(range) << " ");
00390 }
00391 TRACE_ERR( ")"<<endl);
00392 TRACE_ERR( "\tbase score "<< (m_prevHypo->m_totalScore - m_prevHypo->m_futureScore) <<endl);
00393 TRACE_ERR( "\tcovering "<<m_currSourceWordsRange.GetStartPos()<<"-"<<m_currSourceWordsRange.GetEndPos()<<": "
00394 << *m_sourcePhrase <<endl);
00395 TRACE_ERR( "\ttranslated as: "<<(Phrase&) m_targetPhrase<<endl);
00396
00397 if (m_wordDeleted) TRACE_ERR( "\tword deleted"<<endl);
00398
00399
00400
00401 TRACE_ERR( "\tscore "<<m_totalScore - m_futureScore<<" + future cost "<<m_futureScore<<" = "<<m_totalScore<<endl);
00402 TRACE_ERR( "\tunweighted feature scores: " << m_scoreBreakdown << endl);
00403
00404 }
00405
00406 void Hypothesis::CleanupArcList()
00407 {
00408
00409 SetWinningHypo(this);
00410
00411 if (!m_arcList) return;
00412
00413
00414
00415
00416
00417 const StaticData &staticData = StaticData::Instance();
00418 size_t nBestSize = staticData.GetNBestSize();
00419 bool distinctNBest = staticData.GetDistinctNBest() || staticData.UseMBR() || staticData.GetOutputSearchGraph() || staticData.UseLatticeMBR() ;
00420
00421 if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
00422
00423 nth_element(m_arcList->begin()
00424 , m_arcList->begin() + nBestSize - 1
00425 , m_arcList->end()
00426 , CompareHypothesisTotalScore());
00427
00428
00429 ArcList::iterator iter;
00430 for (iter = m_arcList->begin() + nBestSize ; iter != m_arcList->end() ; ++iter) {
00431 Hypothesis *arc = *iter;
00432 FREEHYPO(arc);
00433 }
00434 m_arcList->erase(m_arcList->begin() + nBestSize
00435 , m_arcList->end());
00436 }
00437
00438
00439 ArcList::iterator iter = m_arcList->begin();
00440 for (; iter != m_arcList->end() ; ++iter) {
00441 Hypothesis *arc = *iter;
00442 arc->SetWinningHypo(this);
00443 }
00444 }
00445
00446 TO_STRING_BODY(Hypothesis)
00447
00448
00449 ostream& operator<<(ostream& out, const Hypothesis& hypo)
00450 {
00451 hypo.ToStream(out);
00452
00453 out << "[" << hypo.m_sourceCompleted << "] ";
00454
00455
00456 out << " [total=" << hypo.GetTotalScore() << "]";
00457 out << " " << hypo.GetScoreBreakdown();
00458
00459
00460 out << " " << hypo.GetCurrTargetPhrase().GetAlignmentInfo();
00461
00462
00463
00464
00465
00466
00467
00468 return out;
00469 }
00470
00471
00472 std::string Hypothesis::GetSourcePhraseStringRep(const vector<FactorType> factorsToPrint) const
00473 {
00474 if (!m_prevHypo) {
00475 return "";
00476 }
00477 return m_sourcePhrase->GetStringRep(factorsToPrint);
00478 #if 0
00479 if(m_sourcePhrase) {
00480 return m_sourcePhrase->GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
00481 } else {
00482 return m_sourceInput.GetSubString(m_currSourceWordsRange).GetStringRep(factorsToPrint);
00483 }
00484 #endif
00485 }
00486 std::string Hypothesis::GetTargetPhraseStringRep(const vector<FactorType> factorsToPrint) const
00487 {
00488 if (!m_prevHypo) {
00489 return "";
00490 }
00491 return m_targetPhrase.GetStringRep(factorsToPrint);
00492 }
00493
00494 std::string Hypothesis::GetSourcePhraseStringRep() const
00495 {
00496 vector<FactorType> allFactors;
00497 const size_t maxSourceFactors = StaticData::Instance().GetMaxNumFactors(Input);
00498 for(size_t i=0; i < maxSourceFactors; i++) {
00499 allFactors.push_back(i);
00500 }
00501 return GetSourcePhraseStringRep(allFactors);
00502 }
00503 std::string Hypothesis::GetTargetPhraseStringRep() const
00504 {
00505 vector<FactorType> allFactors;
00506 const size_t maxTargetFactors = StaticData::Instance().GetMaxNumFactors(Output);
00507 for(size_t i=0; i < maxTargetFactors; i++) {
00508 allFactors.push_back(i);
00509 }
00510 return GetTargetPhraseStringRep(allFactors);
00511 }
00512
00513 }
00514