00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifdef WIN32
00022 #include <hash_set>
00023 #else
00024
00025 #endif
00026
00027 #include <algorithm>
00028 #include <cmath>
00029 #include <limits>
00030 #include <map>
00031 #include <set>
00032 #include "Manager.h"
00033 #include "TypeDef.h"
00034 #include "Util.h"
00035 #include "TargetPhrase.h"
00036 #include "TrellisPath.h"
00037 #include "TrellisPathCollection.h"
00038 #include "TranslationOption.h"
00039 #include "TranslationOptionCollection.h"
00040 #include "Timer.h"
00041 #include "moses/OutputCollector.h"
00042 #include "moses/FF/DistortionScoreProducer.h"
00043 #include "moses/LM/Base.h"
00044 #include "moses/TranslationModel/PhraseDictionary.h"
00045 #include "moses/TranslationAnalysis.h"
00046 #include "moses/TranslationTask.h"
00047 #include "moses/HypergraphOutput.h"
00048 #include "moses/mbr.h"
00049 #include "moses/LatticeMBR.h"
00050 #include "moses/SearchNormal.h"
00051 #include "moses/SearchCubePruning.h"
00052 #include <boost/foreach.hpp>
00053
00054 #ifdef HAVE_PROTOBUF
00055 #include "hypergraph.pb.h"
00056 #include "rule.pb.h"
00057 #endif
00058
00059 #include "util/exception.hh"
00060 #include "util/random.hh"
00061 #include "util/string_stream.hh"
00062
00063 using namespace std;
00064
00065 namespace Moses
00066 {
00067
// Construct a manager for a single translation task: build the
// translation-option collection for the task's input sentence and
// instantiate the search algorithm selected in the configuration.
// Throws (UTIL_THROW2) if the configured algorithm is unsupported here.
Manager::Manager(ttasksptr const& ttask)
  : BaseManager(ttask)
  , interrupted_flag(0)
  , m_hypoId(0)
{
  boost::shared_ptr<InputType> input = ttask->GetSource();
  m_transOptColl = input->CreateTranslationOptionCollection(ttask);

  if (options()->search.algo == Normal) {
    m_search = new SearchNormal(*this, *m_transOptColl);
  } else if (options()->search.algo == CubePruning) {
    m_search = new SearchCubePruning(*this, *m_transOptColl);
  } else {
    UTIL_THROW2("ERROR: search. Aborting\n");
  }

  // let feature functions etc. set up any per-sentence state
  StaticData::Instance().InitializeForInput(ttask);
}
00089
Manager::~Manager()
{
  // Release the search-time structures owned by this manager (raw pointers
  // allocated in the constructor) ...
  delete m_transOptColl;
  delete m_search;
  // ... then notify StaticData that this sentence is finished so feature
  // functions can drop any per-sentence state set up in InitializeForInput.
  StaticData::Instance().CleanUpAfterSentenceProcessing(m_ttask.lock());
}
00096
00097 const InputType&
00098 Manager::GetSource() const
00099 {
00100 return m_source ;
00101 }
00102
/**
 * Main entry point for decoding one sentence: resets per-sentence
 * statistics, applies any sentence-specific weight setting, collects
 * translation options, and runs the configured search. Timing is
 * reported at the verbosity levels used below.
 */
void Manager::Decode()
{
  // start fresh statistics (and total timer at verbosity >= 2)
  ResetSentenceStats(m_source);
  IFVERBOSE(2) {
    GetSentenceStats().StartTimeTotal();
  }

  // switch to the weight setting requested by this input, or back to
  // "default", when alternate weight settings are configured
  if (StaticData::Instance().GetHasAlternateWeightSettings()) {
    if (m_source.GetSpecifiesWeightSetting()) {
      StaticData::Instance().SetWeightSetting(m_source.GetWeightSetting());
    } else {
      StaticData::Instance().SetWeightSetting("default");
    }
  }

  // collect translation options for this sentence (timed at verbosity >= 1)
  IFVERBOSE(1) {
    GetSentenceStats().StartTimeCollectOpts();
  }
  m_transOptColl->CreateTranslationOptions();

  IFVERBOSE(1) {
    GetSentenceStats().StopTimeCollectOpts();
    TRACE_ERR("Line "<< m_source.GetTranslationId()
              << ": Collecting options took "
              << GetSentenceStats().GetTimeCollectOpts() << " seconds at "
              << __FILE__ << " Line " << __LINE__ << endl);
  }

  // run the actual search (SearchNormal or SearchCubePruning)
  Timer searchTime;
  searchTime.start();
  m_search->Decode();
  VERBOSE(1, "Line " << m_source.GetTranslationId()
          << ": Search took " << searchTime << " seconds" << endl);
  IFVERBOSE(2) {
    GetSentenceStats().StopTimeTotal();
    TRACE_ERR(GetSentenceStats());
  }
}
00155
// Print every complete derivation: for each sorted hypothesis in the final
// stack, emit the hypothesis itself plus all divergent derivations reachable
// through its recombination arcs.
void Manager::PrintAllDerivations(long translationId, ostream& outputStream) const
{
  const std::vector<HypothesisStack*> &stacks = m_search->GetHypothesisStacks();

  const vector<const Hypothesis*> finalHypos = stacks.back()->GetSortedList();

  if (finalHypos.empty())
    return;

  // a final hypothesis has no phrases following it and no remaining score
  float remainingScore = 0;
  vector<const TargetPhrase*> remainingPhrases;

  BOOST_FOREACH(const Hypothesis* hypo, finalHypos) {
    printThisHypothesis(translationId, hypo, remainingPhrases, remainingScore, outputStream);
    printDivergentHypothesis(translationId, hypo, remainingPhrases, remainingScore, outputStream);
  }
}
00182
00183 const TranslationOptionCollection* Manager::getSntTranslationOptions()
00184 {
00185 return m_transOptColl;
00186 }
00187
00188 void Manager::printDivergentHypothesis(long translationId, const Hypothesis* hypo, const vector <const TargetPhrase*> & remainingPhrases, float remainingScore , ostream& outputStream ) const
00189 {
00190
00191 if (hypo->GetId() > 0) {
00192 vector <const TargetPhrase*> followingPhrases;
00193 followingPhrases.push_back(& (hypo->GetCurrTargetPhrase()));
00195 followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
00196 printDivergentHypothesis(translationId, hypo->GetPrevHypo(), followingPhrases , remainingScore + hypo->GetScore() - hypo->GetPrevHypo()->GetScore(), outputStream);
00197 }
00198
00199
00200 const ArcList *pAL = hypo->GetArcList();
00201 if (pAL) {
00202 const ArcList &arcList = *pAL;
00203
00204 ArcList::const_iterator iterArc;
00205 for (iterArc = arcList.begin() ; iterArc != arcList.end() ; ++iterArc) {
00206 const Hypothesis *loserHypo = *iterArc;
00207 const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
00208 float arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
00209 vector <const TargetPhrase* > followingPhrases;
00210 followingPhrases.push_back(&(loserHypo->GetCurrTargetPhrase()));
00211 followingPhrases.insert(followingPhrases.end()--, remainingPhrases.begin(), remainingPhrases.end());
00212 printThisHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore, outputStream);
00213 printDivergentHypothesis(translationId, loserPrevHypo, followingPhrases, remainingScore + arcScore, outputStream);
00214 }
00215 }
00216 }
00217
00218
// Write one derivation line in the format
//   "<translationId> ||| <surface words> ||| <score>"
// where the surface is the partial hypothesis' own output followed by the
// surface-factor (factor 0) words of every remaining target phrase.
void
Manager::
printThisHypothesis(long translationId, const Hypothesis* hypo,
                    const vector <const TargetPhrase*> & remainingPhrases,
                    float remainingScore, ostream& outputStream) const
{
  outputStream << translationId << " ||| ";

  // words of the hypothesis itself, then the phrases that follow it
  hypo->ToStream(outputStream);
  BOOST_FOREACH(const TargetPhrase* phrase, remainingPhrases) {
    const size_t phraseLen = phrase->GetSize();
    for (size_t pos = 0; pos < phraseLen; ++pos) {
      const Factor *factor = phrase->GetFactor(pos, 0);
      outputStream << *factor << " ";
    }
  }

  // total score = score so far + score of everything that still follows
  outputStream << "||| " << hypo->GetScore() + remainingScore << endl;
}
00243
00244
00245
00246
00256 void Manager::CalcNBest(size_t count, TrellisPathList &ret, bool onlyDistinct) const
00257 {
00258 if (count <= 0)
00259 return;
00260
00261 const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
00262
00263 vector<const Hypothesis*> sortedPureHypo = hypoStackColl.back()->GetSortedList();
00264
00265 if (sortedPureHypo.size() == 0)
00266 return;
00267
00268 TrellisPathCollection contenders;
00269
00270 set<Phrase> distinctHyps;
00271
00272
00273 vector<const Hypothesis*>::const_iterator iterBestHypo;
00274 for (iterBestHypo = sortedPureHypo.begin()
00275 ; iterBestHypo != sortedPureHypo.end()
00276 ; ++iterBestHypo) {
00277 contenders.Add(new TrellisPath(*iterBestHypo));
00278 }
00279
00280
00281
00282 size_t nBestFactor = options()->nbest.factor;
00283 if (nBestFactor < 1) nBestFactor = 1000;
00284
00285
00286 for (size_t iteration = 0 ; (onlyDistinct ? distinctHyps.size() : ret.GetSize()) < count && contenders.GetSize() > 0 && (iteration < count * nBestFactor) ; iteration++) {
00287
00288 TrellisPath *path = contenders.pop();
00289 UTIL_THROW_IF2(path == NULL, "path is NULL");
00290
00291 path->CreateDeviantPaths(contenders);
00292 if(onlyDistinct) {
00293 Phrase tgtPhrase = path->GetSurfacePhrase();
00294 if (distinctHyps.insert(tgtPhrase).second) {
00295 ret.Add(path);
00296 } else {
00297 delete path;
00298 path = NULL;
00299 }
00300 } else {
00301 ret.Add(path);
00302 }
00303
00304
00305 if(onlyDistinct) {
00306 const size_t nBestFactor = options()->nbest.factor;
00307 if (nBestFactor > 0)
00308 contenders.Prune(count * nBestFactor);
00309 } else {
00310 contenders.Prune(count);
00311 }
00312 }
00313 }
00314
00315 struct SGNReverseCompare {
00316 bool operator() (const SearchGraphNode& s1, const SearchGraphNode& s2) const {
00317 return s1.hypo->GetId() > s2.hypo->GetId();
00318 }
00319 };
00320
/**
 * Draw `count` sample paths from the search lattice, where a path's
 * probability is proportional to the exp of its (log) score. Sampling
 * works forward from the start hypothesis; at each node the next edge is
 * chosen with probability proportional to (edge score + sigma of target),
 * where sigma is the log-sum of all path scores from a node to the end.
 */
void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
{
  vector<SearchGraphNode> searchGraph;
  GetSearchGraph(searchGraph);

  // sigmas[h]      : log-sum of scores of all paths from h to a final node
  // edgeScores[e]  : score contribution of edge e = (fromId, toId)
  // outgoingHyps[h]: successors of h in the lattice
  // idToHyp/fscores: lookup tables keyed by hypothesis id
  typedef pair<int, int> Edge;
  map<const Hypothesis*, float> sigmas;
  map<Edge, float> edgeScores;
  map<const Hypothesis*, set<const Hypothesis*> > outgoingHyps;
  map<int,const Hypothesis*> idToHyp;
  map<int,float> fscores;

  // Iterating in reverse id order guarantees that a node's successors are
  // processed before the node itself (ids grow as the search progresses).
  sort(searchGraph.begin(), searchGraph.end(), SGNReverseCompare());

  // First pass: build the edge set and edge scores.
  for (vector<SearchGraphNode>::const_iterator i = searchGraph.begin();
       i != searchGraph.end(); ++i) {
    const Hypothesis* hypo = i->hypo;
    idToHyp[hypo->GetId()] = hypo;
    fscores[hypo->GetId()] = i->fscore;
    if (hypo->GetId()) {
      // back-pointer edge: prevHypo -> hypo (id 0 is the start hypothesis)
      const Hypothesis* prevHypo = i->hypo->GetPrevHypo();
      outgoingHyps[prevHypo].insert(hypo);
      edgeScores[Edge(prevHypo->GetId(),hypo->GetId())] =
        hypo->GetScore() - prevHypo->GetScore();
    }
    // forward edge recorded by GetSearchGraph (best continuation), scored
    // via the difference of forward scores
    if (i->forward >= 0) {
      map<int,const Hypothesis*>::const_iterator idToHypIter = idToHyp.find(i->forward);
      UTIL_THROW_IF2(idToHypIter == idToHyp.end(),
                     "Couldn't find hypothesis " << i->forward);
      const Hypothesis* nextHypo = idToHypIter->second;
      outgoingHyps[hypo].insert(nextHypo);
      map<int,float>::const_iterator fscoreIter = fscores.find(nextHypo->GetId());
      UTIL_THROW_IF2(fscoreIter == fscores.end(),
                     "Couldn't find scores for hypothsis " << nextHypo->GetId());
      edgeScores[Edge(hypo->GetId(),nextHypo->GetId())] =
        i->fscore - fscoreIter->second;
    }
  }

  // Second pass: compute sigmas bottom-up (successors first, see sort above).
  // sigma(h) = log-sum over successors j of (edgeScore(h,j) + sigma(j)).
  for (vector<SearchGraphNode>::const_iterator i = searchGraph.begin();
       i != searchGraph.end(); ++i) {

    if (i->forward == -1) {
      // final node: empty remaining path, log score 0
      sigmas[i->hypo] = 0;
    } else {
      map<const Hypothesis*, set<const Hypothesis*> >::const_iterator outIter =
        outgoingHyps.find(i->hypo);

      UTIL_THROW_IF2(outIter == outgoingHyps.end(),
                     "Couldn't find hypothesis " << i->hypo->GetId());
      float sigma = 0;
      for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
           j != outIter->second.end(); ++j) {
        map<const Hypothesis*, float>::const_iterator succIter = sigmas.find(*j);
        UTIL_THROW_IF2(succIter == sigmas.end(),
                       "Couldn't find hypothesis " << (*j)->GetId());
        map<Edge,float>::const_iterator edgeScoreIter =
          edgeScores.find(Edge(i->hypo->GetId(),(*j)->GetId()));
        UTIL_THROW_IF2(edgeScoreIter == edgeScores.end(),
                       "Couldn't find edge for hypothesis " << (*j)->GetId());
        float term = edgeScoreIter->second + succIter->second;
        // 0 doubles as "uninitialised" here; first term is copied directly
        if (sigma == 0) {
          sigma = term;
        } else {
          sigma = log_sum(sigma,term);
        }
      }
      sigmas[i->hypo] = sigma;
    }
  }

  // Sampling: after the reverse-id sort, the start hypothesis (id 0) is
  // the last element of searchGraph.
  const Hypothesis* startHypo = searchGraph.back().hypo;
  UTIL_THROW_IF2(startHypo->GetId() != 0, "Expecting the start hypothesis ");
  for (size_t i = 0; i < count; ++i) {
    vector<const Hypothesis*> path;
    path.push_back(startHypo);
    while(1) {
      map<const Hypothesis*, set<const Hypothesis*> >::const_iterator outIter =
        outgoingHyps.find(path.back());
      if (outIter == outgoingHyps.end() || !outIter->second.size()) {
        // no successors: the path is complete
        break;
      }

      // score each candidate successor: sigma(succ) + edgeScore(curr, succ)
      vector<const Hypothesis*> candidates;
      vector<float> candidateScores;
      float scoreTotal = 0;
      for (set<const Hypothesis*>::const_iterator j = outIter->second.begin();
           j != outIter->second.end(); ++j) {
        candidates.push_back(*j);
        UTIL_THROW_IF2(sigmas.find(*j) == sigmas.end(),
                       "Hypothesis " << (*j)->GetId() << " not found");
        Edge edge(path.back()->GetId(),(*j)->GetId());
        UTIL_THROW_IF2(edgeScores.find(edge) == edgeScores.end(),
                       "Edge not found");
        candidateScores.push_back(sigmas[*j] + edgeScores[edge]);
        if (scoreTotal == 0) {
          scoreTotal = candidateScores.back();
        } else {
          scoreTotal = log_sum(candidateScores.back(), scoreTotal);
        }
      }

      // normalise to log-probabilities by subtracting the log-sum
      transform(candidateScores.begin(), candidateScores.end(), candidateScores.begin(), bind2nd(minus<float>(),scoreTotal));

      // inverse-CDF sampling in log space: walk the candidates accumulating
      // log-probability mass until it exceeds the random draw
      const float frandom = log(util::rand_incl(0.0f, 1.0f));
      size_t position = 1;
      float sum = candidateScores[0];
      for (; position < candidateScores.size() && sum < frandom; ++position) {
        sum = log_sum(sum,candidateScores[position]);
      }
      // position now points one past the chosen candidate
      const Hypothesis* chosen = candidates[position-1];
      path.push_back(chosen);
    }

    // turn the sampled hypothesis sequence into a TrellisPath (ret owns it)
    ret.Add(new TrellisPath(path));
  }
}
00469
00470
00471
/**
 * Compute final decoder statistics for the best hypothesis and, at
 * verbosity >= 2, trace the alignment of source spans to target phrases.
 * The trace is built back-to-front by prepending each "[range:phrase] "
 * segment while walking the back-pointer chain.
 */
void Manager::CalcDecoderStatistics() const
{
  const Hypothesis *hypo = GetBestHypothesis();
  if (hypo != NULL) {
    GetSentenceStats().CalcFinalStats(*hypo);
    IFVERBOSE(2) {
      // NOTE: this inner null check is redundant (hypo was checked above
      // and not modified since), but kept as-is.
      if (hypo != NULL) {
        string buff;   // accumulated trace, built right-to-left
        string buff2;  // segment for the current hypothesis
        TRACE_ERR( "Source and Target Units:"
                   << hypo->GetInput());
        // build "[<source range>:<target phrase>] " by prepending pieces
        buff2.insert(0,"] ");
        buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
        buff2.insert(0,":");
        buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
        buff2.insert(0,"[");

        // walk back along the derivation, prepending each segment so the
        // final trace reads left-to-right in source order
        hypo = hypo->GetPrevHypo();
        while (hypo != NULL) {
          buff.insert(0,buff2);
          buff2.clear();
          buff2.insert(0,"] ");
          buff2.insert(0,(hypo->GetCurrTargetPhrase()).ToString());
          buff2.insert(0,":");
          buff2.insert(0,(hypo->GetCurrSourceWordsRange()).ToString());
          buff2.insert(0,"[");
          hypo = hypo->GetPrevHypo();
        }
        TRACE_ERR( buff << endl);
      }
    }
  }
}
00506
00507 void Manager::OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo, size_t &linkId) const
00508 {
00509
00510 const Hypothesis *prevHypo = hypo->GetPrevHypo();
00511
00512
00513 outputWordGraphStream << "J=" << linkId++
00514 << "\tS=" << prevHypo->GetId()
00515 << "\tE=" << hypo->GetId()
00516 << "\ta=";
00517
00518
00519 const std::vector<PhraseDictionary*> &phraseTables = PhraseDictionary::GetColl();
00520 std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
00521 for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable) {
00522 const PhraseDictionary *phraseTable = *iterPhraseTable;
00523 vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(phraseTable);
00524
00525 outputWordGraphStream << scores[0];
00526 vector<float>::const_iterator iterScore;
00527 for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) {
00528 outputWordGraphStream << ", " << *iterScore;
00529 }
00530 }
00531
00532
00533 outputWordGraphStream << "\tl=";
00534
00535 const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00536 for (size_t i = 0; i < statefulFFs.size(); ++i) {
00537 const StatefulFeatureFunction *ff = statefulFFs[i];
00538 const LanguageModel *lm = static_cast<const LanguageModel*>(ff);
00539
00540 vector<float> scores = hypo->GetScoreBreakdown().GetScoresForProducer(lm);
00541
00542 outputWordGraphStream << scores[0];
00543 vector<float>::const_iterator iterScore;
00544 for (iterScore = ++scores.begin() ; iterScore != scores.end() ; ++iterScore) {
00545 outputWordGraphStream << ", " << *iterScore;
00546 }
00547 }
00548
00549
00550 outputWordGraphStream << "\tr=";
00551
00552 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00553 std::vector<FeatureFunction*>::const_iterator iter;
00554 for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
00555 const FeatureFunction *ff = *iter;
00556
00557 const DistortionScoreProducer *model = dynamic_cast<const DistortionScoreProducer*>(ff);
00558 if (model) {
00559 outputWordGraphStream << hypo->GetScoreBreakdown().GetScoreForProducer(model);
00560 }
00561 }
00562
00563
00564 outputWordGraphStream << "\tw=" << hypo->GetSourcePhraseStringRep()
00565 << "|" << hypo->GetCurrTargetPhrase();
00566
00567 outputWordGraphStream << endl;
00568 }
00569
00570
00571 void Manager::OutputPassthroughInformation(std::ostream &out, const Hypothesis *hypo) const
00572 {
00573 const std::string passthrough = hypo->GetManager().GetSource().GetPassthroughInformation();
00574 out << passthrough;
00575 }
00576
00577
// Have every language model among the stateful feature functions report
// its history order over the hypothesis' full output phrase.
void Manager::GetOutputLanguageModelOrder( std::ostream &out, const Hypothesis *hypo ) const
{
  Phrase translation;
  hypo->GetOutputPhrase(translation);
  const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
  BOOST_FOREACH(const StatefulFeatureFunction* ff, statefulFFs) {
    // only LMs know about history order; skip other stateful FFs
    const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
    if (lm) {
      lm->ReportHistoryOrder(out, translation);
    }
  }
}
00590
/**
 * Write the full word graph (HTK-style lattice) for this sentence.
 * Reads the "output-word-graph" parameter: first value is the file name,
 * optional second value toggles inclusion of recombined (n-best) arcs.
 */
void Manager::GetWordGraph(long translationId, std::ostream &outputWordGraphStream) const
{
  const StaticData &staticData = StaticData::Instance();
  const PARAM_VEC *params;

  string fileName;
  bool outputNBest = false;
  params = staticData.GetParameter().GetParam("output-word-graph");
  if (params && params->size()) {
    fileName = params->at(0);

    if (params->size() == 2) {
      outputNBest = Scan<bool>(params->at(1));
    }
  }

  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();

  // lattice header
  outputWordGraphStream << "VERSION=1.0" << endl
                        << "UTTERANCE=" << translationId << endl;

  size_t linkId = 0;
  std::vector < HypothesisStack* >::const_iterator iterStack;
  // start at the second stack: stack 0 holds only the empty start hypothesis,
  // which has no incoming edge to print
  for (iterStack = ++hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      OutputWordGraph(outputWordGraphStream, hypo, linkId);

      // optionally also print edges for hypotheses recombined into this one
      if (outputNBest) {
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            OutputWordGraph(outputWordGraphStream, loserHypo, linkId);
          }
        }
      }
    }
  }
}
00634
/**
 * Extract the search graph as a flat list of SearchGraphNodes: one node per
 * connected hypothesis plus one per recombination arc. For every connected
 * hypothesis the best forward continuation (successor id and forward score)
 * is computed with a backward sweep over the stacks.
 */
void Manager::GetSearchGraph(vector<SearchGraphNode>& searchGraph) const
{
  std::map < int, bool > connected;     // hypothesis ids reachable from a final hypothesis
  std::map < int, int > forward;        // best successor id (-1 for final hypotheses)
  std::map < int, double > forwardScore;// score of best path from the hypothesis to the end

  // all hypotheses that are connected to a final hypothesis
  std::vector< const Hypothesis *> connectedList;
  GetConnectedGraph(&connected, &connectedList);

  // initialise the final stack: forward score 0, no successor
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  HypothesisStack::const_iterator iterHypo;
  for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
    const Hypothesis *hypo = *iterHypo;
    forwardScore[ hypo->GetId() ] = 0.0f;
    forward[ hypo->GetId() ] = -1;
  }

  // backward sweep (last stack to first): relax each hypothesis' predecessor
  // with the best forward score seen so far
  std::vector < HypothesisStack* >::const_iterator iterStack;
  for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        // update the predecessor along the main back pointer
        const Hypothesis *prevHypo = hypo->GetPrevHypo();
        double fscore = forwardScore[ hypo->GetId() ] +
                        hypo->GetScore() - prevHypo->GetScore();
        if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
            || forwardScore.find( prevHypo->GetId() )->second < fscore) {
          forwardScore[ prevHypo->GetId() ] = fscore;
          forward[ prevHypo->GetId() ] = hypo->GetId();
        }

        // also relax predecessors of every recombined (arc) hypothesis
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;

            const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
            double fscore = forwardScore[ hypo->GetId() ] +
                            loserHypo->GetScore() - loserPrevHypo->GetScore();
            if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
                || forwardScore.find( loserPrevHypo->GetId() )->second < fscore) {
              forwardScore[ loserPrevHypo->GetId() ] = fscore;
              forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
            }
          }
        }
      }
    }
  }

  // emit one SearchGraphNode per connected hypothesis and one per arc;
  // the start hypothesis (id 0) is always part of the graph
  connected[ 0 ] = true;
  for (iterStack = hypoStackColl.begin() ; iterStack != hypoStackColl.end() ; ++iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        searchGraph.push_back(SearchGraphNode(hypo,NULL,forward[hypo->GetId()],
                                              forwardScore[hypo->GetId()]));

        // arcs share the winner's forward pointer/score
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            searchGraph.push_back(SearchGraphNode(loserHypo,hypo,
                                                  forward[hypo->GetId()], forwardScore[hypo->GetId()]));
          }
        }
      }
    }
  }
}
00721
00722 void Manager::OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream) const
00723 {
00724 outputSearchGraphStream.setf(std::ios::fixed);
00725 outputSearchGraphStream.precision(6);
00726
00727 const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
00728 const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00729 size_t featureIndex = 1;
00730 for (size_t i = 0; i < sff.size(); ++i) {
00731 featureIndex = OutputFeatureWeightsForSLF(featureIndex, sff[i], outputSearchGraphStream);
00732 }
00733 for (size_t i = 0; i < slf.size(); ++i) {
00734
00735
00736
00737
00738
00739
00740 {
00741 featureIndex = OutputFeatureWeightsForSLF(featureIndex, slf[i], outputSearchGraphStream);
00742 }
00743 }
00744 const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
00745 for( size_t i=0; i<pds.size(); i++ ) {
00746 featureIndex = OutputFeatureWeightsForSLF(featureIndex, pds[i], outputSearchGraphStream);
00747 }
00748 const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
00749 for( size_t i=0; i<gds.size(); i++ ) {
00750 featureIndex = OutputFeatureWeightsForSLF(featureIndex, gds[i], outputSearchGraphStream);
00751 }
00752 }
00753
00754 void Manager::OutputFeatureValuesForSLF(const Hypothesis* hypo, bool zeros, std::ostream &outputSearchGraphStream) const
00755 {
00756 outputSearchGraphStream.setf(std::ios::fixed);
00757 outputSearchGraphStream.precision(6);
00758
00759
00760
00761
00762
00763
00764 const vector<const StatelessFeatureFunction*>& slf =StatelessFeatureFunction::GetStatelessFeatureFunctions();
00765 const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00766 size_t featureIndex = 1;
00767 for (size_t i = 0; i < sff.size(); ++i) {
00768 featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, sff[i], outputSearchGraphStream);
00769 }
00770 for (size_t i = 0; i < slf.size(); ++i) {
00771
00772
00773
00774
00775
00776
00777 {
00778 featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, slf[i], outputSearchGraphStream);
00779 }
00780 }
00781 const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
00782 for( size_t i=0; i<pds.size(); i++ ) {
00783 featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, pds[i], outputSearchGraphStream);
00784 }
00785 const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
00786 for( size_t i=0; i<gds.size(); i++ ) {
00787 featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, gds[i], outputSearchGraphStream);
00788 }
00789
00790 }
00791
00792 void Manager::OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostream &outputSearchGraphStream) const
00793 {
00794 outputSearchGraphStream.setf(std::ios::fixed);
00795 outputSearchGraphStream.precision(6);
00796 ScoreComponentCollection scores = hypo->GetScoreBreakdown();
00797 const Hypothesis *prevHypo = hypo->GetPrevHypo();
00798 if (prevHypo) {
00799 scores.MinusEquals(prevHypo->GetScoreBreakdown());
00800 }
00801 scores.Save(outputSearchGraphStream, false);
00802 }
00803
00804
00805 size_t Manager::OutputFeatureWeightsForSLF(size_t index, const FeatureFunction* ff, std::ostream &outputSearchGraphStream) const
00806 {
00807 size_t numScoreComps = ff->GetNumScoreComponents();
00808 if (numScoreComps != 0) {
00809 vector<float> values = StaticData::Instance().GetAllWeights().GetScoresForProducer(ff);
00810 for (size_t i = 0; i < numScoreComps; ++i) {
00811 outputSearchGraphStream << "# " << ff->GetScoreProducerDescription()
00812 << " " << ff->GetScoreProducerDescription()
00813 << " " << (i+1) << " of " << numScoreComps << endl
00814 << "x" << (index+i) << "scale=" << values[i] << endl;
00815 }
00816 return index+numScoreComps;
00817 } else {
00818 cerr << "Sparse features are not supported when outputting HTK standard lattice format" << endl;
00819 assert(false);
00820 return 0;
00821 }
00822 }
00823
00824 size_t
00825 Manager::
00826 OutputFeatureValuesForSLF(size_t index, bool zeros, const Hypothesis* hypo,
00827 const FeatureFunction* ff, std::ostream &out) const
00828 {
00829 const ScoreComponentCollection& scoreCollection = hypo->GetScoreBreakdown();
00830 vector<float> featureValues = scoreCollection.GetScoresForProducer(ff);
00831 size_t numScoreComps = featureValues.size();
00832 for (size_t i = 0; i < numScoreComps; ++i) {
00833 out << "x" << (index+i) << "=" << ((zeros) ? 0.0 : featureValues[i]) << " ";
00834 }
00835 return index + numScoreComps;
00836 }
00837
/**
 * Write the search graph in Kenneth Heafield's hypergraph format.
 * Pass 1 assigns contiguous hypergraph ids to hypotheses in topological
 * order (a hypothesis' predecessor is numbered before it) and collects
 * terminal nodes; pass 2 prints, per node, all incoming arcs; finally a
 * single end node gathers the terminal nodes via </s> arcs.
 */
void
Manager::
OutputSearchGraphAsHypergraph(std::ostream &outputSearchGraphStream) const
{
  VERBOSE(2,"Getting search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)

  vector<SearchGraphNode> searchGraph;
  GetSearchGraph(searchGraph);

  // Moses hypothesis id -> contiguous hypergraph node id
  map<int,int> mosesIDToHypergraphID;
  // hypergraph ids of final (terminal) hypotheses
  set<int> terminalNodes;
  // hypergraph node id -> indices into searchGraph of its incoming arcs
  multimap<int,int> hypergraphIDToArcs;

  VERBOSE(2,"Gathering information about search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)

  long numNodes = 0;
  long endNode = 0;
  {
    // Pass 1: number nodes in topological order and index arcs by end node.
    long hypergraphHypothesisID = 0;
    for (size_t arcNumber = 0, size=searchGraph.size(); arcNumber < size; ++arcNumber) {

      // a predecessor gets its id before the hypothesis itself,
      // guaranteeing topological numbering
      const Hypothesis *prevHypo = searchGraph[arcNumber].hypo->GetPrevHypo();
      if (prevHypo!=NULL) {
        int mosesPrevHypothesisID = prevHypo->GetId();
        if (mosesIDToHypergraphID.count(mosesPrevHypothesisID) == 0) {
          mosesIDToHypergraphID[mosesPrevHypothesisID] = hypergraphHypothesisID;

          hypergraphHypothesisID += 1;
        }
      }

      // recombined hypotheses share the node of the hypothesis that won
      int mosesHypothesisID;
      if (searchGraph[arcNumber].recombinationHypo) {
        mosesHypothesisID = searchGraph[arcNumber].recombinationHypo->GetId();
      } else {
        mosesHypothesisID = searchGraph[arcNumber].hypo->GetId();
      }

      if (mosesIDToHypergraphID.count(mosesHypothesisID) == 0) {

        mosesIDToHypergraphID[mosesHypothesisID] = hypergraphHypothesisID;

        // nodes without a forward pointer are final hypotheses
        bool terminalNode = (searchGraph[arcNumber].forward == -1);
        if (terminalNode) {

          terminalNodes.insert(hypergraphHypothesisID);
        }

        hypergraphHypothesisID += 1;
      }

      // record this arc as incoming to its (possibly recombined) end node
      hypergraphIDToArcs.insert(pair<int,int>(mosesIDToHypergraphID[mosesHypothesisID],arcNumber));

    }

    // one extra node acts as the unique end node of the hypergraph
    endNode = hypergraphHypothesisID;

    numNodes = endNode + 1;

  }

  // each terminal node contributes one extra </s> arc into the end node
  long numArcs = searchGraph.size() + terminalNodes.size();

  // header: column legend, then node and arc counts
  outputSearchGraphStream << "# target ||| features ||| source-covered" << endl;

  outputSearchGraphStream << numNodes << " " << numArcs << endl;

  VERBOSE(2,"Search graph to output as hypergraph for sentence " << m_source.GetTranslationId()
          << " contains " << numArcs << " arcs and " << numNodes << " nodes" << std::endl)

  VERBOSE(2,"Outputting search graph to output as hypergraph for sentence " << m_source.GetTranslationId() << std::endl)

  // Pass 2: for every node (in topological/id order) print its incoming arcs.
  for (int hypergraphHypothesisID=0; hypergraphHypothesisID < endNode; hypergraphHypothesisID+=1) {
    if (hypergraphHypothesisID % 100000 == 0) {
      VERBOSE(2,"Processed " << hypergraphHypothesisID << " of " << numNodes << " hypergraph nodes for sentence " << m_source.GetTranslationId() << std::endl);
    }

    size_t count = hypergraphIDToArcs.count(hypergraphHypothesisID);

    if (count > 0) {
      outputSearchGraphStream << "# node " << hypergraphHypothesisID << endl;
      outputSearchGraphStream << count << "\n";

      pair<multimap<int,int>::iterator, multimap<int,int>::iterator> range =
        hypergraphIDToArcs.equal_range(hypergraphHypothesisID);
      for (multimap<int,int>::iterator it=range.first; it!=range.second; ++it) {
        int lineNumber = (*it).second;
        const Hypothesis *thisHypo = searchGraph[lineNumber].hypo;
        int mosesHypothesisID;
        if (searchGraph[lineNumber].recombinationHypo) {
          mosesHypothesisID = searchGraph[lineNumber].recombinationHypo->GetId();
        } else {
          mosesHypothesisID = searchGraph[lineNumber].hypo->GetId();
        }

        // sanity check: the arc's end node must map back to this node id
        UTIL_THROW_IF2(
          (hypergraphHypothesisID != mosesIDToHypergraphID[mosesHypothesisID]),
          "Error while writing search lattice as hypergraph for sentence " << m_source.GetTranslationId() << ". " <<
          "Moses node " << mosesHypothesisID << " was expected to have hypergraph id " << hypergraphHypothesisID <<
          ", but actually had hypergraph id " << mosesIDToHypergraphID[mosesHypothesisID] <<
          ". There are " << numNodes << " nodes in the search lattice."
        );

        const Hypothesis *prevHypo = thisHypo->GetPrevHypo();
        if (prevHypo==NULL) {
          // the start hypothesis has no predecessor: emit the <s> arc
          outputSearchGraphStream << "<s> |||  ||| 0\n";
        } else {
          int startNode = mosesIDToHypergraphID[prevHypo->GetId()];

          // topological-order invariant: predecessor must precede this node
          UTIL_THROW_IF2(
            (startNode >= hypergraphHypothesisID),
            "Error while writing search lattice as hypergraph for sentence" << m_source.GetTranslationId() << ". " <<
            "The nodes must be output in topological order. The code attempted to violate this restriction."
          );

          const TargetPhrase &targetPhrase = thisHypo->GetCurrTargetPhrase();
          int targetWordCount = targetPhrase.GetSize();

          // arc line: [startNode] words ||| features ||| words-covered
          outputSearchGraphStream << "[" << startNode << "] ";
          for (int targetWordIndex=0; targetWordIndex<targetWordCount; targetWordIndex+=1) {
            outputSearchGraphStream << targetPhrase.GetWord(targetWordIndex)[0]->GetString() << " ";
          }
          outputSearchGraphStream << " ||| ";
          OutputFeatureValuesForHypergraph(thisHypo, outputSearchGraphStream);
          outputSearchGraphStream << " ||| " << thisHypo->GetWordsBitmap().GetNumWordsCovered();
          outputSearchGraphStream << "\n";
        }
      }
    }
  }

  // the unique end node: one </s> arc from every terminal node
  outputSearchGraphStream << "# node " << endNode << endl;
  outputSearchGraphStream << terminalNodes.size() << "\n";
  for (set<int>::iterator it=terminalNodes.begin(); it!=terminalNodes.end(); ++it) {
    outputSearchGraphStream << "[" << (*it) << "] </s> ||| ||| " << GetSource().GetSize() << "\n";
  }

}
00991
00992
/**
 * Output the search graph in HTK Standard Lattice Format (SLF).
 *
 * A first pass over the search-graph arcs counts nodes and links so the
 * SLF header (NODES=/LINKS=) can be written before the link list; a
 * second pass emits one "J=..." link line per target word, chaining the
 * words of a multi-word phrase through intermediate nodes.
 *
 * @param translationId           sentence id, used in the UTTERANCE header
 * @param outputSearchGraphStream stream the lattice is written to
 */
void Manager::OutputSearchGraphAsSLF(long translationId, std::ostream &outputSearchGraphStream) const
{
  // Get the search graph (one entry per hypothesis expansion /
  // recombination arc)
  vector<SearchGraphNode> searchGraph;
  GetSearchGraph(searchGraph);

  long numArcs = 0;
  long numNodes = 0;

  // Maps Moses hypothesis id -> highest SLF node id used by that hypothesis
  map<int,int> nodes;
  set<int> terminalNodes;

  // The initial hypothesis (Moses id 0) is SLF node 0
  nodes[0] = 0;

  // First pass: count nodes and arcs.
  for (size_t arcNumber = 0; arcNumber < searchGraph.size(); ++arcNumber) {

    // Each target word of the phrase becomes its own SLF link
    int targetWordCount = searchGraph[arcNumber].hypo->GetCurrTargetPhrase().GetSize();
    numArcs += targetWordCount;

    int hypothesisID = searchGraph[arcNumber].hypo->GetId();
    if (nodes.count(hypothesisID) == 0) {

      // Multi-word phrases need one intermediate node per word
      numNodes += targetWordCount;
      nodes[hypothesisID] = numNodes;

      // A hypothesis with no forward pointer ends the sentence and
      // needs one extra arc to the common final node
      bool terminalNode = (searchGraph[arcNumber].forward == -1);
      if (terminalNode) {
        numArcs += 1;
      }
    }

  }
  // One extra node: the common final node all terminal arcs point to
  numNodes += 1;

  nodes[numNodes] = numNodes;

  // SLF header
  outputSearchGraphStream << "UTTERANCE=Sentence_" << translationId << endl;
  outputSearchGraphStream << "VERSION=1.1" << endl;
  outputSearchGraphStream << "base=2.71828182845905" << endl; // scores are natural logs
  outputSearchGraphStream << "NODES=" << (numNodes+1) << endl;
  outputSearchGraphStream << "LINKS=" << numArcs << endl;

  OutputFeatureWeightsForSLF(outputSearchGraphStream);

  // Second pass: emit one link ("J=") line per target word.
  for (size_t arcNumber = 0, lineNumber = 0; lineNumber < searchGraph.size(); ++lineNumber) {
    const Hypothesis *thisHypo = searchGraph[lineNumber].hypo;
    const Hypothesis *prevHypo = thisHypo->GetPrevHypo();
    if (prevHypo) {

      int startNode = nodes[prevHypo->GetId()];
      int endNode = nodes[thisHypo->GetId()];
      bool terminalNode = (searchGraph[lineNumber].forward == -1);
      const TargetPhrase &targetPhrase = thisHypo->GetCurrTargetPhrase();
      int targetWordCount = targetPhrase.GetSize();

      for (int targetWordIndex=0; targetWordIndex<targetWordCount; targetWordIndex+=1) {
        // x = number of words remaining in this phrase, current included
        int x = (targetWordCount-targetWordIndex);

        outputSearchGraphStream << "J=" << arcNumber;

        // First word starts at the predecessor's node; later words
        // chain through the intermediate nodes endNode-x .. endNode
        if (targetWordIndex==0) {
          outputSearchGraphStream << " S=" << startNode;
        } else {
          outputSearchGraphStream << " S=" << endNode - x;
        }

        outputSearchGraphStream << " E=" << endNode - (x-1)
                                << " W=" << targetPhrase.GetWord(targetWordIndex);

        // Feature scores are attached to the first link of the phrase only
        OutputFeatureValuesForSLF(thisHypo, (targetWordIndex>0), outputSearchGraphStream);

        outputSearchGraphStream << endl;

        arcNumber += 1;
      }

      // Connect each sentence-final hypothesis to the common end node once
      if (terminalNode && terminalNodes.count(endNode) == 0) {
        terminalNodes.insert(endNode);
        outputSearchGraphStream << "J=" << arcNumber
                                << " S=" << endNode
                                << " E=" << numNodes
                                << endl;
        arcNumber += 1;
      }
    }
  }

}
01085
01086
01087 void
01088 OutputSearchNode(AllOptions const& opts, long translationId,
01089 std::ostream &out,
01090 SearchGraphNode const& searchNode)
01091 {
01092 const vector<FactorType> &outputFactorOrder = opts.output.factor_order;
01093 bool extendedFormat = opts.output.SearchGraphExtended.size();
01094 out << translationId;
01095
01096
01097 if ( searchNode.hypo->GetId() == 0 ) {
01098 out << " hyp=0 stack=0";
01099 if (extendedFormat) {
01100 out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore;
01101 }
01102 out << endl;
01103 return;
01104 }
01105
01106 const Hypothesis *prevHypo = searchNode.hypo->GetPrevHypo();
01107
01108
01109 if (!extendedFormat) {
01110 out << " hyp=" << searchNode.hypo->GetId()
01111 << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered()
01112 << " back=" << prevHypo->GetId()
01113 << " score=" << searchNode.hypo->GetScore()
01114 << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore());
01115
01116 if (searchNode.recombinationHypo != NULL)
01117 out << " recombined=" << searchNode.recombinationHypo->GetId();
01118
01119 out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore
01120 << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos()
01121 << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos()
01122 << " out=" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder)
01123 << endl;
01124 return;
01125 }
01126
01127 out << " hyp=" << searchNode.hypo->GetId();
01128 out << " stack=" << searchNode.hypo->GetWordsBitmap().GetNumWordsCovered()
01129 << " back=" << prevHypo->GetId()
01130 << " score=" << searchNode.hypo->GetScore()
01131 << " transition=" << (searchNode.hypo->GetScore() - prevHypo->GetScore());
01132
01133 if (searchNode.recombinationHypo != NULL)
01134 out << " recombined=" << searchNode.recombinationHypo->GetId();
01135
01136 out << " forward=" << searchNode.forward << " fscore=" << searchNode.fscore
01137 << " covered=" << searchNode.hypo->GetCurrSourceWordsRange().GetStartPos()
01138 << "-" << searchNode.hypo->GetCurrSourceWordsRange().GetEndPos();
01139
01140
01141 ScoreComponentCollection scoreBreakdown = searchNode.hypo->GetScoreBreakdown();
01142 scoreBreakdown.MinusEquals( prevHypo->GetScoreBreakdown() );
01143 out << " scores=\"" << scoreBreakdown << "\""
01144 << " out=\"" << searchNode.hypo->GetSourcePhraseStringRep()
01145 << "|" << searchNode.hypo->GetCurrTargetPhrase().GetStringRep(outputFactorOrder) << "\"" << endl;
01146 }
01147
01148 void Manager::GetConnectedGraph(
01149 std::map< int, bool >* pConnected,
01150 std::vector< const Hypothesis* >* pConnectedList) const
01151 {
01152 std::map < int, bool >& connected = *pConnected;
01153 std::vector< const Hypothesis *>& connectedList = *pConnectedList;
01154
01155
01156 const std::vector < HypothesisStack* > &hypoStackColl
01157 = m_search->GetHypothesisStacks();
01158 const HypothesisStack &finalStack = *hypoStackColl.back();
01159 HypothesisStack::const_iterator iterHypo;
01160 for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
01161 const Hypothesis *hypo = *iterHypo;
01162 connected[ hypo->GetId() ] = true;
01163 connectedList.push_back( hypo );
01164 }
01165
01166 for(size_t i=0; i<connectedList.size(); i++) {
01167 const Hypothesis *hypo = connectedList[i];
01168
01169
01170 const Hypothesis *prevHypo = hypo->GetPrevHypo();
01171 if (prevHypo && prevHypo->GetId() > 0
01172 && connected.find( prevHypo->GetId() ) == connected.end()) {
01173 connected[ prevHypo->GetId() ] = true;
01174 connectedList.push_back( prevHypo );
01175 }
01176
01177
01178 const ArcList *arcList = hypo->GetArcList();
01179 if (arcList != NULL) {
01180 ArcList::const_iterator iterArcList;
01181 for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
01182 const Hypothesis *loserHypo = *iterArcList;
01183 if (connected.find( loserHypo->GetId() ) == connected.end()) {
01184 connected[ loserHypo->GetId() ] = true;
01185 connectedList.push_back( loserHypo );
01186 }
01187 }
01188 }
01189 }
01190 }
01191
01192 void Manager::GetWinnerConnectedGraph(
01193 std::map< int, bool >* pConnected,
01194 std::vector< const Hypothesis* >* pConnectedList) const
01195 {
01196 std::map < int, bool >& connected = *pConnected;
01197 std::vector< const Hypothesis *>& connectedList = *pConnectedList;
01198
01199
01200 const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
01201 const HypothesisStack &finalStack = *hypoStackColl.back();
01202 HypothesisStack::const_iterator iterHypo;
01203 for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
01204 const Hypothesis *hypo = *iterHypo;
01205 connected[ hypo->GetId() ] = true;
01206 connectedList.push_back( hypo );
01207 }
01208
01209
01210 for(size_t i=0; i<connectedList.size(); i++) {
01211 const Hypothesis *hypo = connectedList[i];
01212
01213
01214 const Hypothesis *prevHypo = hypo->GetPrevHypo();
01215 if (prevHypo->GetId() > 0
01216 && connected.find( prevHypo->GetId() ) == connected.end()) {
01217 connected[ prevHypo->GetId() ] = true;
01218 connectedList.push_back( prevHypo );
01219 }
01220
01221
01222 const ArcList *arcList = hypo->GetArcList();
01223 if (arcList != NULL) {
01224 ArcList::const_iterator iterArcList;
01225 for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
01226 const Hypothesis *loserHypo = *iterArcList;
01227 if (connected.find( loserHypo->GetPrevHypo()->GetId() ) == connected.end() && loserHypo->GetPrevHypo()->GetId() > 0) {
01228 connected[ loserHypo->GetPrevHypo()->GetId() ] = true;
01229 connectedList.push_back( loserHypo->GetPrevHypo() );
01230 }
01231 }
01232 }
01233 }
01234 }
01235
01236
01237 #ifdef HAVE_PROTOBUF
01238
// Fill a hypergraph-edge protobuf with this hypothesis' rule (target
// phrase) and its per-feature transition scores.
void SerializeEdgeInfo(const Hypothesis* hypo, hgmert::Hypergraph_Edge* edge)
{
  hgmert::Rule* rule = edge->mutable_rule();
  hypo->GetCurrTargetPhrase().WriteToRulePB(rule);
  const Hypothesis* prev = hypo->GetPrevHypo();
  // initial hypothesis: no predecessor, hence no transition scores
  if (!prev) return;

  // Score breakdowns are cumulative along the derivation; subtract the
  // predecessor's to isolate this edge's contribution. Values are
  // negated — presumably the consumer expects costs; confirm against hgmert.
  const ScoreComponentCollection& scores = hypo->GetScoreBreakdown();
  const ScoreComponentCollection& pscores = prev->GetScoreBreakdown();
  for (unsigned int i = 0; i < scores.size(); ++i)
    edge->add_feature_values((scores[i] - pscores[i]) * -1.0);
}
01253
// Look up (or lazily create) the hypergraph node for a Moses hypothesis.
// i2hgnode maps Moses hypothesis id -> hypergraph node index;
// *hgNodeIdx receives the index of the returned node.
hgmert::Hypergraph_Node* GetHGNode(
  const Hypothesis* hypo,
  std::map< int, int>* i2hgnode,
  hgmert::Hypergraph* hg,
  int* hgNodeIdx)
{
  hgmert::Hypergraph_Node* hgnode;
  std::map < int, int >::iterator idxi = i2hgnode->find(hypo->GetId());
  if (idxi == i2hgnode->end()) {
    // first time this hypothesis is seen: append a fresh node and
    // record its index
    *hgNodeIdx = ((*i2hgnode)[hypo->GetId()] = hg->nodes_size());
    hgnode = hg->add_nodes();
  } else {
    *hgNodeIdx = idxi->second;
    hgnode = hg->mutable_nodes(*hgNodeIdx);
  }
  return hgnode;
}
01271
/**
 * Serialize the connected part of the search graph as an hgmert
 * Hypergraph protobuf and write it to outputStream.
 * Node index 0 is the goal node, index 1 the source (initial) node.
 */
void Manager::SerializeSearchGraphPB(
  long translationId,
  std::ostream& outputStream) const
{
  using namespace hgmert;
  std::map < int, bool > connected;
  std::map < int, int > i2hgnode;
  std::vector< const Hypothesis *> connectedList;
  GetConnectedGraph(&connected, &connectedList);
  connected[ 0 ] = true;
  Hypergraph hg;
  hg.set_is_sorted(false);
  // feature count taken from any hypothesis on the final stack
  int num_feats = (*m_search->GetHypothesisStacks().back()->begin())->GetScoreBreakdown().size();
  hg.set_num_features(num_feats);
  StaticData::Instance().GetScoreIndexManager().SerializeFeatureNamesToPB(&hg);
  Hypergraph_Node* goal = hg.add_nodes();   // node 0: goal
  Hypergraph_Node* source = hg.add_nodes(); // node 1: source
  // Moses "no predecessor" maps to the source node
  i2hgnode[-1] = 1;
  const std::vector < HypothesisStack* > &hypoStackColl = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  for (std::vector < HypothesisStack* >::const_iterator iterStack = hypoStackColl.begin();
       iterStack != hypoStackColl.end() ; ++iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;

    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      bool is_goal = hypo->GetWordsBitmap().IsComplete();
      // only serialize hypotheses reachable from a final hypothesis
      if (connected.find( hypo->GetId() ) != connected.end()) {
        int headNodeIdx;
        Hypergraph_Node* headNode = GetHGNode(hypo, &i2hgnode, &hg, &headNodeIdx);
        if (is_goal) {
          // complete hypotheses get an extra glue edge to the goal node
          Hypergraph_Edge* ge = hg.add_edges();
          ge->set_head_node(0);
          ge->add_tail_nodes(headNodeIdx);
          ge->mutable_rule()->add_trg_words("[X,1]");
        }
        // the main edge: from the predecessor's node to this node
        Hypergraph_Edge* edge = hg.add_edges();
        SerializeEdgeInfo(hypo, edge);
        edge->set_head_node(headNodeIdx);
        const Hypothesis* prev = hypo->GetPrevHypo();
        int tailNodeIdx = 1;
        if (prev)
          tailNodeIdx = i2hgnode.find(prev->GetId())->second;
        edge->add_tail_nodes(tailNodeIdx);

        // each recombined (losing) hypothesis becomes an additional
        // incoming edge into the same head node
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;
            UTIL_THROW_IF2(!connected[loserHypo->GetId()],
                           "Hypothesis " << loserHypo->GetId() << " is not connected");
            Hypergraph_Edge* edge = hg.add_edges();
            SerializeEdgeInfo(loserHypo, edge);
            edge->set_head_node(headNodeIdx);
            tailNodeIdx = i2hgnode.find(loserHypo->GetPrevHypo()->GetId())->second;
            edge->add_tail_nodes(tailNodeIdx);
          }
        }
      }
    }
  }
  hg.SerializeToOstream(&outputStream);
}
01337 #endif
01338
01339 void
01340 Manager::
01341 OutputSearchGraph(long translationId, std::ostream &out) const
01342 {
01343 vector<SearchGraphNode> searchGraph;
01344 GetSearchGraph(searchGraph);
01345 for (size_t i = 0; i < searchGraph.size(); ++i) {
01346 OutputSearchNode(*options(),translationId,out,searchGraph[i]);
01347 }
01348 }
01349
/**
 * For every hypothesis connected to a winning (final-stack) hypothesis,
 * compute the best forward score (score of the best completion of the
 * sentence through that hypothesis) and the total estimated path score.
 *
 * @param pConnected     out: marks ids of connected hypotheses
 * @param pConnectedList out: the connected hypotheses (traversal order)
 * @param pOutgoingHyps  out: successor sets per hypothesis
 * @param pFwdBwdScores  out: per entry of pConnectedList, the hypothesis'
 *                       own score plus its best forward score
 */
void
Manager::
GetForwardBackwardSearchGraph
( std::map< int, bool >* pConnected,
  std::vector<Hypothesis const* >* pConnectedList,
  std::map<Hypothesis const*, set<Hypothesis const*> >* pOutgoingHyps,
  vector< float>* pFwdBwdScores) const
{
  std::map < int, bool > &connected = *pConnected;
  std::vector< const Hypothesis *>& connectedList = *pConnectedList;
  std::map < int, int > forward;         // best successor id per hypothesis
  std::map < int, double > forwardScore; // best score-to-completion per hypothesis

  std::map < const Hypothesis*, set <const Hypothesis*> > & outgoingHyps
  = *pOutgoingHyps;
  vector< float> & estimatedScores = *pFwdBwdScores;

  // find all hypotheses connected to a winner
  GetWinnerConnectedGraph(&connected, &connectedList);

  // final-stack hypotheses: forward score 0, no successor (-1)
  const std::vector < HypothesisStack* > &hypoStackColl
  = m_search->GetHypothesisStacks();
  const HypothesisStack &finalStack = *hypoStackColl.back();
  HypothesisStack::const_iterator iterHypo;
  for (iterHypo = finalStack.begin() ; iterHypo != finalStack.end() ; ++iterHypo) {
    const Hypothesis *hypo = *iterHypo;
    forwardScore[ hypo->GetId() ] = 0.0f;
    forward[ hypo->GetId() ] = -1;
  }

  // walk the stacks from last to first, propagating forward scores
  // backwards to predecessors
  std::vector < HypothesisStack* >::const_iterator iterStack;
  for (iterStack = --hypoStackColl.end() ; iterStack != hypoStackColl.begin() ; --iterStack) {
    const HypothesisStack &stack = **iterStack;
    HypothesisStack::const_iterator iterHypo;
    for (iterHypo = stack.begin() ; iterHypo != stack.end() ; ++iterHypo) {
      const Hypothesis *hypo = *iterHypo;
      if (connected.find( hypo->GetId() ) != connected.end()) {
        // candidate forward score for the predecessor: this hypothesis'
        // forward score plus the transition score of this expansion
        const Hypothesis *prevHypo = hypo->GetPrevHypo();
        double fscore = forwardScore[ hypo->GetId() ] +
                        hypo->GetScore() - prevHypo->GetScore();
        if (forwardScore.find( prevHypo->GetId() ) == forwardScore.end()
            || forwardScore.find( prevHypo->GetId() )->second < fscore) {
          forwardScore[ prevHypo->GetId() ] = fscore;
          forward[ prevHypo->GetId() ] = hypo->GetId();
        }

        // record the successor relation
        outgoingHyps[prevHypo].insert(hypo);

        // same update for each recombined (losing) arc into this hypothesis
        const ArcList *arcList = hypo->GetArcList();
        if (arcList != NULL) {
          ArcList::const_iterator iterArcList;
          for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
            const Hypothesis *loserHypo = *iterArcList;

            const Hypothesis *loserPrevHypo = loserHypo->GetPrevHypo();
            double fscore = forwardScore[ hypo->GetId() ] +
                            loserHypo->GetScore() - loserPrevHypo->GetScore();
            if (forwardScore.find( loserPrevHypo->GetId() ) == forwardScore.end()
                || forwardScore.find( loserPrevHypo->GetId() )->second < fscore) {
              forwardScore[ loserPrevHypo->GetId() ] = fscore;
              forward[ loserPrevHypo->GetId() ] = loserHypo->GetId();
            }

            // NB: the surviving hypothesis (not the loser) is recorded as
            // successor — the loser was recombined into it
            outgoingHyps[loserPrevHypo].insert(hypo);

          }
        }
      }
    }
  }

  // estimated total score = score so far + best score to completion
  for (std::vector< const Hypothesis *>::iterator it = connectedList.begin(); it != connectedList.end(); ++it) {
    float estimatedScore = (*it)->GetScore() + forwardScore[(*it)->GetId()];
    estimatedScores.push_back(estimatedScore);
  }
}
01433
01434
// Return the best complete hypothesis found by the search
// (may be NULL — callers such as OutputBest check for this).
const Hypothesis *Manager::GetBestHypothesis() const
{
  return m_search->GetBestHypothesis();
}
01439
// Hand out the next unique hypothesis id for this sentence and record
// the creation in the sentence statistics.
int Manager::GetNextHypoId()
{
  GetSentenceStats().AddCreated();
  return m_hypoId++;
}
01445
01446 void Manager::ResetSentenceStats(const InputType& source)
01447 {
01448 m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
01449 }
// Accessor for the per-sentence decoding statistics
// (e.g. hypotheses created — see GetNextHypoId).
SentenceStats& Manager::GetSentenceStats() const
{
  return *m_sentenceStats;

}
01455
01456 void Manager::OutputBest(OutputCollector *collector) const
01457 {
01458 long translationId = m_source.GetTranslationId();
01459
01460 Timer additionalReportingTime;
01461
01462
01463 if (collector) {
01464 ostringstream out;
01465 ostringstream debug;
01466 FixPrecision(debug,PRECISION);
01467
01468
01469 if (options()->output.PrintAllDerivations) {
01470 additionalReportingTime.start();
01471 PrintAllDerivations(translationId, debug);
01472 additionalReportingTime.stop();
01473 }
01474
01475 Timer decisionRuleTime;
01476 decisionRuleTime.start();
01477
01478
01479 const Hypothesis* bestHypo = NULL;
01480 if (!options()->mbr.enabled) {
01481 bestHypo = GetBestHypothesis();
01482 if (bestHypo) {
01483 if (options()->output.ReportHypoScore) {
01484 out << bestHypo->GetFutureScore() << ' ';
01485 }
01486 if (options()->output.RecoverPath) {
01487 bestHypo->OutputInput(out);
01488 out << "||| ";
01489 }
01490
01491 if (options()->output.PrintID) {
01492 out << translationId << " ";
01493 }
01494
01495
01496 if (options()->output.PrintPassThrough) {
01497 OutputPassthroughInformation(out, bestHypo);
01498 }
01499
01500
01501 if (options()->output.ReportSegmentation == 2) {
01502 GetOutputLanguageModelOrder(out, bestHypo);
01503 }
01504 OutputSurface(out,*bestHypo, true);
01505 if (options()->output.PrintAlignmentInfo) {
01506 out << "||| ";
01507 bestHypo->OutputAlignment(out, true);
01508 }
01509
01510 IFVERBOSE(1) {
01511 debug << "BEST TRANSLATION: " << *bestHypo << endl;
01512 }
01513 } else {
01514 VERBOSE(1, "NO BEST TRANSLATION" << endl);
01515 }
01516
01517 out << endl;
01518 }
01519
01520
01521 else {
01522
01523 size_t nBestSize = options()->mbr.size;
01524 if (nBestSize <= 0) {
01525 cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
01526 exit(1);
01527 }
01528 TrellisPathList nBestList;
01529 CalcNBest(nBestSize, nBestList, true);
01530 VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
01531 IFVERBOSE(2) {
01532 PrintUserTime("calculated n-best list for (L)MBR decoding");
01533 }
01534
01535
01536 if (options()->lmbr.enabled) {
01537 if (options()->nbest.enabled) {
01538
01539 vector<LatticeMBRSolution> solutions;
01540 size_t n = min(nBestSize, options()->nbest.nbest_size);
01541 getLatticeMBRNBest(*this,nBestList,solutions,n);
01542 OutputLatticeMBRNBest(m_latticeNBestOut, solutions, translationId);
01543 } else {
01544
01545 vector<Word> mbrBestHypo = doLatticeMBR(*this,nBestList);
01546 OutputBestHypo(mbrBestHypo, out);
01547 IFVERBOSE(2) {
01548 PrintUserTime("finished Lattice MBR decoding");
01549 }
01550 }
01551 }
01552
01553
01554 else if (options()->search.consensus) {
01555 const TrellisPath &conBestHypo = doConsensusDecoding(*this,nBestList);
01556 OutputBestHypo(conBestHypo, out);
01557 OutputAlignment(m_alignmentOut, conBestHypo);
01558 IFVERBOSE(2) {
01559 PrintUserTime("finished Consensus decoding");
01560 }
01561 }
01562
01563
01564 else {
01565 const TrellisPath &mbrBestHypo = doMBR(nBestList, *options());
01566 OutputBestHypo(mbrBestHypo, out);
01567 OutputAlignment(m_alignmentOut, mbrBestHypo);
01568 IFVERBOSE(2) {
01569 PrintUserTime("finished MBR decoding");
01570 }
01571 }
01572 }
01573
01574
01575 collector->Write(translationId,out.str(),debug.str());
01576
01577 decisionRuleTime.stop();
01578 VERBOSE(1, "Line " << translationId << ": Decision rule took " << decisionRuleTime << " seconds total" << endl);
01579 }
01580
01581 }
01582
01583 void Manager::OutputNBest(OutputCollector *collector) const
01584 {
01585 if (collector == NULL) {
01586 return;
01587 }
01588
01589 if (options()->lmbr.enabled) {
01590 if (options()->nbest.enabled) {
01591 collector->Write(m_source.GetTranslationId(), m_latticeNBestOut.str());
01592 }
01593 } else {
01594 TrellisPathList nBestList;
01595 ostringstream out;
01596 NBestOptions const& nbo = options()->nbest;
01597 CalcNBest(nbo.nbest_size, nBestList, nbo.only_distinct);
01598 OutputNBest(out, nBestList);
01599 collector->Write(m_source.GetTranslationId(), out.str());
01600 }
01601
01602 }
01603
/**
 * Write an n-best list in the standard Moses format:
 *   id ||| surface ||| feature scores ||| total score [||| extras]
 * Optional extra fields: phrase segmentation, word alignment, input.
 */
void
Manager::
OutputNBest(std::ostream& out, Moses::TrellisPathList const& nBestList) const
{
  NBestOptions const& nbo = options()->nbest;
  bool reportAllFactors = nbo.include_all_factors; // NOTE(review): currently unused here
  bool includeSegmentation = nbo.include_segmentation;
  bool includeWordAlignment = nbo.include_alignment_info;

  TrellisPathList::const_iterator iter;
  for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
    const TrellisPath &path = **iter;
    // edges are stored last-to-first; iterate backwards for surface order
    const std::vector<const Hypothesis *> &edges = path.GetEdges();

    // surface string of the translation
    out << m_source.GetTranslationId() << " ||| ";
    for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
      const Hypothesis &edge = *edges[currEdge];
      OutputSurface(out, edge);
    }
    out << " |||";

    // per-feature scores (optionally labelled)
    bool with_labels = options()->nbest.include_feature_labels;
    path.GetScoreBreakdown()->OutputAllFeatureScores(out, with_labels);

    // total score
    out << " ||| " << path.GetFutureScore();

    // phrase segmentation: "srcStart[-srcEnd]=tgtStart[-tgtEnd]" per phrase
    if (includeSegmentation) {
      out << " |||";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const Range &sourceRange = edge.GetCurrSourceWordsRange();
        Range targetRange = path.GetTargetWordsRange(edge);
        out << " " << sourceRange.GetStartPos();
        if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
          out << "-" << sourceRange.GetEndPos();
        }
        out<< "=" << targetRange.GetStartPos();
        if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
          out<< "-" << targetRange.GetEndPos();
        }
      }
    }

    // word-level alignment, offset into sentence positions
    if (includeWordAlignment) {
      out << " ||| ";
      for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
        const Hypothesis &edge = *edges[currEdge];
        const Range &sourceRange = edge.GetCurrSourceWordsRange();
        Range targetRange = path.GetTargetWordsRange(edge);
        const int sourceOffset = sourceRange.GetStartPos();
        const int targetOffset = targetRange.GetStartPos();
        const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();

        OutputAlignment(out, ai, sourceOffset, targetOffset);

      }
    }

    // echo the input sentence, if requested
    if (options()->output.RecoverPath) {
      out << " ||| ";
      OutputInput(out, edges[0]);
    }

    out << endl;
  }

  out << std::flush;
}
01676
01678
01679
01680
/**
 * Append the surface (text) form of a hypothesis' target phrase to out.
 * With recursive=true the whole derivation up to this hypothesis is
 * printed, predecessors first, yielding the sentence in order.
 * Handles placeholder substitution, OOV marking, additional output
 * factors, and optional segmentation / word-alignment annotation.
 */
void
Manager::
OutputSurface(std::ostream &out, Hypothesis const& edge, bool const recursive) const
{
  // print predecessors first so words come out in sentence order
  if (recursive && edge.GetPrevHypo()) {
    OutputSurface(out,*edge.GetPrevHypo(), true);
  }

  std::vector<FactorType> outputFactorOrder = options()->output.factor_order;
  UTIL_THROW_IF2(outputFactorOrder.size() == 0,
                 "Must specific at least 1 output factor");

  // placeholder handling: target positions whose token should be
  // replaced by a factor carried over from the source
  FactorType placeholderFactor = options()->input.placeholder_factor;
  std::map<size_t, const Factor*> placeholders;
  if (placeholderFactor != NOT_FOUND) {
    placeholders = GetPlaceholders(edge, placeholderFactor);
  }

  bool markUnknown = options()->unk.mark;
  std::string const& fd = options()->output.factor_delimiter;

  TargetPhrase const& phrase = edge.GetCurrTargetPhrase();
  size_t size = phrase.GetSize();
  for (size_t pos = 0 ; pos < size ; pos++) {
    const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
    if (placeholders.size()) {
      // substitute the source placeholder factor at this position, if any
      std::map<size_t, const Factor*>::const_iterator iter = placeholders.find(pos);
      if (iter != placeholders.end()) {
        factor = iter->second;
      }
    }

    UTIL_THROW_IF2(factor == NULL, "No factor 0 at position " << pos);

    // OOV words are wrapped in the configured prefix/suffix
    const Word &word = phrase.GetWord(pos);
    if(markUnknown && word.IsOOV()) {
      out << options()->unk.prefix;
    }

    out << *factor;
    // additional output factors, each preceded by the factor delimiter
    // (NB: the inner 'factor' intentionally shadows the outer one)
    for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
      const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
      if (factor) out << fd << *factor;

    }

    if(markUnknown && word.IsOOV()) {
      out << options()->unk.suffix;
    }

    out << " ";

  }

  // optional segmentation annotation: "|srcStart-srcEnd|"
  int reportSegmentation = options()->output.ReportSegmentation;
  if (reportSegmentation > 0 && phrase.GetSize() > 0) {
    const Range &sourceRange = edge.GetCurrSourceWordsRange();
    const int sourceStart = sourceRange.GetStartPos();
    const int sourceEnd = sourceRange.GetEndPos();
    out << "|" << sourceStart << "-" << sourceEnd;
    // level 2 additionally reports word alignment and transition scores
    if (reportSegmentation == 2) {
      out << ",wa=";
      const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignTerm();
      OutputAlignment(out, ai, 0, 0);
      out << ",total=";
      out << edge.GetScore() - edge.GetPrevHypo()->GetScore();
      out << ",";
      // per-feature transition scores = cumulative minus predecessor's
      ScoreComponentCollection scoreBreakdown(edge.GetScoreBreakdown());
      scoreBreakdown.MinusEquals(edge.GetPrevHypo()->GetScoreBreakdown());
      bool with_labels = options()->nbest.include_feature_labels;
      scoreBreakdown.OutputAllFeatureScores(out, with_labels);
    }
    out << "| ";
  }
}
01760
01761 void
01762 Manager::
01763 OutputAlignment(ostream &out, const AlignmentInfo &ai,
01764 size_t sourceOffset, size_t targetOffset) const
01765 {
01766 typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
01767 AlignVec alignments = ai.GetSortedAlignments(options()->output.WA_SortOrder);
01768
01769 AlignVec::const_iterator it;
01770 for (it = alignments.begin(); it != alignments.end(); ++it) {
01771 const std::pair<size_t,size_t> &alignment = **it;
01772 out << alignment.first + sourceOffset << "-"
01773 << alignment.second + targetOffset << " ";
01774 }
01775
01776 }
01777
01778 void
01779 Manager::
01780 OutputInput(std::ostream& os, const Hypothesis* hypo) const
01781 {
01782 size_t len = hypo->GetInput().GetSize();
01783 std::vector<const Phrase*> inp_phrases(len, 0);
01784 OutputInput(inp_phrases, hypo);
01785 for (size_t i=0; i<len; ++i)
01786 if (inp_phrases[i]) os << *inp_phrases[i];
01787 }
01788
01789 void Manager::OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo) const
01790 {
01791 if (hypo->GetPrevHypo()) {
01792 OutputInput(map, hypo->GetPrevHypo());
01793 map[hypo->GetCurrSourceWordsRange().GetStartPos()] = &hypo->GetTranslationOption().GetInputPath().GetPhrase();
01794 }
01795 }
01796
01797 std::map<size_t, const Factor*> Manager::GetPlaceholders(const Hypothesis &hypo, FactorType placeholderFactor) const
01798 {
01799 const InputPath &inputPath = hypo.GetTranslationOption().GetInputPath();
01800 const Phrase &inputPhrase = inputPath.GetPhrase();
01801
01802 std::map<size_t, const Factor*> ret;
01803
01804 for (size_t sourcePos = 0; sourcePos < inputPhrase.GetSize(); ++sourcePos) {
01805 const Factor *factor = inputPhrase.GetFactor(sourcePos, placeholderFactor);
01806 if (factor) {
01807 TargetPhrase const& tp = hypo.GetTranslationOption().GetTargetPhrase();
01808 std::set<size_t> targetPos = tp.GetAlignTerm().GetAlignmentsForSource(sourcePos);
01809 UTIL_THROW_IF2(targetPos.size() != 1,
01810 "Placeholder should be aligned to 1, and only 1, word");
01811 ret[*targetPos.begin()] = factor;
01812 }
01813 }
01814
01815 return ret;
01816 }
01817
01818 void Manager::OutputLatticeSamples(OutputCollector *collector) const
01819 {
01820 if (collector) {
01821 TrellisPathList latticeSamples;
01822 ostringstream out;
01823 CalcLatticeSamples(options()->output.lattice_sample_size, latticeSamples);
01824 OutputNBest(out,latticeSamples);
01825 collector->Write(m_source.GetTranslationId(), out.str());
01826 }
01827
01828 }
01829
/**
 * Write the word alignment of the best translation to the collector.
 * If an MBR/consensus decision rule already rendered alignment text
 * (m_alignmentOut), that text is reused; otherwise the alignment is
 * reconstructed from the best hypothesis chain.
 */
void Manager::OutputAlignment(OutputCollector *collector) const
{
  if (collector == NULL) {
    return;
  }

  if (!m_alignmentOut.str().empty()) {
    collector->Write(m_source.GetTranslationId(), m_alignmentOut.str());
  } else {
    // collect the derivation's edges, latest hypothesis first
    std::vector<const Hypothesis *> edges;
    const Hypothesis *currentHypo = GetBestHypothesis();
    while (currentHypo) {
      edges.push_back(currentHypo);
      currentHypo = currentHypo->GetPrevHypo();
    }
    // walk them in translation order, accumulating the target offset
    ostringstream out;
    size_t targetOffset = 0;
    BOOST_REVERSE_FOREACH(Hypothesis const* e, edges) {
      const TargetPhrase &tp = e->GetCurrTargetPhrase();
      size_t sourceOffset = e->GetCurrSourceWordsRange().GetStartPos();
      OutputAlignment(out, tp.GetAlignTerm(), sourceOffset, targetOffset);
      targetOffset += tp.GetSize();
    }
    // terminate the alignment line
    out << std::endl;
    collector->Write(m_source.GetTranslationId(), out.str());

  }
}
01858
01859 void
01860 Manager::
01861 OutputDetailedTranslationReport(OutputCollector *collector) const
01862 {
01863 if (collector) {
01864 ostringstream out;
01865 FixPrecision(out,PRECISION);
01866 TranslationAnalysis::PrintTranslationAnalysis(out, GetBestHypothesis());
01867 collector->Write(m_source.GetTranslationId(),out.str());
01868 }
01869
01870 }
01871
01872 void
01873 Manager::
01874 OutputUnknowns(OutputCollector *collector) const
01875 {
01876 if (collector) {
01877 long translationId = m_source.GetTranslationId();
01878 const vector<const Phrase*>& unknowns = m_transOptColl->GetUnknownSources();
01879 ostringstream out;
01880 for (size_t i = 0; i < unknowns.size(); ++i) {
01881 out << *(unknowns[i]);
01882 }
01883 out << endl;
01884 collector->Write(translationId, out.str());
01885 }
01886
01887 }
01888
01889 void
01890 Manager::
01891 OutputWordGraph(OutputCollector *collector) const
01892 {
01893 if (collector) {
01894 long translationId = m_source.GetTranslationId();
01895 ostringstream out;
01896 FixPrecision(out,PRECISION);
01897 GetWordGraph(translationId, out);
01898 collector->Write(translationId, out.str());
01899 }
01900 }
01901
01902 void
01903 Manager::
01904 OutputSearchGraph(OutputCollector *collector) const
01905 {
01906 if (collector) {
01907 long translationId = m_source.GetTranslationId();
01908 ostringstream out;
01909 FixPrecision(out,PRECISION);
01910 OutputSearchGraph(translationId, out);
01911 collector->Write(translationId, out.str());
01912
01913 #ifdef HAVE_PROTOBUF
01914 const StaticData &staticData = StaticData::Instance();
01915 if (staticData.GetOutputSearchGraphPB()) {
01916 ostringstream sfn;
01917 sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << translationId << ".pb" << ends;
01918 string fn = sfn.str();
01919 VERBOSE(2, "Writing search graph to " << fn << endl);
01920 fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
01921 SerializeSearchGraphPB(translationId, output);
01922 }
01923 #endif
01924 }
01925
01926 }
01927
01928 void Manager::OutputSearchGraphSLF() const
01929 {
01930
01931 long translationId = m_source.GetTranslationId();
01932
01933
01934 std::string const& slf = options()->output.SearchGraphSLF;
01935 if (slf.size()) {
01936 util::StringStream fileName;
01937 fileName << slf << "/" << translationId << ".slf";
01938 ofstream *file = new ofstream;
01939 file->open(fileName.str().c_str());
01940 if (file->is_open() && file->good()) {
01941 ostringstream out;
01942 FixPrecision(out,PRECISION);
01943 OutputSearchGraphAsSLF(translationId, out);
01944 *file << out.str();
01945 file -> flush();
01946 } else {
01947 TRACE_ERR("Cannot output HTK standard lattice for line " << translationId << " because the output file is not open or not ready for writing" << endl);
01948 }
01949 delete file;
01950 }
01951
01952 }
01953
01954 void Manager::OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId) const
01955 {
01956 for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
01957 out << translationId;
01958 out << " |||";
01959 const vector<Word> mbrHypo = si->GetWords();
01960 for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
01961 const Factor *factor = mbrHypo[i].GetFactor(options()->output.factor_order[0]);
01962 if (i>0) out << " " << *factor;
01963 else out << *factor;
01964 }
01965 out << " |||";
01966 out << " map: " << si->GetMapScore();
01967 out << " w: " << mbrHypo.size();
01968 const vector<float>& ngramScores = si->GetNgramScores();
01969 for (size_t i = 0; i < ngramScores.size(); ++i) {
01970 out << " " << ngramScores[i];
01971 }
01972 out << " ||| " << si->GetScore();
01973
01974 out << endl;
01975 }
01976 }
01977
01978 void
01979 Manager::
01980 OutputBestHypo(const std::vector<Word>& mbrBestHypo, ostream& out) const
01981 {
01982 FactorType f = options()->output.factor_order[0];
01983 for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
01984 const Factor *factor = mbrBestHypo[i].GetFactor(f);
01985 UTIL_THROW_IF2(factor == NULL, "No factor " << f << " at position " << i);
01986 if (i) out << " ";
01987 out << *factor;
01988 }
01989 out << endl;
01990 }
01991
01992 void
01993 Manager::
01994 OutputBestHypo(const Moses::TrellisPath &path, std::ostream &out) const
01995 {
01996 std::vector<const Hypothesis *> const& edges = path.GetEdges();
01997 for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
01998 Hypothesis const& edge = *edges[currEdge];
01999 OutputSurface(out, edge);
02000 }
02001 out << endl;
02002 }
02003
02004 void
02005 Manager::
02006 OutputAlignment(std::ostringstream &out, const TrellisPath &path) const
02007 {
02008 WordAlignmentSort waso = options()->output.WA_SortOrder;
02009 BOOST_REVERSE_FOREACH(Hypothesis const* e, path.GetEdges())
02010 e->OutputAlignment(out, false);
02011
02012
02013 out << std::endl;
02014 }
02015
02016 }