00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <iostream>
00036 #include "IOWrapper.h"
00037 #include "moses/TypeDef.h"
00038 #include "moses/Util.h"
00039 #include "moses/WordsRange.h"
00040 #include "moses/StaticData.h"
00041 #include "moses/InputFileStream.h"
00042 #include "moses/Incremental.h"
00043 #include "moses/TranslationModel/PhraseDictionary.h"
00044 #include "moses/ChartTrellisPathList.h"
00045 #include "moses/ChartTrellisPath.h"
00046 #include "moses/ChartTrellisNode.h"
00047 #include "moses/ChartTranslationOptions.h"
00048 #include "moses/ChartHypothesis.h"
00049 #include "moses/FeatureVector.h"
00050
00051 #include <boost/algorithm/string.hpp>
00052
00053
00054 using namespace std;
00055 using namespace Moses;
00056
00057 namespace MosesChartCmd
00058 {
00059
00060 IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
00061 , const std::vector<FactorType> &outputFactorOrder
00062 , const FactorMask &inputFactorUsed
00063 , size_t nBestSize
00064 , const std::string &nBestFilePath
00065 , const std::string &inputFilePath)
00066 :m_inputFactorOrder(inputFactorOrder)
00067 ,m_outputFactorOrder(outputFactorOrder)
00068 ,m_inputFactorUsed(inputFactorUsed)
00069 ,m_outputSearchGraphStream(NULL)
00070 ,m_detailedTranslationReportingStream(NULL)
00071 ,m_alignmentInfoStream(NULL)
00072 ,m_inputFilePath(inputFilePath)
00073 ,m_detailOutputCollector(NULL)
00074 ,m_nBestOutputCollector(NULL)
00075 ,m_searchGraphOutputCollector(NULL)
00076 ,m_singleBestOutputCollector(NULL)
00077 ,m_alignmentInfoCollector(NULL)
00078 {
00079 const StaticData &staticData = StaticData::Instance();
00080
00081 if (m_inputFilePath.empty()) {
00082 m_inputStream = &std::cin;
00083 } else {
00084 m_inputStream = new InputFileStream(inputFilePath);
00085 }
00086
00087 bool suppressSingleBestOutput = false;
00088
00089 if (nBestSize > 0) {
00090 if (nBestFilePath == "-") {
00091 m_nBestOutputCollector = new Moses::OutputCollector(&std::cout);
00092 suppressSingleBestOutput = true;
00093 } else {
00094 m_nBestOutputCollector = new Moses::OutputCollector(new std::ofstream(nBestFilePath.c_str()));
00095 m_nBestOutputCollector->HoldOutputStream();
00096 }
00097 }
00098
00099 if (!suppressSingleBestOutput) {
00100 m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout);
00101 }
00102
00103
00104 if (staticData.GetOutputSearchGraph()) {
00105 string fileName = staticData.GetParam("output-search-graph")[0];
00106 std::ofstream *file = new std::ofstream;
00107 m_outputSearchGraphStream = file;
00108 file->open(fileName.c_str());
00109 m_searchGraphOutputCollector = new Moses::OutputCollector(m_outputSearchGraphStream);
00110 }
00111
00112
00113 if (staticData.IsDetailedTranslationReportingEnabled()) {
00114 const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
00115 m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
00116 m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
00117 }
00118
00119 if (!staticData.GetAlignmentOutputFile().empty()) {
00120 m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
00121 m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
00122 CHECK(m_alignmentInfoStream->good());
00123 }
00124 }
00125
00126 IOWrapper::~IOWrapper()
00127 {
00128 if (!m_inputFilePath.empty()) {
00129 delete m_inputStream;
00130 }
00131 delete m_outputSearchGraphStream;
00132 delete m_detailedTranslationReportingStream;
00133 delete m_alignmentInfoStream;
00134 delete m_detailOutputCollector;
00135 delete m_nBestOutputCollector;
00136 delete m_searchGraphOutputCollector;
00137 delete m_singleBestOutputCollector;
00138 delete m_alignmentInfoCollector;
00139 }
00140
00141 void IOWrapper::ResetTranslationId()
00142 {
00143 m_translationId = StaticData::Instance().GetStartTranslationId();
00144 }
00145
00146 InputType*IOWrapper::GetInput(InputType* inputType)
00147 {
00148 if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
00149 if (long x = inputType->GetTranslationId()) {
00150 if (x>=m_translationId) m_translationId = x+1;
00151 } else inputType->SetTranslationId(m_translationId++);
00152
00153 return inputType;
00154 } else {
00155 delete inputType;
00156 return NULL;
00157 }
00158 }
00159
00160
00161
00162
00163
00164 void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
00165 {
00166 CHECK(outputFactorOrder.size() > 0);
00167 if (reportAllFactors == true) {
00168 out << phrase;
00169 } else {
00170 size_t size = phrase.GetSize();
00171 for (size_t pos = 0 ; pos < size ; pos++) {
00172 const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
00173 out << *factor;
00174 CHECK(factor);
00175
00176 for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
00177 const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
00178 CHECK(factor);
00179
00180 out << "|" << *factor;
00181 }
00182 out << " ";
00183 }
00184 }
00185 }
00186
00187 void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
00188 ,bool reportSegmentation, bool reportAllFactors)
00189 {
00190 if ( hypo != NULL) {
00191
00192
00193 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00194
00195 vector<const ChartHypothesis*>::const_iterator iter;
00196 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00197 const ChartHypothesis *prevHypo = *iter;
00198
00199 OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors);
00200 }
00201 }
00202 }
00203
00204 void IOWrapper::Backtrack(const ChartHypothesis *hypo)
00205 {
00206 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00207
00208 vector<const ChartHypothesis*>::const_iterator iter;
00209 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00210 const ChartHypothesis *prevHypo = *iter;
00211
00212 VERBOSE(3,prevHypo->GetId() << " <= ");
00213 Backtrack(prevHypo);
00214 }
00215 }
00216
00217 void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, long )
00218 {
00219 for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
00220 const Factor *factor = mbrBestHypo[i];
00221 CHECK(factor);
00222
00223 cout << *factor << " ";
00224 }
00225 }
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248 void IOWrapper::ReconstructApplicationContext(const ChartHypothesis &hypo,
00249 const Sentence &sentence,
00250 ApplicationContext &context)
00251 {
00252 context.clear();
00253 const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
00254 std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
00255 std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
00256 const WordsRange &span = hypo.GetCurrSourceRange();
00257 size_t i = span.GetStartPos();
00258 while (i <= span.GetEndPos()) {
00259 if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
00260
00261 const Word &symbol = sentence.GetWord(i);
00262 context.push_back(std::make_pair(symbol, WordsRange(i, i)));
00263 ++i;
00264 } else {
00265
00266 const Word &symbol = (*p)->GetTargetLHS();
00267 const WordsRange &range = (*p)->GetCurrSourceRange();
00268 context.push_back(std::make_pair(symbol, range));
00269 i = range.GetEndPos()+1;
00270 ++p;
00271 }
00272 }
00273 }
00274
00275
00276
00277
00278
00279 void IOWrapper::WriteApplicationContext(std::ostream &out,
00280 const ApplicationContext &context)
00281 {
00282 assert(!context.empty());
00283 ApplicationContext::const_reverse_iterator p = context.rbegin();
00284 while (true) {
00285 out << p->second << "=" << p->first << " ";
00286 if (++p == context.rend()) {
00287 break;
00288 }
00289 out << " ";
00290 }
00291 }
00292
00293 void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
00294 {
00295
00296 if (hypo != NULL) {
00297 ReconstructApplicationContext(*hypo, sentence, applicationContext);
00298 out << "Trans Opt " << translationId
00299 << " " << hypo->GetCurrSourceRange()
00300 << ": ";
00301 WriteApplicationContext(out, applicationContext);
00302 out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
00303 << "->" << hypo->GetCurrTargetPhrase()
00304 << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown()
00305 << endl;
00306 }
00307
00308 const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00309 std::vector<const ChartHypothesis*>::const_iterator iter;
00310 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00311 const ChartHypothesis *prevHypo = *iter;
00312 OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
00313 }
00314 }
00315
00316 void IOWrapper::OutputDetailedTranslationReport(
00317 const ChartHypothesis *hypo,
00318 const Sentence &sentence,
00319 long translationId)
00320 {
00321 if (hypo == NULL) {
00322 return;
00323 }
00324 std::ostringstream out;
00325 ApplicationContext applicationContext;
00326
00327 OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
00328 CHECK(m_detailOutputCollector);
00329 m_detailOutputCollector->Write(translationId, out.str());
00330 }
00331
00332
00333 void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
00334 {
00335 if (!m_singleBestOutputCollector)
00336 return;
00337 std::ostringstream out;
00338 IOWrapper::FixPrecision(out);
00339 if (hypo != NULL) {
00340 VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
00341 VERBOSE(3,"Best path: ");
00342 Backtrack(hypo);
00343 VERBOSE(3,"0" << std::endl);
00344
00345 if (StaticData::Instance().GetOutputHypoScore()) {
00346 out << hypo->GetTotalScore() << " ";
00347 }
00348
00349 if (StaticData::Instance().IsPathRecoveryEnabled()) {
00350 out << "||| ";
00351 }
00352 Phrase outPhrase(ARRAY_SIZE_INCR);
00353 hypo->CreateOutputPhrase(outPhrase);
00354
00355
00356 CHECK(outPhrase.GetSize() >= 2);
00357 outPhrase.RemoveWord(0);
00358 outPhrase.RemoveWord(outPhrase.GetSize() - 1);
00359
00360 const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
00361 string output = outPhrase.GetStringRep(outputFactorOrder);
00362 out << output << endl;
00363 } else {
00364 VERBOSE(1, "NO BEST TRANSLATION" << endl);
00365
00366 if (StaticData::Instance().GetOutputHypoScore()) {
00367 out << "0 ";
00368 }
00369
00370 out << endl;
00371 }
00372 m_singleBestOutputCollector->Write(translationId, out.str());
00373 }
00374
00375 void IOWrapper::OutputBestHypo(search::Applied applied, long translationId)
00376 {
00377 if (!m_singleBestOutputCollector) return;
00378 std::ostringstream out;
00379 IOWrapper::FixPrecision(out);
00380 if (StaticData::Instance().GetOutputHypoScore()) {
00381 out << applied.GetScore() << ' ';
00382 }
00383 Phrase outPhrase;
00384 Incremental::ToPhrase(applied, outPhrase);
00385
00386 CHECK(outPhrase.GetSize() >= 2);
00387 outPhrase.RemoveWord(0);
00388 outPhrase.RemoveWord(outPhrase.GetSize() - 1);
00389 out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
00390 out << '\n';
00391 m_singleBestOutputCollector->Write(translationId, out.str());
00392 }
00393
00394 void IOWrapper::OutputBestNone(long translationId)
00395 {
00396 if (!m_singleBestOutputCollector) return;
00397 if (StaticData::Instance().GetOutputHypoScore()) {
00398 m_singleBestOutputCollector->Write(translationId, "0 \n");
00399 } else {
00400 m_singleBestOutputCollector->Write(translationId, "\n");
00401 }
00402 }
00403
00404 void IOWrapper::OutputAllFeatureScores(const ScoreComponentCollection &features, std::ostream &out)
00405 {
00406 std::string lastName = "";
00407 const vector<const StatefulFeatureFunction*>& sff = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00408 for( size_t i=0; i<sff.size(); i++ ) {
00409 const StatefulFeatureFunction *ff = sff[i];
00410 if (ff->GetScoreProducerDescription() != "BleuScoreFeature"
00411 && ff->IsTuneable()) {
00412 OutputFeatureScores( out, features, ff, lastName );
00413 }
00414 }
00415 const vector<const StatelessFeatureFunction*>& slf = StatelessFeatureFunction::GetStatelessFeatureFunctions();
00416 for( size_t i=0; i<slf.size(); i++ ) {
00417 const StatelessFeatureFunction *ff = slf[i];
00418 if (ff->IsTuneable()) {
00419 OutputFeatureScores( out, features, ff, lastName );
00420 }
00421 }
00422 }
00423
00424 void IOWrapper::OutputFeatureScores( std::ostream& out, const ScoreComponentCollection &features, const FeatureFunction *ff, std::string &lastName )
00425 {
00426 const StaticData &staticData = StaticData::Instance();
00427 bool labeledOutput = staticData.IsLabeledNBestList();
00428
00429
00430 if (ff->GetNumScoreComponents() != 0) {
00431 if( labeledOutput && lastName != ff->GetScoreProducerDescription() ) {
00432 lastName = ff->GetScoreProducerDescription();
00433 out << " " << lastName << "=";
00434 }
00435 vector<float> scores = features.GetScoresForProducer( ff );
00436 for (size_t j = 0; j<scores.size(); ++j) {
00437 out << " " << scores[j];
00438 }
00439 }
00440
00441
00442 const FVector scores = features.GetVectorForProducer( ff );
00443 for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
00444 out << " " << i->first << "= " << i->second;
00445 }
00446 }
00447
00448 void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, long translationId)
00449 {
00450 std::ostringstream out;
00451
00452
00453 if (m_nBestOutputCollector->OutputIsCout()) {
00454
00455
00456 IOWrapper::FixPrecision(out);
00457
00458
00459 }
00460
00461
00462 bool includeWordAlignment = StaticData::Instance().PrintAlignmentInfoInNbest();
00463
00464 ChartTrellisPathList::const_iterator iter;
00465 for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
00466 const ChartTrellisPath &path = **iter;
00467
00468
00469 Moses::Phrase outputPhrase = path.GetOutputPhrase();
00470
00471
00472 CHECK(outputPhrase.GetSize() >= 2);
00473 outputPhrase.RemoveWord(0);
00474 outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00475
00476
00477 out << translationId << " ||| ";
00478 OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
00479 out << " ||| ";
00480
00481
00482
00483
00484
00485 OutputAllFeatureScores(path.GetScoreBreakdown(), out);
00486
00487
00488 out << " ||| " << path.GetTotalScore();
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504
00505
00506
00507
00508
00509
00510 if (includeWordAlignment) {
00511 out << " ||| ";
00512
00513 Alignments retAlign;
00514
00515 const ChartTrellisNode &node = path.GetFinalNode();
00516 OutputAlignmentNBest(retAlign, node, 0);
00517
00518 Alignments::const_iterator iter;
00519 for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
00520 const pair<size_t, size_t> &alignPoint = *iter;
00521 out << alignPoint.first << "-" << alignPoint.second << " ";
00522 }
00523 }
00524
00525 out << endl;
00526 }
00527
00528 out <<std::flush;
00529
00530 assert(m_nBestOutputCollector);
00531 m_nBestOutputCollector->Write(translationId, out.str());
00532 }
00533
00534 void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, long translationId)
00535 {
00536 std::ostringstream out;
00537
00538 if (m_nBestOutputCollector->OutputIsCout()) {
00539 IOWrapper::FixPrecision(out);
00540 }
00541 Phrase outputPhrase;
00542 ScoreComponentCollection features;
00543 for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
00544 Incremental::PhraseAndFeatures(*i, outputPhrase, features);
00545
00546 CHECK(outputPhrase.GetSize() >= 2);
00547 outputPhrase.RemoveWord(0);
00548 outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00549 out << translationId << " ||| ";
00550 OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
00551 out << " ||| ";
00552 OutputAllFeatureScores(features, out);
00553 out << " ||| " << i->GetScore() << '\n';
00554 }
00555 out << std::flush;
00556 assert(m_nBestOutputCollector);
00557 m_nBestOutputCollector->Write(translationId, out.str());
00558 }
00559
00560 void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
00561 {
00562 stream.setf(std::ios::fixed);
00563 stream.precision(size);
00564 }
00565
// Converts a vector of relative sizes into absolute positions, in place.
//
// A running position starts at shift. An entry equal to 0 is replaced by the
// current position, which then advances by one. A non-zero entry is left
// untouched and advances the position by its own value.
template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift)
{
  T nextPos = shift;
  for (typename std::vector<T>::iterator it = offsets.begin(); it != offsets.end(); ++it) {
    if (*it == 0) {
      *it = nextPos;
      ++nextPos;
    } else {
      nextPos += *it;
    }
  }
}
00579
00580 size_t CalcSourceSize(const Moses::ChartHypothesis *hypo)
00581 {
00582 size_t ret = hypo->GetCurrSourceRange().GetNumWordsCovered();
00583 const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00584 for (size_t i = 0; i < prevHypos.size(); ++i) {
00585 size_t childSize = prevHypos[i]->GetCurrSourceRange().GetNumWordsCovered();
00586 ret -= (childSize - 1);
00587 }
00588 return ret;
00589 }
00590
00591 size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartTrellisNode &node, size_t startTarget)
00592 {
00593 const ChartHypothesis *hypo = &node.GetHypothesis();
00594
00595 size_t totalTargetSize = 0;
00596 size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
00597
00598 const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
00599
00600 size_t thisSourceSize = CalcSourceSize(hypo);
00601
00602
00603
00604 vector<size_t> sourceOffsets(thisSourceSize, 0);
00605 vector<size_t> targetOffsets(tp.GetSize(), 0);
00606
00607 const ChartTrellisNode::NodeChildren &prevNodes = node.GetChildren();
00608
00609 const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
00610 vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00611 const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
00612
00613 CHECK(sourceInd2pos.size() == prevNodes.size());
00614
00615 size_t targetInd = 0;
00616 for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00617 if (tp.GetWord(targetPos).IsNonTerminal()) {
00618 CHECK(targetPos < targetPos2SourceInd.size());
00619 size_t sourceInd = targetPos2SourceInd[targetPos];
00620 size_t sourcePos = sourceInd2pos[sourceInd];
00621
00622 const ChartTrellisNode &prevNode = *prevNodes[sourceInd];
00623
00624
00625 size_t sourceSize = prevNode.GetHypothesis().GetCurrSourceRange().GetNumWordsCovered();
00626 sourceOffsets[sourcePos] = sourceSize;
00627
00628
00629
00630 size_t currStartTarget = startTarget + totalTargetSize;
00631 size_t targetSize = OutputAlignmentNBest(retAlign, prevNode, currStartTarget);
00632 targetOffsets[targetPos] = targetSize;
00633
00634 totalTargetSize += targetSize;
00635 ++targetInd;
00636 } else {
00637 ++totalTargetSize;
00638 }
00639 }
00640
00641
00642
00643 ShiftOffsets(sourceOffsets, startSource);
00644 ShiftOffsets(targetOffsets, startTarget);
00645
00646
00647 const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
00648
00649
00650 AlignmentInfo::const_iterator iter;
00651 for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
00652 const std::pair<size_t,size_t> &align = *iter;
00653 size_t relSource = align.first;
00654 size_t relTarget = align.second;
00655 size_t absSource = sourceOffsets[relSource];
00656 size_t absTarget = targetOffsets[relTarget];
00657
00658 pair<size_t, size_t> alignPoint(absSource, absTarget);
00659 pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00660 CHECK(ret.second);
00661 }
00662
00663 return totalTargetSize;
00664 }
00665
00666 void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
00667 {
00668 ostringstream out;
00669
00670 if (hypo) {
00671 Alignments retAlign;
00672 OutputAlignment(retAlign, hypo, 0);
00673
00674
00675 Alignments::const_iterator iter;
00676 for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
00677 const pair<size_t, size_t> &alignPoint = *iter;
00678 out << alignPoint.first << "-" << alignPoint.second << " ";
00679 }
00680 }
00681 out << endl;
00682
00683 m_alignmentInfoCollector->Write(translationId, out.str());
00684 }
00685
00686 size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget)
00687 {
00688 size_t totalTargetSize = 0;
00689 size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
00690
00691 const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
00692
00693 size_t thisSourceSize = CalcSourceSize(hypo);
00694
00695
00696
00697 vector<size_t> sourceOffsets(thisSourceSize, 0);
00698 vector<size_t> targetOffsets(tp.GetSize(), 0);
00699
00700 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00701
00702 const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
00703 vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00704 const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
00705
00706 CHECK(sourceInd2pos.size() == prevHypos.size());
00707
00708 size_t targetInd = 0;
00709 for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00710 if (tp.GetWord(targetPos).IsNonTerminal()) {
00711 CHECK(targetPos < targetPos2SourceInd.size());
00712 size_t sourceInd = targetPos2SourceInd[targetPos];
00713 size_t sourcePos = sourceInd2pos[sourceInd];
00714
00715 const ChartHypothesis *prevHypo = prevHypos[sourceInd];
00716
00717
00718 size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
00719 sourceOffsets[sourcePos] = sourceSize;
00720
00721
00722
00723 size_t currStartTarget = startTarget + totalTargetSize;
00724 size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
00725 targetOffsets[targetPos] = targetSize;
00726
00727 totalTargetSize += targetSize;
00728 ++targetInd;
00729 } else {
00730 ++totalTargetSize;
00731 }
00732 }
00733
00734
00735
00736 ShiftOffsets(sourceOffsets, startSource);
00737 ShiftOffsets(targetOffsets, startTarget);
00738
00739
00740 const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
00741
00742
00743 AlignmentInfo::const_iterator iter;
00744 for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
00745 const std::pair<size_t,size_t> &align = *iter;
00746 size_t relSource = align.first;
00747 size_t relTarget = align.second;
00748 size_t absSource = sourceOffsets[relSource];
00749 size_t absTarget = targetOffsets[relTarget];
00750
00751 pair<size_t, size_t> alignPoint(absSource, absTarget);
00752 pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00753 CHECK(ret.second);
00754
00755 }
00756
00757 return totalTargetSize;
00758 }
00759
00760 void IOWrapper::OutputAlignment(vector< set<size_t> > &retAlignmentsS2T, const AlignmentInfo &ai)
00761 {
00762 typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
00763 AlignVec alignments = ai.GetSortedAlignments();
00764
00765 AlignVec::const_iterator it;
00766 for (it = alignments.begin(); it != alignments.end(); ++it) {
00767 const std::pair<size_t,size_t> &alignPoint = **it;
00768
00769 CHECK(alignPoint.first < retAlignmentsS2T.size());
00770 pair<set<size_t>::iterator, bool> ret = retAlignmentsS2T[alignPoint.first].insert(alignPoint.second);
00771 CHECK(ret.second);
00772 }
00773 }
00774
00775 }
00776