00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <iostream>
00036 #include "IOWrapper.h"
00037 #include "moses/TypeDef.h"
00038 #include "moses/Util.h"
00039 #include "moses/WordsRange.h"
00040 #include "moses/StaticData.h"
00041 #include "moses/DummyScoreProducers.h"
00042 #include "moses/InputFileStream.h"
00043 #include "moses/Incremental.h"
00044 #include "moses/TranslationModel/PhraseDictionary.h"
00045 #include "moses/ChartTrellisPathList.h"
00046 #include "moses/ChartTrellisPath.h"
00047 #include "moses/ChartTrellisNode.h"
00048 #include "moses/ChartTranslationOptions.h"
00049 #include "moses/ChartHypothesis.h"
00050 #include "moses/FeatureVector.h"
00051
00052 #include <boost/algorithm/string.hpp>
00053
00054
00055 using namespace std;
00056 using namespace Moses;
00057
00058 namespace MosesChartCmd
00059 {
00060
00061 IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
00062 , const std::vector<FactorType> &outputFactorOrder
00063 , const FactorMask &inputFactorUsed
00064 , size_t nBestSize
00065 , const std::string &nBestFilePath
00066 , const std::string &inputFilePath)
00067 :m_inputFactorOrder(inputFactorOrder)
00068 ,m_outputFactorOrder(outputFactorOrder)
00069 ,m_inputFactorUsed(inputFactorUsed)
00070 ,m_outputSearchGraphStream(NULL)
00071 ,m_detailedTranslationReportingStream(NULL)
00072 ,m_alignmentInfoStream(NULL)
00073 ,m_inputFilePath(inputFilePath)
00074 ,m_detailOutputCollector(NULL)
00075 ,m_nBestOutputCollector(NULL)
00076 ,m_searchGraphOutputCollector(NULL)
00077 ,m_singleBestOutputCollector(NULL)
00078 ,m_alignmentInfoCollector(NULL)
00079 {
00080 const StaticData &staticData = StaticData::Instance();
00081
00082 if (m_inputFilePath.empty()) {
00083 m_inputStream = &std::cin;
00084 } else {
00085 m_inputStream = new InputFileStream(inputFilePath);
00086 }
00087
00088 bool suppressSingleBestOutput = false;
00089
00090 if (nBestSize > 0) {
00091 if (nBestFilePath == "-") {
00092 m_nBestOutputCollector = new Moses::OutputCollector(&std::cout);
00093 suppressSingleBestOutput = true;
00094 } else {
00095 m_nBestOutputCollector = new Moses::OutputCollector(new std::ofstream(nBestFilePath.c_str()));
00096 m_nBestOutputCollector->HoldOutputStream();
00097 }
00098 }
00099
00100 if (!suppressSingleBestOutput) {
00101 m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout);
00102 }
00103
00104
00105 if (staticData.GetOutputSearchGraph()) {
00106 string fileName = staticData.GetParam("output-search-graph")[0];
00107 std::ofstream *file = new std::ofstream;
00108 m_outputSearchGraphStream = file;
00109 file->open(fileName.c_str());
00110 m_searchGraphOutputCollector = new Moses::OutputCollector(m_outputSearchGraphStream);
00111 }
00112
00113
00114 if (staticData.IsDetailedTranslationReportingEnabled()) {
00115 const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
00116 m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
00117 m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
00118 }
00119
00120 if (!staticData.GetAlignmentOutputFile().empty()) {
00121 m_alignmentInfoStream = new std::ofstream(staticData.GetAlignmentOutputFile().c_str());
00122 m_alignmentInfoCollector = new Moses::OutputCollector(m_alignmentInfoStream);
00123 CHECK(m_alignmentInfoStream->good());
00124 }
00125 }
00126
00127 IOWrapper::~IOWrapper()
00128 {
00129 if (!m_inputFilePath.empty()) {
00130 delete m_inputStream;
00131 }
00132 delete m_outputSearchGraphStream;
00133 delete m_detailedTranslationReportingStream;
00134 delete m_alignmentInfoStream;
00135 delete m_detailOutputCollector;
00136 delete m_nBestOutputCollector;
00137 delete m_searchGraphOutputCollector;
00138 delete m_singleBestOutputCollector;
00139 delete m_alignmentInfoCollector;
00140 }
00141
00142 void IOWrapper::ResetTranslationId() {
00143 m_translationId = StaticData::Instance().GetStartTranslationId();
00144 }
00145
00146 InputType*IOWrapper::GetInput(InputType* inputType)
00147 {
00148 if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
00149 if (long x = inputType->GetTranslationId()) {
00150 if (x>=m_translationId) m_translationId = x+1;
00151 } else inputType->SetTranslationId(m_translationId++);
00152
00153 return inputType;
00154 } else {
00155 delete inputType;
00156 return NULL;
00157 }
00158 }
00159
00160
00161
00162
00163
00164 void OutputSurface(std::ostream &out, const Phrase &phrase, const std::vector<FactorType> &outputFactorOrder, bool reportAllFactors)
00165 {
00166 CHECK(outputFactorOrder.size() > 0);
00167 if (reportAllFactors == true) {
00168 out << phrase;
00169 } else {
00170 size_t size = phrase.GetSize();
00171 for (size_t pos = 0 ; pos < size ; pos++) {
00172 const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
00173 out << *factor;
00174 CHECK(factor);
00175
00176 for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
00177 const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
00178 CHECK(factor);
00179
00180 out << "|" << *factor;
00181 }
00182 out << " ";
00183 }
00184 }
00185 }
00186
00187 void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::vector<FactorType> &outputFactorOrder
00188 ,bool reportSegmentation, bool reportAllFactors)
00189 {
00190 if ( hypo != NULL) {
00191
00192
00193 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00194
00195 vector<const ChartHypothesis*>::const_iterator iter;
00196 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00197 const ChartHypothesis *prevHypo = *iter;
00198
00199 OutputSurface(out, prevHypo, outputFactorOrder, reportSegmentation, reportAllFactors);
00200 }
00201 }
00202 }
00203
00204 void IOWrapper::Backtrack(const ChartHypothesis *hypo)
00205 {
00206 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00207
00208 vector<const ChartHypothesis*>::const_iterator iter;
00209 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00210 const ChartHypothesis *prevHypo = *iter;
00211
00212 VERBOSE(3,prevHypo->GetId() << " <= ");
00213 Backtrack(prevHypo);
00214 }
00215 }
00216
00217 void IOWrapper::OutputBestHypo(const std::vector<const Factor*>& mbrBestHypo, long )
00218 {
00219 for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
00220 const Factor *factor = mbrBestHypo[i];
00221 CHECK(factor);
00222
00223 cout << *factor << " ";
00224 }
00225 }
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248 void IOWrapper::ReconstructApplicationContext(const ChartHypothesis &hypo,
00249 const Sentence &sentence,
00250 ApplicationContext &context)
00251 {
00252 context.clear();
00253 const std::vector<const ChartHypothesis*> &prevHypos = hypo.GetPrevHypos();
00254 std::vector<const ChartHypothesis*>::const_iterator p = prevHypos.begin();
00255 std::vector<const ChartHypothesis*>::const_iterator end = prevHypos.end();
00256 const WordsRange &span = hypo.GetCurrSourceRange();
00257 size_t i = span.GetStartPos();
00258 while (i <= span.GetEndPos()) {
00259 if (p == end || i < (*p)->GetCurrSourceRange().GetStartPos()) {
00260
00261 const Word &symbol = sentence.GetWord(i);
00262 context.push_back(std::make_pair(symbol, WordsRange(i, i)));
00263 ++i;
00264 } else {
00265
00266 const Word &symbol = (*p)->GetTargetLHS();
00267 const WordsRange &range = (*p)->GetCurrSourceRange();
00268 context.push_back(std::make_pair(symbol, range));
00269 i = range.GetEndPos()+1;
00270 ++p;
00271 }
00272 }
00273 }
00274
00275
00276
00277
00278
00279 void IOWrapper::WriteApplicationContext(std::ostream &out,
00280 const ApplicationContext &context)
00281 {
00282 assert(!context.empty());
00283 ApplicationContext::const_reverse_iterator p = context.rbegin();
00284 while (true) {
00285 out << p->second << "=" << p->first << " ";
00286 if (++p == context.rend()) {
00287 break;
00288 }
00289 out << " ";
00290 }
00291 }
00292
00293 void IOWrapper::OutputTranslationOptions(std::ostream &out, ApplicationContext &applicationContext, const ChartHypothesis *hypo, const Sentence &sentence, long translationId)
00294 {
00295
00296 if (hypo != NULL) {
00297 ReconstructApplicationContext(*hypo, sentence, applicationContext);
00298 out << "Trans Opt " << translationId
00299 << " " << hypo->GetCurrSourceRange()
00300 << ": ";
00301 WriteApplicationContext(out, applicationContext);
00302 out << ": " << hypo->GetCurrTargetPhrase().GetTargetLHS()
00303 << "->" << hypo->GetCurrTargetPhrase()
00304 << " " << hypo->GetTotalScore() << hypo->GetScoreBreakdown()
00305 << endl;
00306 }
00307
00308 const std::vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00309 std::vector<const ChartHypothesis*>::const_iterator iter;
00310 for (iter = prevHypos.begin(); iter != prevHypos.end(); ++iter) {
00311 const ChartHypothesis *prevHypo = *iter;
00312 OutputTranslationOptions(out, applicationContext, prevHypo, sentence, translationId);
00313 }
00314 }
00315
00316 void IOWrapper::OutputDetailedTranslationReport(
00317 const ChartHypothesis *hypo,
00318 const Sentence &sentence,
00319 long translationId)
00320 {
00321 if (hypo == NULL) {
00322 return;
00323 }
00324 std::ostringstream out;
00325 ApplicationContext applicationContext;
00326
00327 OutputTranslationOptions(out, applicationContext, hypo, sentence, translationId);
00328 CHECK(m_detailOutputCollector);
00329 m_detailOutputCollector->Write(translationId, out.str());
00330 }
00331
00332
00333 void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId)
00334 {
00335 if (!m_singleBestOutputCollector)
00336 return;
00337 std::ostringstream out;
00338 IOWrapper::FixPrecision(out);
00339 if (hypo != NULL) {
00340 VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
00341 VERBOSE(3,"Best path: ");
00342 Backtrack(hypo);
00343 VERBOSE(3,"0" << std::endl);
00344
00345 if (StaticData::Instance().GetOutputHypoScore()) {
00346 out << hypo->GetTotalScore() << " ";
00347 }
00348
00349 if (StaticData::Instance().IsPathRecoveryEnabled()) {
00350 out << "||| ";
00351 }
00352 Phrase outPhrase(ARRAY_SIZE_INCR);
00353 hypo->CreateOutputPhrase(outPhrase);
00354
00355
00356 CHECK(outPhrase.GetSize() >= 2);
00357 outPhrase.RemoveWord(0);
00358 outPhrase.RemoveWord(outPhrase.GetSize() - 1);
00359
00360 const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
00361 string output = outPhrase.GetStringRep(outputFactorOrder);
00362 out << output << endl;
00363 } else {
00364 VERBOSE(1, "NO BEST TRANSLATION" << endl);
00365
00366 if (StaticData::Instance().GetOutputHypoScore()) {
00367 out << "0 ";
00368 }
00369
00370 out << endl;
00371 }
00372 m_singleBestOutputCollector->Write(translationId, out.str());
00373 }
00374
00375 void IOWrapper::OutputBestHypo(search::Applied applied, long translationId) {
00376 if (!m_singleBestOutputCollector) return;
00377 std::ostringstream out;
00378 IOWrapper::FixPrecision(out);
00379 if (StaticData::Instance().GetOutputHypoScore()) {
00380 out << applied.GetScore() << ' ';
00381 }
00382 Phrase outPhrase;
00383 Incremental::ToPhrase(applied, outPhrase);
00384
00385 CHECK(outPhrase.GetSize() >= 2);
00386 outPhrase.RemoveWord(0);
00387 outPhrase.RemoveWord(outPhrase.GetSize() - 1);
00388 out << outPhrase.GetStringRep(StaticData::Instance().GetOutputFactorOrder());
00389 out << '\n';
00390 m_singleBestOutputCollector->Write(translationId, out.str());
00391 }
00392
00393 void IOWrapper::OutputBestNone(long translationId) {
00394 if (!m_singleBestOutputCollector) return;
00395 if (StaticData::Instance().GetOutputHypoScore()) {
00396 m_singleBestOutputCollector->Write(translationId, "0 \n");
00397 } else {
00398 m_singleBestOutputCollector->Write(translationId, "\n");
00399 }
00400 }
00401
00402 namespace {
00403
00404 void OutputSparseFeatureScores(std::ostream& out, const ScoreComponentCollection &features, const FeatureFunction *ff, std::string &lastName) {
00405 const StaticData &staticData = StaticData::Instance();
00406 bool labeledOutput = staticData.IsLabeledNBestList();
00407 const FVector scores = features.GetVectorForProducer( ff );
00408
00409
00410 if (! ff->GetSparseFeatureReporting()) {
00411 const FVector &weights = staticData.GetAllWeights().GetScoresVector();
00412 if (labeledOutput && !boost::contains(ff->GetScoreProducerDescription(), ":"))
00413 out << " " << ff->GetScoreProducerWeightShortName() << ":";
00414 out << " " << scores.inner_product(weights);
00415 }
00416
00417
00418 else {
00419 for(FVector::FNVmap::const_iterator i = scores.cbegin(); i != scores.cend(); i++) {
00420 if (i->second != 0) {
00421 if (labeledOutput)
00422 out << " " << i->first << ":";
00423 out << " " << i->second;
00424 }
00425 }
00426 }
00427 }
00428
00429 void WriteFeatures(const TranslationSystem &system, const ScoreComponentCollection &features, std::ostream &out) {
00430 bool labeledOutput = StaticData::Instance().IsLabeledNBestList();
00431
00432 const LMList& lml = system.GetLanguageModels();
00433 if (lml.size() > 0) {
00434 if (labeledOutput)
00435 out << "lm:";
00436 LMList::const_iterator lmi = lml.begin();
00437 for (; lmi != lml.end(); ++lmi) {
00438 out << " " << features.GetScoreForProducer(*lmi);
00439 }
00440 }
00441
00442 std::string lastName = "";
00443
00444
00445 const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
00446 for( size_t i=0; i<sff.size(); i++ )
00447 if (sff[i]->GetNumScoreComponents() == ScoreProducer::unlimited)
00448 OutputSparseFeatureScores(out, features, sff[i], lastName);
00449
00450
00451 const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
00452 if (pds.size() > 0) {
00453 for( size_t i=0; i<pds.size(); i++ ) {
00454 size_t pd_numinputscore = pds[i]->GetNumInputScores();
00455 vector<float> scores = features.GetScoresForProducer( pds[i] );
00456 for (size_t j = 0; j<scores.size(); ++j){
00457 if (labeledOutput && (i == 0) ){
00458 if ((j == 0) || (j == pd_numinputscore)){
00459 lastName = pds[i]->GetScoreProducerWeightShortName(j);
00460 out << " " << lastName << ":";
00461 }
00462 }
00463 out << " " << scores[j];
00464 }
00465 }
00466 }
00467
00468
00469 if (labeledOutput)
00470 out << " w:";
00471 out << " " << features.GetScoreForProducer(system.GetWordPenaltyProducer());
00472
00473
00474 const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
00475 if (gds.size() > 0) {
00476 for( size_t i=0; i<gds.size(); i++ ) {
00477 size_t pd_numinputscore = gds[i]->GetNumInputScores();
00478 vector<float> scores = features.GetScoresForProducer( gds[i] );
00479 for (size_t j = 0; j<scores.size(); ++j){
00480 if (labeledOutput && (i == 0) ){
00481 if ((j == 0) || (j == pd_numinputscore)){
00482 lastName = gds[i]->GetScoreProducerWeightShortName(j);
00483 out << " " << lastName << ":";
00484 }
00485 }
00486 out << " " << scores[j];
00487 }
00488 }
00489 }
00490
00491
00492 lastName = "";
00493
00494 const vector<const StatelessFeatureFunction*>& slf = system.GetStatelessFeatureFunctions();
00495 for( size_t i=0; i<slf.size(); i++ ) {
00496 if (slf[i]->GetNumScoreComponents() == ScoreProducer::unlimited) {
00497 OutputSparseFeatureScores(out, features, slf[i], lastName);
00498 }
00499 }
00500 }
00501
00502 }
00503
00504 void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const TranslationSystem* system, long translationId) {
00505 std::ostringstream out;
00506
00507
00508 if (m_nBestOutputCollector->OutputIsCout()) {
00509
00510
00511 IOWrapper::FixPrecision(out);
00512
00513
00514 }
00515
00516
00517 bool includeWordAlignment = StaticData::Instance().PrintAlignmentInfoInNbest();
00518
00519 ChartTrellisPathList::const_iterator iter;
00520 for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
00521 const ChartTrellisPath &path = **iter;
00522
00523
00524 Moses::Phrase outputPhrase = path.GetOutputPhrase();
00525
00526
00527 CHECK(outputPhrase.GetSize() >= 2);
00528 outputPhrase.RemoveWord(0);
00529 outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00530
00531
00532 out << translationId << " ||| ";
00533 OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
00534 out << " ||| ";
00535
00536
00537
00538
00539
00540 WriteFeatures(*system, path.GetScoreBreakdown(), out);
00541
00542
00543 out << " ||| " << path.GetTotalScore();
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565 if (includeWordAlignment) {
00566 out << " ||| ";
00567
00568 Alignments retAlign;
00569
00570 const ChartTrellisNode &node = path.GetFinalNode();
00571 OutputAlignmentNBest(retAlign, node, 0);
00572
00573 Alignments::const_iterator iter;
00574 for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
00575 const pair<size_t, size_t> &alignPoint = *iter;
00576 out << alignPoint.first << "-" << alignPoint.second << " ";
00577 }
00578 }
00579
00580 out << endl;
00581 }
00582
00583 out <<std::flush;
00584
00585 assert(m_nBestOutputCollector);
00586 m_nBestOutputCollector->Write(translationId, out.str());
00587 }
00588
00589 void IOWrapper::OutputNBestList(const std::vector<search::Applied> &nbest, const TranslationSystem &system, long translationId) {
00590 std::ostringstream out;
00591
00592 if (m_nBestOutputCollector->OutputIsCout()) {
00593 IOWrapper::FixPrecision(out);
00594 }
00595 Phrase outputPhrase;
00596 ScoreComponentCollection features;
00597 for (std::vector<search::Applied>::const_iterator i = nbest.begin(); i != nbest.end(); ++i) {
00598 Incremental::PhraseAndFeatures(system, *i, outputPhrase, features);
00599
00600 CHECK(outputPhrase.GetSize() >= 2);
00601 outputPhrase.RemoveWord(0);
00602 outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00603 out << translationId << " ||| ";
00604 OutputSurface(out, outputPhrase, m_outputFactorOrder, false);
00605 out << " ||| ";
00606 WriteFeatures(system, features, out);
00607 out << " ||| " << i->GetScore() << '\n';
00608 }
00609 out << std::flush;
00610 assert(m_nBestOutputCollector);
00611 m_nBestOutputCollector->Write(translationId, out.str());
00612 }
00613
00614 void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
00615 {
00616 stream.setf(std::ios::fixed);
00617 stream.precision(size);
00618 }
00619
/**
 * Turn per-position sizes into shifted running totals, in place.
 *
 * A running total starts at \p shift. For each element in order, the element
 * is first added to the running total, and the running total is then added
 * to the element. An element that was 0 therefore ends up holding
 * \p shift plus the sum of all preceding original values.
 *
 * \param offsets values to rewrite in place
 * \param shift   starting value of the running total
 */
template <class T>
void ShiftOffsets(std::vector<T> &offsets, T shift)
{
  T running = shift;
  for (typename std::vector<T>::iterator it = offsets.begin(); it != offsets.end(); ++it) {
    running += *it;
    *it += running;
  }
}
00628
00629 size_t IOWrapper::OutputAlignmentNBest(Alignments &retAlign, const Moses::ChartTrellisNode &node, size_t startTarget)
00630 {
00631 const ChartHypothesis *hypo = &node.GetHypothesis();
00632
00633 size_t totalTargetSize = 0;
00634 size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
00635
00636 const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
00637
00638 vector<size_t> sourceOffsets(hypo->GetCurrSourceRange().GetNumWordsCovered(), 0);
00639 vector<size_t> targetOffsets(tp.GetSize(), 0);
00640
00641 const ChartTrellisNode::NodeChildren &prevNodes = node.GetChildren();
00642
00643 const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
00644 vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00645 const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
00646
00647 CHECK(sourceInd2pos.size() == prevNodes.size());
00648
00649 size_t targetInd = 0;
00650 for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00651 if (tp.GetWord(targetPos).IsNonTerminal()) {
00652 CHECK(targetPos < targetPos2SourceInd.size());
00653 size_t sourceInd = targetPos2SourceInd[targetPos];
00654 size_t sourcePos = sourceInd2pos[sourceInd];
00655
00656 const ChartTrellisNode &prevNode = *prevNodes[sourceInd];
00657
00658
00659 size_t sourceSize = prevNode.GetHypothesis().GetCurrSourceRange().GetNumWordsCovered();
00660 sourceOffsets[sourcePos] = sourceSize;
00661
00662
00663 size_t currStartTarget = startTarget + totalTargetSize;
00664 size_t targetSize = OutputAlignmentNBest(retAlign, prevNode, currStartTarget);
00665 targetOffsets[targetPos] = targetSize;
00666
00667 totalTargetSize += targetSize;
00668 ++targetInd;
00669 }
00670 else {
00671 ++totalTargetSize;
00672 }
00673 }
00674
00675
00676 ShiftOffsets(sourceOffsets, startSource);
00677 ShiftOffsets(targetOffsets, startTarget);
00678
00679
00680 vector< set<size_t> > retAlignmentsS2T(hypo->GetCurrSourceRange().GetNumWordsCovered());
00681 const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
00682 OutputAlignment(retAlignmentsS2T, aiTerm);
00683
00684
00685 for (size_t source = 0; source < retAlignmentsS2T.size(); ++source) {
00686 const set<size_t> &targets = retAlignmentsS2T[source];
00687 set<size_t>::const_iterator iter;
00688 for (iter = targets.begin(); iter != targets.end(); ++iter) {
00689 size_t target = *iter;
00690 pair<size_t, size_t> alignPoint(source + sourceOffsets[source]
00691 ,target + targetOffsets[target]);
00692 pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00693 CHECK(ret.second);
00694
00695 }
00696 }
00697
00698 return totalTargetSize;
00699 }
00700
00701 void IOWrapper::OutputAlignment(size_t translationId , const Moses::ChartHypothesis *hypo)
00702 {
00703 ostringstream out;
00704
00705 Alignments retAlign;
00706 OutputAlignment(retAlign, hypo, 0);
00707
00708
00709 Alignments::const_iterator iter;
00710 for (iter = retAlign.begin(); iter != retAlign.end(); ++iter) {
00711 const pair<size_t, size_t> &alignPoint = *iter;
00712 out << alignPoint.first << "-" << alignPoint.second << " ";
00713 }
00714 out << endl;
00715
00716 m_alignmentInfoCollector->Write(translationId, out.str());
00717 }
00718
00719 size_t IOWrapper::OutputAlignment(Alignments &retAlign, const Moses::ChartHypothesis *hypo, size_t startTarget)
00720 {
00721 size_t totalTargetSize = 0;
00722 size_t startSource = hypo->GetCurrSourceRange().GetStartPos();
00723
00724 const TargetPhrase &tp = hypo->GetCurrTargetPhrase();
00725
00726 vector<size_t> sourceOffsets(hypo->GetCurrSourceRange().GetNumWordsCovered(), 0);
00727 vector<size_t> targetOffsets(tp.GetSize(), 0);
00728
00729 const vector<const ChartHypothesis*> &prevHypos = hypo->GetPrevHypos();
00730
00731 const AlignmentInfo &aiNonTerm = hypo->GetCurrTargetPhrase().GetAlignNonTerm();
00732 vector<size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00733 const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd = aiNonTerm.GetNonTermIndexMap();
00734
00735 CHECK(sourceInd2pos.size() == prevHypos.size());
00736
00737 size_t targetInd = 0;
00738 for (size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00739 if (tp.GetWord(targetPos).IsNonTerminal()) {
00740 CHECK(targetPos < targetPos2SourceInd.size());
00741 size_t sourceInd = targetPos2SourceInd[targetPos];
00742 size_t sourcePos = sourceInd2pos[sourceInd];
00743
00744 const ChartHypothesis *prevHypo = prevHypos[sourceInd];
00745
00746
00747 size_t sourceSize = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
00748 sourceOffsets[sourcePos] = sourceSize;
00749
00750
00751 size_t currStartTarget = startTarget + totalTargetSize;
00752 size_t targetSize = OutputAlignment(retAlign, prevHypo, currStartTarget);
00753 targetOffsets[targetPos] = targetSize;
00754
00755 totalTargetSize += targetSize;
00756 ++targetInd;
00757 }
00758 else {
00759 ++totalTargetSize;
00760 }
00761 }
00762
00763
00764 ShiftOffsets(sourceOffsets, startSource);
00765 ShiftOffsets(targetOffsets, startTarget);
00766
00767
00768 vector< set<size_t> > retAlignmentsS2T(hypo->GetCurrSourceRange().GetNumWordsCovered());
00769 const AlignmentInfo &aiTerm = hypo->GetCurrTargetPhrase().GetAlignTerm();
00770 OutputAlignment(retAlignmentsS2T, aiTerm);
00771
00772
00773 for (size_t source = 0; source < retAlignmentsS2T.size(); ++source) {
00774 const set<size_t> &targets = retAlignmentsS2T[source];
00775 set<size_t>::const_iterator iter;
00776 for (iter = targets.begin(); iter != targets.end(); ++iter) {
00777 size_t target = *iter;
00778 pair<size_t, size_t> alignPoint(source + sourceOffsets[source]
00779 ,target + targetOffsets[target]);
00780 pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00781 CHECK(ret.second);
00782
00783 }
00784 }
00785
00786 return totalTargetSize;
00787 }
00788
00789 void IOWrapper::OutputAlignment(vector< set<size_t> > &retAlignmentsS2T, const AlignmentInfo &ai)
00790 {
00791 typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
00792 AlignVec alignments = ai.GetSortedAlignments();
00793
00794 AlignVec::const_iterator it;
00795 for (it = alignments.begin(); it != alignments.end(); ++it) {
00796 const std::pair<size_t,size_t> &alignPoint = **it;
00797
00798 CHECK(alignPoint.first < retAlignmentsS2T.size());
00799 pair<set<size_t>::iterator, bool> ret = retAlignmentsS2T[alignPoint.first].insert(alignPoint.second);
00800 CHECK(ret.second);
00801 }
00802 }
00803
00804 }
00805