00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00026 #include <exception>
00027 #include <fstream>
00028 #include <sstream>
00029 #include <vector>
00030
00031 #ifdef WIN32
00032
00033
00034 #endif
00035
00036 #include "Hypothesis.h"
00037 #include "IOWrapper.h"
00038 #include "LatticeMBR.h"
00039 #include "Manager.h"
00040 #include "StaticData.h"
00041 #include "Util.h"
00042 #include "mbr.h"
00043 #include "ThreadPool.h"
00044 #include "TranslationAnalysis.h"
00045 #include "OutputCollector.h"
00046
00047 #ifdef HAVE_PROTOBUF
00048 #include "hypergraph.pb.h"
00049 #endif
00050
00051 using namespace std;
00052 using namespace Moses;
00053
00054
/// Number of digits after the decimal point used for the decoder's numeric output.
static const std::size_t PRECISION = 3;

/**
 * Switch a stream to fixed-point notation with a given number of decimals.
 *
 * @param stream  stream whose floating-point formatting is changed
 * @param size    number of digits to print after the decimal point
 */
void fix(std::ostream& stream, std::size_t size)
{
  // Use the two-argument setf so that any previously selected notation
  // (e.g. scientific) is cleared. Setting the fixed bit on top of the
  // scientific bit would select hexadecimal floating-point output instead.
  stream.setf(std::ios_base::fixed, std::ios_base::floatfield);
  stream.precision(static_cast<std::streamsize>(size));
}
00063
00069 class TranslationTask : public Task
00070 {
00071
00072 public:
00073
00074 TranslationTask(size_t lineNumber,
00075 InputType* source, OutputCollector* outputCollector, OutputCollector* nbestCollector,
00076 OutputCollector* latticeSamplesCollector,
00077 OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector,
00078 OutputCollector* detailedTranslationCollector,
00079 OutputCollector* alignmentInfoCollector ) :
00080 m_source(source), m_lineNumber(lineNumber),
00081 m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
00082 m_latticeSamplesCollector(latticeSamplesCollector),
00083 m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector),
00084 m_detailedTranslationCollector(detailedTranslationCollector),
00085 m_alignmentInfoCollector(alignmentInfoCollector) {}
00086
00089 void Run() {
00090
00091
00092 #ifdef BOOST_HAS_PTHREADS
00093 TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
00094 #endif
00095
00096
00097 const StaticData &staticData = StaticData::Instance();
00098
00099 Sentence sentence();
00100
00101 const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
00102
00103
00104
00105
00106 Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
00107 manager.ProcessSentence();
00108
00109
00110 if (m_wordGraphCollector) {
00111 ostringstream out;
00112 fix(out,PRECISION);
00113 manager.GetWordGraph(m_lineNumber, out);
00114 m_wordGraphCollector->Write(m_lineNumber, out.str());
00115 }
00116
00117
00118 if (m_searchGraphCollector) {
00119 ostringstream out;
00120 fix(out,PRECISION);
00121 manager.OutputSearchGraph(m_lineNumber, out);
00122 m_searchGraphCollector->Write(m_lineNumber, out.str());
00123
00124 #ifdef HAVE_PROTOBUF
00125 if (staticData.GetOutputSearchGraphPB()) {
00126 ostringstream sfn;
00127 sfn << staticData.GetParam("output-search-graph-pb")[0] << '/' << m_lineNumber << ".pb" << ends;
00128 string fn = sfn.str();
00129 VERBOSE(2, "Writing search graph to " << fn << endl);
00130 fstream output(fn.c_str(), ios::trunc | ios::binary | ios::out);
00131 manager.SerializeSearchGraphPB(m_lineNumber, output);
00132 }
00133 #endif
00134 }
00135
00136
00137 if (m_outputCollector) {
00138 ostringstream out;
00139 ostringstream debug;
00140 fix(debug,PRECISION);
00141
00142
00143 if (staticData.PrintAllDerivations()) {
00144 manager.PrintAllDerivations(m_lineNumber, debug);
00145 }
00146
00147
00148 const Hypothesis* bestHypo = NULL;
00149 if (!staticData.UseMBR())
00150 {
00151 bestHypo = manager.GetBestHypothesis();
00152 if (bestHypo) {
00153 if (staticData.IsPathRecoveryEnabled()) {
00154 OutputInput(out, bestHypo);
00155 out << "||| ";
00156 }
00157 OutputSurface(
00158 out,
00159 bestHypo,
00160 staticData.GetOutputFactorOrder(),
00161 staticData.GetReportSegmentation(),
00162 staticData.GetReportAllFactors());
00163 OutputAlignment(m_alignmentInfoCollector, m_lineNumber, bestHypo);
00164 IFVERBOSE(1) {
00165 debug << "BEST TRANSLATION: " << *bestHypo << endl;
00166 }
00167 }
00168 out << endl;
00169 }
00170
00171
00172 else
00173 {
00174
00175 size_t nBestSize = staticData.GetMBRSize();
00176 if (nBestSize <= 0) {
00177 cerr << "ERROR: negative size for number of MBR candidate translations not allowed (option mbr-size)" << endl;
00178 exit(1);
00179 }
00180 TrellisPathList nBestList;
00181 manager.CalcNBest(nBestSize, nBestList,true);
00182 VERBOSE(2,"size of n-best: " << nBestList.GetSize() << " (" << nBestSize << ")" << endl);
00183 IFVERBOSE(2) {
00184 PrintUserTime("calculated n-best list for (L)MBR decoding");
00185 }
00186
00187
00188 if (staticData.UseLatticeMBR()) {
00189 if (m_nbestCollector) {
00190
00191 vector<LatticeMBRSolution> solutions;
00192 size_t n = min(nBestSize, staticData.GetNBestSize());
00193 getLatticeMBRNBest(manager,nBestList,solutions,n);
00194 ostringstream out;
00195 OutputLatticeMBRNBest(out, solutions,m_lineNumber);
00196 m_nbestCollector->Write(m_lineNumber, out.str());
00197 } else {
00198
00199 vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
00200 OutputBestHypo(mbrBestHypo, m_lineNumber, staticData.GetReportSegmentation(),
00201 staticData.GetReportAllFactors(),out);
00202 IFVERBOSE(2) {
00203 PrintUserTime("finished Lattice MBR decoding");
00204 }
00205 }
00206 }
00207
00208
00209 else if (staticData.UseConsensusDecoding()) {
00210 const TrellisPath &conBestHypo = doConsensusDecoding(manager,nBestList);
00211 OutputBestHypo(conBestHypo, m_lineNumber,
00212 staticData.GetReportSegmentation(),
00213 staticData.GetReportAllFactors(),out);
00214 OutputAlignment(m_alignmentInfoCollector, m_lineNumber, conBestHypo);
00215 IFVERBOSE(2) {
00216 PrintUserTime("finished Consensus decoding");
00217 }
00218 }
00219
00220
00221 else {
00222 const Moses::TrellisPath &mbrBestHypo = doMBR(nBestList);
00223 OutputBestHypo(mbrBestHypo, m_lineNumber,
00224 staticData.GetReportSegmentation(),
00225 staticData.GetReportAllFactors(),out);
00226 OutputAlignment(m_alignmentInfoCollector, m_lineNumber, mbrBestHypo);
00227 IFVERBOSE(2) {
00228 PrintUserTime("finished MBR decoding");
00229 }
00230 }
00231 }
00232
00233
00234 m_outputCollector->Write(m_lineNumber,out.str(),debug.str());
00235 }
00236
00237
00238 if (m_nbestCollector && !staticData.UseLatticeMBR()) {
00239 TrellisPathList nBestList;
00240 ostringstream out;
00241 manager.CalcNBest(staticData.GetNBestSize(), nBestList,staticData.GetDistinctNBest());
00242 OutputNBest(out,nBestList, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
00243 m_nbestCollector->Write(m_lineNumber, out.str());
00244 }
00245
00246
00247 if (m_latticeSamplesCollector) {
00248 TrellisPathList latticeSamples;
00249 ostringstream out;
00250 manager.CalcLatticeSamples(staticData.GetLatticeSamplesSize(), latticeSamples);
00251 OutputNBest(out,latticeSamples, staticData.GetOutputFactorOrder(), manager.GetTranslationSystem(), m_lineNumber);
00252 m_latticeSamplesCollector->Write(m_lineNumber, out.str());
00253 }
00254
00255
00256 if (m_detailedTranslationCollector) {
00257 ostringstream out;
00258 fix(out,PRECISION);
00259 TranslationAnalysis::PrintTranslationAnalysis(manager.GetTranslationSystem(), out, manager.GetBestHypothesis());
00260 m_detailedTranslationCollector->Write(m_lineNumber,out.str());
00261 }
00262
00263
00264 IFVERBOSE(2) {
00265 PrintUserTime("Sentence Decoding Time:");
00266 }
00267 manager.CalcDecoderStatistics();
00268 }
00269
00270 ~TranslationTask() {
00271 delete m_source;
00272 }
00273
00274 private:
00275 InputType* m_source;
00276 size_t m_lineNumber;
00277 OutputCollector* m_outputCollector;
00278 OutputCollector* m_nbestCollector;
00279 OutputCollector* m_latticeSamplesCollector;
00280 OutputCollector* m_wordGraphCollector;
00281 OutputCollector* m_searchGraphCollector;
00282 OutputCollector* m_detailedTranslationCollector;
00283 OutputCollector* m_alignmentInfoCollector;
00284 std::ofstream *m_alignmentStream;
00285
00286
00287 };
00288
00289 static void PrintFeatureWeight(const FeatureFunction* ff)
00290 {
00291
00292 size_t weightStart = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(ff->GetScoreBookkeepingID());
00293 size_t weightEnd = StaticData::Instance().GetScoreIndexManager().GetEndIndex(ff->GetScoreBookkeepingID());
00294 for (size_t i = weightStart; i < weightEnd; ++i) {
00295 cout << ff->GetScoreProducerDescription(i-weightStart) << " " << ff->GetScoreProducerWeightShortName(i-weightStart) << " "
00296 << StaticData::Instance().GetAllWeights()[i] << endl;
00297 }
00298 }
00299
00300
00301 static void ShowWeights()
00302 {
00303 fix(cout,6);
00304 const StaticData& staticData = StaticData::Instance();
00305 const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
00306 const vector<const StatelessFeatureFunction*>& slf =system.GetStatelessFeatureFunctions();
00307 const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
00308 const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
00309 const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
00310 for (size_t i = 0; i < sff.size(); ++i) {
00311 PrintFeatureWeight(sff[i]);
00312 }
00313 for (size_t i = 0; i < slf.size(); ++i) {
00314 PrintFeatureWeight(slf[i]);
00315 }
00316 for (size_t i = 0; i < pds.size(); ++i) {
00317 PrintFeatureWeight(pds[i]);
00318 }
00319 for (size_t i = 0; i < gds.size(); ++i) {
00320 PrintFeatureWeight(gds[i]);
00321 }
00322 }
00323
00325 int main(int argc, char** argv)
00326 {
00327 try {
00328
00329 #ifdef HAVE_PROTOBUF
00330 GOOGLE_PROTOBUF_VERIFY_VERSION;
00331 #endif
00332
00333
00334 IFVERBOSE(1) {
00335 TRACE_ERR("command: ");
00336 for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
00337 TRACE_ERR(endl);
00338 }
00339
00340
00341 fix(cout,PRECISION);
00342 fix(cerr,PRECISION);
00343
00344
00345
00346 Parameter* params = new Parameter();
00347 if (!params->LoadParam(argc,argv)) {
00348 params->Explain();
00349 exit(1);
00350 }
00351
00352
00353
00354
00355 if (!StaticData::LoadDataStatic(params)) {
00356 exit(1);
00357 }
00358
00359
00360 if (params->isParamSpecified("show-weights")) {
00361 ShowWeights();
00362 exit(0);
00363 }
00364
00365
00366 const StaticData& staticData = StaticData::Instance();
00367
00368
00369
00370 srand(time(NULL));
00371
00372
00373 IOWrapper* ioWrapper = GetIODevice(staticData);
00374 if (!ioWrapper) {
00375 cerr << "Error; Failed to create IO object" << endl;
00376 exit(1);
00377 }
00378
00379
00380 vector<float> weights = staticData.GetAllWeights();
00381 IFVERBOSE(2) {
00382 TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
00383 TRACE_ERR("The global weight vector looks like this:");
00384 for (size_t j=0; j<weights.size(); j++) {
00385 TRACE_ERR(" " << weights[j]);
00386 }
00387 TRACE_ERR("\n");
00388 }
00389
00390 if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
00391 TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
00392 exit(1);
00393 }
00394
00395
00396
00397
00398 auto_ptr<OutputCollector> outputCollector;
00399 auto_ptr<OutputCollector> nbestCollector;
00400 auto_ptr<OutputCollector> latticeSamplesCollector;
00401 auto_ptr<ofstream> nbestOut;
00402 auto_ptr<ofstream> latticeSamplesOut;
00403 size_t nbestSize = staticData.GetNBestSize();
00404 string nbestFile = staticData.GetNBestFilePath();
00405 bool output1best = true;
00406 if (nbestSize) {
00407 if (nbestFile == "-" || nbestFile == "/dev/stdout") {
00408
00409 nbestCollector.reset(new OutputCollector());
00410 output1best = false;
00411 } else {
00412
00413 nbestOut.reset(new ofstream(nbestFile.c_str()));
00414 if (!nbestOut->good()) {
00415 TRACE_ERR("ERROR: Failed to open " << nbestFile << " for nbest lists" << endl);
00416 exit(1);
00417 }
00418 nbestCollector.reset(new OutputCollector(nbestOut.get()));
00419 }
00420 }
00421 size_t latticeSamplesSize = staticData.GetLatticeSamplesSize();
00422 string latticeSamplesFile = staticData.GetLatticeSamplesFilePath();
00423 if (latticeSamplesSize) {
00424 if (latticeSamplesFile == "-" || latticeSamplesFile == "/dev/stdout") {
00425 latticeSamplesCollector.reset(new OutputCollector());
00426 output1best = false;
00427 } else {
00428 latticeSamplesOut.reset(new ofstream(latticeSamplesFile.c_str()));
00429 if (!latticeSamplesOut->good()) {
00430 TRACE_ERR("ERROR: Failed to open " << latticeSamplesFile << " for lattice samples" << endl);
00431 exit(1);
00432 }
00433 latticeSamplesCollector.reset(new OutputCollector(latticeSamplesOut.get()));
00434 }
00435 }
00436 if (output1best) {
00437 outputCollector.reset(new OutputCollector());
00438 }
00439
00440
00441 auto_ptr<OutputCollector> wordGraphCollector;
00442 if (staticData.GetOutputWordGraph()) {
00443 wordGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputWordGraphStream())));
00444 }
00445
00446
00447
00448 auto_ptr<OutputCollector> searchGraphCollector;
00449 if (staticData.GetOutputSearchGraph()) {
00450 searchGraphCollector.reset(new OutputCollector(&(ioWrapper->GetOutputSearchGraphStream())));
00451 }
00452
00453
00454 auto_ptr<OutputCollector> detailedTranslationCollector;
00455 if (staticData.IsDetailedTranslationReportingEnabled()) {
00456 detailedTranslationCollector.reset(new OutputCollector(&(ioWrapper->GetDetailedTranslationReportingStream())));
00457 }
00458
00459
00460 auto_ptr<OutputCollector> alignmentInfoCollector;
00461 if (!staticData.GetAlignmentOutputFile().empty()) {
00462 alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
00463 }
00464
00465 #ifdef WITH_THREADS
00466 ThreadPool pool(staticData.ThreadCount());
00467 #endif
00468
00469
00470 InputType* source = NULL;
00471 size_t lineCount = 0;
00472 while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
00473 IFVERBOSE(1) {
00474 ResetUserTime();
00475 }
00476
00477 TranslationTask* task =
00478 new TranslationTask(lineCount,source, outputCollector.get(),
00479 nbestCollector.get(),
00480 latticeSamplesCollector.get(),
00481 wordGraphCollector.get(),
00482 searchGraphCollector.get(),
00483 detailedTranslationCollector.get(),
00484 alignmentInfoCollector.get() );
00485
00486 #ifdef WITH_THREADS
00487 pool.Submit(task);
00488 #else
00489 task->Run();
00490 #endif
00491
00492 source = NULL;
00493 ++lineCount;
00494 }
00495
00496
00497 #ifdef WITH_THREADS
00498 pool.Stop(true);
00499 #endif
00500
00501 } catch (const std::exception &e) {
00502 std::cerr << "Exception: " << e.what() << std::endl;
00503 return EXIT_FAILURE;
00504 }
00505
00506 #ifndef EXIT_RETURN
00507
00508 exit(EXIT_SUCCESS);
00509 #else
00510 return EXIT_SUCCESS;
00511 #endif
00512 }