00001
00002
00003 #include <iostream>
00004 #include <sstream>
00005 #include <algorithm>
00006 #include "moses/StaticData.h"
00007 #include "moses/Hypothesis.h"
00008 #include "TranslationAnalysis.h"
00009
00010 using namespace Moses;
00011
00012 namespace TranslationAnalysis
00013 {
00014
00015 void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
00016 {
00017 os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
00018 std::vector<const Hypothesis*> translationPath;
00019
00020 while (hypo) {
00021 translationPath.push_back(hypo);
00022 hypo = hypo->GetPrevHypo();
00023 }
00024
00025 std::reverse(translationPath.begin(), translationPath.end());
00026 std::vector<std::string> droppedWords;
00027 std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
00028 if(tpi == translationPath.end())
00029 return;
00030 ++tpi;
00031 std::vector<std::string> sourceMap;
00032 std::vector<std::string> targetMap;
00033 std::vector<unsigned int> lmAcc(0);
00034 size_t lmCalls = 0;
00035 bool doLMStats = ((*tpi)->GetLMStats() != 0);
00036 if (doLMStats)
00037 lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
00038 for (; tpi != translationPath.end(); ++tpi) {
00039 std::ostringstream sms;
00040 std::ostringstream tms;
00041 std::string target = (*tpi)->GetTargetPhraseStringRep();
00042 std::string source = (*tpi)->GetSourcePhraseStringRep();
00043 WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
00044 WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
00045 const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignTerm();
00046
00047 if (doLMStats) {
00048 std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
00049 std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
00050 std::vector<unsigned int>::iterator acc = lmAcc.begin();
00051
00052 for (; i != lmstats.end(); ++i, ++acc) {
00053 std::vector<unsigned int>::iterator j = i->begin();
00054 lmCalls += i->size();
00055 for (; j != i->end(); ++j) {
00056 (*acc) += *j;
00057 }
00058 }
00059 }
00060
00061 bool epsilon = false;
00062 if (target == "") {
00063 target="<EPSILON>";
00064 epsilon = true;
00065 droppedWords.push_back(source);
00066 }
00067 os << " SOURCE: " << swr << " " << source << std::endl
00068 << " TRANSLATED AS: " << target << std::endl
00069 << " WORD ALIGNED: " << alignmentInfo << std::endl;
00070 size_t twr_i = twr.GetStartPos();
00071 size_t swr_i = swr.GetStartPos();
00072 if (!epsilon) {
00073 sms << twr_i;
00074 }
00075 if (epsilon) {
00076 tms << "del(" << swr_i << ")";
00077 } else {
00078 tms << swr_i;
00079 }
00080 swr_i++;
00081 twr_i++;
00082 for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
00083 sms << '-' << twr_i;
00084 }
00085 for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
00086 tms << '-' << swr_i;
00087 }
00088 if (!epsilon) targetMap.push_back(sms.str());
00089 sourceMap.push_back(tms.str());
00090 }
00091 std::vector<std::string>::iterator si = sourceMap.begin();
00092 std::vector<std::string>::iterator ti = targetMap.begin();
00093 os << std::endl << "SOURCE/TARGET SPANS:";
00094 os << std::endl << " SOURCE:";
00095 for (; si != sourceMap.end(); ++si) {
00096 os << " " << *si;
00097 }
00098 os << std::endl << " TARGET:";
00099 for (; ti != targetMap.end(); ++ti) {
00100 os << " " << *ti;
00101 }
00102 os << std::endl << std::endl;
00103 if (doLMStats && lmCalls > 0) {
00104 std::vector<unsigned int>::iterator acc = lmAcc.begin();
00105
00106 const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00107 for (size_t i = 0; i < statefulFFs.size(); ++i) {
00108 const StatefulFeatureFunction *ff = statefulFFs[i];
00109 const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
00110
00111 if (lm) {
00112 char buf[256];
00113 sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
00114 os << lm->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
00115
00116 ++acc;
00117 }
00118 }
00119 }
00120
00121 if (droppedWords.size() > 0) {
00122 std::vector<std::string>::iterator dwi = droppedWords.begin();
00123 os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
00124 for (; dwi != droppedWords.end(); ++dwi) {
00125 os << "\tdropped=" << *dwi << std::endl;
00126 }
00127 }
00128 os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
00129 os << translationPath.back()->GetScoreBreakdown();
00130 os << " weighted(TODO)";
00131 os << std::endl;
00132 }
00133
00134 }