00001
00002
#include <algorithm>
#include <cstdio>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "moses/StaticData.h"
#include "moses/Hypothesis.h"
#include "moses/ChartHypothesis.h"
#include "TranslationAnalysis.h"
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/LM/Base.h"
#include "util/string_stream.hh"
00014
00015 using namespace Moses;
00016
00017 namespace TranslationAnalysis
00018 {
00019
00020 void PrintTranslationAnalysis(std::ostream &os, const Hypothesis* hypo)
00021 {
00022 os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
00023 std::vector<const Hypothesis*> translationPath;
00024
00025 while (hypo) {
00026 translationPath.push_back(hypo);
00027 hypo = hypo->GetPrevHypo();
00028 }
00029
00030 std::reverse(translationPath.begin(), translationPath.end());
00031 std::vector<std::string> droppedWords;
00032 std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
00033 if(tpi == translationPath.end())
00034 return;
00035 ++tpi;
00036 std::vector<std::string> sourceMap;
00037 std::vector<std::string> targetMap;
00038 std::vector<unsigned int> lmAcc(0);
00039 size_t lmCalls = 0;
00040 bool doLMStats = ((*tpi)->GetLMStats() != 0);
00041 if (doLMStats)
00042 lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
00043 for (; tpi != translationPath.end(); ++tpi) {
00044 util::StringStream sms;
00045
00046 util::StringStream tms;
00047 std::string target = (*tpi)->GetTargetPhraseStringRep();
00048 std::string source = (*tpi)->GetSourcePhraseStringRep();
00049 Range twr = (*tpi)->GetCurrTargetWordsRange();
00050 Range swr = (*tpi)->GetCurrSourceWordsRange();
00051 const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignTerm();
00052
00053 if (doLMStats) {
00054 std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
00055 std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
00056 std::vector<unsigned int>::iterator acc = lmAcc.begin();
00057
00058 for (; i != lmstats.end(); ++i, ++acc) {
00059 std::vector<unsigned int>::iterator j = i->begin();
00060 lmCalls += i->size();
00061 for (; j != i->end(); ++j) {
00062 (*acc) += *j;
00063 }
00064 }
00065 }
00066
00067 bool epsilon = false;
00068 if (target == "") {
00069 target="<EPSILON>";
00070 epsilon = true;
00071 droppedWords.push_back(source);
00072 }
00073 os << " SOURCE: " << swr << " " << source << std::endl
00074 << " TRANSLATED AS: " << target << std::endl
00075 << " WORD ALIGNED: " << alignmentInfo << std::endl;
00076 size_t twr_i = twr.GetStartPos();
00077 size_t swr_i = swr.GetStartPos();
00078 if (!epsilon) {
00079 sms << twr_i;
00080 }
00081 if (epsilon) {
00082 tms << "del(" << swr_i << ")";
00083 } else {
00084 tms << swr_i;
00085 }
00086 swr_i++;
00087 twr_i++;
00088 for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
00089 sms << '-' << twr_i;
00090 }
00091 for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
00092 tms << '-' << swr_i;
00093 }
00094 if (!epsilon) targetMap.push_back(sms.str());
00095 sourceMap.push_back(tms.str());
00096 }
00097 std::vector<std::string>::iterator si = sourceMap.begin();
00098 std::vector<std::string>::iterator ti = targetMap.begin();
00099 os << std::endl << "SOURCE/TARGET SPANS:";
00100 os << std::endl << " SOURCE:";
00101 for (; si != sourceMap.end(); ++si) {
00102 os << " " << *si;
00103 }
00104 os << std::endl << " TARGET:";
00105 for (; ti != targetMap.end(); ++ti) {
00106 os << " " << *ti;
00107 }
00108 os << std::endl << std::endl;
00109 if (doLMStats && lmCalls > 0) {
00110 std::vector<unsigned int>::iterator acc = lmAcc.begin();
00111
00112 const std::vector<const StatefulFeatureFunction*> &statefulFFs = StatefulFeatureFunction::GetStatefulFeatureFunctions();
00113 for (size_t i = 0; i < statefulFFs.size(); ++i) {
00114 const StatefulFeatureFunction *ff = statefulFFs[i];
00115 const LanguageModel *lm = dynamic_cast<const LanguageModel*>(ff);
00116
00117 if (lm) {
00118 char buf[256];
00119 sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
00120 os << lm->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
00121
00122 ++acc;
00123 }
00124 }
00125 }
00126
00127 if (droppedWords.size() > 0) {
00128 std::vector<std::string>::iterator dwi = droppedWords.begin();
00129 os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
00130 for (; dwi != droppedWords.end(); ++dwi) {
00131 os << "\tdropped=" << *dwi << std::endl;
00132 }
00133 }
00134 os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
00135 os << translationPath.back()->GetScoreBreakdown();
00136 os << " weighted(TODO)";
00137 os << std::endl;
00138 }
00139
00140 void PrintTranslationAnalysis(std::ostream &os, const Moses::ChartHypothesis* hypo)
00141 {
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167 }
00168
00169 }