00001 #include <sstream>
00002 #include "Manager.h"
00003 #include "PVertex.h"
00004 #include "moses/OutputCollector.h"
00005 #include "moses/Util.h"
00006
00007 namespace Moses
00008 {
00009 namespace Syntax
00010 {
00011
00012 Manager::Manager(ttasksptr const& ttask)
00013 : Moses::BaseManager(ttask)
00014 { }
00015
00016 void Manager::OutputBest(OutputCollector *collector) const
00017 {
00018 if (!collector) {
00019 return;
00020 }
00021 std::ostringstream out;
00022 FixPrecision(out);
00023 const SHyperedge *best = GetBestSHyperedge();
00024 if (best == NULL) {
00025 VERBOSE(1, "NO BEST TRANSLATION" << std::endl);
00026 if (options()->output.ReportHypoScore) {
00027 out << "0 ";
00028 }
00029 out << '\n';
00030 } else {
00031 if (options()->output.ReportHypoScore) {
00032 out << best->label.futureScore << " ";
00033 }
00034 Phrase yield = GetOneBestTargetYield(*best);
00035
00036 UTIL_THROW_IF2(yield.GetSize() < 2,
00037 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
00038 yield.RemoveWord(0);
00039 yield.RemoveWord(yield.GetSize()-1);
00040 out << yield.GetStringRep(options()->output.factor_order);
00041 out << '\n';
00042 }
00043 collector->Write(m_source.GetTranslationId(), out.str());
00044 }
00045
00046 void Manager::OutputNBest(OutputCollector *collector) const
00047 {
00048 if (collector) {
00049 long translationId = m_source.GetTranslationId();
00050 KBestExtractor::KBestVec nBestList;
00051 ExtractKBest(options()->nbest.nbest_size, nBestList,
00052 options()->nbest.only_distinct);
00053 OutputNBestList(collector, nBestList, translationId);
00054 }
00055 }
00056
00057 void Manager::OutputUnknowns(OutputCollector *collector) const
00058 {
00059 if (collector) {
00060 long translationId = m_source.GetTranslationId();
00061
00062 std::ostringstream out;
00063 for (boost::unordered_set<Moses::Word>::const_iterator p = m_oovs.begin();
00064 p != m_oovs.end(); ++p) {
00065 out << *p;
00066 }
00067 out << std::endl;
00068 collector->Write(translationId, out.str());
00069 }
00070 }
00071
00072 void Manager::OutputNBestList(OutputCollector *collector,
00073 const KBestExtractor::KBestVec &nBestList,
00074 long translationId) const
00075 {
00076 const std::vector<FactorType> &outputFactorOrder = options()->output.factor_order;
00077
00078 std::ostringstream out;
00079
00080 if (collector->OutputIsCout()) {
00081
00082
00083 FixPrecision(out);
00084 }
00085
00086 bool includeWordAlignment = options()->nbest.include_alignment_info;
00087 bool PrintNBestTrees = options()->nbest.print_trees;
00088
00089 for (KBestExtractor::KBestVec::const_iterator p = nBestList.begin();
00090 p != nBestList.end(); ++p) {
00091 const KBestExtractor::Derivation &derivation = **p;
00092
00093
00094 Phrase outputPhrase = KBestExtractor::GetOutputPhrase(derivation);
00095
00096
00097 UTIL_THROW_IF2(outputPhrase.GetSize() < 2,
00098 "Output phrase should have contained at least 2 words (beginning and end-of-sentence)");
00099 outputPhrase.RemoveWord(0);
00100 outputPhrase.RemoveWord(outputPhrase.GetSize() - 1);
00101
00102
00103 out << translationId << " ||| ";
00104 OutputSurface(out, outputPhrase);
00105 out << " ||| ";
00106 bool with_labels = options()->nbest.include_feature_labels;
00107 derivation.scoreBreakdown.OutputAllFeatureScores(out, with_labels);
00108 out << " ||| " << derivation.score;
00109
00110
00111 if (includeWordAlignment) {
00112 out << " ||| ";
00113 Alignments align;
00114 OutputAlignmentNBest(align, derivation, 0);
00115 for (Alignments::const_iterator q = align.begin(); q != align.end();
00116 ++q) {
00117 out << q->first << "-" << q->second << " ";
00118 }
00119 }
00120
00121
00122 if (PrintNBestTrees) {
00123 TreePointer tree = KBestExtractor::GetOutputTree(derivation);
00124 out << " ||| " << tree->GetString();
00125 }
00126
00127 out << std::endl;
00128 }
00129
00130 assert(collector);
00131 collector->Write(translationId, out.str());
00132 }
00133
00134 std::size_t Manager::OutputAlignmentNBest(
00135 Alignments &retAlign,
00136 const KBestExtractor::Derivation &derivation,
00137 std::size_t startTarget) const
00138 {
00139 const SHyperedge ­peredge = derivation.edge->shyperedge;
00140
00141 std::size_t totalTargetSize = 0;
00142 std::size_t startSource = shyperedge.head->pvertex->span.GetStartPos();
00143
00144 const TargetPhrase &tp = *(shyperedge.label.translation);
00145
00146 std::size_t thisSourceSize = CalcSourceSize(derivation);
00147
00148
00149
00150 std::vector<std::size_t> sourceOffsets(thisSourceSize, 0);
00151 std::vector<std::size_t> targetOffsets(tp.GetSize(), 0);
00152
00153 const AlignmentInfo &aiNonTerm =
00154 shyperedge.label.translation->GetAlignNonTerm();
00155 std::vector<std::size_t> sourceInd2pos = aiNonTerm.GetSourceIndex2PosMap();
00156 const AlignmentInfo::NonTermIndexMap &targetPos2SourceInd =
00157 aiNonTerm.GetNonTermIndexMap();
00158
00159 UTIL_THROW_IF2(sourceInd2pos.size() != derivation.subderivations.size(),
00160 "Error");
00161
00162 std::size_t targetInd = 0;
00163 for (std::size_t targetPos = 0; targetPos < tp.GetSize(); ++targetPos) {
00164 if (tp.GetWord(targetPos).IsNonTerminal()) {
00165 UTIL_THROW_IF2(targetPos >= targetPos2SourceInd.size(), "Error");
00166 std::size_t sourceInd = targetPos2SourceInd[targetPos];
00167 std::size_t sourcePos = sourceInd2pos[sourceInd];
00168
00169 const KBestExtractor::Derivation &subderivation =
00170 *derivation.subderivations[sourceInd];
00171
00172
00173 std::size_t sourceSize =
00174 subderivation.edge->head->svertex.pvertex->span.GetNumWordsCovered();
00175 sourceOffsets[sourcePos] = sourceSize;
00176
00177
00178
00179 std::size_t currStartTarget = startTarget + totalTargetSize;
00180 std::size_t targetSize = OutputAlignmentNBest(retAlign, subderivation,
00181 currStartTarget);
00182 targetOffsets[targetPos] = targetSize;
00183
00184 totalTargetSize += targetSize;
00185 ++targetInd;
00186 } else {
00187 ++totalTargetSize;
00188 }
00189 }
00190
00191
00192
00193 ShiftOffsets(sourceOffsets, startSource);
00194 ShiftOffsets(targetOffsets, startTarget);
00195
00196
00197 const AlignmentInfo &aiTerm = shyperedge.label.translation->GetAlignTerm();
00198
00199
00200 AlignmentInfo::const_iterator iter;
00201 for (iter = aiTerm.begin(); iter != aiTerm.end(); ++iter) {
00202 const std::pair<std::size_t, std::size_t> &align = *iter;
00203 std::size_t relSource = align.first;
00204 std::size_t relTarget = align.second;
00205 std::size_t absSource = sourceOffsets[relSource];
00206 std::size_t absTarget = targetOffsets[relTarget];
00207
00208 std::pair<std::size_t, std::size_t> alignPoint(absSource, absTarget);
00209 std::pair<Alignments::iterator, bool> ret = retAlign.insert(alignPoint);
00210 UTIL_THROW_IF2(!ret.second, "Error");
00211 }
00212
00213 return totalTargetSize;
00214 }
00215
00216 std::size_t Manager::CalcSourceSize(const KBestExtractor::Derivation &d) const
00217 {
00218 const SHyperedge ­peredge = d.edge->shyperedge;
00219 std::size_t ret = shyperedge.head->pvertex->span.GetNumWordsCovered();
00220 for (std::size_t i = 0; i < shyperedge.tail.size(); ++i) {
00221 std::size_t childSize =
00222 shyperedge.tail[i]->pvertex->span.GetNumWordsCovered();
00223 ret -= (childSize - 1);
00224 }
00225 return ret;
00226 }
00227
00228 }
00229 }