00001 #include "KBestExtractor.h"
00002
00003 #include "moses/ScoreComponentCollection.h"
00004 #include "moses/StaticData.h"
00005
00006 #include <boost/scoped_ptr.hpp>
00007
00008 #include <vector>
00009
00010 namespace Moses
00011 {
00012 namespace Syntax
00013 {
00014
00015
00016 void KBestExtractor::Extract(
00017 const std::vector<boost::shared_ptr<SVertex> > &topLevelVertices,
00018 std::size_t k, KBestVec &kBestList)
00019 {
00020 kBestList.clear();
00021 if (topLevelVertices.empty()) {
00022 return;
00023 }
00024
00025
00026
00027 std::vector<boost::shared_ptr<SVertex> >::const_iterator p =
00028 topLevelVertices.begin();
00029 SVertex &bestTopLevelVertex = **p;
00030 boost::scoped_ptr<SVertex> supremeVertex(new SVertex());
00031 supremeVertex->pvertex = 0;
00032 supremeVertex->best = new SHyperedge();
00033 supremeVertex->best->head = supremeVertex.get();
00034 supremeVertex->best->tail.push_back(&bestTopLevelVertex);
00035 supremeVertex->best->label.futureScore =
00036 bestTopLevelVertex.best->label.futureScore;
00037 supremeVertex->best->label.deltas = bestTopLevelVertex.best->label.deltas;
00038 supremeVertex->best->label.translation = 0;
00039
00040
00041
00042 for (++p; p != topLevelVertices.end(); ++p) {
00043
00044 UTIL_THROW_IF2((*p)->best->label.futureScore >
00045 bestTopLevelVertex.best->label.futureScore,
00046 "top-level SVertices are not correctly sorted");
00047
00048
00049 SHyperedge *altEdge = new SHyperedge();
00050 altEdge->head = supremeVertex.get();
00051 altEdge->tail.push_back((*p).get());
00052 altEdge->label.futureScore = (*p)->best->label.futureScore;
00053 altEdge->label.deltas = (*p)->best->label.deltas;
00054 altEdge->label.translation = 0;
00055 supremeVertex->recombined.push_back(altEdge);
00056 }
00057
00058
00059 boost::shared_ptr<KVertex> targetVertex = FindOrCreateVertex(*supremeVertex);
00060 LazyKthBest(targetVertex, k, k);
00061
00062
00063
00064 kBestList.reserve(targetVertex->kBestList.size());
00065 for (std::vector<boost::weak_ptr<Derivation> >::const_iterator
00066 q = targetVertex->kBestList.begin();
00067 q != targetVertex->kBestList.end(); ++q) {
00068 const boost::shared_ptr<Derivation> d(*q);
00069 assert(d);
00070 assert(d->subderivations.size() == 1);
00071 kBestList.push_back(d->subderivations[0]);
00072 }
00073 }
00074
00075
00076 Phrase KBestExtractor::GetOutputPhrase(const Derivation &d)
00077 {
00078 FactorType placeholderFactor = StaticData::Instance().options()->input.placeholder_factor;
00079
00080 Phrase ret(ARRAY_SIZE_INCR);
00081
00082 const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation);
00083 const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
00084 phrase.GetAlignNonTerm().GetNonTermIndexMap();
00085 for (std::size_t pos = 0; pos < phrase.GetSize(); ++pos) {
00086 const Word &word = phrase.GetWord(pos);
00087 if (word.IsNonTerminal()) {
00088 std::size_t nonTermInd = nonTermIndexMap[pos];
00089 const Derivation &subderivation = *d.subderivations[nonTermInd];
00090 Phrase subPhrase = GetOutputPhrase(subderivation);
00091 ret.Append(subPhrase);
00092 } else {
00093 ret.AddWord(word);
00094 if (placeholderFactor == NOT_FOUND) {
00095 continue;
00096 }
00097
00098 UTIL_THROW2("placeholders are not currently supported by the S2T decoder");
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117 }
00118 }
00119
00120 return ret;
00121 }
00122
00123
00124 TreePointer KBestExtractor::GetOutputTree(const Derivation &d)
00125 {
00126 const TargetPhrase &phrase = *(d.edge->shyperedge.label.translation);
00127 if (const PhraseProperty *property = phrase.GetProperty("Tree")) {
00128 const std::string *tree = property->GetValueString();
00129 TreePointer mytree (boost::make_shared<InternalTree>(*tree));
00130
00131
00132 std::vector<TreePointer> previous_trees;
00133 for (size_t pos = 0; pos < phrase.GetSize(); ++pos) {
00134 const Word &word = phrase.GetWord(pos);
00135 if (word.IsNonTerminal()) {
00136 size_t nonTermInd = phrase.GetAlignNonTerm().GetNonTermIndexMap()[pos];
00137 const Derivation &subderivation = *d.subderivations[nonTermInd];
00138 const TreePointer prev_tree = GetOutputTree(subderivation);
00139 previous_trees.push_back(prev_tree);
00140 }
00141 }
00142
00143 mytree->Combine(previous_trees);
00144 return mytree;
00145 } else {
00146 UTIL_THROW2("Error: TreeStructureFeature active, but no internal tree structure found");
00147 }
00148 }
00149
00150
00151
00152 boost::shared_ptr<KBestExtractor::KVertex>
00153 KBestExtractor::FindOrCreateVertex(const SVertex &v)
00154 {
00155
00156 assert(v.best);
00157
00158 VertexMap::value_type element(&v, boost::shared_ptr<KVertex>());
00159 std::pair<VertexMap::iterator, bool> p = m_vertexMap.insert(element);
00160 boost::shared_ptr<KVertex> &sp = p.first->second;
00161 if (!p.second) {
00162 return sp;
00163 }
00164 sp.reset(new KVertex(v));
00165
00166 boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(*(v.best)));
00167 bestEdge->head = sp;
00168 std::size_t kTailSize = 0;
00169 for (std::size_t i = 0; i < v.best->tail.size(); ++i) {
00170 const SVertex *pred = v.best->tail[i];
00171 if (pred->best) {
00172 ++kTailSize;
00173 }
00174 }
00175 bestEdge->tail.reserve(kTailSize);
00176 for (std::size_t i = 0; i < v.best->tail.size(); ++i) {
00177 const SVertex *pred = v.best->tail[i];
00178 if (pred->best) {
00179 bestEdge->tail.push_back(FindOrCreateVertex(*pred));
00180 }
00181 }
00182 boost::shared_ptr<Derivation> bestDerivation(new Derivation(bestEdge));
00183 #ifndef NDEBUG
00184 std::pair<DerivationSet::iterator, bool> q =
00185 #endif
00186 m_derivations.insert(bestDerivation);
00187 assert(q.second);
00188 sp->kBestList.push_back(bestDerivation);
00189 return sp;
00190 }
00191
00192
00193
00194 void KBestExtractor::GetCandidates(boost::shared_ptr<KVertex> v, std::size_t k)
00195 {
00196
00197
00198 for (std::size_t i = 0; i < v->svertex.recombined.size(); ++i) {
00199 const SHyperedge ­peredge = *(v->svertex.recombined[i]);
00200 boost::shared_ptr<KHyperedge> bestEdge(new KHyperedge(shyperedge));
00201 bestEdge->head = v;
00202
00203 std::size_t kTailSize = 0;
00204 for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) {
00205 const SVertex *pred = shyperedge.tail[j];
00206 if (pred->best) {
00207 ++kTailSize;
00208 }
00209 }
00210 bestEdge->tail.reserve(kTailSize);
00211 for (std::size_t j = 0; j < shyperedge.tail.size(); ++j) {
00212 const SVertex *pred = shyperedge.tail[j];
00213 if (pred->best) {
00214 bestEdge->tail.push_back(FindOrCreateVertex(*pred));
00215 }
00216 }
00217 boost::shared_ptr<Derivation> derivation(new Derivation(bestEdge));
00218 #ifndef NDEBUG
00219 std::pair<DerivationSet::iterator, bool> q =
00220 #endif
00221 m_derivations.insert(derivation);
00222 assert(q.second);
00223 v->candidates.push(derivation);
00224 }
00225 }
00226
00227
00228 void KBestExtractor::LazyKthBest(boost::shared_ptr<KVertex> v, std::size_t k,
00229 std::size_t globalK)
00230 {
00231
00232 if (v->visited == false) {
00233
00234 assert(v->kBestList.size() == 1);
00235
00236 GetCandidates(v, globalK);
00237 v->visited = true;
00238 }
00239
00240
00241 while (v->kBestList.size() < k) {
00242 assert(!v->kBestList.empty());
00243
00244
00245 boost::shared_ptr<Derivation> d(v->kBestList.back());
00246 LazyNext(*v, *d, globalK);
00247
00248 if (v->candidates.empty()) {
00249 break;
00250 }
00251
00252 boost::weak_ptr<Derivation> next = v->candidates.top();
00253 v->candidates.pop();
00254
00255 v->kBestList.push_back(next);
00256 }
00257 }
00258
00259
00260 void KBestExtractor::LazyNext(KVertex &v, const Derivation &d,
00261 std::size_t globalK)
00262 {
00263 for (std::size_t i = 0; i < d.edge->tail.size(); ++i) {
00264 boost::shared_ptr<KVertex> pred = d.edge->tail[i];
00265
00266 std::size_t k = d.backPointers[i] + 2;
00267 LazyKthBest(pred, k, globalK);
00268 if (pred->kBestList.size() < k) {
00269
00270 continue;
00271 }
00272
00273 boost::shared_ptr<Derivation> next(new Derivation(d, i));
00274
00275 std::pair<DerivationSet::iterator, bool> p = m_derivations.insert(next);
00276 if (p.second) {
00277 v.candidates.push(next);
00278 }
00279 }
00280 }
00281
00282
00283 KBestExtractor::Derivation::Derivation(const boost::shared_ptr<KHyperedge> &e)
00284 {
00285 edge = e;
00286 const TargetPhrase *translation = edge->shyperedge.label.translation;
00287
00288
00289 if (translation) {
00290 scoreBreakdown = translation->GetScoreBreakdown();
00291 }
00292 const std::size_t arity = edge->tail.size();
00293 backPointers.resize(arity, 0);
00294 subderivations.reserve(arity);
00295 for (std::size_t i = 0; i < arity; ++i) {
00296 const KVertex &pred = *(edge->tail[i]);
00297 assert(pred.kBestList.size() >= 1);
00298 boost::shared_ptr<Derivation> sub(pred.kBestList[0]);
00299 subderivations.push_back(sub);
00300 scoreBreakdown.PlusEquals(sub->scoreBreakdown);
00301 }
00302 scoreBreakdown.PlusEquals(edge->shyperedge.label.deltas);
00303 score = scoreBreakdown.GetWeightedScore();
00304 }
00305
00306
00307 KBestExtractor::Derivation::Derivation(const Derivation &d, std::size_t i)
00308 {
00309 edge = d.edge;
00310 backPointers = d.backPointers;
00311 subderivations = d.subderivations;
00312 std::size_t j = ++backPointers[i];
00313 scoreBreakdown = d.scoreBreakdown;
00314
00315 scoreBreakdown.MinusEquals(subderivations[i]->scoreBreakdown);
00316
00317 boost::shared_ptr<Derivation> newSub(edge->tail[i]->kBestList[j]);
00318 subderivations[i] = newSub;
00319
00320 scoreBreakdown.PlusEquals(subderivations[i]->scoreBreakdown);
00321 score = scoreBreakdown.GetWeightedScore();
00322 }
00323
00324 }
00325 }