00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include <algorithm>
00023 #include <set>
00024 #include <queue>
00025 #include "HypothesisStackNormal.h"
00026 #include "TypeDef.h"
00027 #include "Util.h"
00028 #include "Manager.h"
00029 #include "util/exception.hh"
00030
00031 using namespace std;
00032
00033 namespace Moses
00034 {
00035 HypothesisStackNormal::HypothesisStackNormal(Manager& manager) :
00036 HypothesisStack(manager)
00037 {
00038 m_nBestIsEnabled = manager.options()->nbest.enabled;
00039 m_bestScore = -std::numeric_limits<float>::infinity();
00040 m_worstScore = -std::numeric_limits<float>::infinity();
00041 }
00042
00044 void HypothesisStackNormal::RemoveAll()
00045 {
00046 while (m_hypos.begin() != m_hypos.end()) {
00047 Remove(m_hypos.begin());
00048 }
00049 }
00050
00051 pair<HypothesisStackNormal::iterator, bool> HypothesisStackNormal::Add(Hypothesis *hypo)
00052 {
00053 std::pair<iterator, bool> ret = m_hypos.insert(hypo);
00054 if (ret.second) {
00055
00056 VERBOSE(3,"added hyp to stack");
00057
00058
00059 if (hypo->GetFutureScore() > m_bestScore) {
00060 VERBOSE(3,", best on stack");
00061 m_bestScore = hypo->GetFutureScore();
00062
00063 if ( m_bestScore + m_beamWidth > m_worstScore )
00064 m_worstScore = m_bestScore + m_beamWidth;
00065 }
00066
00067 if ( m_minHypoStackDiversity == 1 &&
00068 hypo->GetFutureScore() > GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) {
00069 SetWorstScoreForBitmap( hypo->GetWordsBitmap().GetID(), hypo->GetFutureScore() );
00070 }
00071
00072 VERBOSE(3,", now size " << m_hypos.size());
00073
00074
00075 size_t toleratedSize = 2*m_maxHypoStackSize-1;
00076
00077 if (m_minHypoStackDiversity) {
00078
00079 toleratedSize += m_minHypoStackDiversity
00080 << m_manager.options()->reordering.max_distortion;
00081 }
00082
00083 if (m_hypos.size() > toleratedSize) {
00084 PruneToSize(m_maxHypoStackSize);
00085 } else {
00086 VERBOSE(3,std::endl);
00087 }
00088 }
00089
00090 return ret;
00091 }
00092
00093 bool HypothesisStackNormal::AddPrune(Hypothesis *hypo)
00094 {
00095 if (hypo->GetFutureScore() == - std::numeric_limits<float>::infinity()) {
00096 m_manager.GetSentenceStats().AddDiscarded();
00097 VERBOSE(3,"discarded, constraint" << std::endl);
00098 delete hypo;
00099 return false;
00100 }
00101
00102
00103 if (m_manager.options()->search.disable_discarding == false
00104 && hypo->GetFutureScore() < m_worstScore
00105 && ! ( m_minHypoStackDiversity > 0
00106 && hypo->GetFutureScore() >= GetWorstScoreForBitmap( hypo->GetWordsBitmap() ) ) ) {
00107 m_manager.GetSentenceStats().AddDiscarded();
00108 VERBOSE(3,"discarded, too bad for stack" << std::endl);
00109 delete hypo;
00110 return false;
00111 }
00112
00113
00114 std::pair<iterator, bool> addRet = Add(hypo);
00115 if (addRet.second) {
00116
00117 return true;
00118 }
00119
00120
00121 iterator &iterExisting = addRet.first;
00122 Hypothesis *hypoExisting = *iterExisting;
00123 assert(iterExisting != m_hypos.end());
00124
00125 m_manager.GetSentenceStats().AddRecombination(*hypo, **iterExisting);
00126
00127
00128
00129 if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
00130
00131 VERBOSE(3,"better than matching hyp " << hypoExisting->GetId() << ", recombining, ");
00132 if (m_nBestIsEnabled) {
00133 hypo->AddArc(hypoExisting);
00134 Detach(iterExisting);
00135 } else {
00136 Remove(iterExisting);
00137 }
00138
00139 bool added = Add(hypo).second;
00140 if (!added) {
00141 iterExisting = m_hypos.find(hypo);
00142 UTIL_THROW2("Offending hypo = " << **iterExisting);
00143 }
00144 return false;
00145 } else {
00146
00147 VERBOSE(3,"worse than matching hyp " << hypoExisting->GetId() << ", recombining" << std::endl)
00148 if (m_nBestIsEnabled) {
00149 hypoExisting->AddArc(hypo);
00150 } else {
00151 delete hypo;
00152 }
00153 return false;
00154 }
00155 }
00156
00157 void HypothesisStackNormal::PruneToSize(size_t newSize)
00158 {
00159 if ( newSize == 0) return;
00160 if ( size() <= newSize ) return;
00161
00162
00163 vector< Hypothesis* > hypos = GetSortedListNOTCONST();
00164 bool* included = (bool*) malloc(sizeof(bool) * hypos.size());
00165 for(size_t i=0; i<hypos.size(); i++) included[i] = false;
00166
00167
00168 for( iterator iter = m_hypos.begin(); iter != m_hypos.end(); ) {
00169 iterator removeHyp = iter++;
00170 Detach(removeHyp);
00171 }
00172
00173
00174 if ( m_minHypoStackDiversity > 0 ) {
00175 map< WordsBitmapID, size_t > diversityCount;
00176 for(size_t i=0; i<hypos.size(); i++) {
00177 Hypothesis *hyp = hypos[i];
00178 WordsBitmapID coverage = hyp->GetWordsBitmap().GetID();;
00179 if (diversityCount.find( coverage ) == diversityCount.end())
00180 diversityCount[ coverage ] = 0;
00181
00182 if (diversityCount[ coverage ] < m_minHypoStackDiversity) {
00183 m_hypos.insert( hyp );
00184 included[i] = true;
00185 diversityCount[ coverage ]++;
00186 if (diversityCount[ coverage ] == m_minHypoStackDiversity)
00187 SetWorstScoreForBitmap( coverage, hyp->GetFutureScore());
00188 }
00189 }
00190 }
00191
00192
00193 if ( size() < newSize ) {
00194
00195
00196 for(size_t i=0; i<hypos.size()
00197 && size() < newSize
00198 && hypos[i]->GetFutureScore() > m_bestScore+m_beamWidth; i++) {
00199 if (! included[i]) {
00200 m_hypos.insert( hypos[i] );
00201 included[i] = true;
00202 if (size() == newSize)
00203 m_worstScore = hypos[i]->GetFutureScore();
00204 }
00205 }
00206 }
00207
00208
00209 for(size_t i=0; i<hypos.size(); i++) {
00210 if (! included[i]) {
00211 delete hypos[i];
00212 m_manager.GetSentenceStats().AddPruning();
00213 }
00214 }
00215 free(included);
00216
00217
00218 VERBOSE(3,", pruned to size " << size() << endl);
00219 IFVERBOSE(3) {
00220 TRACE_ERR("stack now contains: ");
00221 for(iterator iter = m_hypos.begin(); iter != m_hypos.end(); iter++) {
00222 Hypothesis *hypo = *iter;
00223 TRACE_ERR( hypo->GetId() << " (" << hypo->GetFutureScore() << ") ");
00224 }
00225 TRACE_ERR( endl);
00226 }
00227 }
00228
00229 const Hypothesis *HypothesisStackNormal::GetBestHypothesis() const
00230 {
00231 if (!m_hypos.empty()) {
00232 const_iterator iter = m_hypos.begin();
00233 Hypothesis *bestHypo = *iter;
00234 while (++iter != m_hypos.end()) {
00235 Hypothesis *hypo = *iter;
00236 if (hypo->GetFutureScore() > bestHypo->GetFutureScore())
00237 bestHypo = hypo;
00238 }
00239 return bestHypo;
00240 }
00241 return NULL;
00242 }
00243
00244 vector<const Hypothesis*> HypothesisStackNormal::GetSortedList() const
00245 {
00246 vector<const Hypothesis*> ret;
00247 ret.reserve(m_hypos.size());
00248 std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
00249 sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
00250
00251 return ret;
00252 }
00253
00254 vector<Hypothesis*> HypothesisStackNormal::GetSortedListNOTCONST()
00255 {
00256 vector<Hypothesis*> ret;
00257 ret.reserve(m_hypos.size());
00258 std::copy(m_hypos.begin(), m_hypos.end(), std::inserter(ret, ret.end()));
00259 sort(ret.begin(), ret.end(), CompareHypothesisTotalScore());
00260
00261 return ret;
00262 }
00263
00264 void HypothesisStackNormal::CleanupArcList()
00265 {
00266
00267 if (!m_nBestIsEnabled) return;
00268
00269 iterator iter;
00270 for (iter = m_hypos.begin() ; iter != m_hypos.end() ; ++iter) {
00271 Hypothesis *mainHypo = *iter;
00272 mainHypo->CleanupArcList(this->m_manager.options()->nbest.nbest_size, this->m_manager.options()->NBestDistinct());
00273 }
00274 }
00275
00276 TO_STRING_BODY(HypothesisStackNormal);
00277
00278
00279
00280 std::ostream& operator<<(std::ostream& out, const HypothesisStackNormal& hypoColl)
00281 {
00282 HypothesisStackNormal::const_iterator iter;
00283
00284 for (iter = hypoColl.begin() ; iter != hypoColl.end() ; ++iter) {
00285 const Hypothesis &hypo = **iter;
00286 out << hypo << endl;
00287
00288 }
00289 return out;
00290 }
00291
00292
00293 }
00294