00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "util/exception.hh"
00022
00023 #include "moses/TranslationModel/PhraseDictionary.h"
00024 #include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
00025 #include "moses/FactorCollection.h"
00026 #include "moses/InputFileStream.h"
00027 #include "moses/StaticData.h"
00028 #include "moses/TargetPhrase.h"
00029
00030 using namespace std;
00031
00032 namespace Moses
00033 {
00034 std::map< const std::string, PhraseDictionaryDynamicCacheBased * > PhraseDictionaryDynamicCacheBased::s_instance_map;
00035 PhraseDictionaryDynamicCacheBased *PhraseDictionaryDynamicCacheBased::s_instance = NULL;
00036
00038 PhraseDictionaryDynamicCacheBased::PhraseDictionaryDynamicCacheBased(const std::string &line)
00039 : PhraseDictionary(line, true)
00040 {
00041 std::cerr << "Initializing PhraseDictionaryDynamicCacheBased feature..." << std::endl;
00042
00043
00044 m_maxCacheSize = 0;
00045
00046 m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
00047 m_maxAge = 1000;
00048 m_entries = 0;
00049 m_name = "default";
00050 m_constant = false;
00051 ReadParameters();
00052
00053 UTIL_THROW_IF2(s_instance_map.find(m_name) != s_instance_map.end(), "Only 1 PhraseDictionaryDynamicCacheBased feature named " + m_name + " is allowed");
00054 s_instance_map[m_name] = this;
00055 s_instance = this;
00056 }
00057
00058 PhraseDictionaryDynamicCacheBased::~PhraseDictionaryDynamicCacheBased()
00059 {
00060 Clear();
00061 }
00062
00063 void PhraseDictionaryDynamicCacheBased::Load(AllOptions::ptr const& opts)
00064 {
00065 m_options = opts;
00066 VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load()" << std::endl);
00067 SetFeaturesToApply();
00068
00069 vector<float> weight = StaticData::Instance().GetWeights(this);
00070 SetPreComputedScores(weight.size());
00071
00072 Load(m_initfiles);
00073 }
00074
00075 void PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)
00076 {
00077 VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load(const std::string filestr)" << std::endl);
00078
00079 std::vector<std::string> files = Tokenize(filestr, "||");
00080 Load_Multiple_Files(files);
00081 }
00082
00083 void PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)
00084 {
00085 VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Multiple_Files(std::vector<std::string> files)" << std::endl);
00086 for(size_t j = 0; j < files.size(); ++j) {
00087 Load_Single_File(files[j]);
00088 }
00089 }
00090
00091 void PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)
00092 {
00093 VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Load_Single_File(const std::string file)" << std::endl);
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110 VERBOSE(2,"Loading data from the cache file " << file << std::endl);
00111 InputFileStream cacheFile(file);
00112
00113 std::string line;
00114 std::vector<std::string> words;
00115
00116 while (getline(cacheFile, line)) {
00117 std::vector<std::string> vecStr = TokenizeMultiCharSeparator( line , "||||" );
00118 if (vecStr.size() >= 2) {
00119 std::string ageString = vecStr[0];
00120 vecStr.erase(vecStr.begin());
00121 Update(vecStr,ageString);
00122 } else {
00123 UTIL_THROW_IF2(false, "The format of the loaded file is wrong: " << line);
00124 }
00125 }
00126 IFVERBOSE(2) Print();
00127 }
00128
00129
00130 void PhraseDictionaryDynamicCacheBased::SetParameter(const std::string& key, const std::string& value)
00131 {
00132 VERBOSE(2, "PhraseDictionaryDynamicCacheBased::SetParameter key:|" << key << "| value:|" << value << "|" << std::endl);
00133
00134 if(key == "cbtm-score-type") {
00135 SetScoreType(Scan<size_t>(value));
00136 } else if (key == "cbtm-max-age") {
00137 SetMaxAge(Scan<unsigned int>(value));
00138 } else if (key == "cbtm-file") {
00139 m_initfiles = Scan<std::string>(value);
00140 } else if (key == "cbtm-name") {
00141 m_name = Scan<std::string>(value);
00142 } else if (key == "cbtm-constant") {
00143 m_constant = Scan<bool>(value);
00144 } else {
00145 PhraseDictionary::SetParameter(key, value);
00146 }
00147 }
00148
00149 void PhraseDictionaryDynamicCacheBased::InitializeForInput(ttasksptr const& ttask)
00150 {
00151 ReduceCache();
00152 }
00153
00154 TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollection(const Phrase &source) const
00155 {
00156 #ifdef WITH_THREADS
00157 boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
00158 #endif
00159 TargetPhraseCollection::shared_ptr tpc;
00160 cacheMap::const_iterator it = m_cacheTM.find(source);
00161 if(it != m_cacheTM.end()) {
00162 tpc.reset(new TargetPhraseCollection(*(it->second).first));
00163
00164 std::vector<const TargetPhrase*>::const_iterator it2 = tpc->begin();
00165
00166 while (it2 != tpc->end()) {
00167 ((TargetPhrase*) *it2)->EvaluateInIsolation(source, GetFeaturesToApply());
00168 it2++;
00169 }
00170 }
00171 if (tpc) {
00172 tpc->NthElement(m_tableLimit);
00173 }
00174
00175 return tpc;
00176 }
00177
00178 TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionLEGACY(Phrase const &src) const
00179 {
00180 TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
00181 return ret;
00182 }
00183
00184 TargetPhraseCollection::shared_ptr PhraseDictionaryDynamicCacheBased::GetTargetPhraseCollectionNonCacheLEGACY(Phrase const &src) const
00185 {
00186 TargetPhraseCollection::shared_ptr ret = GetTargetPhraseCollection(src);
00187 return ret;
00188 }
00189
00190 ChartRuleLookupManager* PhraseDictionaryDynamicCacheBased::CreateRuleLookupManager(const ChartParser &parser, const ChartCellCollectionBase &cellCollection, std::size_t )
00191 {
00192 UTIL_THROW(util::Exception, "Phrase table used in chart decoder");
00193 }
00194
00195 void PhraseDictionaryDynamicCacheBased::SetScoreType(size_t type)
00196 {
00197 #ifdef WITH_THREADS
00198 boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
00199 #endif
00200
00201 m_score_type = type;
00202 if ( m_score_type != CBTM_SCORE_TYPE_HYPERBOLA
00203 && m_score_type != CBTM_SCORE_TYPE_POWER
00204 && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL
00205 && m_score_type != CBTM_SCORE_TYPE_COSINE
00206 && m_score_type != CBTM_SCORE_TYPE_HYPERBOLA_REWARD
00207 && m_score_type != CBTM_SCORE_TYPE_POWER_REWARD
00208 && m_score_type != CBTM_SCORE_TYPE_EXPONENTIAL_REWARD ) {
00209 VERBOSE(2, "This score type " << m_score_type << " is unknown. Instead used " << CBTM_SCORE_TYPE_HYPERBOLA << "." << std::endl);
00210 m_score_type = CBTM_SCORE_TYPE_HYPERBOLA;
00211 }
00212
00213 VERBOSE(2, "PhraseDictionaryDynamicCacheBased ScoreType: " << m_score_type << std::endl);
00214 }
00215
00216
00217 void PhraseDictionaryDynamicCacheBased::SetMaxAge(unsigned int age)
00218 {
00219 #ifdef WITH_THREADS
00220 boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
00221 #endif
00222 m_maxAge = age;
00223 VERBOSE(2, "PhraseDictionaryCache MaxAge: " << m_maxAge << std::endl);
00224 }
00225
00226
00227
00228 ostream& operator<<(ostream& out, const PhraseDictionaryDynamicCacheBased& phraseDict)
00229 {
00230 return out;
00231 }
00232
00233 float PhraseDictionaryDynamicCacheBased::decaying_score(const int age)
00234 {
00235 float sc;
00236 switch(m_score_type) {
00237 case CBTM_SCORE_TYPE_HYPERBOLA:
00238 sc = (float) 1.0/age - 1.0;
00239 break;
00240 case CBTM_SCORE_TYPE_POWER:
00241 sc = (float) pow(age, -0.25) - 1.0;
00242 break;
00243 case CBTM_SCORE_TYPE_EXPONENTIAL:
00244 sc = (age == 1) ? 0.0 : (float) exp( 1.0/age ) / exp(1.0) - 1.0;
00245 break;
00246 case CBTM_SCORE_TYPE_COSINE:
00247 sc = (float) cos( (age-1) * (PI/2) / m_maxAge ) - 1.0;
00248 break;
00249 case CBTM_SCORE_TYPE_HYPERBOLA_REWARD:
00250 sc = (float) 1.0/age;
00251 break;
00252 case CBTM_SCORE_TYPE_POWER_REWARD:
00253 sc = (float) pow(age, -0.25);
00254 break;
00255 case CBTM_SCORE_TYPE_EXPONENTIAL_REWARD:
00256 sc = (age == 1) ? 1.0 : (float) exp( 1.0/age ) / exp(1.0);
00257 break;
00258 default:
00259 sc = -1.0;
00260 }
00261 return sc;
00262 }
00263
00264 void PhraseDictionaryDynamicCacheBased::SetPreComputedScores(const unsigned int numScoreComponent)
00265 {
00266 VERBOSE(2, "PhraseDictionaryDynamicCacheBased SetPreComputedScores: " << m_maxAge << std::endl);
00267 #ifdef WITH_THREADS
00268 boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
00269 #endif
00270 float sc;
00271 for (size_t i=0; i<=m_maxAge; i++) {
00272 if (i==m_maxAge) {
00273 if ( m_score_type == CBTM_SCORE_TYPE_HYPERBOLA
00274 || m_score_type == CBTM_SCORE_TYPE_POWER
00275 || m_score_type == CBTM_SCORE_TYPE_EXPONENTIAL
00276 || m_score_type == CBTM_SCORE_TYPE_COSINE ) {
00277 sc = decaying_score(m_maxAge)/numScoreComponent;
00278 } else {
00279 sc = 0.0;
00280 }
00281 } else {
00282 sc = decaying_score(i)/numScoreComponent;
00283 }
00284 Scores sc_vec;
00285 for (size_t j=0; j<numScoreComponent; j++) {
00286 sc_vec.push_back(sc);
00287 }
00288 precomputedScores.push_back(sc_vec);
00289 }
00290 m_lower_score = precomputedScores[m_maxAge].at(0);
00291 VERBOSE(3, "SetPreComputedScores(const unsigned int): lower_age:|" << m_maxAge << "| lower_score:|" << m_lower_score << "|" << std::endl);
00292 }
00293
00294 Scores PhraseDictionaryDynamicCacheBased::GetPreComputedScores(const unsigned int age)
00295 {
00296 if (age < m_maxAge) {
00297 return precomputedScores.at(age);
00298 } else {
00299 return precomputedScores.at(m_maxAge);
00300 }
00301 }
00302
00303 void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string &entries)
00304 {
00305 if (entries != "") {
00306 VERBOSE(3,"entries:|" << entries << "|" << std::endl);
00307 std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
00308 VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
00309 ClearEntries(elements);
00310 }
00311 }
00312
00313 void PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)
00314 {
00315 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::vector<std::string> entries)" << std::endl);
00316 std::vector<std::string> pp;
00317
00318 std::vector<std::string>::iterator it;
00319 for(it = entries.begin(); it!=entries.end(); it++) {
00320 pp.clear();
00321 pp = TokenizeMultiCharSeparator((*it), "|||");
00322 VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
00323 VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
00324
00325 ClearEntries(pp[0], pp[1]);
00326 }
00327 }
00328
00329 void PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)
00330 {
00331 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(std::string sourcePhraseString, std::string targetPhraseString)" << std::endl);
00332 const StaticData &staticData = StaticData::Instance();
00333 Phrase sourcePhrase(0);
00334 Phrase targetPhrase(0);
00335
00336
00337 targetPhrase.Clear();
00338 VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
00339 targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order,
00340 targetPhraseString, NULL);
00341 VERBOSE(2, "targetPhrase:|" << targetPhrase << "|" << std::endl);
00342
00343
00344
00345 sourcePhrase.Clear();
00346 VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
00347 sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order,
00348 sourcePhraseString, NULL);
00349 VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
00350 ClearEntries(sourcePhrase, targetPhrase);
00351
00352 }
00353
00354 void PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)
00355 {
00356 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::ClearEntries(Phrase sp, Phrase tp)" << std::endl);
00357 #ifdef WITH_THREADS
00358 boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
00359 #endif
00360 VERBOSE(3, "PhraseDictionaryCache deleting sp:|" << sp << "| tp:|" << tp << "|" << std::endl);
00361
00362 cacheMap::const_iterator it = m_cacheTM.find(sp);
00363 VERBOSE(3,"sp:|" << sp << "|" << std::endl);
00364 if(it!=m_cacheTM.end()) {
00365 VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
00366
00367
00368
00369
00370 TargetCollectionAgePair TgtCollAgePair = it->second;
00371 TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
00372 AgeCollection* ac = TgtCollAgePair.second;
00373 const Phrase* p_ptr = NULL;
00374 TargetPhrase* tp_ptr = NULL;
00375 bool found = false;
00376 size_t tp_pos=0;
00377 while (!found && tp_pos < tpc->GetSize()) {
00378 tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
00379 p_ptr = (const Phrase*) tp_ptr;
00380 if (tp == *p_ptr) {
00381 found = true;
00382 continue;
00383 }
00384 tp_pos++;
00385 }
00386 if (!found) {
00387 VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
00388
00389 } else {
00390 VERBOSE(3,"tp:|" << tp << "| FOUND" << std::endl);
00391
00392 tpc->Remove(tp_pos);
00393 ac->erase(ac->begin() + tp_pos);
00394 m_entries--;
00395 VERBOSE(3,"tpc size:|" << tpc->GetSize() << "|" << std::endl);
00396 VERBOSE(3,"ac size:|" << ac->size() << "|" << std::endl);
00397 VERBOSE(3,"tp:|" << tp << "| DELETED" << std::endl);
00398 }
00399 if (tpc->GetSize() == 0) {
00400
00401 ac->clear();
00402 tpc.reset();
00403 delete ac;
00404 m_cacheTM.erase(sp);
00405 }
00406
00407 } else {
00408 VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
00409
00410 }
00411 }
00412
00413
00414
00415
00416 void PhraseDictionaryDynamicCacheBased::ClearSource(std::string &entries)
00417 {
00418 if (entries != "") {
00419 VERBOSE(3,"entries:|" << entries << "|" << std::endl);
00420 std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
00421 VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
00422 ClearEntries(elements);
00423 }
00424 }
00425
00426 void PhraseDictionaryDynamicCacheBased::ClearSource(std::vector<std::string> entries)
00427 {
00428 VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
00429 const StaticData &staticData = StaticData::Instance();
00430 Phrase sourcePhrase(0);
00431
00432 std::vector<std::string>::iterator it;
00433 for(it = entries.begin(); it!=entries.end(); it++) {
00434
00435 sourcePhrase.Clear();
00436 VERBOSE(3, "sourcePhraseString:|" << (*it) << "|" << std::endl);
00437 sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order,
00438 *it, NULL);
00439 VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
00440
00441 ClearSource(sourcePhrase);
00442 }
00443
00444 IFVERBOSE(2) Print();
00445 }
00446
00447 void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp)
00448 {
00449 VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::ClearSource(Phrase sp) sp:|" << sp << "|" << std::endl);
00450 cacheMap::const_iterator it = m_cacheTM.find(sp);
00451 if (it != m_cacheTM.end()) {
00452 VERBOSE(3,"found:|" << sp << "|" << std::endl);
00453
00454
00455 TargetCollectionAgePair TgtCollAgePair = it->second;
00456 TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
00457 AgeCollection* ac = TgtCollAgePair.second;
00458
00459 m_entries-=tpc->GetSize();
00460
00461
00462 ac->clear();
00463 tpc.reset();
00464 delete ac;
00465 m_cacheTM.erase(sp);
00466 } else {
00467
00468 }
00469 }
00470
00471 void PhraseDictionaryDynamicCacheBased::Insert(std::string &entries)
00472 {
00473 if (entries != "") {
00474 VERBOSE(3,"entries:|" << entries << "|" << std::endl);
00475 std::vector<std::string> elements = TokenizeMultiCharSeparator(entries, "||||");
00476 VERBOSE(3,"elements.size() after:|" << elements.size() << "|" << std::endl);
00477 Insert(elements);
00478 }
00479 }
00480
00481 void PhraseDictionaryDynamicCacheBased::Insert(std::vector<std::string> entries)
00482 {
00483 VERBOSE(3,"entries.size():|" << entries.size() << "|" << std::endl);
00484 if (m_constant == false) {
00485 Decay();
00486 }
00487 Update(entries, "1");
00488 IFVERBOSE(3) Print();
00489 }
00490
00491
00492 void PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)
00493 {
00494 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::vector<std::string> entries, std::string ageString)" << std::endl);
00495 std::vector<std::string> pp;
00496
00497 VERBOSE(3,"ageString:|" << ageString << "|" << std::endl);
00498 std::vector<std::string>::iterator it;
00499 for(it = entries.begin(); it!=entries.end(); it++) {
00500 pp.clear();
00501 pp = TokenizeMultiCharSeparator((*it), "|||");
00502 VERBOSE(3,"pp[0]:|" << pp[0] << "|" << std::endl);
00503 VERBOSE(3,"pp[1]:|" << pp[1] << "|" << std::endl);
00504
00505 if (pp.size() > 2) {
00506 VERBOSE(3,"pp[2]:|" << pp[2] << "|" << std::endl);
00507 Update(pp[0], pp[1], ageString, pp[2]);
00508 } else {
00509 Update(pp[0], pp[1], ageString);
00510 }
00511 }
00512 }
00513
00514 void PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)
00515 {
00516 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(std::string sourcePhraseString, std::string targetPhraseString, std::string ageString, std::string waString)" << std::endl);
00517 const StaticData &staticData = StaticData::Instance();
00518 Phrase sourcePhrase(0);
00519 TargetPhrase targetPhrase(0);
00520
00521 VERBOSE(3, "ageString:|" << ageString << "|" << std::endl);
00522 char *err_ind_temp;
00523 ageString = Trim(ageString);
00524 int age = strtod(ageString.c_str(), &err_ind_temp);
00525 VERBOSE(3, "age:|" << age << "|" << std::endl);
00526
00527
00528 targetPhrase.Clear();
00529 VERBOSE(3, "targetPhraseString:|" << targetPhraseString << "|" << std::endl);
00530 targetPhrase.CreateFromString(Output, staticData.options()->output.factor_order,
00531 targetPhraseString, NULL);
00532 VERBOSE(3, "targetPhrase:|" << targetPhrase << "|" << std::endl);
00533
00534
00535
00536 sourcePhrase.Clear();
00537 VERBOSE(3, "sourcePhraseString:|" << sourcePhraseString << "|" << std::endl);
00538 sourcePhrase.CreateFromString(Input, staticData.options()->input.factor_order, sourcePhraseString, NULL);
00539 VERBOSE(3, "sourcePhrase:|" << sourcePhrase << "|" << std::endl);
00540
00541 if (!waString.empty()) VERBOSE(3, "waString:|" << waString << "|" << std::endl);
00542
00543 Update(sourcePhrase, targetPhrase, age, waString);
00544 }
00545
00546 void PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)
00547 {
00548 VERBOSE(3,"PhraseDictionaryDynamicCacheBased::Update(Phrase sp, TargetPhrase tp, int age, std::string waString)" << std::endl);
00549 #ifdef WITH_THREADS
00550 boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
00551 #endif
00552 VERBOSE(3, "PhraseDictionaryCache inserting sp:|" << sp << "| tp:|" << tp << "| age:|" << age << "| word-alignment |" << waString << "|" << std::endl);
00553
00554 cacheMap::const_iterator it = m_cacheTM.find(sp);
00555 VERBOSE(3,"sp:|" << sp << "|" << std::endl);
00556 if(it!=m_cacheTM.end()) {
00557 VERBOSE(3,"sp:|" << sp << "| FOUND" << std::endl);
00558
00559
00560
00561
00562 TargetCollectionAgePair TgtCollAgePair = it->second;
00563 TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
00564 AgeCollection* ac = TgtCollAgePair.second;
00565
00566 const Phrase* p_ptr = NULL;
00567 TargetPhrase* tp_ptr = NULL;
00568 bool found = false;
00569 size_t tp_pos=0;
00570 while (!found && tp_pos < tpc->GetSize()) {
00571 tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
00572 p_ptr = (const TargetPhrase*) tp_ptr;
00573 if ((Phrase) tp == *p_ptr) {
00574 found = true;
00575 continue;
00576 }
00577 tp_pos++;
00578 }
00579 if (!found) {
00580 VERBOSE(3,"tp:|" << tp << "| NOT FOUND" << std::endl);
00581 std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
00582
00583 targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
00584 if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
00585
00586 tpc->Add(targetPhrase.release());
00587
00588 tp_pos = tpc->GetSize()-1;
00589 ac->push_back(age);
00590 m_entries++;
00591 VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| INSERTED" << std::endl);
00592 } else {
00593 tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
00594 if (!waString.empty()) tp_ptr->SetAlignmentInfo(waString);
00595 ac->at(tp_pos) = age;
00596 VERBOSE(3,"sp:|" << sp << "tp:|" << tp << "| UPDATED" << std::endl);
00597 }
00598 } else {
00599 VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
00600
00601
00602
00603
00604 TargetPhraseCollection::shared_ptr tpc(new TargetPhraseCollection);
00605 AgeCollection* ac = new AgeCollection();
00606 m_cacheTM.insert(make_pair(sp,make_pair(tpc,ac)));
00607
00608
00609 std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(tp));
00610 targetPhrase->GetScoreBreakdown().Assign(this, GetPreComputedScores(age));
00611 if (!waString.empty()) targetPhrase->SetAlignmentInfo(waString);
00612
00613 tpc->Add(targetPhrase.release());
00614 ac->push_back(age);
00615 m_entries++;
00616 VERBOSE(3,"sp:|" << sp << "| tp:|" << tp << "| INSERTED" << std::endl);
00617 }
00618 }
00619
00620 void PhraseDictionaryDynamicCacheBased::Decay()
00621 {
00622 #ifdef WITH_THREADS
00623 boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
00624 #endif
00625 cacheMap::iterator it;
00626 for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
00627 Decay((*it).first);
00628 }
00629 }
00630
00631 void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp)
00632 {
00633 VERBOSE(3,"void PhraseDictionaryDynamicCacheBased::Decay(Phrase sp) sp:|" << sp << "|" << std::endl);
00634 cacheMap::iterator it = m_cacheTM.find(sp);
00635 if (it != m_cacheTM.end()) {
00636 VERBOSE(3,"found:|" << sp << "|" << std::endl);
00637
00638
00639 TargetCollectionAgePair TgtCollAgePair = it->second;
00640 TargetPhraseCollection::shared_ptr tpc = TgtCollAgePair.first;
00641 AgeCollection* ac = TgtCollAgePair.second;
00642
00643
00644 for (int tp_pos = tpc->GetSize() - 1 ; tp_pos >= 0; tp_pos--) {
00645 unsigned int tp_age = ac->at(tp_pos);
00646 tp_age++;
00647 VERBOSE(3,"sp:|" << sp << "| " << " new tp_age:|" << tp_age << "|" << std::endl);
00648
00649 TargetPhrase* tp_ptr = (TargetPhrase*) tpc->GetTargetPhrase(tp_pos);
00650
00651 if (tp_age > m_maxAge) {
00652 VERBOSE(3,"tp_age:|" << tp_age << "| TOO BIG" << std::endl);
00653 tpc->Remove(tp_pos);
00654 ac->erase(ac->begin() + tp_pos);
00655 m_entries--;
00656 } else {
00657 VERBOSE(3,"tp_age:|" << tp_age << "| STILL GOOD" << std::endl);
00658 tp_ptr->GetScoreBreakdown().Assign(this, GetPreComputedScores(tp_age));
00659 ac->at(tp_pos) = tp_age;
00660 }
00661 }
00662 if (tpc->GetSize() == 0) {
00663
00664 (((*it).second).second)->clear();
00665 delete ((*it).second).second;
00666 ((*it).second).first.reset();
00667 m_cacheTM.erase(sp);
00668 }
00669 } else {
00670
00671 VERBOSE(3,"sp:|" << sp << "| NOT FOUND" << std::endl);
00672 }
00673
00674
00675 }
00676
00677 void PhraseDictionaryDynamicCacheBased::Execute(std::string command)
00678 {
00679 VERBOSE(2,"command:|" << command << "|" << std::endl);
00680 std::vector<std::string> commands = Tokenize(command, "||");
00681 Execute(commands);
00682 }
00683
00684 void PhraseDictionaryDynamicCacheBased::Execute(std::vector<std::string> commands)
00685 {
00686 for (size_t j=0; j<commands.size(); j++) {
00687 Execute_Single_Command(commands[j]);
00688 }
00689 IFVERBOSE(2) Print();
00690 }
00691
00692 void PhraseDictionaryDynamicCacheBased::Execute_Single_Command(std::string command)
00693 {
00694 if (command == "clear") {
00695 VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "|. Cache cleared." << std::endl);
00696 Clear();
00697 } else {
00698 VERBOSE(2,"PhraseDictionaryDynamicCacheBased Execute command:|"<< command << "| is unknown. Skipped." << std::endl);
00699 }
00700 }
00701
00702
00703 void PhraseDictionaryDynamicCacheBased::Clear()
00704 {
00705 #ifdef WITH_THREADS
00706 boost::shared_lock<boost::shared_mutex> lock(m_cacheLock);
00707 #endif
00708 cacheMap::iterator it;
00709 for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
00710 (((*it).second).second)->clear();
00711 delete ((*it).second).second;
00712 ((*it).second).first.reset();
00713 }
00714 m_cacheTM.clear();
00715 m_entries = 0;
00716 }
00717
00718
00719 void PhraseDictionaryDynamicCacheBased::ExecuteDlt(std::map<std::string, std::string> dlt_meta)
00720 {
00721 if (dlt_meta.find("cbtm") != dlt_meta.end()) {
00722 Insert(dlt_meta["cbtm"]);
00723 }
00724 if (dlt_meta.find("cbtm-command") != dlt_meta.end()) {
00725 Execute(dlt_meta["cbtm-command"]);
00726 }
00727 if (dlt_meta.find("cbtm-file") != dlt_meta.end()) {
00728 Load(dlt_meta["cbtm-file"]);
00729 }
00730 if (dlt_meta.find("cbtm-clear-source") != dlt_meta.end()) {
00731 ClearSource(dlt_meta["cbtm-clear-source"]);
00732 }
00733 if (dlt_meta.find("cbtm-clear-entries") != dlt_meta.end()) {
00734 ClearEntries(dlt_meta["cbtm-clear-entries"]);
00735 }
00736 if (dlt_meta.find("cbtm-clear-all") != dlt_meta.end()) {
00737 Clear();
00738 }
00739
00740 }
00741
00742 void PhraseDictionaryDynamicCacheBased::Print() const
00743 {
00744 VERBOSE(2,"PhraseDictionaryDynamicCacheBased::Print()" << std::endl);
00745 #ifdef WITH_THREADS
00746 boost::shared_lock<boost::shared_mutex> read_lock(m_cacheLock);
00747 #endif
00748 cacheMap::const_iterator it;
00749 for(it = m_cacheTM.begin(); it!=m_cacheTM.end(); it++) {
00750 std::string source = (it->first).ToString();
00751 TargetPhraseCollection::shared_ptr tpc = (it->second).first;
00752 TargetPhraseCollection::iterator itr;
00753 for(itr = tpc->begin(); itr != tpc->end(); itr++) {
00754 std::string target = (*itr)->ToString();
00755 std::cout << source << " ||| " << target << std::endl;
00756 }
00757 source.clear();
00758 }
00759 }
00760
00761 }