00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <string>
00025 #include <boost/algorithm/string/predicate.hpp>
00026
00027 #include "moses/FF/Factory.h"
00028 #include "TypeDef.h"
00029 #include "moses/FF/WordPenaltyProducer.h"
00030 #include "moses/FF/UnknownWordPenaltyProducer.h"
00031 #include "moses/FF/InputFeature.h"
00032 #include "moses/FF/DynamicCacheBasedLanguageModel.h"
00033 #include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
00034
00035 #include "DecodeStepTranslation.h"
00036 #include "DecodeStepGeneration.h"
00037 #include "GenerationDictionary.h"
00038 #include "StaticData.h"
00039 #include "Util.h"
00040 #include "FactorCollection.h"
00041 #include "Timer.h"
00042 #include "TranslationOption.h"
00043 #include "DecodeGraph.h"
00044 #include "InputFileStream.h"
00045 #include "ScoreComponentCollection.h"
00046 #include "DecodeGraph.h"
00047 #include "TranslationModel/PhraseDictionary.h"
00048 #include "TranslationModel/PhraseDictionaryTreeAdaptor.h"
00049
00050 #ifdef WITH_THREADS
00051 #include <boost/thread.hpp>
00052 #endif
00053 #ifdef HAVE_CMPH
00054 #include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
00055 #endif
00056 #if defined HAVE_CMPH
00057 #include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
00058 #endif
00059
00060 using namespace std;
00061 using namespace boost::algorithm;
00062
00063 namespace Moses
00064 {
00065 StaticData StaticData::s_instance;
00066
00067 StaticData::StaticData()
00068 : m_options(new AllOptions)
00069 , m_requireSortingAfterSourceContext(false)
00070 , m_currentWeightSetting("default")
00071 , m_treeStructure(NULL)
00072 , m_coordSpaceNextID(1)
00073 {
00074 Phrase::InitializeMemPool();
00075 }
00076
00077 StaticData::~StaticData()
00078 {
00079 RemoveAllInColl(m_decodeGraphs);
00080 Phrase::FinalizeMemPool();
00081 }
00082
00083 bool StaticData::LoadDataStatic(Parameter *parameter, const std::string &execPath)
00084 {
00085 s_instance.SetExecPath(execPath);
00086 return s_instance.LoadData(parameter);
00087 }
00088
00089 void
00090 StaticData
00091 ::initialize_features()
00092 {
00093 std::map<std::string, std::string> featureNameOverride = OverrideFeatureNames();
00094
00095 map<string, int> featureIndexMap;
00096
00097 const PARAM_VEC* params = m_parameter->GetParam("feature");
00098 for (size_t i = 0; params && i < params->size(); ++i) {
00099 const string &line = Trim(params->at(i));
00100 VERBOSE(1,"line=" << line << endl);
00101 if (line.empty())
00102 continue;
00103
00104 vector<string> toks = Tokenize(line);
00105
00106 string &feature = toks[0];
00107 std::map<std::string, std::string>::const_iterator iter
00108 = featureNameOverride.find(feature);
00109 if (iter == featureNameOverride.end()) {
00110
00111 m_registry.Construct(feature, line);
00112 } else {
00113
00114 string newName = iter->second;
00115 feature = newName;
00116 string newLine = Join(" ", toks);
00117 m_registry.Construct(newName, newLine);
00118 }
00119 }
00120
00121 NoCache();
00122 OverrideFeatures();
00123
00124 }
00125
00126 bool
00127 StaticData
00128 ::ini_output_options()
00129 {
00130
00131 m_parameter->SetParameter(m_verboseLevel, "verbose", (size_t) 1);
00132 m_parameter->SetParameter<string>(m_outputUnknownsFile,
00133 "output-unknowns", "");
00134 return true;
00135 }
00136
00137
00138 bool
00139 StaticData
00140 ::ini_performance_options()
00141 {
00142 const PARAM_VEC *params;
00143
00144 m_threadCount = 1;
00145 params = m_parameter->GetParam("threads");
00146 if (params && params->size()) {
00147 if (params->at(0) == "all") {
00148 #ifdef WITH_THREADS
00149 m_threadCount = boost::thread::hardware_concurrency();
00150 if (!m_threadCount) {
00151 std::cerr << "-threads all specified but Boost doesn't know how many cores there are";
00152 return false;
00153 }
00154 #else
00155 std::cerr << "-threads all specified but moses not built with thread support";
00156 return false;
00157 #endif
00158 } else {
00159 m_threadCount = Scan<int>(params->at(0));
00160 if (m_threadCount < 1) {
00161 std::cerr << "Specify at least one thread.";
00162 return false;
00163 }
00164 #ifndef WITH_THREADS
00165 if (m_threadCount > 1) {
00166 std::cerr << "Error: Thread count of " << params->at(0)
00167 << " but moses not built with thread support";
00168 return false;
00169 }
00170 #endif
00171 }
00172 }
00173 return true;
00174 }
00175
00176 bool StaticData::LoadData(Parameter *parameter)
00177 {
00178 m_parameter = parameter;
00179
00180 const PARAM_VEC *params;
00181
00182 m_options->init(*parameter);
00183 if (is_syntax(m_options->search.algo))
00184 m_options->syntax.LoadNonTerminals(*parameter, FactorCollection::Instance());
00185
00186 if (is_syntax(m_options->search.algo))
00187 LoadChartDecodingParameters();
00188
00189
00190
00191
00192 m_parameter->SetParameter<string>(m_factorDelimiter, "factor-delimiter", "|");
00193 m_parameter->SetParameter<size_t>(m_lmcache_cleanup_threshold, "clean-lm-cache", 1);
00194
00195 m_bookkeeping_options.init(*parameter);
00196 if (!ini_output_options()) return false;
00197
00198
00199 if (!ini_performance_options()) return false;
00200
00201
00202
00203
00204 #if defined HAVE_CMPH
00205 LexicalReorderingTableCompact::SetStaticDefaultParameters(*parameter);
00206 PhraseDictionaryCompact::SetStaticDefaultParameters(*parameter);
00207 #endif
00208
00209 initialize_features();
00210
00211 if (m_parameter->GetParam("show-weights") == NULL)
00212 LoadFeatureFunctions();
00213
00214 LoadDecodeGraphs();
00215
00216
00217 if (!CheckWeights()) return false;
00218
00219
00220 string weightFile;
00221 m_parameter->SetParameter<string>(weightFile, "weight-file", "");
00222 if (!weightFile.empty()) {
00223 ScoreComponentCollection extraWeights;
00224 if (!extraWeights.Load(weightFile)) {
00225 std::cerr << "Unable to load weights from " << weightFile;
00226 return false;
00227 }
00228 m_allWeights.PlusEquals(extraWeights);
00229 }
00230
00231
00232 LoadSparseWeightsFromConfig();
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245 params = m_parameter->GetParam("alternate-weight-setting");
00246 if (params && params->size() && !LoadAlternateWeightSettings())
00247 return false;
00248
00249 return true;
00250 }
00251
00252 void StaticData::SetWeight(const FeatureFunction* sp, float weight)
00253 {
00254 m_allWeights.Resize();
00255 m_allWeights.Assign(sp,weight);
00256 }
00257
00258 void StaticData::SetWeights(const FeatureFunction* sp,
00259 const std::vector<float>& weights)
00260 {
00261 m_allWeights.Resize();
00262 m_allWeights.Assign(sp,weights);
00263 }
00264
00265 void StaticData::LoadNonTerminals()
00266 {
00267 string defaultNonTerminals;
00268 m_parameter->SetParameter<string>(defaultNonTerminals, "non-terminals", "X");
00269
00270 FactorCollection &factorCollection = FactorCollection::Instance();
00271
00272 m_inputDefaultNonTerminal.SetIsNonTerminal(true);
00273 const Factor *sourceFactor = factorCollection.AddFactor(Input, 0, defaultNonTerminals, true);
00274 m_inputDefaultNonTerminal.SetFactor(0, sourceFactor);
00275
00276 m_outputDefaultNonTerminal.SetIsNonTerminal(true);
00277 const Factor *targetFactor = factorCollection.AddFactor(Output, 0, defaultNonTerminals, true);
00278 m_outputDefaultNonTerminal.SetFactor(0, targetFactor);
00279
00280
00281 const PARAM_VEC *params = m_parameter->GetParam("unknown-lhs");
00282 if (params == NULL || params->size() == 0) {
00283 UnknownLHSEntry entry(defaultNonTerminals, 0.0f);
00284 m_unknownLHS.push_back(entry);
00285 } else {
00286 const string &filePath = params->at(0);
00287
00288 InputFileStream inStream(filePath);
00289 string line;
00290 while(getline(inStream, line)) {
00291 vector<string> tokens = Tokenize(line);
00292 UTIL_THROW_IF2(tokens.size() != 2,
00293 "Incorrect unknown LHS format: " << line);
00294 UnknownLHSEntry entry(tokens[0], Scan<float>(tokens[1]));
00295 m_unknownLHS.push_back(entry);
00296
00297 factorCollection.AddFactor(Output, 0, tokens[0], true);
00298 }
00299
00300 }
00301
00302 }
00303
00304 void StaticData::LoadChartDecodingParameters()
00305 {
00306 LoadNonTerminals();
00307
00308
00309 m_parameter->SetParameter(m_sourceLabelOverlap, "source-label-overlap",
00310 SourceLabelOverlapAdd);
00311
00312 }
00313
00314 void StaticData::LoadDecodeGraphs()
00315 {
00316 vector<string> mappingVector;
00317 vector<size_t> maxChartSpans;
00318
00319 const PARAM_VEC *params;
00320
00321 params = m_parameter->GetParam("mapping");
00322 if (params && params->size()) {
00323 mappingVector = *params;
00324 } else {
00325 mappingVector.assign(1,"0 T 0");
00326 }
00327
00328 params = m_parameter->GetParam("max-chart-span");
00329 if (params && params->size()) {
00330 maxChartSpans = Scan<size_t>(*params);
00331 }
00332
00333 vector<string> toks = Tokenize(mappingVector[0]);
00334 if (toks.size() == 3) {
00335
00336 LoadDecodeGraphsOld(mappingVector, maxChartSpans);
00337 } else if (toks.size() == 2) {
00338 if (toks[0] == "T" || toks[0] == "G") {
00339
00340 LoadDecodeGraphsOld(mappingVector, maxChartSpans);
00341 } else {
00342
00343 LoadDecodeGraphsNew(mappingVector, maxChartSpans);
00344 }
00345 } else {
00346 UTIL_THROW(util::Exception, "Malformed mapping");
00347 }
00348 }
00349
00350 void
00351 StaticData::
00352 LoadDecodeGraphsOld(const vector<string> &mappingVector,
00353 const vector<size_t> &maxChartSpans)
00354 {
00355 const vector<PhraseDictionary*>& pts = PhraseDictionary::GetColl();
00356 const vector<GenerationDictionary*>& gens = GenerationDictionary::GetColl();
00357
00358 const std::vector<FeatureFunction*> *featuresRemaining
00359 = &FeatureFunction::GetFeatureFunctions();
00360 DecodeStep *prev = 0;
00361 size_t prevDecodeGraphInd = 0;
00362
00363 for(size_t i=0; i<mappingVector.size(); i++) {
00364 vector<string> token = Tokenize(mappingVector[i]);
00365 size_t decodeGraphInd;
00366 DecodeType decodeType;
00367 size_t index;
00368 if (token.size() == 2) {
00369
00370 decodeGraphInd = 0;
00371 decodeType = token[0] == "T" ? Translate : Generate;
00372 index = Scan<size_t>(token[1]);
00373 } else if (token.size() == 3) {
00374
00375
00376 decodeGraphInd = Scan<size_t>(token[0]);
00377
00378 UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
00379 && decodeGraphInd != prevDecodeGraphInd + 1,
00380 "Malformed mapping");
00381 if (decodeGraphInd > prevDecodeGraphInd) {
00382 prev = NULL;
00383 }
00384
00385 if (prevDecodeGraphInd < decodeGraphInd) {
00386 featuresRemaining = &FeatureFunction::GetFeatureFunctions();
00387 }
00388
00389 decodeType = token[1] == "T" ? Translate : Generate;
00390 index = Scan<size_t>(token[2]);
00391 } else {
00392 UTIL_THROW(util::Exception, "Malformed mapping");
00393 }
00394
00395 DecodeStep* decodeStep = NULL;
00396 switch (decodeType) {
00397 case Translate:
00398 if(index>=pts.size()) {
00399 util::StringStream strme;
00400 strme << "No phrase dictionary with index "
00401 << index << " available!";
00402 UTIL_THROW(util::Exception, strme.str());
00403 }
00404 decodeStep = new DecodeStepTranslation(pts[index], prev, *featuresRemaining);
00405 break;
00406 case Generate:
00407 if(index>=gens.size()) {
00408 util::StringStream strme;
00409 strme << "No generation dictionary with index "
00410 << index << " available!";
00411 UTIL_THROW(util::Exception, strme.str());
00412 }
00413 decodeStep = new DecodeStepGeneration(gens[index], prev, *featuresRemaining);
00414 break;
00415 default:
00416 UTIL_THROW(util::Exception, "Unknown decode step");
00417 break;
00418 }
00419
00420 featuresRemaining = &decodeStep->GetFeaturesRemaining();
00421
00422 UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
00423 if (m_decodeGraphs.size() < decodeGraphInd + 1) {
00424 DecodeGraph *decodeGraph;
00425 if (is_syntax(m_options->search.algo)) {
00426 size_t maxChartSpan;
00427 if (decodeGraphInd < maxChartSpans.size()) {
00428 maxChartSpan = maxChartSpans[decodeGraphInd];
00429 VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
00430 } else {
00431 maxChartSpan = DEFAULT_MAX_CHART_SPAN;
00432 }
00433 decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
00434 } else {
00435 decodeGraph = new DecodeGraph(m_decodeGraphs.size());
00436 }
00437
00438 m_decodeGraphs.push_back(decodeGraph);
00439 }
00440
00441 m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
00442 prev = decodeStep;
00443 prevDecodeGraphInd = decodeGraphInd;
00444 }
00445
00446
00447
00448
00449 const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
00450 for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
00451 DecodeGraph &decodeGraph = *m_decodeGraphs[i];
00452
00453 if (i < backoffVector->size()) {
00454 decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
00455 }
00456 }
00457 }
00458
00459 void StaticData::LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector, const std::vector<size_t> &maxChartSpans)
00460 {
00461 const std::vector<FeatureFunction*> *featuresRemaining = &FeatureFunction::GetFeatureFunctions();
00462 DecodeStep *prev = 0;
00463 size_t prevDecodeGraphInd = 0;
00464
00465 for(size_t i=0; i<mappingVector.size(); i++) {
00466 vector<string> token = Tokenize(mappingVector[i]);
00467 size_t decodeGraphInd;
00468
00469 decodeGraphInd = Scan<size_t>(token[0]);
00470
00471 UTIL_THROW_IF2(decodeGraphInd != prevDecodeGraphInd
00472 && decodeGraphInd != prevDecodeGraphInd + 1,
00473 "Malformed mapping");
00474 if (decodeGraphInd > prevDecodeGraphInd) {
00475 prev = NULL;
00476 }
00477
00478 if (prevDecodeGraphInd < decodeGraphInd) {
00479 featuresRemaining = &FeatureFunction::GetFeatureFunctions();
00480 }
00481
00482 FeatureFunction &ff = FeatureFunction::FindFeatureFunction(token[1]);
00483
00484 DecodeStep* decodeStep = NULL;
00485 if (typeid(ff) == typeid(PhraseDictionary)) {
00486 decodeStep = new DecodeStepTranslation(&static_cast<PhraseDictionary&>(ff), prev, *featuresRemaining);
00487 } else if (typeid(ff) == typeid(GenerationDictionary)) {
00488 decodeStep = new DecodeStepGeneration(&static_cast<GenerationDictionary&>(ff), prev, *featuresRemaining);
00489 } else {
00490 UTIL_THROW(util::Exception, "Unknown decode step");
00491 }
00492
00493 featuresRemaining = &decodeStep->GetFeaturesRemaining();
00494
00495 UTIL_THROW_IF2(decodeStep == NULL, "Null decode step");
00496 if (m_decodeGraphs.size() < decodeGraphInd + 1) {
00497 DecodeGraph *decodeGraph;
00498 if (is_syntax(m_options->search.algo)) {
00499 size_t maxChartSpan = (decodeGraphInd < maxChartSpans.size()) ? maxChartSpans[decodeGraphInd] : DEFAULT_MAX_CHART_SPAN;
00500 VERBOSE(1,"max-chart-span: " << maxChartSpans[decodeGraphInd] << endl);
00501 decodeGraph = new DecodeGraph(m_decodeGraphs.size(), maxChartSpan);
00502 } else {
00503 decodeGraph = new DecodeGraph(m_decodeGraphs.size());
00504 }
00505
00506 m_decodeGraphs.push_back(decodeGraph);
00507 }
00508
00509 m_decodeGraphs[decodeGraphInd]->Add(decodeStep);
00510 prev = decodeStep;
00511 prevDecodeGraphInd = decodeGraphInd;
00512 }
00513
00514
00515
00516
00517 const vector<string> *backoffVector = m_parameter->GetParam("decoding-graph-backoff");
00518 for(size_t i=0; i<m_decodeGraphs.size() && backoffVector && i<backoffVector->size(); i++) {
00519 DecodeGraph &decodeGraph = *m_decodeGraphs[i];
00520
00521 if (i < backoffVector->size()) {
00522 decodeGraph.SetBackoff(Scan<size_t>(backoffVector->at(i)));
00523 }
00524 }
00525
00526 }
00527
00528 void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
00529 {
00530
00531 const std::vector<FeatureFunction*> &producers = FeatureFunction::GetFeatureFunctions();
00532 for(size_t i=0; i<producers.size(); ++i) {
00533 FeatureFunction *ff = producers[i];
00534 std::string ffName = ff->GetScoreProducerDescription();
00535
00536 if (ffName == "BleuScoreFeature") {
00537 SetWeight(ff, weight);
00538 break;
00539 }
00540 }
00541 }
00542
00543
00544
00545
00546 void StaticData::SetExecPath(const std::string &path)
00547 {
00548
00549 size_t pos = path.rfind("/");
00550 if (pos != string::npos) {
00551 m_binPath = path.substr(0, pos);
00552 }
00553 VERBOSE(1,m_binPath << endl);
00554 }
00555
00556 const string &StaticData::GetBinDirectory() const
00557 {
00558 return m_binPath;
00559 }
00560
00561 float StaticData::GetWeightWordPenalty() const
00562 {
00563 float weightWP = GetWeight(&WordPenaltyProducer::Instance());
00564 return weightWP;
00565 }
00566
00567 void
00568 StaticData::
00569 InitializeForInput(ttasksptr const& ttask) const
00570 {
00571 const std::vector<FeatureFunction*> &producers
00572 = FeatureFunction::GetFeatureFunctions();
00573 for(size_t i=0; i<producers.size(); ++i) {
00574 FeatureFunction &ff = *producers[i];
00575 if (! IsFeatureFunctionIgnored(ff)) {
00576 Timer iTime;
00577 iTime.start();
00578 ff.InitializeForInput(ttask);
00579 VERBOSE(3,"InitializeForInput( " << ff.GetScoreProducerDescription()
00580 << " )" << "= " << iTime << endl);
00581 }
00582 }
00583 }
00584
00585 void
00586 StaticData::
00587 CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const
00588 {
00589 const std::vector<FeatureFunction*> &producers
00590 = FeatureFunction::GetFeatureFunctions();
00591 for(size_t i=0; i<producers.size(); ++i) {
00592 FeatureFunction &ff = *producers[i];
00593 if (! IsFeatureFunctionIgnored(ff)) {
00594 ff.CleanUpAfterSentenceProcessing(ttask);
00595 }
00596 }
00597 }
00598
00599 void StaticData::LoadFeatureFunctions()
00600 {
00601 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00602 std::vector<FeatureFunction*>::const_iterator iter;
00603 for (iter = ffs.begin(); iter != ffs.end(); ++iter) {
00604 FeatureFunction *ff = *iter;
00605 bool doLoad = true;
00606
00607 if (ff->RequireSortingAfterSourceContext()) {
00608 m_requireSortingAfterSourceContext = true;
00609 }
00610
00611 if (dynamic_cast<PhraseDictionary*>(ff)) {
00612 doLoad = false;
00613 }
00614
00615 if (doLoad) {
00616 VERBOSE(1, "Loading " << ff->GetScoreProducerDescription() << endl);
00617 ff->Load(options());
00618 }
00619 }
00620
00621 const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
00622 for (size_t i = 0; i < pts.size(); ++i) {
00623 PhraseDictionary *pt = pts[i];
00624 VERBOSE(1, "Loading " << pt->GetScoreProducerDescription() << endl);
00625 pt->Load(options());
00626 }
00627
00628 CheckLEGACYPT();
00629 }
00630
00631 bool StaticData::CheckWeights() const
00632 {
00633 set<string> weightNames = m_parameter->GetWeightNames();
00634 set<string> featureNames;
00635
00636 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00637 for (size_t i = 0; i < ffs.size(); ++i) {
00638 const FeatureFunction &ff = *ffs[i];
00639 const string &descr = ff.GetScoreProducerDescription();
00640 featureNames.insert(descr);
00641
00642 set<string>::iterator iter = weightNames.find(descr);
00643 if (iter == weightNames.end()) {
00644 cerr << "Can't find weights for feature function " << descr << endl;
00645 } else {
00646 weightNames.erase(iter);
00647 }
00648 }
00649
00650
00651 if (!weightNames.empty()) {
00652 set<string>::iterator iter;
00653 for (iter = weightNames.begin(); iter != weightNames.end(); ) {
00654 string fname = (*iter).substr(0, (*iter).find("_"));
00655 VERBOSE(1,fname << "\n");
00656 if (featureNames.find(fname) != featureNames.end()) {
00657 weightNames.erase(iter++);
00658 } else {
00659 ++iter;
00660 }
00661 }
00662 }
00663
00664 if (!weightNames.empty()) {
00665 cerr << "The following weights have no feature function. "
00666 << "Maybe incorrectly spelt weights: ";
00667 set<string>::iterator iter;
00668 for (iter = weightNames.begin(); iter != weightNames.end(); ++iter) {
00669 cerr << *iter << ",";
00670 }
00671 return false;
00672 }
00673
00674 return true;
00675 }
00676
00677
00678 void StaticData::LoadSparseWeightsFromConfig()
00679 {
00680 set<string> featureNames;
00681 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00682 for (size_t i = 0; i < ffs.size(); ++i) {
00683 const FeatureFunction &ff = *ffs[i];
00684 const string &descr = ff.GetScoreProducerDescription();
00685 featureNames.insert(descr);
00686 }
00687
00688 const std::map<std::string, std::vector<float> > &weights = m_parameter->GetAllWeights();
00689 std::map<std::string, std::vector<float> >::const_iterator iter;
00690 for (iter = weights.begin(); iter != weights.end(); ++iter) {
00691
00692 if (featureNames.find(iter->first) == featureNames.end()) {
00693 UTIL_THROW_IF2(iter->second.size() != 1, "ERROR: only one weight per sparse feature allowed: " << iter->first);
00694 m_allWeights.Assign(iter->first, iter->second[0]);
00695 }
00696 }
00697
00698 }
00699
00700
00702 bool StaticData::LoadAlternateWeightSettings()
00703 {
00704 if (m_threadCount > 1) {
00705 cerr << "ERROR: alternative weight settings currently not supported with multi-threading.";
00706 return false;
00707 }
00708
00709 vector<string> weightSpecification;
00710 const PARAM_VEC *params = m_parameter->GetParam("alternate-weight-setting");
00711 if (params && params->size()) {
00712 weightSpecification = *params;
00713 }
00714
00715
00716 map<string,FeatureFunction*> nameToFF;
00717 const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetFeatureFunctions();
00718 for (size_t i = 0; i < ffs.size(); ++i) {
00719 nameToFF[ ffs[i]->GetScoreProducerDescription() ] = ffs[i];
00720 }
00721
00722
00723 m_weightSetting["default"] = new ScoreComponentCollection( m_allWeights );
00724
00725
00726 string currentId = "";
00727 bool hasErrors = false;
00728 for (size_t i=0; i<weightSpecification.size(); ++i) {
00729
00730
00731 if (weightSpecification[i].find("id=") == 0) {
00732 vector<string> tokens = Tokenize(weightSpecification[i]);
00733 vector<string> args = Tokenize(tokens[0], "=");
00734 currentId = args[1];
00735 VERBOSE(1,"alternate weight setting " << currentId << endl);
00736 UTIL_THROW_IF2(m_weightSetting.find(currentId) != m_weightSetting.end(),
00737 "Duplicate alternate weight id: " << currentId);
00738 m_weightSetting[ currentId ] = new ScoreComponentCollection;
00739
00740
00741 for(size_t j=1; j<tokens.size(); j++) {
00742 vector<string> args = Tokenize(tokens[j], "=");
00743
00744 if (args[0] == "weight-file") {
00745 if (args.size() != 2) {
00746 std::cerr << "One argument should be supplied for weight-file";
00747 return false;
00748 }
00749 ScoreComponentCollection extraWeights;
00750 if (!extraWeights.Load(args[1])) {
00751 std::cerr << "Unable to load weights from " << args[1];
00752 return false;
00753 }
00754 m_weightSetting[ currentId ]->PlusEquals(extraWeights);
00755 }
00756
00757 else if (args[0] == "ignore-ff") {
00758 set< string > *ffNameSet = new set< string >;
00759 m_weightSettingIgnoreFF[ currentId ] = *ffNameSet;
00760 vector<string> featureFunctionName = Tokenize(args[1], ",");
00761 for(size_t k=0; k<featureFunctionName.size(); k++) {
00762
00763 map<string,FeatureFunction*>::iterator ffLookUp = nameToFF.find(featureFunctionName[k]);
00764 if (ffLookUp == nameToFF.end()) {
00765 cerr << "ERROR: alternate weight setting " << currentId
00766 << " specifies to ignore feature function " << featureFunctionName[k]
00767 << " but there is no such feature function" << endl;
00768 hasErrors = true;
00769 } else {
00770 m_weightSettingIgnoreFF[ currentId ].insert( featureFunctionName[k] );
00771 }
00772 }
00773 }
00774 }
00775 }
00776
00777
00778 else {
00779 UTIL_THROW_IF2(currentId.empty(), "No alternative weights specified");
00780 vector<string> tokens = Tokenize(weightSpecification[i]);
00781 UTIL_THROW_IF2(tokens.size() < 2
00782 , "Incorrect format for alternate weights: " << weightSpecification[i]);
00783
00784
00785 string name = tokens[0];
00786 name = name.substr(0, name.size() - 1);
00787 vector<float> weights(tokens.size() - 1);
00788 for (size_t i = 1; i < tokens.size(); ++i) {
00789 float weight = Scan<float>(tokens[i]);
00790 weights[i - 1] = weight;
00791 }
00792
00793
00794 map<string,FeatureFunction*>::iterator ffLookUp = nameToFF.find(name);
00795 if (ffLookUp == nameToFF.end()) {
00796 cerr << "ERROR: alternate weight setting " << currentId
00797 << " specifies weight(s) for " << name
00798 << " but there is no such feature function" << endl;
00799 hasErrors = true;
00800 } else {
00801 m_weightSetting[ currentId ]->Assign( nameToFF[name], weights);
00802 }
00803 }
00804 }
00805 UTIL_THROW_IF2(hasErrors, "Errors loading alternate weights");
00806 return true;
00807 }
00808
00809 void StaticData::NoCache()
00810 {
00811 bool noCache;
00812 m_parameter->SetParameter(noCache, "no-cache", false );
00813
00814 if (noCache) {
00815 const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
00816 for (size_t i = 0; i < pts.size(); ++i) {
00817 PhraseDictionary &pt = *pts[i];
00818 pt.SetParameter("cache-size", "0");
00819 }
00820 }
00821 }
00822
00823 std::map<std::string, std::string>
00824 StaticData
00825 ::OverrideFeatureNames()
00826 {
00827 std::map<std::string, std::string> ret;
00828
00829 const PARAM_VEC *params = m_parameter->GetParam("feature-name-overwrite");
00830 if (params && params->size()) {
00831 UTIL_THROW_IF2(params->size() != 1, "Only provide 1 line in the section [feature-name-overwrite]");
00832 vector<string> toks = Tokenize(params->at(0));
00833 UTIL_THROW_IF2(toks.size() % 2 != 0, "Format of -feature-name-overwrite must be [old-name new-name]*");
00834
00835 for (size_t i = 0; i < toks.size(); i += 2) {
00836 const string &oldName = toks[i];
00837 const string &newName = toks[i+1];
00838 ret[oldName] = newName;
00839 }
00840 }
00841
00842
00843
00844 SearchAlgorithm algo = m_options->search.algo;
00845 if (algo == SyntaxS2T || algo == SyntaxT2S ||
00846 algo == SyntaxT2S_SCFG || algo == SyntaxF2S) {
00847
00848
00849
00850
00851 ret["PhraseDictionaryMemory"] = "RuleTable";
00852 ret["PhraseDictionaryScope3"] = "RuleTable";
00853 }
00854
00855 return ret;
00856 }
00857
00858 void StaticData::OverrideFeatures()
00859 {
00860 const PARAM_VEC *params = m_parameter->GetParam("feature-overwrite");
00861 for (size_t i = 0; params && i < params->size(); ++i) {
00862 const string &str = params->at(i);
00863 vector<string> toks = Tokenize(str);
00864 UTIL_THROW_IF2(toks.size() <= 1, "Incorrect format for feature override: " << str);
00865
00866 FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]);
00867
00868 for (size_t j = 1; j < toks.size(); ++j) {
00869 const string &keyValStr = toks[j];
00870 vector<string> keyVal = Tokenize(keyValStr, "=");
00871 UTIL_THROW_IF2(keyVal.size() != 2, "Incorrect format for parameter override: " << keyValStr);
00872
00873 VERBOSE(1, "Override " << ff.GetScoreProducerDescription() << " "
00874 << keyVal[0] << "=" << keyVal[1] << endl);
00875
00876 ff.SetParameter(keyVal[0], keyVal[1]);
00877
00878 }
00879 }
00880
00881 }
00882
00883 void StaticData::CheckLEGACYPT()
00884 {
00885 const std::vector<PhraseDictionary*> &pts = PhraseDictionary::GetColl();
00886 for (size_t i = 0; i < pts.size(); ++i) {
00887 const PhraseDictionary *phraseDictionary = pts[i];
00888 if (dynamic_cast<const PhraseDictionaryTreeAdaptor*>(phraseDictionary) != NULL) {
00889 m_useLegacyPT = true;
00890 return;
00891 }
00892 }
00893
00894 m_useLegacyPT = false;
00895 }
00896
00897
00898 void StaticData::ResetWeights(const std::string &denseWeights, const std::string &sparseFile)
00899 {
00900 m_allWeights = ScoreComponentCollection();
00901
00902
00903 string name("");
00904 vector<float> weights;
00905 vector<string> toks = Tokenize(denseWeights);
00906 for (size_t i = 0; i < toks.size(); ++i) {
00907 const string &tok = toks[i];
00908
00909 if (ends_with(tok, "=")) {
00910
00911
00912 if (name != "") {
00913
00914 const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
00915 m_allWeights.Assign(&ff, weights);
00916 weights.clear();
00917 }
00918
00919 name = tok.substr(0, tok.size() - 1);
00920 } else {
00921
00922 float weight = Scan<float>(toks[i]);
00923 weights.push_back(weight);
00924 }
00925 }
00926
00927 const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(name);
00928 m_allWeights.Assign(&ff, weights);
00929
00930
00931 InputFileStream sparseStrme(sparseFile);
00932 string line;
00933 while (getline(sparseStrme, line)) {
00934 vector<string> toks = Tokenize(line);
00935 UTIL_THROW_IF2(toks.size() != 2, "Incorrect sparse weight format. Should be FFName_spareseName weight");
00936
00937 vector<string> names = Tokenize(toks[0], "_");
00938 UTIL_THROW_IF2(names.size() != 2, "Incorrect sparse weight name. Should be FFName_spareseName");
00939
00940 const FeatureFunction &ff = FeatureFunction::FindFeatureFunction(names[0]);
00941 m_allWeights.Assign(&ff, names[1], Scan<float>(toks[1]));
00942 }
00943 }
00944
00945 size_t StaticData::GetCoordSpace(string space) const
00946 {
00947 map<string const, size_t>::const_iterator m = m_coordSpaceMap.find(space);
00948 if(m == m_coordSpaceMap.end()) {
00949 return 0;
00950 }
00951 return m->second;
00952 }
00953
00954 size_t StaticData::MapCoordSpace(string space)
00955 {
00956 map<string const, size_t>::const_iterator m = m_coordSpaceMap.find(space);
00957 if (m != m_coordSpaceMap.end()) {
00958 return m->second;
00959 }
00960 size_t id = m_coordSpaceNextID;
00961 m_coordSpaceNextID += 1;
00962 m_coordSpaceMap[space] = id;
00963 return id;
00964 }
00965
00966 }