00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef moses_StaticData_h
00023 #define moses_StaticData_h
00024
00025 #include <stdexcept>
00026 #include <limits>
00027 #include <list>
00028 #include <vector>
00029 #include <map>
00030 #include <memory>
00031 #include <utility>
00032 #include <fstream>
00033 #include <string>
00034
00035 #ifdef WITH_THREADS
00036 #include <boost/thread/mutex.hpp>
00037 #endif
00038
00039 #include "TypeDef.h"
00040 #include "ScoreIndexManager.h"
00041 #include "FactorCollection.h"
00042 #include "Parameter.h"
00043 #include "LM/Base.h"
00044 #include "LMList.h"
00045 #include "SentenceStats.h"
00046 #include "DecodeGraph.h"
00047 #include "TranslationOptionList.h"
00048 #include "TranslationSystem.h"
00049
00050 namespace Moses
00051 {
00052
// Forward declarations of types that this header only refers to by name
// (pointer members and friends); their definitions live in other headers.
class InputType;
class LexicalReordering;
class GlobalLexicalModel;
class PhraseDictionaryFeature;
class GenerationDictionary;
class DistortionScoreProducer;
class DecodeStep;
class UnknownWordPenaltyProducer;
#ifdef HAVE_SYNLM
// Only declared when the build defines HAVE_SYNLM.
class SyntacticLanguageModel;
#endif
class TranslationSystem;

// One (string, float) entry of the unknown-LHS list.
// NOTE(review): presumably (label, probability) — confirm against the loader.
typedef std::pair<std::string, float> UnknownLHSEntry;
// Ordered collection of UnknownLHSEntry values; exposed by StaticData::GetUnknownLHS().
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
00068
00070 class StaticData
00071 {
00072 private:
00073 static StaticData s_instance;
00074 protected:
00075
00076 std::map<long,Phrase> m_constraints;
00077 std::vector<PhraseDictionaryFeature*> m_phraseDictionary;
00078 std::vector<GenerationDictionary*> m_generationDictionary;
00079 Parameter *m_parameter;
00080 std::vector<FactorType> m_inputFactorOrder, m_outputFactorOrder;
00081 LMList m_languageModel;
00082 #ifdef HAVE_SYNLM
00083 SyntacticLanguageModel* m_syntacticLanguageModel;
00084 #endif
00085 ScoreIndexManager m_scoreIndexManager;
00086 std::vector<float> m_allWeights;
00087 std::vector<LexicalReordering*> m_reorderModels;
00088 std::vector<GlobalLexicalModel*> m_globalLexicalModels;
00089 std::vector<DecodeGraph*> m_decodeGraphs;
00090 std::vector<size_t> m_decodeGraphBackoff;
00091
00092
00093 std::map<std::string, TranslationSystem> m_translationSystems;
00094 float
00095 m_beamWidth,
00096 m_earlyDiscardingThreshold,
00097 m_translationOptionThreshold,
00098 m_wordDeletionWeight;
00099
00100
00101 int m_maxDistortion;
00102
00103
00104
00105 bool m_reorderingConstraint;
00106 size_t
00107 m_maxHypoStackSize
00108 , m_minHypoStackDiversity
00109 , m_nBestSize
00110 , m_latticeSamplesSize
00111 , m_nBestFactor
00112 , m_maxNoTransOptPerCoverage
00113 , m_maxNoPartTransOpt
00114 , m_maxPhraseLength
00115 , m_numLinkParams;
00116
00117 std::string
00118 m_constraintFileName;
00119
00120 std::string m_nBestFilePath, m_latticeSamplesFilePath;
00121 bool m_fLMsLoaded, m_labeledNBestList,m_nBestIncludesAlignment;
00122 bool m_dropUnknown;
00123 bool m_wordDeletionEnabled;
00124
00125 bool m_disableDiscarding;
00126 bool m_printAllDerivations;
00127
00128 bool m_sourceStartPosMattersForRecombination;
00129 bool m_recoverPath;
00130 bool m_outputHypoScore;
00131
00132 ParsingAlgorithm m_parsingAlgorithm;
00133 SearchAlgorithm m_searchAlgorithm;
00134 InputTypeEnum m_inputType;
00135 size_t m_numInputScores;
00136
00137 mutable size_t m_verboseLevel;
00138 std::vector<WordPenaltyProducer*> m_wordPenaltyProducers;
00139 std::vector<DistortionScoreProducer *> m_distortionScoreProducers;
00140 UnknownWordPenaltyProducer *m_unknownWordPenaltyProducer;
00141 bool m_reportSegmentation;
00142 bool m_reportAllFactors;
00143 bool m_reportAllFactorsNBest;
00144 std::string m_detailedTranslationReportingFilePath;
00145 bool m_onlyDistinctNBest;
00146 bool m_UseAlignmentInfo;
00147 bool m_PrintAlignmentInfo;
00148 bool m_PrintAlignmentInfoNbest;
00149
00150 std::string m_alignmentOutputFile;
00151
00152 std::string m_factorDelimiter;
00153 size_t m_maxFactorIdx[2];
00154 size_t m_maxNumFactors;
00155
00156 XmlInputType m_xmlInputType;
00157 std::pair<std::string,std::string> m_xmlBrackets;
00158
00159 bool m_mbr;
00160 bool m_useLatticeMBR;
00161 bool m_useConsensusDecoding;
00162 size_t m_mbrSize;
00163 float m_mbrScale;
00164 size_t m_lmbrPruning;
00165 std::vector<float> m_lmbrThetas;
00166 bool m_useLatticeHypSetForLatticeMBR;
00167 float m_lmbrPrecision;
00168 float m_lmbrPRatio;
00169 float m_lmbrMapWeight;
00170
00171 size_t m_lmcache_cleanup_threshold;
00172 bool m_lmEnableOOVFeature;
00173
00174 bool m_timeout;
00175 size_t m_timeout_threshold;
00176
00177 bool m_useTransOptCache;
00178 mutable std::map<std::pair<size_t, Phrase>, std::pair<TranslationOptionList*,clock_t> > m_transOptCache;
00179 size_t m_transOptCacheMaxSize;
00180
00181
00182 #ifdef WITH_THREADS
00183 mutable boost::mutex m_transOptCacheMutex;
00184 #endif
00185 bool m_isAlwaysCreateDirectTranslationOption;
00187
00188 bool m_outputWordGraph;
00189 bool m_outputSearchGraph;
00190 bool m_outputSearchGraphExtended;
00191 #ifdef HAVE_PROTOBUF
00192 bool m_outputSearchGraphPB;
00193 #endif
00194 bool m_unprunedSearchGraph;
00195
00196 size_t m_cubePruningPopLimit;
00197 size_t m_cubePruningDiversity;
00198 bool m_cubePruningLazyScoring;
00199 size_t m_ruleLimit;
00200
00201
00202
00203
00204 Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
00205 SourceLabelOverlap m_sourceLabelOverlap;
00206 UnknownLHSList m_unknownLHS;
00207 WordAlignmentSort m_wordAlignmentSort;
00208
00209 int m_threadCount;
00210 long m_startTranslationId;
00211
00212 StaticData();
00213
00214 void LoadPhraseBasedParameters();
00215 void LoadChartDecodingParameters();
00216 void LoadNonTerminals();
00217
00219 void SetBooleanParameter(bool *paramter, std::string parameterName, bool defaultValue);
00221 bool LoadLanguageModels();
00222 #ifdef HAVE_SYNLM
00224 bool LoadSyntacticLanguageModel();
00225 #endif
00227 bool LoadPhraseTables();
00229 bool LoadGenerationTables();
00231 bool LoadDecodeGraphs();
00232 bool LoadLexicalReorderingModel();
00233 bool LoadGlobalLexicalModel();
00234 void ReduceTransOptCache() const;
00235 bool m_continuePartialTranslation;
00236
00237 public:
00238
00239 bool IsAlwaysCreateDirectTranslationOption() const {
00240 return m_isAlwaysCreateDirectTranslationOption;
00241 }
00243 ~StaticData();
00245 static const StaticData& Instance() {
00246 return s_instance;
00247 }
00248
00252 #ifdef WIN32
00253 static void Reset() {
00254 s_instance = StaticData();
00255 }
00256 #endif
00257
00259 static bool LoadDataStatic(Parameter *parameter) {
00260 return s_instance.LoadData(parameter);
00261 }
00262
00264 bool LoadData(Parameter *parameter);
00265
00266 const PARAM_VEC &GetParam(const std::string ¶mName) const {
00267 return m_parameter->GetParam(paramName);
00268 }
00269
00270 const std::vector<FactorType> &GetInputFactorOrder() const {
00271 return m_inputFactorOrder;
00272 }
00273 const std::vector<FactorType> &GetOutputFactorOrder() const {
00274 return m_outputFactorOrder;
00275 }
00276
00277
00278 inline bool GetSourceStartPosMattersForRecombination() const {
00279 return m_sourceStartPosMattersForRecombination;
00280 }
00281 inline bool GetDropUnknown() const {
00282 return m_dropUnknown;
00283 }
00284 inline bool GetDisableDiscarding() const {
00285 return m_disableDiscarding;
00286 }
00287 inline size_t GetMaxNoTransOptPerCoverage() const {
00288 return m_maxNoTransOptPerCoverage;
00289 }
00290 inline size_t GetMaxNoPartTransOpt() const {
00291 return m_maxNoPartTransOpt;
00292 }
00293 inline const Phrase* GetConstrainingPhrase(long sentenceID) const {
00294 std::map<long,Phrase>::const_iterator iter = m_constraints.find(sentenceID);
00295 if (iter != m_constraints.end()) {
00296 const Phrase& phrase = iter->second;
00297 return &phrase;
00298 } else {
00299 return NULL;
00300 }
00301 }
00302 inline size_t GetMaxPhraseLength() const {
00303 return m_maxPhraseLength;
00304 }
00305 bool IsWordDeletionEnabled() const {
00306 return m_wordDeletionEnabled;
00307 }
00308 size_t GetMaxHypoStackSize() const {
00309 return m_maxHypoStackSize;
00310 }
00311 size_t GetMinHypoStackDiversity() const {
00312 return m_minHypoStackDiversity;
00313 }
00314 size_t GetCubePruningPopLimit() const {
00315 return m_cubePruningPopLimit;
00316 }
00317 size_t GetCubePruningDiversity() const {
00318 return m_cubePruningDiversity;
00319 }
00320 bool GetCubePruningLazyScoring() const {
00321 return m_cubePruningLazyScoring;
00322 }
00323 size_t IsPathRecoveryEnabled() const {
00324 return m_recoverPath;
00325 }
00326 int GetMaxDistortion() const {
00327 return m_maxDistortion;
00328 }
00329 bool UseReorderingConstraint() const {
00330 return m_reorderingConstraint;
00331 }
00332 float GetBeamWidth() const {
00333 return m_beamWidth;
00334 }
00335 float GetEarlyDiscardingThreshold() const {
00336 return m_earlyDiscardingThreshold;
00337 }
00338 bool UseEarlyDiscarding() const {
00339 return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
00340 }
00341 float GetTranslationOptionThreshold() const {
00342 return m_translationOptionThreshold;
00343 }
00345 size_t GetTotalScoreComponents() const {
00346 return m_scoreIndexManager.GetTotalNumberOfScores();
00347 }
00348 const ScoreIndexManager& GetScoreIndexManager() const {
00349 return m_scoreIndexManager;
00350 }
00351
00352 const TranslationSystem& GetTranslationSystem(std::string id) const {
00353 std::map<std::string, TranslationSystem>::const_iterator iter =
00354 m_translationSystems.find(id);
00355 if (iter == m_translationSystems.end()) {
00356 VERBOSE(1, "Translation system not found " << id << std::endl);
00357 throw std::runtime_error("Unknown translation system id");
00358 } else {
00359 return iter->second;
00360 }
00361 }
00362 size_t GetVerboseLevel() const {
00363 return m_verboseLevel;
00364 }
00365 void SetVerboseLevel(int x) const {
00366 m_verboseLevel = x;
00367 }
00368 bool GetReportSegmentation() const {
00369 return m_reportSegmentation;
00370 }
00371 bool GetReportAllFactors() const {
00372 return m_reportAllFactors;
00373 }
00374 bool GetReportAllFactorsNBest() const {
00375 return m_reportAllFactorsNBest;
00376 }
00377 bool IsDetailedTranslationReportingEnabled() const {
00378 return !m_detailedTranslationReportingFilePath.empty();
00379 }
00380 const std::string &GetDetailedTranslationReportingFilePath() const {
00381 return m_detailedTranslationReportingFilePath;
00382 }
00383
00384 const std::string &GetAlignmentOutputFile() const {
00385 return m_alignmentOutputFile;
00386 }
00387
00388 bool IsLabeledNBestList() const {
00389 return m_labeledNBestList;
00390 }
00391 bool NBestIncludesAlignment() const {
00392 return m_nBestIncludesAlignment;
00393 }
00394 size_t GetNumLinkParams() const {
00395 return m_numLinkParams;
00396 }
00397 const std::vector<std::string> &GetDescription() const {
00398 return m_parameter->GetParam("description");
00399 }
00400
00401
00402 size_t GetNBestSize() const {
00403 return m_nBestSize;
00404 }
00405 const std::string &GetNBestFilePath() const {
00406 return m_nBestFilePath;
00407 }
00408 bool IsNBestEnabled() const {
00409 return (!m_nBestFilePath.empty()) || m_mbr || m_useLatticeMBR || m_outputSearchGraph || m_useConsensusDecoding || !m_latticeSamplesFilePath.empty()
00410 #ifdef HAVE_PROTOBUF
00411 || m_outputSearchGraphPB
00412 #endif
00413 ;
00414 }
00415 size_t GetLatticeSamplesSize() const {
00416 return m_latticeSamplesSize;
00417 }
00418
00419 const std::string& GetLatticeSamplesFilePath() const {
00420 return m_latticeSamplesFilePath;
00421 }
00422
00423 size_t GetNBestFactor() const {
00424 return m_nBestFactor;
00425 }
00426 bool GetOutputWordGraph() const {
00427 return m_outputWordGraph;
00428 }
00429
00431 void SetWeightsForScoreProducer(const ScoreProducer* sp, const std::vector<float>& weights);
00432 InputTypeEnum GetInputType() const {
00433 return m_inputType;
00434 }
00435 ParsingAlgorithm GetParsingAlgorithm() const {
00436 return m_parsingAlgorithm;
00437 }
00438 SearchAlgorithm GetSearchAlgorithm() const {
00439 return m_searchAlgorithm;
00440 }
00441 LMList GetLMList() const {
00442 return m_languageModel;
00443 }
00444 size_t GetNumInputScores() const {
00445 return m_numInputScores;
00446 }
00447
00448 const std::vector<float>& GetAllWeights() const {
00449 return m_allWeights;
00450 }
00451
00452 bool UseAlignmentInfo() const {
00453 return m_UseAlignmentInfo;
00454 }
00455 void UseAlignmentInfo(bool a) {
00456 m_UseAlignmentInfo=a;
00457 };
00458 bool PrintAlignmentInfo() const {
00459 return m_PrintAlignmentInfo;
00460 }
00461 bool PrintAlignmentInfoInNbest() const {
00462 return m_PrintAlignmentInfoNbest;
00463 }
00464 bool GetDistinctNBest() const {
00465 return m_onlyDistinctNBest;
00466 }
00467 const std::string& GetFactorDelimiter() const {
00468 return m_factorDelimiter;
00469 }
00470 size_t GetMaxNumFactors(FactorDirection direction) const {
00471 return m_maxFactorIdx[(size_t)direction]+1;
00472 }
00473 size_t GetMaxNumFactors() const {
00474 return m_maxNumFactors;
00475 }
00476 bool UseMBR() const {
00477 return m_mbr;
00478 }
00479 bool UseLatticeMBR() const {
00480 return m_useLatticeMBR ;
00481 }
00482 bool UseConsensusDecoding() const {
00483 return m_useConsensusDecoding;
00484 }
00485 void SetUseLatticeMBR(bool flag) {
00486 m_useLatticeMBR = flag;
00487 }
00488 size_t GetMBRSize() const {
00489 return m_mbrSize;
00490 }
00491 float GetMBRScale() const {
00492 return m_mbrScale;
00493 }
00494 void SetMBRScale(float scale) {
00495 m_mbrScale = scale;
00496 }
00497 size_t GetLatticeMBRPruningFactor() const {
00498 return m_lmbrPruning;
00499 }
00500 void SetLatticeMBRPruningFactor(size_t prune) {
00501 m_lmbrPruning = prune;
00502 }
00503 const std::vector<float>& GetLatticeMBRThetas() const {
00504 return m_lmbrThetas;
00505 }
00506 bool UseLatticeHypSetForLatticeMBR() const {
00507 return m_useLatticeHypSetForLatticeMBR;
00508 }
00509 float GetLatticeMBRPrecision() const {
00510 return m_lmbrPrecision;
00511 }
00512 void SetLatticeMBRPrecision(float p) {
00513 m_lmbrPrecision = p;
00514 }
00515 float GetLatticeMBRPRatio() const {
00516 return m_lmbrPRatio;
00517 }
00518 void SetLatticeMBRPRatio(float r) {
00519 m_lmbrPRatio = r;
00520 }
00521
00522 float GetLatticeMBRMapWeight() const {
00523 return m_lmbrMapWeight;
00524 }
00525
00526 bool UseTimeout() const {
00527 return m_timeout;
00528 }
00529 size_t GetTimeoutThreshold() const {
00530 return m_timeout_threshold;
00531 }
00532
00533 size_t GetLMCacheCleanupThreshold() const {
00534 return m_lmcache_cleanup_threshold;
00535 }
00536
00537 bool GetLMEnableOOVFeature() const {
00538 return m_lmEnableOOVFeature;
00539 }
00540
00541 bool GetOutputSearchGraph() const {
00542 return m_outputSearchGraph;
00543 }
00544 void SetOutputSearchGraph(bool outputSearchGraph) {
00545 m_outputSearchGraph = outputSearchGraph;
00546 }
00547 bool GetOutputSearchGraphExtended() const {
00548 return m_outputSearchGraphExtended;
00549 }
00550 #ifdef HAVE_PROTOBUF
00551 bool GetOutputSearchGraphPB() const {
00552 return m_outputSearchGraphPB;
00553 }
00554 #endif
00555 bool GetUnprunedSearchGraph() const {
00556 return m_unprunedSearchGraph;
00557 }
00558
00559 XmlInputType GetXmlInputType() const {
00560 return m_xmlInputType;
00561 }
00562
00563 std::pair<std::string,std::string> GetXmlBrackets() const {
00564 return m_xmlBrackets;
00565 }
00566
00567 bool GetUseTransOptCache() const {
00568 return m_useTransOptCache;
00569 }
00570
00571 void AddTransOptListToCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase, const TranslationOptionList &transOptList) const;
00572
00573 void ClearTransOptionCache() const;
00574
00575
00576 const TranslationOptionList* FindTransOptListInCache(const DecodeGraph &decodeGraph, const Phrase &sourcePhrase) const;
00577
00578 bool PrintAllDerivations() const {
00579 return m_printAllDerivations;
00580 }
00581
00582 const UnknownLHSList &GetUnknownLHS() const {
00583 return m_unknownLHS;
00584 }
00585
00586 const Word &GetInputDefaultNonTerminal() const {
00587 return m_inputDefaultNonTerminal;
00588 }
00589 const Word &GetOutputDefaultNonTerminal() const {
00590 return m_outputDefaultNonTerminal;
00591 }
00592
00593 SourceLabelOverlap GetSourceLabelOverlap() const {
00594 return m_sourceLabelOverlap;
00595 }
00596
00597 bool GetOutputHypoScore() const {
00598 return m_outputHypoScore;
00599 }
00600 size_t GetRuleLimit() const {
00601 return m_ruleLimit;
00602 }
00603 float GetRuleCountThreshold() const {
00604 return 999999;
00605 }
00606
00607 bool ContinuePartialTranslation() const {
00608 return m_continuePartialTranslation;
00609 }
00610
00611 WordAlignmentSort GetWordAlignmentSort() const {
00612 return m_wordAlignmentSort;
00613 }
00614
00615 int ThreadCount() const {
00616 return m_threadCount;
00617 }
00618
00619 long GetStartTranslationId() const
00620 { return m_startTranslationId; }
00621 };
00622
00623 }
00624 #endif