00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef moses_StaticData_h
00024 #define moses_StaticData_h
00025
00026 #include <stdexcept>
00027 #include <limits>
00028 #include <list>
00029 #include <vector>
00030 #include <map>
00031 #include <memory>
00032 #include <utility>
00033 #include <fstream>
00034 #include <string>
00035
00036 #ifdef WITH_THREADS
00037 #include <boost/thread.hpp>
00038 #include <boost/thread/mutex.hpp>
00039 #endif
00040
00041 #include "Parameter.h"
00042 #include "SentenceStats.h"
00043 #include "ScoreComponentCollection.h"
00044 #include "moses/FF/Factory.h"
00045 #include "moses/PP/Factory.h"
00046
00047 #include "moses/parameters/AllOptions.h"
00048 #include "moses/parameters/BookkeepingOptions.h"
00049
00050 namespace Moses
00051 {
00052
// Forward declarations of project types that are defined in other headers.
class DecodeGraph;
class DecodeStep;
class DynamicCacheBasedLanguageModel;
class InputType;
class PhraseDictionaryDynamicCacheBased;

// One (string, float) record.
// NOTE(review): presumably a left-hand-side label for unknown words paired
// with its score -- confirm against the code that fills the list.
typedef std::pair<std::string, float> UnknownLHSEntry;

// Ordered collection of UnknownLHSEntry records.
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
00062
00067 class StaticData
00068 {
00069 friend class HyperParameterAsWeight;
00070
00071 private:
00072 static StaticData s_instance;
00073 protected:
00074 Parameter *m_parameter;
00075 boost::shared_ptr<AllOptions> m_options;
00076
00077 mutable ScoreComponentCollection m_allWeights;
00078
00079 std::vector<DecodeGraph*> m_decodeGraphs;
00080
00081
00082
00083 float
00084 m_wordDeletionWeight;
00085
00086
00087
00088
00089
00090
00091
00092 bool m_reorderingConstraint;
00093 BookkeepingOptions m_bookkeeping_options;
00094
00095
00096 bool m_requireSortingAfterSourceContext;
00097
00098 mutable size_t m_verboseLevel;
00099
00100 std::string m_factorDelimiter;
00101
00102
00103 size_t m_lmcache_cleanup_threshold;
00104
00105 std::string m_outputUnknownsFile;
00106
00107
00108
00109 Word m_inputDefaultNonTerminal, m_outputDefaultNonTerminal;
00110 SourceLabelOverlap m_sourceLabelOverlap;
00111 UnknownLHSList m_unknownLHS;
00112
00113 int m_threadCount;
00114
00115
00116
00117 mutable std::string m_currentWeightSetting;
00118 std::map< std::string, ScoreComponentCollection* > m_weightSetting;
00119 std::map< std::string, std::set< std::string > > m_weightSettingIgnoreFF;
00120 std::map< std::string, std::set< size_t > > m_weightSettingIgnoreDP;
00121
00122 bool m_useLegacyPT;
00123
00124
00125
00126 FeatureRegistry m_registry;
00127 PhrasePropertyFactory m_phrasePropertyFactory;
00128
00129 StaticData();
00130
00131 void LoadChartDecodingParameters();
00132 void LoadNonTerminals();
00133
00135 void LoadDecodeGraphs();
00136 void LoadDecodeGraphsOld(const std::vector<std::string> &mappingVector,
00137 const std::vector<size_t> &maxChartSpans);
00138 void LoadDecodeGraphsNew(const std::vector<std::string> &mappingVector,
00139 const std::vector<size_t> &maxChartSpans);
00140
00141 void NoCache();
00142
00143 std::string m_binPath;
00144
00145
00146 std::vector<std::vector<Word> > m_softMatchesMap;
00147
00148 const StatefulFeatureFunction* m_treeStructure;
00149
00150 void ini_oov_options();
00151 bool ini_output_options();
00152 bool ini_performance_options();
00153
00154 void initialize_features();
00155
00156
00157
00158 std::map< std::string const, size_t > m_coordSpaceMap;
00159 size_t m_coordSpaceNextID;
00160
00161 public:
00162
00164 ~StaticData();
00165
00167 static const StaticData& Instance() {
00168 return s_instance;
00169 }
00170
00172 static StaticData& InstanceNonConst() {
00173 return s_instance;
00174 }
00175
00179 #ifdef WIN32
00180 static void Reset() {
00181 s_instance = StaticData();
00182 }
00183 #endif
00184
00186
00187 static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);
00188
00190 bool LoadData(Parameter *parameter);
00191 void ClearData();
00192
00193 const Parameter &GetParameter() const {
00194 return *m_parameter;
00195 }
00196
00197 AllOptions::ptr const
00198 options() const {
00199 return m_options;
00200 }
00201
00202 size_t
00203 GetVerboseLevel() const {
00204 return m_verboseLevel;
00205 }
00206
00207 void
00208 SetVerboseLevel(int x) const {
00209 m_verboseLevel = x;
00210 }
00211
00212 const ScoreComponentCollection&
00213 GetAllWeights() const {
00214 return m_allWeights;
00215 }
00216
00217 void SetAllWeights(const ScoreComponentCollection& weights) {
00218 m_allWeights = weights;
00219 }
00220
00221
00222 float GetWeight(const FeatureFunction* sp) const {
00223 return m_allWeights.GetScoreForProducer(sp);
00224 }
00225
00226
00227 void SetWeight(const FeatureFunction* sp, float weight) ;
00228
00229
00230
00231 std::vector<float> GetWeights(const FeatureFunction* sp) const {
00232 return m_allWeights.GetScoresForProducer(sp);
00233 }
00234
00235
00236 void SetWeights(const FeatureFunction* sp, const std::vector<float>& weights);
00237
00238 const std::string& GetFactorDelimiter() const {
00239 return m_factorDelimiter;
00240 }
00241
00242 size_t GetLMCacheCleanupThreshold() const {
00243 return m_lmcache_cleanup_threshold;
00244 }
00245
00246 const std::string& GetOutputUnknownsFile() const {
00247 return m_outputUnknownsFile;
00248 }
00249
00250 const UnknownLHSList &GetUnknownLHS() const {
00251 return m_unknownLHS;
00252 }
00253
00254 float GetRuleCountThreshold() const {
00255 return 999999;
00256 }
00257
00258 void ReLoadBleuScoreFeatureParameter(float weight);
00259
00260 Parameter* GetParameter() {
00261 return m_parameter;
00262 }
00263
00264 int ThreadCount() const {
00265 return m_threadCount;
00266 }
00267
00268 void SetExecPath(const std::string &path);
00269 const std::string &GetBinDirectory() const;
00270
00271 bool NeedAlignmentInfo() const {
00272 return m_bookkeeping_options.need_alignment_info;
00273 }
00274
00275 bool GetHasAlternateWeightSettings() const {
00276 return m_weightSetting.size() > 0;
00277 }
00278
00282 bool IsFeatureFunctionIgnored( const FeatureFunction &ff ) const {
00283 if (!GetHasAlternateWeightSettings()) {
00284 return false;
00285 }
00286 std::map< std::string, std::set< std::string > >::const_iterator lookupIgnoreFF
00287 = m_weightSettingIgnoreFF.find( m_currentWeightSetting );
00288 if (lookupIgnoreFF == m_weightSettingIgnoreFF.end()) {
00289 return false;
00290 }
00291 const std::string &ffName = ff.GetScoreProducerDescription();
00292 const std::set< std::string > &ignoreFF = lookupIgnoreFF->second;
00293 return ignoreFF.count( ffName );
00294 }
00295
00300 bool IsDecodingGraphIgnored( const size_t id ) const {
00301 if (!GetHasAlternateWeightSettings()) {
00302 return false;
00303 }
00304 std::map< std::string, std::set< size_t > >::const_iterator lookupIgnoreDP
00305 = m_weightSettingIgnoreDP.find( m_currentWeightSetting );
00306 if (lookupIgnoreDP == m_weightSettingIgnoreDP.end()) {
00307 return false;
00308 }
00309 const std::set< size_t > &ignoreDP = lookupIgnoreDP->second;
00310 return ignoreDP.count( id );
00311 }
00312
00315 void SetWeightSetting(const std::string &settingName) const {
00316
00317
00318 if (m_currentWeightSetting == settingName) {
00319 return;
00320 }
00321
00322
00323 if (!GetHasAlternateWeightSettings()) {
00324 std::cerr << "Warning: Input specifies weight setting, but model does not support alternate weight settings.";
00325 return;
00326 }
00327
00328
00329 m_currentWeightSetting = settingName;
00330 std::map< std::string, ScoreComponentCollection* >::const_iterator i =
00331 m_weightSetting.find( settingName );
00332
00333
00334 if (i == m_weightSetting.end()) {
00335 std::cerr << "Warning: Specified weight setting " << settingName
00336 << " does not exist in model, using default weight setting instead";
00337 i = m_weightSetting.find( "default" );
00338 m_currentWeightSetting = "default";
00339 }
00340
00341
00342 m_allWeights = *(i->second);
00343 }
00344
00345 float GetWeightWordPenalty() const;
00346
00347 const std::vector<DecodeGraph*>& GetDecodeGraphs() const {
00348 return m_decodeGraphs;
00349 }
00350
00351
00352 void InitializeForInput(ttasksptr const& ttask) const;
00353 void CleanUpAfterSentenceProcessing(ttasksptr const& ttask) const;
00354
00355 void LoadFeatureFunctions();
00356 bool CheckWeights() const;
00357 void LoadSparseWeightsFromConfig();
00358 bool LoadWeightSettings();
00359 bool LoadAlternateWeightSettings();
00360
00361 std::map<std::string, std::string> OverrideFeatureNames();
00362 void OverrideFeatures();
00363
00364 const FeatureRegistry &GetFeatureRegistry() const {
00365 return m_registry;
00366 }
00367
00368 const PhrasePropertyFactory &GetPhrasePropertyFactory() const {
00369 return m_phrasePropertyFactory;
00370 }
00371
00375 void CheckLEGACYPT();
00376 bool GetUseLegacyPT() const {
00377 return m_useLegacyPT;
00378 }
00379
00380 void SetSoftMatches(std::vector<std::vector<Word> >& softMatchesMap) {
00381 m_softMatchesMap = softMatchesMap;
00382 }
00383
00384 const std::vector< std::vector<Word> >& GetSoftMatches() const {
00385 return m_softMatchesMap;
00386 }
00387
00388 void ResetWeights(const std::string &denseWeights, const std::string &sparseFile);
00389
00390
00391 const StatefulFeatureFunction* GetTreeStructure() const {
00392 return m_treeStructure;
00393 }
00394
00395 void SetTreeStructure(const StatefulFeatureFunction* treeStructure) {
00396 m_treeStructure = treeStructure;
00397 }
00398
00399 bool RequireSortingAfterSourceContext() const {
00400 return m_requireSortingAfterSourceContext;
00401 }
00402
00403
00404 size_t GetCoordSpace(std::string space) const;
00405 size_t MapCoordSpace(std::string space);
00406 };
00407
00408 }
00409 #endif