00001 #include "util/exception.hh"
00002 #include "moses/FF/Factory.h"
00003 #include "moses/StaticData.h"
00004
00005 #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
00006 #include "moses/TranslationModel/PhraseDictionaryMemory.h"
00007 #include "moses/TranslationModel/PhraseDictionaryMultiModel.h"
00008 #include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
00009 #include "moses/TranslationModel/PhraseDictionaryGroup.h"
00010 #include "moses/TranslationModel/PhraseDictionaryScope3.h"
00011 #include "moses/TranslationModel/PhraseDictionaryTransliteration.h"
00012 #include "moses/TranslationModel/PhraseDictionaryDynamicCacheBased.h"
00013
00014 #include "moses/TranslationModel/RuleTable/PhraseDictionaryOnDisk.h"
00015 #include "moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.h"
00016 #include "moses/TranslationModel/RuleTable/PhraseDictionaryALSuffixArray.h"
00017 #include "moses/TranslationModel/ProbingPT.h"
00018 #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentence.h"
00019 #include "moses/TranslationModel/PhraseDictionaryMemoryPerSentenceOnDemand.h"
00020
00021 #include "moses/FF/LexicalReordering/LexicalReordering.h"
00022
00023 #include "moses/FF/BleuScoreFeature.h"
00024 #include "moses/FF/TargetWordInsertionFeature.h"
00025 #include "moses/FF/SourceWordDeletionFeature.h"
00026 #include "moses/FF/GlobalLexicalModel.h"
00027 #include "moses/FF/GlobalLexicalModelUnlimited.h"
00028 #include "moses/FF/Model1Feature.h"
00029 #include "moses/FF/UnknownWordPenaltyProducer.h"
00030 #include "moses/FF/WordTranslationFeature.h"
00031 #include "moses/FF/TargetBigramFeature.h"
00032 #include "moses/FF/TargetNgramFeature.h"
00033 #include "moses/FF/PhraseBoundaryFeature.h"
00034 #include "moses/FF/PhraseDistanceFeature.h"
00035 #include "moses/FF/PhrasePairFeature.h"
00036 #include "moses/FF/RulePairUnlexicalizedSource.h"
00037 #include "moses/FF/PhraseLengthFeature.h"
00038 #include "moses/FF/DistortionScoreProducer.h"
00039 #include "moses/FF/SparseHieroReorderingFeature.h"
00040 #include "moses/FF/WordPenaltyProducer.h"
00041 #include "moses/FF/InputFeature.h"
00042 #include "moses/FF/PhrasePenalty.h"
00043 #include "moses/FF/OSM-Feature/OpSequenceModel.h"
00044 #include "moses/FF/Dsg-Feature/DsgModel.h"
00045 #include "moses/FF/ControlRecombination.h"
00046 #include "moses/FF/ConstrainedDecoding.h"
00047 #include "moses/FF/SoftSourceSyntacticConstraintsFeature.h"
00048 #include "moses/FF/TargetConstituentAdjacencyFeature.h"
00049 #include "moses/FF/TargetPreferencesFeature.h"
00050 #include "moses/FF/CoveredReferenceFeature.h"
00051 #include "moses/FF/TreeStructureFeature.h"
00052 #include "moses/FF/SoftMatchingFeature.h"
00053 #include "moses/FF/DynamicCacheBasedLanguageModel.h"
00054 #include "moses/FF/SourceGHKMTreeInputMatchFeature.h"
00055 #include "moses/FF/HyperParameterAsWeight.h"
00056 #include "moses/FF/SetSourcePhrase.h"
00057 #include "moses/FF/PhraseOrientationFeature.h"
00058 #include "moses/FF/UnalignedWordCountFeature.h"
00059 #include "CountNonTerms.h"
00060 #include "ReferenceComparison.h"
00061 #include "RuleScope.h"
00062 #include "MaxSpanFreeNonTermSource.h"
00063 #include "NieceTerminal.h"
00064 #include "SpanLength.h"
00065 #include "SyntaxRHS.h"
00066 #include "DeleteRules.h"
00067
00068 #include "moses/FF/ExampleStatelessFF.h"
00069 #include "moses/FF/ExampleStatefulFF.h"
00070 #include "moses/LM/ExampleLM.h"
00071 #include "moses/FF/ExampleTranslationOptionListFeature.h"
00072 #include "moses/LM/BilingualLM.h"
00073 #include "moses/TranslationModel/ExamplePT.h"
00074 #include "moses/Syntax/InputWeightFF.h"
00075 #include "moses/Syntax/RuleTableFF.h"
00076
00077 #include "moses/LM/InMemoryPerSentenceOnDemandLM.h"
00078 #include "moses/FF/EditOps.h"
00079 #include "moses/FF/CorrectionPattern.h"
00080
00081 #ifdef HAVE_VW
00082 #include "moses/FF/VW/VW.h"
00083 #include "moses/FF/VW/VWFeatureContextBigrams.h"
00084 #include "moses/FF/VW/VWFeatureContextBilingual.h"
00085 #include "moses/FF/VW/VWFeatureContextWindow.h"
00086 #include "moses/FF/VW/VWFeatureSourceBagOfWords.h"
00087 #include "moses/FF/VW/VWFeatureSourceBigrams.h"
00088 #include "moses/FF/VW/VWFeatureSourceIndicator.h"
00089 #include "moses/FF/VW/VWFeatureSourcePhraseInternal.h"
00090 #include "moses/FF/VW/VWFeatureSourceSenseWindow.h"
00091 #include "moses/FF/VW/VWFeatureSourceWindow.h"
00092 #include "moses/FF/VW/VWFeatureTargetBigrams.h"
00093 #include "moses/FF/VW/VWFeatureTargetIndicator.h"
00094 #include "moses/FF/VW/VWFeatureSourceExternalFeatures.h"
00095 #include "moses/FF/VW/VWFeatureTargetPhraseInternal.h"
00096 #include "moses/FF/VW/VWFeatureTargetPhraseScores.h"
00097 #endif
00098
00099 #ifdef HAVE_CMPH
00100 #include "moses/TranslationModel/CompactPT/PhraseDictionaryCompact.h"
00101 #endif
00102 #ifdef PT_UG
00103 #include "moses/TranslationModel/UG/mmsapt.h"
00104 #endif
00105
00106 #include "moses/LM/Ken.h"
00107 #include "moses/LM/Reloading.h"
00108 #ifdef LM_IRST
00109 #include "moses/LM/IRST.h"
00110 #endif
00111
00112 #ifdef LM_SRI
00113 #include "moses/LM/SRI.h"
00114 #endif
00115
00116 #ifdef LM_MAXENT_SRI
00117 #include "moses/LM/MaxEntSRI.h"
00118 #endif
00119
00120 #ifdef LM_RAND
00121 #include "moses/LM/Rand.h"
00122 #endif
00123
00124 #ifdef HAVE_SYNLM
00125 #include "moses/SyntacticLanguageModel.h"
00126 #endif
00127
00128 #ifdef LM_NEURAL
00129 #include "moses/LM/NeuralLMWrapper.h"
00130 #include "moses/LM/RDLM.h"
00131 #include "moses/LM/bilingual-lm/BiLM_NPLM.h"
00132 #endif
00133
00134 #ifdef LM_DALM
00135 #include "moses/LM/DALMWrapper.h"
00136 #endif
00137
00138 #ifdef LM_OXLM
00139 #include "moses/LM/oxlm/OxLM.h"
00140 #include "moses/LM/oxlm/SourceOxLM.h"
00141 #endif
00142
00143 #include "util/exception.hh"
00144
00145 #include <vector>
00146
00147 namespace Moses
00148 {
00149
/**
 * Abstract factory for feature functions.
 *
 * Each concrete factory knows how to build one kind of feature function from
 * the configuration line handed to Create(). Instances are only meant to be
 * created through subclasses, hence the protected constructor.
 */
class FeatureFactory
{
public:
  virtual ~FeatureFactory() {}

  /// Build a feature function from its configuration line.
  virtual void Create(const std::string &line) = 0;

protected:
  // Construction is restricted to derived factories.
  FeatureFactory() {}

  /// Shared post-construction step for a freshly built feature
  /// (defined out of line, after this class).
  template <class F>
  static void DefaultSetup(F *feature);
};
00162
00163 template <class F>
00164 void
00165 FeatureFactory
00166 ::DefaultSetup(F *feature)
00167 {
00168 FeatureFunction::Register(feature);
00169
00170 StaticData &static_data = StaticData::InstanceNonConst();
00171 const std::string &featureName = feature->GetScoreProducerDescription();
00172 std::vector<float> weights = static_data.GetParameter()->GetWeights(featureName);
00173
00174
00175 if (feature->GetNumScoreComponents()) {
00176 if (weights.size() == 0) {
00177 weights = feature->DefaultWeights();
00178 if (weights.size() == 0) {
00179 TRACE_ERR("WARNING: No weights specified in config file for FF "
00180 << featureName << ". This FF does not supply default values.\n"
00181 << "WARNING: Auto-initializing all weights for this FF to 1.0");
00182 weights.assign(feature->GetNumScoreComponents(),1.0);
00183 } else {
00184 VERBOSE(2,"WARNING: No weights specified in config file for FF "
00185 << featureName << ". Using default values supplied by FF.");
00186 }
00187 }
00188 UTIL_THROW_IF2(weights.size() != feature->GetNumScoreComponents(),
00189 "FATAL ERROR: Mismatch in number of features and number "
00190 << "of weights for Feature Function " << featureName
00191 << " (features: " << feature->GetNumScoreComponents()
00192 << " vs. weights: " << weights.size() << ")");
00193 static_data.SetWeights(feature, weights);
00194 } else if (feature->IsTuneable())
00195 static_data.SetWeights(feature, weights);
00196 }
00197
00198 namespace
00199 {
00200
00201 template <class F>
00202 class DefaultFeatureFactory : public FeatureFactory
00203 {
00204 public:
00205 void Create(const std::string &line) {
00206 DefaultSetup(new F(line));
00207 }
00208 };
00209
00210 class KenFactory : public FeatureFactory
00211 {
00212 public:
00213 void Create(const std::string &line) {
00214 DefaultSetup(ConstructKenLM(line));
00215 }
00216 };
00217
00218 class ReloadingFactory : public FeatureFactory
00219 {
00220 public:
00221 void Create(const std::string &line) {
00222 DefaultSetup(ConstructReloadingLM(line));
00223 }
00224 };
00225
00226 }
00227
00228 FeatureRegistry::FeatureRegistry()
00229 {
00230
00231 #define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
00232
00233 #define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
00234
00235 MOSES_FNAME2("PhraseDictionaryBinary", PhraseDictionaryTreeAdaptor);
00236 MOSES_FNAME(PhraseDictionaryOnDisk);
00237 MOSES_FNAME(PhraseDictionaryMemory);
00238 MOSES_FNAME(PhraseDictionaryScope3);
00239 MOSES_FNAME(PhraseDictionaryMultiModel);
00240 MOSES_FNAME(PhraseDictionaryMultiModelCounts);
00241 MOSES_FNAME(PhraseDictionaryGroup);
00242 MOSES_FNAME(PhraseDictionaryALSuffixArray);
00243
00244 MOSES_FNAME(PhraseDictionaryTransliteration);
00245 MOSES_FNAME(PhraseDictionaryDynamicCacheBased);
00246 MOSES_FNAME(PhraseDictionaryFuzzyMatch);
00247 MOSES_FNAME(ProbingPT);
00248 MOSES_FNAME(PhraseDictionaryMemoryPerSentence);
00249 MOSES_FNAME(PhraseDictionaryMemoryPerSentenceOnDemand);
00250 MOSES_FNAME2("RuleTable", Syntax::RuleTableFF);
00251 MOSES_FNAME2("SyntaxInputWeight", Syntax::InputWeightFF);
00252
00253 MOSES_FNAME(GlobalLexicalModel);
00254
00255 MOSES_FNAME(Model1Feature);
00256 MOSES_FNAME(SourceWordDeletionFeature);
00257 MOSES_FNAME(TargetWordInsertionFeature);
00258 MOSES_FNAME(PhraseBoundaryFeature);
00259 MOSES_FNAME(PhraseDistanceFeature);
00260 MOSES_FNAME(PhraseLengthFeature);
00261 MOSES_FNAME(WordTranslationFeature);
00262 MOSES_FNAME(TargetBigramFeature);
00263 MOSES_FNAME(TargetNgramFeature);
00264 MOSES_FNAME(PhrasePairFeature);
00265 MOSES_FNAME(RulePairUnlexicalizedSource);
00266 MOSES_FNAME(LexicalReordering);
00267 MOSES_FNAME2("Generation", GenerationDictionary);
00268 MOSES_FNAME(BleuScoreFeature);
00269 MOSES_FNAME2("Distortion", DistortionScoreProducer);
00270 MOSES_FNAME2("WordPenalty", WordPenaltyProducer);
00271 MOSES_FNAME(InputFeature);
00272 MOSES_FNAME(OpSequenceModel);
00273 MOSES_FNAME(DesegModel);
00274 MOSES_FNAME(PhrasePenalty);
00275 MOSES_FNAME2("UnknownWordPenalty", UnknownWordPenaltyProducer);
00276 MOSES_FNAME(ControlRecombination);
00277 MOSES_FNAME(ConstrainedDecoding);
00278 MOSES_FNAME(CoveredReferenceFeature);
00279 MOSES_FNAME(SourceGHKMTreeInputMatchFeature);
00280 MOSES_FNAME(SoftSourceSyntacticConstraintsFeature);
00281 MOSES_FNAME(TargetConstituentAdjacencyFeature);
00282 MOSES_FNAME(TargetPreferencesFeature);
00283 MOSES_FNAME(TreeStructureFeature);
00284 MOSES_FNAME(SoftMatchingFeature);
00285 MOSES_FNAME(DynamicCacheBasedLanguageModel);
00286 MOSES_FNAME(HyperParameterAsWeight);
00287 MOSES_FNAME(SetSourcePhrase);
00288 MOSES_FNAME(CountNonTerms);
00289 MOSES_FNAME(ReferenceComparison);
00290 MOSES_FNAME(RuleScope);
00291 MOSES_FNAME(MaxSpanFreeNonTermSource);
00292 MOSES_FNAME(NieceTerminal);
00293 MOSES_FNAME(SparseHieroReorderingFeature);
00294 MOSES_FNAME(SpanLength);
00295 MOSES_FNAME(SyntaxRHS);
00296 MOSES_FNAME(PhraseOrientationFeature);
00297 MOSES_FNAME(UnalignedWordCountFeature);
00298 MOSES_FNAME(DeleteRules);
00299
00300 MOSES_FNAME(ExampleStatelessFF);
00301 MOSES_FNAME(ExampleStatefulFF);
00302 MOSES_FNAME(ExampleLM);
00303 MOSES_FNAME(ExampleTranslationOptionListFeature);
00304 MOSES_FNAME(ExamplePT);
00305
00306 MOSES_FNAME(InMemoryPerSentenceOnDemandLM);
00307 MOSES_FNAME(EditOps);
00308 MOSES_FNAME(CorrectionPattern);
00309
00310 #ifdef HAVE_VW
00311 MOSES_FNAME(VW);
00312 MOSES_FNAME(VWFeatureContextBigrams);
00313 MOSES_FNAME(VWFeatureContextBilingual);
00314 MOSES_FNAME(VWFeatureContextWindow);
00315 MOSES_FNAME(VWFeatureSourceBagOfWords);
00316 MOSES_FNAME(VWFeatureSourceBigrams);
00317 MOSES_FNAME(VWFeatureSourceIndicator);
00318 MOSES_FNAME(VWFeatureSourcePhraseInternal);
00319 MOSES_FNAME(VWFeatureSourceSenseWindow);
00320 MOSES_FNAME(VWFeatureSourceWindow);
00321 MOSES_FNAME(VWFeatureTargetBigrams);
00322 MOSES_FNAME(VWFeatureTargetPhraseInternal);
00323 MOSES_FNAME(VWFeatureTargetIndicator);
00324 MOSES_FNAME(VWFeatureSourceExternalFeatures);
00325 MOSES_FNAME(VWFeatureTargetPhraseScores);
00326 #endif
00327
00328 #ifdef HAVE_CMPH
00329 MOSES_FNAME(PhraseDictionaryCompact);
00330 #endif
00331 #ifdef PT_UG
00332 MOSES_FNAME(Mmsapt);
00333 MOSES_FNAME2("PhraseDictionaryBitextSampling",Mmsapt);
00334 #endif
00335
00336 #ifdef HAVE_SYNLM
00337 MOSES_FNAME(SyntacticLanguageModel);
00338 #endif
00339 #ifdef LM_IRST
00340 MOSES_FNAME2("IRSTLM", LanguageModelIRST);
00341 #endif
00342 #ifdef LM_SRI
00343 MOSES_FNAME2("SRILM", LanguageModelSRI);
00344 #endif
00345 #ifdef LM_MAXENT_SRI
00346 MOSES_FNAME2("MaxEntLM", LanguageModelMaxEntSRI);
00347 #endif
00348 #ifdef LM_RAND
00349 MOSES_FNAME2("RANDLM", LanguageModelRandLM);
00350 #endif
00351 #ifdef LM_NEURAL
00352 MOSES_FNAME2("NeuralLM", NeuralLMWrapper);
00353 MOSES_FNAME(RDLM);
00354 MOSES_FNAME2("BilingualNPLM", BilingualLM_NPLM);
00355 #endif
00356 #ifdef LM_DALM
00357 MOSES_FNAME2("DALM", LanguageModelDALM);
00358 #endif
00359 #ifdef LM_OXLM
00360 MOSES_FNAME2("OxLM", OxLM<oxlm::LM>);
00361 MOSES_FNAME2("OxFactoredLM", OxLM<oxlm::FactoredLM>);
00362 MOSES_FNAME2("OxFactoredMaxentLM", OxLM<oxlm::FactoredMaxentLM>);
00363 MOSES_FNAME2("OxSourceFactoredLM", SourceOxLM);
00364 MOSES_FNAME2("OxTreeLM", OxLM<oxlm::FactoredTreeLM>);
00365 #endif
00366 Add("ReloadingLM", new ReloadingFactory());
00367 Add("KENLM", new KenFactory());
00368 }
00369
00370 FeatureRegistry::~FeatureRegistry()
00371 {
00372 }
00373
00374 void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
00375 {
00376 std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name, boost::shared_ptr<FeatureFactory>(factory));
00377 UTIL_THROW_IF2(!registry_.insert(to_ins).second, "Duplicate feature name " << name);
00378 }
00379
00380 namespace
00381 {
00382 class UnknownFeatureException : public util::Exception {};
00383 }
00384
00385 void FeatureRegistry::Construct(const std::string &name, const std::string &line)
00386 {
00387 Map::iterator i = registry_.find(name);
00388 UTIL_THROW_IF(i == registry_.end(), UnknownFeatureException, "Feature name " << name << " is not registered.");
00389 i->second->Create(line);
00390 }
00391
00392 void FeatureRegistry::PrintFF() const
00393 {
00394 std::vector<std::string> ffs;
00395 std::cerr << "Available feature functions:" << std::endl;
00396 Map::const_iterator iter;
00397 for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
00398 const std::string &ffName = iter->first;
00399 ffs.push_back(ffName);
00400 }
00401
00402 std::vector<std::string>::const_iterator iterVec;
00403 std::sort(ffs.begin(), ffs.end());
00404 for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
00405 const std::string &ffName = *iterVec;
00406 std::cerr << ffName << " ";
00407 }
00408
00409 std::cerr << std::endl;
00410 }
00411
00412 }