00001 // $Id: LanguageModel.h 4348 2011-10-13 12:33:05Z heafield $ 00002 00003 /*********************************************************************** 00004 Moses - factored phrase-based language decoder 00005 Copyright (C) 2006 University of Edinburgh 00006 00007 This library is free software; you can redistribute it and/or 00008 modify it under the terms of the GNU Lesser General Public 00009 License as published by the Free Software Foundation; either 00010 version 2.1 of the License, or (at your option) any later version. 00011 00012 This library is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 Lesser General Public License for more details. 00016 00017 You should have received a copy of the GNU Lesser General Public 00018 License along with this library; if not, write to the Free Software 00019 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 00020 ***********************************************************************/ 00021 00022 #ifndef moses_LanguageModel_h 00023 #define moses_LanguageModel_h 00024 00025 #include <string> 00026 #include <vector> 00027 #include "Factor.h" 00028 #include "TypeDef.h" 00029 #include "Util.h" 00030 #include "FeatureFunction.h" 00031 #include "Word.h" 00032 00033 namespace Moses 00034 { 00035 00036 class FactorCollection; 00037 class Factor; 00038 class Phrase; 00039 00041 class LanguageModel : public StatefulFeatureFunction { 00042 protected: 00043 LanguageModel(); 00044 00045 // This can't be in the constructor for virual function dispatch reasons 00046 void Init(ScoreIndexManager &scoreIndexManager); 00047 00048 bool m_enableOOVFeature; 00049 00050 public: 00051 virtual ~LanguageModel(); 00052 00053 // Make another feature without copying the underlying model data. 
00054 virtual LanguageModel *Duplicate(ScoreIndexManager &scoreIndexManager) const = 0; 00055 00057 size_t GetNumScoreComponents() const; 00058 00059 bool OOVFeatureEnabled() const { 00060 return m_enableOOVFeature; 00061 } 00062 00063 float GetWeight() const; 00064 float GetOOVWeight() const; 00065 00066 std::string GetScoreProducerWeightShortName(unsigned) const { 00067 return "lm"; 00068 } 00069 00070 virtual void InitializeBeforeSentenceProcessing() {} 00071 00072 virtual void CleanUpAfterSentenceProcessing() {} 00073 00074 virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0; 00075 00076 /* whether this LM can be used on a particular phrase. 00077 * Should return false if phrase size = 0 or factor types required don't exists 00078 */ 00079 virtual bool Useable(const Phrase &phrase) const = 0; 00080 00081 /* calc total unweighted LM score of this phrase and return score via arguments. 00082 * Return scores should always be in natural log, regardless of representation with LM implementation. 00083 * Uses GetValue() of inherited class. 00084 * Useable() should be called beforehand on the phrase 00085 * \param fullScore scores of all unigram, bigram... of contiguous n-gram of the phrase 00086 * \param ngramScore score of only n-gram of order m_nGramOrder 00087 * \param oovCount number of LM OOVs 00088 */ 00089 virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, size_t &oovCount) const = 0; 00090 }; 00091 00092 } 00093 00094 #endif
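// 1.5.9 -- stray version stamp left over from the listing extraction (presumably the documentation generator's version); not part of the header.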