00001 00002 #pragma once 00003 00004 #include "moses/PP/PhraseProperty.h" 00005 #include "util/exception.hh" 00006 #include <string> 00007 #include <list> 00008 00009 namespace Moses 00010 { 00011 00012 // A simple phrase property class to access the three phrase count values. 00013 // 00014 // The counts are usually not needed during decoding and are not loaded 00015 // from the phrase table. This is just a workaround that can make them 00016 // available to features which have a use for them. 00017 // 00018 // If you need access to the counts, copy the two marginal counts and the 00019 // joint count into an additional information property with key "Counts", 00020 // e.g. using awk: 00021 // 00022 // $ zcat phrase-table.gz | awk -F' \|\|\| ' '{printf("%s {{Counts %s}}\n",$0,$5);}' | gzip -c > phrase-table.withCountsPP.gz 00023 // 00024 // CountsPhraseProperty reads them from the phrase table and provides 00025 // methods GetSourceMarginal(), GetTargetMarginal(), GetJointCount(). 00026 00027 00028 class CountsPhraseProperty : public PhraseProperty 00029 { 00030 friend std::ostream& operator<<(std::ostream &, const CountsPhraseProperty &); 00031 00032 public: 00033 00034 CountsPhraseProperty() {}; 00035 00036 virtual void ProcessValue(const std::string &value); 00037 00038 size_t GetSourceMarginal() const { 00039 return m_sourceMarginal; 00040 } 00041 00042 size_t GetTargetMarginal() const { 00043 return m_targetMarginal; 00044 } 00045 00046 float GetJointCount() const { 00047 return m_jointCount; 00048 } 00049 00050 virtual const std::string *GetValueString() const { 00051 UTIL_THROW2("CountsPhraseProperty: value string not available in this phrase property"); 00052 return NULL; 00053 }; 00054 00055 protected: 00056 00057 float m_sourceMarginal, m_targetMarginal, m_jointCount; 00058 00059 }; 00060 00061 } // namespace Moses 00062