00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef moses_Phrase_h
00024 #define moses_Phrase_h
00025
00026 #include <iostream>
00027 #include <vector>
00028 #include <list>
00029 #include <string>
00030
00031 #include <boost/functional/hash.hpp>
00032
00033 #include "Word.h"
00034 #include "WordsBitmap.h"
00035 #include "TypeDef.h"
00036 #include "Util.h"
00037
00038 #include "util/string_piece.hh"
00039
00040 namespace Moses
00041 {
00042 class FactorMask;
00043
00047 class Phrase
00048 {
00049 friend std::ostream& operator<<(std::ostream&, const Phrase&);
00050 private:
00051
00052 std::vector<Word> m_words;
00053
00054 public:
00056 static void InitializeMemPool();
00057 static void FinalizeMemPool();
00058
00061 Phrase();
00062 explicit Phrase(size_t reserveSize);
00064 explicit Phrase(const std::vector< const Word* > &mergeWords);
00065
00067 virtual ~Phrase();
00068
00074 void CreateFromString(FactorDirection direction
00075 , const std::vector<FactorType> &factorOrder
00076 , const StringPiece &phraseString
00077 , const StringPiece &factorDelimiter
00078 , Word **lhs);
00079
00083 void MergeFactors(const Phrase ©);
00085 void MergeFactors(const Phrase ©, FactorType factorType);
00087 void MergeFactors(const Phrase ©, const std::vector<FactorType>& factorVec);
00088
00092 bool IsCompatible(const Phrase &inputPhrase) const;
00093 bool IsCompatible(const Phrase &inputPhrase, FactorType factorType) const;
00094 bool IsCompatible(const Phrase &inputPhrase, const std::vector<FactorType>& factorVec) const;
00095
00097 inline size_t GetSize() const {
00098 return m_words.size();
00099 }
00100
00102 inline const Word &GetWord(size_t pos) const {
00103 return m_words[pos];
00104 }
00105 inline Word &GetWord(size_t pos) {
00106 return m_words[pos];
00107 }
00109 inline const Factor *GetFactor(size_t pos, FactorType factorType) const {
00110 const Word &ptr = m_words[pos];
00111 return ptr[factorType];
00112 }
00113 inline void SetFactor(size_t pos, FactorType factorType, const Factor *factor) {
00114 Word &ptr = m_words[pos];
00115 ptr[factorType] = factor;
00116 }
00117
00118 size_t GetNumTerminals() const;
00119
00121 bool Contains(const std::vector< std::vector<std::string> > &subPhraseVector
00122 , const std::vector<FactorType> &inputFactor) const;
00123
00125 Word &AddWord();
00127 void AddWord(const Word &newWord) {
00128 AddWord() = newWord;
00129 }
00130
00132 void Append(const Phrase &endPhrase);
00133 void PrependWord(const Word &newWord);
00134
00135 void Clear() {
00136 m_words.clear();
00137 }
00138
00139 void RemoveWord(size_t pos) {
00140 CHECK(pos < m_words.size());
00141 m_words.erase(m_words.begin() + pos);
00142 }
00143
00145 Phrase GetSubString(const WordsRange &wordsRange) const;
00146 Phrase GetSubString(const WordsRange &wordsRange, FactorType factorType) const;
00147
00149 std::string GetStringRep(const std::vector<FactorType> factorsToPrint) const;
00150
00151 TO_STRING();
00152
00153
00154 int Compare(const Phrase &other) const;
00155
00159 bool operator< (const Phrase &compare) const {
00160 return Compare(compare) < 0;
00161 }
00162
00163 bool operator== (const Phrase &compare) const {
00164 return Compare(compare) == 0;
00165 }
00166
00167 void OnlyTheseFactors(const FactorMask &factors);
00168
00169 };
00170
00171 inline size_t hash_value(const Phrase& phrase)
00172 {
00173 size_t seed = 0;
00174 for (size_t i = 0; i < phrase.GetSize(); ++i) {
00175 boost::hash_combine(seed, phrase.GetWord(i));
00176 }
00177 return seed;
00178 }
00179
00180 struct PhrasePtrComparator {
00181 inline bool operator()(const Phrase* lhs, const Phrase* rhs) const {
00182 return *lhs == *rhs;
00183 }
00184 };
00185
00186 struct PhrasePtrHasher {
00187 inline size_t operator()(const Phrase* phrase) const {
00188 size_t seed = 0;
00189 boost::hash_combine(seed,*phrase);
00190 return seed;
00191 }
00192
00193 };
00194
00195 }
00196
00197 #endif