00001 #ifndef moses_DynSAInclude_vocab_h
00002 #define moses_DynSAInclude_vocab_h
00003
00004 #include <map>
00005 #include <string>
00006 #include "types.h"
00007 #include "FileHandler.h"
00008 #include "utils.h"
00009 #include "moses/TypeDef.h"
00010 #include "moses/Word.h"
00011
00012 namespace Moses
00013 {
00014
00016 class Vocab
00017 {
00018 public:
00019
00020 typedef std::map<Word, wordID_t> Word2Id;
00021 typedef std::map<wordID_t, Word> Id2Word;
00022
00023 Vocab(bool sntMarkers = true):
00024 m_closed(false),
00025 m_kOOVWordID(0),
00026 m_kBOSWordID(1) {
00027 InitSpecialWords();
00028 if(sntMarkers) {
00029 GetWordID(m_kBOSWord);
00030 GetWordID(m_kEOSWord);
00031 }
00032 }
00033
00034
00035
00036 Vocab(const std::string & vocab_path, const FactorDirection& direction,
00037 const FactorList& factors, bool closed = true):
00038 m_kOOVWordID(0),
00039 m_kBOSWordID(1) {
00040 InitSpecialWords();
00041 bool ret = Load(vocab_path, direction, factors, closed);
00042 UTIL_THROW_IF2(!ret, "Unable to load vocab file: " << vocab_path);
00043 }
00044 Vocab(FileHandler * fin, const FactorDirection& direction,
00045 const FactorList& factors, bool closed = true):
00046 m_kOOVWordID(0),
00047 m_kBOSWordID(1) {
00048 InitSpecialWords();
00049 bool ret = Load(fin, direction, factors, closed);
00050 UTIL_THROW_IF2(!ret, "Unable to load vocab file");
00051 }
00052 Vocab(FileHandler *fin):
00053 m_kOOVWordID(0),
00054 m_kBOSWordID(1) {
00055 Load(fin);
00056 }
00057 ~Vocab() {}
00058
00059 wordID_t GetWordID(const std::string& word, const FactorDirection& direction,
00060 const FactorList& factors, bool isNonTerminal);
00061 wordID_t GetWordID(const Word& word);
00062 wordID_t GetWordID(const std::string& word);
00063 Word& GetWord(wordID_t id);
00064 inline wordID_t GetkOOVWordID() {
00065 return m_kOOVWordID;
00066 }
00067 inline wordID_t GetBOSWordID() {
00068 return m_kBOSWordID;
00069 }
00070 inline const Word& GetkOOVWord() {
00071 return m_kOOVWord;
00072 }
00073 inline const Word& GetkBOSWord() {
00074 return m_kBOSWord;
00075 }
00076 inline const Word& GetkEOSWord() {
00077 return m_kEOSWord;
00078 }
00079
00080 bool InVocab(wordID_t id);
00081 bool InVocab(const Word& word);
00082 uint32_t Size() {
00083 return m_words2ids.size();
00084 }
00085 void MakeClosed() {
00086 m_closed = true;
00087 }
00088 void MakeOpen() {
00089 m_closed = false;
00090 }
00091 bool IsClosed() {
00092 return m_closed;
00093 }
00094 bool Save(const std::string & vocab_path);
00095 bool Save(FileHandler* fout);
00096 bool Load(const std::string & vocab_path, const FactorDirection& direction,
00097 const FactorList& factors, bool closed = true);
00098 bool Load(FileHandler* fin, const FactorDirection& direction,
00099 const FactorList& factors, bool closed = true);
00100 bool Load(FileHandler* fin);
00101 void PrintVocab();
00102 Word2Id::const_iterator VocabStart() {
00103 return m_words2ids.begin();
00104 }
00105 Word2Id::const_iterator VocabEnd() {
00106 return m_words2ids.end();
00107 }
00108
00109 protected:
00110 bool m_closed;
00111
00112 const wordID_t m_kOOVWordID;
00113 const wordID_t m_kBOSWordID;
00114 Word m_kBOSWord;
00115 Word m_kEOSWord;
00116 Word m_kOOVWord;
00117
00118 const Word InitSpecialWord( const std::string& type);
00119 void InitSpecialWords();
00120
00121 Word2Id m_words2ids;
00122 Id2Word m_ids2words;
00123 };
00124
00125 }
00126
00127 #endif