00001 #ifndef moses_PrefixTreeMap_h
00002 #define moses_PrefixTreeMap_h
00003
00004 #include<vector>
00005 #include<climits>
00006 #include<iostream>
00007 #include <map>
00008
00009
00010 #include "PrefixTree.h"
00011 #include "File.h"
00012 #include "LVoc.h"
00013 #include "ObjectPool.h"
00014
00015 namespace Moses
00016 {
00017
00018
00019 typedef PrefixTreeF<LabelId,OFF_T> PTF;
00020 typedef FilePtr<PTF> CPT;
00021 typedef std::vector<CPT> Data;
00022 typedef LVoc<std::string> WordVoc;
00023
00026 class GenericCandidate
00027 {
00028 public:
00029 typedef std::vector<IPhrase> PhraseList;
00030 typedef std::vector< std::vector<float> > ScoreList;
00031 public:
00032 GenericCandidate() {
00033 };
00034 GenericCandidate(const GenericCandidate& other)
00035 : m_PhraseList(other.m_PhraseList), m_ScoreList(other.m_ScoreList) {
00036 };
00037 GenericCandidate(const PhraseList& p, const ScoreList& s)
00038 : m_PhraseList(p), m_ScoreList(s) {
00039 };
00040 ~GenericCandidate() {
00041 };
00042 public:
00043 size_t NumPhrases() const {
00044 return m_PhraseList.size();
00045 };
00046 size_t NumScores() const {
00047 return m_ScoreList.size();
00048 };
00049 const IPhrase& GetPhrase(unsigned int i) const {
00050 return m_PhraseList.at(i);
00051 }
00052 const std::vector<float>& GetScore(unsigned int i) const {
00053 return m_ScoreList.at(i);
00054 }
00055 void readBin(FILE* f);
00056 void writeBin(FILE* f) const;
00057 private:
00058 PhraseList m_PhraseList;
00059 ScoreList m_ScoreList;
00060 };
00061
00062
00065 struct PPimp {
00066 PTF const*p;
00067 unsigned idx;
00068 bool root;
00069
00070 PPimp(PTF const* x,unsigned i,bool b) : p(x),idx(i),root(b) {}
00071 bool isValid() const {
00072 return root || (p && idx<p->size());
00073 }
00074
00075 bool isRoot() const {
00076 return root;
00077 }
00078 PTF const* ptr() const {
00079 return p;
00080 }
00081 };
00082
00083
00086 class Candidates : public std::vector<GenericCandidate>
00087 {
00088 typedef std::vector<GenericCandidate> MyBase;
00089 public:
00090 Candidates() : MyBase() {
00091 };
00092 void writeBin(FILE* f) const;
00093 void readBin(FILE* f);
00094 };
00095
00096 class PrefixTreeMap
00097 {
00098 public:
00099 PrefixTreeMap() : m_FileSrc(0), m_FileTgt(0) {
00100 PTF::setDefault(InvalidOffT);
00101 }
00102 ~PrefixTreeMap();
00103
00104 public:
00105 static const LabelId MagicWord;
00106
00107 void FreeMemory();
00108
00109 int Read(const std::string& fileNameStem, int numVocs = -1);
00110
00111 void GetCandidates(const IPhrase& key, Candidates* cands);
00112 void GetCandidates(const PPimp& p, Candidates* cands);
00113
00114 std::vector< std::string const * > ConvertPhrase(const IPhrase& p, unsigned int voc) const;
00115 IPhrase ConvertPhrase(const std::vector< std::string >& p, unsigned int voc) const;
00116 LabelId ConvertWord(const std::string& w, unsigned int voc) const;
00117 std::string ConvertWord(LabelId w, unsigned int voc) const;
00118 public:
00119 PPimp* GetRoot();
00120 PPimp* Extend(PPimp* p, LabelId wi);
00121 PPimp* Extend(PPimp* p, const std::string w, size_t voc) {
00122 return Extend(p, ConvertWord(w,voc));
00123 }
00124 private:
00125 Data m_Data;
00126 FILE* m_FileSrc;
00127 FILE* m_FileTgt;
00128
00129 std::vector<WordVoc*> m_Voc;
00130 ObjectPool<PPimp> m_PtrPool;
00131 std::map<std::string,WordVoc> m_vocs;
00132 };
00133
00134 }
00135
00136 #endif