00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef INC_RANDLM_CACHE_H
00018 #define INC_RANDLM_CACHE_H
00019
00020 #include <iterator>
00021 #include <map>
00022 #include <ctime>
00023 #include <iostream>
00024
00025 namespace randlm
00026 {
00027
00029 template<typename T>
00030 class CacheNode
00031 {
00032 public:
00033 typedef std::map<wordID_t, CacheNode<T>* > childMap;
00034
00035 CacheNode(T unknown_value) : value_(unknown_value) {}
00036 childMap childs_;
00037 T value_;
00038 const void* state_;
00039 };
00040
00041 template<typename T>
00042 class Cache
00043 {
00044 public:
00045 typedef typename std::map<wordID_t, CacheNode<T>* >::iterator childPtr;
00046
00047
00048
00049 Cache(T unknown_value, T null_value) :
00050 cur_nodes_(0), unknown_value_(unknown_value), null_value_(null_value) {
00051 root_ = newNode();
00052 }
00053 ~Cache() {
00054 if(clear()) {
00055 delete root_;
00056 root_ = NULL;
00057 } else {
00058 std::cerr << "Error freeing cache memory.\n";
00059 }
00060 }
00061 bool setCacheNgram(const wordID_t* ngram, int len, T value, const void* state) {
00062
00063 CacheNode<T>* node = root_;
00064 for (int i = len - 1; i > -1; --i) {
00065 childPtr child = node->childs_.find(ngram[i]);
00066 if( child != node->childs_.end() ) {
00067
00068 node = node->childs_[ngram[i]];
00069 } else {
00070
00071 CacheNode<T> * newChild = newNode(node);
00072 node->childs_[ngram[i]] = newChild;
00073
00074 node = newChild;
00075 }
00076 }
00077 node->value_ = value;
00078 node->state_ = state;
00079 return true;
00080 }
00081 bool checkCacheNgram(const wordID_t* ngram, int len, T* value, const void** state) {
00082
00083 CacheNode<T> * node = root_;
00084 for(int i = len - 1; i > -1; --i) {
00085
00086 childPtr child = node->childs_.find(ngram[i]);
00087 if( child != node->childs_.end() ) {
00088
00089 node = node->childs_[ngram[i]];
00090 } else {
00091
00092 return false;
00093 }
00094 }
00095 *value = node->value_;
00096 if(state) *state = node->state_;
00097 return *value != null_value_ && *value != unknown_value_;
00098 }
00099 int getCache2(const wordID_t* ngram, int len, T** values, int* found) {
00100
00101 CacheNode<T> * node = root_;
00102 *found = 0;
00103
00104 bool all_found = true;
00105 for(int i = len - 1; i > -1; --i) {
00106
00107 childPtr child = node->childs_.find(ngram[i]);
00108 if( child != node->childs_.end() ) {
00109
00110 node = node->childs_[ngram[i]];
00111
00112 values[i] = &node->value_;
00113
00114 if (node->value_ == null_value_) {
00115 return len - 1 - i;
00116 }
00117 all_found = all_found && (node->value_ != unknown_value_);
00118 if (all_found)
00119 ++(*found);
00120 } else {
00121
00122 CacheNode<T> * newChild = newNode(node);
00123 node->childs_[ngram[i]] = newChild;
00124
00125 node = newChild;
00126 values[i] = &node->value_;
00127 }
00128 }
00129 return len;
00130 }
00131 int getCache(const wordID_t* ngram, int len, T** values, int* found) {
00132
00133
00134
00135 CacheNode<T> * node = root_;
00136 *found = 0;
00137 values[0] = &node->value_;
00138 bool all_found = true;
00139 for(int i = len - 1; i > -1; --i) {
00140
00141 childPtr child = node->childs_.find(ngram[i]);
00142 if( child != node->childs_.end() ) {
00143
00144 node = node->childs_[ngram[i]];
00145
00146 values[len - i] = &node->value_;
00147
00148 if (node->value_ == null_value_)
00149 return len - 1 - i;
00150 all_found = all_found && (node->value_ != unknown_value_);
00151 if (all_found)
00152 ++(*found);
00153 } else {
00154
00155 CacheNode<T> * newChild = newNode(node);
00156 node->childs_[ngram[i]] = newChild;
00157
00158 node = newChild;
00159 values[len - i] = &node->value_;
00160 }
00161 }
00162 return len;
00163 }
00164 bool clear() {
00165 std::cerr << "Clearing cache with " << static_cast<float>(cur_nodes_ * nodeSize())
00166 / static_cast<float>(1ull << 20) << "MB" << std::endl;
00167 return clearNodes(root_);
00168 }
00169 int nodes() {
00170
00171 return cur_nodes_;
00172 }
00173 int nodeSize() {
00174 return sizeof(CacheNode<T>) + sizeof(root_->childs_);
00175 }
00176 private:
00177 CacheNode<T> * root_;
00178 count_t cur_nodes_;
00179 T unknown_value_;
00180 T null_value_;
00181 CacheNode<T>* newNode(CacheNode<T> * node = 0) {
00182 ++cur_nodes_;
00183 return new CacheNode<T>(unknown_value_);
00184 }
00185 bool clearNodes(CacheNode<T> * node) {
00186
00187 if(!node->childs_.empty()) {
00188 iterate(node->childs_, itr) {
00189 if(!clearNodes(itr->second))
00190 std::cerr << "Error emptying cache\n";
00191 delete itr->second;
00192 --cur_nodes_;
00193 }
00194 node->childs_.clear();
00195 }
00196 return true;
00197 }
00198
00199 };
00200 }
00201 #endif //INC_RANDLM_CACHE_H