00001 #ifndef INC_ALLHASHFUNCS_H
00002 #define INC_ALLHASHFUNCS_H
00003
00004 #include "util/check.hh"
00005 #include <cmath>
00006 #include "types.h"
00007 #include "utils.h"
00008 #include "FileHandler.h"
00009 using namespace Moses;
00010 typedef uint64_t P;
00011
00013 template <typename T>
00014 class HashBase {
00015 protected:
00016 T m_;
00017 count_t H_;
00018 virtual void initSeeds()=0;
00019 virtual void freeSeeds()=0;
00020 public:
00021 HashBase(float m, count_t H=1):m_((T)m), H_(H) {
00022
00023 }
00024 HashBase(FileHandler* fin) {
00025 load(fin);
00026 }
00027 virtual ~HashBase(){}
00028 virtual T hash(const char*s, count_t h)=0;
00029 virtual T hash(const wordID_t* id, const int len, count_t h)=0;
00030 count_t size() { return H_;}
00031 virtual void save(FileHandler* fout) {
00032 CHECK(fout != 0);
00033 fout->write((char*)&m_, sizeof(m_));
00034 fout->write((char*)&H_, sizeof(H_));
00035 }
00036 virtual void load(FileHandler* fin) {
00037 CHECK(fin != 0);
00038 fin->read((char*)&m_, sizeof(m_));
00039 fin->read((char*)&H_, sizeof(H_));
00040 }
00041 };
00042
00044 template <typename T>
00045 class UnivHash_linear: public HashBase<T> {
00046 public:
00047 UnivHash_linear(float m, count_t H, P pr):
00048 HashBase<T>(m, H), pr_(pr) {
00049
00050 initSeeds();
00051 }
00052 UnivHash_linear(FileHandler* fin):
00053 HashBase<T>(fin) {
00054 load(fin);
00055 }
00056 ~UnivHash_linear() {freeSeeds();}
00057 T hash(const char* s, count_t h){return 0;}
00058 T hash(const wordID_t* id, const int len, count_t h);
00059 T hash(const wordID_t id, const count_t pos,
00060 const T prevValue, count_t h);
00061 void save(FileHandler* fout);
00062 void load(FileHandler* fin);
00063 private:
00064 T** a_, **b_;
00065 P pr_;
00066 void initSeeds();
00067 void freeSeeds();
00068 };
00069
00076 template <typename T>
00077 class UnivHash_noPrimes: public HashBase<T> {
00078 public:
00079 UnivHash_noPrimes(float k, float l):
00080 HashBase<T>(k, 100), d_(count_t((l-k))) {
00081 if(((int)l >> 3) == sizeof(P)) p_ = (P) pow(2,l) - 1;
00082 else p_ = (P) pow(2,l);
00083 initSeeds();
00084 }
00085 UnivHash_noPrimes(FileHandler* fin):
00086 HashBase<T>(fin) {
00087 load(fin);
00088 }
00089 ~UnivHash_noPrimes() {freeSeeds();}
00090 T hash(const char* s, count_t h);
00091 T hash(const wordID_t* id, const int len, count_t h);
00092 T hash(const P x, count_t h);
00093 void save(FileHandler* fout);
00094 void load(FileHandler* fin);
00095 private:
00096 count_t d_;
00097 P p_, *a_;
00098 void initSeeds();
00099 void freeSeeds() {delete[] a_;}
00100 };
00101
00103 template <typename T>
00104 class Hash_shiftAddXOR: public HashBase<T> {
00105 public:
00106 Hash_shiftAddXOR(float m, count_t H=5): HashBase<T>(m,H),
00107 l_(5), r_(2) {
00108 initSeeds();
00109 }
00110 ~Hash_shiftAddXOR() {freeSeeds();}
00111 T hash(const char* s, count_t h);
00112 T hash(const wordID_t* id, const int len, count_t h) {}
00113 private:
00114 T* v_;
00115 const unsigned short l_, r_;
00116 void initSeeds();
00117 void freeSeeds() {delete[] v_;}
00118 };
00119
00121 template <typename T>
00122 class UnivHash_tableXOR: public HashBase<T> {
00123 public:
00124 UnivHash_tableXOR(float m, count_t H=5): HashBase<T>(m, H),
00125 table_(NULL), tblLen_(255*MAX_STR_LEN) {
00126 initSeeds();
00127 }
00128 ~UnivHash_tableXOR() {freeSeeds();}
00129 T hash(const char* s, count_t h);
00130 T hash(const wordID_t* id, const int len, count_t h) {}
00131 private:
00132 T** table_;
00133 count_t tblLen_;
00134 void initSeeds();
00135 void freeSeeds();
00136 };
00137
00138
00139 template <typename T>
00140 void Hash_shiftAddXOR<T>::initSeeds() {
00141 v_ = new T[this->H_];
00142 for(count_t i=0; i < this->H_; i++)
00143 v_[i] = Utils::rand<T>() + 1;
00144 }
00145 template <typename T>
00146 T Hash_shiftAddXOR<T>::hash(const char* s, count_t h) {
00147 T value = v_[h];
00148 int pos(0);
00149 unsigned char c;
00150 while((c = *s++) && (++pos < MAX_STR_LEN)) {
00151 value ^= ((value << l_) + (value >> r_) + c);
00152 }
00153 return (value % this->m_);
00154 }
00155
00156
00157 template <typename T>
00158 void UnivHash_tableXOR<T>::initSeeds() {
00159
00160 if(table_) freeSeeds();
00161
00162 table_ = new T* [this->H_];
00163
00164 for(count_t j=0; j < this->H_; j++) {
00165 table_[j] = new T[tblLen_];
00166 for(count_t i=0; i < tblLen_; i++) {
00167 table_[j][i] = Utils::rand<T>(this->m_-1);
00168 }
00169 }
00170 }
00171 template <typename T>
00172 void UnivHash_tableXOR<T>::freeSeeds() {
00173 for(count_t j = 0; j < this->H_; j++)
00174 delete[] table_[j];
00175 delete[] table_;
00176 table_ = NULL;
00177 }
00178 template <typename T>
00179 T UnivHash_tableXOR<T>::hash(const char* s, count_t h) {
00180 T value = 0;
00181 count_t pos = 0, idx = 0;
00182 unsigned char c;
00183 while((c = *s++) && (++pos < MAX_STR_LEN))
00184 value ^= table_[h][idx += c];
00185 CHECK(value < this->m_);
00186 return value;
00187 }
00188
00189
00190 template <typename T>
00191 void UnivHash_noPrimes<T>::initSeeds() {
00192 a_ = new P[this->H_];
00193 for(T i=0; i < this->H_; i++) {
00194 a_[i] = Utils::rand<P>();
00195 if(a_[i] % 2 == 0) a_[i]++;
00196 }
00197 }
00198 template <typename T>
00199 T UnivHash_noPrimes<T>::hash(const P x, count_t h) {
00200
00201 T value = ((a_[h] * x) % p_) >> d_;
00202 return value % this->m_;
00203 }
00204 template <typename T>
00205 T UnivHash_noPrimes<T>::hash(const wordID_t* id, const int len,
00206 count_t h) {
00207 T value = 0;
00208 int pos(0);
00209 while(pos < len) {
00210 value ^= hash((P)id[pos], h++);
00211 pos++;
00212 }
00213 return value % this->m_;
00214 }
00215 template <typename T>
00216 T UnivHash_noPrimes<T>::hash(const char* s, count_t h) {
00217 T value = 0;
00218 int pos(0);
00219 unsigned char c;
00220 while((c = *s++) && (++pos < MAX_STR_LEN)) {
00221 value ^= hash((P)c, h);
00222 }
00223 return value % this->m_;
00224 }
00225 template <typename T>
00226 void UnivHash_noPrimes<T>::save(FileHandler* fout) {
00227 HashBase<T>::save(fout);
00228 fout->write((char*)&p_, sizeof(p_));
00229 fout->write((char*)&d_, sizeof(d_));
00230 for(T i=0; i < this->H_; i++) {
00231 fout->write((char*)&a_[i], sizeof(a_[i]));
00232 }
00233 }
00234 template <typename T>
00235 void UnivHash_noPrimes<T>::load(FileHandler* fin) {
00236 a_ = new P[this->H_];
00237
00238 fin->read((char*)&p_, sizeof(p_));
00239 fin->read((char*)&d_, sizeof(d_));
00240 for(T i=0; i < this->H_; i++)
00241 {
00242 fin->read((char*)&a_[i], sizeof(a_[i]));
00243 }
00244 }
00245
00246
00247 template <typename T>
00248 void UnivHash_linear<T>::initSeeds() {
00249 a_ = new T*[this->H_];
00250 b_ = new T*[this->H_];
00251 for(count_t i=0; i < this->H_; i++) {
00252 a_[i] = new T[MAX_NGRAM_ORDER];
00253 b_[i] = new T[MAX_NGRAM_ORDER];
00254 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00255 a_[i][j] = 1 + Utils::rand<T>();
00256 b_[i][j] = Utils::rand<T>();
00257 }
00258 }
00259 }
00260 template <typename T>
00261 void UnivHash_linear<T>::freeSeeds() {
00262 for(count_t i=0; i < this->H_; i++) {
00263 delete[] a_[i];
00264 delete[] b_[i];
00265 }
00266 delete[] a_;
00267 delete[] b_;
00268 a_ = b_ = NULL;
00269 }
00270 template <typename T>
00271 inline T UnivHash_linear<T>::hash(const wordID_t* id, const int len,
00272 count_t h) {
00273 CHECK(h < this->H_);
00274 T value = 0;
00275 int pos(0);
00276 while(pos < len) {
00277 value += ((a_[h][pos] * id[pos]) + b_[h][pos]);
00278 ++pos;
00279 }
00280 return value % this->m_;
00281 }
00282 template <typename T>
00283 inline T UnivHash_linear<T>::hash(const wordID_t id, const count_t pos,
00284 const T prevValue, count_t h) {
00285 CHECK(h < this->H_);
00286 T value = prevValue + ((a_[h][pos] * id) + b_[h][pos]);
00287 return value % this->m_;
00288 }
00289 template <typename T>
00290 void UnivHash_linear<T>::save(FileHandler* fout) {
00291
00292 HashBase<T>::save(fout);
00293 fout->write((char*)&pr_, sizeof(pr_));
00294 for(count_t i=0; i < this->H_; i++) {
00295 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00296 fout->write((char*)&a_[i][j], sizeof(a_[i][j]));
00297 fout->write((char*)&b_[i][j], sizeof(b_[i][j]));
00298
00299
00300 }
00301 }
00302 }
00303 template <typename T>
00304 void UnivHash_linear<T>::load(FileHandler* fin) {
00305
00306 fin->read((char*)&pr_, sizeof(pr_));
00307 a_ = new T*[this->H_];
00308 b_ = new T*[this->H_];
00309 for(count_t i=0; i < this->H_; i++) {
00310 a_[i] = new T[MAX_NGRAM_ORDER];
00311 b_[i] = new T[MAX_NGRAM_ORDER];
00312 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00313 fin->read((char*)&a_[i][j], sizeof(a_[i][j]));
00314 fin->read((char*)&b_[i][j], sizeof(b_[i][j]));
00315
00316
00317 }
00318 }
00319 }
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332 #endif