00001 #ifndef INC_ALLHASHFUNCS_H
00002 #define INC_ALLHASHFUNCS_H
00003
00004 #include <cmath>
00005 #include "types.h"
00006 #include "utils.h"
00007 #include "FileHandler.h"
00008 #include "util/exception.hh"
00009 #include "util/random.hh"
00010
00011 typedef uint64_t P;
00012
00014 template <typename T>
00015 class HashBase
00016 {
00017 protected:
00018 T m_;
00019 count_t H_;
00020 virtual void initSeeds()=0;
00021 virtual void freeSeeds()=0;
00022 public:
00023 HashBase(float m, count_t H=1):m_((T)m), H_(H) {
00024
00025 }
00026 HashBase(Moses::FileHandler* fin) {
00027 load(fin);
00028 }
00029 virtual ~HashBase() {}
00030 virtual T hash(const char*s, count_t h)=0;
00031 virtual T hash(const wordID_t* id, const int len, count_t h)=0;
00032 count_t size() {
00033 return H_;
00034 }
00035 virtual void save(Moses::FileHandler* fout) {
00036 UTIL_THROW_IF2(fout == 0, "Null file handle");
00037 fout->write((char*)&m_, sizeof(m_));
00038 fout->write((char*)&H_, sizeof(H_));
00039 }
00040 virtual void load(Moses::FileHandler* fin) {
00041 UTIL_THROW_IF2(fin == 0, "Null file handle");
00042 fin->read((char*)&m_, sizeof(m_));
00043 fin->read((char*)&H_, sizeof(H_));
00044 }
00045 };
00046
00048 template <typename T>
00049 class UnivHash_linear: public HashBase<T>
00050 {
00051 public:
00052 UnivHash_linear(float m, count_t H, P pr):
00053 HashBase<T>(m, H), pr_(pr) {
00054 initSeeds();
00055 }
00056 UnivHash_linear(Moses::FileHandler* fin):
00057 HashBase<T>(fin) {
00058 load(fin);
00059 }
00060 ~UnivHash_linear() {
00061 freeSeeds();
00062 }
00063 T hash(const char* s, count_t h) {
00064 return 0;
00065 }
00066 T hash(const wordID_t* id, const int len, count_t h);
00067 T hash(const wordID_t id, const count_t pos,
00068 const T prevValue, count_t h);
00069 void save(Moses::FileHandler* fout);
00070 void load(Moses::FileHandler* fin);
00071 private:
00072 T** a_, **b_;
00073 P pr_;
00074 void initSeeds();
00075 void freeSeeds();
00076 };
00077
00084 template <typename T>
00085 class UnivHash_noPrimes: public HashBase<T>
00086 {
00087 public:
00088 UnivHash_noPrimes(float k, float l):
00089 HashBase<T>(k, 100), d_(count_t((l-k))) {
00090 if(((int)l >> 3) == sizeof(P)) p_ = (P) pow(2,l) - 1;
00091 else p_ = (P) pow(2,l);
00092 initSeeds();
00093 }
00094 UnivHash_noPrimes(Moses::FileHandler* fin):
00095 HashBase<T>(fin) {
00096 load(fin);
00097 }
00098 ~UnivHash_noPrimes() {
00099 freeSeeds();
00100 }
00101 T hash(const char* s, count_t h);
00102 T hash(const wordID_t* id, const int len, count_t h);
00103 T hash(const P x, count_t h);
00104 void save(Moses::FileHandler* fout);
00105 void load(Moses::FileHandler* fin);
00106 private:
00107 count_t d_;
00108 P p_, *a_;
00109 void initSeeds();
00110 void freeSeeds() {
00111 delete[] a_;
00112 }
00113 };
00114
00116 template <typename T>
00117 class Hash_shiftAddXOR: public HashBase<T>
00118 {
00119 public:
00120 Hash_shiftAddXOR(float m, count_t H=5): HashBase<T>(m,H),
00121 l_(5), r_(2) {
00122 initSeeds();
00123 }
00124 ~Hash_shiftAddXOR() {
00125 freeSeeds();
00126 }
00127 T hash(const char* s, count_t h);
00128 T hash(const wordID_t* id, const int len, count_t h) {}
00129 private:
00130 T* v_;
00131 const unsigned short l_, r_;
00132 void initSeeds();
00133 void freeSeeds() {
00134 delete[] v_;
00135 }
00136 };
00137
00139 template <typename T>
00140 class UnivHash_tableXOR: public HashBase<T>
00141 {
00142 public:
00143 UnivHash_tableXOR(float m, count_t H=5): HashBase<T>(m, H),
00144 table_(NULL), tblLen_(255*MAX_STR_LEN) {
00145 initSeeds();
00146 }
00147 ~UnivHash_tableXOR() {
00148 freeSeeds();
00149 }
00150 T hash(const char* s, count_t h);
00151 T hash(const wordID_t* id, const int len, count_t h) {}
00152 private:
00153 T** table_;
00154 count_t tblLen_;
00155 void initSeeds();
00156 void freeSeeds();
00157 };
00158
00159
00160 template <typename T>
00161 void Hash_shiftAddXOR<T>::initSeeds()
00162 {
00163 v_ = new T[this->H_];
00164 for(count_t i=0; i < this->H_; i++)
00165 v_[i] = util::wide_rand<T>() + 1;
00166 }
00167 template <typename T>
00168 T Hash_shiftAddXOR<T>::hash(const char* s, count_t h)
00169 {
00170 T value = v_[h];
00171 int pos(0);
00172 unsigned char c;
00173 while((c = *s++) && (++pos < MAX_STR_LEN)) {
00174 value ^= ((value << l_) + (value >> r_) + c);
00175 }
00176 return (value % this->m_);
00177 }
00178
00179
00180 template <typename T>
00181 void UnivHash_tableXOR<T>::initSeeds()
00182 {
00183
00184 if(table_) freeSeeds();
00185
00186 table_ = new T* [this->H_];
00187
00188 for(count_t j=0; j < this->H_; j++) {
00189 table_[j] = new T[tblLen_];
00190 for(count_t i=0; i < tblLen_; i++)
00191 table_[j][i] = util::wide_rand_excl(this->m_-1);
00192 }
00193 }
00194 template <typename T>
00195 void UnivHash_tableXOR<T>::freeSeeds()
00196 {
00197 for(count_t j = 0; j < this->H_; j++)
00198 delete[] table_[j];
00199 delete[] table_;
00200 table_ = NULL;
00201 }
00202 template <typename T>
00203 T UnivHash_tableXOR<T>::hash(const char* s, count_t h)
00204 {
00205 T value = 0;
00206 count_t pos = 0, idx = 0;
00207 unsigned char c;
00208 while((c = *s++) && (++pos < MAX_STR_LEN))
00209 value ^= table_[h][idx += c];
00210 UTIL_THROW_IF2(value >= this->m_, "Error");
00211 return value;
00212 }
00213
00214
00215 template <typename T>
00216 void UnivHash_noPrimes<T>::initSeeds()
00217 {
00218 a_ = new P[this->H_];
00219 for(T i=0; i < this->H_; i++) {
00220 a_[i] = util::wide_rand<P>();
00221 if(a_[i] % 2 == 0) a_[i]++;
00222 }
00223 }
00224 template <typename T>
00225 T UnivHash_noPrimes<T>::hash(const P x, count_t h)
00226 {
00227
00228 T value = ((a_[h] * x) % p_) >> d_;
00229 return value % this->m_;
00230 }
00231 template <typename T>
00232 T UnivHash_noPrimes<T>::hash(const wordID_t* id, const int len,
00233 count_t h)
00234 {
00235 T value = 0;
00236 int pos(0);
00237 while(pos < len) {
00238 value ^= hash((P)id[pos], h++);
00239 pos++;
00240 }
00241 return value % this->m_;
00242 }
00243 template <typename T>
00244 T UnivHash_noPrimes<T>::hash(const char* s, count_t h)
00245 {
00246 T value = 0;
00247 int pos(0);
00248 unsigned char c;
00249 while((c = *s++) && (++pos < MAX_STR_LEN)) {
00250 value ^= hash((P)c, h);
00251 }
00252 return value % this->m_;
00253 }
00254 template <typename T>
00255 void UnivHash_noPrimes<T>::save(Moses::FileHandler* fout)
00256 {
00257 HashBase<T>::save(fout);
00258 fout->write((char*)&p_, sizeof(p_));
00259 fout->write((char*)&d_, sizeof(d_));
00260 for(T i=0; i < this->H_; i++) {
00261 fout->write((char*)&a_[i], sizeof(a_[i]));
00262 }
00263 }
00264 template <typename T>
00265 void UnivHash_noPrimes<T>::load(Moses::FileHandler* fin)
00266 {
00267 a_ = new P[this->H_];
00268
00269 fin->read((char*)&p_, sizeof(p_));
00270 fin->read((char*)&d_, sizeof(d_));
00271 for(T i=0; i < this->H_; i++) {
00272 fin->read((char*)&a_[i], sizeof(a_[i]));
00273 }
00274 }
00275
00276
00277 template <typename T>
00278 void UnivHash_linear<T>::initSeeds()
00279 {
00280 a_ = new T*[this->H_];
00281 b_ = new T*[this->H_];
00282 for(count_t i=0; i < this->H_; i++) {
00283 a_[i] = new T[MAX_NGRAM_ORDER];
00284 b_[i] = new T[MAX_NGRAM_ORDER];
00285 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00286 a_[i][j] = 1 + util::wide_rand<T>();
00287 b_[i][j] = util::wide_rand<T>();
00288 }
00289 }
00290 }
00291 template <typename T>
00292 void UnivHash_linear<T>::freeSeeds()
00293 {
00294 for(count_t i=0; i < this->H_; i++) {
00295 delete[] a_[i];
00296 delete[] b_[i];
00297 }
00298 delete[] a_;
00299 delete[] b_;
00300 a_ = b_ = NULL;
00301 }
00302 template <typename T>
00303 inline T UnivHash_linear<T>::hash(const wordID_t* id, const int len,
00304 count_t h)
00305 {
00306 UTIL_THROW_IF2(h >= this->H_, "Error");
00307
00308 T value = 0;
00309 int pos(0);
00310 while(pos < len) {
00311 value += ((a_[h][pos] * id[pos]) + b_[h][pos]);
00312 ++pos;
00313 }
00314 return value % this->m_;
00315 }
00316 template <typename T>
00317 inline T UnivHash_linear<T>::hash(const wordID_t id, const count_t pos,
00318 const T prevValue, count_t h)
00319 {
00320 UTIL_THROW_IF2(h >= this->H_, "Error");
00321 T value = prevValue + ((a_[h][pos] * id) + b_[h][pos]);
00322 return value % this->m_;
00323 }
00324 template <typename T>
00325 void UnivHash_linear<T>::save(Moses::FileHandler* fout)
00326 {
00327
00328 HashBase<T>::save(fout);
00329 fout->write((char*)&pr_, sizeof(pr_));
00330 for(count_t i=0; i < this->H_; i++) {
00331 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00332 fout->write((char*)&a_[i][j], sizeof(a_[i][j]));
00333 fout->write((char*)&b_[i][j], sizeof(b_[i][j]));
00334
00335
00336 }
00337 }
00338 }
00339 template <typename T>
00340 void UnivHash_linear<T>::load(Moses::FileHandler* fin)
00341 {
00342
00343 fin->read((char*)&pr_, sizeof(pr_));
00344 a_ = new T*[this->H_];
00345 b_ = new T*[this->H_];
00346 for(count_t i=0; i < this->H_; i++) {
00347 a_[i] = new T[MAX_NGRAM_ORDER];
00348 b_[i] = new T[MAX_NGRAM_ORDER];
00349 for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
00350 fin->read((char*)&a_[i][j], sizeof(a_[i][j]));
00351 fin->read((char*)&b_[i][j], sizeof(b_[i][j]));
00352
00353
00354 }
00355 }
00356 }
00357 #endif