00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef INC_RANDLM_FILTER_H
00018 #define INC_RANDLM_FILTER_H
00019
00020 #include <cassert>
00021 #include <cmath>
00022 #include "FileHandler.h"
00023
00024 #ifdef WIN32
00025 #define log2(X) (log((double)X)/log((double)2))
00026 #endif
00027
00028 namespace randlm
00029 {
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 template<typename T>
00041 class Filter
00042 {
00043 public:
00044 Filter(uint64_t addresses, int width) : addresses_(addresses), width_(width), data_(NULL) {
00045
00046 cell_width_ = sizeof(T) << 3;
00047
00048 assert(cell_width_ > 0 && cell_width_ <= 64 && cell_width_ >= width);
00049
00050 log_cell_width_ = static_cast<int>(floor(log((double)cell_width_)/log((double)2) + 0.000001));
00051
00052 cells_ = ((addresses * width) + cell_width_ - 1) >> log_cell_width_;
00053
00054 data_ = new T[cells_];
00055 assert(data_ != NULL);
00056 assert(reset());
00057
00058 first_bit_ = (width % cell_width_ == 0) ? 0 : cell_width_ - (width % cell_width_);
00059
00060 full_mask_ = static_cast<T>(0xffffffffffffffffull);
00061
00062 address_mask_ = full_mask_ >> first_bit_;
00063 }
00064 Filter(Moses::FileHandler* fin, bool loaddata = true) : data_(NULL) {
00065 assert(loadHeader(fin));
00066 if (loaddata)
00067 assert(loadData(fin));
00068 }
00069 virtual ~Filter() {
00070 delete[] data_;
00071 }
00072 bool reset() {
00073 for (uint64_t i = 0; i < cells_; ++i)
00074 data_[i] = 0;
00075 return true;
00076 }
00077 count_t size() {
00078
00079 return cells_ * sizeof(T) >> 20;
00080 }
00081
00082 inline bool read(uint64_t address, T* value) {
00083 assert(address <= addresses_);
00084
00085 uint64_t data_bit = address * width_;
00086 uint32_t data_cell = (data_bit >> log_cell_width_);
00087
00088 int offset = (data_bit % cell_width_) - first_bit_;
00089
00090 if (offset == 0) {
00091 *value = data_[data_cell] & address_mask_;
00092 return true;
00093 }
00094
00095 if (offset < 0) {
00096 *value = (data_[data_cell] >> -offset) & address_mask_;
00097 return true;
00098 }
00099
00100 *value = ((data_[data_cell] << offset)
00101 | (data_[data_cell + 1] >> (cell_width_ - offset))) & address_mask_ ;
00102 return true;
00103 }
00104 inline T read(uint64_t address) {
00105 assert(address <= addresses_);
00106
00107 T value = 0;
00108 uint64_t data_bit = address * width_;
00109 uint32_t data_cell = (data_bit >> log_cell_width_);
00110
00111 int offset = (data_bit % cell_width_) - first_bit_;
00112
00113 if (offset == 0) {
00114 value = data_[data_cell] & address_mask_;
00115 }
00116
00117 else if (offset < 0) {
00118 value = (data_[data_cell] >> -offset) & address_mask_;
00119 }
00120
00121 else
00122 value = ((data_[data_cell] << offset)
00123 | (data_[data_cell + 1] >> (cell_width_ - offset))) & address_mask_ ;
00124 return value;
00125 }
00126 inline bool write(uint64_t address, T value) {
00127 assert(address <= addresses_);
00128 assert(log2(value) <= width_);
00129
00130 uint64_t data_bit = address * width_;
00131 uint32_t data_cell = (data_bit >> log_cell_width_);
00132
00133 int offset = (data_bit % cell_width_) - first_bit_;
00134
00135 if (offset == 0) {
00136 data_[data_cell] = value | (data_[data_cell] & ~address_mask_);
00137 return true;
00138 }
00139
00140 if (offset < 0) {
00141 data_[data_cell] = (value << -offset)
00142 | (data_[data_cell] & ~(address_mask_ << -offset));
00143 return true;
00144 }
00145
00146 data_[data_cell] = (value >> offset) |
00147 (data_[data_cell] & ~(address_mask_ >> offset));
00148 data_[data_cell + 1] = (value << (cell_width_ - offset)) |
00149 (data_[data_cell + 1] & (full_mask_ >> offset));
00150 return true;
00151 }
00152 inline bool readWithFingerprint(uint64_t address, T finger, T* value) {
00153
00154 uint64_t data_bit = address * width_;
00155 uint32_t data_cell = (data_bit >> log_cell_width_);
00156
00157 int offset = (data_bit % cell_width_) - first_bit_;
00158
00159 if (offset == 0) {
00160 *value = (finger ^ data_[data_cell]) & address_mask_;
00161 return true;
00162 }
00163
00164 if (offset < 0) {
00165 *value = ((data_[data_cell] >> -offset) ^ finger) & address_mask_;
00166 return true;
00167 }
00168
00169 *value = (((data_[data_cell] << offset)
00170 | (data_[data_cell + 1] >> (cell_width_ - offset))) ^ finger)
00171 & address_mask_ ;
00172 return true;
00173 }
00174 inline bool writeWithFingerprint(uint64_t address, T finger, T value) {
00175
00176 finger &= address_mask_;
00177 uint64_t data_bit = address * width_;
00178 uint32_t data_cell = (data_bit >> log_cell_width_);
00179
00180 int offset = (data_bit % cell_width_) - first_bit_;
00181
00182 if (offset == 0) {
00183 data_[data_cell] = (finger ^ value) | (data_[data_cell] & ~address_mask_);
00184 return true;
00185 }
00186
00187 if (offset < 0) {
00188 data_[data_cell] = ((finger ^ value) << -offset)
00189 | (data_[data_cell] & ~(address_mask_ << -offset));
00190 return true;
00191 }
00192
00193 data_[data_cell] = ((finger ^ value) >> offset) |
00194 (data_[data_cell] & ~(address_mask_ >> offset));
00195 data_[data_cell + 1] = ((finger ^ value) << (cell_width_ - offset)) |
00196 (data_[data_cell + 1] & (full_mask_ >> offset));
00197 return true;
00198 }
00199
00200 void printFilter(const std::string & prefix = "", uint32_t truncate = 64) {
00201 std::cout << prefix;
00202 for (uint32_t i = 0; i < cells_ && i < truncate; ++i) {
00203 for (int j = cell_width_ - 1; j >= 0; --j)
00204 if (data_[i] & (1ull << j))
00205 std::cout << 1;
00206 else
00207 std::cout << 0;
00208 std::cout << "\n";
00209 }
00210 std::cout << std::endl;
00211 }
00212
00213 uint64_t getAddresses() {
00214 return addresses_;
00215 }
00216 int getWidth() {
00217 return width_;
00218 }
00219 int getCellWidth() {
00220 return cell_width_;
00221 }
00222 uint32_t getCells() {
00223 return cells_;
00224 }
00225 virtual bool save(Moses::FileHandler* out) {
00226 assert(out != NULL);
00227 assert(out->write((char*)&cells_, sizeof(cells_)));
00228 assert(out->write((char*)&cell_width_, sizeof(cell_width_)));
00229 assert(out->write((char*)&log_cell_width_, sizeof(log_cell_width_)));
00230 assert(out->write((char*)&addresses_, sizeof(addresses_)));
00231 assert(out->write((char*)&width_, sizeof(width_)));
00232 assert(out->write((char*)&first_bit_, sizeof(first_bit_)));
00233 assert(out->write((char*)&full_mask_, sizeof(full_mask_)));
00234 assert(out->write((char*)&address_mask_, sizeof(address_mask_)));
00235
00236 const uint64_t jump = 524288032ul;
00237 if((width_ == 1) || cells_ < jump)
00238 assert(out->write((char*)data_, cells_ * sizeof(T)));
00239 else {
00240 uint64_t idx(0);
00241 while(idx + jump < cells_) {
00242 assert(out->write((char*)&data_[idx], jump * sizeof(T)));
00243 idx += jump;
00244 }
00245 assert(out->write((char*)&data_[idx], (cells_ - idx) * sizeof(T)));
00246 }
00247 return true;
00248 }
00249 protected:
00250 bool loadHeader(Moses::FileHandler* fin) {
00251 assert(fin != NULL);
00252 assert(fin->read((char*)&cells_, sizeof(cells_)));
00253 assert(fin->read((char*)&cell_width_, sizeof(cell_width_)));
00254 assert(cell_width_ == sizeof(T) << 3);
00255 assert(fin->read((char*)&log_cell_width_, sizeof(log_cell_width_)));
00256 assert(fin->read((char*)&addresses_, sizeof(addresses_)));
00257 assert(fin->read((char*)&width_, sizeof(width_)));
00258 assert(fin->read((char*)&first_bit_, sizeof(first_bit_)));
00259 assert(fin->read((char*)&full_mask_, sizeof(full_mask_)));
00260 assert(fin->read((char*)&address_mask_, sizeof(address_mask_)));
00261 return true;
00262 }
00263 bool loadData(Moses::FileHandler* fin) {
00264
00265 data_ = new T[cells_];
00266 assert(data_ != NULL);
00267 assert(fin->read((char*)data_, cells_ * sizeof(T)));
00268
00269
00270 return true;
00271 }
00272 uint64_t cells_;
00273 int cell_width_;
00274 int log_cell_width_;
00275 uint64_t addresses_;
00276 int width_;
00277 int first_bit_;
00278 T full_mask_;
00279 T address_mask_;
00280 T* data_;
00281 };
00282
00283
00284 class BitFilter : public Filter<uint8_t>
00285 {
00286 public:
00287 BitFilter(uint64_t bits) : Filter<uint8_t>(bits, 1) {}
00288 BitFilter(Moses::FileHandler* fin, bool loaddata = true)
00289 : Filter<uint8_t>(fin, loaddata) {
00290 if (loaddata)
00291 assert(load(fin));
00292 }
00293
00294 virtual bool testBit(uint64_t location) {
00295
00296 return data_[(location % addresses_) >> 3] & 1 << ((location % addresses_) % 8);
00297 }
00298 virtual bool setBit(uint64_t location) {
00299
00300 data_[(location % addresses_) >> 3] |= 1 << ((location % addresses_) % 8);
00301 return true;
00302 }
00303 virtual bool clearBit(uint64_t location) {
00304
00305 data_[(location % addresses_) >> 3] &= 0 << ((location % addresses_) % 8);
00306 return true;
00307 }
00308 bool save(Moses::FileHandler* fout) {
00309 assert(Filter<uint8_t>::save(fout));
00310 std::cerr << "Saved BitFilter. Rho = " << rho() << "." << std::endl;;
00311 return true;
00312 }
00313 float rho(uint64_t limit = 0) {
00314 uint64_t ones = 0;
00315 uint64_t range = limit > 0 ? std::min(limit,cells_) : cells_;
00316 for (uint64_t i = 0; i < range; ++i)
00317 for (int j = 0; j < 8; ++j)
00318 if (data_[i] & (1 << j))
00319 ++ones;
00320 return static_cast<float>((range << 3) - ones)/static_cast<float>(range << 3);
00321 }
00322 protected:
00323 bool load(Moses::FileHandler* fin) {
00324 std::cerr << "Loaded BitFilter. Rho = " << rho() << "." << std::endl;;
00325 return true;
00326 }
00327 };
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426 }
00427 #endif // INC_RANDLM_FILTER_H