00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017 #ifndef INC_RANDLM_FILTER_H
00018 #define INC_RANDLM_FILTER_H
00019
00020 #include <cmath>
00021 #include "FileHandler.h"
00022
00023 #ifdef WIN32
00024 #define log2(X) (log((double)X)/log((double)2))
00025 #endif
00026
00027 namespace randlm {
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 template<typename T>
00039 class Filter {
00040 public:
00041 Filter(uint64_t addresses, int width) : addresses_(addresses), width_(width), data_(NULL) {
00042
00043 cell_width_ = sizeof(T) << 3;
00044
00045 CHECK(cell_width_ > 0 && cell_width_ <= 64 && cell_width_ >= width);
00046
00047 log_cell_width_ = static_cast<int>(floor(log((double)cell_width_)/log((double)2) + 0.000001));
00048
00049 cells_ = ((addresses * width) + cell_width_ - 1) >> log_cell_width_;
00050
00051 data_ = new T[cells_];
00052 CHECK(data_ != NULL);
00053 CHECK(reset());
00054
00055 first_bit_ = (width % cell_width_ == 0) ? 0 : cell_width_ - (width % cell_width_);
00056
00057 full_mask_ = static_cast<T>(0xffffffffffffffffull);
00058
00059 address_mask_ = full_mask_ >> first_bit_;
00060 }
00061 Filter(FileHandler* fin, bool loaddata = true) : data_(NULL) {
00062 CHECK(loadHeader(fin));
00063 if (loaddata)
00064 CHECK(loadData(fin));
00065 }
00066 virtual ~Filter() {
00067 delete[] data_;
00068 }
00069 bool reset() {
00070 for (uint64_t i = 0; i < cells_; ++i)
00071 data_[i] = 0;
00072 return true;
00073 }
00074 count_t size() {
00075
00076 return cells_ * sizeof(T) >> 20;
00077 }
00078
00079 inline bool read(uint64_t address, T* value) {
00080 CHECK(address <= addresses_);
00081
00082 uint64_t data_bit = address * width_;
00083 uint32_t data_cell = (data_bit >> log_cell_width_);
00084
00085 int offset = (data_bit % cell_width_) - first_bit_;
00086
00087 if (offset == 0) {
00088 *value = data_[data_cell] & address_mask_;
00089 return true;
00090 }
00091
00092 if (offset < 0) {
00093 *value = (data_[data_cell] >> -offset) & address_mask_;
00094 return true;
00095 }
00096
00097 *value = ((data_[data_cell] << offset)
00098 | (data_[data_cell + 1] >> (cell_width_ - offset))) & address_mask_ ;
00099 return true;
00100 }
00101 inline T read(uint64_t address) {
00102 CHECK(address <= addresses_);
00103
00104 T value = 0;
00105 uint64_t data_bit = address * width_;
00106 uint32_t data_cell = (data_bit >> log_cell_width_);
00107
00108 int offset = (data_bit % cell_width_) - first_bit_;
00109
00110 if (offset == 0) {
00111 value = data_[data_cell] & address_mask_;
00112 }
00113
00114 else if (offset < 0) {
00115 value = (data_[data_cell] >> -offset) & address_mask_;
00116 }
00117
00118 else
00119 value = ((data_[data_cell] << offset)
00120 | (data_[data_cell + 1] >> (cell_width_ - offset))) & address_mask_ ;
00121 return value;
00122 }
00123 inline bool write(uint64_t address, T value) {
00124 CHECK(address <= addresses_);
00125 CHECK(log2(value) <= width_);
00126
00127 uint64_t data_bit = address * width_;
00128 uint32_t data_cell = (data_bit >> log_cell_width_);
00129
00130 int offset = (data_bit % cell_width_) - first_bit_;
00131
00132 if (offset == 0) {
00133 data_[data_cell] = value | (data_[data_cell] & ~address_mask_);
00134 return true;
00135 }
00136
00137 if (offset < 0) {
00138 data_[data_cell] = (value << -offset)
00139 | (data_[data_cell] & ~(address_mask_ << -offset));
00140 return true;
00141 }
00142
00143 data_[data_cell] = (value >> offset) |
00144 (data_[data_cell] & ~(address_mask_ >> offset));
00145 data_[data_cell + 1] = (value << (cell_width_ - offset)) |
00146 (data_[data_cell + 1] & (full_mask_ >> offset));
00147 return true;
00148 }
00149 inline bool readWithFingerprint(uint64_t address, T finger, T* value) {
00150
00151 uint64_t data_bit = address * width_;
00152 uint32_t data_cell = (data_bit >> log_cell_width_);
00153
00154 int offset = (data_bit % cell_width_) - first_bit_;
00155
00156 if (offset == 0) {
00157 *value = (finger ^ data_[data_cell]) & address_mask_;
00158 return true;
00159 }
00160
00161 if (offset < 0) {
00162 *value = ((data_[data_cell] >> -offset) ^ finger) & address_mask_;
00163 return true;
00164 }
00165
00166 *value = (((data_[data_cell] << offset)
00167 | (data_[data_cell + 1] >> (cell_width_ - offset))) ^ finger)
00168 & address_mask_ ;
00169 return true;
00170 }
00171 inline bool writeWithFingerprint(uint64_t address, T finger, T value) {
00172
00173 finger &= address_mask_;
00174 uint64_t data_bit = address * width_;
00175 uint32_t data_cell = (data_bit >> log_cell_width_);
00176
00177 int offset = (data_bit % cell_width_) - first_bit_;
00178
00179 if (offset == 0) {
00180 data_[data_cell] = (finger ^ value) | (data_[data_cell] & ~address_mask_);
00181 return true;
00182 }
00183
00184 if (offset < 0) {
00185 data_[data_cell] = ((finger ^ value) << -offset)
00186 | (data_[data_cell] & ~(address_mask_ << -offset));
00187 return true;
00188 }
00189
00190 data_[data_cell] = ((finger ^ value) >> offset) |
00191 (data_[data_cell] & ~(address_mask_ >> offset));
00192 data_[data_cell + 1] = ((finger ^ value) << (cell_width_ - offset)) |
00193 (data_[data_cell + 1] & (full_mask_ >> offset));
00194 return true;
00195 }
00196
00197 void printFilter(const std::string & prefix = "", uint32_t truncate = 64){
00198 std::cout << prefix;
00199 for (uint32_t i = 0; i < cells_ && i < truncate; ++i) {
00200 for (int j = cell_width_ - 1; j >= 0; --j)
00201 if (data_[i] & (1ull << j))
00202 std::cout << 1;
00203 else
00204 std::cout << 0;
00205 std::cout << "\n";
00206 }
00207 std::cout << std::endl;
00208 }
00209
00210 uint64_t getAddresses() { return addresses_; }
00211 int getWidth() { return width_; }
00212 int getCellWidth() { return cell_width_; }
00213 uint32_t getCells() { return cells_; }
00214 virtual bool save(FileHandler* out) {
00215 CHECK(out != NULL);
00216 CHECK(out->write((char*)&cells_, sizeof(cells_)));
00217 CHECK(out->write((char*)&cell_width_, sizeof(cell_width_)));
00218 CHECK(out->write((char*)&log_cell_width_, sizeof(log_cell_width_)));
00219 CHECK(out->write((char*)&addresses_, sizeof(addresses_)));
00220 CHECK(out->write((char*)&width_, sizeof(width_)));
00221 CHECK(out->write((char*)&first_bit_, sizeof(first_bit_)));
00222 CHECK(out->write((char*)&full_mask_, sizeof(full_mask_)));
00223 CHECK(out->write((char*)&address_mask_, sizeof(address_mask_)));
00224
00225 const uint64_t jump = 524288032ul;
00226 if((width_ == 1) || cells_ < jump)
00227 CHECK(out->write((char*)data_, cells_ * sizeof(T)));
00228 else {
00229 uint64_t idx(0);
00230 while(idx + jump < cells_) {
00231 CHECK(out->write((char*)&data_[idx], jump * sizeof(T)));
00232 idx += jump;
00233 }
00234 CHECK(out->write((char*)&data_[idx], (cells_ - idx) * sizeof(T)));
00235 }
00236 return true;
00237 }
00238 protected:
00239 bool loadHeader(FileHandler* fin) {
00240 CHECK(fin != NULL);
00241 CHECK(fin->read((char*)&cells_, sizeof(cells_)));
00242 CHECK(fin->read((char*)&cell_width_, sizeof(cell_width_)));
00243 CHECK(cell_width_ == sizeof(T) << 3);
00244 CHECK(fin->read((char*)&log_cell_width_, sizeof(log_cell_width_)));
00245 CHECK(fin->read((char*)&addresses_, sizeof(addresses_)));
00246 CHECK(fin->read((char*)&width_, sizeof(width_)));
00247 CHECK(fin->read((char*)&first_bit_, sizeof(first_bit_)));
00248 CHECK(fin->read((char*)&full_mask_, sizeof(full_mask_)));
00249 CHECK(fin->read((char*)&address_mask_, sizeof(address_mask_)));
00250 return true;
00251 }
00252 bool loadData(FileHandler* fin) {
00253
00254 data_ = new T[cells_];
00255 CHECK(data_ != NULL);
00256 CHECK(fin->read((char*)data_, cells_ * sizeof(T)));
00257
00258
00259 return true;
00260 }
00261 uint64_t cells_;
00262 int cell_width_;
00263 int log_cell_width_;
00264 uint64_t addresses_;
00265 int width_;
00266 int first_bit_;
00267 T full_mask_;
00268 T address_mask_;
00269 T* data_;
00270 };
00271
00272
00273 class BitFilter : public Filter<uint8_t> {
00274 public:
00275 BitFilter(uint64_t bits) : Filter<uint8_t>(bits, 1) {}
00276 BitFilter(FileHandler* fin, bool loaddata = true)
00277 : Filter<uint8_t>(fin, loaddata) {
00278 if (loaddata)
00279 CHECK(load(fin));
00280 }
00281
00282 virtual bool testBit(uint64_t location) {
00283
00284 return data_[(location % addresses_) >> 3] & 1 << ((location % addresses_) % 8);
00285 }
00286 virtual bool setBit(uint64_t location) {
00287
00288 data_[(location % addresses_) >> 3] |= 1 << ((location % addresses_) % 8);
00289 return true;
00290 }
00291 virtual bool clearBit(uint64_t location) {
00292
00293 data_[(location % addresses_) >> 3] &= 0 << ((location % addresses_) % 8);
00294 return true;
00295 }
00296 bool save(FileHandler* fout) {
00297 CHECK(Filter<uint8_t>::save(fout));
00298 std::cerr << "Saved BitFilter. Rho = " << rho() << "." << std::endl;;
00299 return true;
00300 }
00301 float rho(uint64_t limit = 0) {
00302 uint64_t ones = 0;
00303 uint64_t range = limit > 0 ? std::min(limit,cells_) : cells_;
00304 for (uint64_t i = 0; i < range; ++i)
00305 for (int j = 0; j < 8; ++j)
00306 if (data_[i] & (1 << j))
00307 ++ones;
00308 return static_cast<float>((range << 3) - ones)/static_cast<float>(range << 3);
00309 }
00310 protected:
00311 bool load(FileHandler* fin) {
00312 std::cerr << "Loaded BitFilter. Rho = " << rho() << "." << std::endl;;
00313 return true;
00314 }
00315 };
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
00358
00359
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414 }
00415 #endif // INC_RANDLM_FILTER_H