#include "lm/bhiksha.hh"
#include "lm/config.hh"
#include "util/file.hh"
#include "util/exception.hh"

#include <algorithm>
#include <limits>

#include <stdint.h>
00007
00008 namespace lm {
00009 namespace ngram {
00010 namespace trie {
00011
00012 DontBhiksha::DontBhiksha(const void * , uint64_t , uint64_t max_next, const Config &) :
00013 next_(util::BitsMask::ByMax(max_next)) {}
00014
// Version byte for the array compression header.  FinishedLoading() writes it
// as the first header byte and UpdateConfigFromBinary() rejects any file whose
// stored byte differs from it.
const uint8_t kArrayBhikshaVersion = 0;
00016
00017
00018 void ArrayBhiksha::UpdateConfigFromBinary(int fd, Config &config) {
00019 uint8_t version;
00020 uint8_t configured_bits;
00021 util::ReadOrThrow(fd, &version, 1);
00022 util::ReadOrThrow(fd, &configured_bits, 1);
00023 if (version != kArrayBhikshaVersion) UTIL_THROW(FormatLoadException, "This file has sorted array compression version " << (unsigned) version << " but the code expects version " << (unsigned)kArrayBhikshaVersion);
00024 config.pointer_bhiksha_bits = configured_bits;
00025 }
00026
00027 namespace {
00028
00029
00030 uint8_t ChopBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
00031 uint8_t required = util::RequiredBits(max_next);
00032 uint8_t best_chop = 0;
00033 int64_t lowest_change = std::numeric_limits<int64_t>::max();
00034
00035 for (uint8_t chop = 0; chop <= std::min(required, config.pointer_bhiksha_bits); ++chop) {
00036 int64_t change = (max_next >> (required - chop)) * 64
00037 - max_offset * static_cast<int64_t>(chop);
00038 if (change < lowest_change) {
00039 lowest_change = change;
00040 best_chop = chop;
00041 }
00042 }
00043 return best_chop;
00044 }
00045
00046 std::size_t ArrayCount(uint64_t max_offset, uint64_t max_next, const Config &config) {
00047 uint8_t required = util::RequiredBits(max_next);
00048 uint8_t chopping = ChopBits(max_offset, max_next, config);
00049 return (max_next >> (required - chopping)) + 1 ;
00050 }
00051 }
00052
00053 uint64_t ArrayBhiksha::Size(uint64_t max_offset, uint64_t max_next, const Config &config) {
00054 return sizeof(uint64_t) * (1 + ArrayCount(max_offset, max_next, config)) + 7 ;
00055 }
00056
00057 uint8_t ArrayBhiksha::InlineBits(uint64_t max_offset, uint64_t max_next, const Config &config) {
00058 return util::RequiredBits(max_next) - ChopBits(max_offset, max_next, config);
00059 }
00060
00061 namespace {
00062
// Rounds `from` up to the next address that is a multiple of 8.
// Returns `from` unchanged when it is already 8-byte aligned; otherwise the
// result is between 1 and 7 bytes past `from`.
void *AlignTo8(void *from) {
  uint8_t *const bytes = static_cast<uint8_t*>(from);
  // uintptr_t is the integer type intended to hold a pointer value;
  // std::size_t is not guaranteed to be wide enough on every platform.
  const uintptr_t misalignment = reinterpret_cast<uintptr_t>(bytes) & 7;
  return misalignment ? bytes + (8 - misalignment) : bytes;
}
00069
00070 }
00071
00072 ArrayBhiksha::ArrayBhiksha(void *base, uint64_t max_offset, uint64_t max_next, const Config &config)
00073 : next_inline_(util::BitsMask::ByBits(InlineBits(max_offset, max_next, config))),
00074 offset_begin_(reinterpret_cast<const uint64_t*>(AlignTo8(base)) + 1 ),
00075 offset_end_(offset_begin_ + ArrayCount(max_offset, max_next, config)),
00076 write_to_(reinterpret_cast<uint64_t*>(AlignTo8(base)) + 1 + 1 ),
00077 original_base_(base) {}
00078
00079 void ArrayBhiksha::FinishedLoading(const Config &config) {
00080
00081 *(write_to_ - (write_to_ - offset_begin_)) = 0;
00082
00083 if (write_to_ != offset_end_) UTIL_THROW(util::Exception, "Did not get all the array entries that were expected.");
00084
00085 uint8_t *head_write = reinterpret_cast<uint8_t*>(original_base_);
00086 *(head_write++) = kArrayBhikshaVersion;
00087 *(head_write++) = config.pointer_bhiksha_bits;
00088 }
00089
00090 void ArrayBhiksha::LoadedBinary() {
00091 }
00092
00093 }
00094 }
00095 }