00001
00002
00003
00004
#include "util/mmap.hh"

#include "util/exception.hh"
#include "util/file.hh"
#include "util/parallel_read.hh"
#include "util/scoped.hh"

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
00019
00020 #if defined(_WIN32) || defined(_WIN64)
00021 #include <windows.h>
00022 #include <io.h>
00023 #else
00024 #include <sys/mman.h>
00025 #include <unistd.h>
00026 #endif
00027
00028 namespace util {
00029
// Granularity at which mappings may be made: the allocation granularity on
// Windows, the virtual memory page size everywhere else.
std::size_t SizePage() {
#if defined(_WIN32) || defined(_WIN64)
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  return si.dwAllocationGranularity;
#else
  return static_cast<std::size_t>(sysconf(_SC_PAGE_SIZE));
#endif
}
00039
00040 scoped_mmap::~scoped_mmap() {
00041 if (data_ != (void*)-1) {
00042 try {
00043
00044 SyncOrThrow(data_, size_);
00045 UnmapOrThrow(data_, size_);
00046 } catch (const util::ErrnoException &e) {
00047 std::cerr << e.what();
00048 abort();
00049 }
00050 }
00051 }
00052
namespace {
// Round value up to the next multiple of mult.  mult must be a power of two;
// the mask trick below is only valid in that case.
template <class T> T RoundUpPow2(T value, T mult) {
  const T mask = mult - 1;
  return ((value - 1) & ~mask) + mult;
}
} // namespace
00058
// Allocate size bytes (zero-filled if zeroed) via HugeMalloc.  Members are
// initialized to the empty state first because HugeMalloc starts by calling
// reset() on *this, which inspects source_.
scoped_memory::scoped_memory(std::size_t size, bool zeroed) : data_(NULL), size_(0), source_(NONE_ALLOCATED) {
  HugeMalloc(size, zeroed, *this);
}
00062
// Free whatever this object currently owns (how depends on how it was
// allocated), then take ownership of [data, data + size) allocated by
// `source`.
void scoped_memory::reset(void *data, std::size_t size, Alloc source) {
  switch(source_) {
    case MMAP_ROUND_UP_ALLOCATED:
      // The mapping was page-rounded at allocation time, so the real mapped
      // length is size_ rounded up to a page.  The unnamed scoped_mmap
      // temporary unmaps in its destructor at the end of this statement.
      scoped_mmap(data_, RoundUpPow2(size_, (std::size_t)SizePage()));
      break;
    case MMAP_ALLOCATED:
      // Unnamed temporary: unmaps data_ immediately via its destructor.
      scoped_mmap(data_, size_);
      break;
    case MALLOC_ALLOCATED:
      free(data_);
      break;
    case NONE_ALLOCATED:
      break;
  }
  data_ = data;
  size_ = size;
  source_ = source;
}
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
// mmap flags used for file-backed mappings: shared, so writes propagate to
// the file.  MAP_FILE is included where defined for older platforms.  On
// Windows the flags argument is unused (MapOrThrow goes through
// CreateFileMapping instead), so zero is fine.
const int kFileFlags =
#if defined(_WIN32) || defined(_WIN64)
  0 // MapOrThrow's Windows branch ignores flags entirely.
#elif defined(MAP_FILE)
  MAP_FILE | MAP_SHARED
#else
  MAP_SHARED
#endif
  ;
00103
// Map size bytes of fd starting at offset.  for_write selects a writable
// mapping; prefault requests that pages be populated up front (honored via
// MAP_POPULATE where available).  Throws ErrnoException on failure.
// NOTE(review): on POSIX, offset is passed straight to mmap, so callers are
// responsible for page alignment — ExtractNonRolling below rounds it down.
void *MapOrThrow(std::size_t size, bool for_write, int flags, bool prefault, int fd, uint64_t offset) {
#ifdef MAP_POPULATE // Linux specific
  if (prefault) {
    flags |= MAP_POPULATE;
  }
#endif
#if defined(_WIN32) || defined(_WIN64)
  int protectC = for_write ? PAGE_READWRITE : PAGE_READONLY;
  int protectM = for_write ? FILE_MAP_WRITE : FILE_MAP_READ;
  // The mapping object must extend through the end of the requested view.
  uint64_t total_size = size + offset;
  HANDLE hMapping = CreateFileMapping((HANDLE)_get_osfhandle(fd), NULL, protectC, total_size >> 32, static_cast<DWORD>(total_size), NULL);
  UTIL_THROW_IF(!hMapping, ErrnoException, "CreateFileMapping failed");
  // NOTE(review): the low half of offset is narrowed to DWORD implicitly
  // here; an explicit static_cast<DWORD>(offset) would match total_size above.
  LPVOID ret = MapViewOfFile(hMapping, protectM, offset >> 32, offset, size);
  // The view keeps the section alive, so the handle can be closed now.
  CloseHandle(hMapping);
  UTIL_THROW_IF(!ret, ErrnoException, "MapViewOfFile failed");
#else
  int protect = for_write ? (PROT_READ | PROT_WRITE) : PROT_READ;
  void *ret;
  UTIL_THROW_IF((ret = mmap(NULL, size, protect, flags, fd, offset)) == MAP_FAILED, ErrnoException, "mmap failed for size " << size << " at offset " << offset);
# ifdef MADV_HUGEPAGE
  // Ask for transparent huge pages.  Purely advisory, so the return value is
  // deliberately ignored.
  madvise(ret, size, MADV_HUGEPAGE);
# endif
#endif
  return ret;
}
00132
00133 void SyncOrThrow(void *start, size_t length) {
00134 #if defined(_WIN32) || defined(_WIN64)
00135 UTIL_THROW_IF(!::FlushViewOfFile(start, length), ErrnoException, "Failed to sync mmap");
00136 #else
00137 UTIL_THROW_IF(length && msync(start, length, MS_SYNC), ErrnoException, "Failed to sync mmap");
00138 #endif
00139 }
00140
00141 void UnmapOrThrow(void *start, size_t length) {
00142 #if defined(_WIN32) || defined(_WIN64)
00143 UTIL_THROW_IF(!::UnmapViewOfFile(start), ErrnoException, "Failed to unmap a file");
00144 #else
00145 UTIL_THROW_IF(munmap(start, length), ErrnoException, "munmap failed");
00146 #endif
00147 }
00148
00149
00150 #ifdef __linux__
00151
00152 namespace {
00153
00154 bool AnonymousMap(std::size_t size, int flags, bool populate, util::scoped_memory &to) {
00155 if (populate) flags |= MAP_POPULATE;
00156 void *ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | flags, -1, 0);
00157 if (ret == MAP_FAILED) return false;
00158 to.reset(ret, size, scoped_memory::MMAP_ALLOCATED);
00159 return true;
00160 }
00161
00162 bool TryHuge(std::size_t size, uint8_t alignment_bits, bool populate, util::scoped_memory &to) {
00163
00164 if (size < (1ULL << alignment_bits) || (1ULL << alignment_bits) < SizePage())
00165 return false;
00166
00167
00168 #ifdef MAP_HUGE_SHIFT
00169 if (AnonymousMap(size, MAP_HUGETLB | (alignment_bits << MAP_HUGE_SHIFT), populate, to))
00170 return true;
00171 #endif
00172
00173
00174
00175
00176 #ifdef MAP_HUGETLB
00177 if (AnonymousMap(size, MAP_HUGETLB, populate, to))
00178 return true;
00179 #endif
00180
00181
00182
00183
00184
00185
00186 std::size_t size_up = RoundUpPow2(size, SizePage());
00187
00188 std::size_t ask = size_up + (1 << alignment_bits) - SizePage();
00189
00190 scoped_mmap larger(mmap(NULL, ask, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0), ask);
00191 if (larger.get() == MAP_FAILED) return false;
00192
00193
00194 uintptr_t base = reinterpret_cast<uintptr_t>(larger.get());
00195
00196 uintptr_t rounded_up = RoundUpPow2(base, static_cast<uintptr_t>(1) << alignment_bits);
00197 if (base != rounded_up) {
00198
00199 UnmapOrThrow(larger.get(), rounded_up - base);
00200 larger.steal();
00201 larger.reset(reinterpret_cast<void*>(rounded_up), ask - (rounded_up - base));
00202 }
00203
00204
00205 assert(larger.size() >= size_up);
00206 if (larger.size() > size_up) {
00207
00208 UnmapOrThrow(static_cast<uint8_t*>(larger.get()) + size_up, larger.size() - size_up);
00209 larger.reset(larger.steal(), size_up);
00210 }
00211 #ifdef MADV_HUGEPAGE
00212 madvise(larger.get(), size_up, MADV_HUGEPAGE);
00213 #endif
00214 to.reset(larger.steal(), size, scoped_memory::MMAP_ROUND_UP_ALLOCATED);
00215 return true;
00216 }
00217
00218 }
00219
00220 #endif
00221
00222 void HugeMalloc(std::size_t size, bool zeroed, scoped_memory &to) {
00223 to.reset();
00224 #ifdef __linux__
00225
00226
00227
00228 if (size >= (1ULL << 30) && TryHuge(size, 30, zeroed, to))
00229 return;
00230
00231 if (size >= (1ULL << 21) && TryHuge(size, 21, zeroed, to))
00232 return;
00233 #endif // __linux__
00234
00235 to.reset(zeroed ? calloc(1, size) : malloc(size), size, scoped_memory::MALLOC_ALLOCATED);
00236 UTIL_THROW_IF(!to.get(), ErrnoException, "Failed to allocate " << size << " bytes");
00237 }
00238
#ifdef __linux__
// Size at which HugeRealloc abandons realloc and switches a malloc-backed
// buffer to huge-page allocation: 2 MB, or one page if pages are somehow
// larger than that.
const std::size_t kTransitionHuge = std::max<std::size_t>(1ULL << 21, SizePage());
#endif // __linux__
00242
00243 void HugeRealloc(std::size_t to, bool zero_new, scoped_memory &mem) {
00244 if (!to) {
00245 mem.reset();
00246 return;
00247 }
00248 std::size_t from_size = mem.size();
00249 switch (mem.source()) {
00250 case scoped_memory::NONE_ALLOCATED:
00251 HugeMalloc(to, zero_new, mem);
00252 return;
00253 #ifdef __linux__
00254 case scoped_memory::MMAP_ROUND_UP_ALLOCATED:
00255
00256 from_size = RoundUpPow2(from_size, SizePage());
00257 case scoped_memory::MMAP_ALLOCATED:
00258
00259 if (to <= SizePage()) {
00260 scoped_malloc replacement(malloc(to));
00261 memcpy(replacement.get(), mem.get(), std::min(to, mem.size()));
00262 if (zero_new && to > mem.size())
00263 memset(static_cast<uint8_t*>(replacement.get()) + mem.size(), 0, to - mem.size());
00264 mem.reset(replacement.release(), to, scoped_memory::MALLOC_ALLOCATED);
00265 } else {
00266 void *new_addr = mremap(mem.get(), from_size, to, MREMAP_MAYMOVE);
00267 UTIL_THROW_IF(!new_addr, ErrnoException, "Failed to mremap from " << from_size << " to " << to);
00268 mem.steal();
00269 mem.reset(new_addr, to, scoped_memory::MMAP_ALLOCATED);
00270 }
00271 return;
00272 #endif // __linux__
00273 case scoped_memory::MALLOC_ALLOCATED:
00274 #ifdef __linux__
00275
00276 if (to >= kTransitionHuge && mem.size() < kTransitionHuge) {
00277 scoped_memory replacement;
00278 HugeMalloc(to, zero_new, replacement);
00279 memcpy(replacement.get(), mem.get(), mem.size());
00280
00281 mem.reset(replacement.get(), replacement.size(), replacement.source());
00282 replacement.steal();
00283 return;
00284 }
00285 #endif // __linux__
00286 {
00287 void *new_addr = std::realloc(mem.get(), to);
00288 UTIL_THROW_IF(!new_addr, ErrnoException, "realloc to " << to << " bytes failed.");
00289 if (zero_new && to > mem.size())
00290 memset(static_cast<uint8_t*>(new_addr) + mem.size(), 0, to - mem.size());
00291 mem.steal();
00292 mem.reset(new_addr, to, scoped_memory::MALLOC_ALLOCATED);
00293 }
00294 return;
00295 default:
00296 UTIL_THROW(Exception, "HugeRealloc called with type " << mem.source());
00297 }
00298 }
00299
// Load [offset, offset + size) of fd into out using the requested strategy:
//   LAZY             : mmap; pages fault in on first touch.
//   POPULATE_OR_LAZY : mmap, prefaulted where MAP_POPULATE exists, else lazy.
//   POPULATE_OR_READ : prefaulted mmap where MAP_POPULATE exists, otherwise
//                      falls back to allocating and read()ing.
//   READ             : HugeMalloc + seek + read.
//   PARALLEL_READ    : HugeMalloc + multithreaded read.
// Note how the POPULATE_OR_READ label moves between the mmap and read arms
// depending on whether MAP_POPULATE is defined.
void MapRead(LoadMethod method, int fd, uint64_t offset, std::size_t size, scoped_memory &out) {
  switch (method) {
    case LAZY:
      out.reset(MapOrThrow(size, false, kFileFlags, false, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
      break;
    case POPULATE_OR_LAZY:
#ifdef MAP_POPULATE
    case POPULATE_OR_READ:
#endif
      out.reset(MapOrThrow(size, false, kFileFlags, true, fd, offset), size, scoped_memory::MMAP_ALLOCATED);
      break;
#ifndef MAP_POPULATE
    case POPULATE_OR_READ:
#endif
    case READ:
      HugeMalloc(size, false, out);
      SeekOrThrow(fd, offset);
      ReadOrThrow(fd, out.get(), size);
      break;
    case PARALLEL_READ:
      HugeMalloc(size, false, out);
      ParallelRead(fd, out.get(), size, offset);
      break;
  }
}
00325
// Resize fd to exactly `size` zero bytes and map it writable from the start.
void *MapZeroedWrite(int fd, std::size_t size) {
  // Truncate to zero first, then grow: the extension is zero-filled, so the
  // whole mapping starts out zeroed regardless of prior file contents.
  ResizeOrThrow(fd, 0);
  ResizeOrThrow(fd, size);
  return MapOrThrow(size, true, kFileFlags, false, fd, 0);
}
00331
// Create (or overwrite) the named file, size it, and map it for writing.
// The descriptor is returned through `file` so its lifetime outlives the
// mapping; on failure the exception is annotated with the file name.
void *MapZeroedWrite(const char *name, std::size_t size, scoped_fd &file) {
  file.reset(CreateOrThrow(name));
  try {
    return MapZeroedWrite(file.get(), size);
  } catch (ErrnoException &e) {
    e << " in file " << name;
    throw;
  }
}
00341
// Copy another Rolling, then advance the base of the rolled region by
// `increase` bytes.
Rolling::Rolling(const Rolling &copy_from, uint64_t increase) {
  *this = copy_from;
  IncreaseBase(increase);
}
00346
// Copy file coordinates and configuration, but deliberately not the mapped
// window (mem_ is untouched): a non-passthrough copy starts with an empty
// window and remaps on demand.  Passthrough instances have no mapping of
// their own, so their pointer and end are shared directly.
Rolling &Rolling::operator=(const Rolling &copy_from) {
  fd_ = copy_from.fd_;
  file_begin_ = copy_from.file_begin_;
  file_end_ = copy_from.file_end_;
  for_write_ = copy_from.for_write_;
  block_ = copy_from.block_;
  read_bound_ = copy_from.read_bound_;

  current_begin_ = 0;
  if (copy_from.IsPassthrough()) {
    current_end_ = copy_from.current_end_;
    ptr_ = copy_from.ptr_;
  } else {
    // Force a remap on first access rather than sharing copy_from's window.
    current_end_ = 0;
    ptr_ = NULL;
  }
  return *this;
}
00366
00367 Rolling::Rolling(int fd, bool for_write, std::size_t block, std::size_t read_bound, uint64_t offset, uint64_t amount) {
00368 current_begin_ = 0;
00369 current_end_ = 0;
00370 fd_ = fd;
00371 file_begin_ = offset;
00372 file_end_ = offset + amount;
00373 for_write_ = for_write;
00374 block_ = block;
00375 read_bound_ = read_bound;
00376 }
00377
// Map `size` bytes at logical index `index` into `out` and return a pointer
// to the requested data.  In passthrough mode the data is already in memory,
// so no mapping is made.  Otherwise the file offset is rounded down to a
// page boundary (mmap needs page-aligned offsets) and the returned pointer
// skips past the extra leading bytes.
void *Rolling::ExtractNonRolling(scoped_memory &out, uint64_t index, std::size_t size) {
  out.reset();
  if (IsPassthrough()) return static_cast<uint8_t*>(get()) + index;
  uint64_t offset = index + file_begin_;
  // cruft = how far offset sits past the page boundary below it.
  uint64_t cruft = offset % static_cast<uint64_t>(SizePage());
  std::size_t map_size = static_cast<std::size_t>(size + cruft);
  out.reset(MapOrThrow(map_size, for_write_, kFileFlags, true, fd_, offset - cruft), map_size, scoped_memory::MMAP_ALLOCATED);
  return static_cast<uint8_t*>(out.get()) + static_cast<std::size_t>(cruft);
}
00388
// Remap the window so that logical index `index` is covered.  ptr_ is biased
// by -index so callers keep indexing with absolute logical offsets.
void Rolling::Roll(uint64_t index) {
  assert(!IsPassthrough());
  std::size_t amount;
  if (file_end_ - (index + file_begin_) > static_cast<uint64_t>(block_)) {
    amount = block_;
    // Not the last block: stop read_bound_ short of the mapped end so a read
    // starting before current_end_ cannot run off the mapping.
    current_end_ = index + amount - read_bound_;
  } else {
    // Last block: map through the end of the covered file region.
    amount = file_end_ - (index + file_begin_);
    current_end_ = index + amount;
  }
  ptr_ = static_cast<uint8_t*>(ExtractNonRolling(mem_, index, amount)) - index;
  current_begin_ = index;
}
00403
00404 }