/* Copyright: Boaz Segev, 2018-2019 License: MIT Feel free to copy, use and enjoy according to the license provided. */ #ifndef H_FIO_MEM_H /** * This is a custom memory allocator the utilizes memory pools to allow for * concurrent memory allocations across threads. * * Allocated memory is always zeroed out and aligned on a 16 byte boundary. * * Reallocated memory is always aligned on a 16 byte boundary but it might be * filled with junk data after the valid data (this is true also for * `fio_realloc2`). * * The memory allocator assumes multiple concurrent allocation/deallocation, * short life spans (memory is freed shortly, but not immediately, after it was * allocated) as well as small allocations (realloc almost always copies data). * * These assumptions allow the allocator to avoid lock contention by ignoring * fragmentation within a memory "block" and waiting for the whole "block" to be * freed before it's memory is recycled (no per-allocation "free list"). * * An "arena" is allocated per-CPU core during initialization - there's no * dynamic allocation of arenas. This allows threads to minimize lock contention * by cycling through the arenas until a free arena is detected. * * There should be a free arena at any given time (statistically speaking) and * the thread will only be deferred in the unlikely event in which there's no * available arena. * * By avoiding the "free-list", the need for allocation "headers" is also * avoided and allocations are performed with practically zero overhead (about * 32 bytes overhead per 32KB memory, that's 1 bit per 1Kb). * * However, the lack of a "free list" means that memory "leaks" are more * expensive and small long-life allocations could cause fragmentation if * performed periodically (rather than performed during startup). * * This allocator should NOT be used for objects with a long life-span, because * even a single persistent object will prevent the re-use of the whole memory * block from which it was allocated (see FIO_MEMORY_BLOCK_SIZE for size). * * Some more details: * * Allocation and deallocations and (usually) managed by "blocks". * * A memory "block" can include any number of memory pages that are a multiple * of 2 (up to 1Mb of memory). However, the default value, set by the value of * FIO_MEMORY_BLOCK_SIZE_LOG, is 32Kb (see value at the end of this header). * * Each block includes a 32 byte header that uses reference counters and * position markers (24 bytes are required padding). * * The block's position marker (`pos`) marks the next available byte (counted in * multiples of 16 bytes). * * The block's reference counter (`ref`) counts how many allocations reference * memory in the block (including the "arena" that "owns" the block). * * Except for the position marker (`pos`) that acts the same as `sbrk`, there's * no way to know which "slices" are allocated and which "slices" are available. * * The allocator uses `mmap` when requesting memory from the system and for * allocations bigger than MEMORY_BLOCK_ALLOC_LIMIT (37.5% of the block). * * Small allocations are differentiated from big allocations by their memory * alignment. * * If a memory allocation is placed 16 bytes after whole block alignment (within * a block's padding zone), the memory was allocated directly using `mmap` as a * "big allocation". The 16 bytes include an 8 byte header and an 8 byte * padding. * * To replace the system's `malloc` function family compile with the * `FIO_OVERRIDE_MALLOC` defined (`-DFIO_OVERRIDE_MALLOC`). * * When using tcmalloc or jemalloc, define `FIO_FORCE_MALLOC` to prevent * `fio_mem` from compiling (`-DFIO_FORCE_MALLOC`). Function wrappers will be * compiled just in case, so calls to `fio_malloc` will be routed to `malloc`. * */ #define H_FIO_MEM_H #include /** * Allocates memory using a per-CPU core block memory pool. * Memory is zeroed out. * * Allocations above FIO_MEMORY_BLOCK_ALLOC_LIMIT (12,288 bytes when using 32Kb * blocks) will be redirected to `mmap`, as if `fio_mmap` was called. */ void *fio_malloc(size_t size); /** * same as calling `fio_malloc(size_per_unit * unit_count)`; * * Allocations above FIO_MEMORY_BLOCK_ALLOC_LIMIT (12,288 bytes when using 32Kb * blocks) will be redirected to `mmap`, as if `fio_mmap` was called. */ void *fio_calloc(size_t size_per_unit, size_t unit_count); /** Frees memory that was allocated using this library. */ void fio_free(void *ptr); /** * Re-allocates memory. An attept to avoid copying the data is made only for big * memory allocations (larger than FIO_MEMORY_BLOCK_ALLOC_LIMIT). */ void *fio_realloc(void *ptr, size_t new_size); /** * Re-allocates memory. An attept to avoid copying the data is made only for big * memory allocations (larger than FIO_MEMORY_BLOCK_ALLOC_LIMIT). * * This variation is slightly faster as it might copy less data. */ void *fio_realloc2(void *ptr, size_t new_size, size_t copy_length); /** * Allocates memory directly using `mmap`, this is prefered for objects that * both require almost a page of memory (or more) and expect a long lifetime. * * However, since this allocation will invoke the system call (`mmap`), it will * be inherently slower. * * `fio_free` can be used for deallocating the memory. */ void *fio_mmap(size_t size); /** Clears any memory locks, in case of a system call to `fork`. */ void fio_malloc_after_fork(void); /** Tests the facil.io memory allocator. */ void fio_malloc_test(void); /** If defined, `malloc` will be used instead of the fio_malloc functions */ #if FIO_FORCE_MALLOC #define fio_malloc malloc #define fio_calloc calloc #define fio_mmap malloc #define fio_free free #define fio_realloc realloc #define fio_realloc2(ptr, new_size, old_data_len) realloc((ptr), (new_size)) #define fio_malloc_test() #define fio_malloc_after_fork() /* allows local override as well as global override */ #elif FIO_OVERRIDE_MALLOC #define malloc fio_malloc #define free fio_free #define realloc fio_realloc #define calloc fio_calloc #endif /** Allocator default settings. */ #ifndef FIO_MEMORY_BLOCK_SIZE_LOG #define FIO_MEMORY_BLOCK_SIZE_LOG (15) /*15 == 32Kb, 16 == 64Kb, 17 == 128Kb*/ #endif #ifndef FIO_MEMORY_BLOCK_SIZE #define FIO_MEMORY_BLOCK_SIZE ((uintptr_t)1 << FIO_MEMORY_BLOCK_SIZE_LOG) #endif #ifndef FIO_MEMORY_BLOCK_MASK #define FIO_MEMORY_BLOCK_MASK (FIO_MEMORY_BLOCK_SIZE - 1) /* 0b111... */ #endif #ifndef FIO_MEMORY_BLOCK_SLICES #define FIO_MEMORY_BLOCK_SLICES (FIO_MEMORY_BLOCK_SIZE >> 4) /* 16B slices */ #endif #ifndef FIO_MEMORY_BLOCK_ALLOC_LIMIT /* defaults to 37.5% of the block, after which `mmap` is used instead */ #define FIO_MEMORY_BLOCK_ALLOC_LIMIT \ ((FIO_MEMORY_BLOCK_SIZE >> 2) + (FIO_MEMORY_BLOCK_SIZE >> 3)) #endif #ifndef FIO_MEM_MAX_BLOCKS_PER_CORE /** * The maximum number of available memory blocks that will be pooled before * memory is returned to the system. */ #define FIO_MEM_MAX_BLOCKS_PER_CORE \ (1 << (22 - FIO_MEMORY_BLOCK_SIZE_LOG)) /* 22 == 4Mb per CPU core (1<<22) */ #endif #endif /* H_FIO_MEM_H */