mirror of
https://github.com/zigzap/zap.git
synced 2025-10-20 23:24:09 +00:00
189 lines
7 KiB
C
189 lines
7 KiB
C
/*
|
|
Copyright: Boaz Segev, 2018-2019
|
|
License: MIT
|
|
|
|
Feel free to copy, use and enjoy according to the license provided.
|
|
*/
|
|
#ifndef H_FIO_MEM_H
|
|
|
|
/**
|
|
* This is a custom memory allocator the utilizes memory pools to allow for
|
|
* concurrent memory allocations across threads.
|
|
*
|
|
* Allocated memory is always zeroed out and aligned on a 16 byte boundary.
|
|
*
|
|
* Reallocated memory is always aligned on a 16 byte boundary but it might be
|
|
* filled with junk data after the valid data (this is true also for
|
|
* `fio_realloc2`).
|
|
*
|
|
* The memory allocator assumes multiple concurrent allocation/deallocation,
|
|
* short life spans (memory is freed shortly, but not immediately, after it was
|
|
* allocated) as well as small allocations (realloc almost always copies data).
|
|
*
|
|
* These assumptions allow the allocator to avoid lock contention by ignoring
|
|
* fragmentation within a memory "block" and waiting for the whole "block" to be
|
|
* freed before it's memory is recycled (no per-allocation "free list").
|
|
*
|
|
* An "arena" is allocated per-CPU core during initialization - there's no
|
|
* dynamic allocation of arenas. This allows threads to minimize lock contention
|
|
* by cycling through the arenas until a free arena is detected.
|
|
*
|
|
* There should be a free arena at any given time (statistically speaking) and
|
|
* the thread will only be deferred in the unlikely event in which there's no
|
|
* available arena.
|
|
*
|
|
* By avoiding the "free-list", the need for allocation "headers" is also
|
|
* avoided and allocations are performed with practically zero overhead (about
|
|
* 32 bytes overhead per 32KB memory, that's 1 bit per 1Kb).
|
|
*
|
|
* However, the lack of a "free list" means that memory "leaks" are more
|
|
* expensive and small long-life allocations could cause fragmentation if
|
|
* performed periodically (rather than performed during startup).
|
|
*
|
|
* This allocator should NOT be used for objects with a long life-span, because
|
|
* even a single persistent object will prevent the re-use of the whole memory
|
|
* block from which it was allocated (see FIO_MEMORY_BLOCK_SIZE for size).
|
|
*
|
|
* Some more details:
|
|
*
|
|
* Allocation and deallocations and (usually) managed by "blocks".
|
|
*
|
|
* A memory "block" can include any number of memory pages that are a multiple
|
|
* of 2 (up to 1Mb of memory). However, the default value, set by the value of
|
|
* FIO_MEMORY_BLOCK_SIZE_LOG, is 32Kb (see value at the end of this header).
|
|
*
|
|
* Each block includes a 32 byte header that uses reference counters and
|
|
* position markers (24 bytes are required padding).
|
|
*
|
|
* The block's position marker (`pos`) marks the next available byte (counted in
|
|
* multiples of 16 bytes).
|
|
*
|
|
* The block's reference counter (`ref`) counts how many allocations reference
|
|
* memory in the block (including the "arena" that "owns" the block).
|
|
*
|
|
* Except for the position marker (`pos`) that acts the same as `sbrk`, there's
|
|
* no way to know which "slices" are allocated and which "slices" are available.
|
|
*
|
|
* The allocator uses `mmap` when requesting memory from the system and for
|
|
* allocations bigger than MEMORY_BLOCK_ALLOC_LIMIT (37.5% of the block).
|
|
*
|
|
* Small allocations are differentiated from big allocations by their memory
|
|
* alignment.
|
|
*
|
|
* If a memory allocation is placed 16 bytes after whole block alignment (within
|
|
* a block's padding zone), the memory was allocated directly using `mmap` as a
|
|
* "big allocation". The 16 bytes include an 8 byte header and an 8 byte
|
|
* padding.
|
|
*
|
|
* To replace the system's `malloc` function family compile with the
|
|
* `FIO_OVERRIDE_MALLOC` defined (`-DFIO_OVERRIDE_MALLOC`).
|
|
*
|
|
* When using tcmalloc or jemalloc, define `FIO_FORCE_MALLOC` to prevent
|
|
* `fio_mem` from compiling (`-DFIO_FORCE_MALLOC`). Function wrappers will be
|
|
* compiled just in case, so calls to `fio_malloc` will be routed to `malloc`.
|
|
*
|
|
*/
|
|
#define H_FIO_MEM_H
|
|
|
|
#include <stdlib.h>
|
|
|
|
/**
|
|
* Allocates memory using a per-CPU core block memory pool.
|
|
* Memory is zeroed out.
|
|
*
|
|
* Allocations above FIO_MEMORY_BLOCK_ALLOC_LIMIT (12,288 bytes when using 32Kb
|
|
* blocks) will be redirected to `mmap`, as if `fio_mmap` was called.
|
|
*/
|
|
void *fio_malloc(size_t size);
|
|
|
|
/**
|
|
* same as calling `fio_malloc(size_per_unit * unit_count)`;
|
|
*
|
|
* Allocations above FIO_MEMORY_BLOCK_ALLOC_LIMIT (12,288 bytes when using 32Kb
|
|
* blocks) will be redirected to `mmap`, as if `fio_mmap` was called.
|
|
*/
|
|
void *fio_calloc(size_t size_per_unit, size_t unit_count);
|
|
|
|
/** Frees memory that was allocated using this library. */
|
|
void fio_free(void *ptr);
|
|
|
|
/**
|
|
* Re-allocates memory. An attept to avoid copying the data is made only for big
|
|
* memory allocations (larger than FIO_MEMORY_BLOCK_ALLOC_LIMIT).
|
|
*/
|
|
void *fio_realloc(void *ptr, size_t new_size);
|
|
|
|
/**
|
|
* Re-allocates memory. An attept to avoid copying the data is made only for big
|
|
* memory allocations (larger than FIO_MEMORY_BLOCK_ALLOC_LIMIT).
|
|
*
|
|
* This variation is slightly faster as it might copy less data.
|
|
*/
|
|
void *fio_realloc2(void *ptr, size_t new_size, size_t copy_length);
|
|
|
|
/**
|
|
* Allocates memory directly using `mmap`, this is prefered for objects that
|
|
* both require almost a page of memory (or more) and expect a long lifetime.
|
|
*
|
|
* However, since this allocation will invoke the system call (`mmap`), it will
|
|
* be inherently slower.
|
|
*
|
|
* `fio_free` can be used for deallocating the memory.
|
|
*/
|
|
void *fio_mmap(size_t size);
|
|
|
|
/** Clears any memory locks, in case of a system call to `fork`. */
|
|
void fio_malloc_after_fork(void);
|
|
|
|
/** Tests the facil.io memory allocator. */
|
|
void fio_malloc_test(void);
|
|
|
|
/** If defined, `malloc` will be used instead of the fio_malloc functions */
|
|
#if FIO_FORCE_MALLOC
|
|
#define fio_malloc malloc
|
|
#define fio_calloc calloc
|
|
#define fio_mmap malloc
|
|
#define fio_free free
|
|
#define fio_realloc realloc
|
|
#define fio_realloc2(ptr, new_size, old_data_len) realloc((ptr), (new_size))
|
|
#define fio_malloc_test()
|
|
#define fio_malloc_after_fork()
|
|
|
|
/* allows local override as well as global override */
|
|
#elif FIO_OVERRIDE_MALLOC
|
|
#define malloc fio_malloc
|
|
#define free fio_free
|
|
#define realloc fio_realloc
|
|
#define calloc fio_calloc
|
|
|
|
#endif
|
|
|
|
/** Allocator default settings. */
|
|
#ifndef FIO_MEMORY_BLOCK_SIZE_LOG
|
|
#define FIO_MEMORY_BLOCK_SIZE_LOG (15) /*15 == 32Kb, 16 == 64Kb, 17 == 128Kb*/
|
|
#endif
|
|
#ifndef FIO_MEMORY_BLOCK_SIZE
|
|
#define FIO_MEMORY_BLOCK_SIZE ((uintptr_t)1 << FIO_MEMORY_BLOCK_SIZE_LOG)
|
|
#endif
|
|
#ifndef FIO_MEMORY_BLOCK_MASK
|
|
#define FIO_MEMORY_BLOCK_MASK (FIO_MEMORY_BLOCK_SIZE - 1) /* 0b111... */
|
|
#endif
|
|
#ifndef FIO_MEMORY_BLOCK_SLICES
|
|
#define FIO_MEMORY_BLOCK_SLICES (FIO_MEMORY_BLOCK_SIZE >> 4) /* 16B slices */
|
|
#endif
|
|
#ifndef FIO_MEMORY_BLOCK_ALLOC_LIMIT
|
|
/* defaults to 37.5% of the block, after which `mmap` is used instead */
|
|
#define FIO_MEMORY_BLOCK_ALLOC_LIMIT \
|
|
((FIO_MEMORY_BLOCK_SIZE >> 2) + (FIO_MEMORY_BLOCK_SIZE >> 3))
|
|
#endif
|
|
|
|
#ifndef FIO_MEM_MAX_BLOCKS_PER_CORE
|
|
/**
|
|
* The maximum number of available memory blocks that will be pooled before
|
|
* memory is returned to the system.
|
|
*/
|
|
#define FIO_MEM_MAX_BLOCKS_PER_CORE \
|
|
(1 << (22 - FIO_MEMORY_BLOCK_SIZE_LOG)) /* 22 == 4Mb per CPU core (1<<22) */
|
|
#endif
|
|
|
|
#endif /* H_FIO_MEM_H */
|