mirror of
				https://github.com/zigzap/zap.git
				synced 2025-10-22 16:14:07 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			806 lines
		
	
	
	
		
			26 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			806 lines
		
	
	
	
		
			26 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
| Copyright: Boaz Segev, 2018-2019
 | |
| License: MIT
 | |
| 
 | |
| Feel free to copy, use and enjoy according to the license provided.
 | |
| */
 | |
| 
 | |
| #if defined(__unix__) || defined(__APPLE__) || defined(__linux__)
 | |
| #ifndef _GNU_SOURCE
 | |
| #define _GNU_SOURCE
 | |
| #endif
 | |
| #include <time.h>
 | |
| #endif /* __unix__ */
 | |
| 
 | |
| #include <pthread.h>
 | |
| #include <sys/mman.h>
 | |
| #include <unistd.h>
 | |
| 
 | |
| /* *****************************************************************************
 | |
| If FIO_FORCE_MALLOC is set, use glibc / library malloc
 | |
| ***************************************************************************** */
 | |
| #if FIO_FORCE_MALLOC
 | |
| 
 | |
/* Allocates `size` bytes using the C library's malloc. */
void *fio_malloc(size_t size) {
  return malloc(size);
}

/* Allocates zeroed memory; both arguments are forwarded to calloc as-is. */
void *fio_calloc(size_t size, size_t count) {
  return calloc(size, count);
}

/* Releases memory using the C library's free. */
void fio_free(void *ptr) {
  free(ptr);
}

/* Resizes an allocation using the C library's realloc. */
void *fio_realloc(void *ptr, size_t new_size) {
  return realloc(ptr, new_size);
}

/* Same as fio_realloc; `valid_len` is accepted for API parity and ignored. */
void *fio_realloc2(void *ptr, size_t new_size, size_t valid_len) {
  (void)valid_len;
  return realloc(ptr, new_size);
}

/* Nothing to reset after a fork when the library allocator is in use. */
void fio_malloc_after_fork(void) {}
 | |
| 
 | |
| /* *****************************************************************************
 | |
| facil.io malloc implementation
 | |
| ***************************************************************************** */
 | |
| #else
 | |
| 
 | |
| #include <fio_mem.h>
 | |
| 
 | |
/*
 * `__thread` is a GCC / Clang extension. On any other compiler, map it to the
 * standard C11 thread storage specifier (`_Thread_local`).
 */
#if !defined(__clang__) && !defined(__GNUC__)
#define __thread _Thread_local
#endif
 | |
| 
 | |
| #undef malloc
 | |
| #undef calloc
 | |
| #undef free
 | |
| #undef realloc
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Memory Copying by 16 byte units
 | |
| ***************************************************************************** */
 | |
| 
 | |
/*
 * Copies `units` chunks of 16 bytes each from `src_` to `dest_`.
 *
 * The copy moves one machine word at a time, so `units` is first converted to
 * a word count. The bulk is copied 16 words per pass, the remainder one word
 * at a time. The buffers are declared `__restrict` (they must not overlap).
 */
static inline void fio_memcpy(void *__restrict dest_, void *__restrict src_,
                              size_t units) {
#if __SIZEOF_INT128__ == 9 /* a 128bit type exists... but tests favor 64bit */
  register __uint128_t *to = dest_;
  register __uint128_t *from = src_;
#else
  register size_t *to = dest_;
  register size_t *from = src_;
#if SIZE_MAX == 0xFFFFFFFFFFFFFFFF /* 64 bit size_t: 2 words per unit */
  units <<= 1;
#elif SIZE_MAX == 0xFFFFFFFF /* 32 bit size_t: 4 words per unit */
  units <<= 2;
#else /* unknown... assume 16 bit? 8 words per unit */
  units <<= 3;
#endif
#endif
  /* bulk copy: 16 words per pass */
  for (; units >= 16; units -= 16, to += 16, from += 16) {
    for (size_t word = 0; word < 16; ++word) {
      to[word] = from[word];
    }
  }
  /* copy the remaining 0..15 words */
  for (; units; --units) {
    *(to++) = *(from++);
  }
}
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Spinlock for the few locks we need (atomic reference counting & free blocks)
 | |
| ***************************************************************************** */
 | |
| 
 | |
/* manage the way threads "wait" for the lock to release */
#if defined(__unix__) || defined(__APPLE__) || defined(__linux__)
/*
 * Gives up the CPU by sleeping for a single nanosecond
 * (nanosleep seems to be the most effective and efficient reschedule).
 *
 * Wrapped in `do { } while (0)` so the macro behaves as a single statement,
 * including when used as the body of an `if` that has an `else`.
 */
#define reschedule_thread()                                                    \
  do {                                                                         \
    const struct timespec tm = {.tv_nsec = 1};                                 \
    nanosleep(&tm, NULL);                                                      \
  } while (0)
/*
 * Sleeps for `micosec` microseconds (an integer expression).
 *
 * Whole seconds go to `tv_sec`; the sub-second remainder is converted from
 * microseconds to nanoseconds for `tv_nsec`.
 */
#define throttle_thread(micosec)                                               \
  do {                                                                         \
    const struct timespec tm = {                                               \
        .tv_sec = (time_t)((micosec) / 1000000),                               \
        .tv_nsec = (long)(((micosec) % 1000000) * 1000)};                      \
    nanosleep(&tm, NULL);                                                      \
  } while (0)
#else /* no effective rescheduling, just spin... */
#define reschedule_thread()
#define throttle_thread(micosec)
#endif
 | |
| 
 | |
| /** The spinlock type: locks use a single byte (SPN_LOCK_INIT, zero, means unlocked). */
 | |
| typedef volatile unsigned char spn_lock_i;
 | |
| 
 | |
| /** The initial value of an unlocked spinlock. */
 | |
| #define SPN_LOCK_INIT 0
 | |
| 
 | |
| /* Prefer the __atomic builtins when the compiler predefines __ATOMIC_RELAXED; here SPN_LOCK_BUILTIN stores a value and yields the previous one. */
 | |
| #if defined(__ATOMIC_RELAXED)
 | |
| #define SPN_LOCK_BUILTIN(...) __atomic_exchange_n(__VA_ARGS__, __ATOMIC_SEQ_CST)
 | |
| /** An atomic addition operation; evaluates to the updated value */
 | |
| #define spn_add(...) __atomic_add_fetch(__VA_ARGS__, __ATOMIC_SEQ_CST)
 | |
| /** An atomic subtraction operation; evaluates to the updated value */
 | |
| #define spn_sub(...) __atomic_sub_fetch(__VA_ARGS__, __ATOMIC_SEQ_CST)
 | |
| 
 | |
| /* Otherwise fall back to the legacy __sync builtins. NOTE: here SPN_LOCK_BUILTIN is a fetch-and-OR, so it can set lock bits but OR-ing with 0 never clears them. */
 | |
| #elif defined(__has_builtin)
 | |
| 
 | |
| #if __has_builtin(__sync_fetch_and_or)
 | |
| #define SPN_LOCK_BUILTIN(...) __sync_fetch_and_or(__VA_ARGS__)
 | |
| /** An atomic addition operation; evaluates to the updated value */
 | |
| #define spn_add(...) __sync_add_and_fetch(__VA_ARGS__)
 | |
| /** An atomic subtraction operation; evaluates to the updated value */
 | |
| #define spn_sub(...) __sync_sub_and_fetch(__VA_ARGS__)
 | |
| 
 | |
| #else
 | |
| #error Required builtin "__sync_swap" or "__sync_fetch_and_or" missing from compiler.
 | |
| #endif /* __has_builtin(__sync_fetch_and_or) */
 | |
| 
 | |
| #elif __GNUC__ > 3
 | |
| #define SPN_LOCK_BUILTIN(...) __sync_fetch_and_or(__VA_ARGS__)
 | |
| /** An atomic addition operation; evaluates to the updated value */
 | |
| #define spn_add(...) __sync_add_and_fetch(__VA_ARGS__)
 | |
| /** An atomic subtraction operation; evaluates to the updated value */
 | |
| #define spn_sub(...) __sync_sub_and_fetch(__VA_ARGS__)
 | |
| 
 | |
| #else
 | |
| #error Required builtin "__sync_swap" or "__sync_fetch_and_or" not found.
 | |
| #endif
 | |
| 
 | |
| /** returns 1 and 0 if the lock was successfully aquired (TRUE == FAIL). */
 | |
| static inline int spn_trylock(spn_lock_i *lock) {
 | |
|   return SPN_LOCK_BUILTIN(lock, 1);
 | |
| }
 | |
| 
 | |
| /** Releases a lock. */
 | |
| static inline __attribute__((unused)) int spn_unlock(spn_lock_i *lock) {
 | |
|   return SPN_LOCK_BUILTIN(lock, 0);
 | |
| }
 | |
| 
 | |
| /** returns a lock's state (non 0 == Busy). */
 | |
| static inline __attribute__((unused)) int spn_is_locked(spn_lock_i *lock) {
 | |
|   __asm__ volatile("" ::: "memory");
 | |
|   return *lock;
 | |
| }
 | |
| 
 | |
| /** Busy waits for the lock. */
 | |
| static inline __attribute__((unused)) void spn_lock(spn_lock_i *lock) {
 | |
|   while (spn_trylock(lock)) {
 | |
|     reschedule_thread();
 | |
|   }
 | |
| }
 | |
| 
 | |
| /* *****************************************************************************
 | |
| System Memory wrappers
 | |
| ***************************************************************************** */
 | |
| 
 | |
| /*
 | |
|  * allocates memory using `mmap`, but enforces block size alignment.
 | |
|  * requires page aligned `len`.
 | |
|  *
 | |
|  * `align_shift` is used to move the memory page alignment to allow for a single
 | |
|  * page allocation header. align_shift MUST be either 0 (normal) or 1 (single
 | |
|  * page header). Other values might cause errors.
 | |
|  */
 | |
| static inline void *sys_alloc(size_t len, uint8_t is_indi) {
 | |
|   void *result;
 | |
|   static void *next_alloc = NULL;
 | |
| /* hope for the best? */
 | |
| #ifdef MAP_ALIGNED
 | |
|   result =
 | |
|       mmap(next_alloc, len, PROT_READ | PROT_WRITE,
 | |
|            MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED(FIO_MEMORY_BLOCK_SIZE_LOG),
 | |
|            -1, 0);
 | |
| #else
 | |
|   result = mmap(next_alloc, len, PROT_READ | PROT_WRITE,
 | |
|                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 | |
| #endif
 | |
|   if (result == MAP_FAILED)
 | |
|     return NULL;
 | |
|   if (((uintptr_t)result & FIO_MEMORY_BLOCK_MASK)) {
 | |
|     munmap(result, len);
 | |
|     result = mmap(NULL, len + FIO_MEMORY_BLOCK_SIZE, PROT_READ | PROT_WRITE,
 | |
|                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 | |
|     if (result == MAP_FAILED) {
 | |
|       return NULL;
 | |
|     }
 | |
|     const uintptr_t offset =
 | |
|         (FIO_MEMORY_BLOCK_SIZE - ((uintptr_t)result & FIO_MEMORY_BLOCK_MASK));
 | |
|     if (offset) {
 | |
|       munmap(result, offset);
 | |
|       result = (void *)((uintptr_t)result + offset);
 | |
|     }
 | |
|     munmap((void *)((uintptr_t)result + len), FIO_MEMORY_BLOCK_SIZE - offset);
 | |
|   }
 | |
|   next_alloc =
 | |
|       (void *)((uintptr_t)result + FIO_MEMORY_BLOCK_SIZE +
 | |
|                (is_indi * ((uintptr_t)1 << 30))); /* add 1TB for realloc */
 | |
|   return result;
 | |
| }
 | |
| 
 | |
/* Returns memory to the system using `munmap`.
 * Requires the exact, page aligned, `len`. */
static inline void sys_free(void *mem, size_t len) {
  munmap(mem, len);
}
 | |
| 
 | |
| static void *sys_realloc(void *mem, size_t prev_len, size_t new_len) {
 | |
|   if (new_len > prev_len) {
 | |
| #if defined(__linux__) && defined(MREMAP_MAYMOVE)
 | |
|     void *result = mremap(mem, prev_len, new_len, MREMAP_MAYMOVE);
 | |
|     if (result == MAP_FAILED)
 | |
|       return NULL;
 | |
| #else
 | |
|     void *result =
 | |
|         mmap((void *)((uintptr_t)mem + prev_len), new_len - prev_len,
 | |
|              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 | |
|     if (result == (void *)((uintptr_t)mem + prev_len)) {
 | |
|       result = mem;
 | |
|     } else {
 | |
|       /* copy and free */
 | |
|       munmap(result, new_len - prev_len); /* free the failed attempt */
 | |
|       result = sys_alloc(new_len, 1);     /* allocate new memory */
 | |
|       if (!result) {
 | |
|         return NULL;
 | |
|       }
 | |
|       fio_memcpy(result, mem, prev_len >> 4); /* copy data */
 | |
|       // memcpy(result, mem, prev_len);
 | |
|       munmap(mem, prev_len); /* free original memory */
 | |
|     }
 | |
| #endif
 | |
|     return result;
 | |
|   }
 | |
|   if (new_len + 4096 < prev_len) /* more than a single dangling page */
 | |
|     munmap((void *)((uintptr_t)mem + new_len), prev_len - new_len);
 | |
|   return mem;
 | |
| }
 | |
| 
 | |
/** Rounds `size` up to the next multiple of the page size (pages are assumed
 * to be 4096 bytes long). Exact multiples are returned unchanged. */
static inline size_t sys_round_size(size_t size) {
  const size_t page_mask = 4095;
  return (size + page_mask) & ~page_mask;
}
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Data Types
 | |
| ***************************************************************************** */
 | |
| 
 | |
| /* The basic block header, stored at the very start of a memory block (block_slice_free finds it by masking a slice's address). */
 | |
| typedef struct block_s {
 | |
|   uint16_t ref; /* reference count; block_free recycles the block once it drops to zero */
 | |
|   uint16_t pos; /* position of the next slice within the block, in 16 byte units */
 | |
|   uint16_t max; /* upper limit for `pos`, in 16 byte units */
 | |
|   uint16_t pad; /* memory padding */
 | |
| } block_s;
 | |
| 
 | |
| /* a per-CPU core "arena" for memory allocations (one is allocated per detected core in fio_mem_init) */
 | |
| typedef struct {
 | |
|   block_s *block; /* the block currently being sliced, NULL when the arena has none */
 | |
|   spn_lock_i lock; /* held between arena_enter and arena_exit */
 | |
| } arena_s;
 | |
| 
 | |
| /* The memory allocator's persistent (process wide) state */
 | |
| static struct {
 | |
|   size_t active_size; /* active array size (not referenced by the code visible in this file) */
 | |
|   block_s *available; /* head of the free list of recycled blocks (linked through each block's first word) */
 | |
|   intptr_t count;     /* free list counter; fio_mem_init starts it at minus the core count */
 | |
|   size_t cores;       /* the number of detected CPU cores (also the length of `arenas`) */
 | |
|   spn_lock_i lock;    /* a global lock, taken while `available` is updated */
 | |
| } memory = {
 | |
|     .cores = 1,
 | |
|     .lock = SPN_LOCK_INIT,
 | |
| };
 | |
| 
 | |
| /* The per-CPU arena array (allocated by fio_mem_init, `memory.cores` entries long). */
 | |
| static arena_s *arenas;
 | |
| 
 | |
| /* A placeholder object; fio_malloc returns its address for zero sized allocations. */
 | |
| static long double on_malloc_zero;
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Per-CPU Arena management
 | |
| ***************************************************************************** */
 | |
| 
 | |
| /* returned a locked arena. Attempts the preffered arena first. */
 | |
| static inline arena_s *arena_lock(arena_s *preffered) {
 | |
|   if (!preffered)
 | |
|     preffered = arenas;
 | |
|   if (!spn_trylock(&preffered->lock))
 | |
|     return preffered;
 | |
|   do {
 | |
|     arena_s *arena = preffered;
 | |
|     for (size_t i = (size_t)(arena - arenas); i < memory.cores; ++i) {
 | |
|       if ((preffered == arenas || arena != preffered) &&
 | |
|           !spn_trylock(&arena->lock))
 | |
|         return arena;
 | |
|       ++arena;
 | |
|     }
 | |
|     if (preffered == arenas)
 | |
|       reschedule_thread();
 | |
|     preffered = arenas;
 | |
|   } while (1);
 | |
| }
 | |
| 
 | |
| static __thread arena_s *arena_last_used;
 | |
| 
 | |
| static void arena_enter(void) { arena_last_used = arena_lock(arena_last_used); }
 | |
| 
 | |
| static inline void arena_exit(void) { spn_unlock(&arena_last_used->lock); }
 | |
| 
 | |
| /** Clears any memory locks, in case of a system call to `fork`. */
 | |
| void fio_malloc_after_fork(void) {
 | |
|   arena_last_used = NULL;
 | |
|   if (!arenas) {
 | |
|     return;
 | |
|   }
 | |
|   memory.lock = SPN_LOCK_INIT;
 | |
|   for (size_t i = 0; i < memory.cores; ++i) {
 | |
|     arenas[i].lock = SPN_LOCK_INIT;
 | |
|   }
 | |
| }
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Block management
 | |
| ***************************************************************************** */
 | |
| 
 | |
| // static inline block_s **block_find(void *mem_) {
 | |
| //   const uintptr_t mem = (uintptr_t)mem_;
 | |
| //   block_s *blk = memory.active;
 | |
| // }
 | |
| 
 | |
| /* intializes the block header for an available block of memory. */
 | |
| static inline block_s *block_init(void *blk_) {
 | |
|   block_s *blk = blk_;
 | |
|   *blk = (block_s){
 | |
|       .ref = 1,
 | |
|       .pos = (2 + (sizeof(block_s) >> 4)),
 | |
|       .max = (FIO_MEMORY_BLOCK_SLICES - 1) -
 | |
|              (sizeof(block_s) >> 4), /* count available units of 16 bytes */
 | |
|   };
 | |
|   return blk;
 | |
| }
 | |
| 
 | |
| /* intializes the block header for an available block of memory. */
 | |
| static inline void block_free(block_s *blk) {
 | |
|   if (spn_sub(&blk->ref, 1))
 | |
|     return;
 | |
| 
 | |
|   if (spn_add(&memory.count, 1) >
 | |
|       (intptr_t)(FIO_MEM_MAX_BLOCKS_PER_CORE * memory.cores)) {
 | |
|     /* TODO: return memory to the system */
 | |
|     spn_sub(&memory.count, 1);
 | |
|     sys_free(blk, FIO_MEMORY_BLOCK_SIZE);
 | |
|     return;
 | |
|   }
 | |
|   memset(blk, 0, FIO_MEMORY_BLOCK_SIZE);
 | |
|   spn_lock(&memory.lock);
 | |
|   *(block_s **)blk = memory.available;
 | |
|   memory.available = (block_s *)blk;
 | |
|   spn_unlock(&memory.lock);
 | |
| }
 | |
| 
 | |
| /* intializes the block header for an available block of memory. */
 | |
| static inline block_s *block_new(void) {
 | |
|   block_s *blk = NULL;
 | |
| 
 | |
|   if (memory.available) {
 | |
|     spn_lock(&memory.lock);
 | |
|     blk = (block_s *)memory.available;
 | |
|     if (blk) {
 | |
|       memory.available = ((block_s **)blk)[0];
 | |
|     }
 | |
|     spn_unlock(&memory.lock);
 | |
|   }
 | |
|   if (blk) {
 | |
|     spn_sub(&memory.count, 1);
 | |
|     ((block_s **)blk)[0] = NULL;
 | |
|     ((block_s **)blk)[1] = NULL;
 | |
|     return block_init(blk);
 | |
|   }
 | |
|   /* TODO: collect memory from the system */
 | |
|   blk = sys_alloc(FIO_MEMORY_BLOCK_SIZE, 0);
 | |
|   if (!blk)
 | |
|     return NULL;
 | |
|   return block_init(blk);
 | |
|   ;
 | |
| }
 | |
| 
 | |
| static inline void *block_slice(uint16_t units) {
 | |
|   block_s *blk = arena_last_used->block;
 | |
|   if (!blk) {
 | |
|     /* arena is empty */
 | |
|     blk = block_new();
 | |
|     arena_last_used->block = blk;
 | |
|   } else if (blk->pos + units > blk->max) {
 | |
|     /* not enough memory in the block - rotate */
 | |
|     block_free(blk);
 | |
|     blk = block_new();
 | |
|     arena_last_used->block = blk;
 | |
|   }
 | |
|   if (!blk) {
 | |
|     /* no system memory available? */
 | |
|     errno = ENOMEM;
 | |
|     return NULL;
 | |
|   }
 | |
|   /* slice block starting at blk->pos and increase reference count */
 | |
|   const void *mem = (void *)((uintptr_t)blk + ((uintptr_t)blk->pos << 4));
 | |
|   spn_add(&blk->ref, 1);
 | |
|   blk->pos += units;
 | |
|   if (blk->pos >= blk->max) {
 | |
|     /* it's true that a 16 bytes slice remains, but statistically... */
 | |
|     /* ... the block was fully utilized, clear arena */
 | |
|     block_free(blk);
 | |
|     arena_last_used->block = NULL;
 | |
|   }
 | |
|   return (void *)mem;
 | |
| }
 | |
| 
 | |
| static inline void block_slice_free(void *mem) {
 | |
|   /* locate block boundary */
 | |
|   block_s *blk = (block_s *)((uintptr_t)mem & (~FIO_MEMORY_BLOCK_MASK));
 | |
|   block_free(blk);
 | |
| }
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Non-Block allocations (direct from the system)
 | |
| ***************************************************************************** */
 | |
| 
 | |
/*
 * Allocates `size` bytes directly from the system.
 *
 * The mapping starts with a 16 byte header whose first word stores the
 * mapping's full (page rounded) length; the returned pointer is placed right
 * after that header.
 *
 * Returns NULL on failure. Requests so large that adding the header and the
 * page rounding would wrap around are rejected with `errno` set to ENOMEM.
 */
static inline void *big_alloc(size_t size) {
  if (size > SIZE_MAX - (16 + 4095)) {
    errno = ENOMEM;
    return NULL;
  }
  size = sys_round_size(size + 16);
  size_t *mem = sys_alloc(size, 1);
  if (!mem)
    return NULL;
  *mem = size;
  return (void *)(((uintptr_t)mem) + 16);
}
 | |
| 
 | |
/* Releases a `big_alloc` allocation; the mapping's length is read from the
 * header stored 16 bytes before `ptr`. */
static inline void big_free(void *ptr) {
  size_t *header = (void *)(((uintptr_t)ptr) - 16);
  const size_t mapped_len = *header;
  sys_free(header, mapped_len);
}
 | |
| 
 | |
/*
 * Resizes a `big_alloc` allocation to hold `new_size` bytes.
 *
 * Returns the new user pointer (16 bytes past the mapping's header), or NULL
 * on failure, in which case the original allocation remains valid. Requests
 * so large that adding the header and the page rounding would wrap around are
 * rejected with `errno` set to ENOMEM.
 */
static inline void *big_realloc(void *ptr, size_t new_size) {
  if (new_size > SIZE_MAX - (16 + 4095)) {
    errno = ENOMEM;
    return NULL;
  }
  size_t *mem = (void *)(((uintptr_t)ptr) - 16);
  new_size = sys_round_size(new_size + 16);
  mem = sys_realloc(mem, *mem, new_size);
  if (!mem)
    return NULL;
  *mem = new_size;
  return (void *)(((uintptr_t)mem) + 16);
}
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Library Initialization (initialize arenas and allocate a block for each CPU)
 | |
| ***************************************************************************** */
 | |
| 
 | |
| static void __attribute__((constructor)) fio_mem_init(void) {
 | |
|   if (arenas)
 | |
|     return;
 | |
| 
 | |
| #ifdef _SC_NPROCESSORS_ONLN
 | |
|   ssize_t cpu_count = sysconf(_SC_NPROCESSORS_ONLN);
 | |
| #else
 | |
| #warning Dynamic CPU core count is unavailable - assuming 8 cores for memory allocation pools.
 | |
|   ssize_t cpu_count = 8; /* fallback */
 | |
| #endif
 | |
|   memory.cores = cpu_count;
 | |
|   memory.count = 0 - (intptr_t)cpu_count;
 | |
|   arenas = big_alloc(sizeof(*arenas) * cpu_count);
 | |
|   if (!arenas) {
 | |
|     perror("FATAL ERROR: Couldn't initialize memory allocator");
 | |
|     exit(errno);
 | |
|   }
 | |
|   size_t pre_pool = cpu_count > 32 ? 32 : cpu_count;
 | |
|   for (size_t i = 0; i < pre_pool; ++i) {
 | |
|     void *block = sys_alloc(FIO_MEMORY_BLOCK_SIZE, 0);
 | |
|     if (block) {
 | |
|       block_init(block);
 | |
|       block_free(block);
 | |
|     }
 | |
|   }
 | |
|   pthread_atfork(NULL, NULL, fio_malloc_after_fork);
 | |
| }
 | |
| 
 | |
| static void __attribute__((destructor)) fio_mem_destroy(void) {
 | |
|   if (!arenas)
 | |
|     return;
 | |
| 
 | |
|   arena_s *arena = arenas;
 | |
|   for (size_t i = 0; i < memory.cores; ++i) {
 | |
|     if (arena->block)
 | |
|       block_free(arena->block);
 | |
|     arena->block = NULL;
 | |
|     ++arena;
 | |
|   }
 | |
|   while (memory.available) {
 | |
|     block_s *b = memory.available;
 | |
|     memory.available = *(block_s **)b;
 | |
|     sys_free(b, FIO_MEMORY_BLOCK_SIZE);
 | |
|   }
 | |
|   big_free(arenas);
 | |
|   arenas = NULL;
 | |
| }
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Memory allocation / deallocation API
 | |
| ***************************************************************************** */
 | |
| 
 | |
| void *fio_malloc(size_t size) {
 | |
| #if FIO_OVERRIDE_MALLOC
 | |
|   if (!arenas)
 | |
|     fio_mem_init();
 | |
| #endif
 | |
|   if (!size) {
 | |
|     /* changed behavior prevents "allocation failed" test for `malloc(0)` */
 | |
|     return (void *)(&on_malloc_zero);
 | |
|   }
 | |
|   if (size >= FIO_MEMORY_BLOCK_ALLOC_LIMIT) {
 | |
|     /* system allocation - must be block aligned */
 | |
|     return big_alloc(size);
 | |
|   }
 | |
|   /* ceiling for 16 byte alignement, translated to 16 byte units */
 | |
|   size = (size >> 4) + (!!(size & 15));
 | |
|   arena_enter();
 | |
|   void *mem = block_slice(size);
 | |
|   arena_exit();
 | |
|   return mem;
 | |
| }
 | |
| 
 | |
/*
 * Allocates zeroed memory for `count` objects of `size` bytes each.
 *
 * No explicit zeroing is performed: memory is pre-initialized by mmap or by
 * the block pool. Returns NULL, with `errno` set to ENOMEM, if `size * count`
 * would overflow.
 */
void *fio_calloc(size_t size, size_t count) {
  if (count && size > SIZE_MAX / count) {
    errno = ENOMEM;
    return NULL;
  }
  return fio_malloc(size * count);
}
 | |
| 
 | |
| void fio_free(void *ptr) {
 | |
|   if (!ptr || ptr == (void *)&on_malloc_zero)
 | |
|     return;
 | |
|   if (((uintptr_t)ptr & FIO_MEMORY_BLOCK_MASK) == 16) {
 | |
|     /* big allocation - direct from the system */
 | |
|     big_free(ptr);
 | |
|     return;
 | |
|   }
 | |
|   /* allocated within block */
 | |
|   block_slice_free(ptr);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * Re-allocates memory. An attept to avoid copying the data is made only for big
 | |
|  * memory allocations.
 | |
|  *
 | |
|  * This variation is slightly faster as it might copy less data
 | |
|  */
 | |
| void *fio_realloc2(void *ptr, size_t new_size, size_t copy_length) {
 | |
|   if (!ptr || ptr == (void *)&on_malloc_zero)
 | |
|     return fio_malloc(new_size);
 | |
|   if (!new_size) {
 | |
|     goto zero_size;
 | |
|   }
 | |
|   if (((uintptr_t)ptr & FIO_MEMORY_BLOCK_MASK) == 16) {
 | |
|     /* big reallocation - direct from the system */
 | |
|     return big_realloc(ptr, new_size);
 | |
|   }
 | |
|   /* allocated within block - don't even try to expand the allocation */
 | |
|   /* ceiling for 16 byte alignement, translated to 16 byte units */
 | |
|   void *new_mem = fio_malloc(new_size);
 | |
|   if (!new_mem)
 | |
|     return NULL;
 | |
|   new_size = ((new_size >> 4) + (!!(new_size & 15)));
 | |
|   copy_length = ((copy_length >> 4) + (!!(copy_length & 15)));
 | |
|   // memcpy(new_mem, ptr, (copy_length > new_size ? new_size : copy_length) <<
 | |
|   // 4);
 | |
|   fio_memcpy(new_mem, ptr, (copy_length > new_size ? new_size : copy_length));
 | |
|   block_slice_free(ptr);
 | |
|   return new_mem;
 | |
| zero_size:
 | |
|   fio_free(ptr);
 | |
|   return malloc(0);
 | |
| }
 | |
| 
 | |
| void *fio_realloc(void *ptr, size_t new_size) {
 | |
|   const size_t max_old =
 | |
|       FIO_MEMORY_BLOCK_SIZE - ((uintptr_t)ptr & FIO_MEMORY_BLOCK_MASK);
 | |
|   return fio_realloc2(ptr, new_size, max_old);
 | |
| }
 | |
| 
 | |
/**
 * Allocates memory directly using `mmap`, this is preferred for larger objects
 * that have a long lifetime.
 *
 * Returns NULL for a zero `size` or when the allocation fails.
 *
 * `fio_free` can be used for deallocating the memory.
 */
void *fio_mmap(size_t size) {
  return size ? big_alloc(size) : NULL;
}
 | |
| 
 | |
| /* *****************************************************************************
 | |
| FIO_OVERRIDE_MALLOC - override glibc / library malloc
 | |
| ***************************************************************************** */
 | |
#if FIO_OVERRIDE_MALLOC
/* Route the C library's allocation functions through the facil.io allocator. */
void *malloc(size_t size) {
  return fio_malloc(size);
}

void *calloc(size_t size, size_t count) {
  return fio_calloc(size, count);
}

void free(void *ptr) {
  fio_free(ptr);
}

void *realloc(void *ptr, size_t new_size) {
  return fio_realloc(ptr, new_size);
}
#endif
 | |
| 
 | |
| #endif
 | |
| 
 | |
| /* *****************************************************************************
 | |
| Some Tests
 | |
| ***************************************************************************** */
 | |
| 
 | |
#if DEBUG && !FIO_FORCE_MALLOC

/*
 * Self test for the allocator (DEBUG builds only).
 *
 * Progress is printed to stderr; the process exits with -1 on the first
 * failed check.
 */
void fio_malloc_test(void) {
#define TEST_ASSERT(cond, ...)                                                 \
  do {                                                                         \
    if (!(cond)) {                                                             \
      fprintf(stderr, "* " __VA_ARGS__);                                       \
      fprintf(stderr, "\nTesting failed.\n");                                  \
      exit(-1);                                                                \
    }                                                                          \
  } while (0)

  fprintf(stderr, "=== Testing facil.io memory allocator's system calls\n");
  char *mem = sys_alloc(FIO_MEMORY_BLOCK_SIZE, 0);
  TEST_ASSERT(mem, "sys_alloc failed to allocate memory!\n");
  TEST_ASSERT(!((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK),
              "Memory allocation not aligned to FIO_MEMORY_BLOCK_SIZE!");
  mem[0] = 'a';
  mem[FIO_MEMORY_BLOCK_SIZE - 1] = 'z';
  fprintf(stderr, "* Testing reallocation from %p\n", (void *)mem);
  char *mem2 =
      sys_realloc(mem, FIO_MEMORY_BLOCK_SIZE, FIO_MEMORY_BLOCK_SIZE * 2);
  TEST_ASSERT(mem2, "sys_realloc failed to allocate memory!\n");
  if (mem == mem2)
    fprintf(stderr, "* Performed system realloc without copy :-)\n");
  /* compare (don't assign!) both ends of the original data */
  TEST_ASSERT(mem2[0] == 'a' && mem2[FIO_MEMORY_BLOCK_SIZE - 1] == 'z',
              "Reaclloc data was lost!");
  sys_free(mem2, FIO_MEMORY_BLOCK_SIZE * 2);
  fprintf(stderr, "=== Testing facil.io memory allocator's internal data.\n");
  TEST_ASSERT(arenas, "Missing arena data - library not initialized!");
  fio_free(NULL); /* fio_free(NULL) shouldn't crash... */
  mem = fio_malloc(1);
  TEST_ASSERT(mem, "fio_malloc failed to allocate memory!\n");
  TEST_ASSERT(!((uintptr_t)mem & 15), "fio_malloc memory not aligned!\n");
  TEST_ASSERT(((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) != 16,
              "small fio_malloc memory indicates system allocation!\n");
  mem[0] = 'a';
  TEST_ASSERT(mem[0] == 'a', "allocate memory wasn't written to!\n");
  mem = fio_realloc(mem, 1);
  TEST_ASSERT(mem[0] == 'a', "fio_realloc memory wasn't copied!\n");
  TEST_ASSERT(arena_last_used, "arena_last_used wasn't initialized!\n");
  block_s *b = arena_last_used->block;
  size_t count = 2;
  intptr_t old_memory_pool_count = memory.count;
  do {
    TEST_ASSERT(mem, "fio_malloc failed to allocate memory!\n");
    TEST_ASSERT(!((uintptr_t)mem & 15),
                "fio_malloc memory not aligned at allocation #%zu!\n", count);
    TEST_ASSERT((((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) != 16),
                "fio_malloc memory indicates system allocation!\n");
#if __x86_64__
    fio_memcpy((size_t *)mem, (size_t *)"0123456789abcdefg", 1);
#else
    mem[0] = 'a';
#endif
    fio_free(mem); /* make sure we hold on to the block, so it rotates */
    mem = fio_malloc(1);
    ++count;
  } while (arena_last_used->block == b);
  {
    fprintf(
        stderr,
        "* Performed %zu allocations out of expected %zu allocations per "
        "block.\n",
        count,
        (size_t)((FIO_MEMORY_BLOCK_SLICES - 2) - (sizeof(block_s) >> 4) - 1));
    TEST_ASSERT(memory.available,
                "memory pool empty (memory block wasn't freed)!\n");
    TEST_ASSERT(old_memory_pool_count == memory.count,
                "memory.count == %ld (memory block not counted)!\n",
                (long)memory.count);
    /* `mem` stays allocated here: the fio_realloc below releases it */
  }
  /* rotate block again */
  b = arena_last_used->block;
  mem = fio_realloc(mem, 1);
  do {
    mem2 = mem;
    mem = fio_malloc(1);
    fio_free(mem2); /* make sure we hold on to the block, so it rotates */
    TEST_ASSERT(mem, "fio_malloc failed to allocate memory!\n");
    TEST_ASSERT(!((uintptr_t)mem & 15),
                "fio_malloc memory not aligned at allocation #%zu!\n", count);
    TEST_ASSERT((((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) != 16),
                "fio_malloc memory indicates system allocation!\n");
#if __x86_64__
    fio_memcpy((size_t *)mem, (size_t *)"0123456789abcdefg", 1);
#else
    mem[0] = 'a';
#endif
    ++count;
  } while (arena_last_used->block == b);

  mem = fio_calloc(FIO_MEMORY_BLOCK_ALLOC_LIMIT - 64, 1);
  TEST_ASSERT(mem,
              "failed to allocate FIO_MEMORY_BLOCK_ALLOC_LIMIT - 64 bytes!\n");
  TEST_ASSERT(((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) != 16,
              "fio_calloc (under limit) memory alignment error!\n");
  mem2 = fio_malloc(1);
  TEST_ASSERT(mem2, "fio_malloc(1) failed to allocate memory!\n");
  mem2[0] = 'a';

  for (uintptr_t i = 0; i < (FIO_MEMORY_BLOCK_ALLOC_LIMIT - 64); ++i) {
    TEST_ASSERT(mem[i] == 0,
                "calloc returned memory that wasn't initialized?!\n");
  }
  fio_free(mem);

  mem = fio_malloc(FIO_MEMORY_BLOCK_SIZE);
  TEST_ASSERT(mem, "fio_malloc failed to FIO_MEMORY_BLOCK_SIZE bytes!\n");
  TEST_ASSERT(((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) == 16,
              "fio_malloc (big) memory isn't aligned!\n");
  mem = fio_realloc(mem, FIO_MEMORY_BLOCK_SIZE * 2);
  TEST_ASSERT(mem,
              "fio_realloc (big) failed on FIO_MEMORY_BLOCK_SIZE X2 bytes!\n");
  /* inspect the pointer before it is released */
  TEST_ASSERT(((uintptr_t)mem & FIO_MEMORY_BLOCK_MASK) == 16,
              "fio_realloc (big) memory isn't aligned!\n");
  fio_free(mem);

  {
    void *m0 = fio_malloc(0);
    void *rm0 = fio_realloc(m0, 16);
    TEST_ASSERT(m0 != rm0, "fio_realloc(fio_malloc(0), 16) failed!\n");
  }

  fprintf(stderr, "* passed.\n");
}

#else

/* Tests are compiled out unless DEBUG is set and the custom allocator is used. */
void fio_malloc_test(void) {}

#endif
 | 
