Mirror of https://codeberg.org/ziglang/zig.git
add std.heap.SmpAllocator
An allocator intended to be used in -OReleaseFast mode when multi-threading is enabled.
parent 6a6e72fff8
commit 51c4ffa410

5 changed files with 317 additions and 20 deletions
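Usage sketch (illustration only, not part of the commit): the new allocator is exposed as a global singleton, so a multi-threaded ReleaseFast program can use it directly as a std.mem.Allocator:

    const std = @import("std");

    pub fn main() !void {
        // Singleton added by this commit; it has no per-instance state,
        // so there is no init/deinit step.
        const gpa = std.heap.smp_allocator;

        const buf = try gpa.alloc(u8, 4096);
        defer gpa.free(buf);
        @memset(buf, 0);
    }
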
lib/std/heap.zig

@@ -9,11 +9,12 @@ const Allocator = std.mem.Allocator;
 const windows = std.os.windows;
 
 pub const ArenaAllocator = @import("heap/arena_allocator.zig").ArenaAllocator;
-pub const WasmAllocator = @import("heap/WasmAllocator.zig");
-pub const PageAllocator = @import("heap/PageAllocator.zig");
-pub const ThreadSafeAllocator = @import("heap/ThreadSafeAllocator.zig");
-pub const SbrkAllocator = @import("heap/sbrk_allocator.zig").SbrkAllocator;
+pub const SmpAllocator = @import("heap/SmpAllocator.zig");
 pub const FixedBufferAllocator = @import("heap/FixedBufferAllocator.zig");
+pub const PageAllocator = @import("heap/PageAllocator.zig");
+pub const SbrkAllocator = @import("heap/sbrk_allocator.zig").SbrkAllocator;
+pub const ThreadSafeAllocator = @import("heap/ThreadSafeAllocator.zig");
+pub const WasmAllocator = @import("heap/WasmAllocator.zig");
 
 pub const DebugAllocatorConfig = @import("heap/debug_allocator.zig").Config;
 pub const DebugAllocator = @import("heap/debug_allocator.zig").DebugAllocator;
@@ -358,6 +359,11 @@ else if (builtin.target.isWasm()) .{
     .vtable = &PageAllocator.vtable,
 };
 
+pub const smp_allocator: Allocator = .{
+    .ptr = undefined,
+    .vtable = &SmpAllocator.vtable,
+};
+
 /// This allocator is fast, small, and specific to WebAssembly. In the future,
 /// this will be the implementation automatically selected by
 /// `GeneralPurposeAllocator` when compiling in `ReleaseSmall` mode for wasm32
@@ -978,4 +984,5 @@ test {
     if (builtin.target.isWasm()) {
         _ = WasmAllocator;
     }
+    if (!builtin.single_threaded) _ = smp_allocator;
 }
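Aside (illustration only): the per-thread freelist design described in the new SmpAllocator.zig below can be exercised by several threads allocating and freeing small objects concurrently; each thread settles into its own metadata slot, so the fast path normally avoids lock contention:

    const std = @import("std");

    fn worker(gpa: std.mem.Allocator) void {
        for (0..10_000) |_| {
            const p = gpa.create(u64) catch return;
            p.* = 42;
            gpa.destroy(p);
        }
    }

    test "concurrent small allocations" {
        // Requires a non-single-threaded build; SmpAllocator comptime-asserts this.
        const gpa = std.heap.smp_allocator;
        var threads: [4]std.Thread = undefined;
        for (&threads) |*t| t.* = try std.Thread.spawn(.{}, worker, .{gpa});
        for (threads) |t| t.join();
    }
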
lib/std/heap/PageAllocator.zig

@@ -16,11 +16,7 @@ pub const vtable: Allocator.VTable = .{
     .free = free,
 };
 
-fn alloc(context: *anyopaque, n: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
-    _ = context;
-    _ = ra;
-    assert(n > 0);
-
+pub fn map(n: usize, alignment: mem.Alignment) ?[*]u8 {
     const page_size = std.heap.pageSize();
     if (n >= maxInt(usize) - page_size) return null;
     const alignment_bytes = alignment.toByteUnits();
@@ -101,6 +97,13 @@ fn alloc(context: *anyopaque, n: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
     return result_ptr;
 }
 
+fn alloc(context: *anyopaque, n: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
+    _ = context;
+    _ = ra;
+    assert(n > 0);
+    return map(n, alignment);
+}
+
 fn resize(
     context: *anyopaque,
     memory: []u8,
@@ -114,7 +117,7 @@ fn resize(
     return realloc(memory, new_len, false) != null;
 }
 
-pub fn remap(
+fn remap(
     context: *anyopaque,
     memory: []u8,
     alignment: mem.Alignment,
@@ -127,21 +130,24 @@ pub fn remap(
     return realloc(memory, new_len, true);
 }
 
-fn free(context: *anyopaque, slice: []u8, alignment: mem.Alignment, return_address: usize) void {
+fn free(context: *anyopaque, memory: []u8, alignment: mem.Alignment, return_address: usize) void {
     _ = context;
     _ = alignment;
     _ = return_address;
+    return unmap(@alignCast(memory));
+}
+
+pub fn unmap(memory: []align(page_size_min) u8) void {
     if (native_os == .windows) {
-        windows.VirtualFree(slice.ptr, 0, windows.MEM_RELEASE);
+        windows.VirtualFree(memory.ptr, 0, windows.MEM_RELEASE);
     } else {
-        const buf_aligned_len = mem.alignForward(usize, slice.len, std.heap.pageSize());
-        posix.munmap(@alignCast(slice.ptr[0..buf_aligned_len]));
+        const page_aligned_len = mem.alignForward(usize, memory.len, std.heap.pageSize());
+        posix.munmap(memory.ptr[0..page_aligned_len]);
     }
 }
 
-fn realloc(uncasted_memory: []u8, new_len: usize, may_move: bool) ?[*]u8 {
-    const memory: []align(std.heap.page_size_min) u8 = @alignCast(uncasted_memory);
+pub fn realloc(uncasted_memory: []u8, new_len: usize, may_move: bool) ?[*]u8 {
+    const memory: []align(page_size_min) u8 = @alignCast(uncasted_memory);
     const page_size = std.heap.pageSize();
     const new_size_aligned = mem.alignForward(usize, new_len, page_size);
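Aside (illustration only): with map, unmap, and realloc now public, callers can memory-map whole regions through PageAllocator without going through the Allocator interface, which is how the new SmpAllocator below obtains its slabs and serves large allocations. A minimal sketch:

    const std = @import("std");
    const PageAllocator = std.heap.PageAllocator;

    test "direct page mapping" {
        // 1024 pages; pageSize() is runtime-known, so the slice below is a
        // regular runtime slice.
        const len = 1024 * std.heap.pageSize();
        const ptr = PageAllocator.map(len, .fromByteUnits(std.heap.pageSize())) orelse
            return error.SkipZigTest;
        defer PageAllocator.unmap(@alignCast(ptr[0..len]));
        ptr[0] = 0xaa; // anonymous mappings start zero-filled and are writable
    }
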
lib/std/heap/SmpAllocator.zig (new file, 288 lines)

@@ -0,0 +1,288 @@
//! An allocator that is designed for ReleaseFast optimization mode, with
//! multi-threading enabled.
//!
//! This allocator is a singleton; it uses global state and only one should be
//! instantiated for the entire process.
//!
//! ## Basic Design
//!
//! Avoid locking the global mutex as much as possible.
//!
//! Each thread gets a separate freelist; however, the data must be recoverable
//! when the thread exits. We do not directly learn when a thread exits, so
//! occasionally, one thread must attempt to reclaim another thread's
//! resources.
//!
//! Above a certain size, those allocations are memory mapped directly, with no
//! storage of allocation metadata. This works because the implementation
//! refuses resizes that would move an allocation from small category to large
//! category or vice versa.
//!
//! Each allocator operation checks the thread identifier from a threadlocal
//! variable to find out which metadata in the global state to access, and
//! attempts to grab its lock. This will usually succeed without contention,
//! unless another thread has been assigned the same id. In the case of such
//! contention, the thread moves on to the next thread metadata slot and
//! repeats the process of attempting to obtain the lock.
//!
//! Limiting the thread-local metadata array to the same number as the CPU
//! count ensures that as threads are created and destroyed, they cycle
//! through the full set of freelists.
const builtin = @import("builtin");
const native_os = builtin.os.tag;

const std = @import("../std.zig");
const assert = std.debug.assert;
const mem = std.mem;
const math = std.math;
const Allocator = std.mem.Allocator;
const SmpAllocator = @This();
const PageAllocator = std.heap.PageAllocator;

/// Protects the state in this struct (global state), except for `threads`
/// which each have their own mutex.
mutex: std.Thread.Mutex,
next_thread_index: u32,
cpu_count: u32,
threads: [max_thread_count]Thread,

var global: SmpAllocator = .{
    .mutex = .{},
    .next_thread_index = 0,
    .threads = @splat(.{}),
    .cpu_count = 0,
};
threadlocal var thread_id: Thread.Id = .none;

const max_thread_count = 128;
const slab_len: usize = @max(std.heap.page_size_max, switch (builtin.os.tag) {
    .windows => 64 * 1024, // Makes `std.heap.PageAllocator` take the happy path.
    .wasi => 64 * 1024, // Max alignment supported by `std.heap.WasmAllocator`.
    else => 256 * 1024, // Avoids too many active mappings when `page_size_max` is low.
});
/// Because of storing free list pointers, the minimum size class is 3.
const min_class = math.log2(math.ceilPowerOfTwoAssert(usize, 1 + @sizeOf(usize)));
const size_class_count = math.log2(slab_len) - min_class;
const Thread = struct {
    /// Avoid false sharing.
    _: void align(std.atomic.cache_line) = {},

    /// Protects the state in this struct (per-thread state).
    ///
    /// Threads lock this before accessing their own state in order
    /// to support freelist reclamation.
    mutex: std.Thread.Mutex = .{},

    next_addrs: [size_class_count]usize = @splat(0),
    /// For each size class, points to the freed pointer.
    frees: [size_class_count]usize = @splat(0),

    /// Index into `SmpAllocator.threads`.
    const Id = enum(usize) {
        none = 0,
        first = 1,
        _,

        fn fromIndex(index: usize) Id {
            return @enumFromInt(index + 1);
        }

        fn toIndex(id: Id) usize {
            return @intFromEnum(id) - 1;
        }
    };

    fn lock() *Thread {
        const id = thread_id;
        if (id != .none) {
            var index = id.toIndex();
            {
                const t = &global.threads[index];
                if (t.mutex.tryLock()) return t;
            }
            const cpu_count = global.cpu_count;
            assert(cpu_count != 0);
            while (true) {
                index = (index + 1) % cpu_count;
                const t = &global.threads[index];
                if (t.mutex.tryLock()) {
                    thread_id = .fromIndex(index);
                    return t;
                }
            }
        }
        while (true) {
            const thread_index = i: {
                global.mutex.lock();
                defer global.mutex.unlock();
                const cpu_count = c: {
                    const cpu_count = global.cpu_count;
                    if (cpu_count == 0) {
                        const n: u32 = @intCast(@max(std.Thread.getCpuCount() catch max_thread_count, max_thread_count));
                        global.cpu_count = n;
                        break :c n;
                    }
                    break :c cpu_count;
                };
                const thread_index = global.next_thread_index;
                global.next_thread_index = @intCast((thread_index + 1) % cpu_count);
                break :i thread_index;
            };
            const t = &global.threads[thread_index];
            if (t.mutex.tryLock()) {
                thread_id = .fromIndex(thread_index);
                return t;
            }
        }
    }

    fn unlock(t: *Thread) void {
        t.mutex.unlock();
    }
};
pub const vtable: Allocator.VTable = .{
    .alloc = alloc,
    .resize = resize,
    .remap = remap,
    .free = free,
};

comptime {
    assert(!builtin.single_threaded); // you're holding it wrong
}

fn alloc(context: *anyopaque, len: usize, alignment: mem.Alignment, ra: usize) ?[*]u8 {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(len, alignment);
    if (class >= size_class_count) {
        @branchHint(.unlikely);
        return PageAllocator.map(len, alignment);
    }

    const t = Thread.lock();
    defer t.unlock();

    const slot_size = slotSize(class);

    const top_free_ptr = t.frees[class];
    if (top_free_ptr != 0) {
        const node: *usize = @ptrFromInt(top_free_ptr + (slot_size - @sizeOf(usize)));
        t.frees[class] = node.*;
        return @ptrFromInt(top_free_ptr);
    }

    const next_addr = t.next_addrs[class];
    if (next_addr % slab_len == 0) {
        const slab = PageAllocator.map(slab_len, .fromByteUnits(std.heap.pageSize())) orelse return null;
        t.next_addrs[class] = @intFromPtr(slab) + slot_size;
        return slab;
    }

    t.next_addrs[class] = next_addr + slot_size;
    return @ptrFromInt(next_addr);
}

fn resize(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, ra: usize) bool {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
    const new_class = sizeClassIndex(new_len, alignment);
    if (class >= size_class_count) {
        if (new_class < size_class_count) return false;
        return PageAllocator.realloc(memory, new_len, false) != null;
    }
    return new_class == class;
}

fn remap(context: *anyopaque, memory: []u8, alignment: mem.Alignment, new_len: usize, ra: usize) ?[*]u8 {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
    const new_class = sizeClassIndex(new_len, alignment);
    if (class >= size_class_count) {
        if (new_class < size_class_count) return null;
        return PageAllocator.realloc(memory, new_len, true);
    }
    return if (new_class == class) memory.ptr else null;
}

fn free(context: *anyopaque, memory: []u8, alignment: mem.Alignment, ra: usize) void {
    _ = context;
    _ = ra;
    const class = sizeClassIndex(memory.len, alignment);
    if (class >= size_class_count) {
        @branchHint(.unlikely);
        return PageAllocator.unmap(@alignCast(memory));
    }

    const t = Thread.lock();
    defer t.unlock();

    const slot_size = slotSize(class);
    const addr = @intFromPtr(memory.ptr);
    const node: *usize = @ptrFromInt(addr + (slot_size - @sizeOf(usize)));
    node.* = t.frees[class];
    t.frees[class] = addr;
}
fn sizeClassIndex(len: usize, alignment: mem.Alignment) usize {
    return @max(
        @bitSizeOf(usize) - @clz(len - 1),
        @intFromEnum(alignment),
        min_class,
    );
}

fn slotSize(class: usize) usize {
    const Log2USize = std.math.Log2Int(usize);
    return @as(usize, 1) << @as(Log2USize, @intCast(class));
}

test "large alloc, resize, remap, free" {
    const gpa = std.heap.smp_allocator;

    const ptr1 = try gpa.alloc(u64, 42768);
    const ptr2 = try gpa.alloc(u64, 52768);
    gpa.free(ptr1);
    const ptr3 = try gpa.alloc(u64, 62768);
    gpa.free(ptr3);
    gpa.free(ptr2);
}

test "small allocations - free in same order" {
    const gpa = std.heap.smp_allocator;

    var list = std.ArrayList(*u64).init(std.testing.allocator);
    defer list.deinit();

    var i: usize = 0;
    while (i < 513) : (i += 1) {
        const ptr = try gpa.create(u64);
        try list.append(ptr);
    }

    for (list.items) |ptr| {
        gpa.destroy(ptr);
    }
}

test "small allocations - free in reverse order" {
    const gpa = std.heap.smp_allocator;

    var list = std.ArrayList(*u64).init(std.testing.allocator);
    defer list.deinit();

    var i: usize = 0;
    while (i < 513) : (i += 1) {
        const ptr = try gpa.create(u64);
        try list.append(ptr);
    }

    while (list.popOrNull()) |ptr| {
        gpa.destroy(ptr);
    }
}
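Aside (illustration only, assuming a 64-bit target): the size-class arithmetic above maps a request to the next power of two, no smaller than min_class and the requested alignment class; the freelist link for a freed slot lives in the slot's last @sizeOf(usize) bytes. A worked example for a 100-byte request:

    const std = @import("std");

    test "size class arithmetic" {
        const len: usize = 100;
        // ceil(log2(100)) = 7, so the request occupies a 128-byte slot.
        const class: usize = @bitSizeOf(usize) - @clz(len - 1);
        const slot_size = @as(usize, 1) << @intCast(class);
        try std.testing.expectEqual(@as(usize, 7), class);
        try std.testing.expectEqual(@as(usize, 128), slot_size);
    }
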
lib/std/heap/debug_allocator.zig

@@ -1,5 +1,3 @@
-//! This is intended to be merged into GeneralPurposeAllocator at some point.
-
 const std = @import("../std.zig");
 const builtin = @import("builtin");
 const Allocator = std.mem.Allocator;
@@ -851,8 +851,6 @@ pub fn DebugAllocator(comptime config: Config) type {
             self.mutex.lock();
             defer self.mutex.unlock();
 
-            assert(old_memory.len != 0);
-
             const size_class_index: usize = @max(@bitSizeOf(usize) - @clz(old_memory.len - 1), @intFromEnum(alignment));
             if (size_class_index >= self.buckets.len) {
                 @branchHint(.unlikely);