std.heap: rework c_allocator

The main goal here was to avoid allocating padding and header space when
`malloc` already guarantees the alignment we need via `max_align_t`.
Previously, the compiler used `std.heap.raw_c_allocator` as its GPA in
some cases, depending on `std.c.max_align_t`, but that was fragile (it
meant we had to encode our alignment requirements into `src/main.zig`!).
Perhaps more importantly, that solution was unnecessarily restrictive:
since Zig's `Allocator` API passes the `Alignment` not only to `alloc`
but also to `free` and the other vtable functions, we can choose a
different strategy based on its value. So `c_allocator` can simply
compare the requested alignment to `Alignment.of(std.c.max_align_t)` and
use a raw `malloc` call (no header needed!) whenever that guarantees a
suitable alignment, which in practice is true the vast majority of the
time.
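
As a rough sketch of that dispatch (illustrative only, assuming libc is
linked; `chooseStrategy` is a hypothetical name, not the actual
implementation):

    const std = @import("std");
    const Alignment = std.mem.Alignment;

    // Because `free` and friends also receive the `Alignment`, this decision
    // can be recomputed consistently on every call, so small alignments never
    // need a header.
    fn chooseStrategy(requested: Alignment) enum { raw_malloc, aligned_path } {
        // `malloc` results are always aligned to at least `max_align_t`.
        if (requested.compare(.lte, Alignment.of(std.c.max_align_t)))
            return .raw_malloc;
        return .aligned_path; // needs `posix_memalign` or manual padding
    }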

In short, this makes `std.heap.c_allocator` more memory-efficient, and
probably removes any incentive to use `std.heap.raw_c_allocator`.

I also refactored the `c_allocator` implementation while doing this,
just to neaten things up a little.
Matthew Lugg, 2025-12-04 12:11:09 +00:00 (committed by mlugg)
parent ea94ac52c5
commit 4ce7b57e86
2 changed files with 159 additions and 105 deletions

lib/std/heap.zig

@@ -141,7 +141,19 @@ test defaultQueryPageSize {
     assert(std.math.isPowerOfTwo(defaultQueryPageSize()));
 }
 
-const CAllocator = struct {
+/// A wrapper around the C memory allocation API which supports the full `Allocator`
+/// interface, including arbitrary alignment. Simple `malloc` calls are used when
+/// possible, but large requested alignments may require larger buffers in order to
+/// satisfy the request. As well as `malloc`, `realloc`, and `free`, the extension
+/// functions `malloc_usable_size` and `posix_memalign` are used when available.
+///
+/// For an allocator that directly calls `malloc`/`realloc`/`free`, with no padding
+/// or special handling, see `raw_c_allocator`.
+pub const c_allocator: Allocator = .{
+    .ptr = undefined,
+    .vtable = &c_allocator_impl.vtable,
+};
+const c_allocator_impl = struct {
     comptime {
         if (!builtin.link_libc) {
             @compileError("C allocator is only available when linking against libc");
@@ -155,67 +167,55 @@ const CAllocator = struct {
         .free = free,
     };
 
-    pub const supports_malloc_size = @TypeOf(malloc_size) != void;
-    pub const malloc_size = if (@TypeOf(c.malloc_size) != void)
-        c.malloc_size
-    else if (@TypeOf(c.malloc_usable_size) != void)
-        c.malloc_usable_size
-    else if (@TypeOf(c._msize) != void)
-        c._msize
-    else {};
-
-    pub const supports_posix_memalign = switch (builtin.os.tag) {
-        .dragonfly, .netbsd, .freebsd, .illumos, .openbsd, .linux, .driverkit, .ios, .maccatalyst, .macos, .tvos, .visionos, .watchos, .serenity => true,
+    const have_posix_memalign = switch (builtin.os.tag) {
+        .dragonfly,
+        .netbsd,
+        .freebsd,
+        .illumos,
+        .openbsd,
+        .linux,
+        .driverkit,
+        .ios,
+        .maccatalyst,
+        .macos,
+        .tvos,
+        .visionos,
+        .watchos,
+        .serenity,
+        => true,
         else => false,
     };
 
-    fn getHeader(ptr: [*]u8) *[*]u8 {
-        return @ptrCast(@alignCast(ptr - @sizeOf(usize)));
+    fn allocStrat(need_align: Alignment) union(enum) {
+        raw,
+        posix_memalign: if (have_posix_memalign) void else noreturn,
+        manual_align: if (have_posix_memalign) noreturn else void,
+    } {
+        // If `malloc` guarantees `need_align`, always prefer a raw allocation.
+        if (Alignment.compare(need_align, .lte, .of(c.max_align_t))) {
+            return .raw;
+        }
+        // Use `posix_memalign` if available. Otherwise, we must manually align the allocation.
+        return if (have_posix_memalign) .posix_memalign else .manual_align;
     }
 
-    fn alignedAlloc(len: usize, alignment: Alignment) ?[*]u8 {
-        const alignment_bytes = alignment.toByteUnits();
-        if (supports_posix_memalign) {
-            // The posix_memalign only accepts alignment values that are a
-            // multiple of the pointer size
-            const effective_alignment = @max(alignment_bytes, @sizeOf(usize));
-
-            var aligned_ptr: ?*anyopaque = undefined;
-            if (c.posix_memalign(&aligned_ptr, effective_alignment, len) != 0)
-                return null;
-
-            return @ptrCast(aligned_ptr);
-        }
-
-        // Thin wrapper around regular malloc, overallocate to account for
-        // alignment padding and store the original malloc()'ed pointer before
-        // the aligned address.
-        const unaligned_ptr = @as([*]u8, @ptrCast(c.malloc(len + alignment_bytes - 1 + @sizeOf(usize)) orelse return null));
-        const unaligned_addr = @intFromPtr(unaligned_ptr);
-        const aligned_addr = mem.alignForward(usize, unaligned_addr + @sizeOf(usize), alignment_bytes);
-        const aligned_ptr = unaligned_ptr + (aligned_addr - unaligned_addr);
-        getHeader(aligned_ptr).* = unaligned_ptr;
-        return aligned_ptr;
-    }
-
-    fn alignedFree(ptr: [*]u8) void {
-        if (supports_posix_memalign) {
-            return c.free(ptr);
-        }
-        const unaligned_ptr = getHeader(ptr).*;
-        c.free(unaligned_ptr);
-    }
-
-    fn alignedAllocSize(ptr: [*]u8) usize {
-        if (supports_posix_memalign) {
-            return CAllocator.malloc_size(ptr);
-        }
-        const unaligned_ptr = getHeader(ptr).*;
-        const delta = @intFromPtr(ptr) - @intFromPtr(unaligned_ptr);
-        return CAllocator.malloc_size(unaligned_ptr) - delta;
+    /// If `allocStrat(a) == .manual_align`, an allocation looks like this:
+    ///
+    ///   unaligned_ptr   hdr_ptr  aligned_ptr
+    ///   v               v        v
+    ///   +---------------+--------+--------------+
+    ///   | padding       | header | usable bytes |
+    ///   +---------------+--------+--------------+
+    ///
+    /// * `unaligned_ptr` is the raw return value of `malloc`.
+    /// * `aligned_ptr` is computed by aligning `unaligned_ptr` forward; it is what `alloc` returns.
+    /// * `hdr_ptr` points to a pointer-sized header directly before the usable space. This header
+    ///   contains the value `unaligned_ptr`, so that we can pass it to `free` later. This is
+    ///   necessary because the width of the padding is unknown.
+    ///
+    /// This function accepts `aligned_ptr` and offsets it backwards to return `hdr_ptr`.
+    fn manualAlignHeader(aligned_ptr: [*]u8) *[*]u8 {
+        return @ptrCast(@alignCast(aligned_ptr - @sizeOf(usize)));
     }
 
     fn alloc(
@@ -226,67 +226,120 @@ const CAllocator = struct {
     ) ?[*]u8 {
         _ = return_address;
         assert(len > 0);
-        return alignedAlloc(len, alignment);
+        switch (allocStrat(alignment)) {
+            .raw => {
+                // C only needs to respect `max_align_t` up to the allocation size due to object
+                // alignment rules. If necessary, extend the allocation size.
+                const actual_len = @max(len, @alignOf(std.c.max_align_t));
+                const ptr = c.malloc(actual_len) orelse return null;
+                assert(alignment.check(@intFromPtr(ptr)));
+                return @ptrCast(ptr);
+            },
+            .posix_memalign => {
+                // The posix_memalign only accepts alignment values that are a
+                // multiple of the pointer size
+                const effective_alignment = @max(alignment.toByteUnits(), @sizeOf(usize));
+                var aligned_ptr: ?*anyopaque = undefined;
+                if (c.posix_memalign(&aligned_ptr, effective_alignment, len) != 0) {
+                    return null;
+                }
+                assert(alignment.check(@intFromPtr(aligned_ptr)));
+                return @ptrCast(aligned_ptr);
+            },
+            .manual_align => {
+                // Overallocate to account for alignment padding and store the original pointer
+                // returned by `malloc` before the aligned address.
+                const padded_len = len + @sizeOf(usize) + alignment.toByteUnits() - 1;
+                const unaligned_ptr: [*]u8 = @ptrCast(c.malloc(padded_len) orelse return null);
+                const unaligned_addr = @intFromPtr(unaligned_ptr);
+                const aligned_addr = alignment.forward(unaligned_addr + @sizeOf(usize));
+                const aligned_ptr = unaligned_ptr + (aligned_addr - unaligned_addr);
+                manualAlignHeader(aligned_ptr).* = unaligned_ptr;
+                return aligned_ptr;
+            },
+        }
     }
 
     fn resize(
         _: *anyopaque,
-        buf: []u8,
+        memory: []u8,
         alignment: Alignment,
         new_len: usize,
         return_address: usize,
     ) bool {
-        _ = alignment;
         _ = return_address;
-        if (new_len <= buf.len) {
-            return true;
+        assert(new_len > 0);
+        if (new_len <= memory.len) {
+            return true; // in-place shrink always works
         }
-        if (CAllocator.supports_malloc_size) {
-            const full_len = alignedAllocSize(buf.ptr);
-            if (new_len <= full_len) {
-                return true;
-            }
-        }
-        return false;
+        const mallocSize = func: {
+            if (@TypeOf(c.malloc_size) != void) break :func c.malloc_size;
+            if (@TypeOf(c.malloc_usable_size) != void) break :func c.malloc_usable_size;
+            if (@TypeOf(c._msize) != void) break :func c._msize;
+            return false; // we don't know how much space is actually available
+        };
+        const usable_len: usize = switch (allocStrat(alignment)) {
+            .raw, .posix_memalign => mallocSize(memory.ptr),
+            .manual_align => usable_len: {
+                const unaligned_ptr = manualAlignHeader(memory.ptr).*;
+                const full_len = mallocSize(unaligned_ptr);
+                const padding = @intFromPtr(memory.ptr) - @intFromPtr(unaligned_ptr);
+                break :usable_len full_len - padding;
+            },
+        };
+        return new_len <= usable_len;
     }
 
     fn remap(
-        context: *anyopaque,
+        ctx: *anyopaque,
         memory: []u8,
         alignment: Alignment,
         new_len: usize,
         return_address: usize,
     ) ?[*]u8 {
-        // realloc would potentially return a new allocation that does not
-        // respect the original alignment.
-        return if (resize(context, memory, alignment, new_len, return_address)) memory.ptr else null;
+        assert(new_len > 0);
+        // Prefer resizing in-place if possible, since `realloc` could be expensive even if legal.
+        if (resize(ctx, memory, alignment, new_len, return_address)) {
+            return memory.ptr;
+        }
+        switch (allocStrat(alignment)) {
+            .raw => {
+                // `malloc` and friends guarantee the required alignment, so we can try `realloc`.
+                // C only needs to respect `max_align_t` up to the allocation size due to object
+                // alignment rules. If necessary, extend the allocation size.
+                const actual_len = @max(new_len, @alignOf(std.c.max_align_t));
+                const new_ptr = c.realloc(memory.ptr, actual_len) orelse return null;
+                assert(alignment.check(@intFromPtr(new_ptr)));
+                return @ptrCast(new_ptr);
+            },
+            .posix_memalign, .manual_align => {
+                // `realloc` would potentially return a new allocation which does not respect
+                // the original alignment, so we can't do anything more.
+                return null;
+            },
+        }
     }
 
     fn free(
         _: *anyopaque,
-        buf: []u8,
+        memory: []u8,
         alignment: Alignment,
         return_address: usize,
     ) void {
-        _ = alignment;
         _ = return_address;
-        alignedFree(buf.ptr);
+        switch (allocStrat(alignment)) {
+            .raw, .posix_memalign => c.free(memory.ptr),
+            .manual_align => c.free(manualAlignHeader(memory.ptr).*),
+        }
     }
 };
 
-/// Supports the full Allocator interface, including alignment, and exploiting
-/// `malloc_usable_size` if available. For an allocator that directly calls
-/// `malloc`/`free`, see `raw_c_allocator`.
-pub const c_allocator: Allocator = .{
-    .ptr = undefined,
-    .vtable = &CAllocator.vtable,
-};
-
-/// Asserts allocations are within `@alignOf(std.c.max_align_t)` and directly
-/// calls `malloc`/`free`. Does not attempt to utilize `malloc_usable_size`.
-/// This allocator is safe to use as the backing allocator with
-/// `ArenaAllocator` for example and is more optimal in such a case than
-/// `c_allocator`.
+/// Asserts that allocations have alignments which `malloc` can satisfy. This means that
+/// the requested alignment is no greater than `@min(@alignOf(std.c.max_align_t), size)`.
+///
+/// This allocator is rarely appropriate to use. In general, prefer `c_allocator`, which
+/// does not have any special requirements of its input, but is still highly efficient for
+/// allocation requests which obey `malloc` alignment rules.
 pub const raw_c_allocator: Allocator = .{
     .ptr = undefined,
     .vtable = &raw_c_allocator_vtable,
@@ -306,13 +359,20 @@ fn rawCAlloc(
 ) ?[*]u8 {
     _ = context;
     _ = return_address;
-    assert(alignment.compare(.lte, .of(std.c.max_align_t)));
-    // Note that this pointer cannot be aligncasted to max_align_t because if
-    // len is < max_align_t then the alignment can be smaller. For example, if
-    // max_align_t is 16, but the user requests 8 bytes, there is no built-in
-    // type in C that is size 8 and has 16 byte alignment, so the alignment may
-    // be 8 bytes rather than 16. Similarly if only 1 byte is requested, malloc
-    // is allowed to return a 1-byte aligned pointer.
+    // `std.c.max_align_t` isn't the whole story, because if `len` is smaller than
+    // every C type with alignment `max_align_t`, the allocation can be less-aligned.
+    // The implementation need only guarantee that any type of length `len` would be
+    // suitably aligned.
+    //
+    // For instance, if `len == 8` and `alignment == .@"16"`, then `malloc` may not
+    // fulfil this request, because there is necessarily no C type with 8-byte size
+    // but 16-byte alignment.
+    //
+    // In theory, the resulting rule here would be target-specific, but in practice,
+    // the smallest type with an alignment of `max_align_t` has the same size (it's
+    // usually `c_longdouble`), so we can just check that `alignment <= len`.
+    assert(alignment.toByteUnits() <= len);
+    assert(Alignment.compare(alignment, .lte, .of(std.c.max_align_t)));
     return @ptrCast(c.malloc(len));
 }
@@ -339,8 +399,9 @@ fn rawCRemap(
     return_address: usize,
 ) ?[*]u8 {
     _ = context;
-    _ = alignment;
     _ = return_address;
+    // See `rawCAlloc` for an explanation of this `assert` call.
+    assert(alignment.toByteUnits() <= new_len);
     return @ptrCast(c.realloc(memory.ptr, new_len));
 }
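
To make the header diagram above concrete, here is a self-contained sketch
of the manual-align technique in isolation, assuming libc is linked and
`align_bytes` is a power of two no smaller than `@sizeOf(usize)`
(`alignedMalloc` and `alignedFree` are hypothetical names for illustration;
the real logic lives in `c_allocator_impl`):

    const std = @import("std");
    const c = std.c;

    // Over-allocate, align the result forward past a pointer-sized header, and
    // stash the original `malloc` pointer in that header so it can be freed later.
    fn alignedMalloc(len: usize, align_bytes: usize) ?[*]u8 {
        const padded_len = len + @sizeOf(usize) + align_bytes - 1;
        const unaligned_ptr: [*]u8 = @ptrCast(c.malloc(padded_len) orelse return null);
        const unaligned_addr = @intFromPtr(unaligned_ptr);
        const aligned_addr = std.mem.alignForward(usize, unaligned_addr + @sizeOf(usize), align_bytes);
        const aligned_ptr = unaligned_ptr + (aligned_addr - unaligned_addr);
        // The header lives in the `@sizeOf(usize)` bytes directly before `aligned_ptr`.
        const header: *[*]u8 = @ptrCast(@alignCast(aligned_ptr - @sizeOf(usize)));
        header.* = unaligned_ptr;
        return aligned_ptr;
    }

    // The padding width varies per allocation, so the original pointer must be
    // recovered from the header rather than recomputed.
    fn alignedFree(aligned_ptr: [*]u8) void {
        const header: *[*]u8 = @ptrCast(@alignCast(aligned_ptr - @sizeOf(usize)));
        c.free(header.*);
    }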

src/main.zig

@@ -167,14 +167,7 @@ pub fn main() anyerror!void {
     const gpa, const is_debug = gpa: {
         if (build_options.debug_gpa) break :gpa .{ debug_allocator.allocator(), true };
         if (native_os == .wasi) break :gpa .{ std.heap.wasm_allocator, false };
-        if (builtin.link_libc) {
-            // We would prefer to use raw libc allocator here, but cannot use
-            // it if it won't support the alignment we need.
-            if (@alignOf(std.c.max_align_t) < @max(@alignOf(i128), std.atomic.cache_line)) {
-                break :gpa .{ std.heap.c_allocator, false };
-            }
-            break :gpa .{ std.heap.raw_c_allocator, false };
-        }
+        if (builtin.link_libc) break :gpa .{ std.heap.c_allocator, false };
         break :gpa switch (builtin.mode) {
             .Debug, .ReleaseSafe => .{ debug_allocator.allocator(), true },
             .ReleaseFast, .ReleaseSmall => .{ std.heap.smp_allocator, false },