std.Io.Threaded: slightly different semantics

while still preserving the guarantee about async() being assigned a unit
of concurrency (or immediately running the task), this change:
* retains the error from calling getCpuCount()
* spawns all threads in detached mode, using WaitGroup to join them
* treats all workers the same regardless of whether they are processing
  concurrent or async tasks. one thread pool does all the work, while
  respecting async and concurrent limits.
This commit is contained in:
Andrew Kelley 2025-11-21 12:02:59 -08:00
parent 13b537d77c
commit cf744aa182
3 changed files with 92 additions and 102 deletions

View file

@ -22,12 +22,30 @@ mutex: std.Thread.Mutex = .{},
cond: std.Thread.Condition = .{}, cond: std.Thread.Condition = .{},
run_queue: std.SinglyLinkedList = .{}, run_queue: std.SinglyLinkedList = .{},
join_requested: bool = false, join_requested: bool = false,
threads: std.ArrayList(std.Thread),
stack_size: usize, stack_size: usize,
cpu_count: usize, // 0 means no limit /// All threads are spawned detached; this is how we wait until they all exit.
concurrency_limit: usize, // 0 means no limit wait_group: std.Thread.WaitGroup = .{},
available_thread_count: usize = 0, /// Maximum thread pool size (excluding main thread) when dispatching async
one_shot_thread_count: usize = 0, /// tasks. Until this limit, calls to `Io.async` when all threads are busy will
/// cause a new thread to be spawned and permanently added to the pool. After
/// this limit, calls to `Io.async` when all threads are busy run the task
/// immediately.
///
/// Defaults to a number equal to logical CPU cores.
async_limit: Io.Limit,
/// Maximum thread pool size (excluding main thread) for dispatching concurrent
/// tasks. Until this limit, calls to `Io.concurrent` will increase the thread
/// pool size.
///
/// concurrent tasks. After this number, calls to `Io.concurrent` return
/// `error.ConcurrencyUnavailable`.
concurrent_limit: Io.Limit = .unlimited,
/// Error from calling `std.Thread.getCpuCount` in `init`.
cpu_count_error: ?std.Thread.CpuCountError,
/// Number of threads that are unavailable to take tasks. To calculate
/// available count, subtract this from either `async_limit` or
/// `concurrent_limit`.
busy_count: usize = 0,
wsa: if (is_windows) Wsa else struct {} = .{}, wsa: if (is_windows) Wsa else struct {} = .{},
@ -103,19 +121,18 @@ pub fn init(
) Threaded { ) Threaded {
if (builtin.single_threaded) return .init_single_threaded; if (builtin.single_threaded) return .init_single_threaded;
const cpu_count = std.Thread.getCpuCount();
var t: Threaded = .{ var t: Threaded = .{
.allocator = gpa, .allocator = gpa,
.threads = .empty,
.stack_size = std.Thread.SpawnConfig.default_stack_size, .stack_size = std.Thread.SpawnConfig.default_stack_size,
.cpu_count = std.Thread.getCpuCount() catch 0, .async_limit = if (cpu_count) |n| .limited(n - 1) else |_| .nothing,
.concurrency_limit = 0, .cpu_count_error = if (cpu_count) |_| null else |e| e,
.old_sig_io = undefined, .old_sig_io = undefined,
.old_sig_pipe = undefined, .old_sig_pipe = undefined,
.have_signal_handler = false, .have_signal_handler = false,
}; };
t.threads.ensureTotalCapacity(gpa, t.cpu_count) catch {};
if (posix.Sigaction != void) { if (posix.Sigaction != void) {
// This causes sending `posix.SIG.IO` to thread to interrupt blocking // This causes sending `posix.SIG.IO` to thread to interrupt blocking
// syscalls, returning `posix.E.INTR`. // syscalls, returning `posix.E.INTR`.
@ -140,19 +157,17 @@ pub fn init(
/// * `deinit` is safe, but unnecessary to call. /// * `deinit` is safe, but unnecessary to call.
pub const init_single_threaded: Threaded = .{ pub const init_single_threaded: Threaded = .{
.allocator = .failing, .allocator = .failing,
.threads = .empty,
.stack_size = std.Thread.SpawnConfig.default_stack_size, .stack_size = std.Thread.SpawnConfig.default_stack_size,
.cpu_count = 1, .async_limit = .nothing,
.concurrency_limit = 0, .cpu_count_error = null,
.concurrent_limit = .nothing,
.old_sig_io = undefined, .old_sig_io = undefined,
.old_sig_pipe = undefined, .old_sig_pipe = undefined,
.have_signal_handler = false, .have_signal_handler = false,
}; };
pub fn deinit(t: *Threaded) void { pub fn deinit(t: *Threaded) void {
const gpa = t.allocator;
t.join(); t.join();
t.threads.deinit(gpa);
if (is_windows and t.wsa.status == .initialized) { if (is_windows and t.wsa.status == .initialized) {
if (ws2_32.WSACleanup() != 0) recoverableOsBugDetected(); if (ws2_32.WSACleanup() != 0) recoverableOsBugDetected();
} }
@ -171,10 +186,12 @@ fn join(t: *Threaded) void {
t.join_requested = true; t.join_requested = true;
} }
t.cond.broadcast(); t.cond.broadcast();
for (t.threads.items) |thread| thread.join(); t.wait_group.wait();
} }
fn worker(t: *Threaded) void { fn worker(t: *Threaded) void {
defer t.wait_group.finish();
t.mutex.lock(); t.mutex.lock();
defer t.mutex.unlock(); defer t.mutex.unlock();
@ -184,20 +201,13 @@ fn worker(t: *Threaded) void {
const closure: *Closure = @fieldParentPtr("node", closure_node); const closure: *Closure = @fieldParentPtr("node", closure_node);
closure.start(closure); closure.start(closure);
t.mutex.lock(); t.mutex.lock();
t.available_thread_count += 1; t.busy_count -= 1;
} }
if (t.join_requested) break; if (t.join_requested) break;
t.cond.wait(&t.mutex); t.cond.wait(&t.mutex);
} }
} }
fn oneShotWorker(t: *Threaded, closure: *Closure) void {
closure.start(closure);
t.mutex.lock();
defer t.mutex.unlock();
t.one_shot_thread_count -= 1;
}
pub fn io(t: *Threaded) Io { pub fn io(t: *Threaded) Io {
return .{ return .{
.userdata = t, .userdata = t,
@ -488,7 +498,7 @@ fn async(
start: *const fn (context: *const anyopaque, result: *anyopaque) void, start: *const fn (context: *const anyopaque, result: *anyopaque) void,
) ?*Io.AnyFuture { ) ?*Io.AnyFuture {
const t: *Threaded = @ptrCast(@alignCast(userdata)); const t: *Threaded = @ptrCast(@alignCast(userdata));
if (t.cpu_count == 1 or builtin.single_threaded) { if (builtin.single_threaded or t.async_limit == .nothing) {
start(context.ptr, result.ptr); start(context.ptr, result.ptr);
return null; return null;
} }
@ -500,35 +510,29 @@ fn async(
t.mutex.lock(); t.mutex.lock();
if (t.available_thread_count == 0) { const busy_count = t.busy_count;
if (t.cpu_count != 0 and t.threads.items.len >= t.cpu_count) {
t.mutex.unlock();
ac.deinit(gpa);
start(context.ptr, result.ptr);
return null;
}
t.threads.ensureUnusedCapacity(gpa, 1) catch { if (busy_count >= @intFromEnum(t.async_limit)) {
t.mutex.unlock();
ac.deinit(gpa);
start(context.ptr, result.ptr);
return null;
}
t.busy_count = busy_count + 1;
const pool_size = t.wait_group.value();
if (pool_size - busy_count == 0) {
t.wait_group.start();
const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
t.wait_group.finish();
t.busy_count = busy_count;
t.mutex.unlock(); t.mutex.unlock();
ac.deinit(gpa); ac.deinit(gpa);
start(context.ptr, result.ptr); start(context.ptr, result.ptr);
return null; return null;
}; };
thread.detach();
const thread = std.Thread.spawn(
.{ .stack_size = t.stack_size },
worker,
.{t},
) catch {
t.mutex.unlock();
ac.deinit(gpa);
start(context.ptr, result.ptr);
return null;
};
t.threads.appendAssumeCapacity(thread);
} else {
t.available_thread_count -= 1;
} }
t.run_queue.prepend(&ac.closure.node); t.run_queue.prepend(&ac.closure.node);
@ -550,47 +554,33 @@ fn concurrent(
const t: *Threaded = @ptrCast(@alignCast(userdata)); const t: *Threaded = @ptrCast(@alignCast(userdata));
const gpa = t.allocator; const gpa = t.allocator;
const ac = AsyncClosure.init(gpa, result_len, result_alignment, context, context_alignment, start) catch { const ac = AsyncClosure.init(gpa, result_len, result_alignment, context, context_alignment, start) catch
return error.ConcurrencyUnavailable; return error.ConcurrencyUnavailable;
};
errdefer ac.deinit(gpa); errdefer ac.deinit(gpa);
t.mutex.lock(); t.mutex.lock();
defer t.mutex.unlock(); defer t.mutex.unlock();
// If there's an avilable thread, use it. const busy_count = t.busy_count;
if (t.available_thread_count > 0) {
t.available_thread_count -= 1;
t.run_queue.prepend(&ac.closure.node);
t.cond.signal();
return @ptrCast(ac);
}
// If we can spawn a normal worker, spawn it and use it. if (busy_count >= @intFromEnum(t.concurrent_limit))
if (t.cpu_count == 0 or t.threads.items.len < t.cpu_count) { return error.ConcurrencyUnavailable;
t.threads.ensureUnusedCapacity(gpa, 1) catch return error.ConcurrencyUnavailable;
t.busy_count = busy_count + 1;
errdefer t.busy_count = busy_count;
const pool_size = t.wait_group.value();
if (pool_size - busy_count == 0) {
t.wait_group.start();
errdefer t.wait_group.finish();
const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch
return error.ConcurrencyUnavailable; return error.ConcurrencyUnavailable;
thread.detach();
t.threads.appendAssumeCapacity(thread);
t.run_queue.prepend(&ac.closure.node);
t.cond.signal();
return @ptrCast(ac);
} }
// If we have a concurrencty limit and we havent' hit it yet, t.run_queue.prepend(&ac.closure.node);
// spawn a new one-shot thread. t.cond.signal();
if (t.concurrency_limit != 0 and t.one_shot_thread_count >= t.concurrency_limit)
return error.ConcurrencyUnavailable;
t.one_shot_thread_count += 1;
errdefer t.one_shot_thread_count -= 1;
const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, oneShotWorker, .{ t, &ac.closure }) catch
return error.ConcurrencyUnavailable;
thread.detach();
return @ptrCast(ac); return @ptrCast(ac);
} }
@ -684,41 +674,37 @@ fn groupAsync(
context_alignment: std.mem.Alignment, context_alignment: std.mem.Alignment,
start: *const fn (*Io.Group, context: *const anyopaque) void, start: *const fn (*Io.Group, context: *const anyopaque) void,
) void { ) void {
if (builtin.single_threaded) return start(group, context.ptr);
const t: *Threaded = @ptrCast(@alignCast(userdata)); const t: *Threaded = @ptrCast(@alignCast(userdata));
if (builtin.single_threaded or t.async_limit == .nothing)
return start(group, context.ptr);
const gpa = t.allocator; const gpa = t.allocator;
const gc = GroupClosure.init(gpa, t, group, context, context_alignment, start) catch const gc = GroupClosure.init(gpa, t, group, context, context_alignment, start) catch
return start(group, context.ptr); return start(group, context.ptr);
t.mutex.lock(); t.mutex.lock();
if (t.available_thread_count == 0) { const busy_count = t.busy_count;
if (t.cpu_count != 0 and t.threads.items.len >= t.cpu_count) {
t.mutex.unlock();
gc.deinit(gpa);
return start(group, context.ptr);
}
t.threads.ensureUnusedCapacity(gpa, 1) catch { if (busy_count >= @intFromEnum(t.async_limit)) {
t.mutex.unlock();
gc.deinit(gpa);
return start(group, context.ptr);
}
t.busy_count = busy_count + 1;
const pool_size = t.wait_group.value();
if (pool_size - busy_count == 0) {
t.wait_group.start();
const thread = std.Thread.spawn(.{ .stack_size = t.stack_size }, worker, .{t}) catch {
t.wait_group.finish();
t.busy_count = busy_count;
t.mutex.unlock(); t.mutex.unlock();
gc.deinit(gpa); gc.deinit(gpa);
return start(group, context.ptr); return start(group, context.ptr);
}; };
thread.detach();
const thread = std.Thread.spawn(
.{ .stack_size = t.stack_size },
worker,
.{t},
) catch {
t.mutex.unlock();
gc.deinit(gpa);
return start(group, context.ptr);
};
t.threads.appendAssumeCapacity(thread);
} else {
t.available_thread_count -= 1;
} }
// Append to the group linked list inside the mutex to make `Io.Group.async` thread-safe. // Append to the group linked list inside the mutex to make `Io.Group.async` thread-safe.

View file

@ -10,7 +10,7 @@ test "concurrent vs main prevents deadlock via oversubscription" {
defer threaded.deinit(); defer threaded.deinit();
const io = threaded.io(); const io = threaded.io();
threaded.cpu_count = 1; threaded.async_limit = .nothing;
var queue: Io.Queue(u8) = .init(&.{}); var queue: Io.Queue(u8) = .init(&.{});
@ -38,7 +38,7 @@ test "concurrent vs concurrent prevents deadlock via oversubscription" {
defer threaded.deinit(); defer threaded.deinit();
const io = threaded.io(); const io = threaded.io();
threaded.cpu_count = 1; threaded.async_limit = .nothing;
var queue: Io.Queue(u8) = .init(&.{}); var queue: Io.Queue(u8) = .init(&.{});

View file

@ -60,6 +60,10 @@ pub fn isDone(wg: *WaitGroup) bool {
return (state / one_pending) == 0; return (state / one_pending) == 0;
} }
pub fn value(wg: *WaitGroup) usize {
return wg.state.load(.monotonic) / one_pending;
}
// Spawns a new thread for the task. This is appropriate when the callee // Spawns a new thread for the task. This is appropriate when the callee
// delegates all work. // delegates all work.
pub fn spawnManager( pub fn spawnManager(