Compilation: put supported codegen backends on a separate thread

(There are no supported backends.)
2025-12-06 13:54:21 +00:00 · 2024-07-08 09:05:30 -04:00 · 2024-07-08 09:05:30 -04:00 · 65ced4a334
commit 65ced4a334
parent c36e2bb980
7 changed files with 173 additions and 65 deletions
--- a/lib/std/Progress.zig
+++ b/lib/std/Progress.zig
@ -282,7 +282,7 @@ pub const Node = struct {
    }
    fn init(free_index: Index, parent: Parent, name: []const u8, estimated_total_items: usize) Node {
-        assert(parent != .unused);
+        assert(parent == .none or @intFromEnum(parent) < node_storage_buffer_len);
        const storage = storageByIndex(free_index);
        storage.* = .{
--- a/lib/std/Thread/Pool.zig
+++ b/lib/std/Thread/Pool.zig
@ -21,11 +21,11 @@ const Runnable = struct {
    runFn: RunProto,
 };
-const RunProto = *const fn (*Runnable, id: ?u32) void;
+const RunProto = *const fn (*Runnable, id: ?usize) void;
 pub const Options = struct {
    allocator: std.mem.Allocator,
-    n_jobs: ?u32 = null,
+    n_jobs: ?usize = null,
    track_ids: bool = false,
 };
@ -109,7 +109,7 @@ pub fn spawnWg(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, args
        run_node: RunQueue.Node = .{ .data = .{ .runFn = runFn } },
        wait_group: *WaitGroup,
-        fn runFn(runnable: *Runnable, _: ?u32) void {
+        fn runFn(runnable: *Runnable, _: ?usize) void {
            const run_node: *RunQueue.Node = @fieldParentPtr("data", runnable);
            const closure: *@This() = @alignCast(@fieldParentPtr("run_node", run_node));
            @call(.auto, func, closure.arguments);
@ -150,7 +150,7 @@ pub fn spawnWg(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, args
 /// Runs `func` in the thread pool, calling `WaitGroup.start` beforehand, and
 /// `WaitGroup.finish` after it returns.
 ///
-/// The first argument passed to `func` is a dense `u32` thread id, the rest
+/// The first argument passed to `func` is a dense `usize` thread id, the rest
 /// of the arguments are passed from `args`. Requires the pool to have been
 /// initialized with `.track_ids = true`.
 ///
@ -172,7 +172,7 @@ pub fn spawnWgId(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, ar
        run_node: RunQueue.Node = .{ .data = .{ .runFn = runFn } },
        wait_group: *WaitGroup,
-        fn runFn(runnable: *Runnable, id: ?u32) void {
+        fn runFn(runnable: *Runnable, id: ?usize) void {
            const run_node: *RunQueue.Node = @fieldParentPtr("data", runnable);
            const closure: *@This() = @alignCast(@fieldParentPtr("run_node", run_node));
            @call(.auto, func, .{id.?} ++ closure.arguments);
@ -191,7 +191,7 @@ pub fn spawnWgId(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, ar
        pool.mutex.lock();
        const closure = pool.allocator.create(Closure) catch {
-            const id = pool.ids.getIndex(std.Thread.getCurrentId());
+            const id: ?usize = pool.ids.getIndex(std.Thread.getCurrentId());
            pool.mutex.unlock();
            @call(.auto, func, .{id.?} ++ args);
            wait_group.finish();
@ -258,7 +258,7 @@ fn worker(pool: *Pool) void {
    pool.mutex.lock();
    defer pool.mutex.unlock();
-    const id: ?u32 = if (pool.ids.count() > 0) @intCast(pool.ids.count()) else null;
+    const id: ?usize = if (pool.ids.count() > 0) @intCast(pool.ids.count()) else null;
    if (id) |_| pool.ids.putAssumeCapacityNoClobber(std.Thread.getCurrentId(), {});
    while (true) {
@ -280,15 +280,12 @@ fn worker(pool: *Pool) void {
 }
 pub fn waitAndWork(pool: *Pool, wait_group: *WaitGroup) void {
-    var id: ?u32 = null;
+    var id: ?usize = null;
    while (!wait_group.isDone()) {
        pool.mutex.lock();
        if (pool.run_queue.popFirst()) |run_node| {
-            id = id orelse if (pool.ids.getIndex(std.Thread.getCurrentId())) |index|
+            id = id orelse pool.ids.getIndex(std.Thread.getCurrentId());
                @intCast(index)
            else
                null;
            pool.mutex.unlock();
            run_node.data.runFn(&run_node.data, id);
            continue;
@ -300,6 +297,6 @@ pub fn waitAndWork(pool: *Pool, wait_group: *WaitGroup) void {
    }
 }
-pub fn getIdCount(pool: *Pool) u32 {
+pub fn getIdCount(pool: *Pool) usize {
    return @intCast(1 + pool.threads.len);
 }
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@ -103,6 +103,14 @@ lld_errors: std.ArrayListUnmanaged(LldError) = .{},
 work_queue: std.fifo.LinearFifo(Job, .Dynamic),
 codegen_work: if (InternPool.single_threaded) void else struct {
    mutex: std.Thread.Mutex,
    cond: std.Thread.Condition,
    queue: std.fifo.LinearFifo(CodegenJob, .Dynamic),
    job_error: ?JobError,
    done: bool,
 },
 /// These jobs are to invoke the Clang compiler to create an object file, which
 /// gets linked with the Compilation.
 c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
@ -362,6 +370,16 @@ const Job = union(enum) {
    windows_import_lib: usize,
 };
 /// A unit of code generation work. Values of this type are submitted through
 /// `queueCodegenJob` and executed by `processOneCodegenJob`.
 const CodegenJob = union(enum) {
    /// A declaration; processing forwards the index to `linkerUpdateDecl`.
    decl: InternPool.DeclIndex,
    /// A function plus its `Air`; processing forwards both to `linkerUpdateFunc`.
    func: struct {
        func: InternPool.Index,
        /// This `Air` is owned by the `Job` and allocated with `gpa`.
        /// It must be deinited when the job is processed.
        air: Air,
    },
 };
 pub const CObject = struct {
    /// Relative to cwd. Owned by arena.
    src: CSourceFile,
@ -1429,6 +1447,13 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
            .emit_llvm_ir = options.emit_llvm_ir,
            .emit_llvm_bc = options.emit_llvm_bc,
            .work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
            .codegen_work = if (InternPool.single_threaded) {} else .{
                .mutex = .{},
                .cond = .{},
                .queue = std.fifo.LinearFifo(CodegenJob, .Dynamic).init(gpa),
                .job_error = null,
                .done = false,
            },
            .c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
            .win32_resource_work_queue = if (build_options.only_core_functionality) {} else std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa),
            .astgen_work_queue = std.fifo.LinearFifo(Zcu.File.Index, .Dynamic).init(gpa),
@ -3310,7 +3335,21 @@ pub fn addZirErrorMessages(eb: *ErrorBundle.Wip, file: *Zcu.File) !void {
 pub fn performAllTheWork(
    comp: *Compilation,
    main_progress_node: std.Progress.Node,
-) error{ TimerUnsupported, OutOfMemory }!void {
+) JobError!void {
    defer if (comp.module) |mod| {
        mod.sema_prog_node.end();
        mod.sema_prog_node = std.Progress.Node.none;
        mod.codegen_prog_node.end();
        mod.codegen_prog_node = std.Progress.Node.none;
    };
    try comp.performAllTheWorkInner(main_progress_node);
    if (!InternPool.single_threaded) if (comp.codegen_work.job_error) |job_error| return job_error;
 }
 fn performAllTheWorkInner(
    comp: *Compilation,
    main_progress_node: std.Progress.Node,
 ) JobError!void {
    // Here we queue up all the AstGen tasks first, followed by C object compilation.
    // We wait until the AstGen tasks are all completed before proceeding to the
    // (at least for now) single-threaded main work queue. However, C object compilation
@ -3410,16 +3449,20 @@ pub fn performAllTheWork(
        mod.sema_prog_node = main_progress_node.start("Semantic Analysis", 0);
        mod.codegen_prog_node = main_progress_node.start("Code Generation", 0);
    }
-    defer if (comp.module) |mod| {
+
-        mod.sema_prog_node.end();
+    if (!InternPool.single_threaded) comp.thread_pool.spawnWgId(&comp.work_queue_wait_group, codegenThread, .{comp});
-        mod.sema_prog_node = undefined;
+    defer if (!InternPool.single_threaded) {
-        mod.codegen_prog_node.end();
+        {
-        mod.codegen_prog_node = undefined;
+            comp.codegen_work.mutex.lock();
            defer comp.codegen_work.mutex.unlock();
            comp.codegen_work.done = true;
        }
        comp.codegen_work.cond.signal();
    };
    while (true) {
        if (comp.work_queue.readItem()) |work_item| {
-            try processOneJob(0, comp, work_item, main_progress_node);
+            try processOneJob(@intFromEnum(Zcu.PerThread.Id.main), comp, work_item, main_progress_node);
            continue;
        }
        if (comp.module) |zcu| {
@ -3447,11 +3490,12 @@ pub fn performAllTheWork(
    }
 }
-fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progress.Node) !void {
+const JobError = Allocator.Error;
 fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progress.Node) JobError!void {
    switch (job) {
        .codegen_decl => |decl_index| {
-            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
+            const decl = comp.module.?.declPtr(decl_index);
            const decl = pt.zcu.declPtr(decl_index);
            switch (decl.analysis) {
                .unreferenced => unreachable,
@ -3461,26 +3505,20 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progre
                .sema_failure,
                .codegen_failure,
                .dependency_failure,
-                => return,
+                => {},
                .complete => {
                    const named_frame = tracy.namedFrame("codegen_decl");
                    defer named_frame.end();
                    assert(decl.has_tv);
-
+                    try comp.queueCodegenJob(tid, .{ .decl = decl_index });
                    try pt.linkerUpdateDecl(decl_index);
                    return;
                },
            }
        },
        .codegen_func => |func| {
            const named_frame = tracy.namedFrame("codegen_func");
            defer named_frame.end();
            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
            // This call takes ownership of `func.air`.
-            try pt.linkerUpdateFunc(func.func, func.air);
+            try comp.queueCodegenJob(tid, .{ .func = .{
                .func = func.func,
                .air = func.air,
            } });
        },
        .analyze_func => |func| {
            const named_frame = tracy.namedFrame("analyze_func");
@ -3772,6 +3810,61 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progre
    }
 }
 /// Hands `codegen_job` to code generation.
 ///
 /// When `InternPool.single_threaded` is set, or the backend does not report
 /// the `.separate_thread` feature, the job is processed immediately on the
 /// calling thread using `tid`. Otherwise the job is appended to
 /// `comp.codegen_work.queue` while holding `comp.codegen_work.mutex`, and
 /// `comp.codegen_work.cond` is signaled afterwards.
 ///
 /// Returns any error from `processOneCodegenJob` (synchronous path) or from
 /// `writeItem` on the queue (queued path).
 fn queueCodegenJob(comp: *Compilation, tid: usize, codegen_job: CodegenJob) !void {
    // Synchronous path: no separate codegen thread is usable for this job.
    if (InternPool.single_threaded or
        !comp.module.?.backendSupportsFeature(.separate_thread))
        return processOneCodegenJob(tid, comp, codegen_job);
    {
        // The queue is only touched while the mutex is held.
        comp.codegen_work.mutex.lock();
        defer comp.codegen_work.mutex.unlock();
        try comp.codegen_work.queue.writeItem(codegen_job);
    }
    // Signal after the block above has released the mutex.
    comp.codegen_work.cond.signal();
 }
 /// Drains `comp.codegen_work.queue`, running each job through
 /// `processOneCodegenJob` with the given `tid`.
 ///
 /// `comp.codegen_work.mutex` is held for the whole function except while a
 /// job is being processed. The loop ends when the queue is empty and
 /// `comp.codegen_work.done` is set, or after the first job that fails; that
 /// error is stored in `comp.codegen_work.job_error`.
 fn codegenThread(tid: usize, comp: *Compilation) void {
    comp.codegen_work.mutex.lock();
    defer comp.codegen_work.mutex.unlock();
    while (true) {
        if (comp.codegen_work.queue.readItem()) |codegen_job| {
            // Release the mutex while the job runs. The deferred `lock` runs
            // when this block exits, including via `break` and `continue`,
            // so the mutex is held again before the loop proceeds or the
            // function-level deferred `unlock` executes.
            comp.codegen_work.mutex.unlock();
            defer comp.codegen_work.mutex.lock();
            processOneCodegenJob(tid, comp, codegen_job) catch |job_error| {
                // NOTE(review): this store executes before the deferred
                // re-lock, i.e. while the mutex is not held; confirm that
                // readers only inspect `job_error` after this thread is done.
                comp.codegen_work.job_error = job_error;
                // Leaving the loop here means any jobs still in the queue
                // are not processed by this function.
                break;
            };
            continue;
        }
        // Queue is empty: stop if no more work will arrive, else wait.
        if (comp.codegen_work.done) break;
        comp.codegen_work.cond.wait(&comp.codegen_work.mutex);
    }
 }
 /// Executes a single `CodegenJob` using a `Zcu.PerThread` built from
 /// `comp.module.?` and `tid`.
 ///
 /// A `.decl` job is forwarded to `linkerUpdateDecl`; a `.func` job is
 /// forwarded to `linkerUpdateFunc`. Each branch runs inside its own tracy
 /// named frame. Errors from those calls are returned to the caller.
 fn processOneCodegenJob(tid: usize, comp: *Compilation, codegen_job: CodegenJob) JobError!void {
    switch (codegen_job) {
        .decl => |decl_index| {
            const named_frame = tracy.namedFrame("codegen_decl");
            defer named_frame.end();
            // `tid` is converted to the enum type of the `tid` field.
            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
            try pt.linkerUpdateDecl(decl_index);
        },
        .func => |func| {
            const named_frame = tracy.namedFrame("codegen_func");
            defer named_frame.end();
            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
            // This call takes ownership of `func.air`.
            try pt.linkerUpdateFunc(func.func, func.air);
        },
    }
 }
 fn workerDocsCopy(comp: *Compilation) void {
    docsCopyFallible(comp) catch |err| {
        return comp.lockAndSetMiscFailure(
--- a/src/Compilation/Config.zig
+++ b/src/Compilation/Config.zig
@ -440,12 +440,8 @@ pub fn resolve(options: Options) ResolveError!Config {
        };
    };
-    const backend_supports_error_tracing = target_util.backendSupportsFeature(
+    const backend = target_util.zigBackend(target, use_llvm);
-        target.cpu.arch,
+    const backend_supports_error_tracing = target_util.backendSupportsFeature(backend, .error_return_trace);
        target.ofmt,
        use_llvm,
        .error_return_trace,
    );
    const root_error_tracing = b: {
        if (options.root_error_tracing) |x| break :b x;
--- a/src/Zcu.zig
+++ b/src/Zcu.zig
@ -64,8 +64,8 @@ root_mod: *Package.Module,
 /// `root_mod` is the test runner, and `main_mod` is the user's source file which has the tests.
 main_mod: *Package.Module,
 std_mod: *Package.Module,
-sema_prog_node: std.Progress.Node = undefined,
+sema_prog_node: std.Progress.Node = std.Progress.Node.none,
-codegen_prog_node: std.Progress.Node = undefined,
+codegen_prog_node: std.Progress.Node = std.Progress.Node.none,
 /// Used by AstGen worker to load and store ZIR cache.
 global_zir_cache: Compilation.Directory,
@ -3557,13 +3557,13 @@ pub const Feature = enum {
    /// to generate better machine code in the backends. All backends should migrate to
    /// enabling this feature.
    safety_checked_instructions,
    /// If the backend supports running from another thread.
    separate_thread,
 };
-pub fn backendSupportsFeature(zcu: Module, feature: Feature) bool {
+pub fn backendSupportsFeature(zcu: Module, comptime feature: Feature) bool {
-    const cpu_arch = zcu.root_mod.resolved_target.result.cpu.arch;
+    const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm);
-    const ofmt = zcu.root_mod.resolved_target.result.ofmt;
+    return target_util.backendSupportsFeature(backend, feature);
    const use_llvm = zcu.comp.config.use_llvm;
    return target_util.backendSupportsFeature(cpu_arch, ofmt, use_llvm, feature);
 }
 pub const AtomicPtrAlignmentError = error{
--- a/src/Zcu/PerThread.zig
+++ b/src/Zcu/PerThread.zig
@ -2129,7 +2129,7 @@ pub fn populateTestFunctions(
        zcu.sema_prog_node = main_progress_node.start("Semantic Analysis", 0);
        defer {
            zcu.sema_prog_node.end();
-            zcu.sema_prog_node = undefined;
+            zcu.sema_prog_node = std.Progress.Node.none;
        }
        try pt.ensureDeclAnalyzed(decl_index);
    }
@ -2238,7 +2238,7 @@ pub fn populateTestFunctions(
        zcu.codegen_prog_node = main_progress_node.start("Code Generation", 0);
        defer {
            zcu.codegen_prog_node.end();
-            zcu.codegen_prog_node = undefined;
+            zcu.codegen_prog_node = std.Progress.Node.none;
        }
        try pt.linkerUpdateDecl(decl_index);
--- a/src/target.zig
+++ b/src/target.zig
@ -537,20 +537,42 @@ pub fn zigBackend(target: std.Target, use_llvm: bool) std.builtin.CompilerBacken
    };
 }
-pub fn backendSupportsFeature(
+pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, comptime feature: Feature) bool {
    cpu_arch: std.Target.Cpu.Arch,
    ofmt: std.Target.ObjectFormat,
    use_llvm: bool,
    feature: Feature,
 ) bool {
    return switch (feature) {
-        .panic_fn => ofmt == .c or use_llvm or cpu_arch == .x86_64 or cpu_arch == .riscv64,
+        .panic_fn => switch (backend) {
-        .panic_unwrap_error => ofmt == .c or use_llvm,
+            .stage2_c, .stage2_llvm, .stage2_x86_64, .stage2_riscv64 => true,
-        .safety_check_formatted => ofmt == .c or use_llvm,
+            else => false,
-        .error_return_trace => use_llvm,
+        },
-        .is_named_enum_value => use_llvm,
+        .panic_unwrap_error => switch (backend) {
-        .error_set_has_value => use_llvm or cpu_arch.isWasm(),
+            .stage2_c, .stage2_llvm => true,
-        .field_reordering => ofmt == .c or use_llvm,
+            else => false,
-        .safety_checked_instructions => use_llvm,
+        },
        .safety_check_formatted => switch (backend) {
            .stage2_c, .stage2_llvm => true,
            else => false,
        },
        .error_return_trace => switch (backend) {
            .stage2_llvm => true,
            else => false,
        },
        .is_named_enum_value => switch (backend) {
            .stage2_llvm => true,
            else => false,
        },
        .error_set_has_value => switch (backend) {
            .stage2_llvm, .stage2_wasm => true,
            else => false,
        },
        .field_reordering => switch (backend) {
            .stage2_c, .stage2_llvm => true,
            else => false,
        },
        .safety_checked_instructions => switch (backend) {
            .stage2_llvm => true,
            else => false,
        },
        .separate_thread => switch (backend) {
            else => false,
        },
    };
 }