diff --git a/lib/std/Progress.zig b/lib/std/Progress.zig
index 160894aae9..2028e95dd5 100644
--- a/lib/std/Progress.zig
+++ b/lib/std/Progress.zig
@@ -282,7 +282,7 @@ pub const Node = struct {
     }
 
     fn init(free_index: Index, parent: Parent, name: []const u8, estimated_total_items: usize) Node {
-        assert(parent != .unused);
+        assert(parent == .none or @intFromEnum(parent) < node_storage_buffer_len);
 
         const storage = storageByIndex(free_index);
         storage.* = .{
diff --git a/lib/std/Thread/Pool.zig b/lib/std/Thread/Pool.zig
index d501b66520..179f2f8521 100644
--- a/lib/std/Thread/Pool.zig
+++ b/lib/std/Thread/Pool.zig
@@ -21,11 +21,11 @@ const Runnable = struct {
     runFn: RunProto,
 };
 
-const RunProto = *const fn (*Runnable, id: ?u32) void;
+const RunProto = *const fn (*Runnable, id: ?usize) void;
 
 pub const Options = struct {
     allocator: std.mem.Allocator,
-    n_jobs: ?u32 = null,
+    n_jobs: ?usize = null,
     track_ids: bool = false,
 };
 
@@ -109,7 +109,7 @@ pub fn spawnWg(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, args
         run_node: RunQueue.Node = .{ .data = .{ .runFn = runFn } },
         wait_group: *WaitGroup,
 
-        fn runFn(runnable: *Runnable, _: ?u32) void {
+        fn runFn(runnable: *Runnable, _: ?usize) void {
             const run_node: *RunQueue.Node = @fieldParentPtr("data", runnable);
             const closure: *@This() = @alignCast(@fieldParentPtr("run_node", run_node));
             @call(.auto, func, closure.arguments);
@@ -150,7 +150,7 @@ pub fn spawnWg(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, args
 /// Runs `func` in the thread pool, calling `WaitGroup.start` beforehand, and
 /// `WaitGroup.finish` after it returns.
 ///
-/// The first argument passed to `func` is a dense `u32` thread id, the rest
+/// The first argument passed to `func` is a dense `usize` thread id, the rest
 /// of the arguments are passed from `args`. Requires the pool to have been
 /// initialized with `.track_ids = true`.
 ///
@@ -172,7 +172,7 @@ pub fn spawnWgId(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, ar
         run_node: RunQueue.Node = .{ .data = .{ .runFn = runFn } },
         wait_group: *WaitGroup,
 
-        fn runFn(runnable: *Runnable, id: ?u32) void {
+        fn runFn(runnable: *Runnable, id: ?usize) void {
             const run_node: *RunQueue.Node = @fieldParentPtr("data", runnable);
             const closure: *@This() = @alignCast(@fieldParentPtr("run_node", run_node));
             @call(.auto, func, .{id.?} ++ closure.arguments);
@@ -191,7 +191,7 @@ pub fn spawnWgId(pool: *Pool, wait_group: *WaitGroup, comptime func: anytype, ar
         pool.mutex.lock();
 
         const closure = pool.allocator.create(Closure) catch {
-            const id = pool.ids.getIndex(std.Thread.getCurrentId());
+            const id: ?usize = pool.ids.getIndex(std.Thread.getCurrentId());
             pool.mutex.unlock();
             @call(.auto, func, .{id.?} ++ args);
             wait_group.finish();
@@ -258,7 +258,7 @@ fn worker(pool: *Pool) void {
     pool.mutex.lock();
     defer pool.mutex.unlock();
 
-    const id: ?u32 = if (pool.ids.count() > 0) @intCast(pool.ids.count()) else null;
+    const id: ?usize = if (pool.ids.count() > 0) @intCast(pool.ids.count()) else null;
     if (id) |_| pool.ids.putAssumeCapacityNoClobber(std.Thread.getCurrentId(), {});
 
     while (true) {
@@ -280,15 +280,12 @@ fn worker(pool: *Pool) void {
 }
 
 pub fn waitAndWork(pool: *Pool, wait_group: *WaitGroup) void {
-    var id: ?u32 = null;
+    var id: ?usize = null;
 
     while (!wait_group.isDone()) {
         pool.mutex.lock();
         if (pool.run_queue.popFirst()) |run_node| {
-            id = id orelse if (pool.ids.getIndex(std.Thread.getCurrentId())) |index|
-                @intCast(index)
-            else
-                null;
+            id = id orelse pool.ids.getIndex(std.Thread.getCurrentId());
             pool.mutex.unlock();
             run_node.data.runFn(&run_node.data, id);
             continue;
@@ -300,6 +297,6 @@ pub fn waitAndWork(pool: *Pool, wait_group: *WaitGroup) void {
     }
 }
 
-pub fn getIdCount(pool: *Pool) u32 {
+pub fn getIdCount(pool: *Pool) usize { // one more than `pool.threads.len`; NOTE(review): if `threads.len` is already `usize`, the `@intCast` below is now a no-op
     return @intCast(1 + pool.threads.len);
 }
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 74e8222bc3..118e325ed7 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -103,6 +103,14 @@ lld_errors: std.ArrayListUnmanaged(LldError) = .{},
 
 work_queue: std.fifo.LinearFifo(Job, .Dynamic),
 
+codegen_work: if (InternPool.single_threaded) void else struct { // hand-off state between `queueCodegenJob` and `codegenThread`; `void` in single-threaded builds
+    mutex: std.Thread.Mutex, // held while touching `queue` and `done`
+    cond: std.Thread.Condition, // signaled after a job is queued and after `done` is set; `codegenThread` waits on it
+    queue: std.fifo.LinearFifo(CodegenJob, .Dynamic), // jobs written by `queueCodegenJob`, read by `codegenThread`
+    job_error: ?JobError, // error recorded by `codegenThread`; returned from `performAllTheWork`
+    done: bool, // set to true by the deferred block in `performAllTheWorkInner`; `codegenThread` exits once the queue is empty and this is set
+},
+
 /// These jobs are to invoke the Clang compiler to create an object file, which
 /// gets linked with the Compilation.
 c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
@@ -362,6 +370,16 @@ const Job = union(enum) {
     windows_import_lib: usize,
 };
 
+const CodegenJob = union(enum) { // one unit of work for `processOneCodegenJob`
+    decl: InternPool.DeclIndex, // forwarded to `linkerUpdateDecl`
+    func: struct { // forwarded to `linkerUpdateFunc`
+        func: InternPool.Index,
+        /// This `Air` is owned by the `CodegenJob` and allocated with `gpa`.
+        /// It must be deinited when the job is processed.
+        air: Air,
+    },
+};
+
 pub const CObject = struct {
     /// Relative to cwd. Owned by arena.
     src: CSourceFile,
@@ -1429,6 +1447,13 @@ pub fn create(gpa: Allocator, arena: Allocator, options: CreateOptions) !*Compil
             .emit_llvm_ir = options.emit_llvm_ir,
             .emit_llvm_bc = options.emit_llvm_bc,
             .work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
+            .codegen_work = if (InternPool.single_threaded) {} else .{
+                .mutex = .{},
+                .cond = .{},
+                .queue = std.fifo.LinearFifo(CodegenJob, .Dynamic).init(gpa),
+                .job_error = null,
+                .done = false,
+            },
             .c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
             .win32_resource_work_queue = if (build_options.only_core_functionality) {} else std.fifo.LinearFifo(*Win32Resource, .Dynamic).init(gpa),
             .astgen_work_queue = std.fifo.LinearFifo(Zcu.File.Index, .Dynamic).init(gpa),
@@ -3310,7 +3335,21 @@ pub fn addZirErrorMessages(eb: *ErrorBundle.Wip, file: *Zcu.File) !void {
 pub fn performAllTheWork(
     comp: *Compilation,
     main_progress_node: std.Progress.Node,
-) error{ TimerUnsupported, OutOfMemory }!void {
+) JobError!void {
+    defer if (comp.module) |mod| { // runs after `performAllTheWorkInner` has fully returned, on success or error
+        mod.sema_prog_node.end();
+        mod.sema_prog_node = std.Progress.Node.none; // reset to `.none` rather than leaving the ended node in place
+        mod.codegen_prog_node.end();
+        mod.codegen_prog_node = std.Progress.Node.none;
+    };
+    try comp.performAllTheWorkInner(main_progress_node);
+    if (!InternPool.single_threaded) if (comp.codegen_work.job_error) |job_error| return job_error; // surface an error recorded by `codegenThread`
+}
+
+fn performAllTheWorkInner(
+    comp: *Compilation,
+    main_progress_node: std.Progress.Node,
+) JobError!void {
     // Here we queue up all the AstGen tasks first, followed by C object compilation.
     // We wait until the AstGen tasks are all completed before proceeding to the
     // (at least for now) single-threaded main work queue. However, C object compilation
@@ -3410,16 +3449,20 @@ pub fn performAllTheWork(
         mod.sema_prog_node = main_progress_node.start("Semantic Analysis", 0);
         mod.codegen_prog_node = main_progress_node.start("Code Generation", 0);
     }
-    defer if (comp.module) |mod| {
-        mod.sema_prog_node.end();
-        mod.sema_prog_node = undefined;
-        mod.codegen_prog_node.end();
-        mod.codegen_prog_node = undefined;
+
+    if (!InternPool.single_threaded) comp.thread_pool.spawnWgId(&comp.work_queue_wait_group, codegenThread, .{comp});
+    defer if (!InternPool.single_threaded) {
+        {
+            comp.codegen_work.mutex.lock();
+            defer comp.codegen_work.mutex.unlock();
+            comp.codegen_work.done = true;
+        }
+        comp.codegen_work.cond.signal();
     };
 
     while (true) {
         if (comp.work_queue.readItem()) |work_item| {
-            try processOneJob(0, comp, work_item, main_progress_node);
+            try processOneJob(@intFromEnum(Zcu.PerThread.Id.main), comp, work_item, main_progress_node);
             continue;
         }
         if (comp.module) |zcu| {
@@ -3447,11 +3490,12 @@ pub fn performAllTheWork(
     }
 }
 
-fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progress.Node) !void {
+const JobError = Allocator.Error; // error set shared by `performAllTheWork`, `processOneJob` and `processOneCodegenJob`
+
+fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progress.Node) JobError!void {
     switch (job) {
         .codegen_decl => |decl_index| {
-            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
-            const decl = pt.zcu.declPtr(decl_index);
+            const decl = comp.module.?.declPtr(decl_index);
 
             switch (decl.analysis) {
                 .unreferenced => unreachable,
@@ -3461,26 +3505,20 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progre
                 .sema_failure,
                 .codegen_failure,
                 .dependency_failure,
-                => return,
+                => {},
 
                 .complete => {
-                    const named_frame = tracy.namedFrame("codegen_decl");
-                    defer named_frame.end();
-
                     assert(decl.has_tv);
-
-                    try pt.linkerUpdateDecl(decl_index);
-                    return;
+                    try comp.queueCodegenJob(tid, .{ .decl = decl_index });
                 },
             }
         },
         .codegen_func => |func| {
-            const named_frame = tracy.namedFrame("codegen_func");
-            defer named_frame.end();
-
-            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
             // This call takes ownership of `func.air`.
-            try pt.linkerUpdateFunc(func.func, func.air);
+            try comp.queueCodegenJob(tid, .{ .func = .{
+                .func = func.func,
+                .air = func.air,
+            } });
         },
         .analyze_func => |func| {
             const named_frame = tracy.namedFrame("analyze_func");
@@ -3772,6 +3810,61 @@ fn processOneJob(tid: usize, comp: *Compilation, job: Job, prog_node: std.Progre
     }
 }
 
+fn queueCodegenJob(comp: *Compilation, tid: usize, codegen_job: CodegenJob) JobError!void { // explicit error set, matching `processOneJob` and `processOneCodegenJob`
+    if (InternPool.single_threaded or
+        !comp.module.?.backendSupportsFeature(.separate_thread))
+        return processOneCodegenJob(tid, comp, codegen_job); // single-threaded build or backend lacks `.separate_thread`: run the job inline on the calling thread
+
+    {
+        comp.codegen_work.mutex.lock();
+        defer comp.codegen_work.mutex.unlock();
+        try comp.codegen_work.queue.writeItem(codegen_job); // the queue is only touched with the mutex held
+    }
+    comp.codegen_work.cond.signal(); // wake `codegenThread`; signaled after the lock has been released
+}
+
+fn codegenThread(tid: usize, comp: *Compilation) void { // started through `spawnWgId`, which supplies `tid`; drains `codegen_work.queue`
+    comp.codegen_work.mutex.lock();
+    defer comp.codegen_work.mutex.unlock();
+
+    while (true) {
+        if (comp.codegen_work.queue.readItem()) |codegen_job| {
+            comp.codegen_work.mutex.unlock(); // drop the lock while the job runs so `queueCodegenJob` can keep writing
+            defer comp.codegen_work.mutex.lock(); // re-acquired when this scope exits, via `continue` or `break`
+
+            processOneCodegenJob(tid, comp, codegen_job) catch |job_error| {
+                comp.codegen_work.job_error = job_error; // NOTE(review): stored while the mutex is released; jobs still in the queue are left unprocessed
+                break;
+            };
+            continue;
+        }
+
+        if (comp.codegen_work.done) break; // queue is empty and the producer has set `done`
+
+        comp.codegen_work.cond.wait(&comp.codegen_work.mutex); // sleep until a job is queued or `done` is signaled
+    }
+}
+
+fn processOneCodegenJob(tid: usize, comp: *Compilation, codegen_job: CodegenJob) JobError!void { // dispatches one job to `linkerUpdateDecl` or `linkerUpdateFunc`; called inline from `queueCodegenJob` or from `codegenThread`
+    switch (codegen_job) {
+        .decl => |decl_index| {
+            const named_frame = tracy.namedFrame("codegen_decl");
+            defer named_frame.end();
+
+            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) }; // per-thread handle built from the caller-supplied `tid`
+            try pt.linkerUpdateDecl(decl_index);
+        },
+        .func => |func| {
+            const named_frame = tracy.namedFrame("codegen_func");
+            defer named_frame.end();
+
+            const pt: Zcu.PerThread = .{ .zcu = comp.module.?, .tid = @enumFromInt(tid) };
+            // This call takes ownership of `func.air`.
+            try pt.linkerUpdateFunc(func.func, func.air);
+        },
+    }
+}
+
 fn workerDocsCopy(comp: *Compilation) void {
     docsCopyFallible(comp) catch |err| {
         return comp.lockAndSetMiscFailure(
diff --git a/src/Compilation/Config.zig b/src/Compilation/Config.zig
index 2de2184252..6e28f5028c 100644
--- a/src/Compilation/Config.zig
+++ b/src/Compilation/Config.zig
@@ -440,12 +440,8 @@ pub fn resolve(options: Options) ResolveError!Config {
         };
     };
 
-    const backend_supports_error_tracing = target_util.backendSupportsFeature(
-        target.cpu.arch,
-        target.ofmt,
-        use_llvm,
-        .error_return_trace,
-    );
+    const backend = target_util.zigBackend(target, use_llvm);
+    const backend_supports_error_tracing = target_util.backendSupportsFeature(backend, .error_return_trace);
 
     const root_error_tracing = b: {
         if (options.root_error_tracing) |x| break :b x;
diff --git a/src/Zcu.zig b/src/Zcu.zig
index b855e4fcf0..2f87bcca0f 100644
--- a/src/Zcu.zig
+++ b/src/Zcu.zig
@@ -64,8 +64,8 @@ root_mod: *Package.Module,
 /// `root_mod` is the test runner, and `main_mod` is the user's source file which has the tests.
 main_mod: *Package.Module,
 std_mod: *Package.Module,
-sema_prog_node: std.Progress.Node = undefined,
-codegen_prog_node: std.Progress.Node = undefined,
+sema_prog_node: std.Progress.Node = std.Progress.Node.none,
+codegen_prog_node: std.Progress.Node = std.Progress.Node.none,
 
 /// Used by AstGen worker to load and store ZIR cache.
 global_zir_cache: Compilation.Directory,
@@ -3557,13 +3557,13 @@ pub const Feature = enum {
     /// to generate better machine code in the backends. All backends should migrate to
     /// enabling this feature.
     safety_checked_instructions,
+    /// If the backend supports running from another thread.
+    separate_thread,
 };
 
-pub fn backendSupportsFeature(zcu: Module, feature: Feature) bool {
-    const cpu_arch = zcu.root_mod.resolved_target.result.cpu.arch;
-    const ofmt = zcu.root_mod.resolved_target.result.ofmt;
-    const use_llvm = zcu.comp.config.use_llvm;
-    return target_util.backendSupportsFeature(cpu_arch, ofmt, use_llvm, feature);
+pub fn backendSupportsFeature(zcu: Module, comptime feature: Feature) bool { // `feature` must be comptime-known; it is forwarded to `target_util.backendSupportsFeature`
+    const backend = target_util.zigBackend(zcu.root_mod.resolved_target.result, zcu.comp.config.use_llvm); // backend chosen from the root module's resolved target and `use_llvm`
+    return target_util.backendSupportsFeature(backend, feature);
 }
 
 pub const AtomicPtrAlignmentError = error{
diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig
index b0fc35b552..f8a3104dc0 100644
--- a/src/Zcu/PerThread.zig
+++ b/src/Zcu/PerThread.zig
@@ -2129,7 +2129,7 @@ pub fn populateTestFunctions(
         zcu.sema_prog_node = main_progress_node.start("Semantic Analysis", 0);
         defer {
             zcu.sema_prog_node.end();
-            zcu.sema_prog_node = undefined;
+            zcu.sema_prog_node = std.Progress.Node.none;
         }
         try pt.ensureDeclAnalyzed(decl_index);
     }
@@ -2238,7 +2238,7 @@ pub fn populateTestFunctions(
         zcu.codegen_prog_node = main_progress_node.start("Code Generation", 0);
         defer {
             zcu.codegen_prog_node.end();
-            zcu.codegen_prog_node = undefined;
+            zcu.codegen_prog_node = std.Progress.Node.none;
         }
 
         try pt.linkerUpdateDecl(decl_index);
diff --git a/src/target.zig b/src/target.zig
index a253c1fa0b..2accc100b8 100644
--- a/src/target.zig
+++ b/src/target.zig
@@ -537,20 +537,42 @@ pub fn zigBackend(target: std.Target, use_llvm: bool) std.builtin.CompilerBacken
     };
 }
 
-pub fn backendSupportsFeature(
-    cpu_arch: std.Target.Cpu.Arch,
-    ofmt: std.Target.ObjectFormat,
-    use_llvm: bool,
-    feature: Feature,
-) bool {
+pub inline fn backendSupportsFeature(backend: std.builtin.CompilerBackend, comptime feature: Feature) bool { // answers per resolved backend; `feature` is comptime-known, so the outer switch is resolved at compile time
     return switch (feature) {
-        .panic_fn => ofmt == .c or use_llvm or cpu_arch == .x86_64 or cpu_arch == .riscv64,
-        .panic_unwrap_error => ofmt == .c or use_llvm,
-        .safety_check_formatted => ofmt == .c or use_llvm,
-        .error_return_trace => use_llvm,
-        .is_named_enum_value => use_llvm,
-        .error_set_has_value => use_llvm or cpu_arch.isWasm(),
-        .field_reordering => ofmt == .c or use_llvm,
-        .safety_checked_instructions => use_llvm,
+        .panic_fn => switch (backend) {
+            .stage2_c, .stage2_llvm, .stage2_x86_64, .stage2_riscv64 => true,
+            else => false,
+        },
+        .panic_unwrap_error => switch (backend) {
+            .stage2_c, .stage2_llvm => true,
+            else => false,
+        },
+        .safety_check_formatted => switch (backend) {
+            .stage2_c, .stage2_llvm => true,
+            else => false,
+        },
+        .error_return_trace => switch (backend) {
+            .stage2_llvm => true,
+            else => false,
+        },
+        .is_named_enum_value => switch (backend) {
+            .stage2_llvm => true,
+            else => false,
+        },
+        .error_set_has_value => switch (backend) {
+            .stage2_llvm, .stage2_wasm => true,
+            else => false,
+        },
+        .field_reordering => switch (backend) {
+            .stage2_c, .stage2_llvm => true,
+            else => false,
+        },
+        .safety_checked_instructions => switch (backend) {
+            .stage2_llvm => true,
+            else => false,
+        },
+        .separate_thread => switch (backend) { // no backend opts in yet: this is false for every backend
+            else => false,
+        },
     };
 }