compiler: make it easier to apply breaking changes to std.builtin

Documentation for this will be on the wiki shortly.

Resolves: #21842
This commit is contained in:
mlugg 2025-01-16 10:09:41 +00:00
parent d00e05f186
commit b6abe1dbf7
No known key found for this signature in database
GPG key ID: 3F5B7DCCBF4AF02E
6 changed files with 151 additions and 67 deletions

View file

@ -141,6 +141,7 @@ int main(int argc, char **argv) {
"pub const skip_non_native = false;\n"
"pub const force_gpa = false;\n"
"pub const dev = .core;\n"
"pub const value_interpret_mode = .direct;\n"
, zig_version);
if (written < 100)
panic("unable to write to config.zig file");

View file

@ -9,6 +9,7 @@ const fs = std.fs;
const InstallDirectoryOptions = std.Build.InstallDirectoryOptions;
const assert = std.debug.assert;
const DevEnv = @import("src/dev.zig").Env;
// Choices for the `value-interpret-mode` build option, which is forwarded to the compiler as the
// `value_interpret_mode` build option. It selects how the compiler translates between
// `std.builtin` types and its internal data structures.
const ValueInterpretMode = enum { direct, by_name };
const zig_version: std.SemanticVersion = .{ .major = 0, .minor = 14, .patch = 0 };
const stack_size = 46 * 1024 * 1024;
@ -177,6 +178,7 @@ pub fn build(b: *std.Build) !void {
const strip = b.option(bool, "strip", "Omit debug information");
const valgrind = b.option(bool, "valgrind", "Enable valgrind integration");
const pie = b.option(bool, "pie", "Produce a Position Independent Executable");
const value_interpret_mode = b.option(ValueInterpretMode, "value-interpret-mode", "How the compiler translates between 'std.builtin' types and its internal datastructures") orelse .direct;
const value_tracing = b.option(bool, "value-tracing", "Enable extra state tracking to help troubleshoot bugs in the compiler (using the std.debug.Trace API)") orelse false;
const mem_leak_frames: u32 = b.option(u32, "mem-leak-frames", "How many stack frames to print when a memory leak occurs. Tests get 2x this amount.") orelse blk: {
@ -234,6 +236,7 @@ pub fn build(b: *std.Build) !void {
exe_options.addOption(bool, "llvm_has_xtensa", llvm_has_xtensa);
exe_options.addOption(bool, "force_gpa", force_gpa);
exe_options.addOption(DevEnv, "dev", b.option(DevEnv, "dev", "Build a compiler with a reduced feature set for development of specific features") orelse if (only_c) .bootstrap else .full);
exe_options.addOption(ValueInterpretMode, "value_interpret_mode", value_interpret_mode);
if (link_libc) {
exe.root_module.link_libc = true;
@ -620,6 +623,23 @@ fn addWasiUpdateStep(b: *std.Build, version: [:0]const u8) !void {
exe_options.addOption(bool, "value_tracing", false);
exe_options.addOption(DevEnv, "dev", .bootstrap);
// zig1 chooses to interpret values by name. The tradeoff is as follows:
//
// * We lose a small amount of performance. This is essentially irrelevant for zig1.
//
// * We lose the ability to perform trivial renames on certain `std.builtin` types without
// zig1.wasm updates. For instance, we cannot rename an enum from PascalCase fields to
// snake_case fields without an update.
//
// * We gain the ability to add and remove fields to and from `std.builtin` types without
// zig1.wasm updates. For instance, we can add a new tag to `CallingConvention` without
// an update.
//
// Because field renames only happen when we apply a breaking change to the language (which
// is becoming progressively rarer), but tags may be added to or removed from target-dependent
// types over time in response to new targets coming into use, we gain more than we lose here.
exe_options.addOption(ValueInterpretMode, "value_interpret_mode", .by_name);
const run_opt = b.addSystemCommand(&.{
"wasm-opt",
"-Oz",

View file

@ -2713,8 +2713,18 @@ fn analyzeValueAsCallconv(
src: LazySrcLoc,
unresolved_val: Value,
) !std.builtin.CallingConvention {
return interpretBuiltinType(sema, block, src, unresolved_val, std.builtin.CallingConvention);
}
fn interpretBuiltinType(
sema: *Sema,
block: *Block,
src: LazySrcLoc,
unresolved_val: Value,
comptime T: type,
) !T {
const resolved_val = try sema.resolveLazyValue(unresolved_val);
return resolved_val.interpret(std.builtin.CallingConvention, sema.pt) catch |err| switch (err) {
return resolved_val.interpret(T, sema.pt) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
error.UndefinedValue => return sema.failWithUseOfUndef(block, src),
error.TypeMismatch => @panic("std.builtin is corrupt"),
@ -21536,19 +21546,8 @@ fn zirReify(
.@"anyframe" => return sema.failWithUseOfAsync(block, src),
.enum_literal => return .enum_literal_type,
.int => {
const struct_type = ip.loadStructType(ip.typeOf(union_val.val));
const signedness_val = try Value.fromInterned(union_val.val).fieldValue(
pt,
struct_type.nameIndex(ip, try ip.getOrPutString(gpa, pt.tid, "signedness", .no_embedded_nulls)).?,
);
const bits_val = try Value.fromInterned(union_val.val).fieldValue(
pt,
struct_type.nameIndex(ip, try ip.getOrPutString(gpa, pt.tid, "bits", .no_embedded_nulls)).?,
);
const signedness = zcu.toEnum(std.builtin.Signedness, signedness_val);
const bits: u16 = @intCast(try bits_val.toUnsignedIntSema(pt));
const ty = try pt.intType(signedness, bits);
const int = try sema.interpretBuiltinType(block, operand_src, .fromInterned(union_val.val), std.builtin.Type.Int);
const ty = try pt.intType(int.signedness, int.bits);
return Air.internedToRef(ty.toIntern());
},
.vector => {
@ -21574,20 +21573,15 @@ fn zirReify(
return Air.internedToRef(ty.toIntern());
},
.float => {
const struct_type = ip.loadStructType(ip.typeOf(union_val.val));
const bits_val = try Value.fromInterned(union_val.val).fieldValue(pt, struct_type.nameIndex(
ip,
try ip.getOrPutString(gpa, pt.tid, "bits", .no_embedded_nulls),
).?);
const float = try sema.interpretBuiltinType(block, operand_src, .fromInterned(union_val.val), std.builtin.Type.Float);
const bits: u16 = @intCast(try bits_val.toUnsignedIntSema(pt));
const ty = switch (bits) {
const ty = switch (float.bits) {
16 => Type.f16,
32 => Type.f32,
64 => Type.f64,
80 => Type.f80,
128 => Type.f128,
else => return sema.fail(block, src, "{}-bit float unsupported", .{bits}),
else => return sema.fail(block, src, "{}-bit float unsupported", .{float.bits}),
};
return Air.internedToRef(ty.toIntern());
},
@ -21641,7 +21635,7 @@ fn zirReify(
try elem_ty.resolveLayout(pt);
}
const ptr_size = zcu.toEnum(std.builtin.Type.Pointer.Size, size_val);
const ptr_size = try sema.interpretBuiltinType(block, operand_src, size_val, std.builtin.Type.Pointer.Size);
const actual_sentinel: InternPool.Index = s: {
if (!sentinel_val.isNull(zcu)) {
@ -21691,7 +21685,7 @@ fn zirReify(
.is_const = is_const_val.toBool(),
.is_volatile = is_volatile_val.toBool(),
.alignment = abi_align,
.address_space = zcu.toEnum(std.builtin.AddressSpace, address_space_val),
.address_space = try sema.interpretBuiltinType(block, operand_src, address_space_val, std.builtin.AddressSpace),
.is_allowzero = is_allowzero_val.toBool(),
},
});
@ -21813,7 +21807,7 @@ fn zirReify(
try ip.getOrPutString(gpa, pt.tid, "is_tuple", .no_embedded_nulls),
).?);
const layout = zcu.toEnum(std.builtin.Type.ContainerLayout, layout_val);
const layout = try sema.interpretBuiltinType(block, operand_src, layout_val, std.builtin.Type.ContainerLayout);
// Decls
if (try decls_val.sliceLen(pt) > 0) {
@ -21929,7 +21923,7 @@ fn zirReify(
if (try decls_val.sliceLen(pt) > 0) {
return sema.fail(block, src, "reified unions must have no decls", .{});
}
const layout = zcu.toEnum(std.builtin.Type.ContainerLayout, layout_val);
const layout = try sema.interpretBuiltinType(block, operand_src, layout_val, std.builtin.Type.ContainerLayout);
const fields_arr = try sema.derefSliceAsArray(block, operand_src, fields_val, .{ .simple = .union_fields });
@ -24456,7 +24450,7 @@ fn resolveExportOptions(
const linkage_operand = try sema.fieldVal(block, src, options, try ip.getOrPutString(gpa, pt.tid, "linkage", .no_embedded_nulls), linkage_src);
const linkage_val = try sema.resolveConstDefinedValue(block, linkage_src, linkage_operand, .{ .simple = .export_options });
const linkage = zcu.toEnum(std.builtin.GlobalLinkage, linkage_val);
const linkage = try sema.interpretBuiltinType(block, linkage_src, linkage_val, std.builtin.GlobalLinkage);
const section_operand = try sema.fieldVal(block, src, options, try ip.getOrPutString(gpa, pt.tid, "section", .no_embedded_nulls), section_src);
const section_opt_val = try sema.resolveConstDefinedValue(block, section_src, section_operand, .{ .simple = .export_options });
@ -24467,7 +24461,7 @@ fn resolveExportOptions(
const visibility_operand = try sema.fieldVal(block, src, options, try ip.getOrPutString(gpa, pt.tid, "visibility", .no_embedded_nulls), visibility_src);
const visibility_val = try sema.resolveConstDefinedValue(block, visibility_src, visibility_operand, .{ .simple = .export_options });
const visibility = zcu.toEnum(std.builtin.SymbolVisibility, visibility_val);
const visibility = try sema.interpretBuiltinType(block, visibility_src, visibility_val, std.builtin.SymbolVisibility);
if (name.len < 1) {
return sema.fail(block, name_src, "exported symbol name cannot be empty", .{});
@ -24495,12 +24489,11 @@ fn resolveBuiltinEnum(
comptime name: Zcu.BuiltinDecl,
reason: ComptimeReason,
) CompileError!@field(std.builtin, @tagName(name)) {
const pt = sema.pt;
const ty = try sema.getBuiltinType(src, name);
const air_ref = try sema.resolveInst(zir_ref);
const coerced = try sema.coerce(block, ty, air_ref, src);
const val = try sema.resolveConstDefinedValue(block, src, coerced, reason);
return pt.zcu.toEnum(@field(std.builtin, @tagName(name)), val);
return sema.interpretBuiltinType(block, src, val, @field(std.builtin, @tagName(name)));
}
fn resolveAtomicOrder(
@ -25293,7 +25286,7 @@ fn zirBuiltinCall(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError
const air_ref = try sema.resolveInst(extra.modifier);
const modifier_ref = try sema.coerce(block, modifier_ty, air_ref, modifier_src);
const modifier_val = try sema.resolveConstDefinedValue(block, modifier_src, modifier_ref, .{ .simple = .call_modifier });
var modifier = zcu.toEnum(std.builtin.CallModifier, modifier_val);
var modifier = try sema.interpretBuiltinType(block, modifier_src, modifier_val, std.builtin.CallModifier);
switch (modifier) {
// These can be upgraded to comptime or nosuspend calls.
.auto, .never_tail, .no_async => {
@ -26468,9 +26461,9 @@ fn resolvePrefetchOptions(
const cache_val = try sema.resolveConstDefinedValue(block, cache_src, cache, .{ .simple = .prefetch_options });
return std.builtin.PrefetchOptions{
.rw = zcu.toEnum(std.builtin.PrefetchOptions.Rw, rw_val),
.rw = try sema.interpretBuiltinType(block, rw_src, rw_val, std.builtin.PrefetchOptions.Rw),
.locality = @intCast(try locality_val.toUnsignedIntSema(pt)),
.cache = zcu.toEnum(std.builtin.PrefetchOptions.Cache, cache_val),
.cache = try sema.interpretBuiltinType(block, cache_src, cache_val, std.builtin.PrefetchOptions.Cache),
};
}
@ -26536,7 +26529,7 @@ fn resolveExternOptions(
const linkage_ref = try sema.fieldVal(block, src, options, try ip.getOrPutString(gpa, pt.tid, "linkage", .no_embedded_nulls), linkage_src);
const linkage_val = try sema.resolveConstDefinedValue(block, linkage_src, linkage_ref, .{ .simple = .extern_options });
const linkage = zcu.toEnum(std.builtin.GlobalLinkage, linkage_val);
const linkage = try sema.interpretBuiltinType(block, linkage_src, linkage_val, std.builtin.GlobalLinkage);
const is_thread_local = try sema.fieldVal(block, src, options, try ip.getOrPutString(gpa, pt.tid, "is_thread_local", .no_embedded_nulls), thread_local_src);
const is_thread_local_val = try sema.resolveConstDefinedValue(block, thread_local_src, is_thread_local, .{ .simple = .extern_options });
@ -26770,9 +26763,6 @@ fn zirInplaceArithResultTy(sema: *Sema, extended: Zir.Inst.Extended.InstData) Co
}
fn zirBranchHint(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!void {
const pt = sema.pt;
const zcu = pt.zcu;
const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data;
const uncoerced_hint = try sema.resolveInst(extra.operand);
const operand_src = block.builtinCallArgSrc(extra.node, 0);
@ -26784,7 +26774,7 @@ fn zirBranchHint(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstDat
// We only apply the first hint in a branch.
// This allows user-provided hints to override implicit cold hints.
if (sema.branch_hint == null) {
sema.branch_hint = zcu.toEnum(std.builtin.BranchHint, hint_val);
sema.branch_hint = try sema.interpretBuiltinType(block, operand_src, hint_val, std.builtin.BranchHint);
}
}
@ -37136,11 +37126,10 @@ pub fn analyzeAsAddressSpace(
ctx: AddressSpaceContext,
) !std.builtin.AddressSpace {
const pt = sema.pt;
const zcu = pt.zcu;
const addrspace_ty = try sema.getBuiltinType(src, .AddressSpace);
const coerced = try sema.coerce(block, addrspace_ty, air_ref, src);
const addrspace_val = try sema.resolveConstDefinedValue(block, src, coerced, .{ .simple = .@"addrspace" });
const address_space = zcu.toEnum(std.builtin.AddressSpace, addrspace_val);
const address_space = try sema.interpretBuiltinType(block, src, addrspace_val, std.builtin.AddressSpace);
const target = pt.zcu.getTarget();
const arch = target.cpu.arch;

View file

@ -1,5 +1,6 @@
const std = @import("std");
const builtin = @import("builtin");
const build_options = @import("build_options");
const Type = @import("Type.zig");
const assert = std.debug.assert;
const BigIntConst = std.math.big.int.Const;
@ -4531,6 +4532,20 @@ pub fn resolveLazy(
}
}
/// Strategy used by `interpret` and `uninterpret` when converting enum and struct values between
/// a `Value` and the corresponding compiler-native type `T`. Unions need no separate handling
/// because their tag goes through the enum path.
const InterpretMode = enum {
/// In this mode, types are assumed to match what the compiler was built with in terms of field
/// order, field types, etc. This improves compiler performance. However, it means that certain
/// modifications to `std.builtin` will result in compiler crashes.
direct,
/// In this mode, various details of the type are allowed to differ from what the compiler was built
/// with. Fields are matched by name rather than index; added struct fields are ignored, and removed
/// struct fields use their default value if one exists. This is slower than `.direct`, but permits
/// making certain changes to `std.builtin` (in particular reordering/adding/removing fields), so it
/// is useful when applying breaking changes.
by_name,
};
/// The mode this compiler build uses, fixed at build time by the `value_interpret_mode` build option.
/// The option is converted by tag name rather than used directly, so it works for any option value
/// whose tag name matches a field of `InterpretMode`.
/// NOTE(review): presumably needed because the option is not declared as `InterpretMode` itself
/// (e.g. an enum literal in a hand-written config) -- confirm against the generated build options.
const interpret_mode: InterpretMode = @field(InterpretMode, @tagName(build_options.value_interpret_mode));
/// Given a `Value` representing a comptime-known value of type `T`, unwrap it into an actual `T` known to the compiler.
/// This is useful for accessing `std.builtin` structures received from comptime logic.
/// `val` must be fully resolved.
@ -4583,11 +4598,20 @@ pub fn interpret(val: Value, comptime T: type, pt: Zcu.PerThread) error{ OutOfMe
else
null,
.@"enum" => zcu.toEnum(T, val),
.@"enum" => switch (interpret_mode) {
.direct => {
const int = val.getUnsignedInt(zcu) orelse return error.TypeMismatch;
return std.meta.intToEnum(T, int) catch error.TypeMismatch;
},
.by_name => {
const field_index = ty.enumTagFieldIndex(val, zcu) orelse return error.TypeMismatch;
const field_name = ty.enumFieldName(field_index, zcu);
return std.meta.stringToEnum(T, field_name.toSlice(ip)) orelse error.TypeMismatch;
},
},
.@"union" => |@"union"| {
const union_obj = zcu.typeToUnion(ty) orelse return error.TypeMismatch;
if (union_obj.field_types.len != @"union".fields.len) return error.TypeMismatch;
// No need to handle `interpret_mode`, because the `.@"enum"` handling already deals with it.
const tag_val = val.unionTag(zcu) orelse return error.TypeMismatch;
const tag = try tag_val.interpret(@"union".tag_type.?, pt);
return switch (tag) {
@ -4599,14 +4623,31 @@ pub fn interpret(val: Value, comptime T: type, pt: Zcu.PerThread) error{ OutOfMe
};
},
.@"struct" => |@"struct"| {
if (ty.structFieldCount(zcu) != @"struct".fields.len) return error.TypeMismatch;
var result: T = undefined;
inline for (@"struct".fields, 0..) |field, field_idx| {
const field_val = try val.fieldValue(pt, field_idx);
@field(result, field.name) = try field_val.interpret(field.type, pt);
}
return result;
.@"struct" => |@"struct"| switch (interpret_mode) {
.direct => {
if (ty.structFieldCount(zcu) != @"struct".fields.len) return error.TypeMismatch;
var result: T = undefined;
inline for (@"struct".fields, 0..) |field, field_idx| {
const field_val = try val.fieldValue(pt, field_idx);
@field(result, field.name) = try field_val.interpret(field.type, pt);
}
return result;
},
.by_name => {
const struct_obj = zcu.typeToStruct(ty) orelse return error.TypeMismatch;
var result: T = undefined;
inline for (@"struct".fields) |field| {
const field_name_ip = try ip.getOrPutString(zcu.gpa, pt.tid, field.name, .no_embedded_nulls);
@field(result, field.name) = if (struct_obj.nameIndex(ip, field_name_ip)) |field_idx| f: {
const field_val = try val.fieldValue(pt, field_idx);
break :f try field_val.interpret(field.type, pt);
} else if (field.default_value) |ptr| f: {
const typed_ptr: *const field.type = @ptrCast(@alignCast(ptr));
break :f typed_ptr.*;
} else return error.TypeMismatch;
}
return result;
},
},
};
}
@ -4618,6 +4659,7 @@ pub fn uninterpret(val: anytype, ty: Type, pt: Zcu.PerThread) error{ OutOfMemory
const T = @TypeOf(val);
const zcu = pt.zcu;
const ip = &zcu.intern_pool;
if (ty.zigTypeTag(zcu) != @typeInfo(T)) return error.TypeMismatch;
return switch (@typeInfo(T)) {
@ -4657,9 +4699,17 @@ pub fn uninterpret(val: anytype, ty: Type, pt: Zcu.PerThread) error{ OutOfMemory
else
try pt.nullValue(ty),
.@"enum" => try pt.enumValue(ty, (try uninterpret(@intFromEnum(val), ty.intTagType(zcu), pt)).toIntern()),
.@"enum" => switch (interpret_mode) {
.direct => try pt.enumValue(ty, (try uninterpret(@intFromEnum(val), ty.intTagType(zcu), pt)).toIntern()),
.by_name => {
const field_name_ip = try ip.getOrPutString(zcu.gpa, pt.tid, @tagName(val), .no_embedded_nulls);
const field_idx = ty.enumFieldIndex(field_name_ip, zcu) orelse return error.TypeMismatch;
return pt.enumValueFieldIndex(ty, field_idx);
},
},
.@"union" => |@"union"| {
// No need to handle `interpret_mode`, because the `.@"enum"` handling already deals with it.
const tag: @"union".tag_type.? = val;
const tag_val = try uninterpret(tag, ty.unionTagType(zcu).?, pt);
const field_ty = ty.unionFieldType(tag_val, zcu) orelse return error.TypeMismatch;
@ -4672,17 +4722,44 @@ pub fn uninterpret(val: anytype, ty: Type, pt: Zcu.PerThread) error{ OutOfMemory
};
},
.@"struct" => |@"struct"| {
if (ty.structFieldCount(zcu) != @"struct".fields.len) return error.TypeMismatch;
var field_vals: [@"struct".fields.len]InternPool.Index = undefined;
inline for (&field_vals, @"struct".fields, 0..) |*field_val, field, field_idx| {
const field_ty = ty.fieldType(field_idx, zcu);
field_val.* = (try uninterpret(@field(val, field.name), field_ty, pt)).toIntern();
}
return .fromInterned(try pt.intern(.{ .aggregate = .{
.ty = ty.toIntern(),
.storage = .{ .elems = &field_vals },
} }));
.@"struct" => |@"struct"| switch (interpret_mode) {
.direct => {
if (ty.structFieldCount(zcu) != @"struct".fields.len) return error.TypeMismatch;
var field_vals: [@"struct".fields.len]InternPool.Index = undefined;
inline for (&field_vals, @"struct".fields, 0..) |*field_val, field, field_idx| {
const field_ty = ty.fieldType(field_idx, zcu);
field_val.* = (try uninterpret(@field(val, field.name), field_ty, pt)).toIntern();
}
return .fromInterned(try pt.intern(.{ .aggregate = .{
.ty = ty.toIntern(),
.storage = .{ .elems = &field_vals },
} }));
},
.by_name => {
const struct_obj = zcu.typeToStruct(ty) orelse return error.TypeMismatch;
const want_fields_len = struct_obj.field_types.len;
const field_vals = try zcu.gpa.alloc(InternPool.Index, want_fields_len);
defer zcu.gpa.free(field_vals);
@memset(field_vals, .none);
inline for (@"struct".fields) |field| {
const field_name_ip = try ip.getOrPutString(zcu.gpa, pt.tid, field.name, .no_embedded_nulls);
if (struct_obj.nameIndex(ip, field_name_ip)) |field_idx| {
const field_ty = ty.fieldType(field_idx, zcu);
field_vals[field_idx] = (try uninterpret(@field(val, field.name), field_ty, pt)).toIntern();
}
}
for (field_vals, 0..) |*field_val, field_idx| {
if (field_val.* == .none) {
const default_init = struct_obj.field_inits.get(ip)[field_idx];
if (default_init == .none) return error.TypeMismatch;
field_val.* = default_init;
}
}
return .fromInterned(try pt.intern(.{ .aggregate = .{
.ty = ty.toIntern(),
.storage = .{ .elems = field_vals },
} }));
},
},
};
}

View file

@ -3486,10 +3486,6 @@ pub fn funcInfo(zcu: *const Zcu, func_index: InternPool.Index) InternPool.Key.Fu
return zcu.intern_pool.toFunc(func_index);
}
pub fn toEnum(zcu: *const Zcu, comptime E: type, val: Value) E {
return zcu.intern_pool.toEnum(E, val.toIntern());
}
pub const UnionLayout = struct {
abi_size: u64,
abi_align: Alignment,

View file

@ -13,3 +13,4 @@ pub const value_tracing = false;
pub const skip_non_native = false;
pub const force_gpa = false;
pub const dev = .core;
pub const value_interpret_mode = .direct;