AstGen: handle ty result location for struct and array init correctly

Well, this was a journey!

The original issue I was trying to fix is covered by the new behavior
test in array.zig: in essence, `ty` and `coerced_ty` result locations
were not correctly propagated.

While fixing this, I noticed a similar bug in struct inits: the type was
propagated to *fields* fine, but the actual struct init was
unnecessarily anonymous, which could lead to unnecessary copies. Note
that the behavior test added in struct.zig was already passing - the bug
here didn't change any easy-to-test behavior - but I figured I'd add it
anyway.

This is a little harder than it seems, because the result type may not
itself be an array/struct type: it could be an optional / error union
wrapper. A new ZIR instruction is introduced to unwrap these.

This is also made a little tricky by the fact that it's possible for
result types to be unknown at the time of semantic analysis (due to
`anytype` parameters), leading to generic poison. In these cases, we
must essentially downgrade to an anonymous initialization.

Fixing these issues exposed *another* bug, related to type resolution in
Sema. That issue is now tracked by #16603. As a temporary workaround for
this bug, a few result locations for builtin function operands have been
disabled in AstGen. This is technically a breaking change, but it's very
minor: I doubt it'll cause any breakage in the wild.
This commit is contained in:
mlugg 2023-07-29 06:03:51 +01:00
parent 0461a64a93
commit 6917a8c258
No known key found for this signature in database
GPG key ID: 58978E823BDE3EF9
7 changed files with 192 additions and 30 deletions

View file

@ -1509,9 +1509,11 @@ fn arrayInitExpr(
const tag: Zir.Inst.Tag = if (types.array != .none) .array_init else .array_init_anon;
return arrayInitExprInner(gz, scope, node, array_init.ast.elements, types.array, types.elem, tag);
},
.ty, .coerced_ty => {
const tag: Zir.Inst.Tag = if (types.array != .none) .array_init else .array_init_anon;
const result = try arrayInitExprInner(gz, scope, node, array_init.ast.elements, types.array, types.elem, tag);
.ty, .coerced_ty => |ty_inst| {
const arr_ty = if (types.array != .none) types.array else blk: {
break :blk try gz.addUnNode(.opt_eu_base_ty, ty_inst, node);
};
const result = try arrayInitExprInner(gz, scope, node, array_init.ast.elements, arr_ty, types.elem, .array_init);
return rvalue(gz, ri, result, node);
},
.ptr => |ptr_res| {
@ -1748,7 +1750,9 @@ fn structInitExpr(
},
.ty, .coerced_ty => |ty_inst| {
if (struct_init.ast.type_expr == 0) {
const result = try structInitExprRlNone(gz, scope, node, struct_init, ty_inst, .struct_init_anon);
const struct_ty_inst = try gz.addUnNode(.opt_eu_base_ty, ty_inst, node);
_ = try gz.addUnNode(.validate_struct_init_ty, struct_ty_inst, node);
const result = try structInitExprRlTy(gz, scope, node, struct_init, struct_ty_inst, .struct_init);
return rvalue(gz, ri, result, node);
}
const inner_ty_inst = try typeExpr(gz, scope, struct_init.ast.type_expr);
@ -2743,6 +2747,7 @@ fn addEnsureResult(gz: *GenZir, maybe_unused_result: Zir.Inst.Ref, statement: As
.for_len,
.@"try",
.try_ptr,
.opt_eu_base_ty,
=> break :b false,
.extended => switch (gz.astgen.instructions.items(.data)[inst].extended.opcode) {
@ -8314,7 +8319,10 @@ fn builtinCall(
local_val.used = ident_token;
_ = try gz.addPlNode(.export_value, node, Zir.Inst.ExportValue{
.operand = local_val.inst,
.options = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .export_options_type } }, params[1]),
// TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
.options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]),
});
return rvalue(gz, ri, .void_value, node);
}
@ -8329,7 +8337,10 @@ fn builtinCall(
const loaded = try gz.addUnNode(.load, local_ptr.ptr, node);
_ = try gz.addPlNode(.export_value, node, Zir.Inst.ExportValue{
.operand = loaded,
.options = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .export_options_type } }, params[1]),
// TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
.options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]),
});
return rvalue(gz, ri, .void_value, node);
}
@ -8363,7 +8374,10 @@ fn builtinCall(
},
else => return astgen.failNode(params[0], "symbol to export must identify a declaration", .{}),
}
const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .export_options_type } }, params[1]);
// TODO: the result location here should be `.{ .coerced_ty = .export_options_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
_ = try gz.addPlNode(.@"export", node, Zir.Inst.Export{
.namespace = namespace,
.decl_name = decl_name,
@ -8373,7 +8387,10 @@ fn builtinCall(
},
.@"extern" => {
const type_inst = try typeExpr(gz, scope, params[0]);
const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .extern_options_type } }, params[1]);
// TODO: the result location here should be `.{ .coerced_ty = .extern_options_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
const result = try gz.addExtendedPayload(.builtin_extern, Zir.Inst.BinNode{
.node = gz.nodeIndexToRelative(node),
.lhs = type_inst,
@ -8477,7 +8494,10 @@ fn builtinCall(
// zig fmt: on
.Type => {
const operand = try expr(gz, scope, .{ .rl = .{ .coerced_ty = .type_info_type } }, params[0]);
// TODO: the result location here should be `.{ .coerced_ty = .type_info_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
const operand = try expr(gz, scope, .{ .rl = .none }, params[0]);
const gpa = gz.astgen.gpa;
@ -8755,7 +8775,10 @@ fn builtinCall(
},
.prefetch => {
const ptr = try expr(gz, scope, .{ .rl = .none }, params[0]);
const options = try comptimeExpr(gz, scope, .{ .rl = .{ .ty = .prefetch_options_type } }, params[1]);
// TODO: the result location here should be `.{ .coerced_ty = .preftech_options_type }`, but
// that currently hits assertions in Sema due to type resolution issues.
// See #16603
const options = try comptimeExpr(gz, scope, .{ .rl = .none }, params[1]);
_ = try gz.addExtendedPayload(.prefetch, Zir.Inst.BinNode{
.node = gz.nodeIndexToRelative(node),
.lhs = ptr,

View file

@ -1125,6 +1125,7 @@ fn analyzeBodyInner(
.array_base_ptr => try sema.zirArrayBasePtr(block, inst),
.field_base_ptr => try sema.zirFieldBasePtr(block, inst),
.for_len => try sema.zirForLen(block, inst),
.opt_eu_base_ty => try sema.zirOptEuBaseTy(block, inst),
.clz => try sema.zirBitCount(block, inst, .clz, Value.clz),
.ctz => try sema.zirBitCount(block, inst, .ctz, Value.ctz),
@ -1359,12 +1360,12 @@ fn analyzeBodyInner(
continue;
},
.validate_array_init_ty => {
try sema.validateArrayInitTy(block, inst);
try sema.zirValidateArrayInitTy(block, inst);
i += 1;
continue;
},
.validate_struct_init_ty => {
try sema.validateStructInitTy(block, inst);
try sema.zirValidateStructInitTy(block, inst);
i += 1;
continue;
},
@ -4312,7 +4313,31 @@ fn zirForLen(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return len;
}
fn validateArrayInitTy(
fn zirOptEuBaseTy(
sema: *Sema,
block: *Block,
inst: Zir.Inst.Index,
) CompileError!Air.Inst.Ref {
const mod = sema.mod;
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
var ty = sema.resolveType(block, .unneeded, inst_data.operand) catch |err| switch (err) {
// Since this is a ZIR instruction that returns a type, encountering
// generic poison should not result in a failed compilation, but the
// generic poison type. This prevents unnecessary failures when
// constructing types at compile-time.
error.GenericPoison => return .generic_poison_type,
else => |e| return e,
};
while (true) {
switch (ty.zigTypeTag(mod)) {
.Optional => ty = ty.optionalChild(mod),
.ErrorUnion => ty = ty.errorUnionPayload(mod),
else => return sema.addType(ty),
}
}
}
fn zirValidateArrayInitTy(
sema: *Sema,
block: *Block,
inst: Zir.Inst.Index,
@ -4322,7 +4347,11 @@ fn validateArrayInitTy(
const src = inst_data.src();
const ty_src: LazySrcLoc = .{ .node_offset_init_ty = inst_data.src_node };
const extra = sema.code.extraData(Zir.Inst.ArrayInit, inst_data.payload_index).data;
const ty = try sema.resolveType(block, ty_src, extra.ty);
const ty = sema.resolveType(block, ty_src, extra.ty) catch |err| switch (err) {
// It's okay for the type to be unknown: this will result in an anonymous array init.
error.GenericPoison => return,
else => |e| return e,
};
switch (ty.zigTypeTag(mod)) {
.Array => {
@ -4358,7 +4387,7 @@ fn validateArrayInitTy(
return sema.failWithArrayInitNotSupported(block, ty_src, ty);
}
fn validateStructInitTy(
fn zirValidateStructInitTy(
sema: *Sema,
block: *Block,
inst: Zir.Inst.Index,
@ -4366,7 +4395,11 @@ fn validateStructInitTy(
const mod = sema.mod;
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
const src = inst_data.src();
const ty = try sema.resolveType(block, src, inst_data.operand);
const ty = sema.resolveType(block, src, inst_data.operand) catch |err| switch (err) {
// It's okay for the type to be unknown: this will result in an anonymous struct init.
error.GenericPoison => return,
else => |e| return e,
};
switch (ty.zigTypeTag(mod)) {
.Struct, .Union => return,
@ -7744,7 +7777,15 @@ fn zirOptionalType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErro
fn zirElemTypeIndex(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
const mod = sema.mod;
const bin = sema.code.instructions.items(.data)[inst].bin;
const indexable_ty = try sema.resolveType(block, .unneeded, bin.lhs);
const operand = sema.resolveType(block, .unneeded, bin.lhs) catch |err| switch (err) {
// Since this is a ZIR instruction that returns a type, encountering
// generic poison should not result in a failed compilation, but the
// generic poison type. This prevents unnecessary failures when
// constructing types at compile-time.
error.GenericPoison => return .generic_poison_type,
else => |e| return e,
};
const indexable_ty = try sema.resolveTypeFields(operand);
assert(indexable_ty.isIndexable(mod)); // validated by a previous instruction
if (indexable_ty.zigTypeTag(mod) == .Struct) {
const elem_type = indexable_ty.structFieldType(@intFromEnum(bin.rhs), mod);
@ -18794,7 +18835,13 @@ fn zirStructInit(
const first_item = sema.code.extraData(Zir.Inst.StructInit.Item, extra.end).data;
const first_field_type_data = zir_datas[first_item.field_type].pl_node;
const first_field_type_extra = sema.code.extraData(Zir.Inst.FieldType, first_field_type_data.payload_index).data;
const resolved_ty = try sema.resolveType(block, src, first_field_type_extra.container_type);
const resolved_ty = sema.resolveType(block, src, first_field_type_extra.container_type) catch |err| switch (err) {
error.GenericPoison => {
// The type wasn't actually known, so treat this as an anon struct init.
return sema.structInitAnon(block, src, .typed_init, extra.data, extra.end, is_ref);
},
else => |e| return e,
};
try sema.resolveTypeLayout(resolved_ty);
if (resolved_ty.zigTypeTag(mod) == .Struct) {
@ -19037,26 +19084,57 @@ fn zirStructInitAnon(
inst: Zir.Inst.Index,
is_ref: bool,
) CompileError!Air.Inst.Ref {
const mod = sema.mod;
const gpa = sema.gpa;
const inst_data = sema.code.instructions.items(.data)[inst].pl_node;
const src = inst_data.src();
const extra = sema.code.extraData(Zir.Inst.StructInitAnon, inst_data.payload_index);
const types = try sema.arena.alloc(InternPool.Index, extra.data.fields_len);
return sema.structInitAnon(block, src, .anon_init, extra.data, extra.end, is_ref);
}
fn structInitAnon(
sema: *Sema,
block: *Block,
src: LazySrcLoc,
/// It is possible for a typed struct_init to be downgraded to an anonymous init due to a
/// generic poison type. In this case, we need to know to interpret the extra data differently.
comptime kind: enum { anon_init, typed_init },
extra_data: switch (kind) {
.anon_init => Zir.Inst.StructInitAnon,
.typed_init => Zir.Inst.StructInit,
},
extra_end: usize,
is_ref: bool,
) CompileError!Air.Inst.Ref {
const mod = sema.mod;
const gpa = sema.gpa;
const zir_datas = sema.code.instructions.items(.data);
const types = try sema.arena.alloc(InternPool.Index, extra_data.fields_len);
const values = try sema.arena.alloc(InternPool.Index, types.len);
var fields = std.AutoArrayHashMap(InternPool.NullTerminatedString, u32).init(sema.arena);
try fields.ensureUnusedCapacity(types.len);
// Find which field forces the expression to be runtime, if any.
const opt_runtime_index = rs: {
var runtime_index: ?usize = null;
var extra_index = extra.end;
var extra_index = extra_end;
for (types, 0..) |*field_ty, i_usize| {
const i = @as(u32, @intCast(i_usize));
const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
const i: u32 = @intCast(i_usize);
const item = switch (kind) {
.anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
.typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
};
extra_index = item.end;
const name = sema.code.nullTerminatedString(item.data.field_name);
const name = switch (kind) {
.anon_init => sema.code.nullTerminatedString(item.data.field_name),
.typed_init => name: {
// `item.data.field_type` references a `field_type` instruction
const field_type_data = zir_datas[item.data.field_type].pl_node;
const field_type_extra = sema.code.extraData(Zir.Inst.FieldType, field_type_data.payload_index);
break :name sema.code.nullTerminatedString(field_type_extra.data.name_start);
},
};
const name_ip = try mod.intern_pool.getOrPutString(gpa, name);
const gop = fields.getOrPutAssumeCapacity(name_ip);
if (gop.found_existing) {
@ -19129,10 +19207,13 @@ fn zirStructInitAnon(
.flags = .{ .address_space = target_util.defaultAddressSpace(target, .local) },
});
const alloc = try block.addTy(.alloc, alloc_ty);
var extra_index = extra.end;
var extra_index = extra_end;
for (types, 0..) |field_ty, i_usize| {
const i = @as(u32, @intCast(i_usize));
const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
const item = switch (kind) {
.anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
.typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
};
extra_index = item.end;
const field_ptr_ty = try mod.ptrType(.{
@ -19150,9 +19231,12 @@ fn zirStructInitAnon(
}
const element_refs = try sema.arena.alloc(Air.Inst.Ref, types.len);
var extra_index = extra.end;
var extra_index = extra_end;
for (types, 0..) |_, i| {
const item = sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index);
const item = switch (kind) {
.anon_init => sema.code.extraData(Zir.Inst.StructInitAnon.Item, extra_index),
.typed_init => sema.code.extraData(Zir.Inst.StructInit.Item, extra_index),
};
extra_index = item.end;
element_refs[i] = try sema.resolveInst(item.data.init);
}
@ -19175,7 +19259,13 @@ fn zirArrayInit(
const args = sema.code.refSlice(extra.end, extra.data.operands_len);
assert(args.len >= 2); // array_ty + at least one element
const array_ty = try sema.resolveType(block, src, args[0]);
const array_ty = sema.resolveType(block, src, args[0]) catch |err| switch (err) {
error.GenericPoison => {
// The type wasn't actually known, so treat this as an anon array init.
return sema.arrayInitAnon(block, src, args[1..], is_ref);
},
else => |e| return e,
};
const sentinel_val = array_ty.sentinel(mod);
const resolved_args = try gpa.alloc(Air.Inst.Ref, args.len - 1 + @intFromBool(sentinel_val != null));
@ -19283,6 +19373,16 @@ fn zirArrayInitAnon(
const src = inst_data.src();
const extra = sema.code.extraData(Zir.Inst.MultiOp, inst_data.payload_index);
const operands = sema.code.refSlice(extra.end, extra.data.operands_len);
return sema.arrayInitAnon(block, src, operands, is_ref);
}
fn arrayInitAnon(
sema: *Sema,
block: *Block,
src: LazySrcLoc,
operands: []const Zir.Inst.Ref,
is_ref: bool,
) CompileError!Air.Inst.Ref {
const mod = sema.mod;
const types = try sema.arena.alloc(InternPool.Index, operands.len);

View file

@ -700,10 +700,16 @@ pub const Inst = struct {
/// *?S returns *S
/// Uses the `un_node` field.
field_base_ptr,
/// Given a type, strips all optional and error union types wrapping it.
/// e.g. `E!?u32` becomes `u32`, `[]u8` becomes `[]u8`.
/// Uses the `un_node` field.
opt_eu_base_ty,
/// Checks that the type supports array init syntax.
/// Returns the underlying indexable type (since the given type may be e.g. an optional).
/// Uses the `un_node` field.
validate_array_init_ty,
/// Checks that the type supports struct init syntax.
/// Returns the underlying struct type (since the given type may be e.g. an optional).
/// Uses the `un_node` field.
validate_struct_init_ty,
/// Given a set of `field_ptr` instructions, assumes they are all part of a struct
@ -1234,6 +1240,7 @@ pub const Inst = struct {
.save_err_ret_index,
.restore_err_ret_index,
.for_len,
.opt_eu_base_ty,
=> false,
.@"break",
@ -1522,6 +1529,7 @@ pub const Inst = struct {
.for_len,
.@"try",
.try_ptr,
.opt_eu_base_ty,
=> false,
.extended => switch (data.extended.opcode) {
@ -1676,6 +1684,7 @@ pub const Inst = struct {
.switch_block_ref = .pl_node,
.array_base_ptr = .un_node,
.field_base_ptr = .un_node,
.opt_eu_base_ty = .un_node,
.validate_array_init_ty = .pl_node,
.validate_struct_init_ty = .un_node,
.validate_struct_init = .pl_node,

View file

@ -229,6 +229,7 @@ const Writer = struct {
.make_ptr_const,
.validate_deref,
.check_comptime_control_flow,
.opt_eu_base_ty,
=> try self.writeUnNode(stream, inst),
.ref,

View file

@ -3039,6 +3039,7 @@ pub const Type = struct {
return switch (mod.intern_pool.indexToKey(ty.toIntern())) {
.struct_type => |struct_type| {
const struct_obj = mod.structPtrUnwrap(struct_type.index).?;
assert(struct_obj.haveFieldTypes());
return struct_obj.fields.values()[index].ty;
},
.union_type => |union_type| {

View file

@ -761,3 +761,17 @@ test "slicing array of zero-sized values" {
for (arr[0..]) |zero|
try expect(zero == 0);
}
test "array init with no result pointer sets field result types" {
const S = struct {
// A function parameter has a result type, but no result pointer.
fn f(arr: [1]u32) u32 {
return arr[0];
}
};
const x: u64 = 123;
const y = S.f(.{@intCast(x)});
try expect(y == x);
}

View file

@ -1724,3 +1724,17 @@ test "packed struct field in anonymous struct" {
fn countFields(v: anytype) usize {
return @typeInfo(@TypeOf(v)).Struct.fields.len;
}
test "struct init with no result pointer sets field result types" {
const S = struct {
// A function parameter has a result type, but no result pointer.
fn f(s: struct { x: u32 }) u32 {
return s.x;
}
};
const x: u64 = 123;
const y = S.f(.{ .x = @intCast(x) });
try expect(y == x);
}