diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 83c5b7690f..2273577f17 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -92,27 +92,24 @@ pub fn hashString(s: []const u8) u64 { pub const StringIndexContext = struct { bytes: *const std.ArrayListUnmanaged(u8), - pub fn eql(self: @This(), a: u32, b: u32) bool { - _ = self; + pub fn eql(_: @This(), a: u32, b: u32) bool { return a == b; } - pub fn hash(self: @This(), x: u32) u64 { - const x_slice = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.bytes.items.ptr)) + x, 0); - return hashString(x_slice); + pub fn hash(ctx: @This(), key: u32) u64 { + return hashString(mem.sliceTo(ctx.bytes.items[key..], 0)); } }; pub const StringIndexAdapter = struct { bytes: *const std.ArrayListUnmanaged(u8), - pub fn eql(self: @This(), a_slice: []const u8, b: u32) bool { - const b_slice = mem.sliceTo(@as([*:0]const u8, @ptrCast(self.bytes.items.ptr)) + b, 0); - return mem.eql(u8, a_slice, b_slice); + pub fn eql(ctx: @This(), a: []const u8, b: u32) bool { + return mem.eql(u8, a, mem.sliceTo(ctx.bytes.items[b..], 0)); } - pub fn hash(self: @This(), adapted_key: []const u8) u64 { - _ = self; + pub fn hash(_: @This(), adapted_key: []const u8) u64 { + assert(mem.indexOfScalar(u8, adapted_key, 0) == null); return hashString(adapted_key); } }; diff --git a/src/AstGen.zig b/src/AstGen.zig index bb2cab5c70..b080a36d4f 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -11461,6 +11461,10 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice { const token_bytes = astgen.tree.tokenSlice(str_lit_token); try astgen.parseStrLit(str_lit_token, string_bytes, token_bytes, 0); const key: []const u8 = string_bytes.items[str_index..]; + if (std.mem.indexOfScalar(u8, key, 0)) |_| return .{ + .index = @enumFromInt(str_index), + .len = @intCast(key.len), + }; const gop = try astgen.string_table.getOrPutContextAdapted(gpa, key, StringIndexAdapter{ .bytes = string_bytes, }, StringIndexContext{ @@ -11468,7 +11472,7 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice { }); if (gop.found_existing) { string_bytes.shrinkRetainingCapacity(str_index); - return IndexSlice{ + return .{ .index = @enumFromInt(gop.key_ptr.*), .len = @intCast(key.len), }; @@ -11478,7 +11482,7 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice { // to lookup null terminated strings, so if we get a match, it has to // be null terminated for that to work. try string_bytes.append(gpa, 0); - return IndexSlice{ + return .{ .index = @enumFromInt(str_index), .len = @intCast(key.len), }; diff --git a/src/InternPool.zig b/src/InternPool.zig index 379a4f76c6..19be12c129 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -7985,7 +7985,8 @@ pub fn getTrailingAggregate( ) Allocator.Error!Index { try ip.items.ensureUnusedCapacity(gpa, 1); try ip.extra.ensureUnusedCapacity(gpa, @typeInfo(Bytes).Struct.fields.len); - const str: String = @enumFromInt(@intFromEnum(try getOrPutTrailingString(ip, gpa, len))); + + const str: String = @enumFromInt(ip.string_bytes.items.len - len); const adapter: KeyAdapter = .{ .intern_pool = ip }; const gop = try ip.map.getOrPutAdapted(gpa, Key{ .aggregate = .{ .ty = ty, diff --git a/src/Module.zig b/src/Module.zig index 66d6aa3fe5..2f32fa8197 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -4400,6 +4400,7 @@ fn newEmbedFile( src_loc: SrcLoc, ) !InternPool.Index { const gpa = mod.gpa; + const ip = &mod.intern_pool; const new_file = try gpa.create(EmbedFile); errdefer gpa.destroy(new_file); @@ -4414,11 +4415,11 @@ fn newEmbedFile( .mtime = actual_stat.mtime, }; const size = std.math.cast(usize, actual_stat.size) orelse return error.Overflow; - const ip = &mod.intern_pool; - const ptr = try ip.string_bytes.addManyAsSlice(gpa, size); - const actual_read = try file.readAll(ptr); + const bytes = try ip.string_bytes.addManyAsSlice(gpa, try std.math.add(usize, size, 1)); + const actual_read = try file.readAll(bytes[0..size]); if (actual_read != size) return error.UnexpectedEndOfFile; + bytes[size] = 0; const comp = mod.comp; switch (comp.cache_use) { @@ -4427,7 +4428,7 @@ fn newEmbedFile( errdefer gpa.free(copied_resolved_path); whole.cache_manifest_mutex.lock(); defer whole.cache_manifest_mutex.unlock(); - try man.addFilePostContents(copied_resolved_path, ptr, stat); + try man.addFilePostContents(copied_resolved_path, bytes[0..size], stat); }, .incremental => {}, } @@ -4437,7 +4438,7 @@ fn newEmbedFile( .sentinel = .zero_u8, .child = .u8_type, } }); - const array_val = try ip.getTrailingAggregate(gpa, array_ty, size); + const array_val = try ip.getTrailingAggregate(gpa, array_ty, bytes.len); const ptr_ty = (try mod.ptrType(.{ .child = array_ty, diff --git a/test/behavior.zig b/test/behavior.zig index 4067ffd99f..841bc0483b 100644 --- a/test/behavior.zig +++ b/test/behavior.zig @@ -127,3 +127,10 @@ test { _ = @import("behavior/export_keyword.zig"); } } + +// This bug only repros in the root file +test "deference @embedFile() of a file full of zero bytes" { + const contents = @embedFile("behavior/zero.bin").*; + try @import("std").testing.expect(contents.len == 456); + for (contents) |byte| try @import("std").testing.expect(byte == 0); +} diff --git a/test/behavior/zero.bin b/test/behavior/zero.bin new file mode 100644 index 0000000000..e6fd1dbde8 Binary files /dev/null and b/test/behavior/zero.bin differ