fix(cache): handle missing cache hits when chaining two run steps

fixes #19817

This improves the efficiency of the cache when chaining muliple commands
like

    const step1 = b.addRunArtifact(tool_fast);
    step1.addFileArg(b.path("src/input.c"));
    const output1 = step1.addOutputFileArg("output1.h");

    const step2 = b.addRunArtifact(tool_slow);
    step2.addFileArg(output1);
    const chained_output = step2.addOutputFileArg("output2.h");

assume that step2 takes much long time than step1
if we make a change to "src/input.c" which produces an identical
"output1.h" as a previous input, one would expect step2 not to
rerun as the cached output2.h only depends on the content of output1.h

However, this does not work yet as the hash of src/input.c leaks into
the file name of the cached output1.h, which the second run step
interprets as a different cache key. Not using the ".zig-build/o/{HASH}"
part of the file name in the hash key fixes this.
This commit is contained in:
bfredl 2024-05-15 11:41:52 +02:00
parent cbfa87cbea
commit b934e4c937
4 changed files with 29 additions and 16 deletions

View file

@ -2314,6 +2314,9 @@ pub const LazyPath = union(enum) {
/// Applied after `up`.
sub_path: []const u8 = "",
/// If set, the file is hashed only on this suffix, not the full absolute path
content_hash_name: ?[]const u8 = null,
},
/// An absolute path or a path relative to the current working directory of
@ -2503,7 +2506,9 @@ pub const LazyPath = union(enum) {
}
}
return file_path.join(src_builder.allocator, gen.sub_path) catch @panic("OOM");
var item = file_path.join(src_builder.allocator, gen.sub_path) catch @panic("OOM");
item.content_hash_name = gen.content_hash_name;
return item;
},
.dependency => |dep| return .{
.root_dir = dep.dependency.builder.build_root,
@ -2543,6 +2548,7 @@ pub const LazyPath = union(enum) {
.file = gen.file,
.up = gen.up,
.sub_path = dupePathInner(allocator, gen.sub_path),
.content_hash_name = if (gen.content_hash_name) |name| Build.dupeInner(allocator, name) else null,
} },
.dependency => |dep| .{ .dependency = .{
.dependency = dep.dependency,

View file

@ -404,7 +404,7 @@ pub const Manifest = struct {
});
errdefer gpa.free(resolved_path);
const prefixed_path = try m.cache.findPrefixResolved(resolved_path);
return addFileInner(m, prefixed_path, handle, max_file_size);
return addFileInner(m, prefixed_path, handle, max_file_size, path.content_hash_name);
}
/// Deprecated; use `addFilePath`.
@ -416,10 +416,10 @@ pub const Manifest = struct {
const prefixed_path = try self.cache.findPrefix(file_path);
errdefer gpa.free(prefixed_path.sub_path);
return addFileInner(self, prefixed_path, null, max_file_size);
return addFileInner(self, prefixed_path, null, max_file_size, null);
}
fn addFileInner(self: *Manifest, prefixed_path: PrefixedPath, handle: ?fs.File, max_file_size: ?usize) usize {
fn addFileInner(self: *Manifest, prefixed_path: PrefixedPath, handle: ?fs.File, max_file_size: ?usize, content_hash_name: ?[]const u8) usize {
const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
if (gop.found_existing) {
self.cache.gpa.free(prefixed_path.sub_path);
@ -436,8 +436,13 @@ pub const Manifest = struct {
.handle = handle,
};
self.hash.add(prefixed_path.prefix);
self.hash.addBytes(prefixed_path.sub_path);
if (content_hash_name) |name| {
self.hash.add(@as(u8, '?'));
self.hash.addBytes(name);
} else {
self.hash.add(prefixed_path.prefix);
self.hash.addBytes(prefixed_path.sub_path);
}
return gop.index;
}
@ -715,15 +720,13 @@ pub const Manifest = struct {
if (file_path.len == 0) return error.InvalidFormat;
const prefixed_path: PrefixedPath = .{
.prefix = prefix,
.sub_path = file_path, // expires with file_contents
};
const cache_hash_file = f: {
const prefixed_path: PrefixedPath = .{
.prefix = prefix,
.sub_path = file_path, // expires with file_contents
};
if (idx < input_file_count) {
const file = &self.files.keys()[idx];
if (!file.prefixed_path.eql(prefixed_path))
return error.InvalidFormat;
file.stat = .{
.size = stat_size,
@ -779,11 +782,13 @@ pub const Manifest = struct {
} };
return error.CacheCheckFailed;
};
const name_match = pp.eql(prefixed_path);
const size_match = actual_stat.size == cache_hash_file.stat.size;
const mtime_match = actual_stat.mtime.nanoseconds == cache_hash_file.stat.mtime.nanoseconds;
const inode_match = actual_stat.inode == cache_hash_file.stat.inode;
if (!size_match or !mtime_match or !inode_match) {
if (!name_match or !size_match or !mtime_match or !inode_match) {
cache_hash_file.stat = .{
.size = actual_stat.size,
.mtime = actual_stat.mtime,

View file

@ -11,11 +11,13 @@ root_dir: Cache.Directory,
/// The path, relative to the root dir, that this `Path` represents.
/// Empty string means the root_dir is the path.
sub_path: []const u8 = "",
content_hash_name: ?[]const u8 = null,
pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
return .{
.root_dir = try p.root_dir.clone(arena),
.sub_path = try arena.dupe(u8, p.sub_path),
.content_hash_name = if (p.content_hash_name) |name| try arena.dupe(u8, name) else null,
};
}

View file

@ -300,7 +300,7 @@ pub fn addPrefixedOutputFileArg(
run.setName(b.fmt("{s} ({s})", .{ run.step.name, basename }));
}
return .{ .generated = .{ .file = &output.generated_file } };
return .{ .generated = .{ .file = &output.generated_file, .content_hash_name = output.basename } };
}
/// Appends an input file to the command line arguments.
@ -890,8 +890,8 @@ fn make(step: *Step, options: Step.MakeOptions) !void {
man.hash.addBytes(bytes);
},
.lazy_path => |lazy_path| {
const file_path = lazy_path.getPath2(b, step);
_ = try man.addFile(file_path, null);
const file_path = lazy_path.getPath3(b, step);
_ = try man.addFilePath(file_path, null);
},
.none => {},
}