re-apply "Fix C include files not being in whole cache (#11365)"

This reverts commit 06310e3d4e, reapplying
commit a430630002.

I deeply apologize to @moosichu and those affected by this bug. The
original fix was actually fine. When I reverted it, I misremembered
how the Cache API works. I thought the fix was going to introduce
nondeterminism into the hash, but I forgot that the order of files in
the manifest doesn't actually matter when checking for a cache hit.
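
To make that concrete, here is a conceptual sketch (not the actual
Cache implementation; the type and helper below are made up): the hit
check verifies each file recorded in the manifest on its own, so
permuting the order in which files were added cannot turn a hit into
a miss.

    const std = @import("std");

    // Hypothetical stand-in for one manifest entry; the real manifest
    // records more than this (hashes, prefixed paths, and so on).
    const RecordedFile = struct {
        path: []const u8,
        size: u64,
        mtime: i128,
    };

    // Every entry is checked independently, so the result is the same
    // for any ordering of `files`.
    fn manifestStillMatches(files: []const RecordedFile) !bool {
        for (files) |recorded| {
            const f = try std.fs.cwd().openFile(recorded.path, .{});
            defer f.close();
            const st = try f.stat();
            if (st.size != recorded.size or st.mtime != recorded.mtime)
                return false;
        }
        return true;
    }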

Actually, it does matter a little bit. This fix has a subtle downside:
it introduces the possibility of false negatives when checking for a
cache hit against a build from 2+ iterations ago. For example, if the
code goes from "foo" to "bar" and then back to "foo", it may look like
a cache miss when it should have been a hit, because 2 iterations ago
the code was the same. However, this is an uncommon use case, and all
it costs is a bit of wasted time and disk space. That said, my
suggestion from earlier still applies and would be a nice follow-up
enhancement to this fix:

The proper solution is, in whole cache mode, to append the hash inputs
to some data structure, and then, after the compilation is complete,
sort the hash inputs so that they end up in the same order every time
and apply them in sequence. No lock on the Cache object is needed for
this scheme.
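
A rough sketch of that scheme (hypothetical names, not code from this
commit): worker threads append their post-hit file paths to a small
mutex-protected list, and only after compilation finishes are the
entries sorted and applied to the manifest on a single thread, so the
Cache object itself is only ever touched from one thread.

    const std = @import("std");
    const Cache = @import("Cache.zig"); // assumed path to the compiler's cache module

    const PendingInput = struct {
        resolved_path: []const u8,
    };

    const PendingInputs = struct {
        mutex: std.Thread.Mutex = .{},
        list: std.ArrayListUnmanaged(PendingInput) = .{},

        /// Called from any worker thread; only this small list is
        /// locked, never the Cache object or its manifest.
        fn append(self: *PendingInputs, gpa: std.mem.Allocator, input: PendingInput) !void {
            self.mutex.lock();
            defer self.mutex.unlock();
            try self.list.append(gpa, input);
        }

        /// Called once, on a single thread, after compilation is
        /// complete. Sorting makes the order deterministic regardless
        /// of which worker finished first.
        fn applyTo(self: *PendingInputs, man: *Cache.Manifest) !void {
            std.sort.sort(PendingInput, self.list.items, {}, lessThan);
            for (self.list.items) |input| {
                try man.addFilePost(input.resolved_path);
            }
        }

        fn lessThan(_: void, a: PendingInput, b: PendingInput) bool {
            return std.mem.lessThan(u8, a.resolved_path, b.resolved_path);
        }
    };

With something like this, the call sites touched in the diff below
(cImport, updateCObject, semaFile, embedFile, stage2_fetch_file) would
append to the pending list instead of locking whole_cache_manifest_mutex
around each manifest call.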

closes #11063
Andrew Kelley 2022-10-30 16:51:13 -07:00
parent a5c96c49d0
commit 30b8b29f88
4 changed files with 23 additions and 10 deletions

@@ -690,7 +690,7 @@ pub const Manifest = struct {
         while (true) {
             switch (it.next() orelse return) {
                 .target, .target_must_resolve => return,
-                .prereq => |bytes| try self.addFilePost(bytes),
+                .prereq => |file_path| try self.addFilePost(file_path),
                 else => |err| {
                     try err.printError(error_buf.writer());
                     log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items });

@@ -48,6 +48,7 @@ bin_file: *link.File,
 c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{},
 /// This is a pointer to a local variable inside `update()`.
 whole_cache_manifest: ?*Cache.Manifest = null,
+whole_cache_manifest_mutex: std.Thread.Mutex = .{},
 link_error_flags: link.File.ErrorFlags = .{},
@@ -2199,8 +2200,8 @@ pub fn update(comp: *Compilation) !void {
         // We are about to obtain this lock, so here we give other processes a chance first.
         comp.bin_file.releaseLock();
-        comp.whole_cache_manifest = &man;
         man = comp.cache_parent.obtain();
+        comp.whole_cache_manifest = &man;
         try comp.addNonIncrementalStuffToCacheManifest(&man);
         const is_hit = man.hit() catch |err| {
@@ -3598,6 +3599,8 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult {
         const dep_basename = std.fs.path.basename(out_dep_path);
         try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
         if (comp.whole_cache_manifest) |whole_cache_manifest| {
+            comp.whole_cache_manifest_mutex.lock();
+            defer comp.whole_cache_manifest_mutex.unlock();
             try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
         }
@@ -4060,6 +4063,11 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P
         const dep_basename = std.fs.path.basename(dep_file_path);
         // Add the files depended on to the cache system.
         try man.addDepFilePost(zig_cache_tmp_dir, dep_basename);
+        if (comp.whole_cache_manifest) |whole_cache_manifest| {
+            comp.whole_cache_manifest_mutex.lock();
+            defer comp.whole_cache_manifest_mutex.unlock();
+            try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename);
+        }
         // Just to save disk space, we delete the file because it is never needed again.
         zig_cache_tmp_dir.deleteFile(dep_basename) catch |err| {
             log.warn("failed to delete '{s}': {s}", .{ dep_file_path, @errorName(err) });

@@ -4523,7 +4523,7 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
             error.AnalysisFail => {},
         }
-        if (mod.comp.whole_cache_manifest) |man| {
+        if (mod.comp.whole_cache_manifest) |whole_cache_manifest| {
             const source = file.getSource(gpa) catch |err| {
                 try reportRetryableFileError(mod, file, "unable to load source: {s}", .{@errorName(err)});
                 return error.AnalysisFail;
@@ -4541,7 +4541,9 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void {
             };
             errdefer gpa.free(resolved_path);
-            try man.addFilePostContents(resolved_path, source.bytes, source.stat);
+            mod.comp.whole_cache_manifest_mutex.lock();
+            defer mod.comp.whole_cache_manifest_mutex.unlock();
+            try whole_cache_manifest.addFilePostContents(resolved_path, source.bytes, source.stat);
         }
     } else {
         new_decl.analysis = .file_failure;
@@ -5036,10 +5038,12 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb
         resolved_root_path, resolved_path, sub_file_path, rel_file_path,
     });
-    if (mod.comp.whole_cache_manifest) |man| {
+    if (mod.comp.whole_cache_manifest) |whole_cache_manifest| {
         const copied_resolved_path = try gpa.dupe(u8, resolved_path);
         errdefer gpa.free(copied_resolved_path);
-        try man.addFilePostContents(copied_resolved_path, bytes, stat);
+        mod.comp.whole_cache_manifest_mutex.lock();
+        defer mod.comp.whole_cache_manifest_mutex.unlock();
+        try whole_cache_manifest.addFilePostContents(copied_resolved_path, bytes, stat);
     }
     keep_resolved_path = true; // It's now owned by embed_table.

@@ -459,10 +459,11 @@ export fn stage2_fetch_file(
     const comp = @intToPtr(*Compilation, stage1.userdata);
     const file_path = path_ptr[0..path_len];
     const max_file_size = std.math.maxInt(u32);
-    const contents = if (comp.whole_cache_manifest) |man|
-        man.addFilePostFetch(file_path, max_file_size) catch return null
-    else
-        std.fs.cwd().readFileAlloc(comp.gpa, file_path, max_file_size) catch return null;
+    const contents = if (comp.whole_cache_manifest) |man| blk: {
+        comp.whole_cache_manifest_mutex.lock();
+        defer comp.whole_cache_manifest_mutex.unlock();
+        break :blk man.addFilePostFetch(file_path, max_file_size) catch return null;
+    } else std.fs.cwd().readFileAlloc(comp.gpa, file_path, max_file_size) catch return null;
     result_len.* = contents.len;
     // TODO https://github.com/ziglang/zig/issues/3328#issuecomment-716749475
     if (contents.len == 0) return @intToPtr(?[*]const u8, 0x1);