macho: clean up hasher interface

Jakub Konka 2023-06-19 20:33:27 +02:00
parent 8087c134db
commit ef9d6331fc
3 changed files with 27 additions and 21 deletions
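In short: ParallelHasher no longer takes the allocator and thread pool on every hash() call; they become fields of the generated struct and are supplied once at construction. A minimal sketch of a caller after this change (the import paths, hashPages name, and slots argument are illustrative assumptions, not part of the diff):

const std = @import("std");
const Allocator = std.mem.Allocator;
const Sha256 = std.crypto.hash.sha2.Sha256;
const ThreadPool = @import("ThreadPool.zig"); // assumed: the compiler's thread pool type
const Hasher = @import("link/MachO/hasher.zig").ParallelHasher; // assumed path

// Illustrative caller: hash a file page-by-page into caller-provided slots.
fn hashPages(gpa: Allocator, pool: *ThreadPool, file: std.fs.File, slots: [][Sha256.digest_length]u8, page_size: u16) !void {
    // After this commit: dependencies are stored on the hasher itself.
    var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = pool };
    try hasher.hash(file, slots, .{
        .chunk_size = page_size,
        .max_file_size = try file.getEndPos(),
    });
    // Before this commit the same call site read:
    //   var hasher = Hasher(Sha256){};
    //   try hasher.hash(gpa, pool, file, slots, .{ ... });
}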


@@ -288,8 +288,8 @@ pub fn writeAdhocSignature(
     self.code_directory.inner.nCodeSlots = total_pages;
 
     // Calculate hash for each page (in file) and write it to the buffer
-    var hasher = Hasher(Sha256){};
-    try hasher.hash(gpa, comp.thread_pool, opts.file, self.code_directory.code_slots.items, .{
+    var hasher = Hasher(Sha256){ .allocator = gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(opts.file, self.code_directory.code_slots.items, .{
         .chunk_size = self.page_size,
         .max_file_size = opts.file_size,
     });
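For context, code_slots holds one hash per page of the signed file, which is why chunk_size is set to the page size and nCodeSlots to total_pages above. A hypothetical helper showing that relationship (total_pages itself is computed elsewhere in the file and only appears as context here):

const std = @import("std");
const mem = std.mem;

// Hypothetical sketch (not in the diff): one hash slot per page_size chunk
// of the file, with a trailing partial page rounded up to a full slot.
fn totalPages(file_size: u64, page_size: u64) u64 {
    return mem.alignForward(u64, file_size, page_size) / page_size;
}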


@@ -11,34 +11,39 @@ pub fn ParallelHasher(comptime Hasher: type) type {
     const hash_size = Hasher.digest_length;
 
     return struct {
-        pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
-            chunk_size: u16 = 0x4000,
+        allocator: Allocator,
+        thread_pool: *ThreadPool,
+
+        pub fn hash(self: Self, file: fs.File, out: [][hash_size]u8, opts: struct {
+            chunk_size: u64 = 0x4000,
             max_file_size: ?u64 = null,
         }) !void {
-            _ = self;
             var wg: WaitGroup = .{};
 
             const file_size = opts.max_file_size orelse try file.getEndPos();
-            const total_num_chunks = mem.alignForward(u64, file_size, opts.chunk_size) / opts.chunk_size;
-            assert(out.len >= total_num_chunks);
-
-            const buffer = try gpa.alloc(u8, opts.chunk_size * total_num_chunks);
-            defer gpa.free(buffer);
+            const buffer = try self.allocator.alloc(u8, opts.chunk_size * out.len);
+            defer self.allocator.free(buffer);
 
-            const results = try gpa.alloc(fs.File.PReadError!usize, total_num_chunks);
-            defer gpa.free(results);
+            const results = try self.allocator.alloc(fs.File.PReadError!usize, out.len);
+            defer self.allocator.free(results);
 
             {
                 wg.reset();
                 defer wg.wait();
 
-                var i: usize = 0;
-                while (i < total_num_chunks) : (i += 1) {
+                for (out, results, 0..) |*out_buf, *result, i| {
                     const fstart = i * opts.chunk_size;
                     const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
                     wg.start();
-                    try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
+                    try self.thread_pool.spawn(worker, .{
+                        file,
+                        fstart,
+                        buffer[fstart..][0..fsize],
+                        &(out_buf.*),
+                        &(result.*),
+                        &wg,
+                    });
                 }
             }
 
             for (results) |result| _ = try result;
@@ -56,5 +61,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
             err.* = file.preadAll(buffer, fstart);
             Hasher.hash(buffer, out, .{});
         }
+
+        const Self = @This();
     };
 }
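The hash() body above spawns one worker per output slot; only two lines of that worker appear in the second hunk. A sketch of what the spawned worker plausibly looks like, inferred from the spawn arguments and those two lines (the parameter names and the wg.finish() pairing are assumptions, not part of this commit):

// Inferred sketch of the worker referenced by self.thread_pool.spawn(worker, ...).
// It lives inside the same returned struct, so fs, WaitGroup, Hasher, and
// hash_size come from hasher.zig's existing imports and declarations.
fn worker(
    file: fs.File,
    fstart: u64,
    buffer: []u8,
    out: *[hash_size]u8,
    err: *fs.File.PReadError!usize,
    wg: *WaitGroup,
) void {
    defer wg.finish(); // balances the wg.start() at the spawn site
    err.* = file.preadAll(buffer, fstart); // read this chunk of the file
    Hasher.hash(buffer, out, .{}); // hash the chunk into its output slot
}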


@@ -14,20 +14,19 @@ const Hasher = @import("hasher.zig").ParallelHasher;
 /// TODO LLD also hashes the output filename to disambiguate between same builds with different
 /// output files. Should we also do that?
 pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
-    const num_chunks = @intCast(u64, comp.thread_pool.threads.len) * 10;
+    const num_chunks = comp.thread_pool.threads.len * 0x10;
     const chunk_size = @divTrunc(file_size + num_chunks - 1, num_chunks);
-    const total_hashes = mem.alignForward(u64, file_size, chunk_size) / chunk_size;
 
-    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
+    const hashes = try comp.gpa.alloc([Md5.digest_length]u8, num_chunks);
     defer comp.gpa.free(hashes);
 
-    var hasher = Hasher(Md5){};
-    try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
+    var hasher = Hasher(Md5){ .allocator = comp.gpa, .thread_pool = comp.thread_pool };
+    try hasher.hash(file, hashes, .{
         .chunk_size = chunk_size,
         .max_file_size = file_size,
     });
 
-    const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
+    const final_buffer = try comp.gpa.alloc(u8, num_chunks * Md5.digest_length);
     defer comp.gpa.free(final_buffer);
 
     for (hashes, 0..) |hash, i| {