//! Represents one independent job whose responsibility is to:
//!
//! 1. Check the global zig package cache to see if the hash already exists.
//!    If so, load, parse, and validate the build.zig.zon file therein, and
//!    go to step 8. Likewise if the location is a relative path, treat this
//!    the same as a cache hit. Otherwise, proceed.
//! 2. Fetch and unpack a URL into a temporary directory.
//! 3. Load, parse, and validate the build.zig.zon file therein. It is allowed
//!    for the file to be missing, in which case this fetched package is considered
//!    to be a "naked" package.
//! 4. Apply inclusion rules of the build.zig.zon to the temporary directory by
//!    deleting excluded files. If any files had errors for files that were
//!    ultimately excluded, those errors should be ignored, such as failure to
//!    create symlinks that weren't supposed to be included anyway.
//! 5. Compute the package hash based on the remaining files in the temporary
//!    directory.
//! 6. Rename the temporary directory into the global zig package cache
//!    directory. If the hash already exists, delete the temporary directory and
//!    leave the zig package cache directory untouched as it may be in use by the
//!    system. This is done even if the hash is invalid, in case the package with
//!    the different hash is used in the future.
//! 7. Validate the computed hash against the expected hash. If invalid,
//!    this job is done.
//! 8. Spawn a new fetch job for each dependency in the manifest file. Use
//!    a mutex and a hash map so that redundant jobs do not get queued up.
//!
//! All of this must be done with only referring to the state inside this struct
//! because this work will be done in a dedicated thread.
const Fetch = @This();

const builtin = @import("builtin");
const native_os = builtin.os.tag;

const std = @import("std");
const Io = std.Io;
const fs = std.fs;
const assert = std.debug.assert;
const ascii = std.ascii;
const Allocator = std.mem.Allocator;
const Cache = std.Build.Cache;

const git = @import("Fetch/git.zig");
const Package = @import("../Package.zig");
const Manifest = Package.Manifest;
const ErrorBundle = std.zig.ErrorBundle;

/// Per-task arena. Its `child_allocator` is used as the general purpose
/// allocator by `run` and `JobQueue.deinit`.
arena: std.heap.ArenaAllocator,
/// Where the package contents come from.
location: Location,
/// Token in the parent manifest used as the source location for most errors
/// about this dependency (see `srcLoc`).
location_tok: std.zig.Ast.TokenIndex,
/// Token of the dependency's hash field in the parent manifest, if any.
hash_tok: std.zig.Ast.OptionalTokenIndex,
/// Token of the dependency's name; used for the "package not found" error.
name_tok: std.zig.Ast.TokenIndex,
lazy_status: LazyStatus,
/// Root of the package whose manifest declared this dependency.
parent_package_root: Cache.Path,
/// AST of the parent manifest; when null, errors carry no source location.
parent_manifest_ast: ?*const std.zig.Ast,
prog_node: std.Progress.Node,
/// State shared among all `Fetch` tasks.
job_queue: *JobQueue,
/// If true, don't add an error for a missing hash. This flag is not passed
/// down to recursive dependencies. It's intended to be used only by the CLI.
omit_missing_hash_error: bool,
/// If true, don't fail when a manifest file is missing the `paths` field,
/// which specifies inclusion rules. This is intended to be true for the first
/// fetch task and false for the recursive dependencies.
/// NOTE(review): `queueJobsForDeps` sets this to true for recursive
/// dependencies as well - confirm which behavior is intended.
allow_missing_paths_field: bool,
allow_missing_fingerprint: bool,
allow_name_string: bool,
/// If true and URL points to a Git repository, will use the latest commit.
use_latest_commit: bool,

// Above this are fields provided as inputs to `run`.
// Below this are fields populated by `run`.

/// This will either be relative to `global_cache`, or to the build root of
/// the root package.
package_root: Cache.Path,
error_bundle: ErrorBundle.Wip,
manifest: ?Manifest,
manifest_ast: std.zig.Ast,
computed_hash: ComputedHash,
/// Fetch logic notices whether a package has a build.zig file and sets this flag.
has_build_zig: bool,
/// Indicates whether the task aborted due to an out-of-memory condition.
oom_flag: bool,
/// If `use_latest_commit` was true, this will be set to the commit that was used.
/// If the resource pointed to by the location is not a Git-repository, this
/// will be left unchanged.
latest_commit: ?git.Oid,

// This field is used by the CLI only, untouched by this file.

/// The module for this `Fetch` task's package, which exposes `build.zig` as
/// the root source file.
module: ?*Package.Module,

/// Whether a dependency is lazy and, if so, whether it was found.
pub const LazyStatus = enum {
    /// Not lazy.
    eager,
    /// Lazy, found.
    available,
    /// Lazy, not found.
    unavailable,
};

/// Contains shared state among all `Fetch` tasks.
pub const JobQueue = struct {
    io: Io,
    mutex: std.Thread.Mutex = .{},
    /// It's an array hash map so that it can be sorted before rendering the
    /// dependencies.zig source file.
    /// Protected by `mutex`.
    table: Table = .{},
    /// `table` may be missing some tasks such as ones that failed, so this
    /// field contains references to all of them.
    /// Protected by `mutex`.
    all_fetches: std.ArrayListUnmanaged(*Fetch) = .empty,

    http_client: *std.http.Client,
    wait_group: Io.Group = .init,
    global_cache: Cache.Directory,
    /// If true then, no fetching occurs, and:
    /// * The `global_cache` directory is assumed to be the direct parent
    ///   directory of on-disk packages rather than having the "p/" directory
    ///   prefix inside of it.
    /// * An error occurs if any non-lazy packages are not already present in
    ///   the package cache directory.
    /// * Missing hash field causes an error, and no fetching occurs so it does
    ///   not print the correct hash like usual.
    read_only: bool,
    recursive: bool,
    /// Dumps hash information to stdout which can be used to troubleshoot why
    /// two hashes of the same package do not match.
    /// If this is true, `recursive` must be false.
    debug_hash: bool,
    work_around_btrfs_bug: bool,
    mode: Mode,
    /// Set of hashes that will be additionally fetched even if they are marked
    /// as lazy.
    unlazy_set: UnlazySet = .{},

    pub const Mode = enum {
        /// Non-lazy dependencies are always fetched.
        /// Lazy dependencies are fetched only when needed.
        needed,
        /// Both non-lazy and lazy dependencies are always fetched.
        all,
    };
    pub const Table = std.AutoArrayHashMapUnmanaged(Package.Hash, *Fetch);
    pub const UnlazySet = std.AutoArrayHashMapUnmanaged(Package.Hash, void);

    /// Releases `table`, every task in `all_fetches`, and the list itself.
    /// Does nothing when no task was ever registered, because the allocator
    /// is taken from the first task's arena.
    pub fn deinit(jq: *JobQueue) void {
        if (jq.all_fetches.items.len == 0) return;
        const gpa = jq.all_fetches.items[0].arena.child_allocator;
        jq.table.deinit(gpa);
        // These must be deinitialized in reverse order because subsequent
        // `Fetch` instances are allocated in prior ones' arenas.
        // Sorry, I know it's a bit weird, but it slightly simplifies the
        // critical section.
        while (jq.all_fetches.pop()) |f| f.deinit();
        jq.all_fetches.deinit(gpa);
        jq.* = undefined;
    }

    /// Dumps all subsequent error bundles into the first one.
    /// NOTE(review): indexes `all_fetches.items[0]` unconditionally, so the
    /// caller presumably must have registered the root task - confirm.
    pub fn consolidateErrors(jq: *JobQueue) !void {
        const root = &jq.all_fetches.items[0].error_bundle;
        const gpa = root.gpa;
        for (jq.all_fetches.items[1..]) |fetch| {
            if (fetch.error_bundle.root_list.items.len > 0) {
                var bundle = try fetch.error_bundle.toOwnedBundle("");
                defer bundle.deinit(gpa);
                try root.addBundleAsRoots(bundle);
            }
        }
    }

    /// Creates the dependencies.zig source code for the build runner to obtain
    /// via `@import("@dependencies")`.
    ///
    /// Sorts `table` by hash bytes as a side effect, then appends one struct
    /// per non-root package followed by the `root_deps` list to `buf`.
    pub fn createDependenciesSource(jq: *JobQueue, buf: *std.array_list.Managed(u8)) Allocator.Error!void {
        const keys = jq.table.keys();

        assert(keys.len != 0); // caller should have added the first one
        if (keys.len == 1) {
            // This is the first one. It must have no dependencies.
            return createEmptyDependenciesSource(buf);
        }

        try buf.appendSlice("pub const packages = struct {\n");

        // Ensure the generated .zig file is deterministic.
        jq.table.sortUnstable(@as(struct {
            keys: []const Package.Hash,
            pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
                return std.mem.lessThan(u8, &ctx.keys[a_index].bytes, &ctx.keys[b_index].bytes);
            }
        }, .{ .keys = keys }));

        for (keys, jq.table.values()) |*hash, fetch| {
            if (fetch == jq.all_fetches.items[0]) {
                // The first one is a dummy package for the current project.
                continue;
            }

            const hash_slice = hash.toSlice();

            try buf.print(
                \\ pub const {f} = struct {{
                \\
            , .{std.zig.fmtId(hash_slice)});

            // Lazy packages additionally expose `available`; an unavailable
            // one gets nothing else and its struct is closed right away.
            lazy: {
                switch (fetch.lazy_status) {
                    .eager => break :lazy,
                    .available => {
                        try buf.appendSlice(
                            \\ pub const available = true;
                            \\
                        );
                        break :lazy;
                    },
                    .unavailable => {
                        try buf.appendSlice(
                            \\ pub const available = false;
                            \\ };
                            \\
                        );
                        continue;
                    },
                }
            }

            try buf.print(
                \\ pub const build_root = "{f}";
                \\
            , .{std.fmt.alt(fetch.package_root, .formatEscapeString)});

            if (fetch.has_build_zig) {
                try buf.print(
                    \\ pub const build_zig = @import("{f}");
                    \\
                , .{std.zig.fmtString(hash_slice)});
            }

            if (fetch.manifest) |*manifest| {
                try buf.appendSlice(
                    \\ pub const deps: []const struct { []const u8, []const u8 } = &.{
                    \\
                );
                for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| {
                    // Dependencies for which `depDigest` yields null are omitted.
                    const h = depDigest(fetch.package_root, jq.global_cache, dep) orelse continue;
                    try buf.print(
                        " .{{ \"{f}\", \"{f}\" }},\n",
                        .{ std.zig.fmtString(name), std.zig.fmtString(h.toSlice()) },
                    );
                }

                try buf.appendSlice(
                    \\ };
                    \\ };
                    \\
                );
            } else {
                try buf.appendSlice(
                    \\ pub const deps: []const struct { []const u8, []const u8 } = &.{};
                    \\ };
                    \\
                );
            }
        }

        try buf.appendSlice(
            \\};
            \\
            \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{
            \\
        );

        const root_fetch = jq.all_fetches.items[0];
        // Asserts that the root task has a manifest.
        const root_manifest = &root_fetch.manifest.?;

        for (root_manifest.dependencies.keys(), root_manifest.dependencies.values()) |name, dep| {
            const h = depDigest(root_fetch.package_root, jq.global_cache, dep) orelse continue;
            try buf.print(
                " .{{ \"{f}\", \"{f}\" }},\n",
                .{ std.zig.fmtString(name), std.zig.fmtString(h.toSlice()) },
            );
        }
        try buf.appendSlice("};\n");
    }

    /// Appends a dependencies source that declares no packages and no root
    /// dependencies.
    pub fn createEmptyDependenciesSource(buf: *std.array_list.Managed(u8)) Allocator.Error!void {
        try buf.appendSlice(
            \\pub const packages = struct {};
            \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{};
            \\
        );
    }
};

/// Where a package's contents are obtained from.
pub const Location = union(enum) {
    remote: Remote,
    /// A directory found inside the parent package.
    relative_path: Cache.Path,
    /// Recursive Fetch tasks will never use this Location, but it may be
    /// passed in by the CLI. Indicates the file contents here should be copied
    /// into the global package cache. It may be a file relative to the cwd or
    /// absolute, in which case it should be treated exactly like a `file://`
    /// URL, or a directory, in which case it should be treated as an
    /// already-unpacked directory (but still needs to be copied into the
    /// global package cache and have inclusion rules applied).
    path_or_url: []const u8,

    pub const Remote = struct {
        url: []const u8,
        /// If this is null it means the user omitted the hash field from a dependency.
        /// It will be an error but the logic should still fetch and print the discovered hash.
        hash: ?Package.Hash,
    };
};

pub const RunError = error{
    OutOfMemory,
    /// This error code is intended to be handled by inspecting the
    /// `error_bundle` field.
    FetchFailed,
};

/// Executes this fetch task.
///
/// Initializes `error_bundle`, then depending on `location`:
/// * `relative_path`: validates the path, loads the manifest in place and
///   queues dependency jobs (no hashing, no copying).
/// * `path_or_url`: tries the argument as a directory, then as a file, then
///   as a URL, and hands the opened resource to `runResource`.
/// * `remote`: uses the package from the global cache when the declared hash
///   is already present there, otherwise fetches the URL via `runResource`.
///
/// Failures are recorded in `error_bundle` and reported as
/// `error.FetchFailed`.
pub fn run(f: *Fetch) RunError!void {
    const io = f.job_queue.io;
    const eb = &f.error_bundle;
    const arena = f.arena.allocator();
    const gpa = f.arena.child_allocator;
    const cache_root = f.job_queue.global_cache;

    try eb.init(gpa);

    // Check the global zig package cache to see if the hash already exists. If
    // so, load, parse, and validate the build.zig.zon file therein, and skip
    // ahead to queuing up jobs for dependencies. Likewise if the location is a
    // relative path, treat this the same as a cache hit. Otherwise, proceed.

    const remote = switch (f.location) {
        .relative_path => |pkg_root| {
            if (fs.path.isAbsolute(pkg_root.sub_path)) return f.fail(
                f.location_tok,
                try eb.addString("expected path relative to build root; found absolute path"),
            );
            if (f.hash_tok.unwrap()) |hash_tok| return f.fail(
                hash_tok,
                try eb.addString("path-based dependencies are not hashed"),
            );
            // Packages fetched by URL may not use relative paths to escape outside the
            // fetched package directory from within the package cache.
            if (pkg_root.root_dir.eql(cache_root)) {
                // `parent_package_root.sub_path` contains a path like this:
                // "p/$hash", or
                // "p/$hash/foo", with possibly more directories after "foo".
                // We want to fail unless the resolved relative path has a
                // prefix of "p/$hash/".
                // NOTE(review): the check below compares against "p/$hash"
                // without a trailing separator, so a sibling whose name merely
                // starts with "$hash" would also pass - confirm this is fine.
                const prefix_len: usize = if (f.job_queue.read_only) 0 else "p/".len;
                const parent_sub_path = f.parent_package_root.sub_path;
                const end = find_end: {
                    if (parent_sub_path.len > prefix_len) {
                        // Use `isSep` instead of `indexOfScalarPos` to account for
                        // Windows accepting both `\` and `/` as path separators.
                        for (parent_sub_path[prefix_len..], prefix_len..) |c, i| {
                            if (std.fs.path.isSep(c)) break :find_end i;
                        }
                    }
                    break :find_end parent_sub_path.len;
                };
                const expected_prefix = parent_sub_path[0..end];
                if (!std.mem.startsWith(u8, pkg_root.sub_path, expected_prefix)) {
                    return f.fail(
                        f.location_tok,
                        try eb.printString("dependency path outside project: '{f}'", .{pkg_root}),
                    );
                }
            }
            f.package_root = pkg_root;
            try loadManifest(f, pkg_root);
            if (!f.has_build_zig) try checkBuildFileExistence(f);
            if (!f.job_queue.recursive) return;
            return queueJobsForDeps(f);
        },
        .remote => |remote| remote,
        .path_or_url => |path_or_url| {
            if (fs.cwd().openDir(path_or_url, .{ .iterate = true })) |dir| {
                var resource: Resource = .{ .dir = dir };
                return f.runResource(path_or_url, &resource, null);
            } else |dir_err| {
                var server_header_buffer: [init_resource_buffer_size]u8 = undefined;

                // Only retry as a regular file when the path exists but is not
                // a directory; otherwise keep the directory error for the
                // final message.
                const file_err = if (dir_err == error.NotDir) e: {
                    if (fs.cwd().openFile(path_or_url, .{})) |file| {
                        var resource: Resource = .{ .file = file.reader(io, &server_header_buffer) };
                        return f.runResource(path_or_url, &resource, null);
                    } else |err| break :e err;
                } else dir_err;

                const uri = std.Uri.parse(path_or_url) catch |uri_err| {
                    return f.fail(0, try eb.printString(
                        "'{s}' could not be recognized as a file path ({t}) or an URL ({t})",
                        .{ path_or_url, file_err, uri_err },
                    ));
                };
                var resource: Resource = undefined;
                try f.initResource(uri, &resource, &server_header_buffer);
                return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, null);
            }
        },
    };

    if (remote.hash) |expected_hash| {
        // Build "p<sep><hash>" in a stack buffer; in read-only mode the "p/"
        // prefix is skipped because `global_cache` is then the direct parent
        // of the packages.
        var prefixed_pkg_sub_path_buffer: [Package.Hash.max_len + 2]u8 = undefined;
        prefixed_pkg_sub_path_buffer[0] = 'p';
        prefixed_pkg_sub_path_buffer[1] = fs.path.sep;
        const hash_slice = expected_hash.toSlice();
        @memcpy(prefixed_pkg_sub_path_buffer[2..][0..hash_slice.len], hash_slice);
        const prefixed_pkg_sub_path = prefixed_pkg_sub_path_buffer[0 .. 2 + hash_slice.len];
        const prefix_len: usize = if (f.job_queue.read_only) "p/".len else 0;
        const pkg_sub_path = prefixed_pkg_sub_path[prefix_len..];
        if (cache_root.handle.access(pkg_sub_path, .{})) |_| {
            assert(f.lazy_status != .unavailable);
            f.package_root = .{
                .root_dir = cache_root,
                .sub_path = try arena.dupe(u8, pkg_sub_path),
            };
            try loadManifest(f, f.package_root);
            try checkBuildFileExistence(f);
            if (!f.job_queue.recursive) return;
            return queueJobsForDeps(f);
        } else |err| switch (err) {
            error.FileNotFound => {
                switch (f.lazy_status) {
                    .eager => {},
                    // A lazy package that is not in `unlazy_set` is simply
                    // marked as not found; this is not an error.
                    .available => if (!f.job_queue.unlazy_set.contains(expected_hash)) {
                        f.lazy_status = .unavailable;
                        return;
                    },
                    .unavailable => unreachable,
                }
                if (f.job_queue.read_only) return f.fail(
                    f.name_tok,
                    try eb.printString("package not found at '{f}{s}'", .{
                        cache_root, pkg_sub_path,
                    }),
                );
            },
            else => |e| {
                try eb.addRootErrorMessage(.{
                    .msg = try eb.printString("unable to open global package cache directory '{f}{s}': {s}", .{
                        cache_root, pkg_sub_path, @errorName(e),
                    }),
                });
                return error.FetchFailed;
            },
        }
    } else if (f.job_queue.read_only) {
        try eb.addRootErrorMessage(.{
            .msg = try eb.addString("dependency is missing hash field"),
            .src_loc = try f.srcLoc(f.location_tok),
        });
        return error.FetchFailed;
    }

    // Fetch and unpack the remote into a temporary directory.

    const uri = std.Uri.parse(remote.url) catch |err| return f.fail(
        f.location_tok,
        try eb.printString("invalid URI: {s}", .{@errorName(err)}),
    );
    var buffer: [init_resource_buffer_size]u8 = undefined;
    var resource: Resource = undefined;
    try f.initResource(uri, &resource, &buffer);
    return f.runResource(try uri.path.toRawMaybeAlloc(arena), &resource, remote.hash);
}

/// Releases the error bundle and the task's arena.
pub fn deinit(f: *Fetch) void {
    f.error_bundle.deinit();
    f.arena.deinit();
}

/// Consumes `resource`, even if an error is returned.
///
/// Unpacks `resource` into a fresh temporary directory inside the global
/// cache, loads the manifest found there, validates the unpack errors against
/// the manifest's inclusion rules, computes the package hash and renames the
/// result to `p/<hash>` inside the global cache.
///
/// When `remote_hash` is given it is compared with the computed hash; when it
/// is null a "missing hash" error is emitted unless `omit_missing_hash_error`
/// is set. On success, dependency jobs are queued if the job queue is
/// recursive.
fn runResource(
    f: *Fetch,
    uri_path: []const u8,
    resource: *Resource,
    remote_hash: ?Package.Hash,
) RunError!void {
    const io = f.job_queue.io;
    defer resource.deinit(io);
    const arena = f.arena.allocator();
    const eb = &f.error_bundle;
    const s = fs.path.sep_str;
    const cache_root = f.job_queue.global_cache;
    const rand_int = std.crypto.random.int(u64);
    const tmp_dir_sub_path = "tmp" ++ s ++ std.fmt.hex(rand_int);

    const package_sub_path = blk: {
        const tmp_directory_path = try cache_root.join(arena, &.{tmp_dir_sub_path});
        var tmp_directory: Cache.Directory = .{
            .path = tmp_directory_path,
            .handle = handle: {
                const dir = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{
                    .iterate = true,
                }) catch |err| {
                    try eb.addRootErrorMessage(.{
                        .msg = try eb.printString("unable to create temporary directory '{s}': {s}", .{
                            tmp_directory_path, @errorName(err),
                        }),
                    });
                    return error.FetchFailed;
                };
                break :handle dir;
            },
        };
        // Closes whatever handle `tmp_directory` holds when this block exits,
        // including the one reopened by the btrfs workaround below.
        defer tmp_directory.handle.close();

        // Fetch and unpack a resource into a temporary directory.
        var unpack_result = try unpackResource(f, resource, uri_path, tmp_directory);

        var pkg_path: Cache.Path = .{ .root_dir = tmp_directory, .sub_path = unpack_result.root_dir };

        // Apply btrfs workaround if needed. Reopen tmp_directory.
        if (native_os == .linux and f.job_queue.work_around_btrfs_bug) {
            // https://github.com/ziglang/zig/issues/17095
            // Reopen through `tmp_directory` itself (and then refresh the copy
            // held by `pkg_path`) so that the deferred close above releases
            // the reopened handle exactly once instead of closing the stale
            // one a second time and leaking the new one.
            tmp_directory.handle.close();
            tmp_directory.handle = cache_root.handle.makeOpenPath(tmp_dir_sub_path, .{
                .iterate = true,
            }) catch @panic("btrfs workaround failed");
            pkg_path.root_dir = tmp_directory;
        }

        // Load, parse, and validate the unpacked build.zig.zon file. It is allowed
        // for the file to be missing, in which case this fetched package is
        // considered to be a "naked" package.
        try loadManifest(f, pkg_path);

        const filter: Filter = .{
            .include_paths = if (f.manifest) |m| m.paths else .{},
        };

        // Ignore errors that were excluded by manifest, such as failure to
        // create symlinks that weren't supposed to be included anyway.
        try unpack_result.validate(f, filter);

        // Apply the manifest's inclusion rules to the temporary directory by
        // deleting excluded files.
        // Empty directories have already been omitted by `unpackResource`.
        // Compute the package hash based on the remaining files in the temporary
        // directory.
        f.computed_hash = try computeHash(f, pkg_path, filter);

        break :blk if (unpack_result.root_dir.len > 0)
            try fs.path.join(arena, &.{ tmp_dir_sub_path, unpack_result.root_dir })
        else
            tmp_dir_sub_path;
    };

    const computed_package_hash = computedPackageHash(f);

    // Rename the temporary directory into the global zig package cache
    // directory. If the hash already exists, delete the temporary directory
    // and leave the zig package cache directory untouched as it may be in use
    // by the system. This is done even if the hash is invalid, in case the
    // package with the different hash is used in the future.

    f.package_root = .{
        .root_dir = cache_root,
        .sub_path = try std.fmt.allocPrint(arena, "p" ++ s ++ "{s}", .{computed_package_hash.toSlice()}),
    };
    renameTmpIntoCache(cache_root.handle, package_sub_path, f.package_root.sub_path) catch |err| {
        const src = try cache_root.join(arena, &.{tmp_dir_sub_path});
        const dest = try cache_root.join(arena, &.{f.package_root.sub_path});
        try eb.addRootErrorMessage(.{ .msg = try eb.printString(
            "unable to rename temporary directory '{s}' into package cache directory '{s}': {s}",
            .{ src, dest, @errorName(err) },
        ) });
        return error.FetchFailed;
    };
    // Remove temporary directory root if not already renamed to global cache.
    if (!std.mem.eql(u8, package_sub_path, tmp_dir_sub_path)) {
        // Best effort: a leftover empty temporary directory is harmless.
        cache_root.handle.deleteDir(tmp_dir_sub_path) catch {};
    }

    // Validate the computed hash against the expected hash. If invalid, this
    // job is done.

    if (remote_hash) |declared_hash| {
        const hash_tok = f.hash_tok.unwrap().?;
        if (declared_hash.isOld()) {
            // Old-format hashes are compared against the hex multihash of the
            // digest rather than against the full package hash.
            const actual_hex = Package.multiHashHexDigest(f.computed_hash.digest);
            if (!std.mem.eql(u8, declared_hash.toSlice(), &actual_hex)) {
                return f.fail(hash_tok, try eb.printString(
                    "hash mismatch: manifest declares '{s}' but the fetched package has '{s}'",
                    .{ declared_hash.toSlice(), actual_hex },
                ));
            }
        } else {
            if (!computed_package_hash.eql(&declared_hash)) {
                return f.fail(hash_tok, try eb.printString(
                    "hash mismatch: manifest declares '{s}' but the fetched package has '{s}'",
                    .{ declared_hash.toSlice(), computed_package_hash.toSlice() },
                ));
            }
        }
    } else if (!f.omit_missing_hash_error) {
        const notes_len = 1;
        try eb.addRootErrorMessage(.{
            .msg = try eb.addString("dependency is missing hash field"),
            .src_loc = try f.srcLoc(f.location_tok),
            .notes_len = notes_len,
        });
        const notes_start = try eb.reserveNotes(notes_len);
        eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
            .msg = try eb.printString("expected .hash = \"{s}\",", .{computed_package_hash.toSlice()}),
        }));
        return error.FetchFailed;
    }

    // Spawn a new fetch job for each dependency in the manifest file. Use
    // a mutex and a hash map so that redundant jobs do not get queued up.
    if (!f.job_queue.recursive) return;
    return queueJobsForDeps(f);
}

/// Builds the package hash from `computed_hash` and, when present, the
/// manifest's name, version and id. The total size is saturated to `u32`.
/// Packages without a manifest use the placeholder name "N", version "V" and
/// id 0xffff.
pub fn computedPackageHash(f: *const Fetch) Package.Hash {
    const saturated_size = std.math.cast(u32, f.computed_hash.total_size) orelse std.math.maxInt(u32);
    if (f.manifest) |man| {
        var version_buffer: [32]u8 = undefined;
        // If the formatted version does not fit, the whole buffer is used.
        const version: []const u8 = std.fmt.bufPrint(&version_buffer, "{f}", .{man.version}) catch &version_buffer;
        return .init(f.computed_hash.digest, man.name, version, man.id, saturated_size);
    }
    // In the future build.zig.zon fields will be added to allow overriding these values
    // for naked tarballs.
    return .init(f.computed_hash.digest, "N", "V", 0xffff, saturated_size);
}

/// `computeHash` gets a free check for the existence of `build.zig`, but when
/// not computing a hash, we need to do a syscall to check for it.
///
/// Sets `has_build_zig` when the file is accessible; a missing file is not an
/// error, any other access failure is reported as `error.FetchFailed`.
fn checkBuildFileExistence(f: *Fetch) RunError!void {
    const eb = &f.error_bundle;
    if (f.package_root.access(Package.build_zig_basename, .{})) |_| {
        f.has_build_zig = true;
    } else |err| switch (err) {
        error.FileNotFound => {},
        else => |e| {
            try eb.addRootErrorMessage(.{
                .msg = try eb.printString("unable to access '{f}{s}': {s}", .{
                    f.package_root, Package.build_zig_basename, @errorName(e),
                }),
            });
            return error.FetchFailed;
        },
    }
}

/// This function populates `f.manifest` or leaves it `null`.
///
/// A missing manifest file is not an error. Read failures, syntax errors and
/// manifest validation errors are added to `error_bundle` and reported as
/// `error.FetchFailed`. `manifest_ast` is populated whenever the file could be
/// read.
fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {
    const eb = &f.error_bundle;
    const arena = f.arena.allocator();
    const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions(
        try fs.path.join(arena, &.{ pkg_root.sub_path, Manifest.basename }),
        arena,
        .limited(Manifest.max_bytes),
        .@"1",
        0,
    ) catch |err| switch (err) {
        error.FileNotFound => return,
        else => |e| {
            const file_path = try pkg_root.join(arena, Manifest.basename);
            try eb.addRootErrorMessage(.{
                .msg = try eb.printString("unable to load package manifest '{f}': {s}", .{
                    file_path, @errorName(e),
                }),
            });
            return error.FetchFailed;
        },
    };

    const ast = &f.manifest_ast;
    ast.* = try std.zig.Ast.parse(arena, manifest_bytes, .zon);

    if (ast.errors.len > 0) {
        const file_path = try std.fmt.allocPrint(arena, "{f}" ++ fs.path.sep_str ++ Manifest.basename, .{pkg_root});
        try std.zig.putAstErrorsIntoBundle(arena, ast.*, file_path, eb);
        return error.FetchFailed;
    }

    f.manifest = try Manifest.parse(arena, ast.*, .{
        .allow_missing_paths_field = f.allow_missing_paths_field,
        .allow_missing_fingerprint = f.allow_missing_fingerprint,
        .allow_name_string = f.allow_name_string,
    });
    const manifest = &f.manifest.?;

    if (manifest.errors.len > 0) {
        const src_path = try eb.printString("{f}" ++ fs.path.sep_str ++ "{s}", .{ pkg_root, Manifest.basename });
        try manifest.copyErrorsIntoBundle(ast.*, src_path, eb);
        return error.FetchFailed;
    }
}

/// Creates one `Fetch` task per not-yet-known dependency of this package's
/// manifest and schedules them on the job queue's wait group.
///
/// Asserts that the job queue is recursive. The new tasks are allocated in
/// this task's arena and registered in `job_queue.table` / `all_fetches`
/// while holding `job_queue.mutex`.
fn queueJobsForDeps(f: *Fetch) RunError!void {
    const io = f.job_queue.io;

    assert(f.job_queue.recursive);

    // If the package does not have a build.zig.zon file then there are no dependencies.
    const manifest = f.manifest orelse return;

    const new_fetches, const prog_names = nf: {
        const parent_arena = f.arena.allocator();
        const gpa = f.arena.child_allocator;
        const cache_root = f.job_queue.global_cache;
        const dep_names = manifest.dependencies.keys();
        const deps = manifest.dependencies.values();
        // Grab the new tasks into a temporary buffer so we can unlock that mutex
        // as fast as possible.
        // This overallocates any fetches that get skipped by the `continue` in the
        // loop below.
        const new_fetches = try parent_arena.alloc(Fetch, deps.len);
        const prog_names = try parent_arena.alloc([]const u8, deps.len);
        var new_fetch_index: usize = 0;

        f.job_queue.mutex.lock();
        defer f.job_queue.mutex.unlock();

        try f.job_queue.all_fetches.ensureUnusedCapacity(gpa, new_fetches.len);
        try f.job_queue.table.ensureUnusedCapacity(gpa, @intCast(new_fetches.len));

        // There are four cases here:
        // * Correct hash is provided by manifest.
        //   - Hash map already has the entry, no need to add it again.
        // * Incorrect hash is provided by manifest.
        //   - Hash mismatch error emitted; `queueJobsForDeps` is not called.
        // * Hash is not provided by manifest.
        //   - Hash missing error emitted; `queueJobsForDeps` is not called.
        // * path-based location is used without a hash.
        //   - Hash is added to the table based on the path alone before
        //     calling run(); no need to add it again.
        //
        // If we add a dep as lazy and then later try to add the same dep as eager,
        // eagerness takes precedence and the existing entry is updated and re-scheduled
        // for fetching.

        for (dep_names, deps) |dep_name, dep| {
            var promoted_existing_to_eager = false;
            const new_fetch = &new_fetches[new_fetch_index];
            const location: Location = switch (dep.location) {
                .url => |url| .{
                    .remote = .{
                        .url = url,
                        .hash = h: {
                            const h = dep.hash orelse break :h null;
                            const pkg_hash: Package.Hash = .fromSlice(h);
                            // An empty hash string is never entered into the table.
                            if (h.len == 0) break :h pkg_hash;
                            const gop = f.job_queue.table.getOrPutAssumeCapacity(pkg_hash);
                            if (gop.found_existing) {
                                if (!dep.lazy and gop.value_ptr.*.lazy_status != .eager) {
                                    gop.value_ptr.*.lazy_status = .eager;
                                    promoted_existing_to_eager = true;
                                } else {
                                    continue;
                                }
                            }
                            gop.value_ptr.* = new_fetch;
                            break :h pkg_hash;
                        },
                    },
                },
                .path => |rel_path| l: {
                    // This might produce an invalid path, which is checked for
                    // at the beginning of run().
                    const new_root = try f.package_root.resolvePosix(parent_arena, rel_path);
                    const pkg_hash = relativePathDigest(new_root, cache_root);
                    const gop = f.job_queue.table.getOrPutAssumeCapacity(pkg_hash);
                    if (gop.found_existing) {
                        if (!dep.lazy and gop.value_ptr.*.lazy_status != .eager) {
                            gop.value_ptr.*.lazy_status = .eager;
                            promoted_existing_to_eager = true;
                        } else {
                            continue;
                        }
                    }
                    gop.value_ptr.* = new_fetch;
                    break :l .{ .relative_path = new_root };
                },
            };
            prog_names[new_fetch_index] = dep_name;
            new_fetch_index += 1;
            // NOTE(review): a task that replaces a promoted table entry is not
            // appended to `all_fetches`, so `JobQueue.deinit` and
            // `consolidateErrors` will not visit it - confirm this is intended.
            if (!promoted_existing_to_eager) {
                f.job_queue.all_fetches.appendAssumeCapacity(new_fetch);
            }
            new_fetch.* = .{
                .arena = std.heap.ArenaAllocator.init(gpa),
                .location = location,
                .location_tok = dep.location_tok,
                .hash_tok = dep.hash_tok,
                .name_tok = dep.name_tok,
                .lazy_status = switch (f.job_queue.mode) {
                    .needed => if (dep.lazy) .available else .eager,
                    .all => .eager,
                },
                .parent_package_root = f.package_root,
                .parent_manifest_ast = &f.manifest_ast,
                .prog_node = f.prog_node,
                .job_queue = f.job_queue,
                .omit_missing_hash_error = false,
                .allow_missing_paths_field = true,
                .allow_missing_fingerprint = true,
                .allow_name_string = true,
                .use_latest_commit = false,

                .package_root = undefined,
                .error_bundle = undefined,
                .manifest = null,
                .manifest_ast = undefined,
                .computed_hash = undefined,
                .has_build_zig = false,
                .oom_flag = false,
                .latest_commit = null,

                .module = null,
            };
        }

        f.prog_node.increaseEstimatedTotalItems(new_fetch_index);

        break :nf .{ new_fetches[0..new_fetch_index], prog_names[0..new_fetch_index] };
    };

    // Now it's time to give tasks to the thread pool.
    for (new_fetches, prog_names) |*new_fetch, prog_name| {
        f.job_queue.wait_group.async(io, workerRun, .{ new_fetch, prog_name });
    }
}

/// Returns the table key for a path-based package: a hash derived from the
/// package's sub path and from whether its root directory is the global cache.
pub fn relativePathDigest(pkg_root: Cache.Path, cache_root: Cache.Directory) Package.Hash {
    return .initPath(pkg_root.sub_path, pkg_root.root_dir.eql(cache_root));
}

/// Entry point for a scheduled task: runs `run` under a progress node named
/// `prog_name` and records an out-of-memory failure in `oom_flag`.
pub fn workerRun(f: *Fetch, prog_name: []const u8) void {
    const prog_node = f.prog_node.start(prog_name, 0);
    defer prog_node.end();

    run(f) catch |err| switch (err) {
        error.OutOfMemory => f.oom_flag = true,
        error.FetchFailed => {
            // Nothing to do because the errors are already reported in `error_bundle`,
            // and a reference is kept to the `Fetch` task inside `all_fetches`.
        },
    };
}

/// Adds a source location for token `tok` of the parent manifest to the error
/// bundle. Returns `.none` when there is no parent manifest AST.
fn srcLoc(
    f: *Fetch,
    tok: std.zig.Ast.TokenIndex,
) Allocator.Error!ErrorBundle.SourceLocationIndex {
    const ast = f.parent_manifest_ast orelse return .none;
    const eb = &f.error_bundle;
    const start_loc = ast.tokenLocation(0, tok);
    const src_path = try eb.printString("{f}" ++ fs.path.sep_str ++ Manifest.basename, .{f.parent_package_root});
    const msg_off = 0;
    return eb.addSourceLocation(.{
        .src_path = src_path,
        .span_start = ast.tokenStart(tok),
        .span_end = @intCast(ast.tokenStart(tok) + ast.tokenSlice(tok).len),
        .span_main = ast.tokenStart(tok) + msg_off,
        .line = @intCast(start_loc.line),
        .column = @intCast(start_loc.column),
        .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
    });
}

/// Adds a root error message `msg_str` located at `msg_tok` and returns
/// `error.FetchFailed` (or `error.OutOfMemory` if recording the error fails).
fn fail(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, msg_str: u32) RunError {
    const eb = &f.error_bundle;
    try eb.addRootErrorMessage(.{
        .msg = msg_str,
        .src_loc = try f.srcLoc(msg_tok),
    });
    return error.FetchFailed;
}

/// An opened source of package contents.
const Resource = union(enum) {
    file: fs.File.Reader,
    http_request: HttpRequest,
    git: Git,
    dir: fs.Dir,

    const Git = struct {
        session: git.Session,
        fetch_stream: git.Session.FetchStream,
        want_oid: git.Oid,
    };

    const HttpRequest = struct {
        request: std.http.Client.Request,
        response: std.http.Client.Response,
        transfer_buffer: []u8,
        decompress: std.http.Decompress,
        decompress_buffer: []u8,
    };

    /// Releases the underlying file, request, fetch stream or directory and
    /// invalidates `resource`.
    fn deinit(resource: *Resource, io: Io) void {
        switch (resource.*) {
            .file => |*file_reader| file_reader.file.close(io),
            .http_request => |*http_request| http_request.request.deinit(),
            .git => |*git_resource| {
                git_resource.fetch_stream.deinit();
            },
            .dir => |*dir| dir.close(),
        }
        resource.* = undefined;
    }

    /// Returns the byte stream of the resource. Must not be called for `.dir`.
    fn reader(resource: *Resource) *Io.Reader {
        return switch (resource.*) {
            .file => |*file_reader| return &file_reader.interface,
            .http_request => |*http_request| return http_request.response.readerDecompressing(
                http_request.transfer_buffer,
                &http_request.decompress,
                http_request.decompress_buffer,
            ),
            .git => |*g| return &g.fetch_stream.reader,
            .dir => unreachable,
        };
    }
};

/// Archive formats recognized by file name.
const FileType = enum {
    tar,
    @"tar.gz",
    @"tar.xz",
    @"tar.zst",
    git_pack,
    zip,

    /// Determines the file type from the (case-insensitive) suffix of
    /// `file_path`, or returns null when the suffix is not recognized.
    fn fromPath(file_path: []const u8) ?FileType {
        if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
        if (ascii.endsWithIgnoreCase(file_path, ".tgz")) return .@"tar.gz";
        if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz";
        if (ascii.endsWithIgnoreCase(file_path, ".txz")) return .@"tar.xz";
        if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
        if (ascii.endsWithIgnoreCase(file_path, ".tzst")) return .@"tar.zst";
        if (ascii.endsWithIgnoreCase(file_path, ".tar.zst")) return .@"tar.zst";
        if (ascii.endsWithIgnoreCase(file_path, ".zip")) return .zip;
        if (ascii.endsWithIgnoreCase(file_path, ".jar")) return .zip;
        return null;
    }

    /// Parameter is a content-disposition header value.
    ///
    /// Returns the file type implied by the `filename` (or `filename*`)
    /// parameter of an `attachment;` disposition, or null when the header has
    /// no such parameter, is malformed or truncated, or names an unrecognized
    /// file type.
    fn fromContentDisposition(cd_header: []const u8) ?FileType {
        const attach_end = ascii.indexOfIgnoreCase(cd_header, "attachment;") orelse
            return null;

        var value_start = ascii.indexOfIgnoreCasePos(cd_header, attach_end + 1, "filename") orelse
            return null;
        value_start += "filename".len;
        // The header is supplied by the remote server and may end right after
        // "filename" or "filename*", so every index is bounds-checked.
        if (value_start < cd_header.len and cd_header[value_start] == '*') {
            value_start += 1;
        }
        if (value_start >= cd_header.len or cd_header[value_start] != '=') return null;
        value_start += 1;

        var value_end = std.mem.indexOfPos(u8, cd_header, value_start, ";") orelse cd_header.len;
        // Drop a closing quote. A leading quote does not need to be removed
        // because `fromPath` only inspects the suffix.
        if (value_end > value_start and cd_header[value_end - 1] == '\"') {
            value_end -= 1;
        }
        return fromPath(cd_header[value_start..value_end]);
    }

    test fromContentDisposition {
        try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
        try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; filename*=\"stuff.tar.gz\""));
        try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\""));
        try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\""));
        try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
        try std.testing.expectEqual(@as(?FileType, .tar), fromContentDisposition("attachment; FileName=\"stuff.tar\""));

        try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null);
        try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null);
        try std.testing.expect(fromContentDisposition("attachment; size=42") == null);
        try std.testing.expect(fromContentDisposition("inline; size=42") == null);
        try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\"; attachment;") == null);
        try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\";") == null);

        // Truncated headers must be rejected instead of indexing out of bounds.
        try std.testing.expect(fromContentDisposition("attachment; filename") == null);
        try std.testing.expect(fromContentDisposition("attachment; filename*") == null);
        try std.testing.expect(fromContentDisposition("attachment; filename=") == null);
    }
};

const init_resource_buffer_size = git.Packet.max_data_length;

fn initResource(f: *Fetch, uri: std.Uri, resource: *Resource, reader_buffer: []u8) RunError!void { const io = f.job_queue.io; const arena = f.arena.allocator(); const eb = &f.error_bundle; if (ascii.eqlIgnoreCase(uri.scheme, "file")) { const path = try uri.path.toRawMaybeAlloc(arena); const file = f.parent_package_root.openFile(path, .{}) catch |err| { return f.fail(f.location_tok, try eb.printString("unable to open '{f}{s}': {t}", .{ f.parent_package_root, path, err, })); }; resource.* = .{ .file = file.reader(io, reader_buffer) }; return; } const http_client = f.job_queue.http_client; if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) { resource.* = .{ .http_request = .{ .request = http_client.request(.GET, uri, .{}) catch |err| return f.fail(f.location_tok, try eb.printString("unable to connect to server: {t}", .{err})), .response = undefined, .transfer_buffer = reader_buffer, .decompress_buffer = &.{}, .decompress = undefined, } }; const request = &resource.http_request.request; errdefer request.deinit(); request.sendBodiless() catch |err| return f.fail(f.location_tok, try eb.printString("HTTP request
failed: {t}", .{err})); var redirect_buffer: [1024]u8 = undefined; const response = &resource.http_request.response; response.* = request.receiveHead(&redirect_buffer) catch |err| switch (err) { error.ReadFailed => { return f.fail(f.location_tok, try eb.printString("HTTP response read failure: {t}", .{ request.connection.?.getReadError().?, })); }, else => |e| return f.fail(f.location_tok, try eb.printString("invalid HTTP response: {t}", .{e})), }; if (response.head.status != .ok) return f.fail(f.location_tok, try eb.printString( "bad HTTP response code: '{d} {s}'", .{ response.head.status, response.head.status.phrase() orelse "" }, )); resource.http_request.decompress_buffer = try arena.alloc(u8, response.head.content_encoding.minBufferCapacity()); return; } if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) { var transport_uri = uri; transport_uri.scheme = uri.scheme["git+".len..]; var session = git.Session.init(arena, http_client, transport_uri, reader_buffer) catch |err| { return f.fail( f.location_tok, try eb.printString("unable to discover remote git server capabilities: {t}", .{err}), ); }; const want_oid = want_oid: { const want_ref = if (uri.fragment) |fragment| try fragment.toRawMaybeAlloc(arena) else "HEAD"; if (git.Oid.parseAny(want_ref)) |oid| break :want_oid oid else |_| {} const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref}); const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref}); var ref_iterator: git.Session.RefIterator = undefined; session.listRefs(&ref_iterator, .{ .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag }, .include_peeled = true, .buffer = reader_buffer, }) catch |err| return f.fail(f.location_tok, try eb.printString("unable to list refs: {t}", .{err})); defer ref_iterator.deinit(); while (ref_iterator.next() catch |err| { return f.fail(f.location_tok, try eb.printString( "unable to iterate refs: {s}", .{@errorName(err)}, )); 
}) |ref| { if (std.mem.eql(u8, ref.name, want_ref) or std.mem.eql(u8, ref.name, want_ref_head) or std.mem.eql(u8, ref.name, want_ref_tag)) { break :want_oid ref.peeled orelse ref.oid; } } return f.fail(f.location_tok, try eb.printString("ref not found: {s}", .{want_ref})); }; if (f.use_latest_commit) { f.latest_commit = want_oid; } else if (uri.fragment == null) { const notes_len = 1; try eb.addRootErrorMessage(.{ .msg = try eb.addString("url field is missing an explicit ref"), .src_loc = try f.srcLoc(f.location_tok), .notes_len = notes_len, }); const notes_start = try eb.reserveNotes(notes_len); eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ .msg = try eb.printString("try .url = \"{f}#{f}\",", .{ uri.fmt(.{ .scheme = true, .authority = true, .path = true }), want_oid, }), })); return error.FetchFailed; } var want_oid_buf: [git.Oid.max_formatted_length]u8 = undefined; _ = std.fmt.bufPrint(&want_oid_buf, "{f}", .{want_oid}) catch unreachable; resource.* = .{ .git = .{ .session = session, .fetch_stream = undefined, .want_oid = want_oid, } }; const fetch_stream = &resource.git.fetch_stream; session.fetch(fetch_stream, &.{&want_oid_buf}, reader_buffer) catch |err| { return f.fail(f.location_tok, try eb.printString("unable to create fetch stream: {t}", .{err})); }; errdefer fetch_stream.deinit(fetch_stream); return; } return f.fail(f.location_tok, try eb.printString("unsupported URL scheme: {s}", .{uri.scheme})); } fn unpackResource( f: *Fetch, resource: *Resource, uri_path: []const u8, tmp_directory: Cache.Directory, ) RunError!UnpackResult { const eb = &f.error_bundle; const file_type = switch (resource.*) { .file => FileType.fromPath(uri_path) orelse return f.fail(f.location_tok, try eb.printString("unknown file type: '{s}'", .{uri_path})), .http_request => |*http_request| ft: { const head = &http_request.response.head; // Content-Type takes first precedence. 
const content_type = head.content_type orelse return f.fail(f.location_tok, try eb.addString("missing 'Content-Type' header")); // Extract the MIME type, ignoring charset and boundary directives const mime_type_end = std.mem.indexOf(u8, content_type, ";") orelse content_type.len; const mime_type = content_type[0..mime_type_end]; if (ascii.eqlIgnoreCase(mime_type, "application/x-tar")) break :ft .tar; if (ascii.eqlIgnoreCase(mime_type, "application/gzip") or ascii.eqlIgnoreCase(mime_type, "application/x-gzip") or ascii.eqlIgnoreCase(mime_type, "application/tar+gzip") or ascii.eqlIgnoreCase(mime_type, "application/x-tar-gz") or ascii.eqlIgnoreCase(mime_type, "application/x-gtar-compressed")) { break :ft .@"tar.gz"; } if (ascii.eqlIgnoreCase(mime_type, "application/x-xz")) break :ft .@"tar.xz"; if (ascii.eqlIgnoreCase(mime_type, "application/zstd")) break :ft .@"tar.zst"; if (ascii.eqlIgnoreCase(mime_type, "application/zip") or ascii.eqlIgnoreCase(mime_type, "application/x-zip-compressed") or ascii.eqlIgnoreCase(mime_type, "application/java-archive")) { break :ft .zip; } if (!ascii.eqlIgnoreCase(mime_type, "application/octet-stream") and !ascii.eqlIgnoreCase(mime_type, "application/x-compressed")) { return f.fail(f.location_tok, try eb.printString( "unrecognized 'Content-Type' header: '{s}'", .{content_type}, )); } // Next, the filename from 'content-disposition: attachment' takes precedence. if (head.content_disposition) |cd_header| { break :ft FileType.fromContentDisposition(cd_header) orelse { return f.fail(f.location_tok, try eb.printString( "unsupported Content-Disposition header value: '{s}' for Content-Type=application/octet-stream", .{cd_header}, )); }; } // Finally, the path from the URI is used. 
break :ft FileType.fromPath(uri_path) orelse { return f.fail(f.location_tok, try eb.printString("unknown file type: '{s}'", .{uri_path})); }; }, .git => .git_pack, .dir => |dir| { f.recursiveDirectoryCopy(dir, tmp_directory.handle) catch |err| { return f.fail(f.location_tok, try eb.printString("unable to copy directory '{s}': {t}", .{ uri_path, err, })); }; return .{}; }, }; switch (file_type) { .tar => { return unpackTarball(f, tmp_directory.handle, resource.reader()); }, .@"tar.gz" => { var flate_buffer: [std.compress.flate.max_window_len]u8 = undefined; var decompress: std.compress.flate.Decompress = .init(resource.reader(), .gzip, &flate_buffer); return try unpackTarball(f, tmp_directory.handle, &decompress.reader); }, .@"tar.xz" => { const gpa = f.arena.child_allocator; var decompress = std.compress.xz.Decompress.init(resource.reader(), gpa, &.{}) catch |err| return f.fail(f.location_tok, try eb.printString("unable to decompress tarball: {t}", .{err})); defer decompress.deinit(); return try unpackTarball(f, tmp_directory.handle, &decompress.reader); }, .@"tar.zst" => { const window_len = std.compress.zstd.default_window_len; const window_buffer = try f.arena.allocator().alloc(u8, window_len + std.compress.zstd.block_size_max); var decompress: std.compress.zstd.Decompress = .init(resource.reader(), window_buffer, .{ .verify_checksum = false, .window_len = window_len, }); return try unpackTarball(f, tmp_directory.handle, &decompress.reader); }, .git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) { error.FetchFailed => return error.FetchFailed, error.OutOfMemory => return error.OutOfMemory, else => |e| return f.fail(f.location_tok, try eb.printString("unable to unpack git files: {t}", .{e})), }, .zip => return unzip(f, tmp_directory.handle, resource.reader()) catch |err| switch (err) { error.ReadFailed => return f.fail(f.location_tok, try eb.printString( "failed reading resource: {t}", .{err}, )), else => |e| return 
e, }, } } fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: *Io.Reader) RunError!UnpackResult { const eb = &f.error_bundle; const arena = f.arena.allocator(); var diagnostics: std.tar.Diagnostics = .{ .allocator = arena }; std.tar.pipeToFileSystem(out_dir, reader, .{ .diagnostics = &diagnostics, .strip_components = 0, .mode_mode = .ignore, .exclude_empty_directories = true, }) catch |err| return f.fail( f.location_tok, try eb.printString("unable to unpack tarball to temporary directory: {t}", .{err}), ); var res: UnpackResult = .{ .root_dir = diagnostics.root_dir }; if (diagnostics.errors.items.len > 0) { try res.allocErrors(arena, diagnostics.errors.items.len, "unable to unpack tarball"); for (diagnostics.errors.items) |item| { switch (item) { .unable_to_create_file => |i| res.unableToCreateFile(stripRoot(i.file_name, res.root_dir), i.code), .unable_to_create_sym_link => |i| res.unableToCreateSymLink(stripRoot(i.file_name, res.root_dir), i.link_name, i.code), .unsupported_file_type => |i| res.unsupportedFileType(stripRoot(i.file_name, res.root_dir), @intFromEnum(i.file_type)), .components_outside_stripped_prefix => unreachable, // unreachable with strip_components = 0 } } } return res; } fn unzip(f: *Fetch, out_dir: fs.Dir, reader: *Io.Reader) error{ ReadFailed, OutOfMemory, FetchFailed }!UnpackResult { // We write the entire contents to a file first because zip files // must be processed back to front and they could be too large to // load into memory. 
const io = f.job_queue.io; const cache_root = f.job_queue.global_cache; const prefix = "tmp/"; const suffix = ".zip"; const eb = &f.error_bundle; const random_len = @sizeOf(u64) * 2; var zip_path: [prefix.len + random_len + suffix.len]u8 = undefined; zip_path[0..prefix.len].* = prefix.*; zip_path[prefix.len + random_len ..].* = suffix.*; var zip_file = while (true) { const random_integer = std.crypto.random.int(u64); zip_path[prefix.len..][0..random_len].* = std.fmt.hex(random_integer); break cache_root.handle.createFile(&zip_path, .{ .exclusive = true, .read = true, }) catch |err| switch (err) { error.PathAlreadyExists => continue, else => |e| return f.fail( f.location_tok, try eb.printString("failed to create temporary zip file: {t}", .{e}), ), }; }; defer zip_file.close(); var zip_file_buffer: [4096]u8 = undefined; var zip_file_reader = b: { var zip_file_writer = zip_file.writer(&zip_file_buffer); _ = reader.streamRemaining(&zip_file_writer.interface) catch |err| switch (err) { error.ReadFailed => return error.ReadFailed, error.WriteFailed => return f.fail( f.location_tok, try eb.printString("failed writing temporary zip file: {t}", .{err}), ), }; zip_file_writer.interface.flush() catch |err| return f.fail( f.location_tok, try eb.printString("failed writing temporary zip file: {t}", .{err}), ); break :b zip_file_writer.moveToReader(io); }; var diagnostics: std.zip.Diagnostics = .{ .allocator = f.arena.allocator() }; // no need to deinit since we are using an arena allocator zip_file_reader.seekTo(0) catch |err| return f.fail(f.location_tok, try eb.printString("failed to seek temporary zip file: {t}", .{err})); std.zip.extract(out_dir, &zip_file_reader, .{ .allow_backslashes = true, .diagnostics = &diagnostics, }) catch |err| return f.fail(f.location_tok, try eb.printString("zip extract failed: {t}", .{err})); cache_root.handle.deleteFile(&zip_path) catch |err| return f.fail(f.location_tok, try eb.printString("delete temporary zip failed: {t}", .{err})); return 
.{ .root_dir = diagnostics.root_dir }; } fn unpackGitPack(f: *Fetch, out_dir: fs.Dir, resource: *Resource.Git) anyerror!UnpackResult { const io = f.job_queue.io; const arena = f.arena.allocator(); // TODO don't try to get a gpa from an arena. expose this dependency higher up // because the backing of arena could be page allocator const gpa = f.arena.child_allocator; const object_format: git.Oid.Format = resource.want_oid; var res: UnpackResult = .{}; // The .git directory is used to store the packfile and associated index, but // we do not attempt to replicate the exact structure of a real .git // directory, since that isn't relevant for fetching a package. { var pack_dir = try out_dir.makeOpenPath(".git", .{}); defer pack_dir.close(); var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); defer pack_file.close(); var pack_file_buffer: [4096]u8 = undefined; var pack_file_reader = b: { var pack_file_writer = pack_file.writer(&pack_file_buffer); const fetch_reader = &resource.fetch_stream.reader; _ = try fetch_reader.streamRemaining(&pack_file_writer.interface); try pack_file_writer.interface.flush(); break :b pack_file_writer.moveToReader(io); }; var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); defer index_file.close(); var index_file_buffer: [2000]u8 = undefined; var index_file_writer = index_file.writer(&index_file_buffer); { const index_prog_node = f.prog_node.start("Index pack", 0); defer index_prog_node.end(); try git.indexPack(gpa, object_format, &pack_file_reader, &index_file_writer); } { var index_file_reader = index_file.reader(io, &index_file_buffer); const checkout_prog_node = f.prog_node.start("Checkout", 0); defer checkout_prog_node.end(); var repository: git.Repository = undefined; try repository.init(gpa, object_format, &pack_file_reader, &index_file_reader); defer repository.deinit(); var diagnostics: git.Diagnostics = .{ .allocator = arena }; try repository.checkout(out_dir, resource.want_oid, &diagnostics); 
if (diagnostics.errors.items.len > 0) { try res.allocErrors(arena, diagnostics.errors.items.len, "unable to unpack packfile"); for (diagnostics.errors.items) |item| { switch (item) { .unable_to_create_file => |i| res.unableToCreateFile(i.file_name, i.code), .unable_to_create_sym_link => |i| res.unableToCreateSymLink(i.file_name, i.link_name, i.code), } } } } } try out_dir.deleteTree(".git"); return res; } fn recursiveDirectoryCopy(f: *Fetch, dir: fs.Dir, tmp_dir: fs.Dir) anyerror!void { const gpa = f.arena.child_allocator; // Recursive directory copy. var it = try dir.walk(gpa); defer it.deinit(); while (try it.next()) |entry| { switch (entry.kind) { .directory => {}, // omit empty directories .file => { dir.copyFile( entry.path, tmp_dir, entry.path, .{}, ) catch |err| switch (err) { error.FileNotFound => { if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); try dir.copyFile(entry.path, tmp_dir, entry.path, .{}); }, else => |e| return e, }; }, .sym_link => { var buf: [fs.max_path_bytes]u8 = undefined; const link_name = try dir.readLink(entry.path, &buf); // TODO: if this would create a symlink to outside // the destination directory, fail with an error instead. 
tmp_dir.symLink(link_name, entry.path, .{}) catch |err| switch (err) { error.FileNotFound => { if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); try tmp_dir.symLink(link_name, entry.path, .{}); }, else => |e| return e, }; }, else => return error.IllegalFileTypeInPackage, } } } pub fn renameTmpIntoCache(cache_dir: fs.Dir, tmp_dir_sub_path: []const u8, dest_dir_sub_path: []const u8) !void { assert(dest_dir_sub_path[1] == fs.path.sep); var handled_missing_dir = false; while (true) { cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) { error.FileNotFound => { if (handled_missing_dir) return err; cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) { error.PathAlreadyExists => handled_missing_dir = true, else => |e| return e, }; continue; }, error.PathAlreadyExists, error.AccessDenied => { // Package has been already downloaded and may already be in use on the system. cache_dir.deleteTree(tmp_dir_sub_path) catch { // Garbage files leftover in zig-cache/tmp/ is, as they say // on Star Trek, "operating within normal parameters". }; }, else => |e| return e, }; break; } } const ComputedHash = struct { digest: Package.Hash.Digest, total_size: u64, }; /// Assumes that files not included in the package have already been filtered /// prior to calling this function. This ensures that files not protected by /// the hash are not present on the file system. Empty directories are *not /// hashed* and must not be present on the file system when calling this /// function. fn computeHash(f: *Fetch, pkg_path: Cache.Path, filter: Filter) RunError!ComputedHash { const io = f.job_queue.io; // All the path name strings need to be in memory for sorting. const arena = f.arena.allocator(); const gpa = f.arena.child_allocator; const eb = &f.error_bundle; const root_dir = pkg_path.root_dir.handle; // Collect all files, recursively, then sort. 
var all_files = std.array_list.Managed(*HashedFile).init(gpa); defer all_files.deinit(); var deleted_files = std.array_list.Managed(*DeletedFile).init(gpa); defer deleted_files.deinit(); // Track directories which had any files deleted from them so that empty directories // can be deleted. var sus_dirs: std.StringArrayHashMapUnmanaged(void) = .empty; defer sus_dirs.deinit(gpa); var walker = try root_dir.walk(gpa); defer walker.deinit(); // Total number of bytes of file contents included in the package. var total_size: u64 = 0; { // The final hash will be a hash of each file hashed independently. This // allows hashing in parallel. var wait_group: Io.Group = .init; // TODO `computeHash` is called from a worker thread so there must not be // any waiting without working or a deadlock could occur. defer wait_group.wait(io); while (walker.next() catch |err| { try eb.addRootErrorMessage(.{ .msg = try eb.printString( "unable to walk temporary directory '{f}': {t}", .{ pkg_path, err }, ) }); return error.FetchFailed; }) |entry| { if (entry.kind == .directory) continue; const entry_pkg_path = stripRoot(entry.path, pkg_path.sub_path); if (!filter.includePath(entry_pkg_path)) { // Delete instead of including in hash calculation. const fs_path = try arena.dupe(u8, entry.path); // Also track the parent directory in case it becomes empty. 
if (fs.path.dirname(fs_path)) |parent| try sus_dirs.put(gpa, parent, {}); const deleted_file = try arena.create(DeletedFile); deleted_file.* = .{ .fs_path = fs_path, .failure = undefined, // to be populated by the worker }; wait_group.async(io, workerDeleteFile, .{ root_dir, deleted_file }); try deleted_files.append(deleted_file); continue; } const kind: HashedFile.Kind = switch (entry.kind) { .directory => unreachable, .file => .file, .sym_link => .link, else => return f.fail(f.location_tok, try eb.printString( "package contains '{s}' which has illegal file type '{s}'", .{ entry.path, @tagName(entry.kind) }, )), }; if (std.mem.eql(u8, entry_pkg_path, Package.build_zig_basename)) f.has_build_zig = true; const fs_path = try arena.dupe(u8, entry.path); const hashed_file = try arena.create(HashedFile); hashed_file.* = .{ .fs_path = fs_path, .normalized_path = try normalizePathAlloc(arena, entry_pkg_path), .kind = kind, .hash = undefined, // to be populated by the worker .failure = undefined, // to be populated by the worker .size = undefined, // to be populated by the worker }; wait_group.async(io, workerHashFile, .{ root_dir, hashed_file }); try all_files.append(hashed_file); } } { // Sort by length, descending, so that child directories get removed first. sus_dirs.sortUnstable(@as(struct { keys: []const []const u8, pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool { return ctx.keys[b_index].len < ctx.keys[a_index].len; } }, .{ .keys = sus_dirs.keys() })); // During this loop, more entries will be added, so we must loop by index. 
var i: usize = 0; while (i < sus_dirs.count()) : (i += 1) { const sus_dir = sus_dirs.keys()[i]; root_dir.deleteDir(sus_dir) catch |err| switch (err) { error.DirNotEmpty => continue, error.FileNotFound => continue, else => |e| { try eb.addRootErrorMessage(.{ .msg = try eb.printString( "unable to delete empty directory '{s}': {s}", .{ sus_dir, @errorName(e) }, ) }); return error.FetchFailed; }, }; if (fs.path.dirname(sus_dir)) |parent| { try sus_dirs.put(gpa, parent, {}); } } } std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); var hasher = Package.Hash.Algo.init(.{}); var any_failures = false; for (all_files.items) |hashed_file| { hashed_file.failure catch |err| { any_failures = true; try eb.addRootErrorMessage(.{ .msg = try eb.printString("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err), }), }); }; hasher.update(&hashed_file.hash); total_size += hashed_file.size; } for (deleted_files.items) |deleted_file| { deleted_file.failure catch |err| { any_failures = true; try eb.addRootErrorMessage(.{ .msg = try eb.printString("failed to delete excluded path '{s}' from package: {s}", .{ deleted_file.fs_path, @errorName(err), }), }); }; } if (any_failures) return error.FetchFailed; if (f.job_queue.debug_hash) { assert(!f.job_queue.recursive); // Print something to stdout that can be text diffed to figure out why // the package hash is different. 
dumpHashInfo(all_files.items) catch |err| { std.debug.print("unable to write to stdout: {s}\n", .{@errorName(err)}); std.process.exit(1); }; } return .{ .digest = hasher.finalResult(), .total_size = total_size, }; } fn dumpHashInfo(all_files: []const *const HashedFile) !void { var stdout_buffer: [1024]u8 = undefined; var stdout_writer: fs.File.Writer = .initStreaming(.stdout(), &stdout_buffer); const w = &stdout_writer.interface; for (all_files) |hashed_file| { try w.print("{t}: {x}: {s}\n", .{ hashed_file.kind, &hashed_file.hash, hashed_file.normalized_path }); } try w.flush(); } fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile) void { hashed_file.failure = hashFileFallible(dir, hashed_file); } fn workerDeleteFile(dir: fs.Dir, deleted_file: *DeletedFile) void { deleted_file.failure = deleteFileFallible(dir, deleted_file); } fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { var buf: [8000]u8 = undefined; var hasher = Package.Hash.Algo.init(.{}); hasher.update(hashed_file.normalized_path); var file_size: u64 = 0; switch (hashed_file.kind) { .file => { var file = try dir.openFile(hashed_file.fs_path, .{}); defer file.close(); // Hard-coded false executable bit: https://github.com/ziglang/zig/issues/17463 hasher.update(&.{ 0, 0 }); var file_header: FileHeader = .{}; while (true) { const bytes_read = try file.read(&buf); if (bytes_read == 0) break; file_size += bytes_read; hasher.update(buf[0..bytes_read]); file_header.update(buf[0..bytes_read]); } if (file_header.isExecutable()) { try setExecutable(file); } }, .link => { const link_name = try dir.readLink(hashed_file.fs_path, &buf); if (fs.path.sep != canonical_sep) { // Package hashes are intended to be consistent across // platforms which means we must normalize path separators // inside symlinks. 
normalizePath(link_name); } hasher.update(link_name); }, } hasher.final(&hashed_file.hash); hashed_file.size = file_size; } fn deleteFileFallible(dir: fs.Dir, deleted_file: *DeletedFile) DeletedFile.Error!void { try dir.deleteFile(deleted_file.fs_path); } fn setExecutable(file: fs.File) !void { if (!std.fs.has_executable_bit) return; const S = std.posix.S; const mode = fs.File.default_mode | S.IXUSR | S.IXGRP | S.IXOTH; try file.chmod(mode); } const DeletedFile = struct { fs_path: []const u8, failure: Error!void, const Error = fs.Dir.DeleteFileError || fs.Dir.DeleteDirError; }; const HashedFile = struct { fs_path: []const u8, normalized_path: []const u8, hash: Package.Hash.Digest, failure: Error!void, kind: Kind, size: u64, const Error = fs.File.OpenError || fs.File.ReadError || fs.File.StatError || fs.File.ChmodError || fs.Dir.ReadLinkError; const Kind = enum { file, link }; fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { _ = context; return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path); } }; /// Strips root directory name from file system path. fn stripRoot(fs_path: []const u8, root_dir: []const u8) []const u8 { if (root_dir.len == 0 or fs_path.len <= root_dir.len) return fs_path; if (std.mem.eql(u8, fs_path[0..root_dir.len], root_dir) and fs.path.isSep(fs_path[root_dir.len])) { return fs_path[root_dir.len + 1 ..]; } return fs_path; } /// Make a file system path identical independently of operating system path inconsistencies. /// This converts backslashes into forward slashes. 
fn normalizePathAlloc(arena: Allocator, pkg_path: []const u8) ![]const u8 {
    const copy = try arena.dupe(u8, pkg_path);
    // Only hosts whose native separator differs from the canonical one need rewriting.
    if (fs.path.sep != canonical_sep) normalizePath(copy);
    return copy;
}

/// The separator used inside package hashes, regardless of host platform.
const canonical_sep = fs.path.sep_posix;

/// Rewrites every native path separator in `bytes` to `canonical_sep`, in
/// place. Must only be called on hosts where the two separators differ.
fn normalizePath(bytes: []u8) void {
    assert(fs.path.sep != canonical_sep);
    for (bytes) |*byte| {
        if (byte.* == fs.path.sep) byte.* = canonical_sep;
    }
}

/// Inclusion rules deciding which paths belong to a package.
const Filter = struct {
    include_paths: std.StringArrayHashMapUnmanaged(void) = .empty,

    /// sub_path is relative to the package root.
    /// Returns true when the rule set is empty, names the package root (""
    /// or "."), or contains `sub_path` or any of its parent directories.
    pub fn includePath(self: Filter, sub_path: []const u8) bool {
        const rules = &self.include_paths;
        if (rules.count() == 0 or rules.contains("") or rules.contains(".")) return true;

        // Try `sub_path` itself, then walk up through each parent directory.
        var candidate: ?[]const u8 = sub_path;
        while (candidate) |path| : (candidate = std.fs.path.dirname(path)) {
            if (rules.contains(path)) return true;
        }
        return false;
    }

    test includePath {
        const gpa = std.testing.allocator;
        var filter: Filter = .{};
        defer filter.include_paths.deinit(gpa);

        try filter.include_paths.put(gpa, "src", {});
        try std.testing.expect(filter.includePath("src/core/unix/SDL_poll.c"));
        try std.testing.expect(!filter.includePath(".gitignore"));
    }
};

/// Returns the package hash of dependency `dep`: its declared hash when
/// present, otherwise a digest derived from its relative path. Returns null
/// for URL dependencies without a hash, or when the path cannot be resolved.
pub fn depDigest(pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifest.Dependency) ?Package.Hash {
    if (dep.hash) |h| return .fromSlice(h);

    const rel_path = switch (dep.location) {
        .url => return null,
        .path => |p| p,
    };

    // Resolve on the stack; the resolved path is only needed for this call.
    var buf: [fs.max_path_bytes]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buf);
    const new_root = pkg_root.resolvePosix(fba.allocator(), rel_path) catch return null;
    return relativePathDigest(new_root, cache_root);
}

// Detects executable header: ELF or Mach-O magic header or shebang line.
const FileHeader = struct {
    header: [4]u8 = undefined,
    bytes_read: usize = 0,

    /// Feeds the next chunk of file contents. Only the leading bytes needed
    /// to fill `header` are kept; once it is full, further input is ignored.
    pub fn update(self: *FileHeader, buf: []const u8) void {
        const capacity = self.header.len;
        if (self.bytes_read < capacity) {
            const take = @min(capacity - self.bytes_read, buf.len);
            @memcpy(self.header[self.bytes_read..][0..take], buf[0..take]);
            self.bytes_read += take;
        }
    }

    /// True when the bytes collected so far begin with `magic`. Fewer
    /// collected bytes than `magic.len` never match.
    fn startsWithMagic(self: *FileHeader, magic: []const u8) bool {
        const seen = self.header[0..@min(self.bytes_read, magic.len)];
        return std.mem.eql(u8, seen, magic);
    }

    fn isScript(self: *FileHeader) bool {
        return self.startsWithMagic("#!");
    }

    fn isElf(self: *FileHeader) bool {
        return self.startsWithMagic(std.elf.MAGIC);
    }

    fn isMachO(self: *FileHeader) bool {
        if (self.bytes_read < 4) return false;
        // Both byte orders of every magic number are listed, so reading with
        // the host's native endianness accepts either on-disk order.
        return switch (std.mem.readInt(u32, &self.header, builtin.cpu.arch.endian())) {
            std.macho.MH_MAGIC,
            std.macho.MH_MAGIC_64,
            std.macho.FAT_MAGIC,
            std.macho.FAT_MAGIC_64,
            std.macho.MH_CIGAM,
            std.macho.MH_CIGAM_64,
            std.macho.FAT_CIGAM,
            std.macho.FAT_CIGAM_64,
            => true,
            else => false,
        };
    }

    /// True when the collected header is a shebang line, an ELF magic
    /// number, or a Mach-O magic number.
    pub fn isExecutable(self: *FileHeader) bool {
        if (self.isScript()) return true;
        if (self.isElf()) return true;
        return self.isMachO();
    }
};

test FileHeader {
    var h: FileHeader = .{};
    try std.testing.expect(!h.isExecutable());

    // A partial ELF magic number is not enough; the full one is.
    const elf_magic = std.elf.MAGIC;
    h.update(elf_magic[0..2]);
    try std.testing.expect(!h.isExecutable());
    h.update(elf_magic[2..4]);
    try std.testing.expect(h.isExecutable());

    // Input past the first four bytes is ignored.
    h.update(elf_magic[2..4]);
    try std.testing.expect(h.isExecutable());

    const macho64_magic_bytes = [_]u8{ 0xCF, 0xFA, 0xED, 0xFE };
    h = .{};
    h.update(&macho64_magic_bytes);
    try std.testing.expect(h.isExecutable());

    const macho64_cigam_bytes = [_]u8{ 0xFE, 0xED, 0xFA, 0xCF };
    h = .{};
    h.update(&macho64_cigam_bytes);
    try std.testing.expect(h.isExecutable());
}

// Result of the `unpackResource` operation.
// Enables collecting errors from
// tar/git diagnostic, filtering those errors by manifest inclusion rules and
// emitting remaining errors to an `ErrorBundle`.
const UnpackResult = struct {
    /// Storage reserved by `allocErrors`; only the first `errors_count`
    /// entries are initialized.
    errors: []Error = &.{},
    errors_count: usize = 0,
    root_error_message: []const u8 = "",

    // A non empty value means that the package contents are inside a
    // sub-directory indicated by the named path.
    root_dir: []const u8 = "",

    const Error = union(enum) {
        unable_to_create_sym_link: struct {
            code: anyerror,
            file_name: []const u8,
            link_name: []const u8,
        },
        unable_to_create_file: struct {
            code: anyerror,
            file_name: []const u8,
        },
        unsupported_file_type: struct {
            file_name: []const u8,
            file_type: u8,
        },

        /// True when the file this error refers to is not included by `filter`.
        fn excluded(self: Error, filter: Filter) bool {
            const file_name = switch (self) {
                .unable_to_create_file => |info| info.file_name,
                .unable_to_create_sym_link => |info| info.file_name,
                .unsupported_file_type => |info| info.file_name,
            };
            return !filter.includePath(file_name);
        }
    };

    /// Reserves room for `n` errors and stores a copy of the message used as
    /// the root error when `validate` reports them.
    fn allocErrors(self: *UnpackResult, arena: std.mem.Allocator, n: usize, root_error_message: []const u8) !void {
        self.root_error_message = try arena.dupe(u8, root_error_message);
        self.errors = try arena.alloc(UnpackResult.Error, n);
    }

    fn hasErrors(self: *UnpackResult) bool {
        return self.errors_count > 0;
    }

    /// Records a file creation failure. Room must have been reserved with
    /// `allocErrors`.
    fn unableToCreateFile(self: *UnpackResult, file_name: []const u8, err: anyerror) void {
        self.errors[self.errors_count] = .{ .unable_to_create_file = .{
            .code = err,
            .file_name = file_name,
        } };
        self.errors_count += 1;
    }

    /// Records a symlink creation failure. Room must have been reserved with
    /// `allocErrors`.
    fn unableToCreateSymLink(self: *UnpackResult, file_name: []const u8, link_name: []const u8, err: anyerror) void {
        self.errors[self.errors_count] = .{ .unable_to_create_sym_link = .{
            .code = err,
            .file_name = file_name,
            .link_name = link_name,
        } };
        self.errors_count += 1;
    }

    /// Records an entry of unsupported type. Room must have been reserved
    /// with `allocErrors`.
    fn unsupportedFileType(self: *UnpackResult, file_name: []const u8, file_type: u8) void {
        self.errors[self.errors_count] = .{ .unsupported_file_type = .{
            .file_name = file_name,
            .file_type = file_type,
        } };
        self.errors_count += 1;
    }

    /// Reports the recorded errors that refer to files included by `filter`.
    /// Returns normally when there are none; otherwise adds one root error
    /// with a note per remaining error and returns `error.FetchFailed`.
    fn validate(self: *UnpackResult, f: *Fetch, filter: Filter) !void {
        if (self.errors_count == 0) return;

        // Only the first `errors_count` slots have been written; anything
        // past that in the allocation is undefined and must not be read.
        const recorded = self.errors[0..self.errors_count];

        var unfiltered_errors: u32 = 0;
        for (recorded) |item| {
            if (item.excluded(filter)) continue;
            unfiltered_errors += 1;
        }
        if (unfiltered_errors == 0) return;

        // Emit errors to an `ErrorBundle`.
        const eb = &f.error_bundle;
        try eb.addRootErrorMessage(.{
            .msg = try eb.addString(self.root_error_message),
            .src_loc = try f.srcLoc(f.location_tok),
            .notes_len = unfiltered_errors,
        });
        var note_i: u32 = try eb.reserveNotes(unfiltered_errors);
        for (recorded) |item| {
            if (item.excluded(filter)) continue;
            switch (item) {
                .unable_to_create_sym_link => |info| {
                    eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
                        .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
                            info.file_name, info.link_name, @errorName(info.code),
                        }),
                    }));
                },
                .unable_to_create_file => |info| {
                    eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
                        .msg = try eb.printString("unable to create file '{s}': {s}", .{
                            info.file_name, @errorName(info.code),
                        }),
                    }));
                },
                .unsupported_file_type => |info| {
                    eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
                        .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{
                            info.file_name, info.file_type,
                        }),
                    }));
                },
            }
            note_i += 1;
        }

        return error.FetchFailed;
    }

    test validate {
        const gpa = std.testing.allocator;
        var arena_instance = std.heap.ArenaAllocator.init(gpa);
        defer arena_instance.deinit();
        const arena = arena_instance.allocator();

        // fill UnpackResult with errors
        var res: UnpackResult = .{};
        try res.allocErrors(arena, 4, "unable to unpack");
        try std.testing.expectEqual(0, res.errors_count);
        res.unableToCreateFile("dir1/file1", error.File1);
        res.unableToCreateSymLink("dir2/file2", "filename", error.SymlinkError);
        res.unableToCreateFile("dir1/file3", error.File3);
        res.unsupportedFileType("dir2/file4", 'x');
        try std.testing.expectEqual(4, res.errors_count);

        // create filter, includes dir2, excludes dir1
        var filter: Filter = .{};
        try filter.include_paths.put(arena, "dir2", {});

        // init Fetch
        var fetch: Fetch = undefined;
        fetch.parent_manifest_ast = null;
        fetch.location_tok = 0;
        try fetch.error_bundle.init(gpa);
        defer fetch.error_bundle.deinit();

        // validate errors with filter
        try std.testing.expectError(error.FetchFailed, res.validate(&fetch, filter));

        // output errors to string
        var errors = try fetch.error_bundle.toOwnedBundle("");
        defer errors.deinit(gpa);
        var aw: Io.Writer.Allocating = .init(gpa);
        defer aw.deinit();
        try errors.renderToWriter(.{ .ttyconf = .no_color }, &aw.writer);
        try std.testing.expectEqualStrings(
            \\error: unable to unpack
            \\    note: unable to create symlink from 'dir2/file2' to 'filename': SymlinkError
            \\    note: file 'dir2/file4' has unsupported type 'x'
            \\
        , aw.written());
    }
};

test "tarball with duplicate paths" {
    // This tarball has duplicate path 'dir1/file1' to simulate case sensitive
    // file system on any file system.
    //
    //     duplicate_paths/
    //     duplicate_paths/dir1/
    //     duplicate_paths/dir1/file1
    //     duplicate_paths/dir1/file1
    //     duplicate_paths/build.zig.zon
    //     duplicate_paths/src/
    //     duplicate_paths/src/main.zig
    //     duplicate_paths/src/root.zig
    //     duplicate_paths/build.zig
    //

    const gpa = std.testing.allocator;
    const io = std.testing.io;
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    const tarball_name = "duplicate_paths.tar.gz";
    try saveEmbedFile(tarball_name, tmp.dir);
    const tarball_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/{s}", .{ tmp.sub_path, tarball_name });
    defer gpa.free(tarball_path);

    // Run tarball fetch, expect to fail
    var fb: TestFetchBuilder = undefined;
    var fetch = try fb.build(gpa, io, tmp.dir, tarball_path);
    defer fb.deinit();
    try std.testing.expectError(error.FetchFailed, fetch.run());

    try fb.expectFetchErrors(1,
        \\error: unable to unpack tarball
        \\    note: unable to create file 'dir1/file1': PathAlreadyExists
        \\
    );
}

test "tarball with excluded duplicate paths" {
    // Same as previous tarball
but has build.zig.zon wich excludes 'dir1'. // // .paths = .{ // "build.zig", // "build.zig.zon", // "src", // } // const gpa = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const tarball_name = "duplicate_paths_excluded.tar.gz"; try saveEmbedFile(tarball_name, tmp.dir); const tarball_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/{s}", .{ tmp.sub_path, tarball_name }); defer gpa.free(tarball_path); // Run tarball fetch, should succeed var fb: TestFetchBuilder = undefined; var fetch = try fb.build(gpa, io, tmp.dir, tarball_path); defer fb.deinit(); try fetch.run(); const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12200bafe035cbb453dd717741b66e9f9d1e6c674069d06121dafa1b2e62eb6b22da", &hex_digest, ); const expected_files: []const []const u8 = &.{ "build.zig", "build.zig.zon", "src/main.zig", "src/root.zig", }; try fb.expectPackageFiles(expected_files); } test "tarball without root folder" { // Tarball with root folder. Manifest excludes dir1 and dir2. 
// // build.zig // build.zig.zon // dir1/ // dir1/file2 // dir1/file1 // dir2/ // dir2/file2 // src/ // src/main.zig // const gpa = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const tarball_name = "no_root.tar.gz"; try saveEmbedFile(tarball_name, tmp.dir); const tarball_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/{s}", .{ tmp.sub_path, tarball_name }); defer gpa.free(tarball_path); // Run tarball fetch, should succeed var fb: TestFetchBuilder = undefined; var fetch = try fb.build(gpa, io, tmp.dir, tarball_path); defer fb.deinit(); try fetch.run(); const hex_digest = Package.multiHashHexDigest(fetch.computed_hash.digest); try std.testing.expectEqualStrings( "12209f939bfdcb8b501a61bb4a43124dfa1b2848adc60eec1e4624c560357562b793", &hex_digest, ); const expected_files: []const []const u8 = &.{ "build.zig", "build.zig.zon", "src/main.zig", }; try fb.expectPackageFiles(expected_files); } test "set executable bit based on file content" { if (!std.fs.has_executable_bit) return error.SkipZigTest; const gpa = std.testing.allocator; const io = std.testing.io; var tmp = std.testing.tmpDir(.{}); defer tmp.cleanup(); const tarball_name = "executables.tar.gz"; try saveEmbedFile(tarball_name, tmp.dir); const tarball_path = try std.fmt.allocPrint(gpa, ".zig-cache/tmp/{s}/{s}", .{ tmp.sub_path, tarball_name }); defer gpa.free(tarball_path); // $ tar -tvf executables.tar.gz // drwxrwxr-x 0 executables/ // -rwxrwxr-x 170 executables/hello // lrwxrwxrwx 0 executables/hello_ln -> hello // -rw-rw-r-- 0 executables/file1 // -rw-rw-r-- 17 executables/script_with_shebang_without_exec_bit // -rwxrwxr-x 7 executables/script_without_shebang // -rwxrwxr-x 17 executables/script var fb: TestFetchBuilder = undefined; var fetch = try fb.build(gpa, io, tmp.dir, tarball_path); defer fb.deinit(); try fetch.run(); try std.testing.expectEqualStrings( "1220fecb4c06a9da8673c87fe8810e15785f1699212f01728eadce094d21effeeef3", 
&Package.multiHashHexDigest(fetch.computed_hash.digest), ); var out = try fb.packageDir(); defer out.close(); const S = std.posix.S; // expect executable bit not set try std.testing.expect((try out.statFile("file1")).mode & S.IXUSR == 0); try std.testing.expect((try out.statFile("script_without_shebang")).mode & S.IXUSR == 0); // expect executable bit set try std.testing.expect((try out.statFile("hello")).mode & S.IXUSR != 0); try std.testing.expect((try out.statFile("script")).mode & S.IXUSR != 0); try std.testing.expect((try out.statFile("script_with_shebang_without_exec_bit")).mode & S.IXUSR != 0); try std.testing.expect((try out.statFile("hello_ln")).mode & S.IXUSR != 0); // // $ ls -al zig-cache/tmp/OCz9ovUcstDjTC_U/zig-global-cache/p/1220fecb4c06a9da8673c87fe8810e15785f1699212f01728eadce094d21effeeef3 // -rw-rw-r-- 1 0 Apr file1 // -rwxrwxr-x 1 170 Apr hello // lrwxrwxrwx 1 5 Apr hello_ln -> hello // -rwxrwxr-x 1 17 Apr script // -rw-rw-r-- 1 7 Apr script_without_shebang // -rwxrwxr-x 1 17 Apr script_with_shebang_without_exec_bit } fn saveEmbedFile(comptime tarball_name: []const u8, dir: fs.Dir) !void { //const tarball_name = "duplicate_paths_excluded.tar.gz"; const tarball_content = @embedFile("Fetch/testdata/" ++ tarball_name); var tmp_file = try dir.createFile(tarball_name, .{}); defer tmp_file.close(); try tmp_file.writeAll(tarball_content); } // Builds Fetch with required dependencies, clears dependencies on deinit(). 
/// Test fixture bundling a `Fetch` with the HTTP client, global cache
/// directory and job queue it is wired to.
///
/// `build` stores pointers to the fixture's own fields (`&self.http_client`,
/// `&self.job_queue`), so a built fixture must stay at the same address until
/// `deinit` has run.
const TestFetchBuilder = struct {
    http_client: std.http.Client,
    global_cache_directory: Cache.Directory,
    job_queue: Fetch.JobQueue,
    fetch: Fetch,

    /// Initializes every field of `self` in place and returns a pointer to the
    /// embedded `fetch`. `self` may be `undefined` on entry.
    ///
    /// Creates (or opens) the "zig-global-cache" directory below
    /// `cache_parent_dir` and uses it both as the global cache and as the
    /// parent package root. `path_or_url` becomes the fetch location.
    /// Call `deinit` to release everything acquired here.
    fn build(
        self: *TestFetchBuilder,
        allocator: std.mem.Allocator,
        io: Io,
        cache_parent_dir: std.fs.Dir,
        path_or_url: []const u8,
    ) !*Fetch {
        const cache_dir = try cache_parent_dir.makeOpenPath("zig-global-cache", .{});

        self.http_client = .{ .allocator = allocator, .io = io };
        self.global_cache_directory = .{ .handle = cache_dir, .path = null };

        self.job_queue = .{
            .http_client = &self.http_client,
            .global_cache = self.global_cache_directory,
            .recursive = false,
            .read_only = false,
            .debug_hash = false,
            .work_around_btrfs_bug = false,
            .mode = .needed,
        };

        self.fetch = .{
            .arena = std.heap.ArenaAllocator.init(allocator),
            .io = io,
            .location = .{ .path_or_url = path_or_url },
            .location_tok = 0,
            .hash_tok = .none,
            .name_tok = 0,
            .lazy_status = .eager,
            .parent_package_root = Cache.Path{ .root_dir = Cache.Directory{ .handle = cache_dir, .path = null } },
            .parent_manifest_ast = null,
            .prog_node = std.Progress.Node.none,
            .job_queue = &self.job_queue,
            .omit_missing_hash_error = true,
            .allow_missing_paths_field = false,
            .allow_missing_fingerprint = true, // so we can keep using the old testdata .tar.gz
            .allow_name_string = true, // so we can keep using the old testdata .tar.gz
            .use_latest_commit = true,

            // Outputs of `run`; left undefined/empty until the fetch executes.
            .package_root = undefined,
            .error_bundle = undefined,
            .manifest = null,
            .manifest_ast = undefined,
            .computed_hash = undefined,
            .has_build_zig = false,
            .oom_flag = false,
            .latest_commit = null,

            .module = null,
        };
        return &self.fetch;
    }

    /// Releases everything set up by `build`: the fetch, the job queue, the
    /// progress node, the cache directory handle and the HTTP client.
    fn deinit(self: *TestFetchBuilder) void {
        self.fetch.deinit();
        self.job_queue.deinit();
        self.fetch.prog_node.end();
        self.global_cache_directory.handle.close();
        self.http_client.deinit();
    }

    /// Opens the fetched package's root directory for iteration.
    /// The caller must close the returned directory.
    fn packageDir(self: *TestFetchBuilder) !fs.Dir {
        const root = self.fetch.package_root;
        return try root.root_dir.handle.openDir(root.sub_path, .{ .iterate = true });
    }

    /// Test helper, asserts that the package dir contains exactly `expected_files`.
    ///
    /// Only entries of kind `.file` are considered. Paths are compared with
    /// '/' as separator, and `expected_files` must be sorted because the
    /// collected paths are sorted with `std.mem.lessThan` before comparing.
    fn expectPackageFiles(self: *TestFetchBuilder, expected_files: []const []const u8) !void {
        var package_dir = try self.packageDir();
        defer package_dir.close();

        var actual_files: std.ArrayListUnmanaged([]u8) = .empty;
        defer actual_files.deinit(std.testing.allocator);
        defer for (actual_files.items) |file| std.testing.allocator.free(file);

        var walker = try package_dir.walk(std.testing.allocator);
        defer walker.deinit();
        while (try walker.next()) |entry| {
            if (entry.kind != .file) continue;
            // `entry.path` is duplicated so it can be kept after the walker advances.
            const path = try std.testing.allocator.dupe(u8, entry.path);
            errdefer std.testing.allocator.free(path);
            // Normalize the platform separator so expectations can always use '/'.
            std.mem.replaceScalar(u8, path, std.fs.path.sep, '/');
            try actual_files.append(std.testing.allocator, path);
        }
        std.mem.sortUnstable([]u8, actual_files.items, {}, struct {
            fn lessThan(_: void, a: []u8, b: []u8) bool {
                return std.mem.lessThan(u8, a, b);
            }
        }.lessThan);

        try std.testing.expectEqual(expected_files.len, actual_files.items.len);
        for (expected_files, 0..) |file_name, i| {
            try std.testing.expectEqualStrings(file_name, actual_files.items[i]);
        }
        try std.testing.expectEqualDeep(expected_files, actual_files.items);
    }

    /// Test helper, asserts that fetch has failed with `msg` error message.
    ///
    /// Checks that the first message in the fetch's error bundle has
    /// `count == 1`, that it carries `notes_len` notes (only checked when
    /// `notes_len > 0`), and that the whole bundle renders, without color, to
    /// exactly `msg`. Fails the test if the bundle contains no messages.
    fn expectFetchErrors(self: *TestFetchBuilder, notes_len: usize, msg: []const u8) !void {
        var errors = try self.fetch.error_bundle.toOwnedBundle("");
        defer errors.deinit(std.testing.allocator);

        // Fail the test instead of indexing out of bounds when nothing was reported.
        const messages = errors.getMessages();
        try std.testing.expect(messages.len > 0);

        const em = errors.getErrorMessage(messages[0]);
        try std.testing.expectEqual(1, em.count);
        if (notes_len > 0) {
            try std.testing.expectEqual(notes_len, em.notes_len);
        }
        var aw: Io.Writer.Allocating = .init(std.testing.allocator);
        defer aw.deinit();
        try errors.renderToWriter(.{ .ttyconf = .no_color }, &aw.writer);
        try std.testing.expectEqualStrings(msg, aw.written());
    }
};

// Reference the nested containers so that their tests are included.
test {
    _ = Filter;
    _ = FileType;
    _ = UnpackResult;
}