diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f3337589c..b08dca6f04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -528,7 +528,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/Liveness.zig" "${CMAKE_SOURCE_DIR}/src/Module.zig" "${CMAKE_SOURCE_DIR}/src/Package.zig" - "${CMAKE_SOURCE_DIR}/src/Package/hash.zig" + "${CMAKE_SOURCE_DIR}/src/Package/Fetch.zig" "${CMAKE_SOURCE_DIR}/src/RangeSet.zig" "${CMAKE_SOURCE_DIR}/src/Sema.zig" "${CMAKE_SOURCE_DIR}/src/TypedValue.zig" diff --git a/src/Package.zig b/src/Package.zig index 14052e3de4..e5fa24e18d 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -15,9 +15,9 @@ const Compilation = @import("Compilation.zig"); const Module = @import("Module.zig"); const Cache = std.Build.Cache; const build_options = @import("build_options"); -const git = @import("git.zig"); -const computePackageHash = @import("Package/hash.zig").compute; +const Fetch = @import("Package/Fetch.zig"); +pub const build_zig_basename = "build.zig"; pub const Manifest = @import("Manifest.zig"); pub const Table = std.StringHashMapUnmanaged(*Package); @@ -213,223 +213,6 @@ pub fn getName(target: *const Package, gpa: Allocator, mod: Module) ![]const u8 return buf.toOwnedSlice(); } -pub const build_zig_basename = "build.zig"; - -/// Fetches a package and all of its dependencies recursively. Writes the -/// corresponding datastructures for the build runner into `dependencies_source`. -pub fn fetchAndAddDependencies( - pkg: *Package, - deps_pkg: *Package, - arena: Allocator, - thread_pool: *ThreadPool, - http_client: *std.http.Client, - directory: Compilation.Directory, - global_cache_directory: Compilation.Directory, - local_cache_directory: Compilation.Directory, - dependencies_source: *std.ArrayList(u8), - error_bundle: *std.zig.ErrorBundle.Wip, - all_modules: *AllModules, - root_prog_node: *std.Progress.Node, - /// null for the root package - this_hash: ?[]const u8, -) !void { - const max_bytes = 10 * 1024 * 1024; - const gpa = thread_pool.allocator; - const build_zig_zon_bytes = directory.handle.readFileAllocOptions( - arena, - Manifest.basename, - max_bytes, - null, - 1, - 0, - ) catch |err| switch (err) { - error.FileNotFound => { - // Handle the same as no dependencies. 
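-            // For reference, the dependencies source that this function builds
-            // up has roughly the following shape (the hash and paths here are
-            // illustrative, not real values):
-            //
-            //   pub const packages = struct {
-            //       pub const @"1220abcd..." = struct {
-            //           pub const build_root = "/home/user/.cache/zig/p/1220abcd...";
-            //           pub const build_zig = @import("1220abcd...");
-            //           pub const deps: []const struct { []const u8, []const u8 } = &.{};
-            //       };
-            //   };
-            //   pub const root_deps: []const struct { []const u8, []const u8 } = &.{
-            //       .{ "foo", "1220abcd..." },
-            //   };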
- if (this_hash) |hash| { - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const build_zig = @import("{}"); - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; - \\ }}; - \\ - , .{ - std.zig.fmtId(hash), - std.zig.fmtEscapes(pkg.root_src_directory.path.?), - std.zig.fmtEscapes(hash), - }); - } else { - try dependencies_source.writer().writeAll( - \\pub const packages = struct {}; - \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{}; - \\ - ); - } - return; - }, - else => |e| return e, - }; - - var ast = try std.zig.Ast.parse(gpa, build_zig_zon_bytes, .zon); - defer ast.deinit(gpa); - - if (ast.errors.len > 0) { - const file_path = try directory.join(arena, &.{Manifest.basename}); - try main.putAstErrorsIntoBundle(gpa, ast, file_path, error_bundle); - return error.PackageFetchFailed; - } - - var manifest = try Manifest.parse(gpa, ast); - defer manifest.deinit(gpa); - - if (manifest.errors.len > 0) { - const file_path = try directory.join(arena, &.{Manifest.basename}); - for (manifest.errors) |msg| { - const str = try error_bundle.addString(msg.msg); - try Report.addErrorMessage(&ast, file_path, error_bundle, 0, str, msg.tok, msg.off); - } - return error.PackageFetchFailed; - } - - const report: Report = .{ - .ast = &ast, - .directory = directory, - .error_bundle = error_bundle, - }; - - for (manifest.dependencies.values()) |dep| { - // If the hash is invalid, let errors happen later - // We only want to add these for progress reporting - const hash = dep.hash orelse continue; - if (hash.len != hex_multihash_len) continue; - const gop = try all_modules.getOrPut(gpa, hash[0..hex_multihash_len].*); - if (!gop.found_existing) gop.value_ptr.* = null; - } - - root_prog_node.setEstimatedTotalItems(all_modules.count()); - - if (this_hash == null) { - try dependencies_source.writer().writeAll("pub const packages = struct {\n"); - } - - for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, *dep| { - var fetch_location = try FetchLocation.init(gpa, dep.*, directory, report); - defer fetch_location.deinit(gpa); - - // Directories do not provide a hash in build.zig.zon. - // Hash the path to the module rather than its contents. 
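-        // In other words (see computePathHash below), a dependency at relative
-        // path "libs/foo" under a project rooted at "/home/user/proj" gets the
-        // identity
-        //
-        //   hash = Manifest.Hash.hash("/home/user/proj/libs/foo")
-        //
-        // (paths illustrative), so the same directory always maps to the same
-        // module without reading its contents.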
- const sub_mod, const found_existing = if (fetch_location == .directory) - try getDirectoryModule(gpa, fetch_location, directory, all_modules, dep, report) - else - try getCachedPackage( - gpa, - global_cache_directory, - dep.*, - all_modules, - root_prog_node, - ) orelse .{ - try fetchAndUnpack( - fetch_location, - thread_pool, - http_client, - directory, - global_cache_directory, - dep.*, - report, - all_modules, - root_prog_node, - name, - ), - false, - }; - - assert(dep.hash != null); - - switch (sub_mod) { - .zig_pkg => |sub_pkg| { - if (!found_existing) { - try sub_pkg.fetchAndAddDependencies( - deps_pkg, - arena, - thread_pool, - http_client, - sub_pkg.root_src_directory, - global_cache_directory, - local_cache_directory, - dependencies_source, - error_bundle, - all_modules, - root_prog_node, - dep.hash.?, - ); - } - - try pkg.add(gpa, name, sub_pkg); - if (deps_pkg.table.get(dep.hash.?)) |other_sub| { - // This should be the same package (and hence module) since it's the same hash - // TODO: dedup multiple versions of the same package - assert(other_sub == sub_pkg); - } else { - try deps_pkg.add(gpa, dep.hash.?, sub_pkg); - } - }, - .non_zig_pkg => |sub_pkg| { - if (!found_existing) { - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{}}; - \\ }}; - \\ - , .{ - std.zig.fmtId(dep.hash.?), - std.zig.fmtEscapes(sub_pkg.root_src_directory.path.?), - }); - } - }, - } - } - - if (this_hash) |hash| { - try dependencies_source.writer().print( - \\ pub const {} = struct {{ - \\ pub const build_root = "{}"; - \\ pub const build_zig = @import("{}"); - \\ pub const deps: []const struct {{ []const u8, []const u8 }} = &.{{ - \\ - , .{ - std.zig.fmtId(hash), - std.zig.fmtEscapes(pkg.root_src_directory.path.?), - std.zig.fmtEscapes(hash), - }); - for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| { - try dependencies_source.writer().print( - " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(dep.hash.?) }, - ); - } - try dependencies_source.writer().writeAll( - \\ }; - \\ }; - \\ - ); - } else { - try dependencies_source.writer().writeAll( - \\}; - \\ - \\pub const root_deps: []const struct { []const u8, []const u8 } = &.{ - \\ - ); - for (manifest.dependencies.keys(), manifest.dependencies.values()) |name, dep| { - try dependencies_source.writer().print( - " .{{ \"{}\", \"{}\" }},\n", - .{ std.zig.fmtEscapes(name), std.zig.fmtEscapes(dep.hash.?) 
}, - ); - } - try dependencies_source.writer().writeAll("};\n"); - } -} - pub fn createFilePkg( gpa: Allocator, cache_directory: Compilation.Directory, @@ -450,484 +233,11 @@ pub fn createFilePkg( const hex_digest = hh.final(); const o_dir_sub_path = "o" ++ fs.path.sep_str ++ hex_digest; - try renameTmpIntoCache(cache_directory.handle, tmp_dir_sub_path, o_dir_sub_path); + try Fetch.renameTmpIntoCache(cache_directory.handle, tmp_dir_sub_path, o_dir_sub_path); return createWithDir(gpa, cache_directory, o_dir_sub_path, basename); } -pub const Report = struct { - ast: ?*const std.zig.Ast, - directory: Compilation.Directory, - error_bundle: *std.zig.ErrorBundle.Wip, - - fn fail( - report: Report, - tok: std.zig.Ast.TokenIndex, - comptime fmt_string: []const u8, - fmt_args: anytype, - ) error{ PackageFetchFailed, OutOfMemory } { - const msg = try report.error_bundle.printString(fmt_string, fmt_args); - return failMsg(report, tok, msg); - } - - fn failMsg( - report: Report, - tok: std.zig.Ast.TokenIndex, - msg: u32, - ) error{ PackageFetchFailed, OutOfMemory } { - const gpa = report.error_bundle.gpa; - - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - - const eb = report.error_bundle; - - if (report.ast) |ast| { - try addErrorMessage(ast, file_path, eb, 0, msg, tok, 0); - } else { - try eb.addRootErrorMessage(.{ - .msg = msg, - .src_loc = .none, - .notes_len = 0, - }); - } - - return error.PackageFetchFailed; - } - - fn addErrorWithNotes( - report: Report, - notes_len: u32, - msg: Manifest.ErrorMessage, - ) error{OutOfMemory}!void { - const eb = report.error_bundle; - const msg_str = try eb.addString(msg.msg); - if (report.ast) |ast| { - const gpa = eb.gpa; - const file_path = try report.directory.join(gpa, &.{Manifest.basename}); - defer gpa.free(file_path); - return addErrorMessage(ast, file_path, eb, notes_len, msg_str, msg.tok, msg.off); - } else { - return eb.addRootErrorMessage(.{ - .msg = msg_str, - .src_loc = .none, - .notes_len = notes_len, - }); - } - } - - fn addErrorMessage( - ast: *const std.zig.Ast, - file_path: []const u8, - eb: *std.zig.ErrorBundle.Wip, - notes_len: u32, - msg_str: u32, - msg_tok: std.zig.Ast.TokenIndex, - msg_off: u32, - ) error{OutOfMemory}!void { - const token_starts = ast.tokens.items(.start); - const start_loc = ast.tokenLocation(0, msg_tok); - - try eb.addRootErrorMessage(.{ - .msg = msg_str, - .src_loc = try eb.addSourceLocation(.{ - .src_path = try eb.addString(file_path), - .span_start = token_starts[msg_tok], - .span_end = @as(u32, @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len)), - .span_main = token_starts[msg_tok] + msg_off, - .line = @intCast(start_loc.line), - .column = @as(u32, @intCast(start_loc.column)), - .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]), - }), - .notes_len = notes_len, - }); - } -}; - -pub const FetchLocation = union(enum) { - /// The relative path to a file or directory. - /// This may be a file that requires unpacking (such as a .tar.gz), - /// or the path to the root directory of a package. 
- file: []const u8, - directory: []const u8, - http_request: std.Uri, - git_request: std.Uri, - - pub fn init( - gpa: Allocator, - dep: Manifest.Dependency, - root_dir: Compilation.Directory, - report: Report, - ) !FetchLocation { - switch (dep.location) { - .url => |url| { - const uri = std.Uri.parse(url) catch |err| switch (err) { - error.UnexpectedCharacter => return report.fail(dep.location_tok, "failed to parse dependency location as URI", .{}), - else => return err, - }; - return initUri(uri, dep.location_tok, report); - }, - .path => |path| { - if (fs.path.isAbsolute(path)) { - return report.fail(dep.location_tok, "absolute paths are not allowed. Use a relative path instead", .{}); - } - - const is_dir = isDirectory(root_dir, path) catch |err| switch (err) { - error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{path}), - else => return err, - }; - - return if (is_dir) - .{ .directory = try gpa.dupe(u8, path) } - else - .{ .file = try gpa.dupe(u8, path) }; - }, - } - } - - pub fn initUri(uri: std.Uri, location_tok: std.zig.Ast.TokenIndex, report: Report) !FetchLocation { - if (ascii.eqlIgnoreCase(uri.scheme, "file")) { - return report.fail(location_tok, "'file' scheme is not allowed for URLs. Use '.path' instead", .{}); - } else if (ascii.eqlIgnoreCase(uri.scheme, "http") or ascii.eqlIgnoreCase(uri.scheme, "https")) { - return .{ .http_request = uri }; - } else if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or ascii.eqlIgnoreCase(uri.scheme, "git+https")) { - return .{ .git_request = uri }; - } else { - return report.fail(location_tok, "unsupported URL scheme: {s}", .{uri.scheme}); - } - } - - pub fn deinit(f: *FetchLocation, gpa: Allocator) void { - switch (f.*) { - .file, .directory => |path| gpa.free(path), - .http_request, .git_request => {}, - } - f.* = undefined; - } - - pub fn fetch( - f: FetchLocation, - gpa: Allocator, - root_dir: Compilation.Directory, - http_client: *std.http.Client, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, - ) !ReadableResource { - switch (f) { - .file => |file| { - const owned_path = try gpa.dupe(u8, file); - errdefer gpa.free(owned_path); - return .{ - .path = owned_path, - .resource = .{ .file = try root_dir.handle.openFile(file, .{}) }, - }; - }, - .http_request => |uri| { - var h = std.http.Headers{ .allocator = gpa }; - defer h.deinit(); - - var req = try http_client.request(.GET, uri, h, .{}); - errdefer req.deinit(); - - try req.start(.{}); - try req.wait(); - - if (req.response.status != .ok) { - return report.fail(dep_location_tok, "expected response status '200 OK' got '{} {s}'", .{ - @intFromEnum(req.response.status), - req.response.status.phrase() orelse "", - }); - } - - return .{ - .path = try gpa.dupe(u8, uri.path), - .resource = .{ .http_request = req }, - }; - }, - .git_request => |uri| { - var transport_uri = uri; - transport_uri.scheme = uri.scheme["git+".len..]; - var redirect_uri: []u8 = undefined; - var session: git.Session = .{ .transport = http_client, .uri = transport_uri }; - session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) { - error.Redirected => { - defer gpa.free(redirect_uri); - return report.fail(dep_location_tok, "repository moved to {s}", .{redirect_uri}); - }, - else => |other| return other, - }; - - const want_oid = want_oid: { - const want_ref = uri.fragment orelse "HEAD"; - if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {} - - const want_ref_head = try std.fmt.allocPrint(gpa, "refs/heads/{s}", .{want_ref}); - defer 
gpa.free(want_ref_head); - const want_ref_tag = try std.fmt.allocPrint(gpa, "refs/tags/{s}", .{want_ref}); - defer gpa.free(want_ref_tag); - - var ref_iterator = try session.listRefs(gpa, .{ - .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag }, - .include_peeled = true, - }); - defer ref_iterator.deinit(); - while (try ref_iterator.next()) |ref| { - if (mem.eql(u8, ref.name, want_ref) or - mem.eql(u8, ref.name, want_ref_head) or - mem.eql(u8, ref.name, want_ref_tag)) - { - break :want_oid ref.peeled orelse ref.oid; - } - } - return report.fail(dep_location_tok, "ref not found: {s}", .{want_ref}); - }; - if (uri.fragment == null) { - const notes_len = 1; - try report.addErrorWithNotes(notes_len, .{ - .tok = dep_location_tok, - .off = 0, - .msg = "url field is missing an explicit ref", - }); - const eb = report.error_bundle; - const notes_start = try eb.reserveNotes(notes_len); - eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("try .url = \"{+/}#{}\",", .{ uri, std.fmt.fmtSliceHexLower(&want_oid) }), - })); - return error.PackageFetchFailed; - } - - var want_oid_buf: [git.fmt_oid_length]u8 = undefined; - _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{std.fmt.fmtSliceHexLower(&want_oid)}) catch unreachable; - var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf}); - errdefer fetch_stream.deinit(); - - return .{ - .path = try gpa.dupe(u8, &want_oid_buf), - .resource = .{ .git_fetch_stream = fetch_stream }, - }; - }, - .directory => unreachable, // Directories do not require fetching - } - } -}; - -pub const ReadableResource = struct { - path: []const u8, - resource: union(enum) { - file: fs.File, - http_request: std.http.Client.Request, - git_fetch_stream: git.Session.FetchStream, - dir: fs.IterableDir, - }, - - /// Unpack the package into the global cache directory. - /// If `ps` does not require unpacking (for example, if it is a directory), then no caching is performed. - /// In either case, the hash is computed and returned along with the path to the package. 
- pub fn unpack( - rr: *ReadableResource, - allocator: Allocator, - thread_pool: *ThreadPool, - global_cache_directory: Compilation.Directory, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, - pkg_prog_node: *std.Progress.Node, - ) !PackageLocation { - switch (rr.resource) { - inline .file, .http_request, .git_fetch_stream, .dir => |*r, tag| { - const s = fs.path.sep_str; - const rand_int = std.crypto.random.int(u64); - const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int); - - const actual_hash = h: { - var tmp_directory: Compilation.Directory = d: { - const path = try global_cache_directory.join(allocator, &.{tmp_dir_sub_path}); - errdefer allocator.free(path); - - const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}); - errdefer iterable_dir.close(); - - break :d .{ - .path = path, - .handle = iterable_dir.dir, - }; - }; - defer tmp_directory.closeAndFree(allocator); - - if (tag != .dir) { - const opt_content_length = try rr.getSize(); - - var prog_reader: ProgressReader(@TypeOf(r.reader())) = .{ - .child_reader = r.reader(), - .prog_node = pkg_prog_node, - .unit = if (opt_content_length) |content_length| unit: { - const kib = content_length / 1024; - const mib = kib / 1024; - if (mib > 0) { - pkg_prog_node.setEstimatedTotalItems(@intCast(mib)); - pkg_prog_node.setUnit("MiB"); - break :unit .mib; - } else { - pkg_prog_node.setEstimatedTotalItems(@intCast(@max(1, kib))); - pkg_prog_node.setUnit("KiB"); - break :unit .kib; - } - } else .any, - }; - - switch (try rr.getFileType(dep_location_tok, report)) { - .tar => try unpackTarball(allocator, prog_reader.reader(), tmp_directory.handle, dep_location_tok, report), - .@"tar.gz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.gzip), - .@"tar.xz" => try unpackTarballCompressed(allocator, prog_reader, tmp_directory.handle, dep_location_tok, report, std.compress.xz), - .git_pack => try unpackGitPack(allocator, &prog_reader, git.parseOid(rr.path) catch unreachable, tmp_directory.handle, dep_location_tok, report), - } - } else { - // Recursive directory copy. - var it = try r.walk(allocator); - defer it.deinit(); - while (try it.next()) |entry| { - switch (entry.kind) { - .directory => try tmp_directory.handle.makePath(entry.path), - .file => try r.dir.copyFile( - entry.path, - tmp_directory.handle, - entry.path, - .{}, - ), - .sym_link => { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const link_name = try r.dir.readLink(entry.path, &buf); - // TODO: if this would create a symlink to outside - // the destination directory, fail with an error instead. 
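-                                // One possible shape for that check (hypothetical,
-                                // not implemented here): resolve the target against
-                                // the link's parent directory and reject it if it
-                                // escapes the package root, e.g.
-                                //
-                                //   const resolved = try fs.path.resolve(gpa, &.{
-                                //       fs.path.dirname(entry.path) orelse ".", link_name,
-                                //   });
-                                //   if (mem.startsWith(u8, resolved, ".." ++ fs.path.sep_str))
-                                //       return error.IllegalSymlinkInPackage;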
- try tmp_directory.handle.symLink(link_name, entry.path, .{}); - }, - else => return error.IllegalFileTypeInPackage, - } - } - } - - break :h try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle }); - }; - - const pkg_dir_sub_path = "p" ++ s ++ Manifest.hexDigest(actual_hash); - const unpacked_path = try global_cache_directory.join(allocator, &.{pkg_dir_sub_path}); - defer allocator.free(unpacked_path); - - const relative_unpacked_path = try fs.path.relative(allocator, global_cache_directory.path.?, unpacked_path); - errdefer allocator.free(relative_unpacked_path); - try renameTmpIntoCache(global_cache_directory.handle, tmp_dir_sub_path, relative_unpacked_path); - - return .{ - .hash = actual_hash, - .relative_unpacked_path = relative_unpacked_path, - }; - }, - } - } - - const FileType = enum { - tar, - @"tar.gz", - @"tar.xz", - git_pack, - }; - - pub fn getSize(rr: ReadableResource) !?u64 { - switch (rr.resource) { - .file => |f| return (try f.metadata()).size(), - // TODO: Handle case of chunked content-length - .http_request => |req| return req.response.content_length, - .git_fetch_stream => |stream| return stream.request.response.content_length, - .dir => unreachable, - } - } - - pub fn getFileType( - rr: ReadableResource, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, - ) !FileType { - switch (rr.resource) { - .file => { - return fileTypeFromPath(rr.path) orelse - return report.fail(dep_location_tok, "unknown file type", .{}); - }, - .http_request => |req| { - const content_type = req.response.headers.getFirstValue("Content-Type") orelse - return report.fail(dep_location_tok, "missing 'Content-Type' header", .{}); - - // If the response has a different content type than the URI indicates, override - // the previously assumed file type. 
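-                // For example, a URL ending in ".tar" served with
-                // 'Content-Type: application/gzip' is unpacked as tar.gz:
-                // the header, not the file extension, decides.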
-                if (ascii.eqlIgnoreCase(content_type, "application/x-tar")) return .tar;
-
-                return if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
-                    ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
-                    ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
-                    .@"tar.gz"
-                else if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
-                    .@"tar.xz"
-                else if (ascii.eqlIgnoreCase(content_type, "application/octet-stream")) ty: {
-                    // support gitlab tarball urls such as https://gitlab.com/<namespace>/<project>/-/archive/<sha>/<project>-<sha>.tar.gz
-                    // whose content-disposition header is: 'attachment; filename="<project>-<sha>.tar.gz"'
-                    const content_disposition = req.response.headers.getFirstValue("Content-Disposition") orelse
-                        return report.fail(dep_location_tok, "missing 'Content-Disposition' header for Content-Type=application/octet-stream", .{});
-                    break :ty getAttachmentType(content_disposition) orelse
-                        return report.fail(dep_location_tok, "unsupported 'Content-Disposition' header value: '{s}' for Content-Type=application/octet-stream", .{content_disposition});
-                } else return report.fail(dep_location_tok, "unrecognized value for 'Content-Type' header: {s}", .{content_type});
-            },
-            .git_fetch_stream => return .git_pack,
-            .dir => unreachable,
-        }
-    }
-
-    fn fileTypeFromPath(file_path: []const u8) ?FileType {
-        if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar;
-        if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz";
-        if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz";
-        return null;
-    }
-
-    fn getAttachmentType(content_disposition: []const u8) ?FileType {
-        const disposition_type_end = ascii.indexOfIgnoreCase(content_disposition, "attachment;") orelse return null;
-
-        var value_start = ascii.indexOfIgnoreCasePos(content_disposition, disposition_type_end + 1, "filename") orelse return null;
-        value_start += "filename".len;
-        if (content_disposition[value_start] == '*') {
-            value_start += 1;
-        }
-        if (content_disposition[value_start] != '=') return null;
-        value_start += 1;
-
-        var value_end = mem.indexOfPos(u8, content_disposition, value_start, ";") orelse content_disposition.len;
-        if (content_disposition[value_end - 1] == '\"') {
-            value_end -= 1;
-        }
-        return fileTypeFromPath(content_disposition[value_start..value_end]);
-    }
-
-    pub fn deinit(rr: *ReadableResource, gpa: Allocator) void {
-        gpa.free(rr.path);
-        switch (rr.resource) {
-            .file => |file| file.close(),
-            .http_request => |*req| req.deinit(),
-            .git_fetch_stream => |*stream| stream.deinit(),
-            .dir => |*dir| dir.close(),
-        }
-        rr.* = undefined;
-    }
-};
-
-pub const PackageLocation = struct {
-    /// For packages that require unpacking, this is the hash of the package contents.
-    /// For directories, this is the hash of the absolute file path.
-    hash: [Manifest.Hash.digest_length]u8,
-    relative_unpacked_path: []const u8,
-
-    pub fn deinit(pl: *PackageLocation, allocator: Allocator) void {
-        allocator.free(pl.relative_unpacked_path);
-        pl.* = undefined;
-    }
-};
-
 const hex_multihash_len = 2 * Manifest.multihash_len;
 const MultiHashHexDigest = [hex_multihash_len]u8;
@@ -939,411 +249,3 @@ const DependencyModule = union(enum) {
 /// If the value is `null`, the package is a known dependency, but has not yet
 /// been fetched.
pub const AllModules = std.AutoHashMapUnmanaged(MultiHashHexDigest, ?DependencyModule); - -fn ProgressReader(comptime ReaderType: type) type { - return struct { - child_reader: ReaderType, - bytes_read: u64 = 0, - prog_node: *std.Progress.Node, - unit: enum { - kib, - mib, - any, - }, - - pub const Error = ReaderType.Error; - pub const Reader = std.io.Reader(*@This(), Error, read); - - pub fn read(self: *@This(), buf: []u8) Error!usize { - const amt = try self.child_reader.read(buf); - self.bytes_read += amt; - const kib = self.bytes_read / 1024; - const mib = kib / 1024; - switch (self.unit) { - .kib => self.prog_node.setCompletedItems(@intCast(kib)), - .mib => self.prog_node.setCompletedItems(@intCast(mib)), - .any => { - if (mib > 0) { - self.prog_node.setUnit("MiB"); - self.prog_node.setCompletedItems(@intCast(mib)); - } else { - self.prog_node.setUnit("KiB"); - self.prog_node.setCompletedItems(@intCast(kib)); - } - }, - } - self.prog_node.activate(); - return amt; - } - - pub fn reader(self: *@This()) Reader { - return .{ .context = self }; - } - }; -} - -/// Get a cached package if it exists. -/// Returns `null` if the package has not been cached -/// If the package exists in the cache, returns a pointer to the package and a -/// boolean indicating whether this package has already been seen in the build -/// (i.e. whether or not its transitive dependencies have been fetched). -fn getCachedPackage( - gpa: Allocator, - global_cache_directory: Compilation.Directory, - dep: Manifest.Dependency, - all_modules: *AllModules, - root_prog_node: *std.Progress.Node, -) !?struct { DependencyModule, bool } { - const s = fs.path.sep_str; - // Check if the expected_hash is already present in the global package - // cache, and thereby avoid both fetching and unpacking. - if (dep.hash) |h| { - const hex_digest = h[0..hex_multihash_len]; - const pkg_dir_sub_path = "p" ++ s ++ hex_digest; - - var pkg_dir = global_cache_directory.handle.openDir(pkg_dir_sub_path, .{}) catch |err| switch (err) { - error.FileNotFound => return null, - else => |e| return e, - }; - errdefer pkg_dir.close(); - - // The compiler has a rule that a file must not be included in multiple modules, - // so we must detect if a module has been created for this package and reuse it. - const gop = try all_modules.getOrPut(gpa, hex_digest.*); - if (gop.found_existing) { - if (gop.value_ptr.*) |mod| { - return .{ mod, true }; - } - } - - root_prog_node.completeOne(); - - const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; - const basename = if (is_zig_mod) build_zig_basename else ""; - const pkg = try createWithDir(gpa, global_cache_directory, pkg_dir_sub_path, basename); - - const module: DependencyModule = if (is_zig_mod) - .{ .zig_pkg = pkg } - else - .{ .non_zig_pkg = pkg }; - - try all_modules.put(gpa, hex_digest.*, module); - return .{ module, false }; - } - - return null; -} - -fn getDirectoryModule( - gpa: Allocator, - fetch_location: FetchLocation, - directory: Compilation.Directory, - all_modules: *AllModules, - dep: *Manifest.Dependency, - report: Report, -) !struct { DependencyModule, bool } { - assert(fetch_location == .directory); - - if (dep.hash != null) { - return report.fail(dep.hash_tok, "hash not allowed for directory package", .{}); - } - - const hash = try computePathHash(gpa, directory, fetch_location.directory); - const hex_digest = Manifest.hexDigest(hash); - dep.hash = try gpa.dupe(u8, &hex_digest); - - // There is no fixed location to check for directory modules. 
- // Instead, check whether it is already listed in all_modules. - if (all_modules.get(hex_digest)) |mod| return .{ mod.?, true }; - - var pkg_dir = directory.handle.openDir(fetch_location.directory, .{}) catch |err| switch (err) { - error.FileNotFound => return report.fail(dep.location_tok, "file not found: {s}", .{fetch_location.directory}), - else => |e| return e, - }; - defer pkg_dir.close(); - - const is_zig_mod = if (pkg_dir.access(build_zig_basename, .{})) |_| true else |_| false; - const basename = if (is_zig_mod) build_zig_basename else ""; - - const pkg = try createWithDir(gpa, directory, fetch_location.directory, basename); - const module: DependencyModule = if (is_zig_mod) - .{ .zig_pkg = pkg } - else - .{ .non_zig_pkg = pkg }; - - try all_modules.put(gpa, hex_digest, module); - return .{ module, false }; -} - -fn fetchAndUnpack( - fetch_location: FetchLocation, - thread_pool: *ThreadPool, - http_client: *std.http.Client, - directory: Compilation.Directory, - global_cache_directory: Compilation.Directory, - dep: Manifest.Dependency, - report: Report, - all_modules: *AllModules, - root_prog_node: *std.Progress.Node, - /// This does not have to be any form of canonical or fully-qualified name: it - /// is only intended to be human-readable for progress reporting. - name_for_prog: []const u8, -) !DependencyModule { - assert(fetch_location != .directory); - - const gpa = http_client.allocator; - - var pkg_prog_node = root_prog_node.start(name_for_prog, 0); - defer pkg_prog_node.end(); - pkg_prog_node.activate(); - - var readable_resource = try fetch_location.fetch(gpa, directory, http_client, dep.location_tok, report); - defer readable_resource.deinit(gpa); - - var package_location = try readable_resource.unpack( - gpa, - thread_pool, - global_cache_directory, - dep.location_tok, - report, - &pkg_prog_node, - ); - defer package_location.deinit(gpa); - - const actual_hex = Manifest.hexDigest(package_location.hash); - if (dep.hash) |h| { - if (!mem.eql(u8, h, &actual_hex)) { - return report.fail(dep.hash_tok, "hash mismatch: expected: {s}, found: {s}", .{ - h, actual_hex, - }); - } - } else { - const notes_len = 1; - try report.addErrorWithNotes(notes_len, .{ - .tok = dep.location_tok, - .off = 0, - .msg = "dependency is missing hash field", - }); - const eb = report.error_bundle; - const notes_start = try eb.reserveNotes(notes_len); - eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}), - })); - return error.PackageFetchFailed; - } - - const build_zig_path = try fs.path.join(gpa, &.{ package_location.relative_unpacked_path, build_zig_basename }); - defer gpa.free(build_zig_path); - - const is_zig_mod = if (global_cache_directory.handle.access(build_zig_path, .{})) |_| true else |_| false; - const basename = if (is_zig_mod) build_zig_basename else ""; - const pkg = try createWithDir(gpa, global_cache_directory, package_location.relative_unpacked_path, basename); - const module: DependencyModule = if (is_zig_mod) - .{ .zig_pkg = pkg } - else - .{ .non_zig_pkg = pkg }; - - try all_modules.put(gpa, actual_hex, module); - return module; -} - -fn unpackTarballCompressed( - gpa: Allocator, - reader: anytype, - out_dir: fs.Dir, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, - comptime Compression: type, -) !void { - var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader); - - var decompress = try Compression.decompress(gpa, br.reader()); - defer 
decompress.deinit(); - - return unpackTarball(gpa, decompress.reader(), out_dir, dep_location_tok, report); -} - -fn unpackTarball( - gpa: Allocator, - reader: anytype, - out_dir: fs.Dir, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, -) !void { - var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - - try std.tar.pipeToFileSystem(out_dir, reader, .{ - .diagnostics = &diagnostics, - .strip_components = 1, - // TODO: we would like to set this to executable_bit_only, but two - // things need to happen before that: - // 1. the tar implementation needs to support it - // 2. the hashing algorithm here needs to support detecting the is_executable - // bit on Windows from the ACLs (see the isExecutable function). - .mode_mode = .ignore, - }); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try report.addErrorWithNotes(notes_len, .{ - .tok = dep_location_tok, - .off = 0, - .msg = "unable to unpack tarball", - }); - const eb = report.error_bundle; - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) |item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - .unsupported_file_type => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{ - info.file_name, @intFromEnum(info.file_type), - }), - })); - }, - } - } - return error.InvalidTarball; - } -} - -fn unpackGitPack( - gpa: Allocator, - reader: anytype, - want_oid: git.Oid, - out_dir: fs.Dir, - dep_location_tok: std.zig.Ast.TokenIndex, - report: Report, -) !void { - // The .git directory is used to store the packfile and associated index, but - // we do not attempt to replicate the exact structure of a real .git - // directory, since that isn't relevant for fetching a package. 
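-    // Concretely, the only layout created while fetching is:
-    //
-    //   .git/pkg.pack  -- the raw packfile streamed from the server
-    //   .git/pkg.idx   -- the index produced by git.indexPack
-    //
-    // and the whole .git directory is deleted again after checkout succeeds.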
- { - var pack_dir = try out_dir.makeOpenPath(".git", .{}); - defer pack_dir.close(); - var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true }); - defer pack_file.close(); - var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init(); - try fifo.pump(reader.reader(), pack_file.writer()); - try pack_file.sync(); - - var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true }); - defer index_file.close(); - { - var index_prog_node = reader.prog_node.start("Index pack", 0); - defer index_prog_node.end(); - index_prog_node.activate(); - var index_buffered_writer = std.io.bufferedWriter(index_file.writer()); - try git.indexPack(gpa, pack_file, index_buffered_writer.writer()); - try index_buffered_writer.flush(); - try index_file.sync(); - } - - { - var checkout_prog_node = reader.prog_node.start("Checkout", 0); - defer checkout_prog_node.end(); - checkout_prog_node.activate(); - var repository = try git.Repository.init(gpa, pack_file, index_file); - defer repository.deinit(); - var diagnostics: git.Diagnostics = .{ .allocator = gpa }; - defer diagnostics.deinit(); - try repository.checkout(out_dir, want_oid, &diagnostics); - - if (diagnostics.errors.items.len > 0) { - const notes_len: u32 = @intCast(diagnostics.errors.items.len); - try report.addErrorWithNotes(notes_len, .{ - .tok = dep_location_tok, - .off = 0, - .msg = "unable to unpack packfile", - }); - const eb = report.error_bundle; - const notes_start = try eb.reserveNotes(notes_len); - for (diagnostics.errors.items, notes_start..) |item, note_i| { - switch (item) { - .unable_to_create_sym_link => |info| { - eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{ - .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{ - info.file_name, info.link_name, @errorName(info.code), - }), - })); - }, - } - } - return error.InvalidGitPack; - } - } - } - - try out_dir.deleteTree(".git"); -} - -/// Compute the hash of a file path. -fn computePathHash(gpa: Allocator, dir: Compilation.Directory, path: []const u8) ![Manifest.Hash.digest_length]u8 { - const resolved_path = try std.fs.path.resolve(gpa, &.{ dir.path.?, path }); - defer gpa.free(resolved_path); - var hasher = Manifest.Hash.init(.{}); - hasher.update(resolved_path); - return hasher.finalResult(); -} - -fn isDirectory(root_dir: Compilation.Directory, path: []const u8) !bool { - var dir = root_dir.handle.openDir(path, .{}) catch |err| switch (err) { - error.NotDir => return false, - else => return err, - }; - defer dir.close(); - return true; -} - -fn renameTmpIntoCache( - cache_dir: fs.Dir, - tmp_dir_sub_path: []const u8, - dest_dir_sub_path: []const u8, -) !void { - assert(dest_dir_sub_path[1] == fs.path.sep); - var handled_missing_dir = false; - while (true) { - cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) { - error.FileNotFound => { - if (handled_missing_dir) return err; - cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) { - error.PathAlreadyExists => handled_missing_dir = true, - else => |e| return e, - }; - continue; - }, - error.PathAlreadyExists, error.AccessDenied => { - // Package has been already downloaded and may already be in use on the system. 
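-                // For example, if two builds race to fetch the same package, the
-                // loser's rename of "tmp/<rand>" onto "p/<hash>" fails with
-                // PathAlreadyExists; the winner's copy is kept and the loser only
-                // cleans up its temporary directory below.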
-                cache_dir.deleteTree(tmp_dir_sub_path) catch |del_err| {
-                    std.log.warn("unable to delete temp directory: {s}", .{@errorName(del_err)});
-                };
-            },
-            else => |e| return e,
-        };
-        break;
-    }
-}
-
-test "getAttachmentType" {
-    try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attaChment; FILENAME=\"stuff.tar.gz\"; size=42"));
-    try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; filename*=\"stuff.tar.gz\""));
-    try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("ATTACHMENT; filename=\"stuff.tar.xz\""));
-    try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.xz"), ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar.xz\""));
-    try std.testing.expectEqual(@as(?ReadableResource.FileType, .@"tar.gz"), ReadableResource.getAttachmentType("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz"));
-
-    try std.testing.expect(ReadableResource.getAttachmentType("attachment FileName=\"stuff.tar.gz\"") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName=\"stuff.tar\"") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("attachment; FileName\"stuff.gz\"") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("attachment; size=42") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("inline; size=42") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\"; attachment;") == null);
-    try std.testing.expect(ReadableResource.getAttachmentType("FileName=\"stuff.tar.gz\";") == null);
-}
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
new file mode 100644
index 0000000000..b3b4667e40
--- /dev/null
+++ b/src/Package/Fetch.zig
@@ -0,0 +1,1012 @@
+//! Represents one independent job whose responsibility is to:
+//!
+//! 1. Check the global zig package cache to see if the hash already exists.
+//!    If so, load, parse, and validate the build.zig.zon file therein, and
+//!    goto step 8. Likewise if the location is a relative path, treat this
+//!    the same as a cache hit. Otherwise, proceed.
+//! 2. Fetch and unpack a URL into a temporary directory.
+//! 3. Load, parse, and validate the build.zig.zon file therein. It is allowed
+//!    for the file to be missing, in which case this fetched package is considered
+//!    to be a "naked" package.
+//! 4. Apply inclusion rules of the build.zig.zon to the temporary directory by
+//!    deleting excluded files. If any errors occurred for files that were
+//!    ultimately excluded, those errors should be ignored, such as failure to
+//!    create symlinks that weren't supposed to be included anyway.
+//! 5. Compute the package hash based on the remaining files in the temporary
+//!    directory.
+//! 6. Rename the temporary directory into the global zig package cache
+//!    directory. If the hash already exists, delete the temporary directory and
+//!    leave the zig package cache directory untouched as it may be in use by the
+//!    system. This is done even if the hash is invalid, in case the package with
+//!    the different hash is used in the future.
+//! 7. Validate the computed hash against the expected hash. If invalid,
+//!    this job is done.
+//! 8. Spawn a new fetch job for each dependency in the manifest file. Use
+//!    a mutex and a hash map so that redundant jobs do not get queued up.
+//!
+//!
All of this must be done referring only to the state inside this struct
+//! because this work will be done in a dedicated thread.
+
+/// Try to avoid this as much as possible since the arena will have less contention.
gpa: Allocator,
+arena: std.heap.ArenaAllocator,
+location: Location,
+location_tok: std.zig.Ast.TokenIndex,
+hash_tok: std.zig.Ast.TokenIndex,
+global_cache: Cache.Directory,
+parent_package_root: Path,
+parent_manifest_ast: ?*const std.zig.Ast,
+prog_node: *std.Progress.Node,
+http_client: *std.http.Client,
+thread_pool: *ThreadPool,
+job_queue: *JobQueue,
+wait_group: *WaitGroup,
+/// Whether to apply the workaround for https://github.com/ziglang/zig/issues/17095;
+/// checked in `run` before hashing on Linux.
+work_around_btrfs_bug: bool,
+
+// Above this are fields provided as inputs to `run`.
+// Below this are fields populated by `run`.
+
+/// This will either be relative to `global_cache`, or to the build root of
+/// the root package.
+package_root: Path,
+error_bundle: std.zig.ErrorBundle.Wip,
+manifest: ?Manifest,
+manifest_ast: ?*std.zig.Ast,
+actual_hash: Digest,
+/// Fetch logic notices whether a package has a build.zig file and sets this flag.
+has_build_zig: bool,
+/// Indicates whether the task aborted due to an out-of-memory condition.
+oom_flag: bool,
+
+pub const JobQueue = struct {
+    mutex: std.Thread.Mutex = .{},
+};
+
+pub const Digest = [Manifest.Hash.digest_length]u8;
+pub const MultiHashHexDigest = [hex_multihash_len]u8;
+
+pub const Path = struct {
+    root_dir: Cache.Directory,
+    /// The path, relative to the root dir, that this `Path` represents.
+    /// Empty string means the root_dir is the path.
+    sub_path: []const u8 = "",
+};
+
+pub const Location = union(enum) {
+    remote: Remote,
+    relative_path: []const u8,
+
+    pub const Remote = struct {
+        url: []const u8,
+        /// If this is null it means the user omitted the hash field from a dependency.
+        /// It will be an error but the logic should still fetch and print the discovered hash.
+        hash: ?[hex_multihash_len]u8,
+    };
+};
+
+pub const RunError = error{
+    OutOfMemory,
+    /// This error code is intended to be handled by inspecting the
+    /// `error_bundle` field.
+    FetchFailed,
+};
+
+pub fn run(f: *Fetch) RunError!void {
+    const eb = &f.error_bundle;
+    const arena = f.arena.allocator();
+
+    // Check the global zig package cache to see if the hash already exists. If
+    // so, load, parse, and validate the build.zig.zon file therein, and skip
+    // ahead to queuing up jobs for dependencies. Likewise if the location is a
+    // relative path, treat this the same as a cache hit. Otherwise, proceed.
+
+    const remote = switch (f.location) {
+        .relative_path => |sub_path| {
+            if (fs.path.isAbsolute(sub_path)) return f.fail(
+                f.location_tok,
+                try eb.addString("expected path relative to build root; found absolute path"),
+            );
+            if (f.hash_tok != 0) return f.fail(
+                f.hash_tok,
+                try eb.addString("path-based dependencies are not hashed"),
+            );
+            f.package_root = try f.parent_package_root.join(arena, sub_path);
+            try loadManifest(f, f.package_root);
+            // Package hashes are used as unique identifiers for packages, so
+            // we still need one for relative paths.
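+            // For example (values illustrative): a dependency at "libs/foo"
+            // inside the project hashes the tuple (project prefix, "libs/foo"),
+            // while one inside the global cache hashes (cache prefix,
+            // "p/1220abcd.../libs/foo"), so the two namespaces can never
+            // collide even when the sub-paths are equal.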
+            const hash = h: {
+                var hasher = Manifest.Hash.init(.{});
+                // This hash is a tuple of:
+                // * whether it is relative to the global cache directory or to the root package
+                // * the relative file path from there to the build root of the package
+                hasher.update(if (f.package_root.root_dir.handle == f.global_cache.handle)
+                    &package_hash_prefix_cached
+                else
+                    &package_hash_prefix_project);
+                hasher.update(f.package_root.sub_path);
+                break :h hasher.finalResult();
+            };
+            return queueJobsForDeps(f, hash);
+        },
+        .remote => |remote| remote,
+    };
+    const s = fs.path.sep_str;
+    if (remote.hash) |expected_hash| {
+        const pkg_sub_path = "p" ++ s ++ expected_hash;
+        if (f.global_cache.handle.access(pkg_sub_path, .{})) |_| {
+            f.package_root = .{
+                .root_dir = f.global_cache,
+                .sub_path = pkg_sub_path,
+            };
+            try loadManifest(f, f.package_root);
+            return queueJobsForDeps(f, expected_hash);
+        } else |err| switch (err) {
+            error.FileNotFound => {},
+            else => |e| {
+                try eb.addRootErrorMessage(.{
+                    .msg = try eb.printString("unable to open global package cache directory '{s}': {s}", .{
+                        try f.global_cache.join(arena, &.{pkg_sub_path}), @errorName(e),
+                    }),
+                    .src_loc = .none,
+                    .notes_len = 0,
+                });
+                return error.FetchFailed;
+            },
+        }
+    }
+
+    // Fetch and unpack the remote into a temporary directory.
+
+    const uri = std.Uri.parse(remote.url) catch |err| return f.fail(
+        f.location_tok,
+        try eb.printString("invalid URI: {s}", .{@errorName(err)}),
+    );
+    const rand_int = std.crypto.random.int(u64);
+    const tmp_dir_sub_path = "tmp" ++ s ++ Manifest.hex64(rand_int);
+
+    var tmp_directory: Cache.Directory = .{
+        .path = try f.global_cache.join(arena, &.{tmp_dir_sub_path}),
+        .handle = (try f.global_cache.handle.makeOpenPathIterable(tmp_dir_sub_path, .{})).dir,
+    };
+    defer tmp_directory.handle.close();
+
+    var resource = try f.initResource(uri);
+    defer resource.deinit(); // releases more than memory
+
+    try f.unpackResource(&resource, uri.path, tmp_directory);
+
+    // Load, parse, and validate the unpacked build.zig.zon file. It is allowed
+    // for the file to be missing, in which case this fetched package is
+    // considered to be a "naked" package.
+    try loadManifest(f, .{ .root_dir = tmp_directory });
+
+    // Apply the manifest's inclusion rules to the temporary directory by
+    // deleting excluded files. If any error occurred for files that were
+    // ultimately excluded, those errors should be ignored, such as failure to
+    // create symlinks that weren't supposed to be included anyway.
+
+    // Empty directories have already been omitted by `unpackResource`.
+
+    const filter: Filter = .{
+        .include_paths = if (f.manifest) |m| m.paths else .{},
+    };
+
+    // Compute the package hash based on the remaining files in the temporary
+    // directory.
+
+    if (builtin.os.tag == .linux and f.work_around_btrfs_bug) {
+        // https://github.com/ziglang/zig/issues/17095
+        tmp_directory.handle.close();
+        const iterable_dir = f.global_cache.handle.makeOpenPathIterable(tmp_dir_sub_path, .{}) catch
+            @panic("btrfs workaround failed");
+        tmp_directory.handle = iterable_dir.dir;
+    }
+
+    f.actual_hash = try computeHash(f, .{ .dir = tmp_directory.handle }, filter);
+
+    // Rename the temporary directory into the global zig package cache
+    // directory. If the hash already exists, delete the temporary directory
+    // and leave the zig package cache directory untouched as it may be in use
+    // by the system. This is done even if the hash is invalid, in case the
+    // package with the different hash is used in the future.
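+    // E.g. (names illustrative) "tmp/d32ace9bf9d4b180" is renamed to
+    // "p/1220<64 hex digits>"; renameTmpIntoCache handles the already-exists
+    // and missing-parent-directory cases.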
+
+    const dest_pkg_sub_path = "p" ++ s ++ Manifest.hexDigest(f.actual_hash);
+    try renameTmpIntoCache(f.global_cache.handle, tmp_dir_sub_path, dest_pkg_sub_path);
+
+    // Validate the computed hash against the expected hash. If invalid, this
+    // job is done.
+
+    const actual_hex = Manifest.hexDigest(f.actual_hash);
+    if (remote.hash) |declared_hash| {
+        if (!std.mem.eql(u8, &declared_hash, &actual_hex)) {
+            return f.fail(f.hash_tok, try eb.printString(
+                "hash mismatch: manifest declares {s} but the fetched package has {s}",
+                .{ declared_hash, actual_hex },
+            ));
+        }
+    } else {
+        const notes_len = 1;
+        try f.addErrorWithNotes(notes_len, f.location_tok, "dependency is missing hash field");
+        const notes_start = try eb.reserveNotes(notes_len);
+        eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
+            .msg = try eb.printString("expected .hash = \"{s}\",", .{&actual_hex}),
+        }));
+        return error.FetchFailed;
+    }
+
+    // Spawn a new fetch job for each dependency in the manifest file. Use
+    // a mutex and a hash map so that redundant jobs do not get queued up.
+    return queueJobsForDeps(f, f.actual_hash);
+}
+
+/// This function populates `f.manifest` or leaves it `null`.
+fn loadManifest(f: *Fetch, pkg_root: Path) RunError!void {
+    const eb = &f.error_bundle;
+    const arena = f.arena.allocator();
+    const manifest_bytes = pkg_root.readFileAllocOptions(
+        arena,
+        Manifest.basename,
+        Manifest.max_bytes,
+        null,
+        1,
+        0,
+    ) catch |err| switch (err) {
+        error.FileNotFound => return,
+        else => |e| {
+            const file_path = try pkg_root.join(arena, .{Manifest.basename});
+            try eb.addRootErrorMessage(.{
+                .msg = try eb.printString("unable to load package manifest '{s}': {s}", .{
+                    file_path, @errorName(e),
+                }),
+                .src_loc = .none,
+                .notes_len = 0,
+            });
+            return error.FetchFailed;
+        },
+    };
+
+    const ast = try arena.create(std.zig.Ast);
+    ast.* = try std.zig.Ast.parse(arena, manifest_bytes, .zon);
+    f.manifest_ast = ast;
+
+    if (ast.errors.len > 0) {
+        const file_path = try pkg_root.join(arena, .{Manifest.basename});
+        try main.putAstErrorsIntoBundle(arena, ast.*, file_path, eb);
+        return error.FetchFailed;
+    }
+
+    f.manifest = try Manifest.parse(arena, ast.*);
+    const manifest = &f.manifest.?;
+
+    if (manifest.errors.len > 0) {
+        const file_path = try pkg_root.join(arena, .{Manifest.basename});
+        const token_starts = ast.tokens.items(.start);
+
+        for (manifest.errors) |msg| {
+            const start_loc = ast.tokenLocation(0, msg.tok);
+
+            try eb.addRootErrorMessage(.{
+                .msg = try eb.addString(msg.msg),
+                .src_loc = try eb.addSourceLocation(.{
+                    .src_path = try eb.addString(file_path),
+                    .span_start = token_starts[msg.tok],
+                    .span_end = @intCast(token_starts[msg.tok] + ast.tokenSlice(msg.tok).len),
+                    .span_main = token_starts[msg.tok] + msg.off,
+                    .line = @intCast(start_loc.line),
+                    .column = @intCast(start_loc.column),
+                    .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]),
+                }),
+                .notes_len = 0,
+            });
+        }
+        return error.FetchFailed;
+    }
+}
+
+fn queueJobsForDeps(f: *Fetch, hash: Digest) RunError!void {
+    // If the package does not have a build.zig.zon file then there are no dependencies.
+    const manifest = f.manifest orelse return;
+
+    const new_fetches = nf: {
+        // Grab the new tasks into a temporary buffer so we can unlock that mutex
+        // as fast as possible.
+        // This overallocates any fetches that get skipped by the `continue` in the
+        // loop below.
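+        // E.g. with three dependencies of which one is already queued by some
+        // other job, three `Fetch` slots are allocated but only two are
+        // initialized, and `break :nf` below yields the length-2 slice; the
+        // over-allocation is harmless because it lives in the arena.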
+        const new_fetches = try f.arena.alloc(Fetch, manifest.dependencies.count());
+        var new_fetch_index: usize = 0;
+
+        f.job_queue.lock();
+        defer f.job_queue.unlock();
+
+        // It is impossible for there to be a collision here. Consider all three cases:
+        // * Correct hash is provided by manifest.
+        //   - Redundant jobs are skipped in the loop below.
+        // * Incorrect hash is provided by manifest.
+        //   - Hash mismatch error emitted; `queueJobsForDeps` is not called.
+        // * Hash is not provided by manifest.
+        //   - Hash missing error emitted; `queueJobsForDeps` is not called.
+        try f.job_queue.finish(hash, f, new_fetches.len);
+
+        for (manifest.dependencies.values()) |dep| {
+            const location: Location = switch (dep.location) {
+                .url => |url| .{ .remote = .{
+                    .url = url,
+                    .hash = if (dep.hash) |h| h[0..hex_multihash_len].* else null,
+                } },
+                .path => |path| .{ .relative_path = path },
+            };
+            const new_fetch = &new_fetches[new_fetch_index];
+            const already_done = f.job_queue.add(location, new_fetch);
+            if (already_done) continue;
+            new_fetch_index += 1;
+
+            new_fetch.* = .{
+                .gpa = f.gpa,
+                .arena = std.heap.ArenaAllocator.init(f.gpa),
+                .location = location,
+                .location_tok = dep.location_tok,
+                .hash_tok = dep.hash_tok,
+                .global_cache = f.global_cache,
+                .parent_package_root = f.package_root,
+                .parent_manifest_ast = f.manifest_ast.?,
+                .prog_node = f.prog_node,
+                .http_client = f.http_client,
+                .thread_pool = f.thread_pool,
+                .job_queue = f.job_queue,
+                .wait_group = f.wait_group,
+
+                .package_root = undefined,
+                .error_bundle = .{},
+                .manifest = null,
+                .manifest_ast = null,
+                .actual_hash = undefined,
+                .has_build_zig = false,
+                .oom_flag = false,
+            };
+        }
+
+        break :nf new_fetches[0..new_fetch_index];
+    };
+
+    // Now it's time to give tasks to the thread pool.
+    for (new_fetches) |*new_fetch| {
+        f.wait_group.start();
+        f.thread_pool.spawn(workerRun, .{new_fetch}) catch |err| switch (err) {
+            error.OutOfMemory => {
+                new_fetch.oom_flag = true;
+                f.wait_group.finish();
+                continue;
+            },
+        };
+    }
+}
+
+fn workerRun(f: *Fetch) void {
+    defer f.wait_group.finish();
+    run(f) catch |err| switch (err) {
+        error.OutOfMemory => f.oom_flag = true,
+        error.FetchFailed => {}, // See `error_bundle`.
+ }; +} + +fn fail(f: *Fetch, msg_tok: std.zig.Ast.TokenIndex, msg_str: u32) RunError!void { + const ast = f.parent_manifest_ast; + const token_starts = ast.tokens.items(.start); + const start_loc = ast.tokenLocation(0, msg_tok); + const eb = &f.error_bundle; + const file_path = try f.parent_package_root.join(f.arena, Manifest.basename); + const msg_off = 0; + + try eb.addRootErrorMessage(.{ + .msg = msg_str, + .src_loc = try eb.addSourceLocation(.{ + .src_path = try eb.addString(file_path), + .span_start = token_starts[msg_tok], + .span_end = @intCast(token_starts[msg_tok] + ast.tokenSlice(msg_tok).len), + .span_main = token_starts[msg_tok] + msg_off, + .line = @intCast(start_loc.line), + .column = @intCast(start_loc.column), + .source_line = try eb.addString(ast.source[start_loc.line_start..start_loc.line_end]), + }), + .notes_len = 0, + }); + + return error.FetchFailed; +} + +const Resource = union(enum) { + file: fs.File, + http_request: std.http.Client.Request, + git_fetch_stream: git.Session.FetchStream, + dir: fs.IterableDir, +}; + +const FileType = enum { + tar, + @"tar.gz", + @"tar.xz", + git_pack, + + fn fromPath(file_path: []const u8) ?FileType { + if (ascii.endsWithIgnoreCase(file_path, ".tar")) return .tar; + if (ascii.endsWithIgnoreCase(file_path, ".tar.gz")) return .@"tar.gz"; + if (ascii.endsWithIgnoreCase(file_path, ".tar.xz")) return .@"tar.xz"; + return null; + } + + /// Parameter is a content-disposition header value. + fn fromContentDisposition(cd_header: []const u8) ?FileType { + const attach_end = ascii.indexOfIgnoreCase(cd_header, "attachment;") orelse + return null; + + var value_start = ascii.indexOfIgnoreCasePos(cd_header, attach_end + 1, "filename") orelse + return null; + value_start += "filename".len; + if (cd_header[value_start] == '*') { + value_start += 1; + } + if (cd_header[value_start] != '=') return null; + value_start += 1; + + var value_end = std.mem.indexOfPos(u8, cd_header, value_start, ";") orelse cd_header.len; + if (cd_header[value_end - 1] == '\"') { + value_end -= 1; + } + return fromPath(cd_header[value_start..value_end]); + } + + test fromContentDisposition { + try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attaChment; FILENAME=\"stuff.tar.gz\"; size=42")); + try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; filename*=\"stuff.tar.gz\"")); + try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("ATTACHMENT; filename=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?FileType, .@"tar.xz"), fromContentDisposition("attachment; FileName=\"stuff.tar.xz\"")); + try std.testing.expectEqual(@as(?FileType, .@"tar.gz"), fromContentDisposition("attachment; FileName*=UTF-8\'\'xyz%2Fstuff.tar.gz")); + + try std.testing.expect(fromContentDisposition("attachment FileName=\"stuff.tar.gz\"") == null); + try std.testing.expect(fromContentDisposition("attachment; FileName=\"stuff.tar\"") == null); + try std.testing.expect(fromContentDisposition("attachment; FileName\"stuff.gz\"") == null); + try std.testing.expect(fromContentDisposition("attachment; size=42") == null); + try std.testing.expect(fromContentDisposition("inline; size=42") == null); + try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\"; attachment;") == null); + try std.testing.expect(fromContentDisposition("FileName=\"stuff.tar.gz\";") == null); + } +}; + +fn initResource(f: *Fetch, uri: std.Uri) RunError!Resource { + const gpa = f.gpa; + const arena = 
f.arena.allocator();
+    const eb = &f.error_bundle;
+
+    if (ascii.eqlIgnoreCase(uri.scheme, "file")) return .{
+        .file = try f.parent_package_root.openFile(uri.path, .{}),
+    };
+
+    if (ascii.eqlIgnoreCase(uri.scheme, "http") or
+        ascii.eqlIgnoreCase(uri.scheme, "https"))
+    {
+        var h = std.http.Headers{ .allocator = gpa };
+        defer h.deinit();
+
+        var req = try f.http_client.request(.GET, uri, h, .{});
+        errdefer req.deinit(); // releases more than memory
+
+        try req.start(.{});
+        try req.wait();
+
+        if (req.response.status != .ok) {
+            return f.fail(f.location_tok, try eb.printString(
+                "expected response status '200 OK' got '{} {s}'",
+                .{ @intFromEnum(req.response.status), req.response.status.phrase() orelse "" },
+            ));
+        }
+
+        return .{ .http_request = req };
+    }
+
+    if (ascii.eqlIgnoreCase(uri.scheme, "git+http") or
+        ascii.eqlIgnoreCase(uri.scheme, "git+https"))
+    {
+        var transport_uri = uri;
+        transport_uri.scheme = uri.scheme["git+".len..];
+        var redirect_uri: []u8 = undefined;
+        var session: git.Session = .{ .transport = f.http_client, .uri = transport_uri };
+        session.discoverCapabilities(gpa, &redirect_uri) catch |e| switch (e) {
+            error.Redirected => {
+                defer gpa.free(redirect_uri);
+                return f.fail(f.location_tok, try eb.printString(
+                    "repository moved to {s}",
+                    .{redirect_uri},
+                ));
+            },
+            else => |other| return other,
+        };
+
+        const want_oid = want_oid: {
+            const want_ref = uri.fragment orelse "HEAD";
+            if (git.parseOid(want_ref)) |oid| break :want_oid oid else |_| {}
+
+            const want_ref_head = try std.fmt.allocPrint(arena, "refs/heads/{s}", .{want_ref});
+            const want_ref_tag = try std.fmt.allocPrint(arena, "refs/tags/{s}", .{want_ref});
+
+            var ref_iterator = try session.listRefs(gpa, .{
+                .ref_prefixes = &.{ want_ref, want_ref_head, want_ref_tag },
+                .include_peeled = true,
+            });
+            defer ref_iterator.deinit();
+            while (try ref_iterator.next()) |ref| {
+                if (std.mem.eql(u8, ref.name, want_ref) or
+                    std.mem.eql(u8, ref.name, want_ref_head) or
+                    std.mem.eql(u8, ref.name, want_ref_tag))
+                {
+                    break :want_oid ref.peeled orelse ref.oid;
+                }
+            }
+            return f.fail(f.location_tok, try eb.printString("ref not found: {s}", .{want_ref}));
+        };
+        if (uri.fragment == null) {
+            const notes_len = 1;
+            try f.addErrorWithNotes(notes_len, f.location_tok, "url field is missing an explicit ref");
+            const notes_start = try eb.reserveNotes(notes_len);
+            eb.extra.items[notes_start] = @intFromEnum(try eb.addErrorMessage(.{
+                .msg = try eb.printString("try .url = \"{+/}#{}\",", .{
+                    uri, std.fmt.fmtSliceHexLower(&want_oid),
+                }),
+            }));
+            return error.FetchFailed;
+        }
+
+        var want_oid_buf: [git.fmt_oid_length]u8 = undefined;
+        _ = std.fmt.bufPrint(&want_oid_buf, "{}", .{
+            std.fmt.fmtSliceHexLower(&want_oid),
+        }) catch unreachable;
+        var fetch_stream = try session.fetch(gpa, &.{&want_oid_buf});
+        errdefer fetch_stream.deinit();
+
+        return .{ .git_fetch_stream = fetch_stream };
+    }
+
+    return f.fail(f.location_tok, try eb.printString("unsupported URL scheme: {s}", .{uri.scheme}));
+}
+
+fn unpackResource(
+    f: *Fetch,
+    resource: *Resource,
+    uri_path: []const u8,
+    tmp_directory: Cache.Directory,
+) RunError!void {
+    const eb = &f.error_bundle;
+    const file_type = switch (resource.*) {
+        .file => FileType.fromPath(uri_path) orelse
+            return f.fail(f.location_tok, try eb.printString("unknown file type: '{s}'", .{uri_path})),
+
+        .http_request => |req| ft: {
+            // Content-Type takes first precedence.
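+            // Illustrative precedence: a response carrying
+            //   Content-Type: application/octet-stream
+            //   Content-Disposition: attachment; filename="foo-1.2.3.tar.gz"
+            // unpacks as .tar.gz even if the URL path has no extension; only
+            // when both headers are inconclusive does the URI path decide.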
+ const content_type = req.response.headers.getFirstValue("Content-Type") orelse
+ return f.fail(f.location_tok, "missing 'Content-Type' header", .{});
+
+ if (ascii.eqlIgnoreCase(content_type, "application/x-tar"))
+ break :ft .tar;
+
+ if (ascii.eqlIgnoreCase(content_type, "application/gzip") or
+ ascii.eqlIgnoreCase(content_type, "application/x-gzip") or
+ ascii.eqlIgnoreCase(content_type, "application/tar+gzip"))
+ {
+ break :ft .@"tar.gz";
+ }
+
+ if (ascii.eqlIgnoreCase(content_type, "application/x-xz"))
+ break :ft .@"tar.xz";
+
+ if (!ascii.eqlIgnoreCase(content_type, "application/octet-stream")) {
+ return f.fail(f.location_tok, "unrecognized 'Content-Type' header: '{s}'", .{
+ content_type,
+ });
+ }
+
+ // Next, the filename from 'content-disposition: attachment' takes precedence.
+ if (req.response.headers.getFirstValue("Content-Disposition")) |cd_header| {
+ break :ft FileType.fromContentDisposition(cd_header) orelse
+ return f.fail(
+ f.location_tok,
+ "unsupported Content-Disposition header value: '{s}' for Content-Type=application/octet-stream",
+ .{cd_header},
+ );
+ }
+
+ // Finally, the path from the URI is used.
+ break :ft FileType.fromPath(uri_path) orelse
+ return f.fail(f.location_tok, "unknown file type: '{s}'", .{uri_path});
+ },
+ .git => .git_pack,
+ .dir => |dir| {
+ try f.recursiveDirectoryCopy(dir, tmp_directory.handle);
+ return;
+ },
+ };
+
+ switch (file_type) {
+ .tar => try unpackTarball(f, tmp_directory.handle, resource.reader()),
+ .@"tar.gz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.gzip),
+ .@"tar.xz" => try unpackTarballCompressed(f, tmp_directory.handle, resource, std.compress.xz),
+ .git_pack => try unpackGitPack(f, tmp_directory.handle, resource),
+ }
+}
+
+fn unpackTarballCompressed(
+ f: *Fetch,
+ out_dir: fs.Dir,
+ resource: *Resource,
+ comptime Compression: type,
+) RunError!void {
+ const gpa = f.gpa;
+ const reader = resource.reader();
+ var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
+
+ var decompress = try Compression.decompress(gpa, br.reader());
+ defer decompress.deinit();
+
+ return unpackTarball(f, out_dir, decompress.reader());
+}
+
+fn unpackTarball(f: *Fetch, out_dir: fs.Dir, reader: anytype) RunError!void {
+ const eb = &f.error_bundle;
+
+ var diagnostics: std.tar.Options.Diagnostics = .{ .allocator = f.gpa };
+ defer diagnostics.deinit();
+
+ try std.tar.pipeToFileSystem(out_dir, reader, .{
+ .diagnostics = &diagnostics,
+ .strip_components = 1,
+ // TODO: we would like to set this to executable_bit_only, but two
+ // things need to happen before that:
+ // 1. the tar implementation needs to support it
+ // 2. the hashing algorithm here needs to support detecting the is_executable
+ // bit on Windows from the ACLs (see the isExecutable function).
+ .mode_mode = .ignore,
+ .filter = .{ .exclude_empty_directories = true },
+ });
+
+ if (diagnostics.errors.items.len > 0) {
+ const notes_len: u32 = @intCast(diagnostics.errors.items.len);
+ try f.addErrorWithNotes(notes_len, f.location_tok, "unable to unpack tarball");
+ const notes_start = try eb.reserveNotes(notes_len);
+ for (diagnostics.errors.items, notes_start..) |item, note_i| {
+ switch (item) {
+ .unable_to_create_sym_link => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
+ info.file_name, info.link_name, @errorName(info.code),
+ }),
+ }));
+ },
+ .unsupported_file_type => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("file '{s}' has unsupported type '{c}'", .{
+ info.file_name, @intFromEnum(info.file_type),
+ }),
+ }));
+ },
+ }
+ }
+ return error.InvalidTarball;
+ }
+}
+
+fn unpackGitPack(
+ f: *Fetch,
+ out_dir: fs.Dir,
+ resource: *Resource,
+) !void {
+ const eb = &f.error_bundle;
+ const gpa = f.gpa;
+ const want_oid = resource.git.want_oid;
+ const reader = resource.git.fetch_stream.reader();
+ // The .git directory is used to store the packfile and associated index, but
+ // we do not attempt to replicate the exact structure of a real .git
+ // directory, since that isn't relevant for fetching a package.
+ {
+ var pack_dir = try out_dir.makeOpenPath(".git", .{});
+ defer pack_dir.close();
+ var pack_file = try pack_dir.createFile("pkg.pack", .{ .read = true });
+ defer pack_file.close();
+ var fifo = std.fifo.LinearFifo(u8, .{ .Static = 4096 }).init();
+ try fifo.pump(reader, pack_file.writer());
+ try pack_file.sync();
+
+ var index_file = try pack_dir.createFile("pkg.idx", .{ .read = true });
+ defer index_file.close();
+ {
+ var index_prog_node = f.prog_node.start("Index pack", 0);
+ defer index_prog_node.end();
+ index_prog_node.activate();
+ var index_buffered_writer = std.io.bufferedWriter(index_file.writer());
+ try git.indexPack(gpa, pack_file, index_buffered_writer.writer());
+ try index_buffered_writer.flush();
+ try index_file.sync();
+ }
+
+ {
+ var checkout_prog_node = f.prog_node.start("Checkout", 0);
+ defer checkout_prog_node.end();
+ checkout_prog_node.activate();
+ var repository = try git.Repository.init(gpa, pack_file, index_file);
+ defer repository.deinit();
+ var diagnostics: git.Diagnostics = .{ .allocator = gpa };
+ defer diagnostics.deinit();
+ try repository.checkout(out_dir, want_oid, &diagnostics);
+
+ if (diagnostics.errors.items.len > 0) {
+ const notes_len: u32 = @intCast(diagnostics.errors.items.len);
+ try f.addErrorWithNotes(notes_len, f.location_tok, "unable to unpack packfile");
+ const notes_start = try eb.reserveNotes(notes_len);
+ for (diagnostics.errors.items, notes_start..) |item, note_i| {
+ switch (item) {
+ .unable_to_create_sym_link => |info| {
+ eb.extra.items[note_i] = @intFromEnum(try eb.addErrorMessage(.{
+ .msg = try eb.printString("unable to create symlink from '{s}' to '{s}': {s}", .{
+ info.file_name, info.link_name, @errorName(info.code),
+ }),
+ }));
+ },
+ }
+ }
+ return error.InvalidGitPack;
+ }
+ }
+ }
+
+ try out_dir.deleteTree(".git");
+}
+
+fn recursiveDirectoryCopy(f: *Fetch, dir: fs.IterableDir, tmp_dir: fs.Dir) RunError!void {
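+ // Walks `dir` and reproduces its contents inside `tmp_dir`: regular
+ // files are copied (missing parent directories are created on demand
+ // via the error.FileNotFound retry below), symlinks are re-created
+ // from their targets, and directory entries themselves are skipped,
+ // so empty directories never reach the hasher.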
+ var it = try dir.walk(f.gpa); + defer it.deinit(); + while (try it.next()) |entry| { + switch (entry.kind) { + .directory => {}, // omit empty directories + .file => { + dir.dir.copyFile( + entry.path, + tmp_dir, + entry.path, + .{}, + ) catch |err| switch (err) { + error.FileNotFound => { + if (fs.path.dirname(entry.path)) |dirname| try tmp_dir.makePath(dirname); + try dir.dir.copyFile(entry.path, tmp_dir, entry.path, .{}); + }, + else => |e| return e, + }; + }, + .sym_link => { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const link_name = try dir.dir.readLink(entry.path, &buf); + // TODO: if this would create a symlink to outside + // the destination directory, fail with an error instead. + try tmp_dir.symLink(link_name, entry.path, .{}); + }, + else => return error.IllegalFileTypeInPackage, + } + } +} + +pub fn renameTmpIntoCache( + cache_dir: fs.Dir, + tmp_dir_sub_path: []const u8, + dest_dir_sub_path: []const u8, +) !void { + assert(dest_dir_sub_path[1] == fs.path.sep); + var handled_missing_dir = false; + while (true) { + cache_dir.rename(tmp_dir_sub_path, dest_dir_sub_path) catch |err| switch (err) { + error.FileNotFound => { + if (handled_missing_dir) return err; + cache_dir.makeDir(dest_dir_sub_path[0..1]) catch |mkd_err| switch (mkd_err) { + error.PathAlreadyExists => handled_missing_dir = true, + else => |e| return e, + }; + continue; + }, + error.PathAlreadyExists, error.AccessDenied => { + // Package has been already downloaded and may already be in use on the system. + cache_dir.deleteTree(tmp_dir_sub_path) catch { + // Garbage files leftover in zig-cache/tmp/ is, as they say + // on Star Trek, "operating within normal parameters". + }; + }, + else => |e| return e, + }; + break; + } +} + +/// Assumes that files not included in the package have already been filtered +/// prior to calling this function. This ensures that files not protected by +/// the hash are not present on the file system. Empty directories are *not +/// hashed* and must not be present on the file system when calling this +/// function. +fn computeHash(f: *Fetch, pkg_dir: fs.IterableDir, filter: Filter) RunError!Digest { + // All the path name strings need to be in memory for sorting. + const arena = f.arena_allocator.allocator(); + const gpa = f.gpa; + + // Collect all files, recursively, then sort. + var all_files = std.ArrayList(*HashedFile).init(gpa); + defer all_files.deinit(); + + var walker = try pkg_dir.walk(gpa); + defer walker.deinit(); + + { + // The final hash will be a hash of each file hashed independently. This + // allows hashing in parallel. + var wait_group: WaitGroup = .{}; + // `computeHash` is called from a worker thread so there must not be + // any waiting without working or a deadlock could occur. 
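+ // `waitAndWork` makes the blocked thread execute queued thread pool
+ // jobs while it waits, unlike a plain `wait`, so the hash workers
+ // spawned below keep making progress even when every pool thread is
+ // itself inside a `computeHash` call.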
+ defer wait_group.waitAndWork(); + + while (try walker.next()) |entry| { + _ = filter; // TODO: apply filter rules here + + const kind: HashedFile.Kind = switch (entry.kind) { + .directory => continue, + .file => .file, + .sym_link => .sym_link, + else => return error.IllegalFileTypeInPackage, + }; + + if (std.mem.eql(u8, entry.path, build_zig_basename)) + f.has_build_zig = true; + + const hashed_file = try arena.create(HashedFile); + const fs_path = try arena.dupe(u8, entry.path); + hashed_file.* = .{ + .fs_path = fs_path, + .normalized_path = try normalizePath(arena, fs_path), + .kind = kind, + .hash = undefined, // to be populated by the worker + .failure = undefined, // to be populated by the worker + }; + wait_group.start(); + try f.thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group }); + + try all_files.append(hashed_file); + } + } + + std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); + + var hasher = Manifest.Hash.init(.{}); + var any_failures = false; + const eb = &f.error_bundle; + for (all_files.items) |hashed_file| { + hashed_file.failure catch |err| { + any_failures = true; + try eb.addRootErrorMessage(.{ + .msg = try eb.printString("unable to hash: {s}", .{@errorName(err)}), + .src_loc = try eb.addSourceLocation(.{ + .src_path = try eb.addString(hashed_file.fs_path), + .span_start = 0, + .span_end = 0, + .span_main = 0, + }), + .notes_len = 0, + }); + }; + hasher.update(&hashed_file.hash); + } + if (any_failures) return error.FetchFailed; + return hasher.finalResult(); +} + +fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { + defer wg.finish(); + hashed_file.failure = hashFileFallible(dir, hashed_file); +} + +fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { + var buf: [8000]u8 = undefined; + var hasher = Manifest.Hash.init(.{}); + hasher.update(hashed_file.normalized_path); + switch (hashed_file.kind) { + .file => { + var file = try dir.openFile(hashed_file.fs_path, .{}); + defer file.close(); + hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); + while (true) { + const bytes_read = try file.read(&buf); + if (bytes_read == 0) break; + hasher.update(buf[0..bytes_read]); + } + }, + .sym_link => { + const link_name = try dir.readLink(hashed_file.fs_path, &buf); + hasher.update(link_name); + }, + } + hasher.final(&hashed_file.hash); +} + +fn isExecutable(file: fs.File) !bool { + if (builtin.os.tag == .windows) { + // TODO check the ACL on Windows. + // Until this is implemented, this could be a false negative on + // Windows, which is why we do not yet set executable_bit_only above + // when unpacking the tarball. + return false; + } else { + const stat = try file.stat(); + return (stat.mode & std.os.S.IXUSR) != 0; + } +} + +const HashedFile = struct { + fs_path: []const u8, + normalized_path: []const u8, + hash: Digest, + failure: Error!void, + kind: Kind, + + const Error = + fs.File.OpenError || + fs.File.ReadError || + fs.File.StatError || + fs.Dir.ReadLinkError; + + const Kind = enum { file, sym_link }; + + fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { + _ = context; + return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path); + } +}; + +/// Make a file system path identical independently of operating system path inconsistencies. +/// This converts backslashes into forward slashes. 
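+/// For example, the Windows sub-path `src\main.zig` normalizes to
+/// `src/main.zig`; on systems where `fs.path.sep` is already '/', the
+/// input slice is returned unchanged without allocating.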
+fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 {
+ const canonical_sep = '/';
+
+ if (fs.path.sep == canonical_sep)
+ return fs_path;
+
+ const normalized = try arena.dupe(u8, fs_path);
+ for (normalized) |*byte| {
+ switch (byte.*) {
+ fs.path.sep => byte.* = canonical_sep,
+ else => continue,
+ }
+ }
+ return normalized;
+}
+
+pub const Filter = struct {
+ include_paths: std.StringArrayHashMapUnmanaged(void) = .{},
+
+ /// sub_path is relative to the tarball root.
+ pub fn includePath(self: Filter, sub_path: []const u8) bool {
+ if (self.include_paths.count() == 0) return true;
+ if (self.include_paths.contains("")) return true;
+ if (self.include_paths.contains(sub_path)) return true;
+
+ // Check if any included paths are parent directories of sub_path.
+ var dirname = sub_path;
+ while (std.fs.path.dirname(dirname)) |next_dirname| {
+ if (self.include_paths.contains(next_dirname)) return true;
+ dirname = next_dirname;
+ }
+
+ return false;
+ }
+};
+
+const build_zig_basename = @import("../Package.zig").build_zig_basename;
+const hex_multihash_len = 2 * Manifest.multihash_len;
+
+// These are random bytes.
+const package_hash_prefix_cached: [8]u8 = .{ 0x53, 0x7e, 0xfa, 0x94, 0x65, 0xe9, 0xf8, 0x73 };
+const package_hash_prefix_project: [8]u8 = .{ 0xe1, 0x25, 0xee, 0xfa, 0xa6, 0x17, 0x38, 0xcc };
+
+const builtin = @import("builtin");
+const std = @import("std");
+const fs = std.fs;
+const assert = std.debug.assert;
+const ascii = std.ascii;
+const Allocator = std.mem.Allocator;
+const Cache = std.Build.Cache;
+const ThreadPool = std.Thread.Pool;
+const WaitGroup = std.Thread.WaitGroup;
+const Manifest = @import("../Manifest.zig");
+const Fetch = @This();
+const main = @import("../main.zig");
+const git = @import("../git.zig");
diff --git a/src/Package/hash.zig b/src/Package/hash.zig
deleted file mode 100644
index b14ec70244..0000000000
--- a/src/Package/hash.zig
+++ /dev/null
@@ -1,153 +0,0 @@
-const builtin = @import("builtin");
-const std = @import("std");
-const fs = std.fs;
-const ThreadPool = std.Thread.Pool;
-const WaitGroup = std.Thread.WaitGroup;
-const Allocator = std.mem.Allocator;
-
-const Hash = @import("../Manifest.zig").Hash;
-
-pub fn compute(thread_pool: *ThreadPool, pkg_dir: fs.IterableDir) ![Hash.digest_length]u8 {
- const gpa = thread_pool.allocator;
-
- // We'll use an arena allocator for the path name strings since they all
- // need to be in memory for sorting.
- var arena_instance = std.heap.ArenaAllocator.init(gpa);
- defer arena_instance.deinit();
- const arena = arena_instance.allocator();
-
- // TODO: delete files not included in the package prior to computing the package hash.
- // for example, if the ini file has directives to include/not include certain files,
- // apply those rules directly to the filesystem right here. This ensures that files
- // not protected by the hash are not present on the file system.
-
- // Collect all files, recursively, then sort.
- var all_files = std.ArrayList(*HashedFile).init(gpa);
- defer all_files.deinit();
-
- var walker = try pkg_dir.walk(gpa);
- defer walker.deinit();
-
- {
- // The final hash will be a hash of each file hashed independently. This
- // allows hashing in parallel.
- var wait_group: WaitGroup = .{}; - defer wait_group.wait(); - - while (try walker.next()) |entry| { - const kind: HashedFile.Kind = switch (entry.kind) { - .directory => continue, - .file => .file, - .sym_link => .sym_link, - else => return error.IllegalFileTypeInPackage, - }; - const hashed_file = try arena.create(HashedFile); - const fs_path = try arena.dupe(u8, entry.path); - hashed_file.* = .{ - .fs_path = fs_path, - .normalized_path = try normalizePath(arena, fs_path), - .kind = kind, - .hash = undefined, // to be populated by the worker - .failure = undefined, // to be populated by the worker - }; - wait_group.start(); - try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group }); - - try all_files.append(hashed_file); - } - } - - std.mem.sortUnstable(*HashedFile, all_files.items, {}, HashedFile.lessThan); - - var hasher = Hash.init(.{}); - var any_failures = false; - for (all_files.items) |hashed_file| { - hashed_file.failure catch |err| { - any_failures = true; - std.log.err("unable to hash '{s}': {s}", .{ hashed_file.fs_path, @errorName(err) }); - }; - hasher.update(&hashed_file.hash); - } - if (any_failures) return error.PackageHashUnavailable; - return hasher.finalResult(); -} - -const HashedFile = struct { - fs_path: []const u8, - normalized_path: []const u8, - hash: [Hash.digest_length]u8, - failure: Error!void, - kind: Kind, - - const Error = - fs.File.OpenError || - fs.File.ReadError || - fs.File.StatError || - fs.Dir.ReadLinkError; - - const Kind = enum { file, sym_link }; - - fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool { - _ = context; - return std.mem.lessThan(u8, lhs.normalized_path, rhs.normalized_path); - } -}; - -/// Make a file system path identical independently of operating system path inconsistencies. -/// This converts backslashes into forward slashes. -fn normalizePath(arena: Allocator, fs_path: []const u8) ![]const u8 { - const canonical_sep = '/'; - - if (fs.path.sep == canonical_sep) - return fs_path; - - const normalized = try arena.dupe(u8, fs_path); - for (normalized) |*byte| { - switch (byte.*) { - fs.path.sep => byte.* = canonical_sep, - else => continue, - } - } - return normalized; -} - -fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void { - defer wg.finish(); - hashed_file.failure = hashFileFallible(dir, hashed_file); -} - -fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void { - var buf: [8000]u8 = undefined; - var hasher = Hash.init(.{}); - hasher.update(hashed_file.normalized_path); - switch (hashed_file.kind) { - .file => { - var file = try dir.openFile(hashed_file.fs_path, .{}); - defer file.close(); - hasher.update(&.{ 0, @intFromBool(try isExecutable(file)) }); - while (true) { - const bytes_read = try file.read(&buf); - if (bytes_read == 0) break; - hasher.update(buf[0..bytes_read]); - } - }, - .sym_link => { - const link_name = try dir.readLink(hashed_file.fs_path, &buf); - hasher.update(link_name); - }, - } - hasher.final(&hashed_file.hash); -} - -fn isExecutable(file: fs.File) !bool { - if (builtin.os.tag == .windows) { - // TODO check the ACL on Windows. - // Until this is implemented, this could be a false negative on - // Windows, which is why we do not yet set executable_bit_only above - // when unpacking the tarball. 
- return false; - } else { - const stat = try file.stat(); - return (stat.mode & std.os.S.IXUSR) != 0; - } -} diff --git a/src/main.zig b/src/main.zig index 247669440d..34ab5ea191 100644 --- a/src/main.zig +++ b/src/main.zig @@ -4714,7 +4714,7 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi defer if (cleanup_build_dir) |*dir| dir.close(); const cwd_path = try process.getCwdAlloc(arena); - const build_zig_basename = if (build_file) |bf| fs.path.basename(bf) else "build.zig"; + const build_zig_basename = if (build_file) |bf| fs.path.basename(bf) else Package.build_zig_basename; const build_directory: Compilation.Directory = blk: { if (build_file) |bf| { if (fs.path.dirname(bf)) |dirname| {