zig/src/Package.zig
2025-02-26 11:42:04 -08:00

200 lines
7.2 KiB
Zig

const std = @import("std");
const assert = std.debug.assert;
pub const Module = @import("Package/Module.zig");
pub const Fetch = @import("Package/Fetch.zig");
/// The file name "build.zig".
/// NOTE(review): presumably the basename of a package's build script; confirm
/// against the users of this constant.
pub const build_zig_basename = "build.zig";
pub const Manifest = @import("Package/Manifest.zig");
/// Length in bytes of the legacy binary multihash: one byte for the hash
/// function code, one byte for the digest length, followed by the digest.
pub const multihash_len = 1 + 1 + Hash.Algo.digest_length;
/// Length of a multihash once written as hexadecimal text (two characters per
/// byte).
pub const multihash_hex_digest_len = 2 * multihash_len;
/// Fixed-size buffer holding a hex-encoded multihash, as returned by
/// `multiHashHexDigest`.
pub const MultiHashHexDigest = [multihash_hex_digest_len]u8;
/// A 64-bit package fingerprint: a random, non-reserved `id` paired with a
/// CRC-32 checksum of the package name.
///
/// Being a `packed struct(u64)`, `id` occupies the low 32 bits and `checksum`
/// the high 32 bits of the integer returned by `int`.
pub const Fingerprint = packed struct(u64) {
    /// Random identifier. The values 0x00000000 and 0xffffffff are reserved
    /// and are rejected by `validate`.
    id: u32,
    /// CRC-32 of the name this fingerprint was generated for.
    checksum: u32,

    /// Creates a fresh fingerprint for `name` with a random `id` drawn from
    /// the range 1 through 0xfffffffe inclusive.
    pub fn generate(name: []const u8) Fingerprint {
        // The upper bound is exclusive, so neither reserved value is produced.
        const random_id = std.crypto.random.intRangeLessThan(u32, 1, 0xffffffff);
        const name_checksum = std.hash.Crc32.hash(name);
        return .{
            .id = random_id,
            .checksum = name_checksum,
        };
    }

    /// Returns true when `n` has a non-reserved `id` and its `checksum`
    /// matches the CRC-32 of `name`.
    pub fn validate(n: Fingerprint, name: []const u8) bool {
        const id_is_reserved = n.id == 0x00000000 or n.id == 0xffffffff;
        if (id_is_reserved) return false;
        return n.checksum == std.hash.Crc32.hash(name);
    }

    /// Reinterprets the fingerprint as its backing 64-bit integer.
    pub fn int(n: Fingerprint) u64 {
        const as_integer: u64 = @bitCast(n);
        return as_integer;
    }
};
/// A user-readable, file system safe hash that identifies an exact package
/// snapshot, including file contents.
///
/// The hash is not only to prevent collisions but must resist attacks where
/// the adversary fully controls the contents being hashed. Thus, it is derived
/// from a SHA-256 digest (the format produced by `init` stores the first 25
/// bytes of it).
///
/// This data structure can be used to store the legacy hash format too. Legacy
/// hash format is scheduled to be removed after 0.14.0 is tagged.
///
/// There's also a third way this structure is used. When using path rather than
/// hash, a unique hash is still needed, so one is computed based on the path.
pub const Hash = struct {
    /// Hash text, sized to the maximum length of a package hash. Unused bytes
    /// at the end are filled with zeroes.
    bytes: [max_len]u8,

    pub const Algo = std.crypto.hash.sha2.Sha256;
    pub const Digest = [Algo.digest_length]u8;

    /// Example: "nnnn-vvvv-hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh"
    ///
    /// At most 32 bytes of name, a dash, at most 32 bytes of version, a dash,
    /// and the base64 text of 32 + 32 + 200 bits at 6 bits per character.
    pub const max_len = 32 + 1 + 32 + 1 + (32 + 32 + 200) / 6;

    /// Copies `s` into a new `Hash` and zero-fills the remainder.
    /// Asserts that `s` is at most `max_len` bytes long.
    pub fn fromSlice(s: []const u8) Hash {
        assert(s.len <= max_len);
        var result: Hash = undefined;
        @memcpy(result.bytes[0..s.len], s);
        @memset(result.bytes[s.len..], 0);
        return result;
    }

    /// Returns the stored text with the trailing zero padding removed.
    ///
    /// When every byte is zero (for example after `fromSlice("")`), the
    /// result is an empty slice.
    pub fn toSlice(ph: *const Hash) []const u8 {
        var end: usize = ph.bytes.len;
        // Scan backwards for the last non-zero byte. The loop is bounded so
        // that an all-zero buffer cannot underflow `end`.
        while (end > 0) {
            end -= 1;
            if (ph.bytes[end] != 0) return ph.bytes[0 .. end + 1];
        }
        return ph.bytes[0..0];
    }

    /// Returns whether `a` and `b` hold exactly the same bytes, padding
    /// included.
    pub fn eql(a: *const Hash, b: *const Hash) bool {
        return std.mem.eql(u8, &a.bytes, &b.bytes);
    }

    /// Distinguishes whether the legacy multihash format is being stored here.
    ///
    /// The legacy format is hex text that starts with the code of
    /// `multihash_function`, is exactly `multihash_hex_digest_len` bytes long,
    /// and contains no '-' separator.
    pub fn isOld(h: *const Hash) bool {
        // The first two characters must parse as a hex byte naming our hash function.
        const their_multihash_func = std.fmt.parseInt(u8, h.bytes[0..2], 16) catch return false;
        if (@as(MultihashFunction, @enumFromInt(their_multihash_func)) != multihash_function) return false;
        if (h.toSlice().len != multihash_hex_digest_len) return false;
        // The "$name-$semver-$hashplus" format always contains dashes; the legacy one never does.
        return std.mem.indexOfScalar(u8, &h.bytes, '-') == null;
    }

    test isOld {
        const h: Hash = .fromSlice("1220138f4aba0c01e66b68ed9e1e1e74614c06e4743d88bc58af4f1c3dd0aae5fea7");
        try std.testing.expect(h.isOld());
    }

    test toSlice {
        const empty: Hash = .fromSlice("");
        try std.testing.expectEqualStrings("", empty.toSlice());
        const short: Hash = .fromSlice("abc");
        try std.testing.expectEqualStrings("abc", short.toSlice());
    }

    /// Produces "$name-$semver-$hashplus".
    /// * name is the name field from build.zig.zon, asserted to be at most 32
    ///   bytes and assumed to be a valid zig identifier
    /// * semver is the version field from build.zig.zon, asserted to be at
    ///   most 32 bytes
    /// * hashplus is the following 33-byte array, base64 encoded using -_ to make
    ///   it filesystem safe:
    ///   - (4 bytes) LE u32 Package ID
    ///   - (4 bytes) LE u32 total decompressed size in bytes, overflow saturated
    ///   - (25 bytes) truncated SHA-256 digest of hashed files of the package
    pub fn init(digest: Digest, name: []const u8, ver: []const u8, id: u32, size: u32) Hash {
        assert(name.len <= 32);
        assert(ver.len <= 32);
        var result: Hash = undefined;
        // Build the text directly inside `result.bytes`; the asserts above
        // guarantee that everything fits within `max_len`.
        var buf: std.ArrayListUnmanaged(u8) = .initBuffer(&result.bytes);
        buf.appendSliceAssumeCapacity(name);
        buf.appendAssumeCapacity('-');
        buf.appendSliceAssumeCapacity(ver);
        buf.appendAssumeCapacity('-');
        var hashplus: [33]u8 = undefined;
        std.mem.writeInt(u32, hashplus[0..4], id, .little);
        std.mem.writeInt(u32, hashplus[4..8], size, .little);
        hashplus[8..].* = digest[0..25].*;
        // 33 bytes encode to exactly 44 base64 characters, so no padding is needed.
        _ = std.base64.url_safe_no_pad.Encoder.encode(buf.addManyAsArrayAssumeCapacity(44), &hashplus);
        @memset(buf.unusedCapacitySlice(), 0);
        return result;
    }

    /// Produces a unique hash based on the path provided. The result should
    /// not be user-visible.
    ///
    /// When `is_global` is set the result starts with '/'. A `sub_path` that
    /// fits in the remaining space is stored verbatim; otherwise the
    /// lower-case hex SHA-256 digest of `sub_path` is stored instead.
    pub fn initPath(sub_path: []const u8, is_global: bool) Hash {
        var result: Hash = .{ .bytes = @splat(0) };
        var i: usize = 0;
        if (is_global) {
            result.bytes[0] = '/';
            i += 1;
        }
        if (i + sub_path.len <= result.bytes.len) {
            @memcpy(result.bytes[i..][0..sub_path.len], sub_path);
            return result;
        }
        var bin_digest: [Algo.digest_length]u8 = undefined;
        Algo.hash(sub_path, &bin_digest, .{});
        // The hex digest is 2 * digest_length characters, which fits in the remaining space.
        _ = std.fmt.bufPrint(result.bytes[i..], "{}", .{std.fmt.fmtSliceHexLower(&bin_digest)}) catch unreachable;
        return result;
    }
};
/// Numeric codes identifying the hash function used in a multihash.
///
/// The enum is non-exhaustive, so any other `u16` value is representable too.
/// NOTE(review): names and values appear to follow the multiformats multihash
/// table; confirm against that table before adding entries.
pub const MultihashFunction = enum(u16) {
    identity = 0x0000,
    sha1 = 0x0011,
    @"sha2-256" = 0x0012,
    @"sha2-512" = 0x0013,
    @"sha3-512" = 0x0014,
    @"sha3-384" = 0x0015,
    @"sha3-256" = 0x0016,
    @"sha3-224" = 0x0017,
    @"sha2-384" = 0x0020,
    @"sha2-256-trunc254-padded" = 0x1012,
    @"sha2-224" = 0x1013,
    @"sha2-512-224" = 0x1014,
    @"sha2-512-256" = 0x1015,
    @"blake2b-256" = 0xb220,
    _,
};
/// The multihash function code that corresponds to `Hash.Algo`.
///
/// Only SHA-256 is mapped; choosing any other `Hash.Algo` reaches
/// `unreachable` while this constant is being evaluated.
pub const multihash_function: MultihashFunction = if (Hash.Algo == std.crypto.hash.sha2.Sha256)
    .@"sha2-256"
else
    unreachable;
/// Renders `digest` as lower-case hexadecimal multihash text.
///
/// The output starts with two hex characters for the code of
/// `multihash_function` and two for `Hash.Algo.digest_length`, followed by two
/// hex characters for every byte of `digest`.
pub fn multiHashHexDigest(digest: Hash.Digest) MultiHashHexDigest {
    const charset = std.fmt.hex_charset;
    const function_code = @intFromEnum(multihash_function);
    const digest_len = Hash.Algo.digest_length;

    var out: MultiHashHexDigest = undefined;

    // Header: function code and digest length, one byte (two characters) each.
    out[0..4].* = .{
        charset[function_code >> 4],
        charset[function_code & 15],
        charset[digest_len >> 4],
        charset[digest_len & 15],
    };

    // Payload: high nibble first, then low nibble, for each digest byte.
    var pos: usize = 4;
    for (digest) |byte| {
        out[pos] = charset[byte >> 4];
        out[pos + 1] = charset[byte & 15];
        pos += 2;
    }
    return out;
}
comptime {
// `multiHashHexDigest` writes the multihash function code and the digest
// length as exactly one byte (two hex characters) each, instead of running
// real uleb128 encoding. Assert here that both values are small enough to be
// contained in the one-byte encoding, so that shortcut stays valid.
// A single uleb128 byte can hold values up to 127, so this bound is slightly
// conservative.
assert(@intFromEnum(multihash_function) < 127);
assert(Hash.Algo.digest_length < 127);
}
// Checks the "$name-$semver-$hashplus" text produced by `Hash.init` for a
// fixed digest, name, version, id and size.
test Hash {
    const digest: Hash.Digest = .{
        0xc7, 0xf5, 0x71, 0xb7, 0xb4, 0xe7, 0x6f, 0x3c, 0xdb, 0x87, 0x7a, 0x7f, 0xdd, 0xf9, 0x77, 0x87,
        0x9d, 0xd3, 0x86, 0xfa, 0x73, 0x57, 0x9a, 0xf7, 0x9d, 0x1e, 0xdb, 0x8f, 0x3a, 0xd9, 0xbd, 0x9f,
    };
    const package_id: u32 = 0xcafebabe;
    const decompressed_size: u32 = 10 * 1024 * 1024;

    const hash = Hash.init(digest, "nasm", "2.16.1-3", package_id, decompressed_size);

    try std.testing.expectEqualStrings(
        "nasm-2.16.1-3-vrr-ygAAoADH9XG3tOdvPNuHen_d-XeHndOG-nNXmved",
        hash.toSlice(),
    );
}
// Reference `Fetch` from a test block so that the tests declared in
// Package/Fetch.zig are analyzed and run together with this file's tests.
test {
_ = Fetch;
}