std.compress.xz: flatten namespaces

2025-12-06 05:44:20 +00:00 · 2025-08-22 14:14:26 -07:00 · 2025-08-22 14:14:26 -07:00 · 6464e0d4fc
commit 6464e0d4fc
parent ea0ce7afb5
2 changed files with 207 additions and 212 deletions
--- a/lib/std/compress/xz.zig
+++ b/lib/std/compress/xz.zig
@ -1,7 +1,10 @@
 const std = @import("std");
-const block = @import("xz/block.zig");
 const Allocator = std.mem.Allocator;
+const ArrayList = std.ArrayList;
 const Crc32 = std.hash.Crc32;
+const Crc64 = std.hash.crc.Crc64Xz;
+const Sha256 = std.crypto.hash.sha2.Sha256;
+const lzma2 = std.compress.lzma2;

 pub const Check = enum(u4) {
    none = 0x00,
@ -27,11 +30,11 @@ pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

-        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
+        pub const Error = ReaderType.Error || Decoder(ReaderType).Error;
        pub const Reader = std.io.GenericReader(*Self, Error, read);

        allocator: Allocator,
-        block_decoder: block.Decoder(ReaderType),
+        block_decoder: Decoder(ReaderType),
        in_reader: ReaderType,

        fn init(allocator: Allocator, source: ReaderType) !Self {
@ -52,7 +55,7 @@ pub fn Decompress(comptime ReaderType: type) type {

            return Self{
                .allocator = allocator,
-                .block_decoder = try block.decoder(allocator, source, check),
+                .block_decoder = try decoder(allocator, source, check),
                .in_reader = source,
            };
        }
@ -161,6 +164,206 @@ pub fn hashedReader(
    return .{ .child_reader = reader, .hasher = hasher };
 }

+/// Errors produced while decoding an xz block, in addition to whatever the
+/// underlying reader and the allocator can return.
+const DecodeError = error{
+    /// The block is malformed, e.g. non-zero padding bytes, a filter
+    /// properties size other than 1, or a size field that disagrees with
+    /// the number of bytes actually processed.
+    CorruptInput,
+    /// Not returned explicitly by the block decoder itself; presumably
+    /// surfaced by the reader helpers when input ends early (TODO confirm).
+    EndOfStream,
+    /// Internal sentinel: the block header size byte was zero. `Decoder.read`
+    /// turns this into a plain `0` return instead of reporting an error.
+    EndOfStreamWithNoError,
+    /// Not returned explicitly by the block decoder itself; presumably
+    /// surfaced by `std.leb.readUleb128` on oversized integers (TODO confirm).
+    Overflow,
+    /// The block uses a feature this decoder does not implement, e.g. more
+    /// than one filter, a filter other than LZMA2, or an unhandled check type.
+    Unsupported,
+    /// The block header CRC32 or the block's integrity check did not match.
+    WrongChecksum,
+};
+
+/// Convenience constructor: builds a `Decoder` specialized on the type of
+/// `reader`, forwarding `allocator`, `reader` and `check` to its `init`.
+pub fn decoder(allocator: Allocator, reader: anytype, check: Check) !Decoder(@TypeOf(reader)) {
+    const BlockDecoder = Decoder(@TypeOf(reader));
+    return BlockDecoder.init(allocator, reader, check);
+}
+
+/// Block decoder for an xz stream whose compressed bytes come from a
+/// `ReaderType`.
+///
+/// `readBlock` decodes one whole block into an in-memory buffer and `read`
+/// hands that buffer out piecewise. Only blocks with a single LZMA2 filter
+/// are accepted; anything else is reported as `error.Unsupported`.
+pub fn Decoder(comptime ReaderType: type) type {
+    return struct {
+        const Self = @This();
+        pub const Error =
+            ReaderType.Error ||
+            DecodeError ||
+            Allocator.Error;
+        pub const Reader = std.io.GenericReader(*Self, Error, read);
+
+        /// Backs `to_read` and is passed on to the LZMA2 decoder.
+        allocator: Allocator,
+        /// Source of the compressed block bytes.
+        inner_reader: ReaderType,
+        /// Integrity check type verified after every block.
+        check: Check,
+        /// Error returned by `readBlock`, if any. Once set, no further block
+        /// is decoded.
+        err: ?Error,
+        /// Decoded bytes of the block currently being handed out.
+        to_read: ArrayList(u8),
+        /// Number of bytes of `to_read` that `read` has already returned.
+        read_pos: usize,
+        /// Number of blocks that decoded and verified successfully.
+        block_count: usize,
+
+        /// Creates a decoder with an empty output buffer. Nothing is read
+        /// from `in_reader` until `read` is called.
+        fn init(allocator: Allocator, in_reader: ReaderType, check: Check) !Self {
+            return Self{
+                .allocator = allocator,
+                .inner_reader = in_reader,
+                .check = check,
+                .err = null,
+                .to_read = .empty,
+                .read_pos = 0,
+                .block_count = 0,
+            };
+        }
+
+        /// Frees the decoded-output buffer.
+        pub fn deinit(self: *Self) void {
+            self.to_read.deinit(self.allocator);
+        }
+
+        /// Returns a generic reader whose reads are served by `read`.
+        pub fn reader(self: *Self) Reader {
+            return .{ .context = self };
+        }
+
+        /// Copies decoded bytes into `output` and returns how many were
+        /// written, decoding further blocks as needed.
+        ///
+        /// Returns 0 once `readBlock` has reported
+        /// `error.EndOfStreamWithNoError`. Any other stored error is returned
+        /// on this and every later call. Bytes already buffered when a block
+        /// fails are still delivered before the error is reported, because
+        /// the buffer is drained first.
+        pub fn read(self: *Self, output: []u8) Error!usize {
+            while (true) {
+                const unread_len = self.to_read.items.len - self.read_pos;
+                if (unread_len > 0) {
+                    const n = @min(unread_len, output.len);
+                    @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]);
+                    self.read_pos += n;
+                    return n;
+                }
+                if (self.err) |e| {
+                    if (e == DecodeError.EndOfStreamWithNoError) {
+                        return 0;
+                    }
+                    return e;
+                }
+                // The buffer has been fully consumed: reuse its capacity for
+                // the next block.
+                if (self.read_pos > 0) {
+                    self.to_read.shrinkRetainingCapacity(0);
+                    self.read_pos = 0;
+                }
+                // Remember the failure instead of returning it right away so
+                // the buffer-draining branch above runs first.
+                self.readBlock() catch |e| {
+                    self.err = e;
+                };
+            }
+        }
+
+        /// Decodes the next block from `inner_reader`, appending its output
+        /// to `to_read` and verifying header checksum, declared sizes,
+        /// padding and the block check.
+        ///
+        /// Returns `error.EndOfStreamWithNoError` when the header size byte
+        /// is zero, which `read` reports as end of data.
+        fn readBlock(self: *Self) Error!void {
+            // Counts every byte consumed through `block_reader` (header,
+            // compressed data, padding) for the size and alignment checks.
+            var block_counter = std.io.countingReader(self.inner_reader);
+            const block_reader = block_counter.reader();
+
+            var packed_size: ?u64 = null;
+            var unpacked_size: ?u64 = null;
+
+            // Block Header
+            {
+                // `header_hasher` accumulates the CRC32 that is compared with
+                // the checksum stored at the end of the header.
+                var header_hasher = hashedReader(block_reader, Crc32.init());
+                const header_reader = header_hasher.reader();
+
+                // Number of header bytes that precede the stored CRC32.
+                const header_size = @as(u64, try header_reader.readByte()) * 4;
+                if (header_size == 0)
+                    return error.EndOfStreamWithNoError;
+
+                const Flags = packed struct(u8) {
+                    last_filter_index: u2,
+                    reserved: u4,
+                    has_packed_size: bool,
+                    has_unpacked_size: bool,
+                };
+
+                const flags = @as(Flags, @bitCast(try header_reader.readByte()));
+                const filter_count = @as(u3, flags.last_filter_index) + 1;
+                if (filter_count > 1)
+                    return error.Unsupported;
+
+                if (flags.has_packed_size)
+                    packed_size = try std.leb.readUleb128(u64, header_reader);
+
+                if (flags.has_unpacked_size)
+                    unpacked_size = try std.leb.readUleb128(u64, header_reader);
+
+                const FilterId = enum(u64) {
+                    lzma2 = 0x21,
+                    _,
+                };
+
+                const filter_id = @as(
+                    FilterId,
+                    @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
+                );
+
+                if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
+                    return error.CorruptInput;
+
+                if (filter_id != .lzma2)
+                    return error.Unsupported;
+
+                const properties_size = try std.leb.readUleb128(u64, header_reader);
+                if (properties_size != 1)
+                    return error.CorruptInput;
+
+                // TODO: use filter properties
+                _ = try header_reader.readByte();
+
+                // The fields parsed above must fit inside the declared header
+                // size; otherwise the padding loop below could never reach
+                // the declared boundary.
+                if (block_counter.bytes_read > header_size)
+                    return error.CorruptInput;
+
+                // Header Padding: zero bytes up to the declared header size.
+                while (block_counter.bytes_read < header_size) {
+                    if (try header_reader.readByte() != 0)
+                        return error.CorruptInput;
+                }
+
+                const hash_a = header_hasher.hasher.final();
+                const hash_b = try header_reader.readInt(u32, .little);
+                if (hash_a != hash_b)
+                    return error.WrongChecksum;
+
+                // Reserved flag bits must be zero: a set bit may announce a
+                // header field this decoder does not know how to parse.
+                // Checked after the CRC32 so earlier failures keep their
+                // original error.
+                if (flags.reserved != 0)
+                    return error.Unsupported;
+            }
+
+            // Compressed Data
+            var packed_counter = std.io.countingReader(block_reader);
+            try lzma2.decompress(
+                self.allocator,
+                packed_counter.reader(),
+                self.to_read.writer(self.allocator),
+            );
+
+            if (packed_size) |s| {
+                if (s != packed_counter.bytes_read)
+                    return error.CorruptInput;
+            }
+
+            const unpacked_bytes = self.to_read.items;
+            if (unpacked_size) |s| {
+                if (s != unpacked_bytes.len)
+                    return error.CorruptInput;
+            }
+
+            // Block Padding
+            while (block_counter.bytes_read % 4 != 0) {
+                if (try block_reader.readByte() != 0)
+                    return error.CorruptInput;
+            }
+
+            // Block check: recomputed over the decoded bytes and compared
+            // with the value stored after the padding.
+            switch (self.check) {
+                .none => {},
+                .crc32 => {
+                    const hash_a = Crc32.hash(unpacked_bytes);
+                    const hash_b = try self.inner_reader.readInt(u32, .little);
+                    if (hash_a != hash_b)
+                        return error.WrongChecksum;
+                },
+                .crc64 => {
+                    const hash_a = Crc64.hash(unpacked_bytes);
+                    const hash_b = try self.inner_reader.readInt(u64, .little);
+                    if (hash_a != hash_b)
+                        return error.WrongChecksum;
+                },
+                .sha256 => {
+                    var hash_a: [Sha256.digest_length]u8 = undefined;
+                    Sha256.hash(unpacked_bytes, &hash_a, .{});
+
+                    var hash_b: [Sha256.digest_length]u8 = undefined;
+                    try self.inner_reader.readNoEof(&hash_b);
+
+                    if (!std.mem.eql(u8, &hash_a, &hash_b))
+                        return error.WrongChecksum;
+                },
+                else => return error.Unsupported,
+            }
+
+            self.block_count += 1;
+        }
+    };
+}
+
 test {
    _ = @import("xz/test.zig");
 }
--- a/lib/std/compress/xz/block.zig
+++ b/lib/std/compress/xz/block.zig
@ -1,208 +0,0 @@
-const std = @import("../../std.zig");
-const lzma2 = std.compress.lzma2;
-const Allocator = std.mem.Allocator;
-const ArrayListUnmanaged = std.ArrayListUnmanaged;
-const Crc32 = std.hash.Crc32;
-const Crc64 = std.hash.crc.Crc64Xz;
-const Sha256 = std.crypto.hash.sha2.Sha256;
-const xz = std.compress.xz;
-
-const DecodeError = error{
-    CorruptInput,
-    EndOfStream,
-    EndOfStreamWithNoError,
-    WrongChecksum,
-    Unsupported,
-    Overflow,
-};
-
-pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) {
-    return Decoder(@TypeOf(reader)).init(allocator, reader, check);
-}
-
-pub fn Decoder(comptime ReaderType: type) type {
-    return struct {
-        const Self = @This();
-        pub const Error =
-            ReaderType.Error ||
-            DecodeError ||
-            Allocator.Error;
-        pub const Reader = std.io.GenericReader(*Self, Error, read);
-
-        allocator: Allocator,
-        inner_reader: ReaderType,
-        check: xz.Check,
-        err: ?Error,
-        to_read: ArrayListUnmanaged(u8),
-        read_pos: usize,
-        block_count: usize,
-
-        fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
-            return Self{
-                .allocator = allocator,
-                .inner_reader = in_reader,
-                .check = check,
-                .err = null,
-                .to_read = .{},
-                .read_pos = 0,
-                .block_count = 0,
-            };
-        }
-
-        pub fn deinit(self: *Self) void {
-            self.to_read.deinit(self.allocator);
-        }
-
-        pub fn reader(self: *Self) Reader {
-            return .{ .context = self };
-        }
-
-        pub fn read(self: *Self, output: []u8) Error!usize {
-            while (true) {
-                const unread_len = self.to_read.items.len - self.read_pos;
-                if (unread_len > 0) {
-                    const n = @min(unread_len, output.len);
-                    @memcpy(output[0..n], self.to_read.items[self.read_pos..][0..n]);
-                    self.read_pos += n;
-                    return n;
-                }
-                if (self.err) |e| {
-                    if (e == DecodeError.EndOfStreamWithNoError) {
-                        return 0;
-                    }
-                    return e;
-                }
-                if (self.read_pos > 0) {
-                    self.to_read.shrinkRetainingCapacity(0);
-                    self.read_pos = 0;
-                }
-                self.readBlock() catch |e| {
-                    self.err = e;
-                };
-            }
-        }
-
-        fn readBlock(self: *Self) Error!void {
-            var block_counter = std.io.countingReader(self.inner_reader);
-            const block_reader = block_counter.reader();
-
-            var packed_size: ?u64 = null;
-            var unpacked_size: ?u64 = null;
-
-            // Block Header
-            {
-                var header_hasher = xz.hashedReader(block_reader, Crc32.init());
-                const header_reader = header_hasher.reader();
-
-                const header_size = @as(u64, try header_reader.readByte()) * 4;
-                if (header_size == 0)
-                    return error.EndOfStreamWithNoError;
-
-                const Flags = packed struct(u8) {
-                    last_filter_index: u2,
-                    reserved: u4,
-                    has_packed_size: bool,
-                    has_unpacked_size: bool,
-                };
-
-                const flags = @as(Flags, @bitCast(try header_reader.readByte()));
-                const filter_count = @as(u3, flags.last_filter_index) + 1;
-                if (filter_count > 1)
-                    return error.Unsupported;
-
-                if (flags.has_packed_size)
-                    packed_size = try std.leb.readUleb128(u64, header_reader);
-
-                if (flags.has_unpacked_size)
-                    unpacked_size = try std.leb.readUleb128(u64, header_reader);
-
-                const FilterId = enum(u64) {
-                    lzma2 = 0x21,
-                    _,
-                };
-
-                const filter_id = @as(
-                    FilterId,
-                    @enumFromInt(try std.leb.readUleb128(u64, header_reader)),
-                );
-
-                if (@intFromEnum(filter_id) >= 0x4000_0000_0000_0000)
-                    return error.CorruptInput;
-
-                if (filter_id != .lzma2)
-                    return error.Unsupported;
-
-                const properties_size = try std.leb.readUleb128(u64, header_reader);
-                if (properties_size != 1)
-                    return error.CorruptInput;
-
-                // TODO: use filter properties
-                _ = try header_reader.readByte();
-
-                while (block_counter.bytes_read != header_size) {
-                    if (try header_reader.readByte() != 0)
-                        return error.CorruptInput;
-                }
-
-                const hash_a = header_hasher.hasher.final();
-                const hash_b = try header_reader.readInt(u32, .little);
-                if (hash_a != hash_b)
-                    return error.WrongChecksum;
-            }
-
-            // Compressed Data
-            var packed_counter = std.io.countingReader(block_reader);
-            try lzma2.decompress(
-                self.allocator,
-                packed_counter.reader(),
-                self.to_read.writer(self.allocator),
-            );
-
-            if (packed_size) |s| {
-                if (s != packed_counter.bytes_read)
-                    return error.CorruptInput;
-            }
-
-            const unpacked_bytes = self.to_read.items;
-            if (unpacked_size) |s| {
-                if (s != unpacked_bytes.len)
-                    return error.CorruptInput;
-            }
-
-            // Block Padding
-            while (block_counter.bytes_read % 4 != 0) {
-                if (try block_reader.readByte() != 0)
-                    return error.CorruptInput;
-            }
-
-            switch (self.check) {
-                .none => {},
-                .crc32 => {
-                    const hash_a = Crc32.hash(unpacked_bytes);
-                    const hash_b = try self.inner_reader.readInt(u32, .little);
-                    if (hash_a != hash_b)
-                        return error.WrongChecksum;
-                },
-                .crc64 => {
-                    const hash_a = Crc64.hash(unpacked_bytes);
-                    const hash_b = try self.inner_reader.readInt(u64, .little);
-                    if (hash_a != hash_b)
-                        return error.WrongChecksum;
-                },
-                .sha256 => {
-                    var hash_a: [Sha256.digest_length]u8 = undefined;
-                    Sha256.hash(unpacked_bytes, &hash_a, .{});
-
-                    var hash_b: [Sha256.digest_length]u8 = undefined;
-                    try self.inner_reader.readNoEof(&hash_b);
-
-                    if (!std.mem.eql(u8, &hash_a, &hash_b))
-                        return error.WrongChecksum;
-                },
-                else => return error.Unsupported,
-            }
-
-            self.block_count += 1;
-        }
-    };
-}