std.compress.xz public API cleanup

* add xz to std.compress
 * prefer importing std.zig by file name, to reduce reliance on the
   standard library being a special case.
 * extract some types from inside generic functions. These types are the
   same regardless of the generic parameters.
 * expose some more types in the std.compress.xz namespace.
 * rename xz.stream to xz.decompress
 * rename check.Kind to Check
 * use std.leb for LEB instead of a redundant implementation
This commit is contained in:
Andrew Kelley 2023-01-24 15:04:56 -07:00
parent 06ce15e8f7
commit ea9ded8758
8 changed files with 157 additions and 186 deletions

View file

@ -3,6 +3,7 @@ const std = @import("std.zig");
pub const deflate = @import("compress/deflate.zig"); pub const deflate = @import("compress/deflate.zig");
pub const gzip = @import("compress/gzip.zig"); pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig"); pub const zlib = @import("compress/zlib.zig");
pub const xz = @import("compress/xz.zig");
pub fn HashedReader( pub fn HashedReader(
comptime ReaderType: anytype, comptime ReaderType: anytype,
@ -38,4 +39,5 @@ test {
_ = deflate; _ = deflate;
_ = gzip; _ = gzip;
_ = zlib; _ = zlib;
_ = xz;
} }

View file

@ -1,5 +1,142 @@
pub usingnamespace @import("xz/stream.zig"); const std = @import("std");
const block = @import("xz/block.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;
/// Stream flags, embedded in both `Header` and `Footer`.
/// Packed into 16 bits: 8 reserved bits, a 4-bit check kind, 4 reserved bits.
pub const Flags = packed struct(u16) {
/// Reserved; `Decompress` rejects a header or footer where this is non-zero.
reserved1: u8,
/// Integrity check kind; the header's value is handed to the block decoder.
check_kind: Check,
/// Reserved; `Decompress` rejects a header or footer where this is non-zero.
reserved2: u4,
};
/// Stream header, read in one piece with `readStruct` by `Decompress.init`.
pub const Header = extern struct {
/// Must equal the bytes 0xFD, '7', 'z', 'X', 'Z', 0x00.
magic: [6]u8,
/// Stream flags; the reserved fields must be zero.
flags: Flags,
/// Compared against the CRC32 of the bytes of `flags`.
crc32: u32,
};
/// Stream footer, read in one piece with `readStruct` once the index has been
/// consumed by `Decompress.read`.
pub const Footer = extern struct {
/// Compared against the CRC32 computed over `backward_size` followed by `flags`.
crc32: u32,
/// Encoded index size: `(backward_size + 1) * 4` must equal the number of
/// index bytes counted while reading the index.
backward_size: u32,
/// Stream flags; the reserved fields must be zero.
flags: Flags,
/// Must equal the bytes 'Y', 'Z'.
magic: [2]u8,
};
/// Integrity check kind carried in the 4-bit `check_kind` field of `Flags`.
/// Non-exhaustive: any other 4-bit value is representable but has no name here.
pub const Check = enum(u4) {
none = 0x00,
crc32 = 0x01,
crc64 = 0x04,
sha256 = 0x0A,
_,
};
/// Creates an XZ decompressor over `reader`; the concrete type is inferred
/// from the reader. Any error from `Decompress(...).init` is returned as-is.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Decompressor = Decompress(@TypeOf(reader));
    return Decompressor.init(allocator, reader);
}
/// Returns a streaming XZ decompressor type that pulls compressed bytes from a
/// `ReaderType` and exposes the decompressed bytes through `Reader`.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads and validates the stream header from `source`, then creates the
        /// block decoder with the check kind announced in the header.
        ///
        /// Returns `error.BadHeader` for a wrong magic or non-zero reserved bits
        /// and `error.WrongChecksum` when the header CRC32 does not match.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const header = try source.readStruct(Header);

            if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0)
                return error.BadHeader;

            // The header CRC32 covers only the stream flags.
            const hash = Crc32.hash(std.mem.asBytes(&header.flags));
            if (hash != header.crc32)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, header.flags.check_kind),
                .in_reader = source,
            };
        }

        /// Releases the resources held by the block decoder.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Fills `buffer` with decompressed bytes and returns how many were written.
        ///
        /// When the block decoder yields no more data, the index and the stream
        /// footer are read and validated, and 0 is returned. Inconsistencies are
        /// reported as `error.CorruptInput` or `error.WrongChecksum`.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            const index_size = blk: {
                // Both the CRC and the byte count are seeded with one 0x00 byte that
                // is not read here. NOTE(review): presumably the index indicator was
                // already consumed by the block decoder; confirm against block.zig.
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // The index is padded with zero bytes to a multiple of four bytes.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Take the CRC before reading the stored value through the same
                // hashing reader, so the stored bytes do not affect `hash_a`.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            const footer = try self.in_reader.readStruct(Footer);

            // Widen before the arithmetic: `backward_size` comes straight from the
            // input, and values of 0x3FFF_FFFF and above overflow a u32 here.
            const backward_size = (@as(u64, footer.backward_size) + 1) * 4;
            if (backward_size != index_size)
                return error.CorruptInput;

            if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0)
                return error.CorruptInput;

            // The footer CRC32 covers the backward size followed by the stream flags.
            var hasher = Crc32.init();
            hasher.update(std.mem.asBytes(&footer.backward_size));
            hasher.update(std.mem.asBytes(&footer.flags));
            const hash = hasher.final();
            if (hash != footer.crc32)
                return error.WrongChecksum;

            if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}
test { test {
_ = @import("xz/stream.zig"); _ = @import("xz/test.zig");
} }

View file

@ -1,11 +1,10 @@
const std = @import("std"); const std = @import("../../std.zig");
const check = @import("check.zig");
const lzma = @import("lzma.zig"); const lzma = @import("lzma.zig");
const multibyte = @import("multibyte.zig");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32; const Crc32 = std.hash.Crc32;
const Crc64 = std.hash.crc.Crc64Xz; const Crc64 = std.hash.crc.Crc64Xz;
const Sha256 = std.crypto.hash.sha2.Sha256; const Sha256 = std.crypto.hash.sha2.Sha256;
const xz = std.compress.xz;
const DecodeError = error{ const DecodeError = error{
CorruptInput, CorruptInput,
@ -16,8 +15,8 @@ const DecodeError = error{
Overflow, Overflow,
}; };
pub fn decoder(allocator: Allocator, reader: anytype, check_kind: check.Kind) !Decoder(@TypeOf(reader)) { pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) {
return Decoder(@TypeOf(reader)).init(allocator, reader, check_kind); return Decoder(@TypeOf(reader)).init(allocator, reader, check);
} }
pub fn Decoder(comptime ReaderType: type) type { pub fn Decoder(comptime ReaderType: type) type {
@ -31,17 +30,17 @@ pub fn Decoder(comptime ReaderType: type) type {
allocator: Allocator, allocator: Allocator,
inner_reader: ReaderType, inner_reader: ReaderType,
check_kind: check.Kind, check: xz.Check,
err: ?Error, err: ?Error,
accum: lzma.LzAccumBuffer, accum: lzma.LzAccumBuffer,
lzma_state: lzma.DecoderState, lzma_state: lzma.DecoderState,
block_count: usize, block_count: usize,
fn init(allocator: Allocator, in_reader: ReaderType, check_kind: check.Kind) !Self { fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
return Self{ return Self{
.allocator = allocator, .allocator = allocator,
.inner_reader = in_reader, .inner_reader = in_reader,
.check_kind = check_kind, .check = check,
.err = null, .err = null,
.accum = .{}, .accum = .{},
.lzma_state = try lzma.DecoderState.init(allocator), .lzma_state = try lzma.DecoderState.init(allocator),
@ -116,10 +115,10 @@ pub fn Decoder(comptime ReaderType: type) type {
return error.Unsupported; return error.Unsupported;
if (flags.has_packed_size) if (flags.has_packed_size)
packed_size = try multibyte.readInt(header_reader); packed_size = try std.leb.readULEB128(u64, header_reader);
if (flags.has_unpacked_size) if (flags.has_unpacked_size)
unpacked_size = try multibyte.readInt(header_reader); unpacked_size = try std.leb.readULEB128(u64, header_reader);
const FilterId = enum(u64) { const FilterId = enum(u64) {
lzma2 = 0x21, lzma2 = 0x21,
@ -128,7 +127,7 @@ pub fn Decoder(comptime ReaderType: type) type {
const filter_id = @intToEnum( const filter_id = @intToEnum(
FilterId, FilterId,
try multibyte.readInt(header_reader), try std.leb.readULEB128(u64, header_reader),
); );
if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000) if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000)
@ -137,7 +136,7 @@ pub fn Decoder(comptime ReaderType: type) type {
if (filter_id != .lzma2) if (filter_id != .lzma2)
return error.Unsupported; return error.Unsupported;
const properties_size = try multibyte.readInt(header_reader); const properties_size = try std.leb.readULEB128(u64, header_reader);
if (properties_size != 1) if (properties_size != 1)
return error.CorruptInput; return error.CorruptInput;
@ -177,8 +176,7 @@ pub fn Decoder(comptime ReaderType: type) type {
return error.CorruptInput; return error.CorruptInput;
} }
// Check switch (self.check) {
switch (self.check_kind) {
.none => {}, .none => {},
.crc32 => { .crc32 => {
const hash_a = Crc32.hash(unpacked_bytes); const hash_a = Crc32.hash(unpacked_bytes);

View file

@ -1,7 +0,0 @@
/// Integrity check kind, stored as a 4-bit tag.
/// Non-exhaustive: any other 4-bit value is representable but has no name here.
pub const Kind = enum(u4) {
none = 0x00,
crc32 = 0x01,
crc64 = 0x04,
sha256 = 0x0A,
_,
};

View file

@ -1,6 +1,6 @@
// Ported from https://github.com/gendx/lzma-rs // Ported from https://github.com/gendx/lzma-rs
const std = @import("std"); const std = @import("../../std.zig");
const assert = std.debug.assert; const assert = std.debug.assert;
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged; const ArrayListUnmanaged = std.ArrayListUnmanaged;

View file

@ -1,23 +0,0 @@
/// One encoded byte of a multibyte integer: seven payload bits in the low
/// positions and a continuation flag in the top bit.
const Multibyte = packed struct(u8) {
    value: u7,
    more: bool,
};

/// Decodes a variable-length unsigned integer from `reader`, least significant
/// seven-bit group first.
///
/// Returns `error.CorruptInput` when the encoding runs past nine bytes or when
/// any byte after the first is 0x00. Reader errors are passed through.
pub fn readInt(reader: anytype) !u64 {
    const max_size = 9;

    var result: u64 = 0;
    var index: u6 = 0;
    while (true) {
        const byte = try reader.readStruct(Multibyte);

        // Only continuation bytes are subject to the length and zero-byte checks.
        if (index != 0) {
            if (index >= max_size or @bitCast(u8, byte) == 0x00)
                return error.CorruptInput;
        }

        result |= @as(u64, byte.value) << (index * 7);
        if (!byte.more)
            return result;

        index += 1;
    }
}

View file

@ -1,136 +0,0 @@
const std = @import("std");
const block = @import("block.zig");
const check = @import("check.zig");
const multibyte = @import("multibyte.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;
test {
_ = @import("stream_test.zig");
}
/// Stream flags, embedded in both the stream header and the stream footer.
/// Packed into 16 bits: 8 reserved bits, a 4-bit check kind, 4 reserved bits.
const Flags = packed struct(u16) {
/// Reserved; `Stream` rejects a header or footer where this is non-zero.
reserved1: u8,
/// Integrity check kind; the header's value is handed to the block decoder.
check_kind: check.Kind,
/// Reserved; `Stream` rejects a header or footer where this is non-zero.
reserved2: u4,
};
/// Creates an XZ stream decoder over `reader`; the concrete type is inferred
/// from the reader. Any error from `Stream(...).init` is returned as-is.
pub fn stream(allocator: Allocator, reader: anytype) !Stream(@TypeOf(reader)) {
    const StreamType = Stream(@TypeOf(reader));
    return StreamType.init(allocator, reader);
}
/// Returns a streaming XZ decoder type that pulls compressed bytes from a
/// `ReaderType` and exposes the decompressed bytes through `Reader`.
pub fn Stream(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads and validates the stream header from `source`, then creates the
        /// block decoder with the check kind announced in the header.
        ///
        /// Returns `error.BadHeader` for a wrong magic or non-zero reserved bits
        /// and `error.WrongChecksum` when the header CRC32 does not match.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const Header = extern struct {
                magic: [6]u8,
                flags: Flags,
                crc32: u32,
            };

            const header = try source.readStruct(Header);

            if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0)
                return error.BadHeader;

            // The header CRC32 covers only the stream flags.
            const hash = Crc32.hash(std.mem.asBytes(&header.flags));
            if (hash != header.crc32)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, header.flags.check_kind),
                .in_reader = source,
            };
        }

        /// Releases the resources held by the block decoder.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Fills `buffer` with decompressed bytes and returns how many were written.
        ///
        /// When the block decoder yields no more data, the index and the stream
        /// footer are read and validated, and 0 is returned. Inconsistencies are
        /// reported as `error.CorruptInput` or `error.WrongChecksum`.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            const index_size = blk: {
                // Both the CRC and the byte count are seeded with one 0x00 byte that
                // is not read here. NOTE(review): presumably the index indicator was
                // already consumed by the block decoder; confirm against block.zig.
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try multibyte.readInt(counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try multibyte.readInt(counting_reader);
                    _ = try multibyte.readInt(counting_reader);
                }

                // The index is padded with zero bytes to a multiple of four bytes.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Take the CRC before reading the stored value through the same
                // hashing reader, so the stored bytes do not affect `hash_a`.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            const Footer = extern struct {
                crc32: u32,
                backward_size: u32,
                flags: Flags,
                magic: [2]u8,
            };

            const footer = try self.in_reader.readStruct(Footer);

            // Widen before the arithmetic: `backward_size` comes straight from the
            // input, and values of 0x3FFF_FFFF and above overflow a u32 here.
            const backward_size = (@as(u64, footer.backward_size) + 1) * 4;
            if (backward_size != index_size)
                return error.CorruptInput;

            if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0)
                return error.CorruptInput;

            // The footer CRC32 covers the backward size followed by the stream flags.
            var hasher = Crc32.init();
            hasher.update(std.mem.asBytes(&footer.backward_size));
            hasher.update(std.mem.asBytes(&footer.flags));
            const hash = hasher.final();
            if (hash != footer.crc32)
                return error.WrongChecksum;

            if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}

View file

@ -1,11 +1,11 @@
const std = @import("std"); const std = @import("../../std.zig");
const testing = std.testing; const testing = std.testing;
const stream = @import("stream.zig").stream; const xz = std.compress.xz;
fn decompress(data: []const u8) ![]u8 { fn decompress(data: []const u8) ![]u8 {
var in_stream = std.io.fixedBufferStream(data); var in_stream = std.io.fixedBufferStream(data);
var xz_stream = try stream(testing.allocator, in_stream.reader()); var xz_stream = try xz.decompress(testing.allocator, in_stream.reader());
defer xz_stream.deinit(); defer xz_stream.deinit();
return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize)); return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));