zig/lib/std/compress/xz.zig
Andrew Kelley ea9ded8758 std.compress.xz public API cleanup
* add xz to std.compress
 * prefer importing std.zig by file name, to reduce reliance on the
   standard library being a special case.
 * extract some types from inside generic functions. These types are the
   same regardless of the generic parameters.
 * expose some more types in the std.compress.xz namespace.
 * rename xz.stream to xz.decompress
 * rename check.Kind to Check
 * use std.leb for LEB instead of a redundant implementation
2023-01-24 15:24:19 -07:00

142 lines
4.3 KiB
Zig

const std = @import("std");
const block = @import("xz/block.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;
/// Stream flags stored in both `Header` and `Footer`, packed into 16 bits.
pub const Flags = packed struct(u16) {
/// Reserved; `Decompress` rejects input where this is non-zero.
reserved1: u8,
/// Integrity check kind; the header's value is passed on to the block decoder.
check_kind: Check,
/// Reserved; `Decompress` rejects input where this is non-zero.
reserved2: u4,
};
/// Fixed-size record that `Decompress.init` reads from the start of the input.
pub const Header = extern struct {
/// Must be the bytes FD 37 7A 58 5A 00, otherwise `error.BadHeader` is returned.
magic: [6]u8,
flags: Flags,
/// CRC32 of the bytes of `flags`; a mismatch yields `error.WrongChecksum`.
crc32: u32,
};
/// Fixed-size record that `Decompress.read` reads after the index, once the
/// block decoder reports no more data.
pub const Footer = extern struct {
/// CRC32 over the bytes of `backward_size` followed by the bytes of `flags`.
crc32: u32,
/// Encoded index size: `(backward_size + 1) * 4` must equal the number of
/// index bytes counted by `Decompress.read`.
backward_size: u32,
/// Reserved bits must be zero, otherwise `error.CorruptInput` is returned.
flags: Flags,
/// Must be the bytes 'Y', 'Z', otherwise `error.CorruptInput` is returned.
magic: [2]u8,
};
/// Integrity check kind carried in `Flags.check_kind`.
/// The enum is non-exhaustive, so any 4-bit value read from input is representable.
pub const Check = enum(u4) {
none = 0x00,
crc32 = 0x01,
crc64 = 0x04,
sha256 = 0x0A,
// Catch-all for tag values not named above.
_,
};
/// Creates an xz decompressor that pulls compressed bytes from `reader`.
///
/// The concrete decompressor type is derived from the type of `reader`.
/// Any error produced while initializing it is returned to the caller.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Stream = Decompress(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
/// Returns the xz stream decompressor type for the given underlying reader type.
///
/// Instances are obtained through `decompress` (this type's `init` is not
/// public). Decoded bytes are pulled with `read` or through `reader()`.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        /// Everything `read` may fail with: the underlying reader's errors
        /// merged with the block decoder's error set.
        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads the stream header from `source`, validates it, and sets up
        /// the block decoder with the check kind found in the header flags.
        ///
        /// Returns `error.BadHeader` on a wrong magic or non-zero reserved
        /// flag bits, and `error.WrongChecksum` when the stored CRC32 does not
        /// match the CRC32 of the flags. Errors from `source` and from
        /// `block.decoder` are propagated.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const header = try source.readStruct(Header);

            if (!std.mem.eql(u8, &header.magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            if (header.flags.reserved1 != 0 or header.flags.reserved2 != 0)
                return error.BadHeader;

            const hash = Crc32.hash(std.mem.asBytes(&header.flags));
            if (hash != header.crc32)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, header.flags.check_kind),
                .in_reader = source,
            };
        }

        /// Forwards to the block decoder's `deinit`.
        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        /// Returns a `std.io.Reader` whose context is this decompressor.
        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Decodes into `buffer` and returns the number of bytes produced.
        ///
        /// Returns 0 when `buffer` is empty, or when the block decoder yields
        /// no more data. In the latter case the index and the stream footer
        /// are read from the input and validated first; inconsistencies are
        /// reported as `error.CorruptInput` or `error.WrongChecksum`.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            // NOTE(review): if the block decoder returns 0 again on a later
            // call, the trailer parsing below runs again on whatever follows
            // in the input - confirm callers stop at the first 0.
            const index_size = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                // One 0x00 byte is fed to the CRC and counted without being
                // read here; presumably it is the index indicator that the
                // block decoder already consumed - confirm in xz/block.zig.
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                // Same width as `record_count`, so the counter cannot overflow
                // before the loop condition fails.
                var i: u64 = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // Padding up to a multiple of four bytes must be all zeros.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                // Finalize the CRC before reading the stored value. The stored
                // value is read through the counting reader, so its four bytes
                // are included in the returned index size.
                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            const footer = try self.in_reader.readStruct(Footer);

            // Widen before the arithmetic: in u32, `(backward_size + 1) * 4`
            // overflows for large values taken from untrusted input.
            const backward_size = (@as(u64, footer.backward_size) + 1) * 4;
            if (backward_size != index_size)
                return error.CorruptInput;

            if (footer.flags.reserved1 != 0 or footer.flags.reserved2 != 0)
                return error.CorruptInput;

            var hasher = Crc32.init();
            hasher.update(std.mem.asBytes(&footer.backward_size));
            hasher.update(std.mem.asBytes(&footer.flags));
            const hash = hasher.final();
            if (hash != footer.crc32)
                return error.WrongChecksum;

            if (!std.mem.eql(u8, &footer.magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}
// Reference xz/test.zig so that the tests declared there are included when
// this file's tests are run.
test {
_ = @import("xz/test.zig");
}