Merge pull request #14434 from FnControlOption/xz

Add xz decoder

closes #14300
closes #2851
This commit is contained in:
Andrew Kelley 2023-01-25 20:31:55 -05:00 committed by GitHub
commit 96a55f6ce8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 1230 additions and 16 deletions

View file

@ -122,6 +122,8 @@ pub fn build(b: *Builder) !void {
"compress-gettysburg.txt",
"compress-pi.txt",
"rfc1951.txt",
// exclude files from lib/std/compress/xz/testdata
".xz",
// exclude files from lib/std/tz/
".tzif",
// others

View file

@ -3,6 +3,7 @@ const std = @import("std.zig");
pub const deflate = @import("compress/deflate.zig");
pub const gzip = @import("compress/gzip.zig");
pub const zlib = @import("compress/zlib.zig");
pub const xz = @import("compress/xz.zig");
pub fn HashedReader(
comptime ReaderType: anytype,
@ -38,4 +39,5 @@ test {
_ = deflate;
_ = gzip;
_ = zlib;
_ = xz;
}

View file

@ -1,7 +1,7 @@
//
// Decompressor for GZIP data streams (RFC1952)
const std = @import("std");
const std = @import("../std.zig");
const io = std.io;
const fs = std.fs;
const testing = std.testing;
@ -17,10 +17,7 @@ const FCOMMENT = 1 << 4;
const max_string_len = 1024;
/// TODO: the fully qualified namespace to this declaration is
/// std.compress.gzip.GzipStream which has a redundant "gzip" in the name.
/// Instead, it should be `std.compress.gzip.Stream`.
pub fn GzipStream(comptime ReaderType: type) type {
pub fn Decompress(comptime ReaderType: type) type {
return struct {
const Self = @This();
@ -154,14 +151,14 @@ pub fn GzipStream(comptime ReaderType: type) type {
};
}
pub fn gzipStream(allocator: mem.Allocator, reader: anytype) !GzipStream(@TypeOf(reader)) {
return GzipStream(@TypeOf(reader)).init(allocator, reader);
pub fn decompress(allocator: mem.Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
return Decompress(@TypeOf(reader)).init(allocator, reader);
}
fn testReader(data: []const u8, comptime expected: []const u8) !void {
var in_stream = io.fixedBufferStream(data);
var gzip_stream = try gzipStream(testing.allocator, in_stream.reader());
var gzip_stream = try decompress(testing.allocator, in_stream.reader());
defer gzip_stream.deinit();
// Read and decompress the whole file

145
lib/std/compress/xz.zig Normal file
View file

@ -0,0 +1,145 @@
const std = @import("std");
const block = @import("xz/block.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;
/// Integrity-check type declared in the xz stream flags; values are the
/// raw 4-bit check IDs. Only the four named checks are handled downstream;
/// other IDs are representable (non-exhaustive enum) but unsupported.
pub const Check = enum(u4) {
    none = 0x00,
    crc32 = 0x01,
    crc64 = 0x04,
    sha256 = 0x0A,
    _,
};
/// Parses the two-byte xz stream-flags field and stores the declared
/// integrity-check type into `check`. Both reserved areas (the entire
/// first byte and the high nibble of the second) must be zero, otherwise
/// `error.CorruptInput` is returned.
fn readStreamFlags(reader: anytype, check: *Check) !void {
    var bits = std.io.bitReader(.Little, reader);

    if (try bits.readBitsNoEof(u8, 8) != 0) {
        return error.CorruptInput;
    }

    const check_id = try bits.readBitsNoEof(u4, 4);
    check.* = @intToEnum(Check, check_id);

    if (try bits.readBitsNoEof(u4, 4) != 0) {
        return error.CorruptInput;
    }
}
/// Creates an xz `Decompress` over `reader`. `allocator` backs the
/// decoder's internal buffers; fails if the stream header is invalid.
pub fn decompress(allocator: Allocator, reader: anytype) !Decompress(@TypeOf(reader)) {
    const Stream = Decompress(@TypeOf(reader));
    return Stream.init(allocator, reader);
}
/// Streaming xz decompressor over an arbitrary source reader.
/// `init` validates the 12-byte stream header; `read` drains decoded
/// block data and, once the blocks are exhausted, validates the stream
/// index and footer before reporting end-of-stream.
pub fn Decompress(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error = ReaderType.Error || block.Decoder(ReaderType).Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        block_decoder: block.Decoder(ReaderType),
        in_reader: ReaderType,

        /// Reads and validates the stream header: 6-byte magic, stream
        /// flags, and the stored CRC32 of the flags.
        fn init(allocator: Allocator, source: ReaderType) !Self {
            const magic = try source.readBytesNoEof(6);
            if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
                return error.BadHeader;

            var check: Check = undefined;
            // Hash the flag bytes while parsing them so the stored CRC32
            // can be verified without buffering or re-reading.
            const hash_a = blk: {
                var hasher = std.compress.hashedReader(source, Crc32.init());
                try readStreamFlags(hasher.reader(), &check);
                break :blk hasher.hasher.final();
            };

            const hash_b = try source.readIntLittle(u32);
            if (hash_a != hash_b)
                return error.WrongChecksum;

            return Self{
                .allocator = allocator,
                .block_decoder = try block.decoder(allocator, source, check),
                .in_reader = source,
            };
        }

        pub fn deinit(self: *Self) void {
            self.block_decoder.deinit();
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Returns decoded bytes from the block decoder. When the block
        /// decoder reports 0 (end of blocks), parses and verifies the
        /// CRC32-protected index and the stream footer, then returns 0.
        pub fn read(self: *Self, buffer: []u8) Error!usize {
            if (buffer.len == 0)
                return 0;

            const r = try self.block_decoder.read(buffer);
            if (r != 0)
                return r;

            // Index: list of (unpadded size, uncompressed size) records,
            // one per block, zero-padded to 4 bytes, CRC32 at the end.
            const index_size = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                // The 0x00 index-indicator byte was already consumed by the
                // block decoder; fold it into the hash and the byte count.
                hasher.hasher.update(&[1]u8{0x00});

                var counter = std.io.countingReader(hasher.reader());
                counter.bytes_read += 1;

                const counting_reader = counter.reader();

                const record_count = try std.leb.readULEB128(u64, counting_reader);
                if (record_count != self.block_decoder.block_count)
                    return error.CorruptInput;

                var i: usize = 0;
                while (i < record_count) : (i += 1) {
                    // TODO: validate records
                    _ = try std.leb.readULEB128(u64, counting_reader);
                    _ = try std.leb.readULEB128(u64, counting_reader);
                }

                // Index padding must be zero up to a 4-byte boundary.
                while (counter.bytes_read % 4 != 0) {
                    if (try counting_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                const hash_a = hasher.hasher.final();
                const hash_b = try counting_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;

                break :blk counter.bytes_read;
            };

            // Stream footer: CRC32 of {backward size, stream flags},
            // then the 2-byte footer magic "YZ".
            const hash_a = try self.in_reader.readIntLittle(u32);

            const hash_b = blk: {
                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
                const hashed_reader = hasher.reader();

                // NOTE(review): u32 arithmetic — `stored + 1` wraps for a
                // pathological 0xFFFFFFFF backward-size field; confirm the
                // overflow panic (safe modes) is the intended behavior.
                const backward_size = (try hashed_reader.readIntLittle(u32) + 1) * 4;
                if (backward_size != index_size)
                    return error.CorruptInput;

                var check: Check = undefined;
                try readStreamFlags(hashed_reader, &check);

                break :blk hasher.hasher.final();
            };

            if (hash_a != hash_b)
                return error.WrongChecksum;

            const magic = try self.in_reader.readBytesNoEof(2);
            if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
                return error.CorruptInput;

            return 0;
        }
    };
}
test {
    // Reference the fixture-driven round-trip tests so `zig test` runs them.
    _ = @import("xz/test.zig");
}

View file

@ -0,0 +1,317 @@
const std = @import("../../std.zig");
const lzma = @import("lzma.zig");
const Allocator = std.mem.Allocator;
const Crc32 = std.hash.Crc32;
const Crc64 = std.hash.crc.Crc64Xz;
const Sha256 = std.crypto.hash.sha2.Sha256;
const xz = std.compress.xz;
/// Errors produced while decoding xz block data, merged with the source
/// reader's and allocator's errors in `Decoder(...).Error`.
const DecodeError = error{
    CorruptInput,
    EndOfStream,
    // Internal sentinel: the stream ended cleanly. The decoder translates
    // it into a normal 0-byte read; it is never surfaced to callers.
    EndOfStreamWithNoError,
    WrongChecksum,
    Unsupported,
    Overflow,
};
/// Convenience constructor: builds a block `Decoder` for the concrete
/// type of `reader`, using `check` as the per-block integrity check.
pub fn decoder(allocator: Allocator, reader: anytype, check: xz.Check) !Decoder(@TypeOf(reader)) {
    const D = Decoder(@TypeOf(reader));
    return D.init(allocator, reader, check);
}
/// Per-block xz decoder: parses block headers, drives the LZMA2 chunk
/// decoder, and verifies block padding plus the configured integrity
/// check (none/CRC32/CRC64/SHA-256) over each block's decoded bytes.
pub fn Decoder(comptime ReaderType: type) type {
    return struct {
        const Self = @This();

        pub const Error =
            ReaderType.Error ||
            DecodeError ||
            Allocator.Error;
        pub const Reader = std.io.Reader(*Self, Error, read);

        allocator: Allocator,
        inner_reader: ReaderType,
        check: xz.Check,
        // Sticky error: once set, it is returned after the already-decoded
        // output in `accum.to_read` has been fully drained.
        err: ?Error,
        // Decoded-output accumulator; `to_read` is what read() hands out.
        accum: lzma.LzAccumBuffer,
        lzma_state: lzma.DecoderState,
        // Number of completed blocks; validated against the stream index.
        block_count: usize,

        fn init(allocator: Allocator, in_reader: ReaderType, check: xz.Check) !Self {
            return Self{
                .allocator = allocator,
                .inner_reader = in_reader,
                .check = check,
                .err = null,
                .accum = .{},
                .lzma_state = try lzma.DecoderState.init(allocator),
                .block_count = 0,
            };
        }

        pub fn deinit(self: *Self) void {
            self.accum.deinit(self.allocator);
            self.lzma_state.deinit(self.allocator);
        }

        pub fn reader(self: *Self) Reader {
            return .{ .context = self };
        }

        /// Serves buffered output first; otherwise decodes the next block.
        /// `EndOfStreamWithNoError` is translated into a 0-byte read.
        pub fn read(self: *Self, output: []u8) Error!usize {
            while (true) {
                if (self.accum.to_read.items.len > 0) {
                    const n = self.accum.read(output);
                    if (self.accum.to_read.items.len == 0 and self.err != null) {
                        if (self.err.? == DecodeError.EndOfStreamWithNoError) {
                            return n;
                        }
                        return self.err.?;
                    }
                    return n;
                }
                if (self.err != null) {
                    if (self.err.? == DecodeError.EndOfStreamWithNoError) {
                        return 0;
                    }
                    return self.err.?;
                }
                self.readBlock() catch |e| {
                    // Remember the failure, but flush anything decoded so
                    // far so the next loop iteration can hand it out first.
                    self.err = e;
                    if (self.accum.to_read.items.len == 0) {
                        try self.accum.reset(self.allocator);
                    }
                };
            }
        }

        /// Decodes one complete block: header, LZMA2 chunk sequence,
        /// zero padding to a 4-byte boundary, then the integrity check.
        fn readBlock(self: *Self) Error!void {
            const unpacked_pos = self.accum.to_read.items.len;

            var block_counter = std.io.countingReader(self.inner_reader);
            const block_reader = block_counter.reader();

            // Optional sizes declared in the block header (null = absent).
            var packed_size: ?u64 = null;
            var unpacked_size: ?u64 = null;

            // Block Header
            {
                var header_hasher = std.compress.hashedReader(block_reader, Crc32.init());
                const header_reader = header_hasher.reader();

                // A 0x00 size byte here is actually the index indicator,
                // i.e. there are no more blocks.
                // NOTE(review): u8 arithmetic — encoded header sizes above
                // 63 (spec allows up to 0xFF) overflow/panic here; confirm.
                const header_size = try header_reader.readByte() * 4;
                if (header_size == 0)
                    return error.EndOfStreamWithNoError;

                const Flags = packed struct(u8) {
                    last_filter_index: u2,
                    reserved: u4,
                    has_packed_size: bool,
                    has_unpacked_size: bool,
                };

                const flags = @bitCast(Flags, try header_reader.readByte());
                const filter_count = @as(u3, flags.last_filter_index) + 1;
                // Filter chains (delta, BCJ, ...) are not implemented.
                if (filter_count > 1)
                    return error.Unsupported;

                if (flags.has_packed_size)
                    packed_size = try std.leb.readULEB128(u64, header_reader);

                if (flags.has_unpacked_size)
                    unpacked_size = try std.leb.readULEB128(u64, header_reader);

                const FilterId = enum(u64) {
                    lzma2 = 0x21,
                    _,
                };

                const filter_id = @intToEnum(
                    FilterId,
                    try std.leb.readULEB128(u64, header_reader),
                );

                // Filter IDs >= 2^62 are reserved as invalid.
                if (@enumToInt(filter_id) >= 0x4000_0000_0000_0000)
                    return error.CorruptInput;

                if (filter_id != .lzma2)
                    return error.Unsupported;

                // LZMA2's only property is the 1-byte dictionary size.
                const properties_size = try std.leb.readULEB128(u64, header_reader);
                if (properties_size != 1)
                    return error.CorruptInput;

                // TODO: use filter properties
                _ = try header_reader.readByte();

                // Header padding must be zero-filled up to header_size.
                while (block_counter.bytes_read != header_size) {
                    if (try header_reader.readByte() != 0)
                        return error.CorruptInput;
                }

                const hash_a = header_hasher.hasher.final();
                const hash_b = try header_reader.readIntLittle(u32);
                if (hash_a != hash_b)
                    return error.WrongChecksum;
            }

            // Compressed Data: LZMA2 chunks until a 0x00 end marker.
            var packed_counter = std.io.countingReader(block_reader);
            const packed_reader = packed_counter.reader();
            while (try self.readLzma2Chunk(packed_reader)) {}

            if (packed_size) |s| {
                if (s != packed_counter.bytes_read)
                    return error.CorruptInput;
            }

            const unpacked_bytes = self.accum.to_read.items[unpacked_pos..];
            if (unpacked_size) |s| {
                if (s != unpacked_bytes.len)
                    return error.CorruptInput;
            }

            // Block Padding
            while (block_counter.bytes_read % 4 != 0) {
                if (try block_reader.readByte() != 0)
                    return error.CorruptInput;
            }

            // Verify this block's decoded bytes against the stream's
            // declared check. The check field follows the padding and is
            // deliberately read from inner_reader (it is not counted).
            switch (self.check) {
                .none => {},
                .crc32 => {
                    const hash_a = Crc32.hash(unpacked_bytes);
                    const hash_b = try self.inner_reader.readIntLittle(u32);
                    if (hash_a != hash_b)
                        return error.WrongChecksum;
                },
                .crc64 => {
                    const hash_a = Crc64.hash(unpacked_bytes);
                    const hash_b = try self.inner_reader.readIntLittle(u64);
                    if (hash_a != hash_b)
                        return error.WrongChecksum;
                },
                .sha256 => {
                    var hash_a: [Sha256.digest_length]u8 = undefined;
                    Sha256.hash(unpacked_bytes, &hash_a, .{});

                    var hash_b: [Sha256.digest_length]u8 = undefined;
                    try self.inner_reader.readNoEof(&hash_b);

                    if (!std.mem.eql(u8, &hash_a, &hash_b))
                        return error.WrongChecksum;
                },
                else => return error.Unsupported,
            }

            self.block_count += 1;
        }

        /// Decodes one LZMA2 chunk. Returns false when the end-of-block
        /// control byte (0x00) is reached.
        fn readLzma2Chunk(self: *Self, packed_reader: anytype) Error!bool {
            const status = try packed_reader.readByte();
            switch (status) {
                0 => {
                    try self.accum.reset(self.allocator);
                    return false;
                },
                // 1/2: uncompressed chunk; 1 additionally resets the dict.
                1, 2 => {
                    if (status == 1)
                        try self.accum.reset(self.allocator);

                    // NOTE(review): u16 arithmetic — a stored size of
                    // 0xFFFF wraps on +1 (contrast the u17 used below for
                    // packed_size); confirm intended.
                    const size = try packed_reader.readIntBig(u16) + 1;
                    try self.accum.ensureUnusedCapacity(self.allocator, size);

                    var i: usize = 0;
                    while (i < size) : (i += 1)
                        self.accum.appendAssumeCapacity(try packed_reader.readByte());

                    return true;
                },
                else => {
                    // LZMA chunk: high bit set; bits 5-6 pick a reset level.
                    if (status & 0x80 == 0)
                        return error.CorruptInput;

                    const Reset = struct {
                        dict: bool,
                        state: bool,
                        props: bool,
                    };

                    const reset = switch ((status >> 5) & 0x3) {
                        0 => Reset{
                            .dict = false,
                            .state = false,
                            .props = false,
                        },
                        1 => Reset{
                            .dict = false,
                            .state = true,
                            .props = false,
                        },
                        2 => Reset{
                            .dict = false,
                            .state = true,
                            .props = true,
                        },
                        3 => Reset{
                            .dict = true,
                            .state = true,
                            .props = true,
                        },
                        else => unreachable,
                    };

                    // Unpacked size: low 5 bits of the control byte as the
                    // high bits, then 16 more bits, plus 1.
                    const unpacked_size = blk: {
                        var tmp: u64 = status & 0x1F;
                        tmp <<= 16;
                        tmp |= try packed_reader.readIntBig(u16);
                        break :blk tmp + 1;
                    };

                    // u17 so that 0xFFFF + 1 cannot overflow.
                    const packed_size = blk: {
                        const tmp: u17 = try packed_reader.readIntBig(u16);
                        break :blk tmp + 1;
                    };

                    if (reset.dict)
                        try self.accum.reset(self.allocator);

                    if (reset.state) {
                        var new_props = self.lzma_state.lzma_props;

                        if (reset.props) {
                            // Properties byte encodes (pb * 5 + lp) * 9 + lc.
                            var props = try packed_reader.readByte();
                            if (props >= 225)
                                return error.CorruptInput;

                            const lc = @intCast(u4, props % 9);
                            props /= 9;
                            const lp = @intCast(u3, props % 5);
                            props /= 5;
                            const pb = @intCast(u3, props);

                            // LZMA2 restricts lc + lp to at most 4.
                            if (lc + lp > 4)
                                return error.CorruptInput;

                            new_props = .{ .lc = lc, .lp = lp, .pb = pb };
                        }

                        try self.lzma_state.reset_state(self.allocator, new_props);
                    }

                    self.lzma_state.unpacked_size = unpacked_size + self.accum.len();

                    // Buffer the whole compressed chunk so the range
                    // decoder can work over a fixed slice.
                    const buffer = try self.allocator.alloc(u8, packed_size);
                    defer self.allocator.free(buffer);
                    for (buffer) |*b|
                        b.* = try packed_reader.readByte();

                    var rangecoder = try lzma.RangeDecoder.init(buffer);
                    try self.lzma_state.process(self.allocator, &self.accum, &rangecoder);

                    return true;
                },
            }
        }
    };
}

View file

@ -0,0 +1,658 @@
// Ported from https://github.com/gendx/lzma-rs
const std = @import("../../std.zig");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
/// LZMA context parameters decoded from the LZMA2 properties byte.
const LzmaProperties = struct {
    // number of literal context bits
    lc: u4,
    // number of literal position bits
    lp: u3,
    // number of position bits
    pb: u3,

    /// Asserts the parameters are within the ranges this decoder supports.
    fn validate(self: LzmaProperties) void {
        assert(self.lc <= 8);
        assert(self.lp <= 4);
        assert(self.pb <= 4);
    }
};
/// Core LZMA decoder state: adaptive probability models, the last four
/// match distances, and the 12-state machine that drives literal/match
/// decisions. Probabilities are 11-bit values; 0x400 represents 0.5.
pub const DecoderState = struct {
    lzma_props: LzmaProperties,
    // When set, decoding stops after exactly this many output bytes.
    unpacked_size: ?u64,
    // Literal probabilities: one 0x300-entry row per literal context
    // (2^(lc+lp) rows after reset_state).
    literal_probs: Vec2D(u16),
    // Position-slot trees, one per length bucket (len_state 0..3).
    pos_slot_decoder: [4]BitTree,
    align_decoder: BitTree,
    pos_decoders: [115]u16,
    is_match: [192]u16,
    is_rep: [12]u16,
    is_rep_g0: [12]u16,
    is_rep_g1: [12]u16,
    is_rep_g2: [12]u16,
    is_rep_0long: [192]u16,
    state: usize,
    // Last four match distances, for repeated-match encoding.
    rep: [4]usize,
    len_decoder: LenDecoder,
    rep_len_decoder: LenDecoder,

    /// All probabilities start at 0x400 (0.5); lc/lp/pb start at zero
    /// until the first LZMA2 properties reset.
    pub fn init(allocator: Allocator) !DecoderState {
        return .{
            .lzma_props = LzmaProperties{ .lc = 0, .lp = 0, .pb = 0 },
            .unpacked_size = null,
            .literal_probs = try Vec2D(u16).init(allocator, 0x400, 1, 0x300),
            .pos_slot_decoder = .{
                try BitTree.init(allocator, 6),
                try BitTree.init(allocator, 6),
                try BitTree.init(allocator, 6),
                try BitTree.init(allocator, 6),
            },
            .align_decoder = try BitTree.init(allocator, 4),
            .pos_decoders = .{0x400} ** 115,
            .is_match = .{0x400} ** 192,
            .is_rep = .{0x400} ** 12,
            .is_rep_g0 = .{0x400} ** 12,
            .is_rep_g1 = .{0x400} ** 12,
            .is_rep_g2 = .{0x400} ** 12,
            .is_rep_0long = .{0x400} ** 192,
            .state = 0,
            .rep = .{0} ** 4,
            .len_decoder = try LenDecoder.init(allocator),
            .rep_len_decoder = try LenDecoder.init(allocator),
        };
    }

    pub fn deinit(self: *DecoderState, allocator: Allocator) void {
        self.literal_probs.deinit(allocator);
        for (self.pos_slot_decoder) |*t| t.deinit(allocator);
        self.align_decoder.deinit(allocator);
        self.len_decoder.deinit(allocator);
        self.rep_len_decoder.deinit(allocator);
    }

    /// Resets every probability model for new properties. The literal
    /// table is reallocated only when its row count (2^(lc+lp)) changes;
    /// otherwise it is refilled in place.
    pub fn reset_state(self: *DecoderState, allocator: Allocator, new_props: LzmaProperties) !void {
        new_props.validate();
        if (self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp) {
            self.literal_probs.fill(0x400);
        } else {
            self.literal_probs.deinit(allocator);
            self.literal_probs = try Vec2D(u16).init(allocator, 0x400, @as(usize, 1) << (new_props.lc + new_props.lp), 0x300);
        }

        self.lzma_props = new_props;
        for (self.pos_slot_decoder) |*t| t.reset();
        self.align_decoder.reset();
        self.pos_decoders = .{0x400} ** 115;
        self.is_match = .{0x400} ** 192;
        self.is_rep = .{0x400} ** 12;
        self.is_rep_g0 = .{0x400} ** 12;
        self.is_rep_g1 = .{0x400} ** 12;
        self.is_rep_g2 = .{0x400} ** 12;
        self.is_rep_0long = .{0x400} ** 192;
        self.state = 0;
        self.rep = .{0} ** 4;
        self.len_decoder.reset();
        self.rep_len_decoder.reset();
    }

    /// Decodes one symbol (literal, match, or repeated match). With
    /// `update == false` the probability models and state are left
    /// untouched (bits are consumed but nothing is emitted).
    fn processNextInner(
        self: *DecoderState,
        allocator: Allocator,
        output: *LzAccumBuffer,
        rangecoder: *RangeDecoder,
        update: bool,
    ) !ProcessingStatus {
        const pos_state = output.len() & ((@as(usize, 1) << self.lzma_props.pb) - 1);

        // is_match == 0: literal byte.
        if (!try rangecoder.decodeBit(
            &self.is_match[(self.state << 4) + pos_state],
            update,
        )) {
            const byte: u8 = try self.decodeLiteral(output, rangecoder, update);

            if (update) {
                try output.appendLiteral(allocator, byte);

                // Standard LZMA state transition after a literal.
                self.state = if (self.state < 4)
                    0
                else if (self.state < 10)
                    self.state - 3
                else
                    self.state - 6;
            }
            return .continue_;
        }

        var len: usize = undefined;
        // is_rep == 1: match reuses one of the last four distances.
        if (try rangecoder.decodeBit(&self.is_rep[self.state], update)) {
            if (!try rangecoder.decodeBit(&self.is_rep_g0[self.state], update)) {
                // rep0 reuse; a zero is_rep_0long bit means "short rep":
                // copy a single byte at distance rep[0] + 1.
                if (!try rangecoder.decodeBit(
                    &self.is_rep_0long[(self.state << 4) + pos_state],
                    update,
                )) {
                    if (update) {
                        self.state = if (self.state < 7) 9 else 11;
                        const dist = self.rep[0] + 1;
                        try output.appendLz(allocator, 1, dist);
                    }
                    return .continue_;
                }
            } else {
                // Select rep1/rep2/rep3 and rotate it to the front.
                const idx: usize = if (!try rangecoder.decodeBit(&self.is_rep_g1[self.state], update))
                    1
                else if (!try rangecoder.decodeBit(&self.is_rep_g2[self.state], update))
                    2
                else
                    3;

                if (update) {
                    const dist = self.rep[idx];
                    var i = idx;
                    while (i > 0) : (i -= 1) {
                        self.rep[i] = self.rep[i - 1];
                    }
                    self.rep[0] = dist;
                }
            }

            len = try self.rep_len_decoder.decode(rangecoder, pos_state, update);

            if (update) {
                self.state = if (self.state < 7) 8 else 11;
            }
        } else {
            // New match: shift the distance history and decode a fresh
            // length and distance.
            if (update) {
                self.rep[3] = self.rep[2];
                self.rep[2] = self.rep[1];
                self.rep[1] = self.rep[0];
            }

            len = try self.len_decoder.decode(rangecoder, pos_state, update);

            if (update) {
                self.state = if (self.state < 7) 7 else 10;
            }

            const rep_0 = try self.decodeDistance(rangecoder, len, update);

            if (update) {
                self.rep[0] = rep_0;

                // Distance 0xFFFFFFFF is the end-of-stream marker.
                if (self.rep[0] == 0xFFFF_FFFF) {
                    if (rangecoder.isFinished()) {
                        return .finished;
                    }
                    return error.CorruptInput;
                }
            }
        }

        if (update) {
            // Encoded lengths are biased by the 2-byte minimum match.
            len += 2;

            const dist = self.rep[0] + 1;
            try output.appendLz(allocator, len, dist);
        }

        return .continue_;
    }

    /// Decodes one symbol with model updates enabled.
    fn processNext(
        self: *DecoderState,
        allocator: Allocator,
        output: *LzAccumBuffer,
        rangecoder: *RangeDecoder,
    ) !ProcessingStatus {
        return self.processNextInner(allocator, output, rangecoder, true);
    }

    /// Decodes symbols until `unpacked_size` bytes have been produced,
    /// the range coder is exhausted, or the end marker is seen; then
    /// verifies the produced length against `unpacked_size` if set.
    pub fn process(
        self: *DecoderState,
        allocator: Allocator,
        output: *LzAccumBuffer,
        rangecoder: *RangeDecoder,
    ) !void {
        while (true) {
            if (self.unpacked_size) |unpacked_size| {
                if (output.len() >= unpacked_size) {
                    break;
                }
            } else if (rangecoder.isFinished()) {
                break;
            }

            if (try self.processNext(allocator, output, rangecoder) == .finished) {
                break;
            }
        }

        if (self.unpacked_size) |len| {
            if (len != output.len()) {
                return error.CorruptInput;
            }
        }
    }

    /// Decodes one literal byte using the context selected by position
    /// (lp bits) and previous byte (lc bits). In match-history states
    /// (state >= 7), bits are first decoded against the byte at the
    /// last match distance until a mismatch occurs.
    fn decodeLiteral(
        self: *DecoderState,
        output: *LzAccumBuffer,
        rangecoder: *RangeDecoder,
        update: bool,
    ) !u8 {
        const def_prev_byte = 0;
        const prev_byte = @as(usize, output.lastOr(def_prev_byte));

        // `result` works as a 1-rooted bit accumulator; done at >= 0x100.
        var result: usize = 1;
        const lit_state = ((output.len() & ((@as(usize, 1) << self.lzma_props.lp) - 1)) << self.lzma_props.lc) +
            (prev_byte >> (8 - self.lzma_props.lc));
        const probs = try self.literal_probs.get(lit_state);

        if (self.state >= 7) {
            var match_byte = @as(usize, try output.lastN(self.rep[0] + 1));

            while (result < 0x100) {
                const match_bit = (match_byte >> 7) & 1;
                match_byte <<= 1;
                const bit = @boolToInt(try rangecoder.decodeBit(
                    &probs[((@as(usize, 1) + match_bit) << 8) + result],
                    update,
                ));
                result = (result << 1) ^ bit;
                if (match_bit != bit) {
                    break;
                }
            }
        }

        while (result < 0x100) {
            result = (result << 1) ^ @boolToInt(try rangecoder.decodeBit(&probs[result], update));
        }

        return @truncate(u8, result - 0x100);
    }

    /// Decodes a match distance (minus 1) from a position slot plus
    /// either reverse-bit-tree bits (slots 4..13) or direct bits and
    /// align bits (slots >= 14).
    fn decodeDistance(
        self: *DecoderState,
        rangecoder: *RangeDecoder,
        length: usize,
        update: bool,
    ) !usize {
        const len_state = if (length > 3) 3 else length;

        const pos_slot = @as(usize, try self.pos_slot_decoder[len_state].parse(rangecoder, update));
        if (pos_slot < 4)
            return pos_slot;

        const num_direct_bits = @intCast(u5, (pos_slot >> 1) - 1);
        // `^` is XOR in Zig, not exponentiation; since bit 0 of 2 is
        // clear this is equivalent to (2 | (pos_slot & 1)).
        var result = (2 ^ (pos_slot & 1)) << num_direct_bits;

        if (pos_slot < 14) {
            result += try rangecoder.parseReverseBitTree(
                num_direct_bits,
                &self.pos_decoders,
                result - pos_slot,
                update,
            );
        } else {
            // High bits come straight from the range coder; the low 4
            // bits use the align tree.
            result += @as(usize, try rangecoder.get(num_direct_bits - 4)) << 4;
            result += try self.align_decoder.parseReverse(rangecoder, update);
        }

        return result;
    }
};
/// Result of decoding one LZMA symbol: keep decoding, or the
/// end-of-stream marker was seen.
const ProcessingStatus = enum {
    continue_,
    finished,
};
/// Accumulates LZMA output. `buf` is the live dictionary the decoder
/// back-references into; `reset` moves it to `to_read`, the queue of
/// bytes handed out to the consumer via `read`.
pub const LzAccumBuffer = struct {
    to_read: ArrayListUnmanaged(u8) = .{},
    buf: ArrayListUnmanaged(u8) = .{},

    pub fn deinit(self: *LzAccumBuffer, allocator: Allocator) void {
        self.to_read.deinit(allocator);
        self.buf.deinit(allocator);
    }

    /// Copies up to output.len queued bytes into `output`, shifts the
    /// remainder to the front of the queue, and returns the count.
    pub fn read(self: *LzAccumBuffer, output: []u8) usize {
        const input = self.to_read.items;
        const n = std.math.min(input.len, output.len);
        std.mem.copy(u8, output[0..n], input[0..n]);
        // Forward copy with dst before src within the same slice is safe.
        std.mem.copy(u8, input, input[n..]);
        self.to_read.shrinkRetainingCapacity(input.len - n);
        return n;
    }

    /// Reserves room in the dictionary for `additional_count` bytes.
    pub fn ensureUnusedCapacity(
        self: *LzAccumBuffer,
        allocator: Allocator,
        additional_count: usize,
    ) !void {
        try self.buf.ensureUnusedCapacity(allocator, additional_count);
    }

    pub fn appendAssumeCapacity(self: *LzAccumBuffer, byte: u8) void {
        self.buf.appendAssumeCapacity(byte);
    }

    /// Flushes the dictionary into the consumer-visible queue
    /// (dictionary reset in LZMA2 terms).
    pub fn reset(self: *LzAccumBuffer, allocator: Allocator) !void {
        try self.to_read.appendSlice(allocator, self.buf.items);
        self.buf.clearRetainingCapacity();
    }

    /// Number of bytes currently in the dictionary (not the queue).
    pub fn len(self: *const LzAccumBuffer) usize {
        return self.buf.items.len;
    }

    /// Last dictionary byte, or `lit` if the dictionary is empty.
    pub fn lastOr(self: *const LzAccumBuffer, lit: u8) u8 {
        const buf_len = self.buf.items.len;
        return if (buf_len == 0)
            lit
        else
            self.buf.items[buf_len - 1];
    }

    /// Byte `dist` positions back from the end of the dictionary;
    /// errors if the distance exceeds what has been produced.
    pub fn lastN(self: *const LzAccumBuffer, dist: usize) !u8 {
        const buf_len = self.buf.items.len;
        if (dist > buf_len) {
            return error.CorruptInput;
        }
        return self.buf.items[buf_len - dist];
    }

    pub fn appendLiteral(self: *LzAccumBuffer, allocator: Allocator, lit: u8) !void {
        try self.buf.append(allocator, lit);
    }

    /// Copies `length` bytes starting `dist` back, byte by byte so the
    /// copy may overlap its own freshly-written output (LZ77 semantics).
    pub fn appendLz(self: *LzAccumBuffer, allocator: Allocator, length: usize, dist: usize) !void {
        const buf_len = self.buf.items.len;
        if (dist > buf_len) {
            return error.CorruptInput;
        }

        var offset = buf_len - dist;
        var i: usize = 0;
        while (i < length) : (i += 1) {
            const x = self.buf.items[offset];
            try self.buf.append(allocator, x);
            offset += 1;
        }
    }
};
/// Binary range (arithmetic) decoder over an in-memory buffer.
/// `range` is the width of the current coding interval and `code` the
/// position inside it; `normalize` keeps `range` above 2^24 by pulling
/// in one byte at a time.
pub const RangeDecoder = struct {
    stream: std.io.FixedBufferStream([]const u8),
    range: u32,
    code: u32,

    /// Skips the leading pad byte and loads the initial 32-bit code.
    pub fn init(buffer: []const u8) !RangeDecoder {
        var dec = RangeDecoder{
            .stream = std.io.fixedBufferStream(buffer),
            .range = 0xFFFF_FFFF,
            .code = 0,
        };
        const reader = dec.stream.reader();
        _ = try reader.readByte();
        dec.code = try reader.readIntBig(u32);
        return dec;
    }

    /// Builds a decoder with explicit range/code (resuming mid-stream).
    pub fn fromParts(
        buffer: []const u8,
        range: u32,
        code: u32,
    ) RangeDecoder {
        return .{
            .stream = std.io.fixedBufferStream(buffer),
            .range = range,
            .code = code,
        };
    }

    pub fn set(self: *RangeDecoder, range: u32, code: u32) void {
        self.range = range;
        self.code = code;
    }

    /// Raw read of remaining buffered bytes into `dest`.
    pub fn readInto(self: *RangeDecoder, dest: []u8) !usize {
        return self.stream.read(dest);
    }

    /// True when the coder has consumed all input and the code is zero
    /// (the canonical clean-termination condition).
    pub inline fn isFinished(self: *const RangeDecoder) bool {
        return self.code == 0 and self.isEof();
    }

    pub inline fn isEof(self: *const RangeDecoder) bool {
        return self.stream.pos == self.stream.buffer.len;
    }

    /// Refills `range`/`code` with one byte when range drops below 2^24.
    inline fn normalize(self: *RangeDecoder) !void {
        if (self.range < 0x0100_0000) {
            self.range <<= 8;
            self.code = (self.code << 8) ^ @as(u32, try self.stream.reader().readByte());
        }
    }

    /// Decodes one bit with fixed 0.5 probability (a "direct bit").
    inline fn getBit(self: *RangeDecoder) !bool {
        self.range >>= 1;

        const bit = self.code >= self.range;
        if (bit)
            self.code -= self.range;

        try self.normalize();
        return bit;
    }

    /// Decodes `count` direct bits, MSB first.
    fn get(self: *RangeDecoder, count: usize) !u32 {
        var result: u32 = 0;
        var i: usize = 0;
        while (i < count) : (i += 1)
            result = (result << 1) ^ @boolToInt(try self.getBit());
        return result;
    }

    /// Decodes one bit using the adaptive probability `prob`
    /// (11-bit, 0x400 = 0.5). When `update` is set, the model is nudged
    /// toward the observed bit by 1/32 of the remaining distance.
    pub inline fn decodeBit(self: *RangeDecoder, prob: *u16, update: bool) !bool {
        const bound = (self.range >> 11) * prob.*;

        if (self.code < bound) {
            if (update)
                prob.* += (0x800 - prob.*) >> 5;
            self.range = bound;

            try self.normalize();
            return false;
        } else {
            if (update)
                prob.* -= prob.* >> 5;
            self.code -= bound;
            self.range -= bound;

            try self.normalize();
            return true;
        }
    }

    /// Walks a probability tree of 2^num_bits leaves, MSB first, and
    /// returns the decoded symbol.
    fn parseBitTree(
        self: *RangeDecoder,
        num_bits: u5,
        probs: []u16,
        update: bool,
    ) !u32 {
        var tmp: u32 = 1;
        var i: u5 = 0;
        while (i < num_bits) : (i += 1) {
            const bit = try self.decodeBit(&probs[tmp], update);
            tmp = (tmp << 1) ^ @boolToInt(bit);
        }
        return tmp - (@as(u32, 1) << num_bits);
    }

    /// Like parseBitTree but emits bits in reverse (LSB-first) order,
    /// indexing the probability array at `offset + tree_position`.
    pub fn parseReverseBitTree(
        self: *RangeDecoder,
        num_bits: u5,
        probs: []u16,
        offset: usize,
        update: bool,
    ) !u32 {
        var result: u32 = 0;
        var tmp: usize = 1;
        var i: u5 = 0;
        while (i < num_bits) : (i += 1) {
            const bit = @boolToInt(try self.decodeBit(&probs[offset + tmp], update));
            tmp = (tmp << 1) ^ bit;
            result ^= @as(u32, bit) << i;
        }
        return result;
    }
};
/// A dense, row-major 2-D array of `T` with a fixed column count.
fn Vec2D(comptime T: type) type {
    return struct {
        data: []T,
        cols: usize,

        const Self = @This();

        /// Allocates a rows*cols grid and sets every cell to `data`.
        /// Fails with `error.Overflow` if rows*cols overflows usize.
        pub fn init(allocator: Allocator, data: T, rows: usize, cols: usize) !Self {
            const total = try std.math.mul(usize, rows, cols);
            const storage = try allocator.alloc(T, total);
            std.mem.set(T, storage, data);
            return Self{ .data = storage, .cols = cols };
        }

        /// Releases the backing storage.
        pub fn deinit(self: *Self, allocator: Allocator) void {
            allocator.free(self.data);
        }

        /// Overwrites every cell with `value`.
        pub fn fill(self: *Self, value: T) void {
            std.mem.set(T, self.data, value);
        }

        /// Returns the slice holding row `row`.
        pub fn get(self: *Self, row: usize) ![]T {
            const begin = try std.math.mul(usize, row, self.cols);
            return self.data[begin .. begin + self.cols];
        }
    };
}
/// Probability tree for range-coded symbols of `num_bits` bits; holds
/// 2^num_bits adaptive probabilities, all initialized to 0x400 (0.5).
const BitTree = struct {
    num_bits: u5,
    probs: ArrayListUnmanaged(u16),

    pub fn init(allocator: Allocator, num_bits: u5) !BitTree {
        var probs_len = @as(usize, 1) << num_bits;
        var probs = try ArrayListUnmanaged(u16).initCapacity(allocator, probs_len);
        while (probs_len > 0) : (probs_len -= 1)
            probs.appendAssumeCapacity(0x400);
        return .{ .num_bits = num_bits, .probs = probs };
    }

    pub fn deinit(self: *BitTree, allocator: Allocator) void {
        self.probs.deinit(allocator);
    }

    /// Decodes one symbol, MSB-first.
    pub fn parse(
        self: *BitTree,
        rangecoder: *RangeDecoder,
        update: bool,
    ) !u32 {
        return rangecoder.parseBitTree(self.num_bits, self.probs.items, update);
    }

    /// Decodes one symbol in reverse (LSB-first) bit order.
    pub fn parseReverse(
        self: *BitTree,
        rangecoder: *RangeDecoder,
        update: bool,
    ) !u32 {
        return rangecoder.parseReverseBitTree(self.num_bits, self.probs.items, 0, update);
    }

    /// Restores all probabilities to the neutral 0x400 value.
    pub fn reset(self: *BitTree) void {
        std.mem.set(u16, self.probs.items, 0x400);
    }
};
/// Decoder for LZMA match lengths: `choice`/`choice2` select between the
/// low (0..7), mid (8..15), and high (16..271) ranges, each backed by a
/// bit tree (one low/mid tree per position state).
const LenDecoder = struct {
    choice: u16,
    choice2: u16,
    low_coder: [16]BitTree,
    mid_coder: [16]BitTree,
    high_coder: BitTree,

    /// Initializes every probability model to 0x400 (0.5). The 16 low
    /// and 16 mid trees are built in loops rather than 32 hand-written
    /// initializers. As before, a mid-initialization failure leaks the
    /// trees built so far (callers treat init failure as fatal).
    pub fn init(allocator: Allocator) !LenDecoder {
        var low_coder: [16]BitTree = undefined;
        for (low_coder) |*t| t.* = try BitTree.init(allocator, 3);

        var mid_coder: [16]BitTree = undefined;
        for (mid_coder) |*t| t.* = try BitTree.init(allocator, 3);

        return .{
            .choice = 0x400,
            .choice2 = 0x400,
            .low_coder = low_coder,
            .mid_coder = mid_coder,
            .high_coder = try BitTree.init(allocator, 8),
        };
    }

    pub fn deinit(self: *LenDecoder, allocator: Allocator) void {
        for (self.low_coder) |*t| t.deinit(allocator);
        for (self.mid_coder) |*t| t.deinit(allocator);
        self.high_coder.deinit(allocator);
    }

    /// Decodes one match length (before the +2 minimum-match bias is
    /// applied by the caller).
    pub fn decode(
        self: *LenDecoder,
        rangecoder: *RangeDecoder,
        pos_state: usize,
        update: bool,
    ) !usize {
        if (!try rangecoder.decodeBit(&self.choice, update)) {
            return @as(usize, try self.low_coder[pos_state].parse(rangecoder, update));
        } else if (!try rangecoder.decodeBit(&self.choice2, update)) {
            return @as(usize, try self.mid_coder[pos_state].parse(rangecoder, update)) + 8;
        } else {
            return @as(usize, try self.high_coder.parse(rangecoder, update)) + 16;
        }
    }

    /// Restores every probability model to the neutral 0x400 value.
    pub fn reset(self: *LenDecoder) void {
        self.choice = 0x400;
        self.choice2 = 0x400;
        for (self.low_coder) |*t| t.reset();
        for (self.mid_coder) |*t| t.reset();
        self.high_coder.reset();
    }
};

View file

@ -0,0 +1,80 @@
const std = @import("../../std.zig");
const testing = std.testing;
const xz = std.compress.xz;
/// Fully decompresses `data` with the xz decoder. Caller owns the
/// returned slice (allocated with `testing.allocator`).
fn decompress(data: []const u8) ![]u8 {
    var in_stream = std.io.fixedBufferStream(data);

    var xz_stream = try xz.decompress(testing.allocator, in_stream.reader());
    defer xz_stream.deinit();

    return xz_stream.reader().readAllAlloc(testing.allocator, std.math.maxInt(usize));
}
/// Asserts that decompressing `data` yields exactly `expected`.
fn testReader(data: []const u8, comptime expected: []const u8) !void {
    const buf = try decompress(data);
    defer testing.allocator.free(buf);

    try testing.expectEqualSlices(u8, expected, buf);
}
test "compressed data" {
try testReader(@embedFile("testdata/good-0-empty.xz"), "");
inline for ([_][]const u8{
"good-1-check-none.xz",
"good-1-check-crc32.xz",
"good-1-check-crc64.xz",
"good-1-check-sha256.xz",
"good-2-lzma2.xz",
"good-1-block_header-1.xz",
"good-1-block_header-2.xz",
"good-1-block_header-3.xz",
}) |filename| {
try testReader(@embedFile("testdata/" ++ filename),
\\Hello
\\World!
\\
);
}
inline for ([_][]const u8{
"good-1-lzma2-1.xz",
"good-1-lzma2-2.xz",
"good-1-lzma2-3.xz",
"good-1-lzma2-4.xz",
}) |filename| {
try testReader(@embedFile("testdata/" ++ filename),
\\Lorem ipsum dolor sit amet, consectetur adipisicing
\\elit, sed do eiusmod tempor incididunt ut
\\labore et dolore magna aliqua. Ut enim
\\ad minim veniam, quis nostrud exercitation ullamco
\\laboris nisi ut aliquip ex ea commodo
\\consequat. Duis aute irure dolor in reprehenderit
\\in voluptate velit esse cillum dolore eu
\\fugiat nulla pariatur. Excepteur sint occaecat cupidatat
\\non proident, sunt in culpa qui officia
\\deserunt mollit anim id est laborum.
\\
);
}
try testReader(@embedFile("testdata/good-1-lzma2-5.xz"), "");
}
test "unsupported" {
inline for ([_][]const u8{
"good-1-delta-lzma2.tiff.xz",
"good-1-x86-lzma2.xz",
"good-1-sparc-lzma2.xz",
"good-1-arm64-lzma2-1.xz",
"good-1-arm64-lzma2-2.xz",
"good-1-3delta-lzma2.xz",
"good-1-empty-bcj-lzma2.xz",
}) |filename| {
try testing.expectError(
error.Unsupported,
decompress(@embedFile("testdata/" ++ filename)),
);
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -370,14 +370,11 @@ fn fetchAndUnpack(
if (mem.endsWith(u8, uri.path, ".tar.gz")) {
// I observed the gzip stream to read 1 byte at a time, so I am using a
// buffered reader on the front of it.
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req.reader());
var gzip_stream = try std.compress.gzip.gzipStream(gpa, br.reader());
defer gzip_stream.deinit();
try std.tar.pipeToFileSystem(tmp_directory.handle, gzip_stream.reader(), .{
.strip_components = 1,
});
try unpackTarball(gpa, &req, tmp_directory.handle, std.compress.gzip);
} else if (mem.endsWith(u8, uri.path, ".tar.xz")) {
// I have not checked what buffer sizes the xz decompression implementation uses
// by default, so the same logic applies for buffering the reader as for gzip.
try unpackTarball(gpa, &req, tmp_directory.handle, std.compress.xz);
} else {
return reportError(
ini,
@ -430,6 +427,22 @@ fn fetchAndUnpack(
return createWithDir(gpa, fqn, global_cache_directory, pkg_dir_sub_path, build_zig_basename);
}
/// Streams the HTTP response body through `compression` — a namespace
/// providing `decompress(allocator, reader)` such as std.compress.gzip
/// or std.compress.xz — and unpacks the resulting tarball into `out_dir`,
/// dropping the archive's top-level directory.
fn unpackTarball(
    gpa: Allocator,
    req: *std.http.Client.Request,
    out_dir: fs.Dir,
    comptime compression: type,
) !void {
    // Buffer reads at the TLS record size; the decompressors otherwise
    // issue many small reads against the network stream.
    var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req.reader());
    var decompress = try compression.decompress(gpa, br.reader());
    defer decompress.deinit();
    try std.tar.pipeToFileSystem(out_dir, decompress.reader(), .{
        .strip_components = 1,
    });
}
fn reportError(
ini: std.Ini,
comp_directory: Compilation.Directory,