Mirror of https://codeberg.org/ziglang/zig.git (synced 2025-12-06 13:54:21 +00:00)

Merge pull request #24559 from ziglang/zstd

std: rework zstd for new I/O API

Commit 66e49d93b7: 20 changed files with 2188 additions and 3072 deletions
@@ -373,11 +373,11 @@ pub fn discard(self: Self) anyerror!u64 {
 }
 /// Helper for bridging to the new `Reader` API while upgrading.
-pub fn adaptToNewApi(self: *const Self) Adapter {
+pub fn adaptToNewApi(self: *const Self, buffer: []u8) Adapter {
     return .{
         .derp_reader = self.*,
         .new_interface = .{
-            .buffer = &.{},
+            .buffer = buffer,
             .vtable = &.{ .stream = Adapter.stream },
             .seek = 0,
             .end = 0,
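A minimal usage sketch (editor's addition, not part of the diff): the caller now supplies the buffer that backs the adapter's `new_interface`. The surrounding deprecated reader value and the `std.Io.Reader` type name are assumptions based on the fields shown above.

    // Hypothetical: `old_reader` is a deprecated-style reader exposing `adaptToNewApi`.
    var buf: [4096]u8 = undefined;
    var adapter = old_reader.adaptToNewApi(&buf);
    const r = &adapter.new_interface; // assumed to be a `std.Io.Reader`
    // `r` can now be passed to code written against the new `Reader` API.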
@@ -185,6 +185,32 @@ pub fn streamExact64(r: *Reader, w: *Writer, n: u64) StreamError!void {
     while (remaining != 0) remaining -= try r.stream(w, .limited64(remaining));
 }
 
+/// "Pump" exactly `n` bytes from the reader to the writer.
+///
+/// When draining `w`, ensures that at least `preserve_len` bytes remain
+/// buffered.
+///
+/// Asserts `Writer.buffer` capacity exceeds `preserve_len`.
+pub fn streamExactPreserve(r: *Reader, w: *Writer, preserve_len: usize, n: usize) StreamError!void {
+    if (w.end + n <= w.buffer.len) {
+        @branchHint(.likely);
+        return streamExact(r, w, n);
+    }
+    // If `n` is large, we can ignore `preserve_len` up to a point.
+    var remaining = n;
+    while (remaining > preserve_len) {
+        assert(remaining != 0);
+        remaining -= try r.stream(w, .limited(remaining - preserve_len));
+        if (w.end + remaining <= w.buffer.len) return streamExact(r, w, remaining);
+    }
+    // All the next bytes received must be preserved.
+    if (preserve_len < w.end) {
+        @memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]);
+        w.end = preserve_len;
+    }
+    return streamExact(r, w, remaining);
+}
+
 /// "Pump" data from the reader to the writer, handling `error.EndOfStream` as
 /// a success case.
 ///
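Editor's sketch of how a streaming decoder might call the new function (not part of the diff; the window size and names are illustrative):

    // Copy `n` literal bytes from `in` to `out` while guaranteeing the last
    // `window_len` bytes of output remain in `out.buffer` for back-references.
    // Requires `out.buffer.len > window_len`, per the assert documented above.
    fn copyLiterals(in: *std.Io.Reader, out: *std.Io.Writer, window_len: usize, n: usize) !void {
        try in.streamExactPreserve(out, window_len, n);
    }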
@@ -240,7 +266,7 @@ pub fn allocRemaining(r: *Reader, gpa: Allocator, limit: Limit) LimitedAllocErro
 /// such case, the next byte that would be read will be the first one to exceed
 /// `limit`, and all preceeding bytes have been appended to `list`.
 ///
-/// Asserts `buffer` has nonzero capacity.
+/// If `limit` is not `Limit.unlimited`, asserts `buffer` has nonzero capacity.
 ///
 /// See also:
 /// * `allocRemaining`
@@ -251,7 +277,7 @@ pub fn appendRemaining(
     list: *std.ArrayListAlignedUnmanaged(u8, alignment),
     limit: Limit,
 ) LimitedAllocError!void {
-    assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
+    if (limit != .unlimited) assert(r.buffer.len != 0); // Needed to detect limit exceeded without losing data.
     const buffer_contents = r.buffer[r.seek..r.end];
     const copy_len = limit.minInt(buffer_contents.len);
     try list.appendSlice(gpa, r.buffer[0..copy_len]);
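Editor's sketch (not part of the diff): with `.unlimited` there is no limit-exceeded condition to detect, so a reader with an empty buffer is now accepted. The leading arguments here (allocator and a comptime alignment passed as `null`) are assumptions about the elided part of the signature:

    var list: std.ArrayListUnmanaged(u8) = .empty;
    defer list.deinit(gpa);
    try r.appendRemaining(gpa, null, &list, .unlimited);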
@@ -256,10 +256,10 @@ test "fixed buffer flush" {
     try testing.expectEqual(10, buffer[0]);
 }
 
-/// Calls `VTable.drain` but hides the last `preserve_length` bytes from the
+/// Calls `VTable.drain` but hides the last `preserve_len` bytes from the
 /// implementation, keeping them buffered.
-pub fn drainPreserve(w: *Writer, preserve_length: usize) Error!void {
-    const temp_end = w.end -| preserve_length;
+pub fn drainPreserve(w: *Writer, preserve_len: usize) Error!void {
+    const temp_end = w.end -| preserve_len;
     const preserved = w.buffer[temp_end..w.end];
     w.end = temp_end;
     defer w.end += preserved.len;
@@ -310,24 +310,38 @@ pub fn writableSliceGreedy(w: *Writer, minimum_length: usize) Error![]u8 {
 }
 
 /// Asserts the provided buffer has total capacity enough for `minimum_length`
-/// and `preserve_length` combined.
+/// and `preserve_len` combined.
 ///
 /// Does not `advance` the buffer end position.
 ///
-/// When draining the buffer, ensures that at least `preserve_length` bytes
+/// When draining the buffer, ensures that at least `preserve_len` bytes
 /// remain buffered.
 ///
-/// If `preserve_length` is zero, this is equivalent to `writableSliceGreedy`.
-pub fn writableSliceGreedyPreserve(w: *Writer, preserve_length: usize, minimum_length: usize) Error![]u8 {
-    assert(w.buffer.len >= preserve_length + minimum_length);
+/// If `preserve_len` is zero, this is equivalent to `writableSliceGreedy`.
+pub fn writableSliceGreedyPreserve(w: *Writer, preserve_len: usize, minimum_length: usize) Error![]u8 {
+    assert(w.buffer.len >= preserve_len + minimum_length);
     while (w.buffer.len - w.end < minimum_length) {
-        try drainPreserve(w, preserve_length);
+        try drainPreserve(w, preserve_len);
     } else {
         @branchHint(.likely);
         return w.buffer[w.end..];
     }
 }
 
+/// Asserts the provided buffer has total capacity enough for `len`.
+///
+/// Advances the buffer end position by `len`.
+///
+/// When draining the buffer, ensures that at least `preserve_len` bytes
+/// remain buffered.
+///
+/// If `preserve_len` is zero, this is equivalent to `writableSlice`.
+pub fn writableSlicePreserve(w: *Writer, preserve_len: usize, len: usize) Error![]u8 {
+    const big_slice = try w.writableSliceGreedyPreserve(preserve_len, len);
+    advance(w, len);
+    return big_slice[0..len];
+}
+
 pub const WritableVectorIterator = struct {
     first: []u8,
     middle: []const []u8 = &.{},
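Editor's sketch (not part of the diff) of why a decoder wants the preserve variants: it can copy an LZ-style back-reference directly out of the writer's buffered history. The `window`, `match_len`, and `offset` values are hypothetical:

    // Reserve `match_len` writable bytes while keeping the last `window` bytes
    // of prior output in `w.buffer`, then copy a back-reference from history.
    const dest = try w.writableSlicePreserve(window, match_len);
    const src_start = w.end - match_len - offset; // start of the match in history
    for (dest, 0..) |*byte, i| byte.* = w.buffer[src_start + i]; // forward copy handles overlap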
@@ -523,16 +537,16 @@ pub fn write(w: *Writer, bytes: []const u8) Error!usize {
     return w.vtable.drain(w, &.{bytes}, 1);
 }
 
-/// Asserts `buffer` capacity exceeds `preserve_length`.
-pub fn writePreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!usize {
-    assert(preserve_length <= w.buffer.len);
+/// Asserts `buffer` capacity exceeds `preserve_len`.
+pub fn writePreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!usize {
+    assert(preserve_len <= w.buffer.len);
     if (w.end + bytes.len <= w.buffer.len) {
         @branchHint(.likely);
         @memcpy(w.buffer[w.end..][0..bytes.len], bytes);
         w.end += bytes.len;
         return bytes.len;
     }
-    const temp_end = w.end -| preserve_length;
+    const temp_end = w.end -| preserve_len;
     const preserved = w.buffer[temp_end..w.end];
     w.end = temp_end;
     defer w.end += preserved.len;
@@ -552,13 +566,13 @@ pub fn writeAll(w: *Writer, bytes: []const u8) Error!void {
 /// Calls `drain` as many times as necessary such that all of `bytes` are
 /// transferred.
 ///
-/// When draining the buffer, ensures that at least `preserve_length` bytes
+/// When draining the buffer, ensures that at least `preserve_len` bytes
 /// remain buffered.
 ///
-/// Asserts `buffer` capacity exceeds `preserve_length`.
-pub fn writeAllPreserve(w: *Writer, preserve_length: usize, bytes: []const u8) Error!void {
+/// Asserts `buffer` capacity exceeds `preserve_len`.
+pub fn writeAllPreserve(w: *Writer, preserve_len: usize, bytes: []const u8) Error!void {
     var index: usize = 0;
-    while (index < bytes.len) index += try w.writePreserve(preserve_length, bytes[index..]);
+    while (index < bytes.len) index += try w.writePreserve(preserve_len, bytes[index..]);
 }
 
 /// Renders fmt string with args, calling `writer` with slices of bytes.
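Editor's sketch (not part of the diff); the fixed-buffer constructor is an assumption about the new `Writer` API:

    // The buffer capacity (64) must exceed the preserved length (16); the last
    // 16 bytes written are then guaranteed to stay buffered across drains.
    var buf: [64]u8 = undefined;
    var w: std.Io.Writer = .fixed(&buf); // assumed fixed-buffer constructor
    try w.writeAllPreserve(16, "decoded literal bytes");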
@@ -761,11 +775,11 @@ pub fn writeByte(w: *Writer, byte: u8) Error!void {
     }
 }
 
-/// When draining the buffer, ensures that at least `preserve_length` bytes
+/// When draining the buffer, ensures that at least `preserve_len` bytes
 /// remain buffered.
-pub fn writeBytePreserve(w: *Writer, preserve_length: usize, byte: u8) Error!void {
+pub fn writeBytePreserve(w: *Writer, preserve_len: usize, byte: u8) Error!void {
     while (w.buffer.len - w.end == 0) {
-        try drainPreserve(w, preserve_length);
+        try drainPreserve(w, preserve_len);
     } else {
         @branchHint(.likely);
         w.buffer[w.end] = byte;
@@ -788,10 +802,42 @@ test splatByteAll {
     try testing.expectEqualStrings("7" ** 45, aw.writer.buffered());
 }
 
+pub fn splatBytePreserve(w: *Writer, preserve_len: usize, byte: u8, n: usize) Error!void {
+    const new_end = w.end + n;
+    if (new_end <= w.buffer.len) {
+        @memset(w.buffer[w.end..][0..n], byte);
+        w.end = new_end;
+        return;
+    }
+    // If `n` is large, we can ignore `preserve_len` up to a point.
+    var remaining = n;
+    while (remaining > preserve_len) {
+        assert(remaining != 0);
+        remaining -= try splatByte(w, byte, remaining - preserve_len);
+        if (w.end + remaining <= w.buffer.len) {
+            @memset(w.buffer[w.end..][0..remaining], byte);
+            w.end += remaining;
+            return;
+        }
+    }
+    // All the next bytes received must be preserved.
+    if (preserve_len < w.end) {
+        @memmove(w.buffer[0..preserve_len], w.buffer[w.end - preserve_len ..][0..preserve_len]);
+        w.end = preserve_len;
+    }
+    while (remaining > 0) remaining -= try w.splatByte(byte, remaining);
+}
+
 /// Writes the same byte many times, allowing short writes.
 ///
 /// Does maximum of one underlying `VTable.drain`.
 pub fn splatByte(w: *Writer, byte: u8, n: usize) Error!usize {
     if (w.end + n <= w.buffer.len) {
         @branchHint(.likely);
         @memset(w.buffer[w.end..][0..n], byte);
         w.end += n;
         return n;
     }
     return writeSplat(w, &.{&.{byte}}, n);
 }
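Editor's sketch (not part of the diff), with hypothetical `window` and `run_len` values:

    // Emit `run_len` copies of one byte (an RLE-style run). The plain variant
    // gives no guarantee about what stays buffered; the Preserve variant keeps
    // the last `window` bytes of output available for later back-references.
    try w.splatByteAll(0xaa, run_len);
    try w.splatBytePreserve(window, 0xaa, run_len);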
@@ -801,9 +847,10 @@ pub fn splatBytesAll(w: *Writer, bytes: []const u8, splat: usize) Error!void {
     var remaining_bytes: usize = bytes.len * splat;
     remaining_bytes -= try w.splatBytes(bytes, splat);
     while (remaining_bytes > 0) {
-        const leftover = remaining_bytes % bytes.len;
-        const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover ..], bytes };
-        remaining_bytes -= try w.writeSplat(&buffers, splat);
+        const leftover_splat = remaining_bytes / bytes.len;
+        const leftover_bytes = remaining_bytes % bytes.len;
+        const buffers: [2][]const u8 = .{ bytes[bytes.len - leftover_bytes ..], bytes };
+        remaining_bytes -= try w.writeSplat(&buffers, leftover_splat);
     }
 }
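Editor's note on the fix above, with illustrative numbers (assuming `writeSplat` writes the leading buffer once and repeats the final buffer `splat` times): take `bytes.len = 4`, `splat = 10`, and suppose `remaining_bytes = 15` when the loop retries. The old code passed the original `splat` back to `writeSplat`, requesting up to 3 + 10 * 4 = 43 further bytes and overshooting the total. The new code computes `leftover_splat = 15 / 4 = 3` and `leftover_bytes = 15 % 4 = 3`, so the retry requests exactly 3 + 3 * 4 = 15 bytes, which is the amount still owed.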
@@ -1,75 +1,19 @@
 //! Compression algorithms.
 
 const std = @import("std.zig");
 
 pub const flate = @import("compress/flate.zig");
 pub const gzip = @import("compress/gzip.zig");
 pub const zlib = @import("compress/zlib.zig");
 pub const lzma = @import("compress/lzma.zig");
 pub const lzma2 = @import("compress/lzma2.zig");
 pub const xz = @import("compress/xz.zig");
-pub const zstd = @import("compress/zstandard.zig");
-
-pub fn HashedReader(ReaderType: type, HasherType: type) type {
-    return struct {
-        child_reader: ReaderType,
-        hasher: HasherType,
-
-        pub const Error = ReaderType.Error;
-        pub const Reader = std.io.GenericReader(*@This(), Error, read);
-
-        pub fn read(self: *@This(), buf: []u8) Error!usize {
-            const amt = try self.child_reader.read(buf);
-            self.hasher.update(buf[0..amt]);
-            return amt;
-        }
-
-        pub fn reader(self: *@This()) Reader {
-            return .{ .context = self };
-        }
-    };
-}
-
-pub fn hashedReader(
-    reader: anytype,
-    hasher: anytype,
-) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
-    return .{ .child_reader = reader, .hasher = hasher };
-}
-
-pub fn HashedWriter(WriterType: type, HasherType: type) type {
-    return struct {
-        child_writer: WriterType,
-        hasher: HasherType,
-
-        pub const Error = WriterType.Error;
-        pub const Writer = std.io.GenericWriter(*@This(), Error, write);
-
-        pub fn write(self: *@This(), buf: []const u8) Error!usize {
-            const amt = try self.child_writer.write(buf);
-            self.hasher.update(buf[0..amt]);
-            return amt;
-        }
-
-        pub fn writer(self: *@This()) Writer {
-            return .{ .context = self };
-        }
-    };
-}
-
-pub fn hashedWriter(
-    writer: anytype,
-    hasher: anytype,
-) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
-    return .{ .child_writer = writer, .hasher = hasher };
-}
+pub const zstd = @import("compress/zstd.zig");
 
 test {
-    _ = flate;
-    _ = lzma;
-    _ = lzma2;
-    _ = xz;
-    _ = zstd;
+    _ = flate;
+    _ = gzip;
+    _ = zlib;
 }
@@ -47,7 +47,7 @@ pub fn Decompress(comptime ReaderType: type) type {
 
         var check: Check = undefined;
         const hash_a = blk: {
-            var hasher = std.compress.hashedReader(source, Crc32.init());
+            var hasher = hashedReader(source, Crc32.init());
             try readStreamFlags(hasher.reader(), &check);
             break :blk hasher.hasher.final();
         };
@@ -80,7 +80,7 @@ pub fn Decompress(comptime ReaderType: type) type {
                 return r;
 
             const index_size = blk: {
-                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
+                var hasher = hashedReader(self.in_reader, Crc32.init());
                 hasher.hasher.update(&[1]u8{0x00});
 
                 var counter = std.io.countingReader(hasher.reader());
@@ -115,7 +115,7 @@ pub fn Decompress(comptime ReaderType: type) type {
             const hash_a = try self.in_reader.readInt(u32, .little);
 
             const hash_b = blk: {
-                var hasher = std.compress.hashedReader(self.in_reader, Crc32.init());
+                var hasher = hashedReader(self.in_reader, Crc32.init());
                 const hashed_reader = hasher.reader();
 
                 const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
@@ -140,6 +140,33 @@ pub fn Decompress(comptime ReaderType: type) type {
     };
 }
 
+pub fn HashedReader(ReaderType: type, HasherType: type) type {
+    return struct {
+        child_reader: ReaderType,
+        hasher: HasherType,
+
+        pub const Error = ReaderType.Error;
+        pub const Reader = std.io.GenericReader(*@This(), Error, read);
+
+        pub fn read(self: *@This(), buf: []u8) Error!usize {
+            const amt = try self.child_reader.read(buf);
+            self.hasher.update(buf[0..amt]);
+            return amt;
+        }
+
+        pub fn reader(self: *@This()) Reader {
+            return .{ .context = self };
+        }
+    };
+}
+
+pub fn hashedReader(
+    reader: anytype,
+    hasher: anytype,
+) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
+    return .{ .child_reader = reader, .hasher = hasher };
+}
+
 test {
     _ = @import("xz/test.zig");
 }
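Editor's sketch (not part of the diff) of the helper now local to xz; `inner_reader` stands for any existing GenericReader-style reader:

    // Wrap a reader so that every byte read also updates a CRC32 hasher.
    var hashed = hashedReader(inner_reader, std.hash.Crc32.init());
    const header = try hashed.reader().readBytesNoEof(8);
    const actual_crc = hashed.hasher.final();
    _ = header;
    _ = actual_crc;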
@@ -91,7 +91,7 @@ pub fn Decoder(comptime ReaderType: type) type {
 
             // Block Header
             {
-                var header_hasher = std.compress.hashedReader(block_reader, Crc32.init());
+                var header_hasher = xz.hashedReader(block_reader, Crc32.init());
                 const header_reader = header_hasher.reader();
 
                 const header_size = @as(u64, try header_reader.readByte()) * 4;
@@ -1,310 +0,0 @@
|
|||
const std = @import("std");
|
||||
const RingBuffer = std.RingBuffer;
|
||||
|
||||
const types = @import("zstandard/types.zig");
|
||||
pub const frame = types.frame;
|
||||
pub const compressed_block = types.compressed_block;
|
||||
|
||||
pub const decompress = @import("zstandard/decompress.zig");
|
||||
|
||||
pub const DecompressorOptions = struct {
|
||||
verify_checksum: bool = true,
|
||||
window_buffer: []u8,
|
||||
|
||||
/// Recommended amount by the standard. Lower than this may result
|
||||
/// in inability to decompress common streams.
|
||||
pub const default_window_buffer_len = 8 * 1024 * 1024;
|
||||
};
|
||||
|
||||
pub fn Decompressor(comptime ReaderType: type) type {
|
||||
return struct {
|
||||
const Self = @This();
|
||||
|
||||
const table_size_max = types.compressed_block.table_size_max;
|
||||
|
||||
source: std.io.CountingReader(ReaderType),
|
||||
state: enum { NewFrame, InFrame, LastBlock },
|
||||
decode_state: decompress.block.DecodeState,
|
||||
frame_context: decompress.FrameContext,
|
||||
buffer: WindowBuffer,
|
||||
literal_fse_buffer: [table_size_max.literal]types.compressed_block.Table.Fse,
|
||||
match_fse_buffer: [table_size_max.match]types.compressed_block.Table.Fse,
|
||||
offset_fse_buffer: [table_size_max.offset]types.compressed_block.Table.Fse,
|
||||
literals_buffer: [types.block_size_max]u8,
|
||||
sequence_buffer: [types.block_size_max]u8,
|
||||
verify_checksum: bool,
|
||||
checksum: ?u32,
|
||||
current_frame_decompressed_size: usize,
|
||||
|
||||
const WindowBuffer = struct {
|
||||
data: []u8 = undefined,
|
||||
read_index: usize = 0,
|
||||
write_index: usize = 0,
|
||||
};
|
||||
|
||||
pub const Error = ReaderType.Error || error{
|
||||
ChecksumFailure,
|
||||
DictionaryIdFlagUnsupported,
|
||||
MalformedBlock,
|
||||
MalformedFrame,
|
||||
OutOfMemory,
|
||||
};
|
||||
|
||||
pub const Reader = std.io.GenericReader(*Self, Error, read);
|
||||
|
||||
pub fn init(source: ReaderType, options: DecompressorOptions) Self {
|
||||
return .{
|
||||
.source = std.io.countingReader(source),
|
||||
.state = .NewFrame,
|
||||
.decode_state = undefined,
|
||||
.frame_context = undefined,
|
||||
.buffer = .{ .data = options.window_buffer },
|
||||
.literal_fse_buffer = undefined,
|
||||
.match_fse_buffer = undefined,
|
||||
.offset_fse_buffer = undefined,
|
||||
.literals_buffer = undefined,
|
||||
.sequence_buffer = undefined,
|
||||
.verify_checksum = options.verify_checksum,
|
||||
.checksum = undefined,
|
||||
.current_frame_decompressed_size = undefined,
|
||||
};
|
||||
}
|
||||
|
||||
fn frameInit(self: *Self) !void {
|
||||
const source_reader = self.source.reader();
|
||||
switch (try decompress.decodeFrameHeader(source_reader)) {
|
||||
.skippable => |header| {
|
||||
try source_reader.skipBytes(header.frame_size, .{});
|
||||
self.state = .NewFrame;
|
||||
},
|
||||
.zstandard => |header| {
|
||||
const frame_context = try decompress.FrameContext.init(
|
||||
header,
|
||||
self.buffer.data.len,
|
||||
self.verify_checksum,
|
||||
);
|
||||
|
||||
const decode_state = decompress.block.DecodeState.init(
|
||||
&self.literal_fse_buffer,
|
||||
&self.match_fse_buffer,
|
||||
&self.offset_fse_buffer,
|
||||
);
|
||||
|
||||
self.decode_state = decode_state;
|
||||
self.frame_context = frame_context;
|
||||
|
||||
self.checksum = null;
|
||||
self.current_frame_decompressed_size = 0;
|
||||
|
||||
self.state = .InFrame;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reader(self: *Self) Reader {
|
||||
return .{ .context = self };
|
||||
}
|
||||
|
||||
pub fn read(self: *Self, buffer: []u8) Error!usize {
|
||||
if (buffer.len == 0) return 0;
|
||||
|
||||
var size: usize = 0;
|
||||
while (size == 0) {
|
||||
while (self.state == .NewFrame) {
|
||||
const initial_count = self.source.bytes_read;
|
||||
self.frameInit() catch |err| switch (err) {
|
||||
error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
|
||||
error.EndOfStream => return if (self.source.bytes_read == initial_count)
|
||||
0
|
||||
else
|
||||
error.MalformedFrame,
|
||||
else => return error.MalformedFrame,
|
||||
};
|
||||
}
|
||||
size = try self.readInner(buffer);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
fn readInner(self: *Self, buffer: []u8) Error!usize {
|
||||
std.debug.assert(self.state != .NewFrame);
|
||||
|
||||
var ring_buffer = RingBuffer{
|
||||
.data = self.buffer.data,
|
||||
.read_index = self.buffer.read_index,
|
||||
.write_index = self.buffer.write_index,
|
||||
};
|
||||
defer {
|
||||
self.buffer.read_index = ring_buffer.read_index;
|
||||
self.buffer.write_index = ring_buffer.write_index;
|
||||
}
|
||||
|
||||
const source_reader = self.source.reader();
|
||||
while (ring_buffer.isEmpty() and self.state != .LastBlock) {
|
||||
const header_bytes = source_reader.readBytesNoEof(3) catch
|
||||
return error.MalformedFrame;
|
||||
const block_header = decompress.block.decodeBlockHeader(&header_bytes);
|
||||
|
||||
decompress.block.decodeBlockReader(
|
||||
&ring_buffer,
|
||||
source_reader,
|
||||
block_header,
|
||||
&self.decode_state,
|
||||
self.frame_context.block_size_max,
|
||||
&self.literals_buffer,
|
||||
&self.sequence_buffer,
|
||||
) catch
|
||||
return error.MalformedBlock;
|
||||
|
||||
if (self.frame_context.content_size) |size| {
|
||||
if (self.current_frame_decompressed_size > size) return error.MalformedFrame;
|
||||
}
|
||||
|
||||
const size = ring_buffer.len();
|
||||
self.current_frame_decompressed_size += size;
|
||||
|
||||
if (self.frame_context.hasher_opt) |*hasher| {
|
||||
if (size > 0) {
|
||||
const written_slice = ring_buffer.sliceLast(size);
|
||||
hasher.update(written_slice.first);
|
||||
hasher.update(written_slice.second);
|
||||
}
|
||||
}
|
||||
if (block_header.last_block) {
|
||||
self.state = .LastBlock;
|
||||
if (self.frame_context.has_checksum) {
|
||||
const checksum = source_reader.readInt(u32, .little) catch
|
||||
return error.MalformedFrame;
|
||||
if (self.verify_checksum) {
|
||||
if (self.frame_context.hasher_opt) |*hasher| {
|
||||
if (checksum != decompress.computeChecksum(hasher))
|
||||
return error.ChecksumFailure;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (self.frame_context.content_size) |content_size| {
|
||||
if (content_size != self.current_frame_decompressed_size) {
|
||||
return error.MalformedFrame;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const size = @min(ring_buffer.len(), buffer.len);
|
||||
if (size > 0) {
|
||||
ring_buffer.readFirstAssumeLength(buffer, size);
|
||||
}
|
||||
if (self.state == .LastBlock and ring_buffer.len() == 0) {
|
||||
self.state = .NewFrame;
|
||||
}
|
||||
return size;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn decompressor(reader: anytype, options: DecompressorOptions) Decompressor(@TypeOf(reader)) {
|
||||
return Decompressor(@TypeOf(reader)).init(reader, options);
|
||||
}
|
||||
|
||||
fn testDecompress(data: []const u8) ![]u8 {
|
||||
const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23);
|
||||
defer std.testing.allocator.free(window_buffer);
|
||||
|
||||
var in_stream = std.io.fixedBufferStream(data);
|
||||
var zstd_stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer });
|
||||
const result = zstd_stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
|
||||
return result;
|
||||
}
|
||||
|
||||
fn testReader(data: []const u8, comptime expected: []const u8) !void {
|
||||
const buf = try testDecompress(data);
|
||||
defer std.testing.allocator.free(buf);
|
||||
try std.testing.expectEqualSlices(u8, expected, buf);
|
||||
}
|
||||
|
||||
test "decompression" {
|
||||
const uncompressed = @embedFile("testdata/rfc8478.txt");
|
||||
const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
|
||||
const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
|
||||
|
||||
const buffer = try std.testing.allocator.alloc(u8, uncompressed.len);
|
||||
defer std.testing.allocator.free(buffer);
|
||||
|
||||
const res3 = try decompress.decode(buffer, compressed3, true);
|
||||
try std.testing.expectEqual(uncompressed.len, res3);
|
||||
try std.testing.expectEqualSlices(u8, uncompressed, buffer);
|
||||
|
||||
@memset(buffer, undefined);
|
||||
const res19 = try decompress.decode(buffer, compressed19, true);
|
||||
try std.testing.expectEqual(uncompressed.len, res19);
|
||||
try std.testing.expectEqualSlices(u8, uncompressed, buffer);
|
||||
|
||||
try testReader(compressed3, uncompressed);
|
||||
try testReader(compressed19, uncompressed);
|
||||
}
|
||||
|
||||
fn expectEqualDecoded(expected: []const u8, input: []const u8) !void {
|
||||
{
|
||||
const result = try decompress.decodeAlloc(std.testing.allocator, input, false, 1 << 23);
|
||||
defer std.testing.allocator.free(result);
|
||||
try std.testing.expectEqualStrings(expected, result);
|
||||
}
|
||||
|
||||
{
|
||||
var buffer = try std.testing.allocator.alloc(u8, 2 * expected.len);
|
||||
defer std.testing.allocator.free(buffer);
|
||||
|
||||
const size = try decompress.decode(buffer, input, false);
|
||||
try std.testing.expectEqualStrings(expected, buffer[0..size]);
|
||||
}
|
||||
}
|
||||
|
||||
fn expectEqualDecodedStreaming(expected: []const u8, input: []const u8) !void {
|
||||
const window_buffer = try std.testing.allocator.alloc(u8, 1 << 23);
|
||||
defer std.testing.allocator.free(window_buffer);
|
||||
|
||||
var in_stream = std.io.fixedBufferStream(input);
|
||||
var stream = decompressor(in_stream.reader(), .{ .window_buffer = window_buffer });
|
||||
|
||||
const result = try stream.reader().readAllAlloc(std.testing.allocator, std.math.maxInt(usize));
|
||||
defer std.testing.allocator.free(result);
|
||||
|
||||
try std.testing.expectEqualStrings(expected, result);
|
||||
}
|
||||
|
||||
test "zero sized block" {
|
||||
const input_raw =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
|
||||
"\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero
|
||||
|
||||
const input_rle =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
|
||||
"\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero
|
||||
"\xaa"; // block_content
|
||||
|
||||
try expectEqualDecoded("", input_raw);
|
||||
try expectEqualDecoded("", input_rle);
|
||||
try expectEqualDecodedStreaming("", input_raw);
|
||||
try expectEqualDecodedStreaming("", input_rle);
|
||||
}
|
||||
|
||||
test "declared raw literals size too large" {
|
||||
const input_raw =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x00\x00" ++ // frame header: everything unset, window descriptor zero
|
||||
"\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18
|
||||
"\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603
|
||||
"\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size
|
||||
|
||||
// Note that the regenerated_size in the above input is larger than block maximum size, so the
|
||||
// block can't be valid as it is a raw literals block.
|
||||
|
||||
var fbs = std.io.fixedBufferStream(input_raw);
|
||||
var window: [1024]u8 = undefined;
|
||||
var stream = decompressor(fbs.reader(), .{ .window_buffer = &window });
|
||||
|
||||
var buf: [1024]u8 = undefined;
|
||||
try std.testing.expectError(error.MalformedBlock, stream.read(&buf));
|
||||
}
|
||||
File diff suppressed because it is too large
@@ -1,153 +0,0 @@
|
|||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
const types = @import("../types.zig");
|
||||
const Table = types.compressed_block.Table;
|
||||
|
||||
pub fn decodeFseTable(
|
||||
bit_reader: anytype,
|
||||
expected_symbol_count: usize,
|
||||
max_accuracy_log: u4,
|
||||
entries: []Table.Fse,
|
||||
) !usize {
|
||||
const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4);
|
||||
if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog;
|
||||
const accuracy_log = accuracy_log_biased + 5;
|
||||
|
||||
var values: [256]u16 = undefined;
|
||||
var value_count: usize = 0;
|
||||
|
||||
const total_probability = @as(u16, 1) << accuracy_log;
|
||||
var accumulated_probability: u16 = 0;
|
||||
|
||||
while (accumulated_probability < total_probability) {
|
||||
// WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct here,
|
||||
// but power of two (remaining probabilities + 1) need max bits set to 1 more.
|
||||
const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1;
|
||||
const small = try bit_reader.readBitsNoEof(u16, max_bits - 1);
|
||||
|
||||
const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1);
|
||||
|
||||
const value = if (small < cutoff)
|
||||
small
|
||||
else value: {
|
||||
const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1));
|
||||
break :value if (value_read < @as(u16, 1) << (max_bits - 1))
|
||||
value_read
|
||||
else
|
||||
value_read - cutoff;
|
||||
};
|
||||
|
||||
accumulated_probability += if (value != 0) value - 1 else 1;
|
||||
|
||||
values[value_count] = value;
|
||||
value_count += 1;
|
||||
|
||||
if (value == 1) {
|
||||
while (true) {
|
||||
const repeat_flag = try bit_reader.readBitsNoEof(u2, 2);
|
||||
if (repeat_flag + value_count > 256) return error.MalformedFseTable;
|
||||
for (0..repeat_flag) |_| {
|
||||
values[value_count] = 1;
|
||||
value_count += 1;
|
||||
}
|
||||
if (repeat_flag < 3) break;
|
||||
}
|
||||
}
|
||||
if (value_count == 256) break;
|
||||
}
|
||||
bit_reader.alignToByte();
|
||||
|
||||
if (value_count < 2) return error.MalformedFseTable;
|
||||
if (accumulated_probability != total_probability) return error.MalformedFseTable;
|
||||
if (value_count > expected_symbol_count) return error.MalformedFseTable;
|
||||
|
||||
const table_size = total_probability;
|
||||
|
||||
try buildFseTable(values[0..value_count], entries[0..table_size]);
|
||||
return table_size;
|
||||
}
|
||||
|
||||
fn buildFseTable(values: []const u16, entries: []Table.Fse) !void {
|
||||
const total_probability = @as(u16, @intCast(entries.len));
|
||||
const accuracy_log = std.math.log2_int(u16, total_probability);
|
||||
assert(total_probability <= 1 << 9);
|
||||
|
||||
var less_than_one_count: usize = 0;
|
||||
for (values, 0..) |value, i| {
|
||||
if (value == 0) {
|
||||
entries[entries.len - 1 - less_than_one_count] = Table.Fse{
|
||||
.symbol = @as(u8, @intCast(i)),
|
||||
.baseline = 0,
|
||||
.bits = accuracy_log,
|
||||
};
|
||||
less_than_one_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
var position: usize = 0;
|
||||
var temp_states: [1 << 9]u16 = undefined;
|
||||
for (values, 0..) |value, symbol| {
|
||||
if (value == 0 or value == 1) continue;
|
||||
const probability = value - 1;
|
||||
|
||||
const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch
|
||||
return error.MalformedFseTable;
|
||||
const share_size = @divExact(total_probability, state_share_dividend);
|
||||
const double_state_count = state_share_dividend - probability;
|
||||
const single_state_count = probability - double_state_count;
|
||||
const share_size_log = std.math.log2_int(u16, share_size);
|
||||
|
||||
for (0..probability) |i| {
|
||||
temp_states[i] = @as(u16, @intCast(position));
|
||||
position += (entries.len >> 1) + (entries.len >> 3) + 3;
|
||||
position &= entries.len - 1;
|
||||
while (position >= entries.len - less_than_one_count) {
|
||||
position += (entries.len >> 1) + (entries.len >> 3) + 3;
|
||||
position &= entries.len - 1;
|
||||
}
|
||||
}
|
||||
std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16));
|
||||
for (0..probability) |i| {
|
||||
entries[temp_states[i]] = if (i < double_state_count) Table.Fse{
|
||||
.symbol = @as(u8, @intCast(symbol)),
|
||||
.bits = share_size_log + 1,
|
||||
.baseline = single_state_count * share_size + @as(u16, @intCast(i)) * 2 * share_size,
|
||||
} else Table.Fse{
|
||||
.symbol = @as(u8, @intCast(symbol)),
|
||||
.bits = share_size_log,
|
||||
.baseline = (@as(u16, @intCast(i)) - double_state_count) * share_size,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test buildFseTable {
|
||||
const literals_length_default_values = [36]u16{
|
||||
5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2,
|
||||
3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2,
|
||||
0, 0, 0, 0,
|
||||
};
|
||||
|
||||
const match_lengths_default_values = [53]u16{
|
||||
2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0,
|
||||
0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
const offset_codes_default_values = [29]u16{
|
||||
2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
var entries: [64]Table.Fse = undefined;
|
||||
try buildFseTable(&literals_length_default_values, &entries);
|
||||
try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_literal_fse_table.fse, &entries);
|
||||
|
||||
try buildFseTable(&match_lengths_default_values, &entries);
|
||||
try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_match_fse_table.fse, &entries);
|
||||
|
||||
try buildFseTable(&offset_codes_default_values, entries[0..32]);
|
||||
try std.testing.expectEqualSlices(Table.Fse, types.compressed_block.predefined_offset_fse_table.fse, entries[0..32]);
|
||||
}
|
||||
|
|
@@ -1,234 +0,0 @@
|
|||
const std = @import("std");
|
||||
|
||||
const types = @import("../types.zig");
|
||||
const LiteralsSection = types.compressed_block.LiteralsSection;
|
||||
const Table = types.compressed_block.Table;
|
||||
|
||||
const readers = @import("../readers.zig");
|
||||
|
||||
const decodeFseTable = @import("fse.zig").decodeFseTable;
|
||||
|
||||
pub const Error = error{
|
||||
MalformedHuffmanTree,
|
||||
MalformedFseTable,
|
||||
MalformedAccuracyLog,
|
||||
EndOfStream,
|
||||
};
|
||||
|
||||
fn decodeFseHuffmanTree(
|
||||
source: anytype,
|
||||
compressed_size: usize,
|
||||
buffer: []u8,
|
||||
weights: *[256]u4,
|
||||
) !usize {
|
||||
var stream = std.io.limitedReader(source, compressed_size);
|
||||
var bit_reader = readers.bitReader(stream.reader());
|
||||
|
||||
var entries: [1 << 6]Table.Fse = undefined;
|
||||
const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) {
|
||||
error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e,
|
||||
error.EndOfStream => return error.MalformedFseTable,
|
||||
else => |e| return e,
|
||||
};
|
||||
const accuracy_log = std.math.log2_int_ceil(usize, table_size);
|
||||
|
||||
const amount = try stream.reader().readAll(buffer);
|
||||
var huff_bits: readers.ReverseBitReader = undefined;
|
||||
huff_bits.init(buffer[0..amount]) catch return error.MalformedHuffmanTree;
|
||||
|
||||
return assignWeights(&huff_bits, accuracy_log, &entries, weights);
|
||||
}
|
||||
|
||||
fn decodeFseHuffmanTreeSlice(src: []const u8, compressed_size: usize, weights: *[256]u4) !usize {
|
||||
if (src.len < compressed_size) return error.MalformedHuffmanTree;
|
||||
var stream = std.io.fixedBufferStream(src[0..compressed_size]);
|
||||
var counting_reader = std.io.countingReader(stream.reader());
|
||||
var bit_reader = readers.bitReader(counting_reader.reader());
|
||||
|
||||
var entries: [1 << 6]Table.Fse = undefined;
|
||||
const table_size = decodeFseTable(&bit_reader, 256, 6, &entries) catch |err| switch (err) {
|
||||
error.MalformedAccuracyLog, error.MalformedFseTable => |e| return e,
|
||||
error.EndOfStream => return error.MalformedFseTable,
|
||||
};
|
||||
const accuracy_log = std.math.log2_int_ceil(usize, table_size);
|
||||
|
||||
const start_index = std.math.cast(usize, counting_reader.bytes_read) orelse
|
||||
return error.MalformedHuffmanTree;
|
||||
const huff_data = src[start_index..compressed_size];
|
||||
var huff_bits: readers.ReverseBitReader = undefined;
|
||||
huff_bits.init(huff_data) catch return error.MalformedHuffmanTree;
|
||||
|
||||
return assignWeights(&huff_bits, accuracy_log, &entries, weights);
|
||||
}
|
||||
|
||||
fn assignWeights(
|
||||
huff_bits: *readers.ReverseBitReader,
|
||||
accuracy_log: u16,
|
||||
entries: *[1 << 6]Table.Fse,
|
||||
weights: *[256]u4,
|
||||
) !usize {
|
||||
var i: usize = 0;
|
||||
var even_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
|
||||
var odd_state: u32 = huff_bits.readBitsNoEof(u32, accuracy_log) catch return error.MalformedHuffmanTree;
|
||||
|
||||
while (i < 254) {
|
||||
const even_data = entries[even_state];
|
||||
var read_bits: u16 = 0;
|
||||
const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable;
|
||||
weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree;
|
||||
i += 1;
|
||||
if (read_bits < even_data.bits) {
|
||||
weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree;
|
||||
i += 1;
|
||||
break;
|
||||
}
|
||||
even_state = even_data.baseline + even_bits;
|
||||
|
||||
read_bits = 0;
|
||||
const odd_data = entries[odd_state];
|
||||
const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch unreachable;
|
||||
weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree;
|
||||
i += 1;
|
||||
if (read_bits < odd_data.bits) {
|
||||
if (i == 255) return error.MalformedHuffmanTree;
|
||||
weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree;
|
||||
i += 1;
|
||||
break;
|
||||
}
|
||||
odd_state = odd_data.baseline + odd_bits;
|
||||
} else return error.MalformedHuffmanTree;
|
||||
|
||||
if (!huff_bits.isEmpty()) {
|
||||
return error.MalformedHuffmanTree;
|
||||
}
|
||||
|
||||
return i + 1; // stream contains all but the last symbol
|
||||
}
|
||||
|
||||
fn decodeDirectHuffmanTree(source: anytype, encoded_symbol_count: usize, weights: *[256]u4) !usize {
|
||||
const weights_byte_count = (encoded_symbol_count + 1) / 2;
|
||||
for (0..weights_byte_count) |i| {
|
||||
const byte = try source.readByte();
|
||||
weights[2 * i] = @as(u4, @intCast(byte >> 4));
|
||||
weights[2 * i + 1] = @as(u4, @intCast(byte & 0xF));
|
||||
}
|
||||
return encoded_symbol_count + 1;
|
||||
}
|
||||
|
||||
fn assignSymbols(weight_sorted_prefixed_symbols: []LiteralsSection.HuffmanTree.PrefixedSymbol, weights: [256]u4) usize {
|
||||
for (0..weight_sorted_prefixed_symbols.len) |i| {
|
||||
weight_sorted_prefixed_symbols[i] = .{
|
||||
.symbol = @as(u8, @intCast(i)),
|
||||
.weight = undefined,
|
||||
.prefix = undefined,
|
||||
};
|
||||
}
|
||||
|
||||
std.mem.sort(
|
||||
LiteralsSection.HuffmanTree.PrefixedSymbol,
|
||||
weight_sorted_prefixed_symbols,
|
||||
weights,
|
||||
lessThanByWeight,
|
||||
);
|
||||
|
||||
var prefix: u16 = 0;
|
||||
var prefixed_symbol_count: usize = 0;
|
||||
var sorted_index: usize = 0;
|
||||
const symbol_count = weight_sorted_prefixed_symbols.len;
|
||||
while (sorted_index < symbol_count) {
|
||||
var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
|
||||
const weight = weights[symbol];
|
||||
if (weight == 0) {
|
||||
sorted_index += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (sorted_index < symbol_count) : ({
|
||||
sorted_index += 1;
|
||||
prefixed_symbol_count += 1;
|
||||
prefix += 1;
|
||||
}) {
|
||||
symbol = weight_sorted_prefixed_symbols[sorted_index].symbol;
|
||||
if (weights[symbol] != weight) {
|
||||
prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1;
|
||||
break;
|
||||
}
|
||||
weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol;
|
||||
weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix;
|
||||
weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight;
|
||||
}
|
||||
}
|
||||
return prefixed_symbol_count;
|
||||
}
|
||||
|
||||
fn buildHuffmanTree(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!LiteralsSection.HuffmanTree {
|
||||
var weight_power_sum_big: u32 = 0;
|
||||
for (weights[0 .. symbol_count - 1]) |value| {
|
||||
weight_power_sum_big += (@as(u16, 1) << value) >> 1;
|
||||
}
|
||||
if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree;
|
||||
const weight_power_sum = @as(u16, @intCast(weight_power_sum_big));
|
||||
|
||||
// advance to next power of two (even if weight_power_sum is a power of 2)
|
||||
// TODO: is it valid to have weight_power_sum == 0?
|
||||
const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1;
|
||||
const next_power_of_two = @as(u16, 1) << max_number_of_bits;
|
||||
weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1;
|
||||
|
||||
var weight_sorted_prefixed_symbols: [256]LiteralsSection.HuffmanTree.PrefixedSymbol = undefined;
|
||||
const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*);
|
||||
const tree = LiteralsSection.HuffmanTree{
|
||||
.max_bit_count = max_number_of_bits,
|
||||
.symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)),
|
||||
.nodes = weight_sorted_prefixed_symbols,
|
||||
};
|
||||
return tree;
|
||||
}
|
||||
|
||||
pub fn decodeHuffmanTree(
|
||||
source: anytype,
|
||||
buffer: []u8,
|
||||
) (@TypeOf(source).Error || Error)!LiteralsSection.HuffmanTree {
|
||||
const header = try source.readByte();
|
||||
var weights: [256]u4 = undefined;
|
||||
const symbol_count = if (header < 128)
|
||||
// FSE compressed weights
|
||||
try decodeFseHuffmanTree(source, header, buffer, &weights)
|
||||
else
|
||||
try decodeDirectHuffmanTree(source, header - 127, &weights);
|
||||
|
||||
return buildHuffmanTree(&weights, symbol_count);
|
||||
}
|
||||
|
||||
pub fn decodeHuffmanTreeSlice(
|
||||
src: []const u8,
|
||||
consumed_count: *usize,
|
||||
) Error!LiteralsSection.HuffmanTree {
|
||||
if (src.len == 0) return error.MalformedHuffmanTree;
|
||||
const header = src[0];
|
||||
var bytes_read: usize = 1;
|
||||
var weights: [256]u4 = undefined;
|
||||
const symbol_count = if (header < 128) count: {
|
||||
// FSE compressed weights
|
||||
bytes_read += header;
|
||||
break :count try decodeFseHuffmanTreeSlice(src[1..], header, &weights);
|
||||
} else count: {
|
||||
var fbs = std.io.fixedBufferStream(src[1..]);
|
||||
defer bytes_read += fbs.pos;
|
||||
break :count try decodeDirectHuffmanTree(fbs.reader(), header - 127, &weights);
|
||||
};
|
||||
|
||||
consumed_count.* += bytes_read;
|
||||
return buildHuffmanTree(&weights, symbol_count);
|
||||
}
|
||||
|
||||
fn lessThanByWeight(
|
||||
weights: [256]u4,
|
||||
lhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
|
||||
rhs: LiteralsSection.HuffmanTree.PrefixedSymbol,
|
||||
) bool {
|
||||
// NOTE: this function relies on the use of a stable sorting algorithm,
|
||||
// otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs;
|
||||
// should be added
|
||||
return weights[lhs.symbol] < weights[rhs.symbol];
|
||||
}
|
||||
|
|
@@ -1,633 +0,0 @@
|
|||
const std = @import("std");
|
||||
const assert = std.debug.assert;
|
||||
const Allocator = std.mem.Allocator;
|
||||
const RingBuffer = std.RingBuffer;
|
||||
|
||||
const types = @import("types.zig");
|
||||
const frame = types.frame;
|
||||
const LiteralsSection = types.compressed_block.LiteralsSection;
|
||||
const SequencesSection = types.compressed_block.SequencesSection;
|
||||
const SkippableHeader = types.frame.Skippable.Header;
|
||||
const ZstandardHeader = types.frame.Zstandard.Header;
|
||||
const Table = types.compressed_block.Table;
|
||||
|
||||
pub const block = @import("decode/block.zig");
|
||||
|
||||
const readers = @import("readers.zig");
|
||||
|
||||
/// Returns `true` is `magic` is a valid magic number for a skippable frame
|
||||
pub fn isSkippableMagic(magic: u32) bool {
|
||||
return frame.Skippable.magic_number_min <= magic and magic <= frame.Skippable.magic_number_max;
|
||||
}
|
||||
|
||||
/// Returns the kind of frame at the beginning of `source`.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.BadMagic` if `source` begins with bytes not equal to the
|
||||
/// Zstandard frame magic number, or outside the range of magic numbers for
|
||||
/// skippable frames.
|
||||
/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
|
||||
pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
|
||||
const magic = try source.readInt(u32, .little);
|
||||
return frameType(magic);
|
||||
}
|
||||
|
||||
/// Returns the kind of frame associated to `magic`.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.BadMagic` if `magic` is not a valid magic number.
|
||||
pub fn frameType(magic: u32) error{BadMagic}!frame.Kind {
|
||||
return if (magic == frame.Zstandard.magic_number)
|
||||
.zstandard
|
||||
else if (isSkippableMagic(magic))
|
||||
.skippable
|
||||
else
|
||||
error.BadMagic;
|
||||
}
|
||||
|
||||
pub const FrameHeader = union(enum) {
|
||||
zstandard: ZstandardHeader,
|
||||
skippable: SkippableHeader,
|
||||
};
|
||||
|
||||
pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet };
|
||||
|
||||
/// Returns the header of the frame at the beginning of `source`.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.BadMagic` if `source` begins with bytes not equal to the
|
||||
/// Zstandard frame magic number, or outside the range of magic numbers for
|
||||
/// skippable frames.
|
||||
/// - `error.EndOfStream` if `source` contains fewer than 4 bytes
|
||||
/// - `error.ReservedBitSet` if the frame is a Zstandard frame and any of the
|
||||
/// reserved bits are set
|
||||
pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader {
|
||||
const magic = try source.readInt(u32, .little);
|
||||
const frame_type = try frameType(magic);
|
||||
switch (frame_type) {
|
||||
.zstandard => return FrameHeader{ .zstandard = try decodeZstandardHeader(source) },
|
||||
.skippable => return FrameHeader{
|
||||
.skippable = .{
|
||||
.magic_number = magic,
|
||||
.frame_size = try source.readInt(u32, .little),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub const ReadWriteCount = struct {
|
||||
read_count: usize,
|
||||
write_count: usize,
|
||||
};
|
||||
|
||||
/// Decodes frames from `src` into `dest`; returns the length of the result.
|
||||
/// The stream should not have extra trailing bytes - either all bytes in `src`
|
||||
/// will be decoded, or an error will be returned. An error will be returned if
|
||||
/// a Zstandard frame in `src` does not declare its content size.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.DictionaryIdFlagUnsupported` if a `src` contains a frame that
|
||||
/// uses a dictionary
|
||||
/// - `error.MalformedFrame` if a frame in `src` is invalid
|
||||
/// - `error.UnknownContentSizeUnsupported` if a frame in `src` does not
|
||||
/// declare its content size
|
||||
pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{
|
||||
MalformedFrame,
|
||||
UnknownContentSizeUnsupported,
|
||||
DictionaryIdFlagUnsupported,
|
||||
}!usize {
|
||||
var write_count: usize = 0;
|
||||
var read_count: usize = 0;
|
||||
while (read_count < src.len) {
|
||||
const counts = decodeFrame(dest, src[read_count..], verify_checksum) catch |err| {
|
||||
switch (err) {
|
||||
error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported,
|
||||
error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
|
||||
else => return error.MalformedFrame,
|
||||
}
|
||||
};
|
||||
read_count += counts.read_count;
|
||||
write_count += counts.write_count;
|
||||
}
|
||||
return write_count;
|
||||
}
|
||||
|
||||
/// Decodes a stream of frames from `src`; returns the decoded bytes. The stream
|
||||
/// should not have extra trailing bytes - either all bytes in `src` will be
|
||||
/// decoded, or an error will be returned.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.DictionaryIdFlagUnsupported` if a `src` contains a frame that
|
||||
/// uses a dictionary
|
||||
/// - `error.MalformedFrame` if a frame in `src` is invalid
|
||||
/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
|
||||
pub fn decodeAlloc(
|
||||
allocator: Allocator,
|
||||
src: []const u8,
|
||||
verify_checksum: bool,
|
||||
window_size_max: usize,
|
||||
) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 {
|
||||
var result = std.ArrayList(u8).init(allocator);
|
||||
errdefer result.deinit();
|
||||
|
||||
var read_count: usize = 0;
|
||||
while (read_count < src.len) {
|
||||
read_count += decodeFrameArrayList(
|
||||
allocator,
|
||||
&result,
|
||||
src[read_count..],
|
||||
verify_checksum,
|
||||
window_size_max,
|
||||
) catch |err| switch (err) {
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
|
||||
else => return error.MalformedFrame,
|
||||
};
|
||||
}
|
||||
return result.toOwnedSlice();
|
||||
}
|
||||
|
||||
/// Decodes the frame at the start of `src` into `dest`. Returns the number of
|
||||
/// bytes read from `src` and written to `dest`. This function can only decode
|
||||
/// frames that declare the decompressed content size.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
|
||||
/// number for a Zstandard or skippable frame
|
||||
/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
|
||||
/// uncompressed content size
|
||||
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
|
||||
/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
|
||||
/// size declared by the frame header
|
||||
/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
|
||||
/// that is larger than `std.math.maxInt(usize)`
|
||||
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
|
||||
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
|
||||
/// contains a checksum that does not match the checksum of the decompressed
|
||||
/// data
|
||||
/// - `error.ReservedBitSet` if any of the reserved bits of the frame header
|
||||
/// are set
|
||||
/// - `error.EndOfStream` if `src` does not contain a complete frame
|
||||
/// - `error.BadContentSize` if the content size declared by the frame does
|
||||
/// not equal the actual size of decompressed data
|
||||
/// - an error in `block.Error` if there are errors decoding a block
|
||||
/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
|
||||
/// size greater than `src.len`
|
||||
pub fn decodeFrame(
|
||||
dest: []u8,
|
||||
src: []const u8,
|
||||
verify_checksum: bool,
|
||||
) (error{
|
||||
BadMagic,
|
||||
UnknownContentSizeUnsupported,
|
||||
ContentTooLarge,
|
||||
ContentSizeTooLarge,
|
||||
WindowSizeUnknown,
|
||||
DictionaryIdFlagUnsupported,
|
||||
SkippableSizeTooLarge,
|
||||
} || FrameError)!ReadWriteCount {
|
||||
var fbs = std.io.fixedBufferStream(src);
|
||||
switch (try decodeFrameType(fbs.reader())) {
|
||||
.zstandard => return decodeZstandardFrame(dest, src, verify_checksum),
|
||||
.skippable => {
|
||||
const content_size = try fbs.reader().readInt(u32, .little);
|
||||
if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
|
||||
const read_count = @as(usize, content_size) + 8;
|
||||
if (read_count > src.len) return error.SkippableSizeTooLarge;
|
||||
return ReadWriteCount{
|
||||
.read_count = read_count,
|
||||
.write_count = 0,
|
||||
};
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes the frame at the start of `src` into `dest`. Returns the number of
|
||||
/// bytes read from `src`.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.BadMagic` if the first 4 bytes of `src` is not a valid magic
|
||||
/// number for a Zstandard or skippable frame
|
||||
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
|
||||
/// - `error.WindowTooLarge` if the window size is larger than
|
||||
/// `window_size_max`
|
||||
/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
|
||||
/// that is larger than `std.math.maxInt(usize)`
|
||||
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
|
||||
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
|
||||
/// contains a checksum that does not match the checksum of the decompressed
|
||||
/// data
|
||||
/// - `error.ReservedBitSet` if any of the reserved bits of the frame header
|
||||
/// are set
|
||||
/// - `error.EndOfStream` if `src` does not contain a complete frame
|
||||
/// - `error.BadContentSize` if the content size declared by the frame does
|
||||
/// not equal the actual size of decompressed data
|
||||
/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
|
||||
/// - an error in `block.Error` if there are errors decoding a block
|
||||
/// - `error.SkippableSizeTooLarge` if the frame is skippable and reports a
|
||||
/// size greater than `src.len`
|
||||
pub fn decodeFrameArrayList(
|
||||
allocator: Allocator,
|
||||
dest: *std.ArrayList(u8),
|
||||
src: []const u8,
|
||||
verify_checksum: bool,
|
||||
window_size_max: usize,
|
||||
) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize {
|
||||
var fbs = std.io.fixedBufferStream(src);
|
||||
const reader = fbs.reader();
|
||||
const magic = try reader.readInt(u32, .little);
|
||||
switch (try frameType(magic)) {
|
||||
.zstandard => return decodeZstandardFrameArrayList(
|
||||
allocator,
|
||||
dest,
|
||||
src,
|
||||
verify_checksum,
|
||||
window_size_max,
|
||||
),
|
||||
.skippable => {
|
||||
const content_size = try fbs.reader().readInt(u32, .little);
|
||||
if (content_size > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
|
||||
const read_count = @as(usize, content_size) + 8;
|
||||
if (read_count > src.len) return error.SkippableSizeTooLarge;
|
||||
return read_count;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the frame checksum corresponding to the data fed into `hasher`
|
||||
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
|
||||
const hash = hasher.final();
|
||||
return @as(u32, @intCast(hash & 0xFFFFFFFF));
|
||||
}
|
||||
|
||||
const FrameError = error{
|
||||
ChecksumFailure,
|
||||
BadContentSize,
|
||||
EndOfStream,
|
||||
ReservedBitSet,
|
||||
} || block.Error;
|
||||
|
||||
/// Decode a Zstandard frame from `src` into `dest`, returning the number of
|
||||
/// bytes read from `src` and written to `dest`. The first four bytes of `src`
|
||||
/// must be the magic number for a Zstandard frame.
|
||||
///
|
||||
/// Error returned:
|
||||
/// - `error.UnknownContentSizeUnsupported` if the frame does not declare the
|
||||
/// uncompressed content size
|
||||
/// - `error.ContentTooLarge` if `dest` is smaller than the uncompressed data
|
||||
/// size declared by the frame header
|
||||
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
|
||||
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
|
||||
/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
|
||||
/// that is larger than `std.math.maxInt(usize)`
|
||||
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
|
||||
/// contains a checksum that does not match the checksum of the decompressed
|
||||
/// data
|
||||
/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
|
||||
/// - `error.EndOfStream` if `src` does not contain a complete frame
|
||||
/// - an error in `block.Error` if there are errors decoding a block
|
||||
/// - `error.BadContentSize` if the content size declared by the frame does
|
||||
/// not equal the actual size of decompressed data
|
||||
pub fn decodeZstandardFrame(
|
||||
dest: []u8,
|
||||
src: []const u8,
|
||||
verify_checksum: bool,
|
||||
) (error{
|
||||
UnknownContentSizeUnsupported,
|
||||
ContentTooLarge,
|
||||
ContentSizeTooLarge,
|
||||
WindowSizeUnknown,
|
||||
DictionaryIdFlagUnsupported,
|
||||
} || FrameError)!ReadWriteCount {
|
||||
assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
|
||||
var consumed_count: usize = 4;
|
||||
|
||||
var frame_context = context: {
|
||||
var fbs = std.io.fixedBufferStream(src[consumed_count..]);
|
||||
const source = fbs.reader();
|
||||
const frame_header = try decodeZstandardHeader(source);
|
||||
consumed_count += fbs.pos;
|
||||
break :context FrameContext.init(
|
||||
frame_header,
|
||||
std.math.maxInt(usize),
|
||||
verify_checksum,
|
||||
) catch |err| switch (err) {
|
||||
error.WindowTooLarge => unreachable,
|
||||
inline else => |e| return e,
|
||||
};
|
||||
};
|
||||
const counts = try decodeZStandardFrameBlocks(
|
||||
dest,
|
||||
src[consumed_count..],
|
||||
&frame_context,
|
||||
);
|
||||
return ReadWriteCount{
|
||||
.read_count = counts.read_count + consumed_count,
|
||||
.write_count = counts.write_count,
|
||||
};
|
||||
}
|
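// A minimal sketch (assumed, illustrative code) of single-frame decoding into
// a caller-provided buffer; it relies on the frame declaring its content size,
// as documented above.
fn decodeSingleFrameSketch(dest: []u8, src: []const u8) ![]u8 {
    const counts = try decodeZstandardFrame(dest, src, true);
    // `read_count` covers the whole frame; only the written bytes are returned.
    return dest[0..counts.write_count];
}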
||||
|
||||
pub fn decodeZStandardFrameBlocks(
|
||||
dest: []u8,
|
||||
src: []const u8,
|
||||
frame_context: *FrameContext,
|
||||
) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount {
|
||||
const content_size = frame_context.content_size orelse
|
||||
return error.UnknownContentSizeUnsupported;
|
||||
if (dest.len < content_size) return error.ContentTooLarge;
|
||||
|
||||
var consumed_count: usize = 0;
|
||||
const written_count = decodeFrameBlocksInner(
|
||||
dest[0..content_size],
|
||||
src[consumed_count..],
|
||||
&consumed_count,
|
||||
if (frame_context.hasher_opt) |*hasher| hasher else null,
|
||||
frame_context.block_size_max,
|
||||
) catch |err| switch (err) {
|
||||
error.DestTooSmall => return error.BadContentSize,
|
||||
inline else => |e| return e,
|
||||
};
|
||||
|
||||
if (written_count != content_size) return error.BadContentSize;
|
||||
if (frame_context.has_checksum) {
|
||||
if (src.len < consumed_count + 4) return error.EndOfStream;
|
||||
const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little);
|
||||
consumed_count += 4;
|
||||
if (frame_context.hasher_opt) |*hasher| {
|
||||
if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
|
||||
}
|
||||
}
|
||||
return ReadWriteCount{ .read_count = consumed_count, .write_count = written_count };
|
||||
}
|
||||
|
||||
pub const FrameContext = struct {
|
||||
hasher_opt: ?std.hash.XxHash64,
|
||||
window_size: usize,
|
||||
has_checksum: bool,
|
||||
block_size_max: usize,
|
||||
content_size: ?usize,
|
||||
|
||||
const Error = error{
|
||||
DictionaryIdFlagUnsupported,
|
||||
WindowSizeUnknown,
|
||||
WindowTooLarge,
|
||||
ContentSizeTooLarge,
|
||||
};
|
||||
/// Validates `frame_header` and returns the associated `FrameContext`.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
|
||||
/// - `error.WindowSizeUnknown` if the frame does not have a valid window
|
||||
/// size
|
||||
/// - `error.WindowTooLarge` if the window size is larger than
|
||||
/// `window_size_max` or `std.math.maxInt(usize)`
|
||||
/// - `error.ContentSizeTooLarge` if the frame header indicates a content
|
||||
/// size larger than `std.math.maxInt(usize)`
|
||||
pub fn init(
|
||||
frame_header: ZstandardHeader,
|
||||
window_size_max: usize,
|
||||
verify_checksum: bool,
|
||||
) Error!FrameContext {
|
||||
if (frame_header.descriptor.dictionary_id_flag != 0)
|
||||
return error.DictionaryIdFlagUnsupported;
|
||||
|
||||
const window_size_raw = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
|
||||
const window_size = if (window_size_raw > window_size_max)
|
||||
return error.WindowTooLarge
|
||||
else
|
||||
std.math.cast(usize, window_size_raw) orelse return error.WindowTooLarge;
|
||||
|
||||
const should_compute_checksum =
|
||||
frame_header.descriptor.content_checksum_flag and verify_checksum;
|
||||
|
||||
const content_size = if (frame_header.content_size) |size|
|
||||
std.math.cast(usize, size) orelse return error.ContentSizeTooLarge
|
||||
else
|
||||
null;
|
||||
|
||||
return .{
|
||||
.hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null,
|
||||
.window_size = window_size,
|
||||
.has_checksum = frame_header.descriptor.content_checksum_flag,
|
||||
.block_size_max = @min(types.block_size_max, window_size),
|
||||
.content_size = content_size,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/// Decode a Zstandard frame from `src` and return the number of bytes read; see
|
||||
/// `decodeZstandardFrame()`. The first four bytes of `src` must be the magic
|
||||
/// number for a Zstandard frame.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.WindowSizeUnknown` if the frame does not have a valid window size
|
||||
/// - `error.WindowTooLarge` if the window size is larger than
|
||||
/// `window_size_max`
|
||||
/// - `error.DictionaryIdFlagUnsupported` if the frame uses a dictionary
|
||||
/// - `error.ContentSizeTooLarge` if the frame header indicates a content size
|
||||
/// that is larger than `std.math.maxInt(usize)`
|
||||
/// - `error.ChecksumFailure` if `verify_checksum` is true and the frame
|
||||
/// contains a checksum that does not match the checksum of the decompressed
|
||||
/// data
|
||||
/// - `error.ReservedBitSet` if the reserved bit of the frame header is set
|
||||
/// - `error.EndOfStream` if `src` does not contain a complete frame
|
||||
/// - `error.OutOfMemory` if `allocator` cannot allocate enough memory
|
||||
/// - an error in `block.Error` if there are errors decoding a block
|
||||
/// - `error.BadContentSize` if the content size declared by the frame does
|
||||
/// not equal the size of decompressed data
|
||||
pub fn decodeZstandardFrameArrayList(
|
||||
allocator: Allocator,
|
||||
dest: *std.ArrayList(u8),
|
||||
src: []const u8,
|
||||
verify_checksum: bool,
|
||||
window_size_max: usize,
|
||||
) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize {
|
||||
assert(std.mem.readInt(u32, src[0..4], .little) == frame.Zstandard.magic_number);
|
||||
var consumed_count: usize = 4;
|
||||
|
||||
var frame_context = context: {
|
||||
var fbs = std.io.fixedBufferStream(src[consumed_count..]);
|
||||
const source = fbs.reader();
|
||||
const frame_header = try decodeZstandardHeader(source);
|
||||
consumed_count += fbs.pos;
|
||||
break :context try FrameContext.init(frame_header, window_size_max, verify_checksum);
|
||||
};
|
||||
|
||||
consumed_count += try decodeZstandardFrameBlocksArrayList(
|
||||
allocator,
|
||||
dest,
|
||||
src[consumed_count..],
|
||||
&frame_context,
|
||||
);
|
||||
return consumed_count;
|
||||
}
|
||||
|
||||
pub fn decodeZstandardFrameBlocksArrayList(
|
||||
allocator: Allocator,
|
||||
dest: *std.ArrayList(u8),
|
||||
src: []const u8,
|
||||
frame_context: *FrameContext,
|
||||
) (error{OutOfMemory} || FrameError)!usize {
|
||||
const initial_len = dest.items.len;
|
||||
|
||||
var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size);
|
||||
defer ring_buffer.deinit(allocator);
|
||||
|
||||
// These tables take 7680 bytes
|
||||
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
|
||||
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
|
||||
var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;
|
||||
|
||||
var block_header = try block.decodeBlockHeaderSlice(src);
|
||||
var consumed_count: usize = 3;
|
||||
var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
|
||||
while (true) : ({
|
||||
block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]);
|
||||
consumed_count += 3;
|
||||
}) {
|
||||
const written_size = try block.decodeBlockRingBuffer(
|
||||
&ring_buffer,
|
||||
src[consumed_count..],
|
||||
block_header,
|
||||
&decode_state,
|
||||
&consumed_count,
|
||||
frame_context.block_size_max,
|
||||
);
|
||||
if (frame_context.content_size) |size| {
|
||||
if (dest.items.len - initial_len > size) {
|
||||
return error.BadContentSize;
|
||||
}
|
||||
}
|
||||
if (written_size > 0) {
|
||||
const written_slice = ring_buffer.sliceLast(written_size);
|
||||
try dest.appendSlice(written_slice.first);
|
||||
try dest.appendSlice(written_slice.second);
|
||||
if (frame_context.hasher_opt) |*hasher| {
|
||||
hasher.update(written_slice.first);
|
||||
hasher.update(written_slice.second);
|
||||
}
|
||||
}
|
||||
if (block_header.last_block) break;
|
||||
}
|
||||
if (frame_context.content_size) |size| {
|
||||
if (dest.items.len - initial_len != size) {
|
||||
return error.BadContentSize;
|
||||
}
|
||||
}
|
||||
|
||||
if (frame_context.has_checksum) {
|
||||
if (src.len < consumed_count + 4) return error.EndOfStream;
|
||||
const checksum = std.mem.readInt(u32, src[consumed_count..][0..4], .little);
|
||||
consumed_count += 4;
|
||||
if (frame_context.hasher_opt) |*hasher| {
|
||||
if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
|
||||
}
|
||||
}
|
||||
return consumed_count;
|
||||
}
|
||||
|
||||
fn decodeFrameBlocksInner(
|
||||
dest: []u8,
|
||||
src: []const u8,
|
||||
consumed_count: *usize,
|
||||
hash: ?*std.hash.XxHash64,
|
||||
block_size_max: usize,
|
||||
) (error{ EndOfStream, DestTooSmall } || block.Error)!usize {
|
||||
// These tables take 7680 bytes
|
||||
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
|
||||
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
|
||||
var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;
|
||||
|
||||
var block_header = try block.decodeBlockHeaderSlice(src);
|
||||
var bytes_read: usize = 3;
|
||||
defer consumed_count.* += bytes_read;
|
||||
var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
|
||||
var count: usize = 0;
|
||||
while (true) : ({
|
||||
block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]);
|
||||
bytes_read += 3;
|
||||
}) {
|
||||
const written_size = try block.decodeBlock(
|
||||
dest,
|
||||
src[bytes_read..],
|
||||
block_header,
|
||||
&decode_state,
|
||||
&bytes_read,
|
||||
block_size_max,
|
||||
count,
|
||||
);
|
||||
if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]);
|
||||
count += written_size;
|
||||
if (block_header.last_block) break;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Decode the header of a skippable frame. The first four bytes of `src` must
|
||||
/// be a valid magic number for a skippable frame.
|
||||
pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader {
|
||||
const magic = std.mem.readInt(u32, src[0..4], .little);
|
||||
assert(isSkippableMagic(magic));
|
||||
const frame_size = std.mem.readInt(u32, src[4..8], .little);
|
||||
return .{
|
||||
.magic_number = magic,
|
||||
.frame_size = frame_size,
|
||||
};
|
||||
}
|
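// A worked example (assumed, illustrative test): the eight little-endian
// header bytes of a skippable frame with magic 0x184D2A50 and a 16-byte
// payload decode as shown below.
test "decodeSkippableHeader example" {
    const header_bytes = [8]u8{ 0x50, 0x2A, 0x4D, 0x18, 0x10, 0x00, 0x00, 0x00 };
    const header = decodeSkippableHeader(&header_bytes);
    try std.testing.expectEqual(@as(u32, 0x184D2A50), header.magic_number);
    try std.testing.expectEqual(@as(u32, 16), header.frame_size);
}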
||||
|
||||
/// Returns the window size required to decompress a frame, or `null` if it
|
||||
/// cannot be determined (which indicates a malformed frame header).
|
||||
pub fn frameWindowSize(header: ZstandardHeader) ?u64 {
|
||||
if (header.window_descriptor) |descriptor| {
|
||||
const exponent = (descriptor & 0b11111000) >> 3;
|
||||
const mantissa = descriptor & 0b00000111;
|
||||
const window_log = 10 + exponent;
|
||||
const window_base = @as(u64, 1) << @as(u6, @intCast(window_log));
|
||||
const window_add = (window_base / 8) * mantissa;
|
||||
return window_base + window_add;
|
||||
} else return header.content_size;
|
||||
}
|
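// A worked example (assumed, illustrative test) of the computation above:
// window descriptor 0b00011101 has exponent 3 and mantissa 5, so
// window_log = 13, window_base = 8192, and window_add = 5 * 1024 = 5120,
// giving a window size of 13312 bytes.
test "frameWindowSize example" {
    const header = ZstandardHeader{
        .descriptor = .{
            .dictionary_id_flag = 0,
            .content_checksum_flag = false,
            .reserved = false,
            .unused = false,
            .single_segment_flag = false,
            .content_size_flag = 0,
        },
        .window_descriptor = 0b00011101,
        .dictionary_id = null,
        .content_size = null,
    };
    try std.testing.expectEqual(@as(?u64, 13312), frameWindowSize(header));
}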
||||
|
||||
/// Decode the header of a Zstandard frame.
|
||||
///
|
||||
/// Errors returned:
|
||||
/// - `error.ReservedBitSet` if any of the reserved bits of the header are set
|
||||
/// - `error.EndOfStream` if `source` does not contain a complete header
|
||||
pub fn decodeZstandardHeader(
|
||||
source: anytype,
|
||||
) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader {
|
||||
const descriptor = @as(ZstandardHeader.Descriptor, @bitCast(try source.readByte()));
|
||||
|
||||
if (descriptor.reserved) return error.ReservedBitSet;
|
||||
|
||||
var window_descriptor: ?u8 = null;
|
||||
if (!descriptor.single_segment_flag) {
|
||||
window_descriptor = try source.readByte();
|
||||
}
|
||||
|
||||
var dictionary_id: ?u32 = null;
|
||||
if (descriptor.dictionary_id_flag > 0) {
|
||||
// if flag is 3 then field_size = 4, else field_size = flag
|
||||
const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1;
|
||||
dictionary_id = try source.readVarInt(u32, .little, field_size);
|
||||
}
|
||||
|
||||
var content_size: ?u64 = null;
|
||||
if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) {
|
||||
const field_size = @as(u4, 1) << descriptor.content_size_flag;
|
||||
content_size = try source.readVarInt(u64, .little, field_size);
|
||||
if (field_size == 2) content_size.? += 256;
|
||||
}
|
||||
|
||||
const header = ZstandardHeader{
|
||||
.descriptor = descriptor,
|
||||
.window_descriptor = window_descriptor,
|
||||
.dictionary_id = dictionary_id,
|
||||
.content_size = content_size,
|
||||
};
|
||||
return header;
|
||||
}
|
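// A worked example (assumed, illustrative test): the two-byte header 0x20 0x00
// sets only `single_segment_flag`, so no window descriptor follows and the
// single content-size byte declares a size of zero.
test "decodeZstandardHeader example" {
    var fbs = std.io.fixedBufferStream("\x20\x00");
    const header = try decodeZstandardHeader(fbs.reader());
    try std.testing.expect(header.descriptor.single_segment_flag);
    try std.testing.expectEqual(@as(?u8, null), header.window_descriptor);
    try std.testing.expectEqual(@as(?u64, 0), header.content_size);
}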
||||
|
||||
test {
|
||||
std.testing.refAllDecls(@This());
|
||||
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
const std = @import("std");
|
||||
|
||||
pub const ReversedByteReader = struct {
|
||||
remaining_bytes: usize,
|
||||
bytes: []const u8,
|
||||
|
||||
const Reader = std.io.GenericReader(*ReversedByteReader, error{}, readFn);
|
||||
|
||||
pub fn init(bytes: []const u8) ReversedByteReader {
|
||||
return .{
|
||||
.bytes = bytes,
|
||||
.remaining_bytes = bytes.len,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn reader(self: *ReversedByteReader) Reader {
|
||||
return .{ .context = self };
|
||||
}
|
||||
|
||||
fn readFn(ctx: *ReversedByteReader, buffer: []u8) !usize {
|
||||
if (ctx.remaining_bytes == 0) return 0;
|
||||
const byte_index = ctx.remaining_bytes - 1;
|
||||
buffer[0] = ctx.bytes[byte_index];
|
||||
// buffer[0] = @bitReverse(ctx.bytes[byte_index]);
|
||||
ctx.remaining_bytes = byte_index;
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
|
||||
/// A bit reader for reading the reversed bit streams used to encode
|
||||
/// FSE compressed data.
|
||||
pub const ReverseBitReader = struct {
|
||||
byte_reader: ReversedByteReader,
|
||||
bit_reader: std.io.BitReader(.big, ReversedByteReader.Reader),
|
||||
|
||||
pub fn init(self: *ReverseBitReader, bytes: []const u8) error{BitStreamHasNoStartBit}!void {
|
||||
self.byte_reader = ReversedByteReader.init(bytes);
|
||||
self.bit_reader = std.io.bitReader(.big, self.byte_reader.reader());
|
||||
if (bytes.len == 0) return;
|
||||
var i: usize = 0;
|
||||
while (i < 8 and 0 == self.readBitsNoEof(u1, 1) catch unreachable) : (i += 1) {}
|
||||
if (i == 8) return error.BitStreamHasNoStartBit;
|
||||
}
|
||||
|
||||
pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: u16) error{EndOfStream}!U {
|
||||
return self.bit_reader.readBitsNoEof(U, num_bits);
|
||||
}
|
||||
|
||||
pub fn readBits(self: *@This(), comptime U: type, num_bits: u16, out_bits: *u16) error{}!U {
|
||||
return try self.bit_reader.readBits(U, num_bits, out_bits);
|
||||
}
|
||||
|
||||
pub fn alignToByte(self: *@This()) void {
|
||||
self.bit_reader.alignToByte();
|
||||
}
|
||||
|
||||
pub fn isEmpty(self: ReverseBitReader) bool {
|
||||
return self.byte_reader.remaining_bytes == 0 and self.bit_reader.count == 0;
|
||||
}
|
||||
};
|
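// A worked example (assumed, illustrative test): the stream is read from the
// last byte toward the first, and `init` discards padding zeros plus the
// first set bit (the "start bit") of the final byte before real data begins.
test "ReverseBitReader start bit handling" {
    var bit_reader: ReverseBitReader = undefined;
    // Final byte 0b0001_0110: three padding zeros, the start bit, then 0110.
    try bit_reader.init(&[_]u8{ 0b1010_1010, 0b0001_0110 });
    try std.testing.expectEqual(@as(u4, 0b0110), try bit_reader.readBitsNoEof(u4, 4));
}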
||||
|
||||
pub fn BitReader(comptime Reader: type) type {
|
||||
return struct {
|
||||
underlying: std.io.BitReader(.little, Reader),
|
||||
|
||||
pub fn readBitsNoEof(self: *@This(), comptime U: type, num_bits: u16) !U {
|
||||
return self.underlying.readBitsNoEof(U, num_bits);
|
||||
}
|
||||
|
||||
pub fn readBits(self: *@This(), comptime U: type, num_bits: u16, out_bits: *u16) !U {
|
||||
return self.underlying.readBits(U, num_bits, out_bits);
|
||||
}
|
||||
|
||||
pub fn alignToByte(self: *@This()) void {
|
||||
self.underlying.alignToByte();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
pub fn bitReader(reader: anytype) BitReader(@TypeOf(reader)) {
|
||||
return .{ .underlying = std.io.bitReader(.little, reader) };
|
||||
}
|
||||
|
|
@ -1,403 +0,0 @@
|
|||
pub const block_size_max = 1 << 17;
|
||||
|
||||
pub const frame = struct {
|
||||
pub const Kind = enum { zstandard, skippable };
|
||||
|
||||
pub const Zstandard = struct {
|
||||
pub const magic_number = 0xFD2FB528;
|
||||
|
||||
header: Header,
|
||||
data_blocks: []Block,
|
||||
checksum: ?u32,
|
||||
|
||||
pub const Header = struct {
|
||||
descriptor: Descriptor,
|
||||
window_descriptor: ?u8,
|
||||
dictionary_id: ?u32,
|
||||
content_size: ?u64,
|
||||
|
||||
pub const Descriptor = packed struct {
|
||||
dictionary_id_flag: u2,
|
||||
content_checksum_flag: bool,
|
||||
reserved: bool,
|
||||
unused: bool,
|
||||
single_segment_flag: bool,
|
||||
content_size_flag: u2,
|
||||
};
|
||||
};
|
||||
|
||||
pub const Block = struct {
|
||||
pub const Header = struct {
|
||||
last_block: bool,
|
||||
block_type: Block.Type,
|
||||
block_size: u21,
|
||||
};
|
||||
|
||||
pub const Type = enum(u2) {
|
||||
raw,
|
||||
rle,
|
||||
compressed,
|
||||
reserved,
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
pub const Skippable = struct {
|
||||
pub const magic_number_min = 0x184D2A50;
|
||||
pub const magic_number_max = 0x184D2A5F;
|
||||
|
||||
pub const Header = struct {
|
||||
magic_number: u32,
|
||||
frame_size: u32,
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
pub const compressed_block = struct {
|
||||
pub const LiteralsSection = struct {
|
||||
header: Header,
|
||||
huffman_tree: ?HuffmanTree,
|
||||
streams: Streams,
|
||||
|
||||
pub const Streams = union(enum) {
|
||||
one: []const u8,
|
||||
four: [4][]const u8,
|
||||
};
|
||||
|
||||
pub const Header = struct {
|
||||
block_type: BlockType,
|
||||
size_format: u2,
|
||||
regenerated_size: u20,
|
||||
compressed_size: ?u18,
|
||||
};
|
||||
|
||||
pub const BlockType = enum(u2) {
|
||||
raw,
|
||||
rle,
|
||||
compressed,
|
||||
treeless,
|
||||
};
|
||||
|
||||
pub const HuffmanTree = struct {
|
||||
max_bit_count: u4,
|
||||
symbol_count_minus_one: u8,
|
||||
nodes: [256]PrefixedSymbol,
|
||||
|
||||
pub const PrefixedSymbol = struct {
|
||||
symbol: u8,
|
||||
prefix: u16,
|
||||
weight: u4,
|
||||
};
|
||||
|
||||
pub const Result = union(enum) {
|
||||
symbol: u8,
|
||||
index: usize,
|
||||
};
|
||||
|
||||
pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{NotFound}!Result {
|
||||
var node = self.nodes[index];
|
||||
const weight = node.weight;
|
||||
var i: usize = index;
|
||||
while (node.weight == weight) {
|
||||
if (node.prefix == prefix) return Result{ .symbol = node.symbol };
|
||||
if (i == 0) return error.NotFound;
|
||||
i -= 1;
|
||||
node = self.nodes[i];
|
||||
}
|
||||
return Result{ .index = i };
|
||||
}
|
||||
|
||||
pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 {
|
||||
return if (weight == 0) 0 else ((max_bit_count + 1) - weight);
|
||||
}
|
||||
};
|
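// A worked example (assumed, illustrative test) of `weightToBitCount` above:
// with max_bit_count = 6, weight 1 yields a 6-bit code, weight 6 a 1-bit
// code, and weight 0 marks a symbol that does not occur.
test "HuffmanTree.weightToBitCount example" {
    const testing = @import("std").testing;
    try testing.expectEqual(@as(u4, 6), HuffmanTree.weightToBitCount(1, 6));
    try testing.expectEqual(@as(u4, 1), HuffmanTree.weightToBitCount(6, 6));
    try testing.expectEqual(@as(u4, 0), HuffmanTree.weightToBitCount(0, 6));
}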
||||
|
||||
pub const StreamCount = enum { one, four };
|
||||
pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount {
|
||||
return switch (block_type) {
|
||||
.raw, .rle => .one,
|
||||
.compressed, .treeless => if (size_format == 0) .one else .four,
|
||||
};
|
||||
}
|
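// A worked example (assumed, illustrative test) of the rule above: raw and
// RLE literals always use a single stream, while compressed and treeless
// literals use four streams unless size_format is 0.
test "streamCount example" {
    const testing = @import("std").testing;
    try testing.expectEqual(StreamCount.one, streamCount(0, .raw));
    try testing.expectEqual(StreamCount.one, streamCount(0, .compressed));
    try testing.expectEqual(StreamCount.four, streamCount(1, .compressed));
}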
||||
};
|
||||
|
||||
pub const SequencesSection = struct {
|
||||
header: SequencesSection.Header,
|
||||
literals_length_table: Table,
|
||||
offset_table: Table,
|
||||
match_length_table: Table,
|
||||
|
||||
pub const Header = struct {
|
||||
sequence_count: u24,
|
||||
match_lengths: Mode,
|
||||
offsets: Mode,
|
||||
literal_lengths: Mode,
|
||||
|
||||
pub const Mode = enum(u2) {
|
||||
predefined,
|
||||
rle,
|
||||
fse,
|
||||
repeat,
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
pub const Table = union(enum) {
|
||||
fse: []const Fse,
|
||||
rle: u8,
|
||||
|
||||
pub const Fse = struct {
|
||||
symbol: u8,
|
||||
baseline: u16,
|
||||
bits: u8,
|
||||
};
|
||||
};
|
||||
|
||||
pub const literals_length_code_table = [36]struct { u32, u5 }{
|
||||
.{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 },
|
||||
.{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 },
|
||||
.{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 },
|
||||
.{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 },
|
||||
.{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 },
|
||||
.{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 },
|
||||
.{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 },
|
||||
.{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 },
|
||||
.{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 },
|
||||
};
|
||||
|
||||
pub const match_length_code_table = [53]struct { u32, u5 }{
|
||||
.{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 },
|
||||
.{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 },
|
||||
.{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 },
|
||||
.{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 },
|
||||
.{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 },
|
||||
.{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 },
|
||||
.{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 },
|
||||
.{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
|
||||
.{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
|
||||
};
|
||||
|
||||
pub const literals_length_default_distribution = [36]i16{
|
||||
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const match_lengths_default_distribution = [53]i16{
|
||||
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
|
||||
-1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const offset_codes_default_distribution = [29]i16{
|
||||
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const predefined_literal_fse_table = Table{
|
||||
.fse = &[64]Table.Fse{
|
||||
.{ .symbol = 0, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 0, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 1, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 3, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 4, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 6, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 7, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 9, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 10, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 12, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 14, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 16, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 18, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 19, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 21, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 22, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 24, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 25, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 26, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 27, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 29, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 31, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 0, .bits = 4, .baseline = 32 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 2, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 4, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 5, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 7, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 8, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 10, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 11, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 13, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 16, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 17, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 19, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 20, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 22, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 23, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 25, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 25, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 26, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 28, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 30, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 0, .bits = 4, .baseline = 48 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 2, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 3, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 5, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 6, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 8, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 9, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 11, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 12, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 15, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 17, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 18, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 20, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 21, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 23, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 24, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 35, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 34, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 33, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 32, .bits = 6, .baseline = 0 },
|
||||
},
|
||||
};
|
||||
|
||||
pub const predefined_match_fse_table = Table{
|
||||
.fse = &[64]Table.Fse{
|
||||
.{ .symbol = 0, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 2, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 3, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 5, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 6, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 8, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 10, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 13, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 16, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 19, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 22, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 25, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 28, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 31, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 33, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 35, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 37, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 39, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 41, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 43, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 45, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 2, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 3, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 4, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 6, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 7, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 9, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 12, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 15, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 18, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 21, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 24, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 27, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 30, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 32, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 34, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 36, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 38, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 40, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 42, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 44, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 32 },
|
||||
.{ .symbol = 1, .bits = 4, .baseline = 48 },
|
||||
.{ .symbol = 2, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 4, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 5, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 7, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 8, .bits = 5, .baseline = 32 },
|
||||
.{ .symbol = 11, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 14, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 17, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 20, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 23, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 26, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 29, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 52, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 51, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 50, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 49, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 48, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 47, .bits = 6, .baseline = 0 },
|
||||
.{ .symbol = 46, .bits = 6, .baseline = 0 },
|
||||
},
|
||||
};
|
||||
|
||||
pub const predefined_offset_fse_table = Table{
|
||||
.fse = &[32]Table.Fse{
|
||||
.{ .symbol = 0, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 6, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 9, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 15, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 21, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 3, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 7, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 12, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 18, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 23, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 5, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 8, .bits = 4, .baseline = 0 },
|
||||
.{ .symbol = 14, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 20, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 2, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 7, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 11, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 17, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 22, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 4, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 8, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 13, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 19, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 1, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 6, .bits = 4, .baseline = 16 },
|
||||
.{ .symbol = 10, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 16, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 28, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 27, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 26, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 25, .bits = 5, .baseline = 0 },
|
||||
.{ .symbol = 24, .bits = 5, .baseline = 0 },
|
||||
},
|
||||
};
|
||||
pub const start_repeated_offset_1 = 1;
|
||||
pub const start_repeated_offset_2 = 4;
|
||||
pub const start_repeated_offset_3 = 8;
|
||||
|
||||
pub const table_accuracy_log_max = struct {
|
||||
pub const literal = 9;
|
||||
pub const match = 9;
|
||||
pub const offset = 8;
|
||||
};
|
||||
|
||||
pub const table_symbol_count_max = struct {
|
||||
pub const literal = 36;
|
||||
pub const match = 53;
|
||||
pub const offset = 32;
|
||||
};
|
||||
|
||||
pub const default_accuracy_log = struct {
|
||||
pub const literal = 6;
|
||||
pub const match = 6;
|
||||
pub const offset = 5;
|
||||
};
|
||||
pub const table_size_max = struct {
|
||||
pub const literal = 1 << table_accuracy_log_max.literal;
|
||||
pub const match = 1 << table_accuracy_log_max.match;
|
||||
pub const offset = 1 << table_accuracy_log_max.offset;
|
||||
};
|
||||
};
|
||||
|
||||
test {
|
||||
const testing = @import("std").testing;
|
||||
testing.refAllDeclsRecursive(@This());
|
||||
}
|
||||
152
lib/std/compress/zstd.zig
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
const std = @import("../std.zig");
|
||||
const assert = std.debug.assert;
|
||||
|
||||
pub const Decompress = @import("zstd/Decompress.zig");
|
||||
|
||||
/// Window length recommended by the standard. Using a smaller window may make it impossible
|
||||
/// to decompress common streams.
|
||||
pub const default_window_len = 8 * 1024 * 1024;
|
||||
pub const block_size_max = 1 << 17;
|
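// A minimal usage sketch (assumed, illustrative code): decompressing an entire
// stream from any `std.io.Reader` into an allocated slice, using a window of
// the recommended length. The parameter names are illustrative.
fn decompressAllocSketch(gpa: std.mem.Allocator, input: *std.io.Reader) ![]u8 {
    const window = try gpa.alloc(u8, default_window_len);
    defer gpa.free(window);

    var decompress: Decompress = .init(input, window, .{});

    var out: std.ArrayListUnmanaged(u8) = .empty;
    defer out.deinit(gpa);
    try decompress.reader.appendRemaining(gpa, null, &out, .unlimited);
    return out.toOwnedSlice(gpa);
}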
||||
|
||||
pub const literals_length_default_distribution = [36]i16{
|
||||
4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
|
||||
-1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const match_lengths_default_distribution = [53]i16{
|
||||
1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
|
||||
-1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const offset_codes_default_distribution = [29]i16{
|
||||
1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
pub const start_repeated_offset_1 = 1;
|
||||
pub const start_repeated_offset_2 = 4;
|
||||
pub const start_repeated_offset_3 = 8;
|
||||
|
||||
pub const literals_length_code_table = [36]struct { u32, u5 }{
|
||||
.{ 0, 0 }, .{ 1, 0 }, .{ 2, 0 }, .{ 3, 0 },
|
||||
.{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 },
|
||||
.{ 8, 0 }, .{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 },
|
||||
.{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 }, .{ 15, 0 },
|
||||
.{ 16, 1 }, .{ 18, 1 }, .{ 20, 1 }, .{ 22, 1 },
|
||||
.{ 24, 2 }, .{ 28, 2 }, .{ 32, 3 }, .{ 40, 3 },
|
||||
.{ 48, 4 }, .{ 64, 6 }, .{ 128, 7 }, .{ 256, 8 },
|
||||
.{ 512, 9 }, .{ 1024, 10 }, .{ 2048, 11 }, .{ 4096, 12 },
|
||||
.{ 8192, 13 }, .{ 16384, 14 }, .{ 32768, 15 }, .{ 65536, 16 },
|
||||
};
|
||||
|
||||
pub const match_length_code_table = [53]struct { u32, u5 }{
|
||||
.{ 3, 0 }, .{ 4, 0 }, .{ 5, 0 }, .{ 6, 0 }, .{ 7, 0 }, .{ 8, 0 },
|
||||
.{ 9, 0 }, .{ 10, 0 }, .{ 11, 0 }, .{ 12, 0 }, .{ 13, 0 }, .{ 14, 0 },
|
||||
.{ 15, 0 }, .{ 16, 0 }, .{ 17, 0 }, .{ 18, 0 }, .{ 19, 0 }, .{ 20, 0 },
|
||||
.{ 21, 0 }, .{ 22, 0 }, .{ 23, 0 }, .{ 24, 0 }, .{ 25, 0 }, .{ 26, 0 },
|
||||
.{ 27, 0 }, .{ 28, 0 }, .{ 29, 0 }, .{ 30, 0 }, .{ 31, 0 }, .{ 32, 0 },
|
||||
.{ 33, 0 }, .{ 34, 0 }, .{ 35, 1 }, .{ 37, 1 }, .{ 39, 1 }, .{ 41, 1 },
|
||||
.{ 43, 2 }, .{ 47, 2 }, .{ 51, 3 }, .{ 59, 3 }, .{ 67, 4 }, .{ 83, 4 },
|
||||
.{ 99, 5 }, .{ 131, 7 }, .{ 259, 8 }, .{ 515, 9 }, .{ 1027, 10 }, .{ 2051, 11 },
|
||||
.{ 4099, 12 }, .{ 8195, 13 }, .{ 16387, 14 }, .{ 32771, 15 }, .{ 65539, 16 },
|
||||
};
|
||||
|
||||
pub const table_accuracy_log_max = struct {
|
||||
pub const literal = 9;
|
||||
pub const match = 9;
|
||||
pub const offset = 8;
|
||||
};
|
||||
|
||||
pub const table_symbol_count_max = struct {
|
||||
pub const literal = 36;
|
||||
pub const match = 53;
|
||||
pub const offset = 32;
|
||||
};
|
||||
|
||||
pub const default_accuracy_log = struct {
|
||||
pub const literal = 6;
|
||||
pub const match = 6;
|
||||
pub const offset = 5;
|
||||
};
|
||||
pub const table_size_max = struct {
|
||||
pub const literal = 1 << table_accuracy_log_max.literal;
|
||||
pub const match = 1 << table_accuracy_log_max.match;
|
||||
pub const offset = 1 << table_accuracy_log_max.offset;
|
||||
};
|
||||
|
||||
fn testDecompress(gpa: std.mem.Allocator, compressed: []const u8) ![]u8 {
|
||||
var out: std.ArrayListUnmanaged(u8) = .empty;
|
||||
defer out.deinit(gpa);
|
||||
try out.ensureUnusedCapacity(gpa, default_window_len);
|
||||
|
||||
var in: std.io.Reader = .fixed(compressed);
|
||||
var zstd_stream: Decompress = .init(&in, &.{}, .{});
|
||||
try zstd_stream.reader.appendRemaining(gpa, null, &out, .unlimited);
|
||||
|
||||
return out.toOwnedSlice(gpa);
|
||||
}
|
||||
|
||||
fn testExpectDecompress(uncompressed: []const u8, compressed: []const u8) !void {
|
||||
const gpa = std.testing.allocator;
|
||||
const result = try testDecompress(gpa, compressed);
|
||||
defer gpa.free(result);
|
||||
try std.testing.expectEqualSlices(u8, uncompressed, result);
|
||||
}
|
||||
|
||||
fn testExpectDecompressError(err: anyerror, compressed: []const u8) !void {
|
||||
const gpa = std.testing.allocator;
|
||||
|
||||
var out: std.ArrayListUnmanaged(u8) = .empty;
|
||||
defer out.deinit(gpa);
|
||||
try out.ensureUnusedCapacity(gpa, default_window_len);
|
||||
|
||||
var in: std.io.Reader = .fixed(compressed);
|
||||
var zstd_stream: Decompress = .init(&in, &.{}, .{});
|
||||
try std.testing.expectError(
|
||||
error.ReadFailed,
|
||||
zstd_stream.reader.appendRemaining(gpa, null, &out, .unlimited),
|
||||
);
|
||||
try std.testing.expectError(err, zstd_stream.err orelse {});
|
||||
}
|
||||
|
||||
test Decompress {
|
||||
const uncompressed = @embedFile("testdata/rfc8478.txt");
|
||||
const compressed3 = @embedFile("testdata/rfc8478.txt.zst.3");
|
||||
const compressed19 = @embedFile("testdata/rfc8478.txt.zst.19");
|
||||
|
||||
try testExpectDecompress(uncompressed, compressed3);
|
||||
try testExpectDecompress(uncompressed, compressed19);
|
||||
}
|
||||
|
||||
test "zero sized raw block" {
|
||||
const input_raw =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
|
||||
"\x01\x00\x00"; // block header with: last_block set, block_type raw, block_size zero
|
||||
try testExpectDecompress("", input_raw);
|
||||
}
|
||||
|
||||
test "zero sized rle block" {
|
||||
const input_rle =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x20\x00" ++ // frame header: only single_segment_flag set, frame_content_size zero
|
||||
"\x03\x00\x00" ++ // block header with: last_block set, block_type rle, block_size zero
|
||||
"\xaa"; // block_content
|
||||
try testExpectDecompress("", input_rle);
|
||||
}
|
||||
|
||||
test "declared raw literals size too large" {
|
||||
const input_raw =
|
||||
"\x28\xb5\x2f\xfd" ++ // zstandard frame magic number
|
||||
"\x00\x00" ++ // frame header: everything unset, window descriptor zero
|
||||
"\x95\x00\x00" ++ // block header with: last_block set, block_type compressed, block_size 18
|
||||
"\xbc\xf3\xae" ++ // literals section header with: type raw, size_format 3, regenerated_size 716603
|
||||
"\xa5\x9f\xe3"; // some bytes of literal content - the content is shorter than regenerated_size
|
||||
|
||||
// Note that the regenerated_size in the above input is larger than the maximum block size, so the
|
||||
// block can't be valid as it is a raw literals block.
|
||||
try testExpectDecompressError(error.MalformedLiteralsSection, input_raw);
|
||||
}
|
||||
1818
lib/std/compress/zstd/Decompress.zig
Normal file
File diff suppressed because it is too large
|
|
@ -2026,7 +2026,19 @@ pub const Writer = struct {
|
|||
/// along with other write failures.
|
||||
pub fn end(w: *Writer) EndError!void {
|
||||
try w.interface.flush();
|
||||
return w.file.setEndPos(w.pos);
|
||||
switch (w.mode) {
|
||||
.positional,
|
||||
.positional_reading,
|
||||
=> w.file.setEndPos(w.pos) catch |err| switch (err) {
|
||||
error.NonResizable => return,
|
||||
else => |e| return e,
|
||||
},
|
||||
|
||||
.streaming,
|
||||
.streaming_reading,
|
||||
.failure,
|
||||
=> {},
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -129,11 +129,10 @@ pub const Request = struct {
|
|||
pub const Compression = union(enum) {
|
||||
pub const DeflateDecompressor = std.compress.zlib.Decompressor(std.io.AnyReader);
|
||||
pub const GzipDecompressor = std.compress.gzip.Decompressor(std.io.AnyReader);
|
||||
pub const ZstdDecompressor = std.compress.zstd.Decompressor(std.io.AnyReader);
|
||||
|
||||
deflate: DeflateDecompressor,
|
||||
gzip: GzipDecompressor,
|
||||
zstd: ZstdDecompressor,
|
||||
zstd: std.compress.zstd.Decompress,
|
||||
none: void,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1052,6 +1052,7 @@ pub const TruncateError = error{
|
|||
FileBusy,
|
||||
AccessDenied,
|
||||
PermissionDenied,
|
||||
NonResizable,
|
||||
} || UnexpectedError;
|
||||
|
||||
/// Length must be positive when treated as an i64.
|
||||
|
|
@ -1091,7 +1092,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void {
|
|||
.PERM => return error.PermissionDenied,
|
||||
.TXTBSY => return error.FileBusy,
|
||||
.BADF => unreachable, // Handle not open for writing
|
||||
.INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle
|
||||
.INVAL => return error.NonResizable,
|
||||
.NOTCAPABLE => return error.AccessDenied,
|
||||
else => |err| return unexpectedErrno(err),
|
||||
}
|
||||
|
|
@ -1107,7 +1108,7 @@ pub fn ftruncate(fd: fd_t, length: u64) TruncateError!void {
|
|||
.PERM => return error.PermissionDenied,
|
||||
.TXTBSY => return error.FileBusy,
|
||||
.BADF => unreachable, // Handle not open for writing
|
||||
.INVAL => unreachable, // Handle not open for writing, negative length, or non-resizable handle
|
||||
.INVAL => return error.NonResizable, // This is returned for /dev/null for example.
|
||||
else => |err| return unexpectedErrno(err),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1198,7 +1198,8 @@ fn unpackResource(
|
|||
|
||||
switch (file_type) {
|
||||
.tar => {
|
||||
var adapter = resource.reader().adaptToNewApi();
|
||||
var adapter_buffer: [1024]u8 = undefined;
|
||||
var adapter = resource.reader().adaptToNewApi(&adapter_buffer);
|
||||
return unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
|
||||
},
|
||||
.@"tar.gz" => {
|
||||
|
|
@ -1225,16 +1226,14 @@ fn unpackResource(
|
|||
return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
|
||||
},
|
||||
.@"tar.zst" => {
|
||||
const window_size = std.compress.zstd.DecompressorOptions.default_window_buffer_len;
|
||||
const window_size = std.compress.zstd.default_window_len;
|
||||
const window_buffer = try f.arena.allocator().create([window_size]u8);
|
||||
const reader = resource.reader();
|
||||
var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, reader);
|
||||
var dcp = std.compress.zstd.decompressor(br.reader(), .{
|
||||
.window_buffer = window_buffer,
|
||||
var adapter_buffer: [std.crypto.tls.max_ciphertext_record_len]u8 = undefined;
|
||||
var adapter = resource.reader().adaptToNewApi(&adapter_buffer);
|
||||
var decompress: std.compress.zstd.Decompress = .init(&adapter.new_interface, window_buffer, .{
|
||||
.verify_checksum = false,
|
||||
});
|
||||
var adapter_buffer: [1024]u8 = undefined;
|
||||
var adapter = dcp.reader().adaptToNewApi(&adapter_buffer);
|
||||
return try unpackTarball(f, tmp_directory.handle, &adapter.new_interface);
|
||||
return try unpackTarball(f, tmp_directory.handle, &decompress.reader);
|
||||
},
|
||||
.git_pack => return unpackGitPack(f, tmp_directory.handle, &resource.git) catch |err| switch (err) {
|
||||
error.FetchFailed => return error.FetchFailed,
|
||||
|
|
|
|||
|
|
@ -1281,7 +1281,7 @@ pub fn indexPack(allocator: Allocator, format: Oid.Format, pack: std.fs.File, in
|
|||
}
|
||||
@memset(fan_out_table[fan_out_index..], count);
|
||||
|
||||
var index_hashed_writer = std.compress.hashedWriter(index_writer, Oid.Hasher.init(format));
|
||||
var index_hashed_writer = hashedWriter(index_writer, Oid.Hasher.init(format));
|
||||
const writer = index_hashed_writer.writer();
|
||||
try writer.writeAll(IndexHeader.signature);
|
||||
try writer.writeInt(u32, IndexHeader.supported_version, .big);
|
||||
|
|
@ -1331,7 +1331,7 @@ fn indexPackFirstPass(
|
|||
) !Oid {
|
||||
var pack_buffered_reader = std.io.bufferedReader(pack.deprecatedReader());
|
||||
var pack_counting_reader = std.io.countingReader(pack_buffered_reader.reader());
|
||||
var pack_hashed_reader = std.compress.hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
|
||||
var pack_hashed_reader = hashedReader(pack_counting_reader.reader(), Oid.Hasher.init(format));
|
||||
const pack_reader = pack_hashed_reader.reader();
|
||||
|
||||
const pack_header = try PackHeader.read(pack_reader);
|
||||
|
|
@ -1339,13 +1339,13 @@ fn indexPackFirstPass(
|
|||
var current_entry: u32 = 0;
|
||||
while (current_entry < pack_header.total_objects) : (current_entry += 1) {
|
||||
const entry_offset = pack_counting_reader.bytes_read;
|
||||
var entry_crc32_reader = std.compress.hashedReader(pack_reader, std.hash.Crc32.init());
|
||||
var entry_crc32_reader = hashedReader(pack_reader, std.hash.Crc32.init());
|
||||
const entry_header = try EntryHeader.read(format, entry_crc32_reader.reader());
|
||||
switch (entry_header) {
|
||||
.commit, .tree, .blob, .tag => |object| {
|
||||
var entry_decompress_stream = std.compress.zlib.decompressor(entry_crc32_reader.reader());
|
||||
var entry_counting_reader = std.io.countingReader(entry_decompress_stream.reader());
|
||||
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
|
||||
var entry_hashed_writer = hashedWriter(std.io.null_writer, Oid.Hasher.init(format));
|
||||
const entry_writer = entry_hashed_writer.writer();
|
||||
// The object header is not included in the pack data but is
|
||||
// part of the object's ID
|
||||
|
|
@ -1432,7 +1432,7 @@ fn indexPackHashDelta(
|
|||
const base_data = try resolveDeltaChain(allocator, format, pack, base_object, delta_offsets.items, cache);
|
||||
|
||||
var entry_hasher: Oid.Hasher = .init(format);
|
||||
var entry_hashed_writer = std.compress.hashedWriter(std.io.null_writer, &entry_hasher);
|
||||
var entry_hashed_writer = hashedWriter(std.io.null_writer, &entry_hasher);
|
||||
try entry_hashed_writer.writer().print("{s} {}\x00", .{ @tagName(base_object.type), base_data.len });
|
||||
entry_hasher.update(base_data);
|
||||
return entry_hasher.finalResult();
|
||||
|
|
@ -1703,3 +1703,58 @@ pub fn main() !void {
|
|||
std.debug.print("Diagnostic: {}\n", .{err});
|
||||
}
|
||||
}
|
||||
|
||||
/// Deprecated
|
||||
fn hashedReader(reader: anytype, hasher: anytype) HashedReader(@TypeOf(reader), @TypeOf(hasher)) {
|
||||
return .{ .child_reader = reader, .hasher = hasher };
|
||||
}
|
||||
|
||||
/// Deprecated
|
||||
fn HashedReader(ReaderType: type, HasherType: type) type {
|
||||
return struct {
|
||||
child_reader: ReaderType,
|
||||
hasher: HasherType,
|
||||
|
||||
pub const Error = ReaderType.Error;
|
||||
pub const Reader = std.io.GenericReader(*@This(), Error, read);
|
||||
|
||||
pub fn read(self: *@This(), buf: []u8) Error!usize {
|
||||
const amt = try self.child_reader.read(buf);
|
||||
self.hasher.update(buf[0..amt]);
|
||||
return amt;
|
||||
}
|
||||
|
||||
pub fn reader(self: *@This()) Reader {
|
||||
return .{ .context = self };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Deprecated
|
||||
pub fn HashedWriter(WriterType: type, HasherType: type) type {
|
||||
return struct {
|
||||
child_writer: WriterType,
|
||||
hasher: HasherType,
|
||||
|
||||
pub const Error = WriterType.Error;
|
||||
pub const Writer = std.io.GenericWriter(*@This(), Error, write);
|
||||
|
||||
pub fn write(self: *@This(), buf: []const u8) Error!usize {
|
||||
const amt = try self.child_writer.write(buf);
|
||||
self.hasher.update(buf[0..amt]);
|
||||
return amt;
|
||||
}
|
||||
|
||||
pub fn writer(self: *@This()) Writer {
|
||||
return .{ .context = self };
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Deprecated
|
||||
pub fn hashedWriter(
|
||||
writer: anytype,
|
||||
hasher: anytype,
|
||||
) HashedWriter(@TypeOf(writer), @TypeOf(hasher)) {
|
||||
return .{ .child_writer = writer, .hasher = hasher };
|
||||
}
|
||||
|
|
|
|||