std.compress.zstd: keep frame state between blocks

This commit is contained in:
Andrew Kelley 2025-07-25 13:15:23 -07:00
parent 13e08ea6f0
commit 7f43f78b5b

View file

@ -23,6 +23,7 @@ const State = union(enum) {
frame: Frame, frame: Frame,
checksum: ?u32, checksum: ?u32,
decompressed_size: usize, decompressed_size: usize,
decode: Frame.Zstandard.Decode,
}; };
}; };
@ -138,6 +139,7 @@ fn initFrame(d: *Decompress, window_size_max: usize, magic: Frame.Magic) !void {
.frame = try Frame.init(header, window_size_max, d.verify_checksum), .frame = try Frame.init(header, window_size_max, d.verify_checksum),
.checksum = null, .checksum = null,
.decompressed_size = 0, .decompressed_size = 0,
.decode = .init,
} }; } };
}, },
.skippable => { .skippable => {
@ -168,16 +170,13 @@ fn readInFrame(d: *Decompress, w: *Writer, limit: Limit, state: *State.InFrame)
bytes_written = block_size; bytes_written = block_size;
}, },
.compressed => { .compressed => {
var literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse = undefined;
var match_fse_buffer: [zstd.table_size_max.match]Table.Fse = undefined;
var offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse = undefined;
var literals_buffer: [zstd.block_size_max]u8 = undefined; var literals_buffer: [zstd.block_size_max]u8 = undefined;
var sequence_buffer: [zstd.block_size_max]u8 = undefined; var sequence_buffer: [zstd.block_size_max]u8 = undefined;
var decode: Frame.Zstandard.Decode = .init(&literal_fse_buffer, &match_fse_buffer, &offset_fse_buffer, window_len);
var remaining: Limit = .limited(block_size); var remaining: Limit = .limited(block_size);
const literals = try LiteralsSection.decode(in, &remaining, &literals_buffer); const literals = try LiteralsSection.decode(in, &remaining, &literals_buffer);
const sequences_header = try SequencesSection.Header.decode(in, &remaining); const sequences_header = try SequencesSection.Header.decode(in, &remaining);
const decode = &state.decode;
try decode.prepare(in, &remaining, literals, sequences_header); try decode.prepare(in, &remaining, literals, sequences_header);
{ {
@ -370,16 +369,15 @@ pub const Frame = struct {
}; };
pub const Decode = struct { pub const Decode = struct {
window_len: u32,
repeat_offsets: [3]u32, repeat_offsets: [3]u32,
offset: StateData(8), offset: StateData(8),
match: StateData(9), match: StateData(9),
literal: StateData(9), literal: StateData(9),
offset_fse_buffer: []Table.Fse, literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse,
match_fse_buffer: []Table.Fse, match_fse_buffer: [zstd.table_size_max.match]Table.Fse,
literal_fse_buffer: []Table.Fse, offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse,
fse_tables_undefined: bool, fse_tables_undefined: bool,
@ -401,38 +399,30 @@ pub const Frame = struct {
}; };
} }
pub fn init( const init: Decode = .{
literal_fse_buffer: []Table.Fse, .repeat_offsets = .{
match_fse_buffer: []Table.Fse, zstd.start_repeated_offset_1,
offset_fse_buffer: []Table.Fse, zstd.start_repeated_offset_2,
window_len: u32, zstd.start_repeated_offset_3,
) Decode { },
return .{
.window_len = window_len,
.repeat_offsets = .{
zstd.start_repeated_offset_1,
zstd.start_repeated_offset_2,
zstd.start_repeated_offset_3,
},
.offset = undefined, .offset = undefined,
.match = undefined, .match = undefined,
.literal = undefined, .literal = undefined,
.literal_fse_buffer = literal_fse_buffer, .literal_fse_buffer = undefined,
.match_fse_buffer = match_fse_buffer, .match_fse_buffer = undefined,
.offset_fse_buffer = offset_fse_buffer, .offset_fse_buffer = undefined,
.fse_tables_undefined = true, .fse_tables_undefined = true,
.literal_written_count = 0, .literal_written_count = 0,
.literal_header = undefined, .literal_header = undefined,
.literal_streams = undefined, .literal_streams = undefined,
.literal_stream_reader = undefined, .literal_stream_reader = undefined,
.literal_stream_index = undefined, .literal_stream_index = undefined,
.huffman_tree = null, .huffman_tree = null,
}; };
}
pub const PrepareError = error{ pub const PrepareError = error{
/// the (reversed) literal bitstream's first byte does not have any bits set /// the (reversed) literal bitstream's first byte does not have any bits set
@ -514,12 +504,12 @@ pub const Frame = struct {
return self.repeat_offsets[0]; return self.repeat_offsets[0];
} }
const DataType = enum { offset, match, literal }; const WhichFse = enum { offset, match, literal };
/// TODO: don't use `@field` /// TODO: don't use `@field`
fn updateState( fn updateState(
self: *Decode, self: *Decode,
comptime choice: DataType, comptime choice: WhichFse,
bit_reader: *ReverseBitReader, bit_reader: *ReverseBitReader,
) error{ MalformedFseBits, EndOfStream }!void { ) error{ MalformedFseBits, EndOfStream }!void {
switch (@field(self, @tagName(choice)).table) { switch (@field(self, @tagName(choice)).table) {
@ -549,7 +539,7 @@ pub const Frame = struct {
self: *Decode, self: *Decode,
in: *Reader, in: *Reader,
remaining: *Limit, remaining: *Limit,
comptime choice: DataType, comptime choice: WhichFse,
mode: SequencesSection.Header.Mode, mode: SequencesSection.Header.Mode,
) !void { ) !void {
const field_name = @tagName(choice); const field_name = @tagName(choice);
@ -576,10 +566,10 @@ pub const Frame = struct {
&bit_reader, &bit_reader,
@field(zstd.table_symbol_count_max, field_name), @field(zstd.table_symbol_count_max, field_name),
@field(zstd.table_accuracy_log_max, field_name), @field(zstd.table_accuracy_log_max, field_name),
@field(self, field_name ++ "_fse_buffer"), &@field(self, field_name ++ "_fse_buffer"),
); );
@field(self, field_name).table = .{ @field(self, field_name).table = .{
.fse = @field(self, field_name ++ "_fse_buffer")[0..table_size], .fse = (&@field(self, field_name ++ "_fse_buffer"))[0..table_size],
}; };
@field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size);
in.toss(bit_reader.index); in.toss(bit_reader.index);
@ -762,7 +752,7 @@ pub const Frame = struct {
} }
/// TODO: don't use `@field` /// TODO: don't use `@field`
fn getCode(self: *Decode, comptime choice: DataType) u32 { fn getCode(self: *Decode, comptime choice: WhichFse) u32 {
return switch (@field(self, @tagName(choice)).table) { return switch (@field(self, @tagName(choice)).table) {
.rle => |value| value, .rle => |value| value,
.fse => |table| table[@field(self, @tagName(choice)).state].symbol, .fse => |table| table[@field(self, @tagName(choice)).state].symbol,