std.compress.flate.Decompress: respect stream limit

This commit is contained in:
Andrew Kelley 2025-07-31 19:24:23 -07:00
parent 6caa100f0d
commit 64814dc986
2 changed files with 72 additions and 24 deletions

View file

@ -2286,6 +2286,13 @@ pub fn fixedDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usiz
} }
} }
/// Drain callback that must never be invoked: it ignores all of its
/// arguments and immediately hits `unreachable`.
pub fn unreachableDrain(w: *Writer, data: []const []const u8, splat: usize) Error!usize {
    // Explicitly discard every parameter; none of them is inspected.
    _ = splat;
    _ = data;
    _ = w;
    unreachable;
}
/// Provides a `Writer` implementation based on calling `Hasher.update`, sending /// Provides a `Writer` implementation based on calling `Hasher.update`, sending
/// all data also to an underlying `Writer`. /// all data also to an underlying `Writer`.
/// ///

View file

@ -37,6 +37,8 @@ const State = union(enum) {
stored_block: u16, stored_block: u16,
fixed_block, fixed_block,
dynamic_block, dynamic_block,
dynamic_block_literal: u8,
dynamic_block_match: u16,
protocol_footer, protocol_footer,
end, end,
}; };
@ -63,7 +65,7 @@ const direct_vtable: Reader.VTable = .{
const indirect_vtable: Reader.VTable = .{ const indirect_vtable: Reader.VTable = .{
.stream = streamIndirect, .stream = streamIndirect,
.rebase = rebaseFallible, .rebase = rebaseFallible,
.discard = discard, .discard = discardIndirect,
.readVec = readVec, .readVec = readVec,
}; };
@ -128,6 +130,26 @@ fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
return n; return n;
} }
/// Streams more decompressed output into the reader's own buffer, then
/// skips up to `limit` of the buffered bytes by advancing `r.seek`.
/// Returns the number of bytes skipped.
fn discardIndirect(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));

    // Rebase first when fewer than `flate.history_len` bytes of buffer
    // remain past `r.end`.
    const needs_rebase = r.end + flate.history_len > r.buffer.len;
    if (needs_rebase) rebase(r, flate.history_len);

    // Temporary writer that shares the reader's buffer and continues at
    // the reader's current end position.
    var sink: Writer = .{
        .buffer = r.buffer,
        .end = r.end,
        .vtable = &.{ .drain = Writer.unreachableDrain },
    };
    {
        // Copy the writer's end position back to the reader when this
        // scope exits, including when streaming returns an error.
        defer r.end = sink.end;
        const room = sink.buffer.len - sink.end;
        _ = streamFallible(d, &sink, .limited(room)) catch |err| switch (err) {
            error.WriteFailed => unreachable,
            else => |other| return other,
        };
    }

    // Skip whatever is buffered, capped by the caller's limit.
    const buffered = r.end - r.seek;
    const skipped = limit.minInt(buffered);
    r.seek += skipped;
    return skipped;
}
fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize { fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
_ = data; _ = data;
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r)); const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
@ -140,7 +162,7 @@ fn streamIndirectInner(d: *Decompress) Reader.Error!usize {
var writer: Writer = .{ var writer: Writer = .{
.buffer = r.buffer, .buffer = r.buffer,
.end = r.end, .end = r.end,
.vtable = &.{ .drain = Writer.fixedDrain }, .vtable = &.{ .drain = Writer.unreachableDrain },
}; };
defer r.end = writer.end; defer r.end = writer.end;
_ = streamFallible(d, &writer, .limited(writer.buffer.len - writer.end)) catch |err| switch (err) { _ = streamFallible(d, &writer, .limited(writer.buffer.len - writer.end)) catch |err| switch (err) {
@ -379,30 +401,49 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
.dynamic_block => { .dynamic_block => {
// In larger archives most blocks are usually dynamic, so // In larger archives most blocks are usually dynamic, so
// decompression performance depends on this logic. // decompression performance depends on this logic.
while (remaining > 0) { var sym = try d.decodeSymbol(&d.lit_dec);
const sym = try d.decodeSymbol(&d.lit_dec); sym: switch (sym.kind) {
.literal => {
switch (sym.kind) { if (remaining != 0) {
.literal => { @branchHint(.likely);
try w.writeBytePreserve(flate.history_len, sym.symbol);
remaining -= 1; remaining -= 1;
}, try w.writeBytePreserve(flate.history_len, sym.symbol);
.match => { sym = try d.decodeSymbol(&d.lit_dec);
// Decode match backreference <length, distance> continue :sym sym.kind;
const length = try d.decodeLength(sym.symbol); } else {
const dsm = try d.decodeSymbol(&d.dst_dec); d.state = .{ .dynamic_block_literal = sym.symbol };
const distance = try d.decodeDistance(dsm.symbol);
try writeMatch(w, length, distance);
remaining -= length;
},
.end_of_block => {
d.state = if (d.final_block) .protocol_footer else .block_header;
return @intFromEnum(limit) - remaining; return @intFromEnum(limit) - remaining;
}, }
} },
.match => {
// Decode match backreference <length, distance>
const length = try d.decodeLength(sym.symbol);
continue :sw .{ .dynamic_block_match = length };
},
.end_of_block => {
d.state = if (d.final_block) .protocol_footer else .block_header;
continue :sw d.state;
},
}
},
.dynamic_block_literal => |symbol| {
assert(remaining != 0);
remaining -= 1;
try w.writeBytePreserve(flate.history_len, symbol);
continue :sw .dynamic_block;
},
.dynamic_block_match => |length| {
if (remaining >= length) {
@branchHint(.likely);
remaining -= length;
const dsm = try d.decodeSymbol(&d.dst_dec);
const distance = try d.decodeDistance(dsm.symbol);
try writeMatch(w, length, distance);
continue :sw .dynamic_block;
} else {
d.state = .{ .dynamic_block_match = length };
return @intFromEnum(limit) - remaining;
} }
d.state = .dynamic_block;
return @intFromEnum(limit) - remaining;
}, },
.protocol_footer => { .protocol_footer => {
switch (d.container_metadata) { switch (d.container_metadata) {
@ -424,7 +465,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader
}, },
} }
d.state = .end; d.state = .end;
return 0; return @intFromEnum(limit) - remaining;
}, },
.end => return error.EndOfStream, .end => return error.EndOfStream,
} }