mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 05:44:20 +00:00
std.compress.xz: skeleton in place
Missing these things:
- implementation of finish()
- detect packed bytes read for check and block padding
- implementation of discard()
- implementation of block stream checksum
This commit is contained in:
parent
a8ae6c2f42
commit
d87eb7d4e4
2 changed files with 236 additions and 203 deletions
|
|
@ -26,6 +26,8 @@ pub const Error = error{
|
|||
WrongChecksum,
|
||||
Unsupported,
|
||||
Overflow,
|
||||
InvalidRangeCode,
|
||||
DecompressedSizeMismatch,
|
||||
};
|
||||
|
||||
pub const Check = enum(u4) {
|
||||
|
|
@ -55,14 +57,14 @@ pub fn init(
|
|||
gpa: Allocator,
|
||||
/// Decompress takes ownership of this buffer and resizes it with `gpa`.
|
||||
buffer: []u8,
|
||||
) Decompress {
|
||||
const magic = try input.takeBytes(6);
|
||||
if (!std.mem.eql(u8, &magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
|
||||
) !Decompress {
|
||||
const magic = try input.takeArray(6);
|
||||
if (!std.mem.eql(u8, magic, &.{ 0xFD, '7', 'z', 'X', 'Z', 0x00 }))
|
||||
return error.NotXzStream;
|
||||
|
||||
const actual_hash = Crc32.hash(try input.peek(@sizeOf(StreamFlags)));
|
||||
const stream_flags = input.takeStruct(StreamFlags, .little) catch unreachable;
|
||||
const stored_hash = try input.readInt(u32, .little);
|
||||
const stored_hash = try input.takeInt(u32, .little);
|
||||
if (actual_hash != stored_hash) return error.WrongChecksum;
|
||||
|
||||
return .{
|
||||
|
|
@ -71,6 +73,7 @@ pub fn init(
|
|||
.vtable = &.{
|
||||
.stream = stream,
|
||||
.readVec = readVec,
|
||||
.discard = discard,
|
||||
},
|
||||
.buffer = buffer,
|
||||
.seek = 0,
|
||||
|
|
@ -83,206 +86,232 @@ pub fn init(
|
|||
};
|
||||
}
|
||||
|
||||
fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
|
||||
_ = w;
|
||||
_ = limit;
|
||||
const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
|
||||
_ = d;
|
||||
@panic("TODO");
|
||||
/// Reclaim ownership of the buffer passed to `init`.
///
/// Returns whatever slice the reader currently holds and leaves the reader
/// with an empty buffer, so a later `deinit` frees nothing that the caller
/// now owns.
pub fn takeBuffer(d: *Decompress) []u8 {
    const reader = &d.reader;
    const taken = reader.buffer;
    reader.buffer = &.{};
    return taken;
}
|
||||
|
||||
/// Frees the reader's buffer with `d.gpa` and invalidates `d`.
///
/// `d` must not be used after this call: the whole struct is overwritten
/// with `undefined`.
pub fn deinit(d: *Decompress) void {
    d.gpa.free(d.reader.buffer);
    d.* = undefined;
}
|
||||
|
||||
/// `readVec` entry of the reader vtable.
///
/// The caller-provided vectors are ignored; decoded bytes are appended to the
/// reader's own buffer by `readIndirect`, whose result is returned as-is.
fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
    // Output always goes through `r.buffer`, never through `data`.
    _ = data;
    return readIndirect(r);
}
|
||||
|
||||
/// `stream` entry of the reader vtable.
///
/// Neither the destination writer nor the limit is used; decoded bytes are
/// appended to the reader's own buffer by `readIndirect`, whose result is
/// returned as-is.
fn stream(r: *Reader, w: *Writer, limit: std.Io.Limit) Reader.StreamError!usize {
    // Output always goes through `r.buffer`, never through `w`.
    _ = limit;
    _ = w;
    return readIndirect(r);
}
|
||||
|
||||
/// `discard` entry of the reader vtable.
///
/// Not implemented yet: every call panics.
fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
    _ = limit;
    // Recover the owning `Decompress` from its embedded `reader` field; kept
    // so the eventual implementation has it at hand.
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    _ = d;
    @panic("TODO");
}
|
||||
|
||||
// if (buffer.len == 0)
|
||||
// return 0;
|
||||
//
|
||||
// const r = try self.block_decode.read(buffer);
|
||||
// if (r != 0)
|
||||
// return r;
|
||||
//
|
||||
// const index_size = blk: {
|
||||
// var hasher = hashedReader(self.in_reader, Crc32.init());
|
||||
// hasher.hasher.update(&[1]u8{0x00});
|
||||
//
|
||||
// var counter = std.io.countingReader(hasher.reader());
|
||||
// counter.bytes_read += 1;
|
||||
//
|
||||
// const counting_reader = counter.reader();
|
||||
//
|
||||
// const record_count = try std.leb.readUleb128(u64, counting_reader);
|
||||
// if (record_count != self.block_decode.block_count)
|
||||
// return error.CorruptInput;
|
||||
//
|
||||
// var i: usize = 0;
|
||||
// while (i < record_count) : (i += 1) {
|
||||
// // TODO: validate records
|
||||
// _ = try std.leb.readUleb128(u64, counting_reader);
|
||||
// _ = try std.leb.readUleb128(u64, counting_reader);
|
||||
// }
|
||||
//
|
||||
// while (counter.bytes_read % 4 != 0) {
|
||||
// if (try counting_reader.readByte() != 0)
|
||||
// return error.CorruptInput;
|
||||
// }
|
||||
//
|
||||
// const hash_a = hasher.hasher.final();
|
||||
// const hash_b = try counting_reader.readInt(u32, .little);
|
||||
// if (hash_a != hash_b)
|
||||
// return error.WrongChecksum;
|
||||
//
|
||||
// break :blk counter.bytes_read;
|
||||
// };
|
||||
//
|
||||
// const hash_a = try self.in_reader.readInt(u32, .little);
|
||||
//
|
||||
// const hash_b = blk: {
|
||||
// var hasher = hashedReader(self.in_reader, Crc32.init());
|
||||
// const hashed_reader = hasher.reader();
|
||||
//
|
||||
// const backward_size = (@as(u64, try hashed_reader.readInt(u32, .little)) + 1) * 4;
|
||||
// if (backward_size != index_size)
|
||||
// return error.CorruptInput;
|
||||
//
|
||||
// var check: Check = undefined;
|
||||
// try readStreamFlags(hashed_reader, &check);
|
||||
//
|
||||
// break :blk hasher.hasher.final();
|
||||
// };
|
||||
//
|
||||
// if (hash_a != hash_b)
|
||||
// return error.WrongChecksum;
|
||||
//
|
||||
// const magic = try self.in_reader.readBytesNoEof(2);
|
||||
// if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
|
||||
// return error.CorruptInput;
|
||||
//
|
||||
// return 0;
|
||||
//}
|
||||
/// Decodes the next XZ block from `d.input` and appends its uncompressed
/// bytes directly to `r.buffer`, growing that buffer with `d.gpa` as needed.
///
/// Shared implementation behind the `stream` and `readVec` vtable entries.
///
/// Returns 0 after a block has been appended (the new bytes are already in
/// the reader's buffer). Returns `error.EndOfStream` once the end of the
/// block sequence has been reached. Every other failure, including a
/// truncated or mismatching block check, is recorded in `d.err` and reported
/// as `error.ReadFailed`.
fn readIndirect(r: *Reader) Reader.Error!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    const gpa = d.gpa;
    const input = d.input;

    // Temporarily hand the reader's buffer to an allocating writer so the
    // block decoder can append to it; give it back on every exit path.
    var allocating = Writer.Allocating.initOwnedSlice(gpa, r.buffer);
    allocating.writer.end = r.end;
    defer {
        r.buffer = allocating.writer.buffer;
        r.end = allocating.writer.end;
    }

    // `block_count` is set to `maxInt(usize)` below once the stream has ended.
    if (d.block_count == std.math.maxInt(usize)) return error.EndOfStream;

    const unpacked_start = allocating.writer.end;

    readBlock(input, &allocating) catch |err| switch (err) {
        error.WriteFailed => {
            d.err = error.OutOfMemory;
            return error.ReadFailed;
        },
        error.SuccessfulEndOfStream => {
            finish(d);
            d.block_count = std.math.maxInt(usize);
            return error.EndOfStream;
        },
        else => |e| {
            d.err = e;
            return error.ReadFailed;
        },
    };

    // `readBlock` appends the block's uncompressed bytes to the writer, so
    // everything written since `unpacked_start` is what the check covers.
    const unpacked = allocating.writer.buffer[unpacked_start..allocating.writer.end];

    // A failure while reading the check field must not surface as
    // `error.EndOfStream`: that would make a truncated input look like a
    // clean end of the decompressed stream.
    switch (d.check) {
        .none => {},
        .crc32 => {
            const declared_checksum = input.takeInt(u32, .little) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            if (Crc32.hash(unpacked) != declared_checksum) {
                d.err = error.WrongChecksum;
                return error.ReadFailed;
            }
        },
        .crc64 => {
            const declared_checksum = input.takeInt(u64, .little) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            if (std.hash.crc.Crc64Xz.hash(unpacked) != declared_checksum) {
                d.err = error.WrongChecksum;
                return error.ReadFailed;
            }
        },
        .sha256 => {
            const declared_hash = input.take(Sha256.digest_length) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            var computed_hash: [Sha256.digest_length]u8 = undefined;
            Sha256.hash(unpacked, &computed_hash, .{});
            if (!std.mem.eql(u8, &computed_hash, declared_hash)) {
                d.err = error.WrongChecksum;
                return error.ReadFailed;
            }
        },
        else => {
            d.err = error.Unsupported;
            return error.ReadFailed;
        },
    }

    d.block_count += 1;
    return 0;
}
|
||||
|
||||
/// Reads one block from `input`: validates the block header, then decodes the
/// LZMA2 payload into `allocating`.
///
/// Returns `error.SuccessfulEndOfStream` when the next byte is zero, meaning
/// no further block header follows; that byte is only peeked, not consumed.
/// Returns `error.Unsupported` for reserved flag bits, more than one filter,
/// or a filter other than LZMA2. Returns `error.CorruptInput` for a malformed
/// header or an uncompressed-size mismatch, and `error.WrongChecksum` when the
/// header CRC32 does not match.
///
/// Block padding is not handled yet: a block that decodes successfully
/// currently ends in a panic.
fn readBlock(input: *Reader, allocating: *Writer.Allocating) !void {
    var packed_size: ?u64 = null;
    var unpacked_size: ?u64 = null;

    {
        // Read the block header via peeking so that we can hash the whole thing too.
        const first_byte: usize = try input.peekByte();
        if (first_byte == 0) return error.SuccessfulEndOfStream;

        // Size of the header in bytes, not counting the CRC32 that follows it.
        const declared_header_size = first_byte * 4;
        try input.fill(declared_header_size);
        const header_seek_start = input.seek;
        input.toss(1);

        const Flags = packed struct(u8) {
            last_filter_index: u2,
            reserved: u4,
            has_packed_size: bool,
            has_unpacked_size: bool,
        };
        const flags = try input.takeStruct(Flags, .little);

        // The XZ format requires a decoder to reject any set reserved bit: it
        // may announce a header field this decoder does not know how to skip.
        if (flags.reserved != 0) return error.Unsupported;

        const filter_count = @as(u3, flags.last_filter_index) + 1;
        if (filter_count > 1) return error.Unsupported;

        if (flags.has_packed_size) packed_size = try input.takeLeb128(u64);
        if (flags.has_unpacked_size) unpacked_size = try input.takeLeb128(u64);

        const FilterId = enum(u64) {
            lzma2 = 0x21,
            _,
        };

        const filter_id: FilterId = @enumFromInt(try input.takeLeb128(u64));
        if (filter_id != .lzma2) return error.Unsupported;

        const properties_size = try input.takeLeb128(u64);
        if (properties_size != 1) return error.CorruptInput;
        // TODO: use filter properties
        _ = try input.takeByte();

        // Whatever is left of the declared header must be zero padding.
        const actual_header_size = input.seek - header_seek_start;
        if (actual_header_size > declared_header_size) return error.CorruptInput;
        var remaining_bytes = declared_header_size - actual_header_size;
        while (remaining_bytes != 0) {
            if (try input.takeByte() != 0) return error.CorruptInput;
            remaining_bytes -= 1;
        }

        // Hash the header straight out of the input buffer before reading the
        // stored CRC32.
        const header_slice = input.buffer[header_seek_start..][0..declared_header_size];
        const actual_hash = Crc32.hash(header_slice);
        const declared_hash = try input.takeInt(u32, .little);
        if (actual_hash != declared_hash) return error.WrongChecksum;
    }

    // Compressed Data

    // NOTE(review): `lzma2_decode` is never released here; confirm whether
    // `lzma2.Decode` owns memory that needs a matching deinit.
    var lzma2_decode = try lzma2.Decode.init(allocating.allocator);
    const before_size = allocating.writer.end;
    try lzma2_decode.decompress(input, allocating);
    const unpacked_bytes = allocating.writer.end - before_size;

    // TODO restore this check
    //if (packed_size) |s| {
    //    if (s != packed_counter.bytes_read)
    //        return error.CorruptInput;
    //}

    if (unpacked_size) |s| {
        if (s != unpacked_bytes) return error.CorruptInput;
    }

    // Block Padding
    if (true) @panic("TODO account for block padding");
    //while (block_counter.bytes_read % 4 != 0) {
    //    if (try block_reader.takeByte() != 0)
    //        return error.CorruptInput;
    //}
}
|
||||
|
||||
/// Called by `readIndirect` once `readBlock` reports that no further block
/// follows.
///
/// Not implemented yet: every call panics. The commented-out code below is
/// the partially ported sketch of the intended index and stream footer
/// validation.
fn finish(d: *Decompress) void {
    _ = d;
    @panic("TODO");
    //const input = d.input;
    //const index_size = blk: {
    //    const record_count = try input.takeLeb128(u64);
    //    if (record_count != d.block_decode.block_count)
    //        return error.CorruptInput;

    //    var i: usize = 0;
    //    while (i < record_count) : (i += 1) {
    //        // TODO: validate records
    //        _ = try std.leb.readUleb128(u64, counting_reader);
    //        _ = try std.leb.readUleb128(u64, counting_reader);
    //    }

    //    while (counter.bytes_read % 4 != 0) {
    //        if (try counting_reader.takeByte() != 0)
    //            return error.CorruptInput;
    //    }

    //    const hash_a = hasher.hasher.final();
    //    const hash_b = try counting_reader.takeInt(u32, .little);
    //    if (hash_a != hash_b)
    //        return error.WrongChecksum;

    //    break :blk counter.bytes_read;
    //};

    //const hash_a = try d.in_reader.takeInt(u32, .little);

    //const hash_b = blk: {
    //    var hasher = hashedReader(d.in_reader, Crc32.init());
    //    const hashed_reader = hasher.reader();

    //    const backward_size = (@as(u64, try hashed_reader.takeInt(u32, .little)) + 1) * 4;
    //    if (backward_size != index_size)
    //        return error.CorruptInput;

    //    var check: Check = undefined;
    //    try readStreamFlags(hashed_reader, &check);

    //    break :blk hasher.hasher.final();
    //};

    //if (hash_a != hash_b)
    //    return error.WrongChecksum;

    //const magic = try d.in_reader.takeBytesNoEof(2);
    //if (!std.mem.eql(u8, &magic, &.{ 'Y', 'Z' }))
    //    return error.CorruptInput;

    //return 0;
}
|
||||
|
|
|
|||
|
|
@ -3,19 +3,23 @@ const testing = std.testing;
|
|||
const xz = std.compress.xz;
|
||||
|
||||
/// Test helper: decompresses the XZ stream in `data` and returns the
/// uncompressed bytes, allocated with `testing.allocator`.
///
/// The caller owns the returned slice and must free it with
/// `testing.allocator`.
fn decompress(data: []const u8) ![]u8 {
    const gpa = testing.allocator;

    var in_stream: std.Io.Reader = .fixed(data);

    // Start with an empty buffer; it is passed to `init` together with `gpa`
    // and released again by `deinit`.
    var xz_stream = try xz.Decompress.init(&in_stream, gpa, &.{});
    defer xz_stream.deinit();

    return xz_stream.reader.allocRemaining(gpa, .unlimited);
}
|
||||
|
||||
/// Test helper: decompresses `data` and checks that the result equals
/// `expected` byte for byte.
fn testReader(data: []const u8, comptime expected: []const u8) !void {
    const gpa = testing.allocator;

    const result = try decompress(data);
    defer gpa.free(result);

    try testing.expectEqualSlices(u8, expected, result);
}
|
||||
|
||||
test "compressed data" {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue