zig/lib/std/compress/flate/container.zig
Igor Anić f2508abfa6 flate: use 4 bytes lookahead for zlib
That ensures no bytes are left in the BitReader buffer after we reach
end of the stream.
2024-03-04 09:53:01 +01:00

208 lines
7.3 KiB
Zig

//! Container of the deflate bit stream body. Container adds header before
//! deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
//! no footer, raw bit stream).
//!
//! Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes
//! Adler-32 checksum.
//!
//! Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
//! crc32 checksum and 4 bytes of uncompressed data length.
//!
//!
//! rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
//! rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
//!
const std = @import("std");
/// Container format wrapped around a deflate bit stream. The variant selects
/// which header is written/parsed before the stream and which footer after it.
pub const Container = enum {
    raw, // no header or footer
    gzip, // gzip header and footer
    zlib, // zlib header and footer

    /// Combined size in bytes of the header and footer for this container,
    /// matching what `writeHeader` and `writeFooter` emit.
    pub fn size(w: Container) usize {
        return headerSize(w) + footerSize(w);
    }

    /// Size in bytes of the header emitted by `writeHeader`.
    /// A gzip header being parsed can be longer than this because of the
    /// optional fields handled in `parseGzipHeader`.
    pub fn headerSize(w: Container) usize {
        return switch (w) {
            .gzip => 10,
            .zlib => 2,
            .raw => 0,
        };
    }

    /// Size in bytes of the footer emitted by `writeFooter`.
    pub fn footerSize(w: Container) usize {
        return switch (w) {
            .gzip => 8,
            .zlib => 4,
            .raw => 0,
        };
    }

    /// Every container variant.
    pub const list = [_]Container{ .raw, .gzip, .zlib };

    /// Container-level errors returned by the header and footer parsers.
    pub const Error = error{
        BadGzipHeader,
        BadZlibHeader,
        WrongGzipChecksum,
        WrongGzipSize,
        WrongZlibChecksum,
    };

    /// Writes the fixed header for `wrap` to `writer`. Nothing is written for
    /// `.raw`. Errors from `writer.writeAll` are propagated.
    pub fn writeHeader(comptime wrap: Container, writer: anytype) !void {
        switch (wrap) {
            .gzip => {
                // GZIP 10 byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5):
                //  - ID1 (IDentification 1), always 0x1f
                //  - ID2 (IDentification 2), always 0x8b
                //  - CM (Compression Method), always 8 = deflate
                //  - FLG (Flags), all set to 0
                //  - 4 bytes, MTIME (Modification time), not used, all set to zero
                //  - XFL (eXtra FLags), all set to zero
                //  - OS (Operating System), 03 = Unix
                const gzip_header = [_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 };
                try writer.writeAll(&gzip_header);
            },
            .zlib => {
                // ZLIB has a two-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4):
                // 1st byte:
                //  - First four bits is the CINFO (compression info), which is 7 for the default deflate window size.
                //  - The next four bits is the CM (compression method), which is 8 for deflate.
                // 2nd byte:
                //  - Two bits is the FLEVEL (compression level). Values are: 0=fastest, 1=fast, 2=default, 3=best.
                //  - The next bit, FDICT, is set if a dictionary is given.
                //  - The final five FCHECK bits form a mod-31 checksum.
                //
                // CINFO = 7, CM = 8, FLEVEL = 0b10, FDICT = 0, FCHECK = 0b11100
                const zlib_header = [_]u8{ 0x78, 0b10_0_11100 };
                try writer.writeAll(&zlib_header);
            },
            .raw => {},
        }
    }

    /// Writes the footer for `wrap` to `writer`, taking the checksum (and for
    /// gzip the byte count) from `hasher`. Nothing is written for `.raw`.
    /// Errors from `writer.writeAll` are propagated.
    pub fn writeFooter(comptime wrap: Container, hasher: *Hasher(wrap), writer: anytype) !void {
        var bits: [4]u8 = undefined;
        switch (wrap) {
            .gzip => {
                // GZIP 8 bytes footer, both fields little-endian:
                //  - 4 bytes, CRC32 (CRC-32)
                //  - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
                std.mem.writeInt(u32, &bits, hasher.chksum(), .little);
                try writer.writeAll(&bits);
                std.mem.writeInt(u32, &bits, hasher.bytesRead(), .little);
                try writer.writeAll(&bits);
            },
            .zlib => {
                // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
                // 4 bytes of ADLER32 (Adler-32 checksum)
                // Checksum value of the uncompressed data (excluding any
                // dictionary data) computed according to Adler-32
                // algorithm.
                std.mem.writeInt(u32, &bits, hasher.chksum(), .big);
                try writer.writeAll(&bits);
            },
            .raw => {},
        }
    }

    /// Consumes and validates the header for `wrap` from `reader`.
    /// Returns `error.BadGzipHeader` / `error.BadZlibHeader` on an invalid
    /// header; errors from `reader` are propagated. `.raw` reads nothing.
    pub fn parseHeader(comptime wrap: Container, reader: anytype) !void {
        switch (wrap) {
            .gzip => try parseGzipHeader(reader),
            .zlib => try parseZlibHeader(reader),
            .raw => {},
        }
    }

    /// Parses a gzip header: checks the two magic bytes and the compression
    /// method, then skips every optional field announced in the flags byte.
    fn parseGzipHeader(reader: anytype) !void {
        const magic1 = try reader.read(u8);
        const magic2 = try reader.read(u8);
        const method = try reader.read(u8);
        const flags = try reader.read(u8);
        try reader.skipBytes(6); // mtime(4), xflags, os
        if (magic1 != 0x1f or magic2 != 0x8b or method != 0x08)
            return error.BadGzipHeader;
        // Flags description: https://www.rfc-editor.org/rfc/rfc1952.html#page-5
        // Each set flag announces one optional field; they are skipped in the
        // order in which they appear in the stream.
        if (flags & 0b0000_0100 != 0) { // FEXTRA: 2 byte length, then payload
            const extra_len = try reader.read(u16);
            try reader.skipBytes(extra_len);
        }
        if (flags & 0b0000_1000 != 0) { // FNAME: zero terminated string
            try reader.skipStringZ();
        }
        if (flags & 0b0001_0000 != 0) { // FCOMMENT: zero terminated string
            try reader.skipStringZ();
        }
        if (flags & 0b0000_0010 != 0) { // FHCRC: 2 byte header crc
            try reader.skipBytes(2);
        }
    }

    /// Parses the two byte zlib header (CMF, FLG) and validates it as
    /// required by RFC 1950. Returns `error.BadZlibHeader` when:
    ///  - CM is not 8 (deflate) or CINFO is greater than 7,
    ///  - CMF*256 + FLG is not a multiple of 31 (FCHECK mismatch),
    ///  - FDICT is set; no preset dictionary can be supplied here, so the
    ///    stream could not be decoded.
    fn parseZlibHeader(reader: anytype) !void {
        // CMF byte, read low nibble first.
        const cm = try reader.read(u4);
        const cinfo = try reader.read(u4);
        // FLG byte: FCHECK (bits 0-4), FDICT (bit 5), FLEVEL (bits 6-7).
        const flg = try reader.read(u8);
        if (cm != 8 or cinfo > 7) {
            return error.BadZlibHeader;
        }
        const cmf: u16 = (@as(u16, cinfo) << 4) | cm;
        if (((cmf << 8) | flg) % 31 != 0) {
            return error.BadZlibHeader;
        }
        if (flg & 0b0010_0000 != 0) { // FDICT
            return error.BadZlibHeader;
        }
    }

    /// Reads the footer for `wrap` from `reader` and compares it with the
    /// values accumulated in `hasher`. Returns `error.WrongGzipChecksum`,
    /// `error.WrongGzipSize` or `error.WrongZlibChecksum` on mismatch; errors
    /// from `reader` are propagated. `.raw` reads nothing.
    pub fn parseFooter(comptime wrap: Container, hasher: *Hasher(wrap), reader: anytype) !void {
        switch (wrap) {
            .gzip => {
                // NOTE(review): `fill` is defined by the reader type, which is
                // not part of this file; confirm why it is needed here.
                try reader.fill(0);
                if (try reader.read(u32) != hasher.chksum()) return error.WrongGzipChecksum;
                if (try reader.read(u32) != hasher.bytesRead()) return error.WrongGzipSize;
            },
            .zlib => {
                // The zlib checksum is stored big-endian (see `writeFooter`);
                // swap the computed value before comparing with the u32 read.
                const chksum: u32 = @byteSwap(hasher.chksum());
                if (try reader.read(u32) != chksum) return error.WrongZlibChecksum;
            },
            .raw => {},
        }
    }

    /// Returns the checksum accumulator type for `wrap`: Crc32 for gzip,
    /// Adler32 for zlib and a no-op for raw. It also counts the bytes passed
    /// to `update`.
    pub fn Hasher(comptime wrap: Container) type {
        const HasherType = switch (wrap) {
            .gzip => std.hash.Crc32,
            .zlib => std.hash.Adler32,
            .raw => struct {
                pub fn init() @This() {
                    return .{};
                }
            },
        };

        return struct {
            hasher: HasherType = HasherType.init(),
            bytes: usize = 0,

            const Self = @This();

            /// Feeds `buf` into the checksum and adds its length to the byte
            /// counter. Does nothing for `.raw`.
            pub fn update(self: *Self, buf: []const u8) void {
                switch (wrap) {
                    .raw => {},
                    else => {
                        self.hasher.update(buf);
                        // Only the low 32 bits are ever reported (`bytesRead`),
                        // so wrap on overflow instead of trapping.
                        self.bytes +%= buf.len;
                    },
                }
            }

            /// Checksum of all data passed to `update` so far; 0 for `.raw`.
            pub fn chksum(self: *Self) u32 {
                switch (wrap) {
                    .raw => return 0,
                    else => return self.hasher.final(),
                }
            }

            /// Number of bytes passed to `update`, truncated to 32 bits.
            pub fn bytesRead(self: *Self) u32 {
                return @truncate(self.bytes);
            }
        };
    }
};