zig/lib/std/compress/flate/Lookup.zig
Shun Sakai a3ad0a2f77 std.compress.flate.Lookup: Replace invisible doc comments with top-level doc comments
I think it would be better if these invisible doc comments were top-level
doc comments rather than regular doc comments, because they are at the start
of a source file. This makes the doc comments visible.
2025-01-22 23:34:57 +09:00

125 lines
3.4 KiB
Zig

//! Lookup of previous locations of the same 4 bytes of data. Works on a hash
//! of the 4 bytes. Head contains the position of the first match for each hash.
//! Chain points to the previous position with the same hash, given the current
//! location.
const std = @import("std");
const testing = std.testing;
const expect = testing.expect;
const consts = @import("consts.zig");
const Self = @This();
// Multiplier used by `hashu`: the 4 byte prime number 2654435761.
const prime4 = 0x9E3779B1;
// Number of entries in `chain`: twice `consts.history.len`.
const chain_len = 2 * consts.history.len;
// Maps hash => most recently inserted position for that hash, i.e. the first
// position of its chain. Every entry starts out as 0.
head: [consts.lookup.len]u16 = [_]u16{0} ** consts.lookup.len,
// Maps position => the position previously stored for the same hash value.
// Every entry starts out as 0.
chain: [chain_len]u16 = [_]u16{0} ** (chain_len),
// Hashes the leading 4 bytes of `data` and records `pos` as the newest
// position for that hash in the lookup tables.
// Returns the position previously stored for the same hash value, or 0 when
// `data` holds fewer than 4 bytes (nothing is inserted in that case).
pub fn add(self: *Self, data: []const u8, pos: u16) u16 {
    if (data.len >= 4) {
        const key = hash(data[0..4]);
        return self.set(key, pos);
    }
    return 0;
}
// Reads the chain entry for `pos`: the earlier position that was stored with
// the same hash value when `pos` was inserted.
pub fn prev(self: *Self, pos: u16) u16 {
    const earlier = self.chain[pos];
    return earlier;
}
// Makes `pos` the newest position for hash `h` and links it, through `chain`,
// to the position that `head` held for `h` before.
// Returns that previous position.
fn set(self: *Self, h: u32, pos: u16) u16 {
    const previous = self.head[h];
    self.chain[pos] = previous;
    self.head[h] = pos;
    return previous;
}
// Shifts stored positions down by `n`, saturating at 0.
// Every `head` entry is reduced in place. The first `n` `chain` entries are
// overwritten with the reduced values of the `n` entries that follow them.
pub fn slide(self: *Self, n: u16) void {
    for (0..self.head.len) |slot| {
        self.head[slot] = self.head[slot] -| n;
    }
    for (0..n) |dst| {
        const src = dst + n;
        self.chain[dst] = self.chain[src] -| n;
    }
}
// Adds `len` consecutive 4 byte hashes from `data` into the lookup.
// `pos` is the position of the first byte of `data`; each following hash is
// inserted at the next position.
// Does nothing when `len` is 0 or `data` is shorter than
// `consts.match.min_length`. Stops early when `data` runs out before `len`
// hashes have been added.
// Positions are `u16`: the caller must make sure the last inserted position
// still fits in `u16`.
pub fn bulkAdd(self: *Self, data: []const u8, len: u16, pos: u16) void {
    if (len == 0 or data.len < consts.match.min_length) {
        return;
    }
    // Big-endian load of the first 4 bytes; `hb` is the rolling 4 byte window.
    var hb =
        @as(u32, data[3]) |
        @as(u32, data[2]) << 8 |
        @as(u32, data[1]) << 16 |
        @as(u32, data[0]) << 24;
    _ = self.set(hashu(hb), pos);
    // Widen `len` before adding: evaluated in `u16`, `len + 3` overflows for
    // any `len` above `maxInt(u16) - 3`.
    const end = @min(@as(usize, len) + 3, data.len);
    var i = pos;
    for (4..end) |j| {
        // Shift the oldest byte out of the window and the next byte in.
        hb = (hb << 8) | @as(u32, data[j]);
        i += 1;
        _ = self.set(hashu(hb), i);
    }
}
// Packs the 4 bytes of `b` into a u32, with `b[0]` in the most significant
// byte, and returns the hash of that value.
fn hash(b: *const [4]u8) u32 {
    var word: u32 = 0;
    for (b) |byte| {
        word = (word << 8) | byte;
    }
    return hashu(word);
}
// Multiplies `v` by `prime4` with wrapping arithmetic and keeps the upper
// bits of the product by shifting it right `consts.lookup.shift` places.
fn hashu(v: u32) u32 {
    const product = v *% prime4;
    const top_bits = product >> consts.lookup.shift;
    return @intCast(top_bits);
}
test add {
    // An 8 byte sequence repeated three times, then its first 3 bytes again.
    const bytes = [_]u8{
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
        0x01, 0x02, 0x03,
    };
    var lookup: Self = .{};
    for (0..bytes.len) |at| {
        const previous = lookup.add(bytes[at..], @intCast(at));
        // Positions 8..23 start a 4 byte window that also started 8 bytes
        // earlier. Every other position is expected to yield 0.
        const expected = if (at >= 8 and at < 24) at - 8 else 0;
        try expect(previous == expected);
    }
    // The window starting at offset 2 was inserted at 2, 10 and 18; walk its
    // chain from the newest entry back to the oldest.
    const key = Self.hash(bytes[2 .. 2 + 4]);
    try expect(lookup.head[key] == 18);
    try expect(lookup.chain[18] == 10);
    try expect(lookup.chain[10] == 2);
}
test bulkAdd {
    const text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
    // Reference tables: insert every position individually with `add`.
    var one_by_one: Self = .{};
    for (0..text.len) |at| {
        _ = one_by_one.add(text[at..], @intCast(at));
    }
    // The same input inserted with a single `bulkAdd` call.
    var bulk: Self = .{};
    bulk.bulkAdd(text, text.len, 0);
    // Both ways of inserting must leave identical tables.
    try testing.expectEqualSlices(u16, &one_by_one.head, &bulk.head);
    try testing.expectEqualSlices(u16, &one_by_one.chain, &bulk.chain);
}