mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 05:44:20 +00:00
This is relevant to PIEs, which are notably enabled by default on macOS. The build system needs to only see virtual addresses, that is, those which do not have the slide applied; but the fuzzer itself naturally sees relocated addresses (i.e. with the slide applied). We just need to subtract the slide when we communicate addresses to the build system.
1457 lines
61 KiB
Zig
1457 lines
61 KiB
Zig
const builtin = @import("builtin");
|
|
const std = @import("std");
|
|
const fatal = std.process.fatal;
|
|
const mem = std.mem;
|
|
const math = std.math;
|
|
const Allocator = mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const panic = std.debug.panic;
|
|
const abi = std.Build.abi.fuzz;
|
|
const native_endian = builtin.cpu.arch.endian();
|
|
|
|
/// Standard library options for this module: every `std.log` call is routed through
/// `logOverride`.
pub const std_options: std.Options = .{
    .logFn = logOverride,
};
|
|
|
|
/// `std.log` backend: appends one line to the fuzzer log file `log_f`.
///
/// Each line has the form `[<test name>] <level>(<scope>): <message>`, where the test name is
/// `current_test_name`, or "setup" while no test has been selected yet.
/// The file is locked exclusively for the duration of the write.
/// Panics if `log_f` has not been set yet or if any file operation fails.
fn logOverride(
    comptime level: std.log.Level,
    comptime scope: @Type(.enum_literal),
    comptime format: []const u8,
    args: anytype,
) void {
    const log_file = log_f orelse
        panic("attempt to use log before initialization, message:\n" ++ format, args);

    log_file.lock(.exclusive) catch |e| panic("failed to lock logging file: {t}", .{e});
    defer log_file.unlock();

    // Always write at the current end of the file.
    var write_buf: [256]u8 = undefined;
    var file_writer = log_file.writer(&write_buf);
    const append_pos = log_file.getEndPos() catch |e|
        panic("failed to get fuzzer log file end: {t}", .{e});
    file_writer.seekTo(append_pos) catch |e|
        panic("failed to seek to fuzzer log file end: {t}", .{e});

    const level_text = comptime level.asText();
    const scope_text = if (scope == .default) ": " else "(" ++ @tagName(scope) ++ "): ";
    const line_format = "[{s}] " ++ level_text ++ scope_text ++ format ++ "\n";
    const test_name = current_test_name orelse "setup";

    const out = &file_writer.interface;
    out.print(line_format, .{test_name} ++ args) catch
        panic("failed to write to fuzzer log: {t}", .{file_writer.err.?});
    out.flush() catch
        panic("failed to write to fuzzer log: {t}", .{file_writer.err.?});
}
|
|
|
|
/// Backing state for `gpa` in Debug builds.
var debug_allocator: std.heap.DebugAllocator(.{}) = .init;
/// General purpose allocator used throughout this file: the debug allocator in Debug builds,
/// `std.heap.smp_allocator` in every release mode.
const gpa = if (builtin.mode == .Debug)
    debug_allocator.allocator()
else
    std.heap.smp_allocator;
|
|
|
|
/// Log file used by `logOverride`. Logically part of `exec`, but kept separate so that it can
/// be set before `exec` is.
var log_f: ?std.fs.File = null;
/// Executable-wide state; holds `Executable.preinit` until `fuzzer_init` replaces it.
var exec: Executable = Executable.preinit;
/// Data gathered by the instrumentation callbacks; holds `Instrumentation.preinit` until
/// `fuzzer_init` replaces it.
var inst: Instrumentation = Instrumentation.preinit;
/// State of the current fuzz test; assigned by `fuzzer_init_test`.
var fuzzer: Fuzzer = undefined;
/// Name of the current fuzz test, used as the prefix of log lines. Null before the first
/// `fuzzer_init_test` call.
var current_test_name: ?[]const u8 = null;
|
|
|
|
/// Returns how many `usize` words are needed to store a bitset of `elems` bits.
fn bitsetUsizes(elems: usize) usize {
    const bits_per_word = @bitSizeOf(usize);
    const full_words = elems / bits_per_word;
    // A trailing partial word still needs a whole `usize`.
    const has_partial_word = elems % bits_per_word != 0;
    return full_words + @intFromBool(has_partial_word);
}
|
|
|
|
/// State tied to the instrumented executable as a whole: its sancov counter section, the
/// cache directory used for corpora, and the coverage file shared with other processes.
const Executable = struct {
    /// Tracks the hit count for each pc as updated by the process's instrumentation.
    pc_counters: []u8,

    /// The `f` subdirectory of the cache directory. `Fuzzer.init` opens one directory per
    /// unit test inside it.
    cache_f: std.fs.Dir,
    /// Shared copy of all pcs that have been hit, stored in a memory-mapped file that can be
    /// viewed while the fuzzer is running.
    shared_seen_pcs: MemoryMappedList,
    /// Hash of pcs used to uniquely identify the shared coverage file
    pc_digest: u64,

    /// A minimal state for this struct which instrumentation can function on.
    /// Used before this structure is initialized to avoid illegal behavior
    /// from instrumentation functions being called and using undefined values.
    pub const preinit: Executable = .{
        .pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
        .cache_f = undefined,
        .shared_seen_pcs = undefined,
        .pc_digest = undefined,
    };

    /// Opens the coverage file `v/<hex of pc_digest>` inside `cache_dir` and returns its
    /// memory mapping, creating and populating the file if it does not exist yet.
    ///
    /// The file consists of an `abi.SeenPcsHeader`, followed by a zeroed bitset with one bit
    /// per pc, followed by the virtual address of every pc. `pcs` holds runtime addresses;
    /// they are translated with `fuzzer_unslide_address` before being written or compared.
    ///
    /// Panics on any file system failure, or when an existing file does not describe exactly
    /// the same pcs.
    fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
        const pc_bitset_usizes = bitsetUsizes(pcs.len);
        const coverage_file_name = std.fmt.hex(pc_digest);
        comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
        comptime assert(abi.SeenPcsHeader.trailing[1] == .pc_addr);

        var v = cache_dir.makeOpenPath("v", .{}) catch |e|
            panic("failed to create directory 'v': {t}", .{e});
        defer v.close();
        // `populate` is true only for the process that actually created the file.
        const coverage_file, const populate = if (v.createFile(&coverage_file_name, .{
            .read = true,
            // If we create the file, we want to block other processes while we populate it
            .lock = .exclusive,
            .exclusive = true,
        })) |f|
            .{ f, true }
        else |e| switch (e) {
            error.PathAlreadyExists => .{ v.openFile(&coverage_file_name, .{
                .mode = .read_write,
                .lock = .shared,
            }) catch |e2| panic(
                "failed to open existing coverage file '{s}': {t}",
                .{ &coverage_file_name, e2 },
            ), false },
            else => panic("failed to create coverage file '{s}': {t}", .{ &coverage_file_name, e }),
        };

        const coverage_file_len = @sizeOf(abi.SeenPcsHeader) +
            pc_bitset_usizes * @sizeOf(usize) +
            pcs.len * @sizeOf(usize);

        if (populate) {
            // Once the file is fully written, other processes may map it.
            defer coverage_file.lock(.shared) catch |e| panic(
                "failed to demote lock for coverage file '{s}': {t}",
                .{ &coverage_file_name, e },
            );
            var map = MemoryMappedList.create(coverage_file, 0, coverage_file_len) catch |e| panic(
                "failed to init memory map for coverage file '{s}': {t}",
                .{ &coverage_file_name, e },
            );
            map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
                .n_runs = 0,
                .unique_runs = 0,
                .pcs_len = pcs.len,
            }));
            map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
            // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
            // applied). We need to translate these to the virtual addresses as on disk.
            for (pcs) |pc| {
                const pc_vaddr = fuzzer_unslide_address(pc);
                map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
            }
            return map;
        } else {
            const size = coverage_file.getEndPos() catch |e| panic(
                "failed to stat coverage file '{s}': {t}",
                .{ &coverage_file_name, e },
            );
            if (size != coverage_file_len) panic(
                "incompatible existing coverage file '{s}' (differing lengths: {} != {})",
                .{ &coverage_file_name, size, coverage_file_len },
            );

            const map = MemoryMappedList.init(
                coverage_file,
                coverage_file_len,
                coverage_file_len,
            ) catch |e| panic(
                "failed to init memory map for coverage file '{s}': {t}",
                .{ &coverage_file_name, e },
            );

            const seen_pcs_header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(map.items));
            if (seen_pcs_header.pcs_len != pcs.len) panic(
                "incompatible existing coverage file '{s}' (differing pcs length: {} != {})",
                .{ &coverage_file_name, seen_pcs_header.pcs_len, pcs.len },
            );
            // The file stores virtual addresses (slide removed) while `pcs` holds runtime
            // addresses, so every pc has to be unslid before it is compared. The two slices
            // have equal lengths thanks to the check above.
            for (seen_pcs_header.pcAddrs(), pcs, 0..) |file_vaddr, pc, i| {
                const pc_vaddr = fuzzer_unslide_address(pc);
                if (file_vaddr != pc_vaddr) panic(
                    "incompatible existing coverage file '{s}' (differing pc at index {d}: {x} != {x})",
                    .{ &coverage_file_name, i, file_vaddr, pc_vaddr },
                );
            }

            return map;
        }
    }

    /// Fully initializes the executable state rooted at `cache_dir_path`.
    ///
    /// Creates/opens the cache directory, sets the global `log_f` to `tmp/libfuzzer.log`
    /// inside it, opens the `f` subdirectory, locates the `__sancov_cntrs` and
    /// `__sancov_pcs1` sections through their linker-provided boundary symbols, and maps the
    /// shared coverage file.
    /// Panics on any failure.
    pub fn init(cache_dir_path: []const u8) Executable {
        var self: Executable = undefined;

        var cache_dir = std.fs.cwd().makeOpenPath(cache_dir_path, .{}) catch |e| panic(
            "failed to open directory '{s}': {t}",
            .{ cache_dir_path, e },
        );
        // Everything that outlives this function is opened as its own handle below, so the
        // handle of the cache directory itself is no longer needed once we return.
        defer cache_dir.close();
        log_f = cache_dir.createFile("tmp/libfuzzer.log", .{ .truncate = false }) catch |e|
            panic("failed to create file 'tmp/libfuzzer.log': {t}", .{e});
        self.cache_f = cache_dir.makeOpenPath("f", .{}) catch |e|
            panic("failed to open directory 'f': {t}", .{e});

        // Linkers are expected to automatically add symbols prefixed with these for the start and
        // end of sections whose names are valid C identifiers.
        const ofmt = builtin.object_format;
        const section_start_prefix, const section_end_prefix = switch (ofmt) {
            .elf => .{ "__start_", "__stop_" },
            .macho => .{ "\x01section$start$__DATA$", "\x01section$end$__DATA$" },
            else => @compileError("unsupported fuzzing object format '" ++ @tagName(ofmt) ++ "'"),
        };

        self.pc_counters = blk: {
            const pc_counters_start_name = section_start_prefix ++ "__sancov_cntrs";
            const pc_counters_start = @extern([*]u8, .{
                .name = pc_counters_start_name,
                .linkage = .weak,
            }) orelse panic("missing {s} symbol", .{pc_counters_start_name});

            const pc_counters_end_name = section_end_prefix ++ "__sancov_cntrs";
            const pc_counters_end = @extern([*]u8, .{
                .name = pc_counters_end_name,
                .linkage = .weak,
            }) orelse panic("missing {s} symbol", .{pc_counters_end_name});

            break :blk pc_counters_start[0 .. pc_counters_end - pc_counters_start];
        };

        const pcs = blk: {
            const pcs_start_name = section_start_prefix ++ "__sancov_pcs1";
            const pcs_start = @extern([*]usize, .{
                .name = pcs_start_name,
                .linkage = .weak,
            }) orelse panic("missing {s} symbol", .{pcs_start_name});

            const pcs_end_name = section_end_prefix ++ "__sancov_pcs1";
            const pcs_end = @extern([*]usize, .{
                .name = pcs_end_name,
                .linkage = .weak,
            }) orelse panic("missing {s} symbol", .{pcs_end_name});

            break :blk pcs_start[0 .. pcs_end - pcs_start];
        };

        if (self.pc_counters.len != pcs.len) panic(
            "pc counters length and pcs length do not match ({} != {})",
            .{ self.pc_counters.len, pcs.len },
        );

        self.pc_digest = digest: {
            // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
            // applied). We need to translate these to the virtual addresses as on disk.
            var h: std.hash.Wyhash = .init(0);
            for (pcs) |pc| {
                const pc_vaddr = fuzzer_unslide_address(pc);
                h.update(@ptrCast(&pc_vaddr));
            }
            break :digest h.final();
        };
        self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);

        return self;
    }

    /// Returns an iterator over `pc_counters` positioned at the first counter.
    pub fn pcBitsetIterator(self: Executable) PcBitsetIterator {
        return .{ .pc_counters = self.pc_counters };
    }

    /// Iterates over pc_counters returning a bitset for if each of them have been hit
    pub const PcBitsetIterator = struct {
        index: usize = 0,
        pc_counters: []u8,

        /// Returns the next word of the bitset: bit `i` is set when the counter at
        /// `index + i` is nonzero. Must not be called once all counters have been consumed.
        pub fn next(self: *PcBitsetIterator) usize {
            const rest = self.pc_counters[self.index..];
            if (rest.len >= @bitSizeOf(usize)) {
                // A full word: compare all counters against zero at once.
                defer self.index += @bitSizeOf(usize);
                const V = @Vector(@bitSizeOf(usize), u8);
                return @as(usize, @bitCast(@as(V, @splat(0)) != rest[0..@bitSizeOf(usize)].*));
            } else if (rest.len != 0) {
                // The trailing partial word; the unused high bits stay zero.
                defer self.index += rest.len;
                var res: usize = 0;
                for (0.., rest) |bit_index, byte| {
                    res |= @shlExact(@as(usize, @intFromBool(byte != 0)), @intCast(bit_index));
                }
                return res;
            } else unreachable;
        }
    };
};
|
|
|
|
/// Data gathered from instrumentation functions.
/// Separate from Executable since its state is resettable and changes.
/// Separate from Fuzzer since it may be needed before fuzzing starts.
const Instrumentation = struct {
    /// Bitset of seen pcs across all runs excluding fresh pcs.
    /// This is separate from shared_seen_pcs because multiple fuzzing processes are likely using
    /// it which causes contention and unrelated pcs to our campaign being set.
    seen_pcs: []usize,

    /// Stores a fresh input's new pcs
    fresh_pcs: []usize,

    /// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx
    /// have been called from and whose constants have already been added to const_x_vals
    const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
    /// Values that have been constant operands in comparisons and switch cases.
    /// There may be duplicates in this array if they came from different addresses, which is
    /// fine as they are likely more important and hence more likely to be selected.
    const_vals2: std.ArrayListUnmanaged(u16) = .empty,
    const_vals4: std.ArrayListUnmanaged(u32) = .empty,
    const_vals8: std.ArrayListUnmanaged(u64) = .empty,
    const_vals16: std.ArrayListUnmanaged(u128) = .empty,

    /// A minimal state for this struct which instrumentation can function on.
    /// Used before this structure is initialized to avoid illegal behavior
    /// from instrumentation functions being called and using undefined values.
    pub const preinit: Instrumentation = .{
        .seen_pcs = undefined, // currently only updated by `Fuzzer`
        .fresh_pcs = undefined,
    };

    /// Releases everything the `preinit` state may have allocated and invalidates `self`.
    /// Only the containers are freed; `seen_pcs` and `fresh_pcs` are undefined in `preinit`.
    pub fn depreinit(self: *Instrumentation) void {
        // Instrumentation callbacks running before initialization insert into `const_pcs`
        // as well, so it has to be released together with the value lists.
        self.const_pcs.deinit(gpa);
        self.const_vals2.deinit(gpa);
        self.const_vals4.deinit(gpa);
        self.const_vals8.deinit(gpa);
        self.const_vals16.deinit(gpa);
        self.* = undefined;
    }

    /// Creates a zeroed instrumentation state sized for `exec.pc_counters`.
    /// Panics when out of memory.
    pub fn init() Instrumentation {
        const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
        // Both bitsets share a single allocation. Allocating `usize` elements directly
        // guarantees the alignment the slices require.
        const words = gpa.alloc(usize, pc_bitset_usizes * 2) catch @panic("OOM");

        var self: Instrumentation = .{
            .seen_pcs = words[0..pc_bitset_usizes],
            .fresh_pcs = words[pc_bitset_usizes..],
        };
        self.reset();
        return self;
    }

    /// Clears both bitsets and all recorded constants while keeping allocated capacity.
    pub fn reset(self: *Instrumentation) void {
        @memset(self.seen_pcs, 0);
        @memset(self.fresh_pcs, 0);
        self.const_pcs.clearRetainingCapacity();
        self.const_vals2.clearRetainingCapacity();
        self.const_vals4.clearRetainingCapacity();
        self.const_vals8.clearRetainingCapacity();
        self.const_vals16.clearRetainingCapacity();
    }

    /// If false is returned, then the pc is marked as seen
    pub fn constPcSeen(self: *Instrumentation, pc: usize) bool {
        return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
    }

    /// Returns whether `exec.pc_counters` contains a hit for a pc that is not in `seen_pcs`.
    pub fn isFresh(self: *Instrumentation) bool {
        var hit_pcs = exec.pcBitsetIterator();
        for (self.seen_pcs) |seen_pcs| {
            if (hit_pcs.next() & ~seen_pcs != 0) return true;
        }

        return false;
    }

    /// Updates `fresh_pcs` to the pcs that are currently hit but not in `seen_pcs`.
    pub fn setFresh(self: *Instrumentation) void {
        var hit_pcs = exec.pcBitsetIterator();
        for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
            fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
        }
    }

    /// Returns if `exec.pc_counters` is a superset of `fresh_pcs`.
    pub fn atleastFresh(self: *Instrumentation) bool {
        var hit_pcs = exec.pcBitsetIterator();
        for (self.fresh_pcs) |fresh_pcs| {
            if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
        }
        return true;
    }

    /// Merges `fresh_pcs` into `seen_pcs` and into the bitset of the shared coverage file.
    fn updateSeen(self: *Instrumentation) void {
        comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
        // The shared bitset starts directly after the header.
        const shared_seen_pcs: [*]volatile usize = @ptrCast(
            exec.shared_seen_pcs.items[@sizeOf(abi.SeenPcsHeader)..].ptr,
        );

        for (self.seen_pcs, shared_seen_pcs, self.fresh_pcs) |*seen, *shared_seen, fresh| {
            seen.* |= fresh;
            // Skip the atomic operation on the shared file when there is nothing to add.
            if (fresh != 0)
                _ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
        }
    }
};
|
|
|
|
/// State of one fuzz test: the test function, the memory-mapped current input, the in-memory
/// and on-disk corpus, and the weighted list of mutations.
const Fuzzer = struct {
    /// Owns the bytes of corpus entries that were read from disk or discovered while fuzzing.
    arena_ctx: std.heap.ArenaAllocator = .init(gpa),
    rng: std.Random.DefaultPrng = .init(0),
    test_one: abi.TestOne,
    /// The next input that will be given to the testOne function. When the
    /// current process crashes, this memory-mapped file is used to recover the
    /// input.
    /// The first 8 bytes hold the input length in little-endian; the input follows.
    input: MemoryMappedList,

    /// Minimized past inputs leading to new pc hits.
    /// These are randomly mutated in round-robin fashion
    /// Element zero is always an empty input. It is guaranteed no other elements are empty.
    corpus: std.ArrayListUnmanaged([]const u8),
    corpus_pos: usize,
    /// List of past mutations that have led to new inputs. This way, the mutations that are the
    /// most effective are the most likely to be selected again. Starts with one of each mutation.
    mutations: std.ArrayListUnmanaged(Mutation) = .empty,

    /// Filesystem directory containing found inputs for future runs
    corpus_dir: std.fs.Dir,
    /// Index, and hexadecimal file name, of the next corpus file to read or write.
    corpus_dir_idx: usize = 0,

    /// Sets up fuzzing of `test_one`.
    ///
    /// Opens the directory named `unit_test_name` inside `exec.cache_f`, exclusively locks
    /// and maps its `in` file, re-runs the input stored there (if any), resets `inst`, and
    /// loads the corpus files `0`, `1`, ... (hexadecimal) until one is missing.
    /// Panics on any failure, including when another process holds the lock on `in`.
    pub fn init(test_one: abi.TestOne, unit_test_name: []const u8) Fuzzer {
        var self: Fuzzer = .{
            .test_one = test_one,
            .input = undefined,
            .corpus = .empty,
            .corpus_pos = 0,
            .mutations = .empty,
            .corpus_dir = undefined,
        };
        const arena = self.arena_ctx.allocator();

        self.corpus_dir = exec.cache_f.makeOpenPath(unit_test_name, .{}) catch |e|
            panic("failed to open directory '{s}': {t}", .{ unit_test_name, e });
        self.input = in: {
            const f = self.corpus_dir.createFile("in", .{
                .read = true,
                .truncate = false,
                // In case any other fuzz tests are running under the same test name,
                // the input file is exclusively locked to ensure only one proceeds.
                .lock = .exclusive,
                .lock_nonblocking = true,
            }) catch |e| switch (e) {
                error.WouldBlock => @panic("input file 'in' is in use by another fuzzing process"),
                else => panic("failed to create input file 'in': {t}", .{e}),
            };
            const size = f.getEndPos() catch |e| panic("failed to stat input file 'in': {t}", .{e});
            const map = (if (size < std.heap.page_size_max)
                MemoryMappedList.create(f, 8, std.heap.page_size_max)
            else
                MemoryMappedList.init(f, size, size)) catch |e|
                panic("failed to memory map input file 'in': {t}", .{e});

            // Perform a dry-run of the stored input if there was one in case it might reproduce a
            // crash.
            const old_in_len = mem.littleToNative(usize, mem.bytesAsValue(usize, map.items[0..8]).*);
            // Only trust the stored length when the mapped bytes after the length field can
            // actually hold that many bytes; otherwise the slice below would be out of bounds.
            if (size >= 8 and old_in_len != 0 and old_in_len <= map.items.len - 8) {
                test_one(.fromSlice(@volatileCast(map.items[8..][0..old_in_len])));
            }

            break :in map;
        };
        inst.reset();

        self.mutations.appendSlice(gpa, std.meta.tags(Mutation)) catch @panic("OOM");
        // Ensure there is never an empty corpus. Additionally, an empty input usually leads to
        // new inputs.
        self.addInput(&.{});

        while (true) {
            var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
            const bytes = self.corpus_dir.readFileAlloc(
                std.fmt.bufPrint(&name_buf, "{x}", .{self.corpus_dir_idx}) catch unreachable,
                arena,
                .unlimited,
            ) catch |e| switch (e) {
                error.FileNotFound => break,
                else => panic("failed to read corpus file '{x}': {t}", .{ self.corpus_dir_idx, e }),
            };
            // No corpus file of length zero will ever be created
            if (bytes.len == 0)
                panic("corrupt corpus file '{x}' (len of zero)", .{self.corpus_dir_idx});
            self.addInput(bytes);
            self.corpus_dir_idx += 1;
        }

        return self;
    }

    /// Releases every resource owned by the fuzzer and invalidates `self`.
    pub fn deinit(self: *Fuzzer) void {
        self.input.deinit();
        self.corpus.deinit(gpa);
        self.mutations.deinit(gpa);
        self.corpus_dir.close();
        self.arena_ctx.deinit();
        self.* = undefined;
    }

    /// Appends `bytes` to the corpus, runs it once, and records the pcs it hit as seen.
    /// `bytes` is stored without being copied, so it must stay valid while the fuzzer is
    /// in use.
    pub fn addInput(self: *Fuzzer, bytes: []const u8) void {
        self.corpus.append(gpa, bytes) catch @panic("OOM");
        self.input.clearRetainingCapacity();
        self.input.ensureTotalCapacity(8 + bytes.len) catch |e|
            panic("could not resize shared input file: {t}", .{e});
        // Reserve the length field; `run` fills it in.
        self.input.items.len = 8;
        self.input.appendSliceAssumeCapacity(bytes);
        self.run();
        inst.setFresh();
        inst.updateSeen();
    }

    /// Assumes `fresh_pcs` correspond to the input
    fn minimizeInput(self: *Fuzzer) void {
        // The minimization technique is kept relatively simple, we sequentially try to remove each
        // byte and check that the new pcs and memory loads are still hit.
        var i = self.input.items.len;
        // Walk backwards over the input bytes, stopping at the 8-byte length field.
        while (i != 8) {
            i -= 1;
            const old = self.input.orderedRemove(i);

            @memset(exec.pc_counters, 0);
            self.run();

            if (!inst.atleastFresh()) {
                // The byte was needed to reach the fresh pcs; put it back.
                self.input.insertAssumeCapacity(i, old);
            } else {
                // This removal may have led to new pcs or memory loads being hit, so we need to
                // update them to avoid duplicates.
                inst.setFresh();
            }
        }
    }

    /// Writes the input length into the length field, calls the test function on the current
    /// input, and increments the run counter of the shared coverage file.
    fn run(self: *Fuzzer) void {
        // `pc_counters` is not cleared since only new hits are relevant.

        mem.bytesAsValue(usize, self.input.items[0..8]).* =
            mem.nativeToLittle(usize, self.input.items.len - 8);
        self.test_one(.fromSlice(@volatileCast(self.input.items[8..])));

        const header = mem.bytesAsValue(
            abi.SeenPcsHeader,
            exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
        );
        _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
    }

    /// Performs one fuzzing iteration: mutates the next corpus entry, runs it, and, when it
    /// hits new pcs, minimizes it and adds it to the corpus in memory and on disk.
    pub fn cycle(self: *Fuzzer) void {
        const input = self.corpus.items[self.corpus_pos];
        self.corpus_pos += 1;
        if (self.corpus_pos == self.corpus.items.len)
            self.corpus_pos = 0;

        const rng = self.rng.random();
        // Keep picking mutations until one is applicable to `input`.
        const m = while (true) {
            const m = self.mutations.items[rng.uintLessThanBiased(usize, self.mutations.items.len)];
            if (!m.mutate(
                rng,
                input,
                &self.input,
                self.corpus.items,
                inst.const_vals2.items,
                inst.const_vals4.items,
                inst.const_vals8.items,
                inst.const_vals16.items,
            )) continue;
            break m;
        };

        self.run();

        if (inst.isFresh()) {
            @branchHint(.unlikely);

            const header = mem.bytesAsValue(
                abi.SeenPcsHeader,
                exec.shared_seen_pcs.items[0..@sizeOf(abi.SeenPcsHeader)],
            );
            _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);

            inst.setFresh();
            self.minimizeInput();
            inst.updateSeen();

            // An empty-input has always been tried, so if an empty input is fresh then the
            // test has to be non-deterministic. This has to be checked as duplicate empty
            // entries are not allowed.
            if (self.input.items.len - 8 == 0) {
                std.log.warn("non-deterministic test (empty input produces different hits)", .{});
                _ = @atomicRmw(usize, &header.unique_runs, .Sub, 1, .monotonic);
                return;
            }

            const arena = self.arena_ctx.allocator();
            const bytes = arena.dupe(u8, @volatileCast(self.input.items[8..])) catch @panic("OOM");

            self.corpus.append(gpa, bytes) catch @panic("OOM");
            // Weight the successful mutation so that it is selected more often.
            self.mutations.appendNTimes(gpa, m, 6) catch @panic("OOM");

            // Write new corpus to cache
            var name_buf: [@sizeOf(usize) * 2]u8 = undefined;
            self.corpus_dir.writeFile(.{
                .sub_path = std.fmt.bufPrint(
                    &name_buf,
                    "{x}",
                    .{self.corpus_dir_idx},
                ) catch unreachable,
                .data = bytes,
            }) catch |e| panic(
                "failed to write corpus file '{x}': {t}",
                .{ self.corpus_dir_idx, e },
            );
            self.corpus_dir_idx += 1;
        }
    }
};
|
|
|
|
/// Replaces the pre-initialization globals with fully initialized state rooted at
/// `cache_dir_path`.
/// Instrumentation must not be triggered before this function is called
export fn fuzzer_init(cache_dir_path: abi.Slice) void {
    // Drop whatever the `preinit` state may have accumulated.
    inst.depreinit();
    const cache_path = cache_dir_path.toSlice();
    exec = Executable.init(cache_path);
    // Sized from `exec`, so it has to come second.
    inst = Instrumentation.init();
}
|
|
|
|
/// Summarizes the shared coverage file: its id, the run counters stored in its header, and
/// the number of pcs marked as seen.
/// Invalid until `fuzzer_init` is called.
export fn fuzzer_coverage() abi.Coverage {
    const header: *const abi.SeenPcsHeader = @ptrCast(@volatileCast(exec.shared_seen_pcs.items.ptr));

    // Every set bit in the seen bitset is one covered pc.
    var seen_total: usize = 0;
    for (header.seenBits()) |word| seen_total += @popCount(word);

    return .{
        .id = exec.pc_digest,
        .runs = header.n_runs,
        .unique = header.unique_runs,
        .seen = seen_total,
    };
}
|
|
|
|
/// Selects the fuzz test to run: records its name for log output and initializes the global
/// `fuzzer` for `test_one`.
/// fuzzer_init must be called beforehand
export fn fuzzer_init_test(test_one: abi.TestOne, unit_test_name: abi.Slice) void {
    const name = unit_test_name.toSlice();
    current_test_name = name;
    fuzzer = Fuzzer.init(test_one, name);
}
|
|
|
|
/// Adds `bytes` to the corpus of the current fuzz test. Empty inputs are ignored.
/// fuzzer_init_test must be called beforehand
/// The caller owns the memory of bytes and must not free it until the fuzzer is finished.
export fn fuzzer_new_input(bytes: abi.Slice) void {
    // An entry of length zero is always added and duplicates of it are not allowed.
    if (bytes.len == 0) return;
    fuzzer.addInput(bytes.toSlice());
}
|
|
|
|
/// Runs fuzzing cycles: endlessly for `.forever`, or exactly `amount` times for `.iterations`.
/// fuzzer_init_test must be called first
export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
    switch (limit_kind) {
        .iterations => {
            var remaining = amount;
            while (remaining != 0) : (remaining -= 1) fuzzer.cycle();
        },
        .forever => while (true) fuzzer.cycle(),
    }
}
|
|
|
|
/// Translates the runtime address `addr` into a virtual address by subtracting the slide of
/// the module that contains it.
/// Panics when the slide cannot be determined.
export fn fuzzer_unslide_address(addr: usize) usize {
    const debug_info = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
    const debug_gpa = std.debug.getDebugInfoAllocator();
    const slide = debug_info.getModuleSlide(debug_gpa, addr) catch |err|
        panic("failed to find virtual address slide: {t}", .{err});
    return addr - slide;
}
|
|
|
|
/// Exported thread-local symbol that helps determine run uniqueness in the face of recursion.
/// The fuzzer currently does not use it.
export threadlocal var __sancov_lowest_stack: usize = 0;
|
|
|
|
/// Records `val` in the `inst` list named by `const_vals_field` the first time it is reached
/// from a given return address.
/// Inline since the return address of the callee is required
inline fn genericConstCmp(T: anytype, val: T, comptime const_vals_field: []const u8) void {
    const already_recorded = inst.constPcSeen(@returnAddress());
    if (!already_recorded) {
        @branchHint(.unlikely);
        const values = &@field(inst, const_vals_field);
        values.append(gpa, val) catch @panic("OOM");
    }
}
|
|
|
|
/// Instrumentation callback for 1-byte comparisons against a constant. Both operands are
/// deliberately ignored.
export fn __sanitizer_cov_trace_const_cmp1(const_arg: u8, arg: u8) void {
    _ = arg;
    _ = const_arg;
}
|
|
|
|
/// Instrumentation callback for 2-byte comparisons against a constant: records the constant
/// operand in `inst.const_vals2`. The non-constant operand is ignored.
export fn __sanitizer_cov_trace_const_cmp2(const_arg: u16, arg: u16) void {
    genericConstCmp(u16, const_arg, "const_vals2");
    _ = arg;
}
|
|
|
|
/// Instrumentation callback for 4-byte comparisons against a constant: records the constant
/// operand in `inst.const_vals4`. The non-constant operand is ignored.
export fn __sanitizer_cov_trace_const_cmp4(const_arg: u32, arg: u32) void {
    genericConstCmp(u32, const_arg, "const_vals4");
    _ = arg;
}
|
|
|
|
/// Instrumentation callback for 8-byte comparisons against a constant: records the constant
/// operand in `inst.const_vals8`. The non-constant operand is ignored.
export fn __sanitizer_cov_trace_const_cmp8(const_arg: u64, arg: u64) void {
    genericConstCmp(u64, const_arg, "const_vals8");
    _ = arg;
}
|
|
|
|
/// Instrumentation callback for switch statements. The first time it is reached from a given
/// return address, every case value is recorded in the `inst` list matching its bit width.
///
/// As read below, `cases[0]` is the number of case values, `cases[1]` is their bit width,
/// and the values themselves start at `cases[2]`. The switched value `val` is ignored.
export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
    _ = val;
    const first_visit = !inst.constPcSeen(@returnAddress());
    if (first_visit) {
        @branchHint(.unlikely);
        const case_count = cases[0];
        const value_bits = cases[1];
        const case_values = cases[2..][0..case_count];

        if (value_bits <= 8) {
            // 8-bit cases are ignored because they are likely to be randomly generated
        } else if (value_bits <= 16) {
            for (case_values) |c| inst.const_vals2.append(gpa, @truncate(c)) catch @panic("OOM");
        } else if (value_bits <= 32) {
            for (case_values) |c| inst.const_vals4.append(gpa, @truncate(c)) catch @panic("OOM");
        } else if (value_bits <= 64) {
            for (case_values) |c| inst.const_vals8.append(gpa, @truncate(c)) catch @panic("OOM");
        } else {
            // Should be impossible
        }
    }
}
|
|
|
|
/// Instrumentation callback for 1-byte comparisons of two non-constant values.
/// Deliberately does nothing.
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
    _ = arg2;
    _ = arg1;
}
|
|
|
|
/// Instrumentation callback for 2-byte comparisons of two non-constant values.
/// Deliberately does nothing.
export fn __sanitizer_cov_trace_cmp2(arg1: u16, arg2: u16) void {
    _ = arg2;
    _ = arg1;
}
|
|
|
|
/// Instrumentation callback for 4-byte comparisons of two non-constant values.
/// Deliberately does nothing.
export fn __sanitizer_cov_trace_cmp4(arg1: u32, arg2: u32) void {
    _ = arg2;
    _ = arg1;
}
|
|
|
|
/// Instrumentation callback for 8-byte comparisons of two non-constant values.
/// Deliberately does nothing.
export fn __sanitizer_cov_trace_cmp8(arg1: u64, arg2: u64) void {
    _ = arg2;
    _ = arg1;
}
|
|
|
|
/// Instrumentation callback for indirect calls. Deliberately does nothing.
export fn __sanitizer_cov_trace_pc_indir(callee: usize) void {
    // Pc tracing is already provided by the 8bit counters, so this adds nothing of value.
    _ = callee;
}
|
|
/// Callback that clang emits a call to when compiling with code coverage instrumentation.
/// Both arguments are ignored: fuzzer_init() does not need this information since it reads
/// the section bounds directly from the symbol table.
export fn __sanitizer_cov_8bit_counters_init(start: usize, end: usize) void {
    _ = end;
    _ = start;
}
|
|
/// Callback that clang emits a call to when compiling with code coverage instrumentation.
/// Both arguments are ignored: fuzzer_init() does not need this information since it reads
/// the section bounds directly from the symbol table.
export fn __sanitizer_cov_pcs_init(start: usize, end: usize) void {
    _ = end;
    _ = start;
}
|
|
|
|
/// Copy all of source into dest at position 0.
/// `dest` must be at least as long as `source`; elements of `dest` past `source.len` are
/// left untouched.
/// If the slices overlap, dest.ptr must be <= src.ptr.
fn volatileCopyForwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
    // A multi-sequence `for` requires equal lengths, so restrict `dest` to the part that is
    // actually written.
    for (dest[0..source.len], source) |*d, s| d.* = s;
}
|
|
|
|
/// Copy all of source into dest at position 0, starting with the last element and moving
/// towards the first.
/// If the slices overlap, dest.ptr must be >= src.ptr.
fn volatileCopyBackwards(comptime T: type, dest: []volatile T, source: []const volatile T) void {
    for (0..source.len) |step| {
        const idx = source.len - 1 - step;
        dest[idx] = source[idx];
    }
}
|
|
|
|
const Mutation = enum {
|
|
/// Applies .insert_*_span, .push_*_span
|
|
/// For wtf-8, this limits code units, not code points
|
|
const max_insert_len = 12;
|
|
/// Applies to .insert_large_*_span and .push_large_*_span
|
|
/// 4096 is used as it is a common sector size
|
|
const max_large_insert_len = 4096;
|
|
/// Applies to .delete_span and .pop_span
|
|
const max_delete_len = 16;
|
|
/// Applies to .set_*span, .move_span, .set_existing_span
|
|
const max_set_len = 12;
|
|
const max_replicate_len = 64;
|
|
const AddValue = i6;
|
|
const SmallValue = i10;
|
|
|
|
delete_byte,
|
|
delete_span,
|
|
/// Removes the last byte from the input
|
|
pop_byte,
|
|
pop_span,
|
|
/// Inserts a group of bytes which is already in the input and removes the original copy.
|
|
move_span,
|
|
/// Replaces a group of bytes in the input with another group of bytes in the input
|
|
set_existing_span,
|
|
insert_existing_span,
|
|
push_existing_span,
|
|
set_rng_byte,
|
|
set_rng_span,
|
|
insert_rng_byte,
|
|
insert_rng_span,
|
|
/// Adds a byte to the end of the input
|
|
push_rng_byte,
|
|
push_rng_span,
|
|
set_zero_byte,
|
|
set_zero_span,
|
|
insert_zero_byte,
|
|
insert_zero_span,
|
|
push_zero_byte,
|
|
push_zero_span,
|
|
/// Inserts a lot of zeros to the end of the input
|
|
/// This is intended to work with fuzz tests that require data in (large) blocks
|
|
push_large_zero_span,
|
|
/// Inserts a group of ascii printable character
|
|
insert_print_span,
|
|
/// Inserts a group of character from a...z, A...Z, 0...9, _, and ' '
|
|
insert_common_span,
|
|
/// Inserts a group of ascii digits possibly preceded by a `-`
|
|
insert_integer,
|
|
/// Code units are evenly distributed between one to four
|
|
insert_wtf8_char,
|
|
insert_wtf8_span,
|
|
/// Inserts a group of bytes from another input
|
|
insert_splice_span,
|
|
// utf16 is not yet included since insertion of random bytes should adequately check
|
|
// BMP characters, surrogate handling, and occasionally characters outside of the BMP.
|
|
set_print_span,
|
|
set_common_span,
|
|
set_splice_span,
|
|
/// Similar to set_splice_span, but the bytes are copied to the same index instead of a random
|
|
replicate_splice_span,
|
|
push_print_span,
|
|
push_common_span,
|
|
push_integer,
|
|
push_wtf8_char,
|
|
push_wtf8_span,
|
|
push_splice_span,
|
|
/// Clears a random amount of high bits of a byte
|
|
truncate_8,
|
|
truncate_16le,
|
|
truncate_16be,
|
|
truncate_32le,
|
|
truncate_32be,
|
|
truncate_64le,
|
|
truncate_64be,
|
|
/// Flips a random bit
|
|
xor_1,
|
|
/// Swaps up to three bits of a byte biased to less bits
|
|
xor_few_8,
|
|
/// Swaps up to six bits of a 16-bit value biased to less bits
|
|
xor_few_16,
|
|
/// Swaps up to nine bits of a 32-bit value biased to less bits
|
|
xor_few_32,
|
|
/// Swaps up to twelve bits of 64-bit value biased to less bits
|
|
xor_few_64,
|
|
/// Adds to a byte a value of type AddValue
|
|
add_8,
|
|
add_16le,
|
|
add_16be,
|
|
add_32le,
|
|
add_32be,
|
|
add_64le,
|
|
add_64be,
|
|
/// Sets a 16-bit little-endian value to a value of type SmallValue
|
|
set_small_16le,
|
|
set_small_16be,
|
|
set_small_32le,
|
|
set_small_32be,
|
|
set_small_64le,
|
|
set_small_64be,
|
|
insert_small_16le,
|
|
insert_small_16be,
|
|
insert_small_32le,
|
|
insert_small_32be,
|
|
insert_small_64le,
|
|
insert_small_64be,
|
|
push_small_16le,
|
|
push_small_16be,
|
|
push_small_32le,
|
|
push_small_32be,
|
|
push_small_64le,
|
|
push_small_64be,
|
|
set_const_16,
|
|
set_const_32,
|
|
set_const_64,
|
|
set_const_128,
|
|
insert_const_16,
|
|
insert_const_32,
|
|
insert_const_64,
|
|
insert_const_128,
|
|
push_const_16,
|
|
push_const_32,
|
|
push_const_64,
|
|
push_const_128,
|
|
/// Sets a byte with up to three bits set biased to less bits
|
|
set_few_8,
|
|
/// Sets a 16-bit value with up to six bits set biased to less bits
|
|
set_few_16,
|
|
/// Sets a 32-bit value with up to nine bits set biased to less bits
|
|
set_few_32,
|
|
/// Sets a 64-bit value with up to twelve bits set biased to less bits
|
|
set_few_64,
|
|
insert_few_8,
|
|
insert_few_16,
|
|
insert_few_32,
|
|
insert_few_64,
|
|
push_few_8,
|
|
push_few_16,
|
|
push_few_32,
|
|
push_few_64,
|
|
/// Randomizes a random contiguous group of bits in a byte
|
|
packed_set_rng_8,
|
|
packed_set_rng_16le,
|
|
packed_set_rng_16be,
|
|
packed_set_rng_32le,
|
|
packed_set_rng_32be,
|
|
packed_set_rng_64le,
|
|
packed_set_rng_64be,
|
|
|
|
/// Produces a `T` with a few randomly chosen bits set.
///
/// Between one and `bits` bit positions are drawn. Each position is drawn
/// independently, so two draws may land on the same bit; the result therefore
/// has at least one and at most `bits` bits set.
fn fewValue(rng: std.Random, T: type, comptime bits: u16) T {
    const ShiftInt = math.Log2Int(T);
    // The number of draws is decided first, then one position per draw.
    const draws: usize = rng.intRangeAtMostBiased(u16, 1, bits);

    var mask: T = 0;
    for (0..draws) |_| {
        const position = rng.int(ShiftInt);
        mask |= @shlExact(@as(T, 1), position);
    }
    return mask;
}
|
|
|
|
/// Rebuilds `out` as a mutated copy of `in` and reports whether the mutation
/// was applicable to the input.
///
/// `out` is cleared and grown so it can hold the 8-byte length field, the whole
/// input, and the largest possible insertion. The length field is reserved but
/// not written here; the mutated bytes follow it. When false is returned, `out`
/// holds nothing but the reserved length field.
///
/// Panics if the shared input file backing `out` cannot be resized.
pub fn mutate(
    mutation: Mutation,
    rng: std.Random,
    in: []const u8,
    out: *MemoryMappedList,
    corpus: []const []const u8,
    const_vals2: []const u16,
    const_vals4: []const u32,
    const_vals8: []const u64,
    const_vals16: []const u128,
) bool {
    // Worst case growth of any single mutation.
    const largest_addition = @max(
        16, // builtin 128 value
        Mutation.max_insert_len,
        Mutation.max_large_insert_len,
    );
    const new_capacity = 8 + in.len + largest_addition;

    out.clearRetainingCapacity();
    out.ensureTotalCapacity(new_capacity) catch |e|
        panic("could not resize shared input file: {t}", .{e});
    out.items.len = 8; // Length field

    // Turn the runtime tag into a comptime one so each mutation gets its own
    // specialized implementation.
    const applied = switch (mutation) {
        inline else => |m| m.comptimeMutate(
            rng,
            in,
            out,
            corpus,
            const_vals2,
            const_vals4,
            const_vals8,
            const_vals16,
        ),
    };

    if (applied) {
        assert(out.items.len <= new_capacity);
    } else {
        // A rejected mutation must not have appended anything.
        assert(out.items.len == 8);
    }
    return applied;
}
|
|
|
|
/// Applies the comptime-known `mutation` to `in`, appending the mutated bytes to `out`.
///
/// Assumes out has already been cleared (the caller, `mutate`, leaves only the 8-byte
/// length field in it) and already has enough capacity: every append below is an
/// `AssumeCapacity` call.
///
/// Returns false, without appending anything, when the mutation is not applicable to
/// the input (for example the input is too short, no constants are available, or the
/// corpus has nothing to splice from). Returns true otherwise.
|
|
fn comptimeMutate(
|
|
comptime mutation: Mutation,
|
|
rng: std.Random,
|
|
in: []const u8,
|
|
out: *MemoryMappedList,
|
|
corpus: []const []const u8,
|
|
const_vals2: []const u16,
|
|
const_vals4: []const u32,
|
|
const_vals8: []const u64,
|
|
const_vals16: []const u128,
|
|
) bool {
|
|
// Every mutation is lowered to one of a handful of implementation classes plus a
// class-specific context tuple, which is destructured inside the matching branch below.
const Class = enum { new, remove, rmw, move_span, replicate_splice_span };
|
|
const class: Class, const class_ctx = switch (mutation) {
|
|
// zig fmt: off
|
|
.move_span => .{ .move_span, null },
|
|
.replicate_splice_span => .{ .replicate_splice_span, null },
|
|
|
|
// .remove context: { op, max_len }
.delete_byte => .{ .remove, .{ .delete, 1 } },
|
|
.delete_span => .{ .remove, .{ .delete, max_delete_len } },
|
|
|
|
.pop_byte => .{ .remove, .{ .pop, 1 } },
|
|
.pop_span => .{ .remove, .{ .pop, max_delete_len } },
|
|
|
|
// .new context: { op, min_in_len, data, data_ctx }
.set_rng_byte => .{ .new, .{ .set , 1, .rng , .one } },
|
|
.set_zero_byte => .{ .new, .{ .set , 1, .zero , .one } },
|
|
.set_rng_span => .{ .new, .{ .set , 1, .rng , .many } },
|
|
.set_zero_span => .{ .new, .{ .set , 1, .zero , .many } },
|
|
.set_common_span => .{ .new, .{ .set , 1, .common , .many } },
|
|
.set_print_span => .{ .new, .{ .set , 1, .print , .many } },
|
|
.set_existing_span => .{ .new, .{ .set , 2, .existing, .many } },
|
|
.set_splice_span => .{ .new, .{ .set , 1, .splice , .many } },
|
|
.set_const_16 => .{ .new, .{ .set , 2, .@"const", const_vals2 } },
|
|
.set_const_32 => .{ .new, .{ .set , 4, .@"const", const_vals4 } },
|
|
.set_const_64 => .{ .new, .{ .set , 8, .@"const", const_vals8 } },
|
|
.set_const_128 => .{ .new, .{ .set , 16, .@"const", const_vals16 } },
|
|
.set_small_16le => .{ .new, .{ .set , 2, .small , .{ i16, .little } } },
|
|
.set_small_32le => .{ .new, .{ .set , 4, .small , .{ i32, .little } } },
|
|
.set_small_64le => .{ .new, .{ .set , 8, .small , .{ i64, .little } } },
|
|
.set_small_16be => .{ .new, .{ .set , 2, .small , .{ i16, .big } } },
|
|
.set_small_32be => .{ .new, .{ .set , 4, .small , .{ i32, .big } } },
|
|
.set_small_64be => .{ .new, .{ .set , 8, .small , .{ i64, .big } } },
|
|
.set_few_8 => .{ .new, .{ .set , 1, .few , .{ u8 , 3 } } },
|
|
.set_few_16 => .{ .new, .{ .set , 2, .few , .{ u16, 6 } } },
|
|
.set_few_32 => .{ .new, .{ .set , 4, .few , .{ u32, 9 } } },
|
|
.set_few_64 => .{ .new, .{ .set , 8, .few , .{ u64, 12 } } },
|
|
|
|
.insert_rng_byte => .{ .new, .{ .insert, 0, .rng , .one } },
|
|
.insert_zero_byte => .{ .new, .{ .insert, 0, .zero , .one } },
|
|
.insert_rng_span => .{ .new, .{ .insert, 0, .rng , .many } },
|
|
.insert_zero_span => .{ .new, .{ .insert, 0, .zero , .many } },
|
|
.insert_print_span => .{ .new, .{ .insert, 0, .print , .many } },
|
|
.insert_common_span => .{ .new, .{ .insert, 0, .common , .many } },
|
|
.insert_integer => .{ .new, .{ .insert, 0, .integer , .many } },
|
|
.insert_wtf8_char => .{ .new, .{ .insert, 0, .wtf8 , .one } },
|
|
.insert_wtf8_span => .{ .new, .{ .insert, 0, .wtf8 , .many } },
|
|
.insert_existing_span => .{ .new, .{ .insert, 1, .existing, .many } },
|
|
.insert_splice_span => .{ .new, .{ .insert, 0, .splice , .many } },
|
|
.insert_const_16 => .{ .new, .{ .insert, 0, .@"const", const_vals2 } },
|
|
.insert_const_32 => .{ .new, .{ .insert, 0, .@"const", const_vals4 } },
|
|
.insert_const_64 => .{ .new, .{ .insert, 0, .@"const", const_vals8 } },
|
|
.insert_const_128 => .{ .new, .{ .insert, 0, .@"const", const_vals16 } },
|
|
.insert_small_16le => .{ .new, .{ .insert, 0, .small , .{ i16, .little } } },
|
|
.insert_small_32le => .{ .new, .{ .insert, 0, .small , .{ i32, .little } } },
|
|
.insert_small_64le => .{ .new, .{ .insert, 0, .small , .{ i64, .little } } },
|
|
.insert_small_16be => .{ .new, .{ .insert, 0, .small , .{ i16, .big } } },
|
|
.insert_small_32be => .{ .new, .{ .insert, 0, .small , .{ i32, .big } } },
|
|
.insert_small_64be => .{ .new, .{ .insert, 0, .small , .{ i64, .big } } },
|
|
.insert_few_8 => .{ .new, .{ .insert, 0, .few , .{ u8 , 3 } } },
|
|
.insert_few_16 => .{ .new, .{ .insert, 0, .few , .{ u16, 6 } } },
|
|
.insert_few_32 => .{ .new, .{ .insert, 0, .few , .{ u32, 9 } } },
|
|
.insert_few_64 => .{ .new, .{ .insert, 0, .few , .{ u64, 12 } } },
|
|
|
|
.push_rng_byte => .{ .new, .{ .push , 0, .rng , .one } },
|
|
.push_zero_byte => .{ .new, .{ .push , 0, .zero , .one } },
|
|
.push_rng_span => .{ .new, .{ .push , 0, .rng , .many } },
|
|
.push_zero_span => .{ .new, .{ .push , 0, .zero , .many } },
|
|
.push_print_span => .{ .new, .{ .push , 0, .print , .many } },
|
|
.push_common_span => .{ .new, .{ .push , 0, .common , .many } },
|
|
.push_integer => .{ .new, .{ .push , 0, .integer , .many } },
|
|
.push_large_zero_span => .{ .new, .{ .push , 0, .zero , .large } },
|
|
.push_wtf8_char => .{ .new, .{ .push , 0, .wtf8 , .one } },
|
|
.push_wtf8_span => .{ .new, .{ .push , 0, .wtf8 , .many } },
|
|
.push_existing_span => .{ .new, .{ .push , 1, .existing, .many } },
|
|
.push_splice_span => .{ .new, .{ .push , 0, .splice , .many } },
|
|
.push_const_16 => .{ .new, .{ .push , 0, .@"const", const_vals2 } },
|
|
.push_const_32 => .{ .new, .{ .push , 0, .@"const", const_vals4 } },
|
|
.push_const_64 => .{ .new, .{ .push , 0, .@"const", const_vals8 } },
|
|
.push_const_128 => .{ .new, .{ .push , 0, .@"const", const_vals16 } },
|
|
.push_small_16le => .{ .new, .{ .push , 0, .small , .{ i16, .little } } },
|
|
.push_small_32le => .{ .new, .{ .push , 0, .small , .{ i32, .little } } },
|
|
.push_small_64le => .{ .new, .{ .push , 0, .small , .{ i64, .little } } },
|
|
.push_small_16be => .{ .new, .{ .push , 0, .small , .{ i16, .big } } },
|
|
.push_small_32be => .{ .new, .{ .push , 0, .small , .{ i32, .big } } },
|
|
.push_small_64be => .{ .new, .{ .push , 0, .small , .{ i64, .big } } },
|
|
.push_few_8 => .{ .new, .{ .push , 0, .few , .{ u8 , 3 } } },
|
|
.push_few_16 => .{ .new, .{ .push , 0, .few , .{ u16, 6 } } },
|
|
.push_few_32 => .{ .new, .{ .push , 0, .few , .{ u32, 9 } } },
|
|
.push_few_64 => .{ .new, .{ .push , 0, .few , .{ u64, 12 } } },
|
|
|
|
// .rmw context: { op, T, endian, xor_bits } (xor_bits is only read by .xor)
.xor_1 => .{ .rmw, .{ .xor , u8 , native_endian, 1 } },
|
|
.xor_few_8 => .{ .rmw, .{ .xor , u8 , native_endian, 3 } },
|
|
.xor_few_16 => .{ .rmw, .{ .xor , u16, native_endian, 6 } },
|
|
.xor_few_32 => .{ .rmw, .{ .xor , u32, native_endian, 9 } },
|
|
.xor_few_64 => .{ .rmw, .{ .xor , u64, native_endian, 12 } },
|
|
|
|
.truncate_8 => .{ .rmw, .{ .truncate , u8 , native_endian, {} } },
|
|
.truncate_16le => .{ .rmw, .{ .truncate , u16, .little , {} } },
|
|
.truncate_32le => .{ .rmw, .{ .truncate , u32, .little , {} } },
|
|
.truncate_64le => .{ .rmw, .{ .truncate , u64, .little , {} } },
|
|
.truncate_16be => .{ .rmw, .{ .truncate , u16, .big , {} } },
|
|
.truncate_32be => .{ .rmw, .{ .truncate , u32, .big , {} } },
|
|
.truncate_64be => .{ .rmw, .{ .truncate , u64, .big , {} } },
|
|
|
|
.add_8 => .{ .rmw, .{ .add , i8 , native_endian, {} } },
|
|
.add_16le => .{ .rmw, .{ .add , i16, .little , {} } },
|
|
.add_32le => .{ .rmw, .{ .add , i32, .little , {} } },
|
|
.add_64le => .{ .rmw, .{ .add , i64, .little , {} } },
|
|
.add_16be => .{ .rmw, .{ .add , i16, .big , {} } },
|
|
.add_32be => .{ .rmw, .{ .add , i32, .big , {} } },
|
|
.add_64be => .{ .rmw, .{ .add , i64, .big , {} } },
|
|
|
|
.packed_set_rng_8 => .{ .rmw, .{ .packed_rng, u8 , native_endian, {} } },
|
|
.packed_set_rng_16le => .{ .rmw, .{ .packed_rng, u16, .little , {} } },
|
|
.packed_set_rng_32le => .{ .rmw, .{ .packed_rng, u32, .little , {} } },
|
|
.packed_set_rng_64le => .{ .rmw, .{ .packed_rng, u64, .little , {} } },
|
|
.packed_set_rng_16be => .{ .rmw, .{ .packed_rng, u16, .big , {} } },
|
|
.packed_set_rng_32be => .{ .rmw, .{ .packed_rng, u32, .big , {} } },
|
|
.packed_set_rng_64be => .{ .rmw, .{ .packed_rng, u64, .big , {} } },
|
|
// zig fmt: on
|
|
};
|
|
|
|
switch (class) {
|
|
// Generates new bytes and either overwrites part of the input with them (set),
// inserts them at a random offset (insert), or appends them (push).
.new => {
|
|
const op: enum {
|
|
set,
|
|
insert,
|
|
push,
|
|
|
|
// Upper bound for the length of a `.many` span: a set cannot cover more bytes
// than the input has, inserts and pushes are bounded by max_insert_len.
pub fn maxLen(comptime op: @This(), in_len: usize) usize {
|
|
return switch (op) {
|
|
.set => @min(in_len, max_set_len),
|
|
.insert, .push => max_insert_len,
|
|
};
|
|
}
|
|
}, const min_in_len, const data: enum {
|
|
rng,
|
|
zero,
|
|
common,
|
|
print,
|
|
integer,
|
|
wtf8,
|
|
existing,
|
|
splice,
|
|
@"const",
|
|
small,
|
|
few,
|
|
}, const data_ctx = class_ctx;
|
|
const Size = enum { one, many, large };
|
|
// All applicability checks happen before the first random draw and before
// anything is appended to out.
if (in.len < min_in_len) return false;
|
|
// No constants to choose from.
if (data == .@"const" and data_ctx.len == 0) return false;
|
|
|
|
const splice_i = if (data == .splice) blk: {
|
|
// Element zero always holds an empty input, so we do not select it
|
|
if (corpus.len == 1) return false;
|
|
break :blk rng.intRangeLessThanBiased(usize, 1, corpus.len);
|
|
} else undefined;
|
|
|
|
// Only needs to be followed for set
|
|
const len = switch (data) {
|
|
// For the remaining data kinds data_ctx is a Size.
else => switch (@as(Size, data_ctx)) {
|
|
.one => 1,
|
|
.many => rng.intRangeAtMostBiased(usize, 1, op.maxLen(in.len)),
|
|
.large => rng.intRangeAtMostBiased(usize, 1, max_large_insert_len),
|
|
},
|
|
.wtf8 => undefined, // varies by size of each code unit
|
|
.splice => rng.intRangeAtMostBiased(usize, 1, @min(
|
|
corpus[splice_i].len,
|
|
op.maxLen(in.len),
|
|
)),
|
|
.existing => rng.intRangeAtMostBiased(usize, 1, @min(
|
|
in.len,
|
|
op.maxLen(in.len),
|
|
)),
|
|
// Byte size of one element of the constants slice.
.@"const" => @sizeOf(@typeInfo(@TypeOf(data_ctx)).pointer.child),
|
|
// Byte size of the integer type named in the context.
.small, .few => @sizeOf(data_ctx[0]),
|
|
};
|
|
|
|
// Offset into the input at which the new bytes are placed.
const i = switch (op) {
|
|
.set => rng.uintAtMostBiased(usize, in.len - len),
|
|
.insert => rng.uintAtMostBiased(usize, in.len),
|
|
.push => in.len,
|
|
};
|
|
|
|
// Output = input prefix, then the generated bytes, then the input suffix.
out.appendSliceAssumeCapacity(in[0..i]);
|
|
switch (data) {
|
|
.rng => {
|
|
var bytes: [@max(max_insert_len, max_set_len)]u8 = undefined;
|
|
rng.bytes(bytes[0..len]);
|
|
out.appendSliceAssumeCapacity(bytes[0..len]);
|
|
},
|
|
.zero => out.appendNTimesAssumeCapacity(0, len),
|
|
// One of 64 characters per byte: space, digits, uppercase letters,
// underscore, lowercase letters.
.common => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
|
|
c.* = switch (rng.int(u6)) {
|
|
0 => ' ',
|
|
1...10 => |x| '0' + (@as(u8, x) - 1),
|
|
11...36 => |x| 'A' + (@as(u8, x) - 11),
|
|
37 => '_',
|
|
38...63 => |x| 'a' + (@as(u8, x) - 38),
|
|
};
|
|
},
|
|
// Printable ASCII, space through '~'.
.print => for (out.addManyAsSliceAssumeCapacity(len)) |*c| {
|
|
c.* = rng.intRangeAtMostBiased(u8, 0x20, 0x7E);
|
|
},
|
|
// Decimal digits with an optional leading '-'; the sign counts towards len.
.integer => {
|
|
const negative = len != 0 and rng.boolean();
|
|
if (negative) {
|
|
out.appendAssumeCapacity('-');
|
|
}
|
|
|
|
for (out.addManyAsSliceAssumeCapacity(len - @intFromBool(negative))) |*c| {
|
|
c.* = rng.intRangeAtMostBiased(u8, '0', '9');
|
|
}
|
|
},
|
|
.wtf8 => {
|
|
// len is undefined for wtf8, which a set would need for its offset.
comptime assert(op != .set);
|
|
// At most max_insert_len / 4 code points, so that even if every one of them
// takes four bytes the total stays within max_insert_len.
var codepoints: usize = if (data_ctx == .one)
|
|
1
|
|
else
|
|
rng.intRangeAtMostBiased(usize, 1, Mutation.max_insert_len / 4);
|
|
|
|
while (true) {
|
|
// Choose the encoded length first (units1 + 1 bytes), then a code point
// from the range that encodes to that length. The three-byte range
// includes the surrogates.
const units1 = rng.int(u2);
|
|
const value = switch (units1) {
|
|
0 => rng.int(u7),
|
|
1 => rng.intRangeAtMostBiased(u11, 0x000080, 0x0007FF),
|
|
2 => rng.intRangeAtMostBiased(u16, 0x000800, 0x00FFFF),
|
|
3 => rng.intRangeAtMostBiased(u21, 0x010000, 0x10FFFF),
|
|
};
|
|
const units = @as(u3, units1) + 1;
|
|
|
|
var buf: [4]u8 = undefined;
|
|
assert(std.unicode.wtf8Encode(value, &buf) catch unreachable == units);
|
|
out.appendSliceAssumeCapacity(buf[0..units]);
|
|
|
|
codepoints -= 1;
|
|
if (codepoints == 0) break;
|
|
}
|
|
},
|
|
// Copy len bytes from a random offset of the input itself.
.existing => {
|
|
const j = rng.uintAtMostBiased(usize, in.len - len);
|
|
out.appendSliceAssumeCapacity(in[j..][0..len]);
|
|
},
|
|
// Copy len bytes from a random offset of the chosen corpus entry.
.splice => {
|
|
const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
|
|
out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
|
|
},
|
|
// Append the in-memory bytes of a randomly chosen constant.
.@"const" => out.appendSliceAssumeCapacity(@ptrCast(
|
|
&data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
|
|
)),
|
|
// Append a random SmallValue as the context's integer type in the context's
// byte order.
.small => out.appendSliceAssumeCapacity(@ptrCast(
|
|
&mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
|
|
)),
|
|
// Append the in-memory bytes of a value with only a few bits set.
.few => out.appendSliceAssumeCapacity(@ptrCast(
|
|
&fewValue(rng, data_ctx[0], data_ctx[1]),
|
|
)),
|
|
}
|
|
// Input suffix: a set skips the len bytes it replaced, an insert keeps them all.
switch (op) {
|
|
.set => out.appendSliceAssumeCapacity(in[i + len ..]),
|
|
.insert => out.appendSliceAssumeCapacity(in[i..]),
|
|
.push => {},
|
|
}
|
|
},
|
|
// Drops bytes from a random position (delete) or from the end (pop).
.remove => {
|
|
if (in.len == 0) return false;
|
|
const Op = enum { delete, pop };
|
|
const op: Op, const max_len = class_ctx;
|
|
// LessThan is used so we don't delete the entire span (which is unproductive since
|
|
// an empty input has always been tried)
|
|
// NOTE(review): when max_len != 1 the draw can yield len == 0, which copies the
// input unchanged -- confirm that this is intended.
const len = if (max_len == 1) 1 else rng.uintLessThanBiased(
|
|
usize,
|
|
@min(max_len + 1, in.len),
|
|
);
|
|
switch (op) {
|
|
.delete => {
|
|
const i = rng.uintAtMostBiased(usize, in.len - len);
|
|
out.appendSliceAssumeCapacity(in[0..i]);
|
|
out.appendSliceAssumeCapacity(in[i + len ..]);
|
|
},
|
|
.pop => out.appendSliceAssumeCapacity(in[0 .. in.len - len]),
|
|
}
|
|
},
|
|
// Read-modify-write: reads an integer of type T at a random offset, transforms it,
// and writes it back into a copy of the input.
.rmw => {
|
|
const Op = enum { xor, truncate, add, packed_rng };
|
|
const op: Op, const T, const endian, const xor_bits = class_ctx;
|
|
if (in.len < @sizeOf(T)) return false;
|
|
const Log2T = math.Log2Int(T);
|
|
|
|
const idx = rng.uintAtMostBiased(usize, in.len - @sizeOf(T));
|
|
const old = mem.readInt(T, in[idx..][0..@sizeOf(T)], endian);
|
|
const new = switch (op) {
|
|
.xor => old ^ fewValue(rng, T, xor_bits),
|
|
// Clears a random number of high bits (a shift of zero clears none).
.truncate => old & (@as(T, math.maxInt(T)) >> rng.int(Log2T)),
|
|
// Wrapping add of a random AddValue; a drawn zero is replaced by one so the
// value always changes.
.add => old +% addend: {
|
|
const val = rng.int(Mutation.AddValue);
|
|
break :addend if (val == 0) 1 else val;
|
|
},
|
|
// XORs with a random value that is shifted right by `bits` and then left by
// at most `bits`, so only one contiguous window of bits can change.
.packed_rng => blk: {
|
|
const bits = rng.int(math.Log2Int(T)) +| 1;
|
|
break :blk old ^ (rng.int(T) >> bits << rng.uintAtMostBiased(Log2T, bits));
|
|
},
|
|
};
|
|
out.appendSliceAssumeCapacity(in);
|
|
// out.items[8..] skips the 8-byte length field at the start of out, so idx
// addresses the same byte as it does in `in`.
mem.bytesAsValue(T, out.items[8..][idx..][0..@sizeOf(T)]).* =
|
|
mem.nativeTo(T, new, endian);
|
|
},
|
|
// Cuts a span out of the input and re-inserts it at a different offset.
.move_span => {
|
|
if (in.len < 2) return false;
|
|
// One less since moving whole output will never change anything
|
|
const len = rng.intRangeAtMostBiased(usize, 1, @min(
|
|
in.len - 1,
|
|
Mutation.max_set_len,
|
|
));
|
|
|
|
const src = rng.uintAtMostBiased(usize, in.len - len);
|
|
// This indexes into the final input
|
|
const dst = blk: {
|
|
// Draw from one value fewer and step over src, so that dst != src.
const res = rng.uintAtMostBiased(usize, in.len - len - 1);
|
|
break :blk res + @intFromBool(res >= src);
|
|
};
|
|
|
|
if (src < dst) {
|
|
// Moving towards the end: the bytes between the span and its destination
// come first, then the span.
out.appendSliceAssumeCapacity(in[0..src]);
|
|
out.appendSliceAssumeCapacity(in[src + len .. dst + len]);
|
|
out.appendSliceAssumeCapacity(in[src..][0..len]);
|
|
out.appendSliceAssumeCapacity(in[dst + len ..]);
|
|
} else {
|
|
// Moving towards the start: the span comes first, then the bytes it jumped over.
out.appendSliceAssumeCapacity(in[0..dst]);
|
|
out.appendSliceAssumeCapacity(in[src..][0..len]);
|
|
out.appendSliceAssumeCapacity(in[dst..src]);
|
|
out.appendSliceAssumeCapacity(in[src + len ..]);
|
|
}
|
|
},
|
|
// Overwrites bytes of the input with the bytes found at the same offset of a
// randomly chosen corpus entry (entry zero is never chosen).
.replicate_splice_span => {
|
|
if (in.len == 0) return false;
|
|
if (corpus.len == 1) return false;
|
|
const from = corpus[rng.intRangeLessThanBiased(usize, 1, corpus.len)];
|
|
// NOTE(review): this draw can yield len == 0, which copies the input unchanged --
// confirm that this is intended.
const len = rng.uintLessThanBiased(usize, @min(in.len, from.len, max_replicate_len));
|
|
const i = rng.uintAtMostBiased(usize, @min(in.len, from.len) - len);
|
|
out.appendSliceAssumeCapacity(in[0..i]);
|
|
out.appendSliceAssumeCapacity(from[i..][0..len]);
|
|
out.appendSliceAssumeCapacity(in[i + len ..]);
|
|
},
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
/// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
///
/// The list is a shared, read-write mapping of `file`. Growing the list grows
/// the file and replaces the mapping.
pub const MemoryMappedList = struct {
    /// Contents of the list.
    ///
    /// Pointers to elements in this slice are invalidated whenever the list
    /// has to grow, because growing replaces the memory mapping. Functions
    /// with the `AssumeCapacity` suffix never do that.
    items: []align(std.heap.page_size_min) volatile u8,
    /// How many bytes this list can hold without remapping. This is also the
    /// length of the mapping.
    capacity: usize,
    /// The file is kept open so that it can be resized.
    file: std.fs.File,

    /// Maps the first `capacity` bytes of `file` as shared read-write memory
    /// and returns a list whose first `length` bytes are in use.
    ///
    /// The file is not resized here; use `create` for that.
    pub fn init(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
        const ptr = try std.posix.mmap(
            null,
            capacity,
            std.posix.PROT.READ | std.posix.PROT.WRITE,
            .{ .TYPE = .SHARED },
            file.handle,
            0,
        );
        return .{
            .file = file,
            .items = ptr[0..length],
            .capacity = capacity,
        };
    }

    /// Sets the size of `file` to `capacity` bytes and then maps it, see `init`.
    pub fn create(file: std.fs.File, length: usize, capacity: usize) !MemoryMappedList {
        try file.setEndPos(capacity);
        return init(file, length, capacity);
    }

    /// Closes the file and releases the mapping. The list must not be used afterwards.
    pub fn deinit(l: *MemoryMappedList) void {
        l.file.close();
        std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
        l.* = undefined;
    }

    /// Modify the array so that it can hold at least `additional_count` **more** items.
    /// Invalidates element pointers if additional memory is needed.
    pub fn ensureUnusedCapacity(l: *MemoryMappedList, additional_count: usize) !void {
        return l.ensureTotalCapacity(l.items.len + additional_count);
    }

    /// If the current capacity is less than `new_capacity`, this function will
    /// modify the array so that it can hold at least `new_capacity` items.
    /// Invalidates element pointers if additional memory is needed.
    pub fn ensureTotalCapacity(l: *MemoryMappedList, new_capacity: usize) !void {
        if (l.capacity >= new_capacity) return;

        const better_capacity = growCapacity(l.capacity, new_capacity);
        return l.ensureTotalCapacityPrecise(better_capacity);
    }

    /// If the current capacity is less than `new_capacity`, grows the file to exactly
    /// `new_capacity` bytes and replaces the mapping with one of that size.
    /// Invalidates element pointers if additional memory is needed.
    ///
    /// On error the list is left untouched and still refers to its previous, valid mapping.
    pub fn ensureTotalCapacityPrecise(l: *MemoryMappedList, new_capacity: usize) !void {
        if (l.capacity >= new_capacity) return;

        // Both fallible steps run while the old mapping is still in place, so a failure
        // cannot leave `items` pointing at unmapped memory.
        try l.file.setEndPos(new_capacity);
        const remapped = try init(l.file, l.items.len, new_capacity);

        std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
        l.* = remapped;
    }

    /// Invalidates all element pointers.
    pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
        l.items.len = 0;
    }

    /// Append the slice of items to the list.
    /// Asserts that the list can hold the additional items.
    pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
        const old_len = l.items.len;
        const new_len = old_len + items.len;
        assert(new_len <= l.capacity);
        l.items.len = new_len;
        @memcpy(l.items[old_len..][0..items.len], items);
    }

    /// Extends the list by 1 element.
    /// Never invalidates element pointers.
    /// Asserts that the list can hold one additional item.
    pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
        const new_item_ptr = l.addOneAssumeCapacity();
        new_item_ptr.* = item;
    }

    /// Increase length by 1, returning pointer to the new item.
    /// The returned pointer becomes invalid when the list is resized.
    /// Never invalidates element pointers.
    /// Asserts that the list can hold one additional item.
    pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
        assert(l.items.len < l.capacity);
        l.items.len += 1;
        return &l.items[l.items.len - 1];
    }

    /// Append a value to the list `n` times.
    /// Never invalidates element pointers.
    /// The function is inline so that a comptime-known `value` parameter will
    /// have better memset codegen in case it has a repeated byte pattern.
    /// Asserts that the list can hold the additional items.
    pub inline fn appendNTimesAssumeCapacity(l: *MemoryMappedList, value: u8, n: usize) void {
        const new_len = l.items.len + n;
        assert(new_len <= l.capacity);
        @memset(l.items.ptr[l.items.len..new_len], value);
        l.items.len = new_len;
    }

    /// Resize the array, adding `n` new elements, which have `undefined` values.
    /// The return value is a slice pointing to the newly allocated elements.
    /// Never invalidates element pointers.
    /// The returned pointer becomes invalid when the list is resized.
    /// Asserts that the list can hold the additional items.
    pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
        assert(l.items.len + n <= l.capacity);
        const prev_len = l.items.len;
        l.items.len += n;
        return l.items[prev_len..][0..n];
    }

    /// Called when memory growth is necessary. Returns a capacity larger than
    /// minimum that grows super-linearly.
    ///
    /// The result is a multiple of `std.heap.page_size_max`.
    fn growCapacity(current: usize, minimum: usize) usize {
        var new = current;
        while (true) {
            // The `@max` keeps a zero capacity from staying zero forever, which would
            // make this loop spin without end. It has no effect on a nonzero capacity.
            new = mem.alignForward(usize, @max(new + new / 2, 1), std.heap.page_size_max);
            if (new >= minimum) return new;
        }
    }

    /// Inserts `item` at index `i`, moving the items from `i` onwards up by one.
    /// Asserts that the list can hold one additional item.
    pub fn insertAssumeCapacity(l: *MemoryMappedList, i: usize, item: u8) void {
        assert(l.items.len + 1 <= l.capacity);
        l.items.len += 1;
        volatileCopyBackwards(u8, l.items[i + 1 ..], l.items[i .. l.items.len - 1]);
        l.items[i] = item;
    }

    /// Removes and returns the item at index `i`, moving the items after it down by one.
    /// Asserts that `i` is a valid index.
    pub fn orderedRemove(l: *MemoryMappedList, i: usize) u8 {
        // Removing needs a valid index, not spare capacity: a completely full list
        // must be able to shrink too.
        assert(i < l.items.len);
        const old = l.items[i];
        volatileCopyForwards(u8, l.items[i .. l.items.len - 1], l.items[i + 1 ..]);
        l.items.len -= 1;
        return old;
    }
};
|