fuzzer: remove rodata load tracing

This can be re-evaluated at a later time, but at the moment the
performance and stability concerns hold it back. Additionally, it
promotes a non-smithing approach to fuzz tests.
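For context, the removed instrumentation worked roughly as follows: each __sanitizer_cov_loadN callback checked whether the loaded address fell inside the executable's .rodata range and, if so, marked the touched bytes in a per-byte bitset so that reads of previously unseen constants could be detected. A minimal sketch of that check, simplified from the deleted genericLoad below (the standalone helper and its parameter list are illustrative, not the fuzzer's actual API):

    fn traceRodataLoad(
        comptime T: type,
        ptr: *align(1) const T,
        rodata_addr: usize,
        rodata_size: usize,
        // One bit per .rodata byte; the caller sizes this as
        // ceil(rodata_size / @bitSizeOf(usize)) usizes.
        seen_rodata_loads: []usize,
    ) bool {
        // Wrapping subtraction so addresses below rodata_addr fall out of range.
        const off = @intFromPtr(ptr) -% rodata_addr;
        if (off >= rodata_size) return false; // not a .rodata load
        const i = off / @bitSizeOf(usize);
        // Bits past the usize boundary are truncated, matching the original
        // code's assumption that pointers are almost always aligned.
        const mask = @as(usize, (1 << @sizeOf(T)) - 1) << @intCast(off % @bitSizeOf(usize));
        const new = mask & ~seen_rodata_loads[i];
        if (new == 0) return false;
        seen_rodata_loads[i] |= new;
        return true; // this run read constant bytes no earlier run had read
    }

A hook such as __sanitizer_cov_load4 would forward its pointer here with T = u32; a true result meant the current input exercised new constant data.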
Kendall Condon 2025-09-18 18:34:22 -04:00
parent b905c65661
commit 7c6ccca46d
2 changed files with 13 additions and 203 deletions

@@ -57,9 +57,6 @@ fn bitsetUsizes(elems: usize) usize {
const Executable = struct {
/// Tracks the hit count for each pc as updated by the process's instrumentation.
pc_counters: []u8,
/// Read-only memory section containing compiled-in constants, found by parsing the executable
rodata_addr: usize,
rodata_size: usize,
cache_f: std.fs.Dir,
/// Shared copy of all pcs that have been hit stored in a memory-mapped file that can be viewed
@@ -72,80 +69,12 @@ const Executable = struct {
/// Used before this structure is initialized to avoid illegal behavior
/// from instrumentation functions being called and using undefined values.
pub const preinit: Executable = .{
.rodata_addr = 0,
.rodata_size = 0,
.pc_counters = undefined, // instrumentation works off the __sancov_cntrs section
.cache_f = undefined,
.shared_seen_pcs = undefined,
.pc_digest = undefined,
};
/// Even on error, this initializes rodata_addr and rodata_size to valid values
fn initRodata(self: *Executable) !void {
errdefer {
self.rodata_addr = 0;
self.rodata_size = 0;
}
const exec_path = std.fs.selfExePathAlloc(gpa) catch |e|
if (e == error.OutOfMemory) @panic("OOM") else return e;
defer gpa.free(exec_path);
const exec_file = try std.fs.cwd().openFile(exec_path, .{});
defer exec_file.close();
switch (builtin.object_format) {
.elf => {
// We use two reader instances since the data they respectively read are
// not next to each other in the file.
//
// Using multiple instances is safe since Elf.SectionHeaderIterator always calls
// seekTo (which we also use to arbitrarily set the index) and we always
// call seekTo to arbitrarily read from the string table.
var r_buf: [4096]u8 = undefined;
var r = exec_file.reader(&r_buf);
var str_r_buf: [4096]u8 = undefined;
var str_r = exec_file.reader(&str_r_buf);
const ehdr: std.elf.Header = try .read(&r.interface);
if (ehdr.shstrndx == 0) return error.NoElfStringTable;
var shdr_it = ehdr.iterateSectionHeaders(&r);
shdr_it.index = ehdr.shstrndx;
const str_tab_shdr = try shdr_it.next() orelse return error.InvalidElfSection;
const str_tab_off = str_tab_shdr.sh_offset;
shdr_it.index = 0;
while (try shdr_it.next()) |shdr| {
const flags: packed struct {
write: bool,
alloc: bool,
execinstr: bool,
} = @bitCast(@as(u3, @truncate(shdr.sh_flags)));
if (shdr.sh_addr == 0 or shdr.sh_size == 0 or flags != @TypeOf(flags){
.alloc = true,
.write = false,
.execinstr = false,
}) continue;
const rodata_name = ".rodata\x00";
try str_r.seekTo(try math.add(u64, str_tab_off, shdr.sh_name));
const section_name = str_r.interface.take(rodata_name.len) catch return r.err.?;
if (!std.mem.eql(u8, section_name, rodata_name))
continue;
const addr = math.cast(usize, shdr.sh_addr) orelse return error.Overflow;
const size = math.cast(usize, shdr.sh_size) orelse return error.Overflow;
_ = try math.add(usize, addr, size); // make sure there is no overflow
self.rodata_addr = addr;
self.rodata_size = size;
return;
}
return error.NoRodataSection;
},
else => return error.UnsupportedObjectFormat,
}
}
fn getCoverageFile(cache_dir: std.fs.Dir, pcs: []const usize, pc_digest: u64) MemoryMappedList {
const pc_bitset_usizes = bitsetUsizes(pcs.len);
const coverage_file_name = std.fmt.hex(pc_digest);
@@ -284,11 +213,6 @@ const Executable = struct {
.{ self.pc_counters.len, pcs.len },
);
self.initRodata() catch |e| if (e != error.UnsupportedObjectFormat) std.log.err(
\\failed to enumerate read-only memory: {t}
\\efficiency will be severely reduced
, .{e});
self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs));
self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
@@ -322,33 +246,22 @@ const Executable = struct {
};
};
/// Data gathered from instrumentation functions
/// Separate from Executable since its state is resettable and changes
/// Separate from Fuzzer since it may be needed before fuzzing starts
/// Data gathered from instrumentation functions.
/// Separate from Executable since its state is resettable and changes.
/// Separate from Fuzzer since it may be needed before fuzzing starts.
const Instrumentation = struct {
/// Bitset of seen pcs across all runs excluding fresh pcs.
/// This is separate from shared_seen_pcs because multiple fuzzing processes are likely using
/// it, which causes contention and sets pcs unrelated to our campaign.
seen_pcs: []usize,
/// Bitset of seen rodata bytes read across all runs
seen_rodata_loads: []usize,
/// Bitset of a run's read bytes that weren't present in seen_rodata_loads
/// Elements are always zero if !any_new_data_loads
new_rodata_loads: []usize,
any_new_rodata_loads: bool,
/// Stores a fresh input's new pcs
fresh_pcs: []usize,
/// Stores a fresh input's new reads
/// Elements are always zero if !any_fresh_rodata_loads
fresh_rodata_loads: []usize,
any_fresh_rodata_loads: bool,
/// Pcs which __sanitizer_cov_trace_switch and __sanitizer_cov_trace_const_cmpx
/// have been called from and whose values have already been added to const_x_vals
const_pcs: std.AutoArrayHashMapUnmanaged(usize, void) = .empty,
/// Values that have been constant operands in comparisons, switch cases, or memory reads
/// Values that have been constant operands in comparisons and switch cases.
/// There may be duplicates in this array if they came from different addresses, which is
/// fine as they are likely more important and hence more likely to be selected.
const_vals2: std.ArrayListUnmanaged(u16) = .empty,
@@ -361,12 +274,7 @@ const Instrumentation = struct {
/// from instrumentation functions being called and using undefined values.
pub const preinit: Instrumentation = .{
.seen_pcs = undefined, // currently only updated by `Fuzzer`
.seen_rodata_loads = undefined,
.new_rodata_loads = undefined,
.any_new_rodata_loads = undefined,
.fresh_pcs = undefined,
.fresh_rodata_loads = undefined,
.any_fresh_rodata_loads = undefined,
};
pub fn depreinit(self: *Instrumentation) void {
@@ -379,20 +287,14 @@ const Instrumentation = struct {
pub fn init() Instrumentation {
const pc_bitset_usizes = bitsetUsizes(exec.pc_counters.len);
const rodata_bitset_usizes = bitsetUsizes(exec.rodata_size);
const alloc_usizes = pc_bitset_usizes * 2 + rodata_bitset_usizes * 3;
const alloc_usizes = pc_bitset_usizes * 2;
const buf = gpa.alloc(u8, alloc_usizes * @sizeOf(usize)) catch @panic("OOM");
var fba_ctx: std.heap.FixedBufferAllocator = .init(buf);
const fba = fba_ctx.allocator();
var self: Instrumentation = .{
.seen_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
.seen_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
.new_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
.any_new_rodata_loads = undefined,
.fresh_pcs = fba.alloc(usize, pc_bitset_usizes) catch unreachable,
.fresh_rodata_loads = fba.alloc(usize, rodata_bitset_usizes) catch unreachable,
.any_fresh_rodata_loads = undefined,
};
self.reset();
return self;
@@ -400,12 +302,7 @@ const Instrumentation = struct {
pub fn reset(self: *Instrumentation) void {
@memset(self.seen_pcs, 0);
@memset(self.seen_rodata_loads, 0);
@memset(self.new_rodata_loads, 0);
self.any_new_rodata_loads = false;
@memset(self.fresh_pcs, 0);
@memset(self.fresh_rodata_loads, 0);
self.any_fresh_rodata_loads = false;
self.const_pcs.clearRetainingCapacity();
self.const_vals2.clearRetainingCapacity();
self.const_vals4.clearRetainingCapacity();
@@ -418,16 +315,7 @@ const Instrumentation = struct {
return (self.const_pcs.getOrPut(gpa, pc) catch @panic("OOM")).found_existing;
}
pub fn clearNewRodataLoads(self: *Instrumentation) void {
if (self.any_new_rodata_loads) {
@memset(self.new_rodata_loads, 0);
self.any_new_rodata_loads = false;
}
}
pub fn isFresh(self: *Instrumentation) bool {
if (self.any_new_rodata_loads) return true;
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs) |seen_pcs| {
if (hit_pcs.next() & ~seen_pcs != 0) return true;
@@ -436,38 +324,24 @@ const Instrumentation = struct {
return false;
}
/// Updates fresh_pcs and fresh_rodata_loads
/// any_new_rodata_loads and elements of new_rodata_loads are unspecified
/// afterwards, but still valid.
/// Updates `fresh_pcs`
pub fn setFresh(self: *Instrumentation) void {
var hit_pcs = exec.pcBitsetIterator();
for (self.seen_pcs, self.fresh_pcs) |seen_pcs, *fresh_pcs| {
fresh_pcs.* = hit_pcs.next() & ~seen_pcs;
}
mem.swap([]usize, &self.fresh_rodata_loads, &self.new_rodata_loads);
mem.swap(bool, &self.any_fresh_rodata_loads, &self.any_new_rodata_loads);
}
/// Returns if exec.pc_counters and new_rodata_loads are the same or a superset of fresh_pcs and
/// fresh_rodata_loads respectively.
/// Returns whether `exec.pc_counters` is a superset of `fresh_pcs`.
pub fn atleastFresh(self: *Instrumentation) bool {
var hit_pcs = exec.pcBitsetIterator();
for (self.fresh_pcs) |fresh_pcs| {
if (fresh_pcs & hit_pcs.next() != fresh_pcs) return false;
}
if (self.any_fresh_rodata_loads) {
if (!self.any_new_rodata_loads) return false;
for (self.new_rodata_loads, self.fresh_rodata_loads) |n, f| {
if (n & f != f) return false;
}
}
return true;
}
/// Updates based off fresh_pcs and fresh_rodata_loads
/// Updates based off `fresh_pcs`
fn updateSeen(self: *Instrumentation) void {
comptime assert(abi.SeenPcsHeader.trailing[0] == .pc_bits_usize);
const shared_seen_pcs: [*]volatile usize = @ptrCast(
@@ -479,11 +353,6 @@ const Instrumentation = struct {
if (fresh != 0)
_ = @atomicRmw(usize, shared_seen, .Or, fresh, .monotonic);
}
if (self.any_fresh_rodata_loads) {
for (self.seen_rodata_loads, self.fresh_rodata_loads) |*s, f|
s.* |= f;
}
}
};
@@ -496,8 +365,8 @@ const Fuzzer = struct {
/// input.
input: MemoryMappedList,
/// Minimized past inputs leading to new pcs or rodata reads. These are randomly mutated in
/// round-robin fashion
/// Minimized past inputs leading to new pc hits.
/// These are randomly mutated in round-robin fashion.
/// Element zero is always an empty input. It is guaranteed that no other elements are empty.
corpus: std.ArrayListUnmanaged([]const u8),
corpus_pos: usize,
@@ -596,10 +465,9 @@ const Fuzzer = struct {
self.run();
inst.setFresh();
inst.updateSeen();
inst.clearNewRodataLoads();
}
/// Assumes fresh_pcs and fresh_rodata_loads correspond to the input
/// Assumes `fresh_pcs` corresponds to the input
fn minimizeInput(self: *Fuzzer) void {
// The minimization technique is kept relatively simple: we sequentially try to remove each
// byte and check that the new pcs are still hit.
@@ -609,7 +477,6 @@ const Fuzzer = struct {
const old = self.input.orderedRemove(i);
@memset(exec.pc_counters, 0);
inst.clearNewRodataLoads();
self.run();
if (!inst.atleastFresh()) {
@@ -623,11 +490,7 @@ const Fuzzer = struct {
}
fn run(self: *Fuzzer) void {
// We don't need to clear pc_counters here; all we care about is new hits and not already
// seen hits. Ideally, we wouldn't even have these counters and do something similar to
// what we do for tracking memory (i.e. a __sanitizer_cov function that updates a flag on a
// new hit.)
assert(!inst.any_new_rodata_loads);
// `pc_counters` is not cleared since only new hits are relevant.
mem.bytesAsValue(usize, self.input.items[0..8]).* =
mem.nativeToLittle(usize, self.input.items.len - 8);
@@ -673,7 +536,6 @@ const Fuzzer = struct {
inst.setFresh();
self.minimizeInput();
inst.updateSeen();
inst.clearNewRodataLoads();
// An empty input has always been tried, so if an empty input is fresh then the
// test has to be non-deterministic. This has to be checked as duplicate empty
@@ -796,58 +658,6 @@ export fn __sanitizer_cov_trace_switch(val: u64, cases: [*]const u64) void {
}
}
fn genericLoad(T: anytype, ptr: *align(1) const T, comptime opt_const_vals_field: ?[]const u8) void {
const addr = @intFromPtr(ptr);
const off = addr -% exec.rodata_addr;
if (off >= exec.rodata_size) {
@branchHint(.likely);
return;
}
const i = off / @bitSizeOf(usize);
// Bits are intentionally truncated since the pointer will almost always be aligned
const hit = (@as(usize, (1 << @sizeOf(T)) - 1)) << @intCast(off % @bitSizeOf(usize));
const new = hit & ~inst.seen_rodata_loads[i];
if (new == 0) {
@branchHint(.likely);
return;
}
inst.new_rodata_loads[i] |= new;
inst.any_new_rodata_loads = true;
if (opt_const_vals_field) |const_vals_field| {
// This may have already been hit and this run is just being used for evaluating the
// input, in which case we do not want to re-add the same value.
if (inst.any_fresh_rodata_loads) {
@branchHint(.unlikely);
if (new & ~inst.fresh_rodata_loads[i] == 0)
return;
}
@field(inst, const_vals_field).append(gpa, ptr.*) catch @panic("OOM");
}
}
export fn __sanitizer_cov_load1(ptr: *align(1) const u8) void {
genericLoad(u8, ptr, null);
}
export fn __sanitizer_cov_load2(ptr: *align(1) const u16) void {
genericLoad(u16, ptr, "const_vals2");
}
export fn __sanitizer_cov_load4(ptr: *align(1) const u32) void {
genericLoad(u32, ptr, "const_vals4");
}
export fn __sanitizer_cov_load8(ptr: *align(1) const u64) void {
genericLoad(u64, ptr, "const_vals8");
}
export fn __sanitizer_cov_load16(ptr: *align(1) const u128) void {
genericLoad(u128, ptr, "const_vals16");
}
export fn __sanitizer_cov_trace_cmp1(arg1: u8, arg2: u8) void {
_ = arg1;
_ = arg2;

@@ -1115,7 +1115,7 @@ pub const Object = struct {
.NoPrune = false,
// Workaround for https://github.com/llvm/llvm-project/pull/106464
.StackDepth = true,
.TraceLoads = options.fuzz,
.TraceLoads = false,
.TraceStores = false,
.CollectControlFlow = false,
},
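This accompanies the fuzzer change above: with .TraceLoads = false the LLVM SanitizerCoverage pass no longer emits calls to the __sanitizer_cov_loadN hooks that were deleted from the fuzzer runtime. For reference, this is the same instrumentation that clang exposes as a SanitizerCoverage feature (assumption: flag spelling taken from LLVM's SanitizerCoverage documentation, not from this repository):

    -fsanitize-coverage=trace-loads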