Add SelectiveWalker/walkSelectively and implement Walker in terms of it

This is a breaking change, since the fields of Walker have changed. The function APIs are unchanged, though.
This commit is contained in:
Ryan Liptak 2025-09-24 16:17:51 -07:00
parent 07c3f9ef8e
commit 760127a760
2 changed files with 187 additions and 74 deletions

View file

@ -663,11 +663,120 @@ fn iterateImpl(self: Dir, first_iter_start_value: bool) Iterator {
}
}
pub const Walker = struct {
stack: std.ArrayListUnmanaged(StackItem),
/// A directory walker that only descends into a subdirectory when the caller
/// explicitly asks for it with `enter`. Entries are produced by `next`.
pub const SelectiveWalker = struct {
    /// One iterator per directory currently being walked. The last item is the
    /// directory that `next` reads from. Directories pushed by `enter` are
    /// opened by this walker and are closed when they are popped or on `deinit`.
    stack: std.ArrayListUnmanaged(Walker.StackItem),
    /// Holds the path of the most recently returned entry followed by a 0 byte.
    /// `Walker.Entry.path` and `Walker.Entry.basename` are slices into it.
    name_buffer: std.ArrayListUnmanaged(u8),
    /// Used for every allocation made by `stack` and `name_buffer`.
    allocator: Allocator,

    /// Returns the next entry of the directory on top of the stack, or `null`
    /// once every directory on the stack has been exhausted.
    ///
    /// After each call to this function, and on deinit(), the memory returned
    /// from this function becomes invalid. A copy must be made in order to keep
    /// a reference to the path.
    pub fn next(self: *SelectiveWalker) !?Walker.Entry {
        while (self.stack.items.len > 0) {
            const top = &self.stack.items[self.stack.items.len - 1];
            var dirname_len = top.dirname_len;
            if (top.iter.next() catch |err| {
                // If we get an error, then we want the user to be able to continue
                // walking if they want, which means that we need to pop the directory
                // that errored from the stack. Otherwise, all future `next` calls would
                // likely just fail with the same error.
                var item = self.stack.pop().?;
                // An empty stack after the pop means `item` was the bottom-most
                // directory, which is left open.
                if (self.stack.items.len != 0) {
                    item.iter.dir.close();
                }
                return err;
            }) |entry| {
                // Truncate back to the containing directory's path, then append
                // "<sep><name>\0" (the separator is omitted at the top level).
                self.name_buffer.shrinkRetainingCapacity(dirname_len);
                if (self.name_buffer.items.len != 0) {
                    try self.name_buffer.append(self.allocator, fs.path.sep);
                    dirname_len += 1;
                }
                try self.name_buffer.ensureUnusedCapacity(self.allocator, entry.name.len + 1);
                self.name_buffer.appendSliceAssumeCapacity(entry.name);
                self.name_buffer.appendAssumeCapacity(0);
                const walker_entry: Walker.Entry = .{
                    .dir = top.iter.dir,
                    .basename = self.name_buffer.items[dirname_len .. self.name_buffer.items.len - 1 :0],
                    .path = self.name_buffer.items[0 .. self.name_buffer.items.len - 1 :0],
                    .kind = entry.kind,
                };
                return walker_entry;
            } else {
                // The directory on top of the stack is exhausted: drop it and
                // resume with its parent on the next loop iteration.
                var item = self.stack.pop().?;
                if (self.stack.items.len != 0) {
                    item.iter.dir.close();
                }
            }
        }
        return null;
    }

    /// Traverses into the directory, continuing walking one level down.
    ///
    /// Does nothing when `entry` is not a directory. `entry` is expected to be
    /// the entry most recently returned by `next`, since the current contents
    /// of the path buffer are recorded as the new directory's path.
    pub fn enter(self: *SelectiveWalker, entry: Walker.Entry) !void {
        if (entry.kind != .directory) {
            @branchHint(.cold);
            return;
        }
        var new_dir = entry.dir.openDir(entry.basename, .{ .iterate = true }) catch |err| {
            switch (err) {
                error.NameTooLong => unreachable,
                else => |e| return e,
            }
        };
        // Do not leak the freshly opened handle if growing the stack fails.
        errdefer new_dir.close();
        try self.stack.append(self.allocator, .{
            .iter = new_dir.iterateAssumeFirstIteration(),
            // The buffer ends with a 0 terminator that is not part of the path.
            .dirname_len = self.name_buffer.items.len - 1,
        });
    }

    /// Closes every directory that was opened by `enter` and is still on the
    /// stack, then frees the memory owned by the walker. The directory the walk
    /// was started from is not closed.
    pub fn deinit(self: *SelectiveWalker) void {
        // Close any remaining directories except the initial one (which is always at index 0).
        // Without this, abandoning a walk before `next` returns `null` would leak
        // one open handle per directory that was entered and not yet popped.
        if (self.stack.items.len > 1) {
            for (self.stack.items[1..]) |*item| {
                item.iter.dir.close();
            }
        }
        self.stack.deinit(self.allocator);
        self.name_buffer.deinit(self.allocator);
    }

    /// Leaves the current directory, continuing walking one level up.
    ///
    /// Asserts that there is a directory left to leave (the stack is not empty).
    pub fn leave(self: *SelectiveWalker) void {
        var item = self.stack.pop().?;
        // Only the bottom-most directory is left open; everything above it was
        // opened by `enter` and must be closed here.
        if (self.stack.items.len != 0) {
            @branchHint(.likely);
            item.iter.dir.close();
        }
    }
};
/// Recursively iterates over a directory, but requires the user to
/// opt-in to recursing into each directory entry.
///
/// `self` must have been opened with `OpenOptions{.iterate = true}`.
///
/// `SelectiveWalker.deinit` releases allocated memory and directory handles.
///
/// The order of returned file system entries is undefined.
///
/// `self` will not be closed after walking it.
///
/// Fails only if allocating the initial stack entry fails.
///
/// See also `walk`.
pub fn walkSelectively(self: Dir, allocator: Allocator) Allocator.Error!SelectiveWalker {
    var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;

    // The starting directory is the bottom of the stack; its path prefix is empty.
    try stack.append(allocator, .{
        .iter = self.iterate(),
        .dirname_len = 0,
    });

    return .{
        .stack = stack,
        .name_buffer = .empty,
        .allocator = allocator,
    };
}
pub const Walker = struct {
inner: SelectiveWalker,
pub const Entry = struct {
/// The containing directory. This can be used to operate directly on `basename`
/// rather than `path`, avoiding `error.NameTooLong` for deeply nested paths.
@ -687,72 +796,22 @@ pub const Walker = struct {
/// from this function becomes invalid. A copy must be made in order to keep
/// a reference to the path.
pub fn next(self: *Walker) !?Walker.Entry {
const gpa = self.allocator;
while (self.stack.items.len != 0) {
// `top` and `containing` become invalid after appending to `self.stack`
var top = &self.stack.items[self.stack.items.len - 1];
var containing = top;
var dirname_len = top.dirname_len;
if (top.iter.next() catch |err| {
// If we get an error, then we want the user to be able to continue
// walking if they want, which means that we need to pop the directory
// that errored from the stack. Otherwise, all future `next` calls would
// likely just fail with the same error.
var item = self.stack.pop().?;
if (self.stack.items.len != 0) {
item.iter.dir.close();
}
return err;
}) |base| {
self.name_buffer.shrinkRetainingCapacity(dirname_len);
if (self.name_buffer.items.len != 0) {
try self.name_buffer.append(gpa, fs.path.sep);
dirname_len += 1;
}
try self.name_buffer.ensureUnusedCapacity(gpa, base.name.len + 1);
self.name_buffer.appendSliceAssumeCapacity(base.name);
self.name_buffer.appendAssumeCapacity(0);
if (base.kind == .directory) {
var new_dir = top.iter.dir.openDir(base.name, .{ .iterate = true }) catch |err| switch (err) {
error.NameTooLong => unreachable, // no path sep in base.name
else => |e| return e,
};
{
errdefer new_dir.close();
try self.stack.append(gpa, .{
.iter = new_dir.iterateAssumeFirstIteration(),
.dirname_len = self.name_buffer.items.len - 1,
});
top = &self.stack.items[self.stack.items.len - 1];
containing = &self.stack.items[self.stack.items.len - 2];
}
}
return .{
.dir = containing.iter.dir,
.basename = self.name_buffer.items[dirname_len .. self.name_buffer.items.len - 1 :0],
.path = self.name_buffer.items[0 .. self.name_buffer.items.len - 1 :0],
.kind = base.kind,
};
} else {
var item = self.stack.pop().?;
if (self.stack.items.len != 0) {
item.iter.dir.close();
}
}
const entry = try self.inner.next();
if (entry != null and entry.?.kind == .directory) {
try self.inner.enter(entry.?);
}
return null;
return entry;
}
pub fn deinit(self: *Walker) void {
const gpa = self.allocator;
// Close any remaining directories except the initial one (which is always at index 0)
if (self.stack.items.len > 1) {
for (self.stack.items[1..]) |*item| {
item.iter.dir.close();
}
}
self.stack.deinit(gpa);
self.name_buffer.deinit(gpa);
self.inner.deinit();
}
/// Stops walking the current directory and resumes the walk in its parent.
/// `next` descends into a directory entry right after producing it, so when
/// the most recent entry was a directory, the directory being left is that
/// entry's own directory.
pub fn leave(self: *Walker) void {
    SelectiveWalker.leave(&self.inner);
}
};
@ -765,18 +824,11 @@ pub const Walker = struct {
/// The order of returned file system entries is undefined.
///
/// `self` will not be closed after walking it.
///
/// See also `walkSelectively`.
pub fn walk(self: Dir, allocator: Allocator) Allocator.Error!Walker {
var stack: std.ArrayListUnmanaged(Walker.StackItem) = .empty;
try stack.append(allocator, .{
.iter = self.iterate(),
.dirname_len = 0,
});
return .{
.stack = stack,
.name_buffer = .{},
.allocator = allocator,
.inner = try walkSelectively(self, allocator),
};
}

View file

@ -1810,6 +1810,67 @@ test "walker" {
try testing.expectEqual(expected_paths.kvs.len, num_walked);
}
test "selective walker, skip entries that start with ." {
    var tmp = tmpDir(.{ .iterate = true });
    defer tmp.cleanup();

    // Directory trees to build. Anything at or below a dot-prefixed component
    // is expected to stay invisible to the walk below.
    const tree = [_][]const u8{
        "dir1/foo/.git/ignored",
        ".hidden/bar",
        "a/b/c",
        "a/baz",
    };
    for (tree) |sub_path| try tmp.dir.makePath(sub_path);

    // The walk order is undefined, so results are checked through lookup sets
    // instead of being compared against an ordered list.
    const sep = fs.path.sep_str;
    const expected_paths = std.StaticStringMap(void).initComptime(.{
        .{"dir1"},
        .{"dir1" ++ sep ++ "foo"},
        .{"a"},
        .{"a" ++ sep ++ "b"},
        .{"a" ++ sep ++ "b" ++ sep ++ "c"},
        .{"a" ++ sep ++ "baz"},
    });
    const expected_basenames = std.StaticStringMap(void).initComptime(.{
        .{"dir1"},
        .{"foo"},
        .{"a"},
        .{"b"},
        .{"c"},
        .{"baz"},
    });

    var walker = try tmp.dir.walkSelectively(testing.allocator);
    defer walker.deinit();

    var visited: usize = 0;
    while (try walker.next()) |entry| {
        // Dot-prefixed entries are neither counted nor entered.
        const is_hidden = entry.basename[0] == '.';
        if (is_hidden) continue;

        // Recursion is opt-in: descend into every visible directory.
        if (entry.kind == .directory) try walker.enter(entry);

        if (!expected_basenames.has(entry.basename)) {
            std.debug.print("found unexpected basename: {f}\n", .{std.ascii.hexEscape(entry.basename, .lower)});
            return error.TestUnexpectedResult;
        }
        if (!expected_paths.has(entry.path)) {
            std.debug.print("found unexpected path: {f}\n", .{std.ascii.hexEscape(entry.path, .lower)});
            return error.TestUnexpectedResult;
        }

        // `entry.dir` must be the containing directory, so opening `basename`
        // relative to it has to succeed.
        var opened = try entry.dir.openDir(entry.basename, .{});
        opened.close();

        visited += 1;
    }

    try testing.expectEqual(expected_paths.kvs.len, visited);
}
test "walker without fully iterating" {
var tmp = tmpDir(.{ .iterate = true });
defer tmp.cleanup();