windows: Make readLinkW APIs output WTF-16, reduce stack usage of callers

- Affects the following functions:
  + `std.fs.Dir.readLinkW`
  + `std.os.windows.ReadLink`
  + `std.os.windows.ntToWin32Namespace`
  + `std.posix.readlinkW`
  + `std.posix.readlinkatW`

Each of these functions (except `ntToWin32Namespace`) took WTF-16 as input and would output WTF-8, which makes optimal buffer re-use difficult at callsites and could force unnecessary WTF-16 <-> WTF-8 conversion during an intermediate step.

The functions have been updated to output WTF-16, and also allow for the path and the output to re-use the same buffer (i.e. in-place modification), which can reduce the stack usage at callsites. For example, all of `std.fs.Dir.readLink`/`readLinkZ`/`std.posix.readlink`/`readlinkZ`/`readlinkat`/`readlinkatZ` have had their stack usage reduced by one PathSpace struct (64 KiB) when targeting Windows.

The new `ntToWin32Namespace` takes an output buffer and returns a slice from that instead of returning a PathSpace, which is necessary to make the above possible.
This commit is contained in:
Ryan Liptak 2025-10-10 23:04:40 -07:00
parent 06a7597ea8
commit 6aa3570cb0
4 changed files with 119 additions and 65 deletions

View file

@ -1354,8 +1354,14 @@ pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u
return self.readLinkWasi(sub_path, buffer);
}
if (native_os == .windows) {
const sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
return self.readLinkW(sub_path_w.span(), buffer);
var sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
return buffer[0..end_index];
}
const sub_path_c = try posix.toPosixPath(sub_path);
return self.readLinkZ(&sub_path_c, buffer);
@ -1369,15 +1375,24 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
if (native_os == .windows) {
const sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
return self.readLinkW(sub_path_w.span(), buffer);
var sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
return buffer[0..end_index];
}
return posix.readlinkatZ(self.fd, sub_path_c, buffer);
}
/// Windows-only. Same as `readLink` except the pathname parameter
/// is WTF16 LE encoded.
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
/// Windows-only. Same as `readLink` except the path parameter
/// is WTF-16 LE encoded, NT-prefixed, and the result is returned as
/// WTF-16 LE (a `[]u16` slice) rather than WTF-8.
///
/// This is a thin wrapper: it forwards `self.fd`, `sub_path_w`, and `buffer`
/// to `windows.ReadLink` and returns that call's result unchanged.
///
/// `sub_path_w` will never be accessed after `buffer` has been written to, so it
/// is safe to reuse a single buffer for both.
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u16) ![]u16 {
return windows.ReadLink(self.fd, sub_path_w, buffer);
}

View file

@ -193,10 +193,16 @@ test "Dir.readLink" {
// test 1: symlink to a file
try setupSymlink(ctx.dir, file_target_path, "symlink1", .{});
try testReadLink(ctx.dir, canonical_file_target_path, "symlink1");
if (builtin.os.tag == .windows) {
try testReadLinkW(testing.allocator, ctx.dir, canonical_file_target_path, "symlink1");
}
// test 2: symlink to a directory (can be different on Windows)
try setupSymlink(ctx.dir, dir_target_path, "symlink2", .{ .is_directory = true });
try testReadLink(ctx.dir, canonical_dir_target_path, "symlink2");
if (builtin.os.tag == .windows) {
try testReadLinkW(testing.allocator, ctx.dir, canonical_dir_target_path, "symlink2");
}
// test 3: relative path symlink
const parent_file = ".." ++ fs.path.sep_str ++ "target.txt";
@ -205,6 +211,9 @@ test "Dir.readLink" {
defer subdir.close();
try setupSymlink(subdir, canonical_parent_file, "relative-link.txt", .{});
try testReadLink(subdir, canonical_parent_file, "relative-link.txt");
if (builtin.os.tag == .windows) {
try testReadLinkW(testing.allocator, subdir, canonical_parent_file, "relative-link.txt");
}
}
}.impl);
}
@ -215,6 +224,17 @@ fn testReadLink(dir: Dir, target_path: []const u8, symlink_path: []const u8) !vo
try testing.expectEqualStrings(target_path, actual);
}
/// Windows-only test helper: reads the symlink at `symlink_path` (relative to `dir`)
/// through `Dir.readLinkW` and expects the WTF-16 LE result to match `target_path`
/// after converting `target_path` from WTF-8 to WTF-16 LE.
fn testReadLinkW(allocator: mem.Allocator, dir: Dir, target_path: []const u8, symlink_path: []const u8) !void {
    // Expected link target, converted to the encoding `readLinkW` outputs.
    const expected_w = try std.unicode.wtf8ToWtf16LeAlloc(allocator, target_path);
    defer allocator.free(expected_w);

    // Calling the W functions directly requires the path to be NT-prefixed
    const prefixed_link_w = try std.os.windows.sliceToPrefixedFileW(dir.fd, symlink_path);

    // The output buffer is sized to hold exactly the expected target and nothing more.
    const out_w = try allocator.alloc(u16, expected_w.len);
    defer allocator.free(out_w);

    const link_target_w = try dir.readLinkW(prefixed_link_w.span(), out_w);
    try testing.expectEqualSlices(u16, expected_w, link_target_w);
}
fn testReadLinkAbsolute(target_path: []const u8, symlink_path: []const u8) !void {
var buffer: [fs.max_path_bytes]u8 = undefined;
const given = try fs.readLinkAbsolute(symlink_path, buffer[0..]);

View file

@ -894,7 +894,9 @@ pub const ReadLinkError = error{
UnsupportedReparsePointType,
};
pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
/// `sub_path_w` will never be accessed after `out_buffer` has been written to, so it
/// is safe to reuse a single buffer for both.
pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
const result_handle = OpenFile(sub_path_w, .{
.access_mask = FILE_READ_ATTRIBUTES | SYNCHRONIZE,
.dir = dir,
@ -926,14 +928,14 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
const len = buf.SubstituteNameLength >> 1;
const path_buf = @as([*]const u16, &buf.PathBuffer);
const is_relative = buf.Flags & SYMLINK_FLAG_RELATIVE != 0;
return parseReadlinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
return parseReadLinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
},
IO_REPARSE_TAG_MOUNT_POINT => {
const buf: *const MOUNT_POINT_REPARSE_BUFFER = @ptrCast(@alignCast(&reparse_struct.DataBuffer[0]));
const offset = buf.SubstituteNameOffset >> 1;
const len = buf.SubstituteNameLength >> 1;
const path_buf = @as([*]const u16, &buf.PathBuffer);
return parseReadlinkPath(path_buf[offset..][0..len], false, out_buffer);
return parseReadLinkPath(path_buf[offset..][0..len], false, out_buffer);
},
else => {
return error.UnsupportedReparsePointType;
@ -941,19 +943,18 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
}
}
/// Asserts that there is enough space in `out_buffer`.
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {
const win32_namespace_path = path: {
if (is_relative) break :path path;
const win32_path = ntToWin32Namespace(path) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.NotNtPath => break :path path,
fn parseReadLinkPath(path: []const u16, is_relative: bool, out_buffer: []u16) error{NameTooLong}![]u16 {
path: {
if (is_relative) break :path;
return ntToWin32Namespace(path, out_buffer) catch |err| switch (err) {
error.NameTooLong => |e| return e,
error.NotNtPath => break :path,
};
break :path win32_path.span();
};
const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);
return out_buffer[0..out_len];
}
if (out_buffer.len < path.len) return error.NameTooLong;
const dest = out_buffer[0..path.len];
@memcpy(dest, path);
return dest;
}
pub const DeleteFileError = error{
@ -2584,10 +2585,11 @@ test getUnprefixedPathType {
/// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c
///
/// `path` should be encoded as WTF-16LE.
pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
///
/// Supports in-place modification (`path` and `out` may refer to the same slice).
pub fn ntToWin32Namespace(path: []const u16, out: []u16) error{ NameTooLong, NotNtPath }![]u16 {
if (path.len > PATH_MAX_WIDE) return error.NameTooLong;
var path_space: PathSpace = undefined;
const namespace_prefix = getNamespacePrefix(u16, path);
switch (namespace_prefix) {
.nt => {
@ -2595,23 +2597,19 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
var after_prefix = path[4..]; // after the `\??\`
// The prefix \??\UNC\ means this is a UNC path, in which case the
// `\??\UNC\` should be replaced by `\\` (two backslashes)
// TODO: the "UNC" should technically be matched case-insensitively, but
// it's unlikely to matter since most/all paths passed into this
// function will have come from the OS meaning it should have
// the 'canonical' uppercase UNC.
const is_unc = after_prefix.len >= 4 and
std.mem.eql(u16, after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
eqlIgnoreCaseWTF16(after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
std.fs.path.PathType.windows.isSep(u16, std.mem.littleToNative(u16, after_prefix[3]));
const win32_len = path.len - @as(usize, if (is_unc) 6 else 4);
if (out.len < win32_len) return error.NameTooLong;
if (is_unc) {
path_space.data[0] = comptime std.mem.nativeToLittle(u16, '\\');
out[0] = comptime std.mem.nativeToLittle(u16, '\\');
dest_index += 1;
// We want to include the last `\` of `\??\UNC\`
after_prefix = path[7..];
}
@memcpy(path_space.data[dest_index..][0..after_prefix.len], after_prefix);
path_space.len = dest_index + after_prefix.len;
path_space.data[path_space.len] = 0;
return path_space;
@memmove(out[dest_index..][0..after_prefix.len], after_prefix);
return out[0..win32_len];
},
else => return error.NotNtPath,
}
@ -2620,25 +2618,14 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
test ntToWin32Namespace {
const L = std.unicode.utf8ToUtf16LeStringLiteral;
try testNtToWin32Namespace(L("UNC"), L("\\??\\UNC"));
try testNtToWin32Namespace(L("\\\\"), L("\\??\\UNC\\"));
try testNtToWin32Namespace(L("\\\\path1"), L("\\??\\UNC\\path1"));
try testNtToWin32Namespace(L("\\\\path1\\path2"), L("\\??\\UNC\\path1\\path2"));
var mutable_unc_path_buf = L("\\??\\UNC\\path1\\path2").*;
try std.testing.expectEqualSlices(u16, L("\\\\path1\\path2"), try ntToWin32Namespace(&mutable_unc_path_buf, &mutable_unc_path_buf));
try testNtToWin32Namespace(L(""), L("\\??\\"));
try testNtToWin32Namespace(L("C:"), L("\\??\\C:"));
try testNtToWin32Namespace(L("C:\\"), L("\\??\\C:\\"));
try testNtToWin32Namespace(L("C:\\test"), L("\\??\\C:\\test"));
try testNtToWin32Namespace(L("C:\\test\\"), L("\\??\\C:\\test\\"));
var mutable_path_buf = L("\\??\\C:\\test\\").*;
try std.testing.expectEqualSlices(u16, L("C:\\test\\"), try ntToWin32Namespace(&mutable_path_buf, &mutable_path_buf));
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("foo")));
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("C:\\test")));
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("\\\\.\\test")));
}
fn testNtToWin32Namespace(expected: []const u16, path: []const u16) !void {
const converted = try ntToWin32Namespace(path);
try std.testing.expectEqualSlices(u16, expected, converted.span());
var too_small_buf: [6]u16 = undefined;
try std.testing.expectError(error.NameTooLong, ntToWin32Namespace(L("\\??\\C:\\test"), &too_small_buf));
}
fn getFullPathNameW(path: [*:0]const u16, out: []u16) !usize {

View file

@ -3021,26 +3021,42 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (native_os == .wasi and !builtin.link_libc) {
return readlinkat(AT.FDCWD, file_path, out_buffer);
} else if (native_os == .windows) {
const file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
return readlinkW(file_path_w.span(), out_buffer);
var file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > out_buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
return out_buffer[0..end_index];
} else {
const file_path_c = try toPosixPath(file_path);
return readlinkZ(&file_path_c, out_buffer);
}
}
/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
/// Windows-only. Same as `readlink` except `file_path` is WTF-16 LE encoded, NT-prefixed.
/// The result is encoded as WTF-16 LE.
///
/// `file_path` will never be accessed after `out_buffer` has been written to, so it
/// is safe to reuse a single buffer for both.
///
/// See also `readlinkZ`.
pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
pub fn readlinkW(file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
return windows.ReadLink(fs.cwd().fd, file_path, out_buffer);
}
/// Same as `readlink` except `file_path` is null-terminated.
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (native_os == .windows) {
const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
return readlinkW(file_path_w.span(), out_buffer);
var file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > out_buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
return out_buffer[0..end_index];
} else if (native_os == .wasi and !builtin.link_libc) {
return readlink(mem.sliceTo(file_path, 0), out_buffer);
}
@ -3075,8 +3091,14 @@ pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLink
return readlinkatWasi(dirfd, file_path, out_buffer);
}
if (native_os == .windows) {
const file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
return readlinkatW(dirfd, file_path_w.span(), out_buffer);
var file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > out_buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
return out_buffer[0..end_index];
}
const file_path_c = try toPosixPath(file_path);
return readlinkatZ(dirfd, &file_path_c, out_buffer);
@ -3103,10 +3125,14 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read
}
}
/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded.
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
/// Windows-only. Same as `readlinkat` except `file_path` is WTF-16 LE encoded, NT-prefixed.
/// The result is encoded as WTF-16 LE.
///
/// `file_path` will never be accessed after `out_buffer` has been written to, so it
/// is safe to reuse a single buffer for both.
///
/// See also `readlinkat`.
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
return windows.ReadLink(dirfd, file_path, out_buffer);
}
@ -3114,8 +3140,14 @@ pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLi
/// See also `readlinkat`.
pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (native_os == .windows) {
const file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
return readlinkatW(dirfd, file_path_w.span(), out_buffer);
var file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
const len = std.unicode.calcWtf8Len(result_w);
if (len > out_buffer.len) return error.NameTooLong;
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
return out_buffer[0..end_index];
} else if (native_os == .wasi and !builtin.link_libc) {
return readlinkat(dirfd, mem.sliceTo(file_path, 0), out_buffer);
}