From 6aa3570cb0e599e89748073612e8d0317100c807 Mon Sep 17 00:00:00 2001 From: Ryan Liptak Date: Fri, 10 Oct 2025 23:04:40 -0700 Subject: [PATCH] windows: Make readLinkW APIs output WTF-16, reduce stack usage of callers - Affects the following functions: + `std.fs.Dir.readLinkW` + `std.os.windows.ReadLink` + `std.os.windows.ntToWin32Namespace` + `std.posix.readlinkW` + `std.posix.readlinkatW` Each of these functions (except `ntToWin32Namespace`) took WTF-16 as input and would output WTF-8, which makes optimal buffer re-use difficult at callsites and could force unnecessary WTF-16 <-> WTF-8 conversion during an intermediate step. The functions have been updated to output WTF-16, and also allow for the path and the output to re-use the same buffer (i.e. in-place modification), which can reduce the stack usage at callsites. For example, all of `std.fs.Dir.readLink`/`readLinkZ`/`std.posix.readlink`/`readlinkZ`/`readlinkat`/`readlinkatZ` have had their stack usage reduced by one PathSpace struct (64 KiB) when targeting Windows. The new `ntToWin32Namespace` takes an output buffer and returns a slice from that instead of returning a PathSpace, which is necessary to make the above possible. 
--- lib/std/fs/Dir.zig | 29 ++++++++++++---- lib/std/fs/test.zig | 20 +++++++++++ lib/std/os/windows.zig | 75 +++++++++++++++++------------------------- lib/std/posix.zig | 60 +++++++++++++++++++++++++-------- 4 files changed, 119 insertions(+), 65 deletions(-) diff --git a/lib/std/fs/Dir.zig b/lib/std/fs/Dir.zig index c90eeef508..fe4b1416ad 100644 --- a/lib/std/fs/Dir.zig +++ b/lib/std/fs/Dir.zig @@ -1354,8 +1354,14 @@ pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u return self.readLinkWasi(sub_path, buffer); } if (native_os == .windows) { - const sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path); - return self.readLinkW(sub_path_w.span(), buffer); + var sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path); + const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w); + return buffer[0..end_index]; } const sub_path_c = try posix.toPosixPath(sub_path); return self.readLinkZ(&sub_path_c, buffer); @@ -1369,15 +1375,24 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 { /// Same as `readLink`, except the `sub_path_c` parameter is null-terminated. pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 { if (native_os == .windows) { - const sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c); - return self.readLinkW(sub_path_w.span(), buffer); + var sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c); + const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w); + return buffer[0..end_index]; } return posix.readlinkatZ(self.fd, sub_path_c, buffer); } -/// Windows-only. 
Same as `readLink` except the pathname parameter -/// is WTF16 LE encoded. -pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 { +/// Windows-only. Same as `readLink` except the path parameter +/// is WTF-16 LE encoded, NT-prefixed. +/// +/// `sub_path_w` will never be accessed after `buffer` has been written to, so it +/// is safe to reuse a single buffer for both. +pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u16) ![]u16 { return windows.ReadLink(self.fd, sub_path_w, buffer); } diff --git a/lib/std/fs/test.zig b/lib/std/fs/test.zig index 75f35a46da..497079adce 100644 --- a/lib/std/fs/test.zig +++ b/lib/std/fs/test.zig @@ -193,10 +193,16 @@ test "Dir.readLink" { // test 1: symlink to a file try setupSymlink(ctx.dir, file_target_path, "symlink1", .{}); try testReadLink(ctx.dir, canonical_file_target_path, "symlink1"); + if (builtin.os.tag == .windows) { + try testReadLinkW(testing.allocator, ctx.dir, canonical_file_target_path, "symlink1"); + } // test 2: symlink to a directory (can be different on Windows) try setupSymlink(ctx.dir, dir_target_path, "symlink2", .{ .is_directory = true }); try testReadLink(ctx.dir, canonical_dir_target_path, "symlink2"); + if (builtin.os.tag == .windows) { + try testReadLinkW(testing.allocator, ctx.dir, canonical_dir_target_path, "symlink2"); + } // test 3: relative path symlink const parent_file = ".." 
++ fs.path.sep_str ++ "target.txt"; @@ -205,6 +211,9 @@ test "Dir.readLink" { defer subdir.close(); try setupSymlink(subdir, canonical_parent_file, "relative-link.txt", .{}); try testReadLink(subdir, canonical_parent_file, "relative-link.txt"); + if (builtin.os.tag == .windows) { + try testReadLinkW(testing.allocator, subdir, canonical_parent_file, "relative-link.txt"); + } } }.impl); } @@ -215,6 +224,17 @@ fn testReadLink(dir: Dir, target_path: []const u8, symlink_path: []const u8) !vo try testing.expectEqualStrings(target_path, actual); } +fn testReadLinkW(allocator: mem.Allocator, dir: Dir, target_path: []const u8, symlink_path: []const u8) !void { + const target_path_w = try std.unicode.wtf8ToWtf16LeAlloc(allocator, target_path); + defer allocator.free(target_path_w); + // Calling the W functions directly requires the path to be NT-prefixed + const symlink_path_w = try std.os.windows.sliceToPrefixedFileW(dir.fd, symlink_path); + const wtf16_buffer = try allocator.alloc(u16, target_path_w.len); + defer allocator.free(wtf16_buffer); + const actual = try dir.readLinkW(symlink_path_w.span(), wtf16_buffer); + try testing.expectEqualSlices(u16, target_path_w, actual); +} + fn testReadLinkAbsolute(target_path: []const u8, symlink_path: []const u8) !void { var buffer: [fs.max_path_bytes]u8 = undefined; const given = try fs.readLinkAbsolute(symlink_path, buffer[0..]); diff --git a/lib/std/os/windows.zig b/lib/std/os/windows.zig index 46b3340a23..a855a58d4f 100644 --- a/lib/std/os/windows.zig +++ b/lib/std/os/windows.zig @@ -894,7 +894,9 @@ pub const ReadLinkError = error{ UnsupportedReparsePointType, }; -pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLinkError![]u8 { +/// `sub_path_w` will never be accessed after `out_buffer` has been written to, so it +/// is safe to reuse a single buffer for both. 
+pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u16) ReadLinkError![]u16 { const result_handle = OpenFile(sub_path_w, .{ .access_mask = FILE_READ_ATTRIBUTES | SYNCHRONIZE, .dir = dir, @@ -926,14 +928,14 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin const len = buf.SubstituteNameLength >> 1; const path_buf = @as([*]const u16, &buf.PathBuffer); const is_relative = buf.Flags & SYMLINK_FLAG_RELATIVE != 0; - return parseReadlinkPath(path_buf[offset..][0..len], is_relative, out_buffer); + return parseReadLinkPath(path_buf[offset..][0..len], is_relative, out_buffer); }, IO_REPARSE_TAG_MOUNT_POINT => { const buf: *const MOUNT_POINT_REPARSE_BUFFER = @ptrCast(@alignCast(&reparse_struct.DataBuffer[0])); const offset = buf.SubstituteNameOffset >> 1; const len = buf.SubstituteNameLength >> 1; const path_buf = @as([*]const u16, &buf.PathBuffer); - return parseReadlinkPath(path_buf[offset..][0..len], false, out_buffer); + return parseReadLinkPath(path_buf[offset..][0..len], false, out_buffer); }, else => { return error.UnsupportedReparsePointType; @@ -941,19 +943,18 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin } } -/// Asserts that there is enough space is `out_buffer`. -/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/). 
-fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 { - const win32_namespace_path = path: { - if (is_relative) break :path path; - const win32_path = ntToWin32Namespace(path) catch |err| switch (err) { - error.NameTooLong => unreachable, - error.NotNtPath => break :path path, +fn parseReadLinkPath(path: []const u16, is_relative: bool, out_buffer: []u16) error{NameTooLong}![]u16 { + path: { + if (is_relative) break :path; + return ntToWin32Namespace(path, out_buffer) catch |err| switch (err) { + error.NameTooLong => |e| return e, + error.NotNtPath => break :path, }; - break :path win32_path.span(); - }; - const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path); - return out_buffer[0..out_len]; + } + if (out_buffer.len < path.len) return error.NameTooLong; + const dest = out_buffer[0..path.len]; + @memcpy(dest, path); + return dest; } pub const DeleteFileError = error{ @@ -2584,10 +2585,11 @@ test getUnprefixedPathType { /// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c /// /// `path` should be encoded as WTF-16LE. -pub fn ntToWin32Namespace(path: []const u16) !PathSpace { +/// +/// Supports in-place modification (`path` and `out` may refer to the same slice). 
+pub fn ntToWin32Namespace(path: []const u16, out: []u16) error{ NameTooLong, NotNtPath }![]u16 { if (path.len > PATH_MAX_WIDE) return error.NameTooLong; - var path_space: PathSpace = undefined; const namespace_prefix = getNamespacePrefix(u16, path); switch (namespace_prefix) { .nt => { @@ -2595,23 +2597,19 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace { var after_prefix = path[4..]; // after the `\??\` // The prefix \??\UNC\ means this is a UNC path, in which case the // `\??\UNC\` should be replaced by `\\` (two backslashes) - // TODO: the "UNC" should technically be matched case-insensitively, but - // it's unlikely to matter since most/all paths passed into this - // function will have come from the OS meaning it should have - // the 'canonical' uppercase UNC. const is_unc = after_prefix.len >= 4 and - std.mem.eql(u16, after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and + eqlIgnoreCaseWTF16(after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and std.fs.path.PathType.windows.isSep(u16, std.mem.littleToNative(u16, after_prefix[3])); + const win32_len = path.len - @as(usize, if (is_unc) 6 else 4); + if (out.len < win32_len) return error.NameTooLong; if (is_unc) { - path_space.data[0] = comptime std.mem.nativeToLittle(u16, '\\'); + out[0] = comptime std.mem.nativeToLittle(u16, '\\'); dest_index += 1; // We want to include the last `\` of `\??\UNC\` after_prefix = path[7..]; } - @memcpy(path_space.data[dest_index..][0..after_prefix.len], after_prefix); - path_space.len = dest_index + after_prefix.len; - path_space.data[path_space.len] = 0; - return path_space; + @memmove(out[dest_index..][0..after_prefix.len], after_prefix); + return out[0..win32_len]; }, else => return error.NotNtPath, } @@ -2620,25 +2618,14 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace { test ntToWin32Namespace { const L = std.unicode.utf8ToUtf16LeStringLiteral; - try testNtToWin32Namespace(L("UNC"), L("\\??\\UNC")); - try 
testNtToWin32Namespace(L("\\\\"), L("\\??\\UNC\\")); - try testNtToWin32Namespace(L("\\\\path1"), L("\\??\\UNC\\path1")); - try testNtToWin32Namespace(L("\\\\path1\\path2"), L("\\??\\UNC\\path1\\path2")); + var mutable_unc_path_buf = L("\\??\\UNC\\path1\\path2").*; + try std.testing.expectEqualSlices(u16, L("\\\\path1\\path2"), try ntToWin32Namespace(&mutable_unc_path_buf, &mutable_unc_path_buf)); - try testNtToWin32Namespace(L(""), L("\\??\\")); - try testNtToWin32Namespace(L("C:"), L("\\??\\C:")); - try testNtToWin32Namespace(L("C:\\"), L("\\??\\C:\\")); - try testNtToWin32Namespace(L("C:\\test"), L("\\??\\C:\\test")); - try testNtToWin32Namespace(L("C:\\test\\"), L("\\??\\C:\\test\\")); + var mutable_path_buf = L("\\??\\C:\\test\\").*; + try std.testing.expectEqualSlices(u16, L("C:\\test\\"), try ntToWin32Namespace(&mutable_path_buf, &mutable_path_buf)); - try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("foo"))); - try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("C:\\test"))); - try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("\\\\.\\test"))); -} - -fn testNtToWin32Namespace(expected: []const u16, path: []const u16) !void { - const converted = try ntToWin32Namespace(path); - try std.testing.expectEqualSlices(u16, expected, converted.span()); + var too_small_buf: [6]u16 = undefined; + try std.testing.expectError(error.NameTooLong, ntToWin32Namespace(L("\\??\\C:\\test"), &too_small_buf)); } fn getFullPathNameW(path: [*:0]const u16, out: []u16) !usize { diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 75cdabaf8b..72c7cfa44a 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -3021,26 +3021,42 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 { if (native_os == .wasi and !builtin.link_libc) { return readlinkat(AT.FDCWD, file_path, out_buffer); } else if (native_os == .windows) { - const file_path_w = try windows.sliceToPrefixedFileW(null, file_path); - return 
readlinkW(file_path_w.span(), out_buffer); + var file_path_w = try windows.sliceToPrefixedFileW(null, file_path); + const result_w = try readlinkW(file_path_w.span(), &file_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > out_buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w); + return out_buffer[0..end_index]; } else { const file_path_c = try toPosixPath(file_path); return readlinkZ(&file_path_c, out_buffer); } } -/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded. -/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/). +/// Windows-only. Same as `readlink` except `file_path` is WTF-16 LE encoded, NT-prefixed. +/// The result is encoded as WTF-16 LE. +/// +/// `file_path` will never be accessed after `out_buffer` has been written to, so it +/// is safe to reuse a single buffer for both. +/// /// See also `readlinkZ`. -pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 { +pub fn readlinkW(file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 { return windows.ReadLink(fs.cwd().fd, file_path, out_buffer); } /// Same as `readlink` except `file_path` is null-terminated. 
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 { if (native_os == .windows) { - const file_path_w = try windows.cStrToPrefixedFileW(null, file_path); - return readlinkW(file_path_w.span(), out_buffer); + var file_path_w = try windows.cStrToPrefixedFileW(null, file_path); + const result_w = try readlinkW(file_path_w.span(), &file_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > out_buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w); + return out_buffer[0..end_index]; } else if (native_os == .wasi and !builtin.link_libc) { return readlink(mem.sliceTo(file_path, 0), out_buffer); } @@ -3075,8 +3091,14 @@ pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLink return readlinkatWasi(dirfd, file_path, out_buffer); } if (native_os == .windows) { - const file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path); - return readlinkatW(dirfd, file_path_w.span(), out_buffer); + var file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path); + const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > out_buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w); + return out_buffer[0..end_index]; } const file_path_c = try toPosixPath(file_path); return readlinkatZ(dirfd, &file_path_c, out_buffer); @@ -3103,10 +3125,14 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read } } -/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded. -/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/). +/// Windows-only. Same as `readlinkat` except `file_path` is WTF-16 LE encoded, NT-prefixed. +/// The result is encoded as WTF-16 LE. 
+/// +/// `file_path` will never be accessed after `out_buffer` has been written to, so it +/// is safe to reuse a single buffer for both. +/// /// See also `readlinkat`. -pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 { +pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 { return windows.ReadLink(dirfd, file_path, out_buffer); } @@ -3114,8 +3140,14 @@ pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLi /// See also `readlinkat`. pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 { if (native_os == .windows) { - const file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path); - return readlinkatW(dirfd, file_path_w.span(), out_buffer); + var file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path); + const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data); + + const len = std.unicode.calcWtf8Len(result_w); + if (len > out_buffer.len) return error.NameTooLong; + + const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w); + return out_buffer[0..end_index]; } else if (native_os == .wasi and !builtin.link_libc) { return readlinkat(dirfd, mem.sliceTo(file_path, 0), out_buffer); }