Build system: Support Windows depfiles with unquoted, backslash escaped spaces (#20100)

Ben Crist 2024-06-06 13:40:10 -05:00 committed by GitHub
parent 63754916c5
commit a9e9c9965d
2 changed files with 140 additions and 44 deletions


@@ -860,14 +860,23 @@ pub const Manifest = struct
         var it: DepTokenizer = .{ .bytes = dep_file_contents };
-        while (true) {
-            switch (it.next() orelse return) {
+        while (it.next()) |token| {
+            switch (token) {
                 // We don't care about targets, we only want the prereqs
                 // Clang is invoked in single-source mode but other programs may not
                 .target, .target_must_resolve => {},
                 .prereq => |file_path| if (self.manifest_file == null) {
                     _ = try self.addFile(file_path, null);
                 } else try self.addFilePost(file_path),
+                .prereq_must_resolve => {
+                    var resolve_buf = std.ArrayList(u8).init(self.cache.gpa);
+                    defer resolve_buf.deinit();
+                    try token.resolve(resolve_buf.writer());
+                    if (self.manifest_file == null) {
+                        _ = try self.addFile(resolve_buf.items, null);
+                    } else try self.addFilePost(resolve_buf.items);
+                },
                 else => |err| {
                     try err.printError(error_buf.writer());
                     log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items });
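
For context (not part of the diff): on Windows, clang can emit depfile prerequisites whose spaces are escaped with a backslash instead of being quoted, e.g. C:\Users\John\ Smith\...\cimport.c. After this change such paths surface as .prereq_must_resolve tokens and are rewritten through Token.resolve before being added to the cache manifest, as in the Cache.zig hunk above. A minimal usage sketch follows; it assumes the tokenizer is reachable as std.Build.Cache.DepTokenizer (Cache.zig refers to it simply as DepTokenizer).

const std = @import("std");

// Minimal usage sketch, not from the commit. Assumes the tokenizer is
// importable as std.Build.Cache.DepTokenizer.
pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Runtime bytes: cimport.o: C:\Users\John\ Smith\cimport.c
    const dep_file_contents = "cimport.o: C:\\Users\\John\\ Smith\\cimport.c\n";

    var it: std.Build.Cache.DepTokenizer = .{ .bytes = dep_file_contents };
    while (it.next()) |token| {
        switch (token) {
            .target, .target_must_resolve => {},
            // Plain prereq: no escapes, the slice can be used as-is.
            .prereq => |path| std.debug.print("prereq: {s}\n", .{path}),
            // Escaped prereq: "\ " must be rewritten to " " before use.
            .prereq_must_resolve => {
                var resolve_buf = std.ArrayList(u8).init(gpa);
                defer resolve_buf.deinit();
                try token.resolve(resolve_buf.writer());
                // Prints: prereq: C:\Users\John Smith\cimport.c
                std.debug.print("prereq: {s}\n", .{resolve_buf.items});
            },
            else => {},
        }
    }
}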


@@ -158,7 +158,7 @@ pub fn next(self: *Tokenizer) ?Token {
                 '"' => {
                     self.index += 1;
                     self.state = .rhs;
-                    return Token{ .prereq = self.bytes[start .. self.index - 1] };
+                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
                 },
                 else => {
                     self.index += 1;
@@ -167,11 +167,11 @@ pub fn next(self: *Tokenizer) ?Token {
             .prereq => switch (char) {
                 '\t', ' ' => {
                     self.state = .rhs;
-                    return Token{ .prereq = self.bytes[start..self.index] };
+                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                 },
                 '\n', '\r' => {
                     self.state = .lhs;
-                    return Token{ .prereq = self.bytes[start..self.index] };
+                    return finishPrereq(must_resolve, self.bytes[start..self.index]);
                 },
                 '\\' => {
                     self.state = .prereq_continuation;
@@ -185,12 +185,22 @@ pub fn next(self: *Tokenizer) ?Token {
                 '\n' => {
                     self.index += 1;
                     self.state = .rhs;
-                    return Token{ .prereq = self.bytes[start .. self.index - 2] };
+                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
                 },
                 '\r' => {
                     self.state = .prereq_continuation_linefeed;
                     self.index += 1;
                 },
+                '\\' => {
+                    // The previous \ wasn't a continuation, but this one might be.
+                    self.index += 1;
+                },
+                ' ' => {
+                    // not continuation, but escaped space must be resolved
+                    must_resolve = true;
+                    self.state = .prereq;
+                    self.index += 1;
+                },
                 else => {
                     // not continuation
                     self.state = .prereq;
@@ -201,7 +211,7 @@ pub fn next(self: *Tokenizer) ?Token {
                 '\n' => {
                     self.index += 1;
                     self.state = .rhs;
-                    return Token{ .prereq = self.bytes[start .. self.index - 1] };
+                    return finishPrereq(must_resolve, self.bytes[start .. self.index - 3]);
                 },
                 else => {
                     return errorIllegalChar(.continuation_eol, self.index, char);
@@ -251,15 +261,15 @@ pub fn next(self: *Tokenizer) ?Token {
             },
             .prereq => {
                 self.state = .lhs;
-                return Token{ .prereq = self.bytes[start..] };
+                return finishPrereq(must_resolve, self.bytes[start..]);
             },
             .prereq_continuation => {
                 self.state = .lhs;
-                return Token{ .prereq = self.bytes[start .. self.index - 1] };
+                return finishPrereq(must_resolve, self.bytes[start .. self.index - 1]);
             },
             .prereq_continuation_linefeed => {
                 self.state = .lhs;
-                return Token{ .prereq = self.bytes[start .. self.index - 2] };
+                return finishPrereq(must_resolve, self.bytes[start .. self.index - 2]);
             },
         }
     }
@@ -278,6 +288,10 @@ fn finishTarget(must_resolve: bool, bytes: []const u8) Token {
     return if (must_resolve) .{ .target_must_resolve = bytes } else .{ .target = bytes };
 }
 
+fn finishPrereq(must_resolve: bool, bytes: []const u8) Token {
+    return if (must_resolve) .{ .prereq_must_resolve = bytes } else .{ .prereq = bytes };
+}
+
 const State = enum {
     lhs,
     target,
@@ -298,6 +312,7 @@ pub const Token = union(enum) {
     target: []const u8,
     target_must_resolve: []const u8,
     prereq: []const u8,
+    prereq_must_resolve: []const u8,
 
     incomplete_quoted_prerequisite: IndexAndBytes,
     incomplete_target: IndexAndBytes,
@@ -318,48 +333,76 @@ pub const Token = union(enum) {
         bytes: []const u8,
     };
 
-    /// Resolve escapes in target. Only valid with .target_must_resolve.
+    /// Resolve escapes in target or prereq. Only valid with .target_must_resolve or .prereq_must_resolve.
     pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void {
-        const bytes = self.target_must_resolve; // resolve called on incorrect token
-        var state: enum { start, escape, dollar } = .start;
-        for (bytes) |c| {
-            switch (state) {
-                .start => {
-                    switch (c) {
-                        '\\' => state = .escape,
-                        '$' => state = .dollar,
-                        else => try writer.writeByte(c),
-                    }
-                },
-                .escape => {
-                    switch (c) {
-                        ' ', '#', '\\' => {},
-                        '$' => {
-                            try writer.writeByte('\\');
-                            state = .dollar;
-                            continue;
-                        },
-                        else => try writer.writeByte('\\'),
-                    }
-                    try writer.writeByte(c);
-                    state = .start;
-                },
-                .dollar => {
-                    try writer.writeByte('$');
-                    switch (c) {
-                        '$' => {},
-                        else => try writer.writeByte(c),
-                    }
-                    state = .start;
-                },
-            }
-        }
+        switch (self) {
+            .target_must_resolve => |bytes| {
+                var state: enum { start, escape, dollar } = .start;
+                for (bytes) |c| {
+                    switch (state) {
+                        .start => {
+                            switch (c) {
+                                '\\' => state = .escape,
+                                '$' => state = .dollar,
+                                else => try writer.writeByte(c),
+                            }
+                        },
+                        .escape => {
+                            switch (c) {
+                                ' ', '#', '\\' => {},
+                                '$' => {
+                                    try writer.writeByte('\\');
+                                    state = .dollar;
+                                    continue;
+                                },
+                                else => try writer.writeByte('\\'),
+                            }
+                            try writer.writeByte(c);
+                            state = .start;
+                        },
+                        .dollar => {
+                            try writer.writeByte('$');
+                            switch (c) {
+                                '$' => {},
+                                else => try writer.writeByte(c),
+                            }
+                            state = .start;
+                        },
+                    }
+                }
+            },
+            .prereq_must_resolve => |bytes| {
+                var state: enum { start, escape } = .start;
+                for (bytes) |c| {
+                    switch (state) {
+                        .start => {
+                            switch (c) {
+                                '\\' => state = .escape,
+                                else => try writer.writeByte(c),
+                            }
+                        },
+                        .escape => {
+                            switch (c) {
+                                ' ' => {},
+                                '\\' => {
+                                    try writer.writeByte(c);
+                                    continue;
+                                },
+                                else => try writer.writeByte('\\'),
+                            }
+                            try writer.writeByte(c);
+                            state = .start;
+                        },
+                    }
+                }
+            },
+            else => unreachable,
+        }
     }
 
     pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void {
         switch (self) {
-            .target, .target_must_resolve, .prereq => unreachable, // not an error
+            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
             .incomplete_quoted_prerequisite,
             .incomplete_target,
             => |index_and_bytes| {
@@ -387,7 +430,7 @@ pub const Token = union(enum) {
     fn errStr(self: Token) []const u8 {
         return switch (self) {
-            .target, .target_must_resolve, .prereq => unreachable, // not an error
+            .target, .target_must_resolve, .prereq, .prereq_must_resolve => unreachable, // not an error
             .incomplete_quoted_prerequisite => "incomplete quoted prerequisite",
             .incomplete_target => "incomplete target",
             .invalid_target => "invalid target",
@@ -538,6 +581,15 @@ test "prereq continuation" {
     , expect);
 }
 
+test "prereq continuation (CRLF)" {
+    const expect =
+        \\target = {foo.o}
+        \\prereq = {foo.h}
+        \\prereq = {bar.h}
+    ;
+    try depTokenizer("foo.o: foo.h\\\r\nbar.h", expect);
+}
+
 test "multiple prereqs" {
     const expect =
         \\target = {foo.o}
@@ -728,6 +780,32 @@ test "windows funky targets" {
     );
 }
 
+test "windows funky prereqs" {
+    // Note we don't support unquoted escaped spaces at the very beginning of a relative path
+    // e.g. `\ SpaceAtTheBeginning.c`
+    // This typically wouldn't be seen in the wild, since depfiles usually use absolute paths
+    // and supporting it would degrade error messages for cases where it was meant to be a
+    // continuation, but the line ending is missing.
+    try depTokenizer(
+        \\cimport.o: \
+        \\ trailingbackslash\\
+        \\ C:\Users\John\ Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c \
+        \\ somedir\\ a.c\
+        \\ somedir/\ a.c\
+        \\ somedir\\ \ \ b.c\
+        \\ somedir\\ \\ \c.c\
+        \\
+    ,
+        \\target = {cimport.o}
+        \\prereq = {trailingbackslash\}
+        \\prereq = {C:\Users\John Smith\AppData\Local\zig\p\1220d14057af1a9d6dde4643293527bd5ee5099517d655251a066666a4320737ea7c\cimport.c}
+        \\prereq = {somedir\ a.c}
+        \\prereq = {somedir/ a.c}
+        \\prereq = {somedir\ b.c}
+        \\prereq = {somedir\ \ \c.c}
+    );
+}
+
 test "windows drive and forward slashes" {
     try depTokenizer(
         \\C:/msys64/what/zig-cache\tmp\48ac4d78dd531abd-cxa_thread_atexit.obj: \
@@ -915,6 +993,15 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void {
                 resolve_buf.items.len = 0;
                 try buffer.appendSlice("}");
             },
+            .prereq_must_resolve => {
+                try buffer.appendSlice("prereq = {");
+                try token.resolve(resolve_buf.writer());
+                for (resolve_buf.items) |b| {
+                    try buffer.append(printable_char_tab[b]);
+                }
+                resolve_buf.items.len = 0;
+                try buffer.appendSlice("}");
+            },
             else => {
                 try buffer.appendSlice("ERROR: ");
                 try token.printError(buffer.writer());
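
To summarize the prereq escape rules the new tests above exercise: in a .prereq_must_resolve payload, "\ " resolves to a plain space, "\\" to a single literal backslash, and a backslash before any other byte is passed through unchanged. Below is an illustrative test sketch (not part of the commit) driving Token.resolve directly; it assumes the Token union is importable as std.Build.Cache.DepTokenizer.Token.

const std = @import("std");

// Sketch only; assumes DepTokenizer.Token is reachable via std.Build.Cache.
const Token = std.Build.Cache.DepTokenizer.Token;

test "prereq escape resolution (illustrative)" {
    var buf = std.ArrayList(u8).init(std.testing.allocator);
    defer buf.deinit();

    // Raw payload bytes: somedir\\ a.c  (backslash, backslash, space)
    const token: Token = .{ .prereq_must_resolve = "somedir\\\\ a.c" };
    try token.resolve(buf.writer());

    // The doubled backslash resolves to a single backslash and the space is
    // kept, matching the {somedir\ a.c} expectation in the test above.
    try std.testing.expectEqualStrings("somedir\\ a.c", buf.items);
}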