mirror of
https://codeberg.org/ziglang/zig.git
synced 2025-12-06 13:54:21 +00:00
Note: This mostly matches resinator v0.1.0 rather than the latest master version, since the latest master version focuses on adding support for .res -> .obj conversion which is not necessary for the future planned relationship of zig and resinator (resinator will likely be moved out of the compiler and into the build system, a la translate-c). So, ultimately the changes here consist mostly of bug fixes for obscure edge cases.
99 lines
4.6 KiB
Zig
Vendored
99 lines
4.6 KiB
Zig
Vendored
const std = @import("std");
|
|
const lex = @import("lex.zig");
|
|
const SourceMappings = @import("source_mapping.zig").SourceMappings;
|
|
const SupportedCodePage = @import("code_pages.zig").SupportedCodePage;
|
|
|
|
pub fn hasDisjointCodePage(source: []const u8, source_mappings: ?*const SourceMappings, default_code_page: SupportedCodePage) bool {
|
|
var line_handler = lex.LineHandler{ .buffer = source };
|
|
var i: usize = 0;
|
|
while (i < source.len) {
|
|
const codepoint = default_code_page.codepointAt(i, source) orelse break;
|
|
const c = codepoint.value;
|
|
switch (c) {
|
|
'\r', '\n' => {
|
|
_ = line_handler.incrementLineNumber(i);
|
|
// Any lines that are not from the root file interrupt the disjoint code page
|
|
if (source_mappings != null and !source_mappings.?.isRootFile(line_handler.line_number)) return false;
|
|
},
|
|
// whitespace is ignored
|
|
' ',
|
|
'\t',
|
|
// NBSP, this should technically be in the TODO below, but it is treated as whitespace
|
|
// due to a (misguided) special casing in the lexer, see the TODO in lex.zig
|
|
'\u{A0}',
|
|
=> {},
|
|
|
|
// TODO: All of the below are treated as whitespace by the Win32 RC preprocessor, which also
|
|
// means they are trimmed from the file during preprocessing. This means that these characters
|
|
// should be treated like ' ', '\t' above, but since the resinator preprocessor does not treat
|
|
// them as whitespace *or* trim whitespace, files with these characters are likely going to
|
|
// error. So, in the future some sort of emulation of/rejection of the Win32 behavior might
|
|
// make handling these codepoints specially make sense, but for now it doesn't really matter
|
|
// so they are not handled specially for simplicity's sake.
|
|
//'\u{1680}',
|
|
//'\u{180E}',
|
|
//'\u{2001}',
|
|
//'\u{2002}',
|
|
//'\u{2003}',
|
|
//'\u{2004}',
|
|
//'\u{2005}',
|
|
//'\u{2006}',
|
|
//'\u{2007}',
|
|
//'\u{2008}',
|
|
//'\u{2009}',
|
|
//'\u{200A}',
|
|
//'\u{2028}',
|
|
//'\u{2029}',
|
|
//'\u{202F}',
|
|
//'\u{205F}',
|
|
//'\u{3000}',
|
|
|
|
'#' => {
|
|
if (source_mappings != null and !source_mappings.?.isRootFile(line_handler.line_number)) {
|
|
return false;
|
|
}
|
|
const start_i = i;
|
|
while (i < source.len and source[i] != '\r' and source[i] != '\n') : (i += 1) {}
|
|
const line = source[start_i..i];
|
|
_ = (lex.parsePragmaCodePage(line) catch |err| switch (err) {
|
|
error.NotPragma => return false,
|
|
error.NotCodePagePragma => continue,
|
|
error.CodePagePragmaUnsupportedCodePage => continue,
|
|
else => continue,
|
|
}) orelse return false; // DEFAULT interrupts disjoint code page
|
|
|
|
// If we got a code page, then it is a disjoint code page pragma
|
|
return true;
|
|
},
|
|
else => {
|
|
// Any other character interrupts the disjoint code page
|
|
return false;
|
|
},
|
|
}
|
|
|
|
i += codepoint.byte_len;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
test hasDisjointCodePage {
|
|
try std.testing.expect(hasDisjointCodePage("#pragma code_page(65001)\n", null, .windows1252));
|
|
// NBSP is a special case
|
|
try std.testing.expect(hasDisjointCodePage("\xA0\n#pragma code_page(65001)\n", null, .windows1252));
|
|
try std.testing.expect(hasDisjointCodePage("\u{A0}\n#pragma code_page(1252)\n", null, .utf8));
|
|
// other preprocessor commands don't interrupt
|
|
try std.testing.expect(hasDisjointCodePage("#pragma foo\n#pragma code_page(65001)\n", null, .windows1252));
|
|
// invalid code page doesn't interrupt
|
|
try std.testing.expect(hasDisjointCodePage("#pragma code_page(1234567)\n#pragma code_page(65001)\n", null, .windows1252));
|
|
|
|
try std.testing.expect(!hasDisjointCodePage("#if 1\n#endif\n#pragma code_page(65001)", null, .windows1252));
|
|
try std.testing.expect(!hasDisjointCodePage("// comment\n#pragma code_page(65001)", null, .windows1252));
|
|
try std.testing.expect(!hasDisjointCodePage("/* comment */\n#pragma code_page(65001)", null, .windows1252));
|
|
}
|
|
|
|
test "multiline comment edge case" {
|
|
// TODO
|
|
if (true) return error.SkipZigTest;
|
|
|
|
try std.testing.expect(hasDisjointCodePage("/* comment */#pragma code_page(65001)", null, .windows1252));
|
|
}
|