//! Vendored aro sources: zig/lib/compiler/aro/aro/Preprocessor.zig

const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
const Attribute = @import("Attribute.zig");
const Compilation = @import("Compilation.zig");
const Error = Compilation.Error;
const Diagnostics = @import("Diagnostics.zig");
const DepFile = @import("DepFile.zig");
const features = @import("features.zig");
const Hideset = @import("Hideset.zig");
const Parser = @import("Parser.zig");
const Source = @import("Source.zig");
const text_literal = @import("text_literal.zig");
const Tokenizer = @import("Tokenizer.zig");
const RawToken = Tokenizer.Token;
const SourceEpoch = Compilation.Environment.SourceEpoch;
const Tree = @import("Tree.zig");
const Token = Tree.Token;
const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs;
const DefineMap = std.StringArrayHashMapUnmanaged(Macro);
const RawTokenList = std.array_list.Managed(RawToken);
const max_include_depth = 200;
/// Errors that can be returned when expanding a macro.
/// error.UnknownPragma can occur within Preprocessor.pragma() but
/// it is handled there and doesn't escape that function
const MacroError = Error || error{StopPreprocessing};
const IfContext = struct {
const Backing = u2;
const Nesting = enum(Backing) {
until_else,
until_endif,
until_endif_seen_else,
};
const buf_size_bits = @bitSizeOf(Backing) * 256;
kind: [buf_size_bits / std.mem.byte_size_in_bits]u8,
level: u8,
fn get(self: *const IfContext) Nesting {
return @enumFromInt(std.mem.readPackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2));
}
fn set(self: *IfContext, context: Nesting) void {
std.mem.writePackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2, @intFromEnum(context));
}
fn increment(self: *IfContext) bool {
self.level, const overflowed = @addWithOverflow(self.level, 1);
return overflowed != 0;
}
fn decrement(self: *IfContext) void {
self.level -= 1;
}
/// Initialize `kind` to an invalid value since it is an error to read the kind before setting it.
/// Doing so will trigger safety-checked undefined behavior in `IfContext.get`
const default: IfContext = .{ .kind = @splat(0xFF), .level = 0 };
};
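// Minimal illustration of the packed 2-bit-per-level nesting state above;
// `increment` reports overflow once more than 255 conditionals are nested.
test "IfContext packed nesting state" {
    var ctx: IfContext = .default;
    try std.testing.expect(!ctx.increment()); // enter nesting level 1
    ctx.set(.until_else);
    try std.testing.expectEqual(IfContext.Nesting.until_else, ctx.get());
    ctx.set(.until_endif_seen_else);
    try std.testing.expectEqual(IfContext.Nesting.until_endif_seen_else, ctx.get());
    ctx.decrement();
}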
pub const Macro = struct {
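// For example, `#define SQUARE(x) ((x) * (x))` is stored with params = {"x"},
// is_func = true, var_args = false, and `tokens` holding the raw tokens of `((x) * (x))`.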
/// Parameters of the function type macro
params: []const []const u8,
/// Token constituting the macro body
tokens: []const RawToken,
/// If the function type macro has variable number of arguments
var_args: bool,
/// Is a function type macro
is_func: bool,
/// Is a predefined macro
is_builtin: bool = false,
/// Location of macro in the source
loc: Source.Location,
fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
if (a.tokens.len != b.tokens.len) return false;
if (a.is_builtin != b.is_builtin) return false;
for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false;
if (a.is_func and b.is_func) {
if (a.var_args != b.var_args) return false;
if (a.params.len != b.params.len) return false;
for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false;
}
return true;
}
fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool {
return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b));
}
};
const Preprocessor = @This();
const ExpansionEntry = struct {
idx: Tree.TokenIndex,
locs: [*]Source.Location,
};
const TokenState = struct {
tokens_len: usize,
expansion_entries_len: usize,
};
comp: *Compilation,
diagnostics: *Diagnostics,
gpa: mem.Allocator,
arena: std.heap.ArenaAllocator,
defines: DefineMap = .{},
/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity
tokens: Token.List = .{},
/// Do not directly mutate this; must be kept in sync with `tokens`
expansion_entries: std.MultiArrayList(ExpansionEntry) = .{},
token_buf: RawTokenList,
char_buf: std.array_list.Managed(u8),
/// Counter that is incremented each time preprocess() is called
/// Can be used to distinguish multiple preprocessings of the same file
preprocess_count: u32 = 0,
generated_line: u32 = 1,
add_expansion_nl: u32 = 0,
include_depth: u8 = 0,
counter: u32 = 0,
expansion_source_loc: Source.Location = undefined,
poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},
/// Store `keyword_define` and `keyword_undef` tokens.
/// Used to implement preprocessor debug dump options
/// Must be false unless in -E mode (parser does not handle those token types)
store_macro_tokens: bool = false,
/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,
/// Dump current state to stderr.
verbose: bool = false,
preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,
hideset: Hideset,
/// Epoch used for __DATE__, __TIME__, and possibly __TIMESTAMP__
source_epoch: SourceEpoch,
m_times: std.AutoHashMapUnmanaged(Source.Id, u64) = .{},
/// The dependency file tracking all includes and embeds.
dep_file: ?*DepFile = null,
pub const parse = Parser.parse;
pub const Linemarkers = enum {
/// No linemarker tokens. Required setting if parser will run
none,
/// #line <num> "filename"
line_directives,
/// # <num> "filename" flags
numeric_directives,
};
pub fn init(comp: *Compilation, source_epoch: SourceEpoch) Preprocessor {
const pp: Preprocessor = .{
.comp = comp,
.diagnostics = comp.diagnostics,
.gpa = comp.gpa,
.arena = std.heap.ArenaAllocator.init(comp.gpa),
.token_buf = RawTokenList.init(comp.gpa),
.char_buf = std.array_list.Managed(u8).init(comp.gpa),
.poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
.top_expansion_buf = ExpandBuf.init(comp.gpa),
.hideset = .{ .comp = comp },
.source_epoch = source_epoch,
};
comp.pragmaEvent(.before_preprocess);
return pp;
}
/// Initialize Preprocessor with builtin macros.
pub fn initDefault(comp: *Compilation) !Preprocessor {
const source_epoch: SourceEpoch = comp.environment.sourceEpoch() catch |er| switch (er) {
error.InvalidEpoch => blk: {
const diagnostic: Diagnostic = .invalid_source_epoch;
try comp.diagnostics.add(.{ .text = diagnostic.fmt, .kind = diagnostic.kind, .opt = diagnostic.opt, .location = null });
break :blk .default;
},
};
var pp = init(comp, source_epoch);
errdefer pp.deinit();
try pp.addBuiltinMacros();
return pp;
}
// `param_tok_id` is comptime so that the generated `tokens` list is unique for every macro.
fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, comptime param_tok_id: Token.Id) !void {
try pp.defines.putNoClobber(pp.gpa, name, .{
.params = &[1][]const u8{"X"},
.tokens = &[1]RawToken{.{
.id = param_tok_id,
.source = .generated,
}},
.var_args = false,
.is_func = is_func,
.loc = .{ .id = .generated },
.is_builtin = true,
});
}
pub fn addBuiltinMacros(pp: *Preprocessor) !void {
try pp.addBuiltinMacro("__has_attribute", true, .macro_param_has_attribute);
try pp.addBuiltinMacro("__has_c_attribute", true, .macro_param_has_c_attribute);
try pp.addBuiltinMacro("__has_declspec_attribute", true, .macro_param_has_declspec_attribute);
try pp.addBuiltinMacro("__has_warning", true, .macro_param_has_warning);
try pp.addBuiltinMacro("__has_feature", true, .macro_param_has_feature);
try pp.addBuiltinMacro("__has_extension", true, .macro_param_has_extension);
try pp.addBuiltinMacro("__has_builtin", true, .macro_param_has_builtin);
try pp.addBuiltinMacro("__has_include", true, .macro_param_has_include);
try pp.addBuiltinMacro("__has_include_next", true, .macro_param_has_include_next);
try pp.addBuiltinMacro("__has_embed", true, .macro_param_has_embed);
try pp.addBuiltinMacro("__is_identifier", true, .macro_param_is_identifier);
try pp.addBuiltinMacro("_Pragma", true, .macro_param_pragma_operator);
if (pp.comp.langopts.ms_extensions) {
try pp.addBuiltinMacro("__identifier", true, .macro_param_ms_identifier);
try pp.addBuiltinMacro("__pragma", true, .macro_param_ms_pragma);
}
try pp.addBuiltinMacro("__FILE__", false, .macro_file);
try pp.addBuiltinMacro("__LINE__", false, .macro_line);
try pp.addBuiltinMacro("__COUNTER__", false, .macro_counter);
try pp.addBuiltinMacro("__DATE__", false, .macro_date);
try pp.addBuiltinMacro("__TIME__", false, .macro_time);
try pp.addBuiltinMacro("__TIMESTAMP__", false, .macro_timestamp);
}
pub fn deinit(pp: *Preprocessor) void {
pp.defines.deinit(pp.gpa);
pp.tokens.deinit(pp.gpa);
pp.arena.deinit();
pp.token_buf.deinit();
pp.char_buf.deinit();
pp.poisoned_identifiers.deinit();
pp.include_guards.deinit(pp.gpa);
pp.top_expansion_buf.deinit();
pp.hideset.deinit();
for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa);
pp.expansion_entries.deinit(pp.gpa);
pp.m_times.deinit(pp.gpa);
}
/// Free buffers that are not needed after preprocessing
fn clearBuffers(pp: *Preprocessor) void {
pp.token_buf.clearAndFree();
pp.char_buf.clearAndFree();
pp.top_expansion_buf.clearAndFree();
pp.hideset.clearAndFree();
}
fn mTime(pp: *Preprocessor, source_id: Source.Id) !u64 {
const gop = try pp.m_times.getOrPut(pp.gpa, source_id);
if (!gop.found_existing) {
gop.value_ptr.* = pp.comp.getSourceMTimeUncached(source_id) orelse 0;
}
return gop.value_ptr.*;
}
pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location {
const S = struct {
fn orderTokenIndex(context: Tree.TokenIndex, item: Tree.TokenIndex) std.math.Order {
return std.math.order(context, item);
}
};
const indices = pp.expansion_entries.items(.idx);
const idx = std.sort.binarySearch(Tree.TokenIndex, indices, tok, S.orderTokenIndex) orelse return &.{};
const locs = pp.expansion_entries.items(.locs)[idx];
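// Each locs buffer is terminated by a sentinel entry whose id is .unused.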
var i: usize = 0;
while (locs[i].id != .unused) : (i += 1) {}
return locs[0..i];
}
/// Preprocess a compilation unit of sources into a parsable list of tokens.
pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void {
assert(sources.len > 1);
const first = sources[0];
try pp.addIncludeStart(first);
for (sources[1..]) |header| {
try pp.addIncludeStart(header);
_ = try pp.preprocess(header);
}
try pp.addIncludeResume(first.id, 0, 1);
const eof = try pp.preprocess(first);
try pp.addToken(eof);
pp.clearBuffers();
}
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
const eof = pp.preprocessExtra(source) catch |er| switch (er) {
// This cannot occur in the main file and is handled in `include`.
error.StopPreprocessing => unreachable,
else => |e| return e,
};
try eof.checkMsEof(source, pp.comp);
return eof;
}
/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
assert(pp.linemarkers == .none);
assert(pp.preserve_whitespace == false);
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
while (true) {
const tok = tokenizer.next();
if (tok.id == .eof) return tokFromRaw(tok);
try pp.addToken(tokFromRaw(tok));
}
}
pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
if (pp.linemarkers == .none) return;
try pp.addToken(.{ .id = .include_start, .loc = .{
.id = source.id,
.byte_offset = std.math.maxInt(u32),
.line = 1,
} });
}
pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
if (pp.linemarkers == .none) return;
try pp.addToken(.{ .id = .include_resume, .loc = .{
.id = source,
.byte_offset = offset,
.line = line,
} });
}
fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostic {
return switch (tok_id) {
.unterminated_string_literal => .unterminated_string_literal_warning,
.empty_char_literal => .empty_char_literal_warning,
.unterminated_char_literal => .unterminated_char_literal_warning,
else => unreachable,
};
}
/// Return the name of the #ifndef guard macro that starts a source, if any.
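/// e.g. a file beginning with `#ifndef FOO_H` yields "FOO_H"; the guard is later
/// discarded in preprocessExtra if the rest of the file does not fit the guard pattern.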
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
var hash = tokenizer.nextNoWS();
while (hash.id == .nl) hash = tokenizer.nextNoWS();
if (hash.id != .hash) return null;
const ifndef = tokenizer.nextNoWS();
if (ifndef.id != .keyword_ifndef) return null;
const guard = tokenizer.nextNoWS();
if (guard.id != .identifier) return null;
return pp.tokSlice(guard);
}
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs {
var guard_name = pp.findIncludeGuard(source);
pp.preprocess_count += 1;
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
var if_context: IfContext = .default;
var start_of_line = true;
while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else {
const directive = tokenizer.nextNoWS();
const directive_loc: Source.Location = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line };
switch (directive.id) {
.keyword_error, .keyword_warning => {
// #error tokens..
pp.top_expansion_buf.items.len = 0;
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
while (true) {
tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) break;
if (tok.id == .whitespace) tok.id = .macro_ws;
try pp.top_expansion_buf.append(tokFromRaw(tok));
}
try pp.stringify(pp.top_expansion_buf.items);
const slice = pp.char_buf.items[char_top + 1 .. pp.char_buf.items.len - 2];
try pp.err(
directive_loc,
if (directive.id == .keyword_error) .error_directive else .warning_directive,
.{slice},
);
},
.keyword_if => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
if (try pp.expr(&tokenizer)) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #if", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #if", .{});
}
}
},
.keyword_ifdef => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name) != null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #ifdef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #ifdef", .{});
}
}
},
.keyword_ifndef => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name) == null) {
if_context.set(.until_endif);
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
}
},
.keyword_elif => {
if (if_context.level == 0) {
try pp.err(directive, .elif_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => if (try pp.expr(&tokenizer)) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elif", .{});
}
} else {
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elif", .{});
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elif_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_elifdef => {
if (if_context.level == 0) {
try pp.err(directive, .elifdef_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
const macro_name = try pp.expectMacroName(&tokenizer);
if (macro_name == null) {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
}
} else {
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name.?) != null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elifdef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
}
}
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elifdef_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_elifndef => {
if (if_context.level == 0) {
try pp.err(directive, .elifndef_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
const macro_name = try pp.expectMacroName(&tokenizer);
if (macro_name == null) {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
}
} else {
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name.?) == null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elifndef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
}
}
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elifndef_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_else => {
try pp.expectNl(&tokenizer);
if (if_context.level == 0) {
try pp.err(directive, .else_without_if, .{});
continue;
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
if_context.set(.until_endif_seen_else);
if (pp.verbose) {
pp.verboseLog(directive, "#else branch here", .{});
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif_seen_else),
.until_endif_seen_else => {
try pp.err(directive, .else_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_endif => {
try pp.expectNl(&tokenizer);
if (if_context.level == 0) {
guard_name = null;
try pp.err(directive, .endif_without_if, .{});
continue;
} else if (if_context.level == 1) {
const saved_tokenizer = tokenizer;
defer tokenizer = saved_tokenizer;
var next = tokenizer.nextNoWS();
while (next.id == .nl) : (next = tokenizer.nextNoWS()) {}
if (next.id != .eof) guard_name = null;
}
if_context.decrement();
},
.keyword_define => try pp.define(&tokenizer, directive),
.keyword_undef => {
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(directive));
}
_ = pp.defines.orderedRemove(macro_name);
try pp.expectNl(&tokenizer);
},
.keyword_include => {
try pp.include(&tokenizer, .first);
continue;
},
.keyword_include_next => {
try pp.err(directive_loc, .include_next, .{});
if (pp.include_depth == 0) {
try pp.err(directive_loc, .include_next_outside_header, .{});
try pp.include(&tokenizer, .first);
} else {
try pp.include(&tokenizer, .next);
}
},
.keyword_embed => try pp.embed(&tokenizer),
.keyword_pragma => {
try pp.pragma(&tokenizer, directive, null, &.{});
continue;
},
.keyword_line => {
// #line number "file"
const digits = tokenizer.nextNoWS();
if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit, .{});
// TODO: validate that the pp_num token is solely digits
if (digits.id == .eof or digits.id == .nl) continue;
const name = tokenizer.nextNoWS();
if (name.id == .eof or name.id == .nl) continue;
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
try pp.expectNl(&tokenizer);
},
.pp_num => {
// # number "file" flags
// TODO: validate that the pp_num token is solely digits
// if not, emit `GNU line marker directive requires a simple digit sequence`
const name = tokenizer.nextNoWS();
if (name.id == .eof or name.id == .nl) continue;
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
const flag_1 = tokenizer.nextNoWS();
if (flag_1.id == .eof or flag_1.id == .nl) continue;
const flag_2 = tokenizer.nextNoWS();
if (flag_2.id == .eof or flag_2.id == .nl) continue;
const flag_3 = tokenizer.nextNoWS();
if (flag_3.id == .eof or flag_3.id == .nl) continue;
const flag_4 = tokenizer.nextNoWS();
if (flag_4.id == .eof or flag_4.id == .nl) continue;
try pp.expectNl(&tokenizer);
},
.nl => {},
.eof => {
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
return tokFromRaw(directive);
},
else => {
try pp.err(tok, .invalid_preprocessing_directive, .{});
skipToNl(&tokenizer);
},
}
if (pp.preserve_whitespace) {
tok.id = .nl;
try pp.addToken(tokFromRaw(tok));
}
},
.whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)),
.nl => {
start_of_line = true;
if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok));
},
.eof => {
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
// The following check needs to occur here and not at the top of the function
// because a pragma may change the level during preprocessing
if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') {
try pp.err(tok, .newline_eof, .{});
}
if (guard_name) |name| {
if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| {
assert(mem.eql(u8, name, prev.value));
}
}
return tokFromRaw(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
start_of_line = false;
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.expandMacro(&tokenizer, tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) {
try pp.err(tok, .poisoned_identifier, .{});
}
// Add the token to the buffer doing any necessary expansions.
start_of_line = false;
try pp.expandMacro(&tokenizer, tok);
},
}
}
}
/// Get raw token source string.
/// Returned slice is invalidated when comp.generated_buf is updated.
pub fn tokSlice(pp: *const Preprocessor, token: anytype) []const u8 {
if (token.id.lexeme()) |some| return some;
const source = pp.comp.getSource(token.source);
return source.buf[token.start..token.end];
}
/// Convert a token from the Tokenizer into a token used by the parser.
fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs {
return .{
.id = raw.id,
.loc = .{
.id = raw.source,
.byte_offset = raw.start,
.line = raw.line,
},
};
}
pub const Diagnostic = @import("Preprocessor/Diagnostic.zig");
fn err(pp: *Preprocessor, loc: anytype, diagnostic: Diagnostic, args: anytype) Compilation.Error!void {
if (pp.diagnostics.effectiveKind(diagnostic) == .off) return;
var sf = std.heap.stackFallback(1024, pp.gpa);
var allocating: std.Io.Writer.Allocating = .init(sf.get());
defer allocating.deinit();
Diagnostics.formatArgs(&allocating.writer, diagnostic.fmt, args) catch return error.OutOfMemory;
try pp.diagnostics.addWithLocation(pp.comp, .{
.kind = diagnostic.kind,
.text = allocating.getWritten(),
.opt = diagnostic.opt,
.extension = diagnostic.extension,
.location = switch (@TypeOf(loc)) {
RawToken => (Source.Location{
.id = loc.source,
.byte_offset = loc.start,
.line = loc.line,
}).expand(pp.comp),
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.loc.expand(pp.comp),
Source.Location => loc.expand(pp.comp),
else => @compileError("invalid token type " ++ @typeName(@TypeOf(loc))),
},
}, switch (@TypeOf(loc)) {
RawToken => &.{},
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.expansionSlice(),
Source.Location => &.{},
else => @compileError("invalid token type"),
}, true);
}
fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error {
var sf = std.heap.stackFallback(1024, pp.gpa);
var allocating: std.Io.Writer.Allocating = .init(sf.get());
defer allocating.deinit();
Diagnostics.formatArgs(&allocating.writer, fmt, args) catch return error.OutOfMemory;
try pp.diagnostics.add(.{
.kind = .@"fatal error",
.text = allocating.getWritten(),
.location = (Source.Location{
.id = raw.source,
.byte_offset = raw.start,
.line = raw.line,
}).expand(pp.comp),
});
unreachable;
}
fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error {
const old = pp.diagnostics.state.fatal_errors;
pp.diagnostics.state.fatal_errors = true;
defer pp.diagnostics.state.fatal_errors = old;
var sf = std.heap.stackFallback(1024, pp.gpa);
var buf = std.array_list.Managed(u8).init(sf.get());
defer buf.deinit();
try buf.print("'{s}' not found", .{filename});
try pp.diagnostics.addWithLocation(pp.comp, .{
.kind = .@"fatal error",
.text = buf.items,
.location = tok.loc.expand(pp.comp),
}, tok.expansionSlice(), true);
unreachable; // should've returned FatalError
}
fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void {
@branchHint(.cold);
const source = pp.comp.getSource(raw.source);
const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start });
var stderr_buffer: [64]u8 = undefined;
var writer = std.debug.lockStderrWriter(&stderr_buffer);
defer std.debug.unlockStderrWriter();
writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return;
writer.print(fmt, args) catch return;
writer.writeByte('\n') catch return;
writer.writeAll(line_col.line) catch return;
writer.writeByte('\n') catch return;
}
/// Consume next token, error if it is not an identifier.
fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 {
const macro_name = tokenizer.nextNoWS();
if (!macro_name.id.isMacroIdentifier()) {
try pp.err(macro_name, .macro_name_missing, .{});
skipToNl(tokenizer);
return null;
}
return pp.tokSlice(macro_name);
}
/// Skip until after a newline, error if extra tokens before it.
fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
var sent_err = false;
while (true) {
const tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) return;
if (tok.id == .whitespace or tok.id == .comment) continue;
if (!sent_err) {
sent_err = true;
try pp.err(tok, .extra_tokens_directive_end, .{});
}
}
}
fn getTokenState(pp: *const Preprocessor) TokenState {
return .{
.tokens_len = pp.tokens.len,
.expansion_entries_len = pp.expansion_entries.len,
};
}
fn restoreTokenState(pp: *Preprocessor, state: TokenState) void {
pp.tokens.len = state.tokens_len;
pp.expansion_entries.len = state.expansion_entries_len;
}
/// Consume all tokens until a newline and parse the result into a boolean.
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
const token_state = pp.getTokenState();
defer {
for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
pp.restoreTokenState(token_state);
}
pp.top_expansion_buf.items.len = 0;
const eof = while (true) {
const tok = tokenizer.next();
switch (tok.id) {
.nl, .eof => break tok,
.whitespace => if (pp.top_expansion_buf.items.len == 0) continue,
else => {},
}
try pp.top_expansion_buf.append(tokFromRaw(tok));
} else unreachable;
if (pp.top_expansion_buf.items.len != 0) {
pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
}
for (pp.top_expansion_buf.items) |tok| {
if (tok.id == .macro_ws) continue;
if (!tok.id.validPreprocessorExprStart()) {
try pp.err(tok, .invalid_preproc_expr_start, .{});
return false;
}
break;
} else {
try pp.err(eof, .expected_value_in_expr, .{});
return false;
}
// validate the tokens in the expression
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
var i: usize = 0;
const items = pp.top_expansion_buf.items;
while (i < items.len) : (i += 1) {
var tok = items[i];
switch (tok.id) {
.string_literal,
.string_literal_utf_16,
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
=> {
try pp.err(tok, .string_literal_in_pp_expr, .{});
return false;
},
.plus_plus,
.minus_minus,
.plus_equal,
.minus_equal,
.asterisk_equal,
.slash_equal,
.percent_equal,
.angle_bracket_angle_bracket_left_equal,
.angle_bracket_angle_bracket_right_equal,
.ampersand_equal,
.caret_equal,
.pipe_equal,
.l_bracket,
.r_bracket,
.l_brace,
.r_brace,
.ellipsis,
.semicolon,
.hash,
.hash_hash,
.equal,
.arrow,
.period,
=> {
try pp.err(tok, .invalid_preproc_operator, .{});
return false;
},
.macro_ws, .whitespace => continue,
.keyword_false => tok.id = .zero,
.keyword_true => tok.id = .one,
else => if (tok.id.isMacroIdentifier()) {
if (tok.id == .keyword_defined) {
const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof);
i += tokens_consumed;
} else {
try pp.err(tok, .undefined_macro, .{pp.expandedSlice(tok)});
if (i + 1 < pp.top_expansion_buf.items.len and
pp.top_expansion_buf.items[i + 1].id == .l_paren)
{
try pp.err(tok, .fn_macro_undefined, .{pp.expandedSlice(tok)});
return false;
}
tok.id = .zero; // undefined macro
}
},
}
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok));
}
try pp.addToken(.{
.id = .eof,
.loc = tokFromRaw(eof).loc,
});
// Actually parse it.
var parser: Parser = .{
.pp = pp,
.comp = pp.comp,
.diagnostics = pp.diagnostics,
.gpa = pp.gpa,
.tok_ids = pp.tokens.items(.id),
.tok_i = @intCast(token_state.tokens_len),
.in_macro = true,
.strings = std.array_list.Managed(u8).init(pp.comp.gpa),
.tree = undefined,
.labels = undefined,
.decl_buf = undefined,
.list_buf = undefined,
.param_buf = undefined,
.enum_buf = undefined,
.record_buf = undefined,
.attr_buf = undefined,
.string_ids = undefined,
};
defer parser.strings.deinit();
return parser.macroExpr();
}
/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined
/// Returns the number of tokens consumed
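/// Accepts both the `defined NAME` and `defined(NAME)` forms.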
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize {
std.debug.assert(macro_tok.id == .keyword_defined);
var it = TokenIterator.init(tokens);
const first = it.nextNoWS() orelse {
try pp.err(eof, .macro_name_missing, .{});
return it.i;
};
switch (first.id) {
.l_paren => {},
else => {
if (!first.id.isMacroIdentifier()) {
try pp.err(first, .macro_name_must_be_identifier, .{});
}
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero;
return it.i;
},
}
const second = it.nextNoWS() orelse {
try pp.err(eof, .macro_name_missing, .{});
return it.i;
};
if (!second.id.isMacroIdentifier()) {
try pp.err(second, .macro_name_must_be_identifier, .{});
return it.i;
}
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero;
const last = it.nextNoWS();
if (last == null or last.?.id != .r_paren) {
const tok = last orelse tokFromRaw(eof);
try pp.err(tok, .closing_paren, .{});
try pp.err(first, .to_match_paren, .{});
}
return it.i;
}
/// Skip until #else #elif #endif, return last directive token id.
/// Also skips nested #if ... #endifs.
fn skip(
pp: *Preprocessor,
tokenizer: *Tokenizer,
cont: enum { until_else, until_endif, until_endif_seen_else },
) Error!void {
var ifs_seen: u32 = 0;
var line_start = true;
while (tokenizer.index < tokenizer.buf.len) {
if (line_start) {
const saved_tokenizer = tokenizer.*;
const hash = tokenizer.nextNoWS();
if (hash.id == .nl) continue;
line_start = false;
if (hash.id != .hash) continue;
const directive = tokenizer.nextNoWS();
switch (directive.id) {
.keyword_else => {
if (ifs_seen != 0) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .else_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elif => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elif_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elifdef => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elifdef_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elifndef => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elifndef_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_endif => {
if (ifs_seen == 0) {
tokenizer.* = saved_tokenizer;
return;
}
ifs_seen -= 1;
},
.keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1,
else => {},
}
} else if (tokenizer.buf[tokenizer.index] == '\n') {
line_start = true;
tokenizer.index += 1;
tokenizer.line += 1;
if (pp.preserve_whitespace) {
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
} else {
line_start = false;
tokenizer.index += 1;
}
} else {
const eof = tokenizer.next();
return pp.err(eof, .unterminated_conditional_directive, .{});
}
}
// Skip until newline, ignore other tokens.
fn skipToNl(tokenizer: *Tokenizer) void {
while (true) {
const tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) return;
}
}
const ExpandBuf = std.array_list.Managed(TokenWithExpansionLocs);
fn removePlacemarkers(buf: *ExpandBuf) void {
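// Iterate in reverse so removals don't shift unvisited elements; the wrapping
// subtraction ends the loop once `i` wraps past zero.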
var i: usize = buf.items.len -% 1;
while (i < buf.items.len) : (i -%= 1) {
if (buf.items[i].id == .placemarker) {
const placemarker = buf.orderedRemove(i);
TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator);
}
}
}
const MacroArguments = std.array_list.Managed([]const TokenWithExpansionLocs);
fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void {
for (args.items) |item| {
for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator);
allocator.free(item);
}
args.deinit();
}
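/// Expand an object-like macro into a new buffer, resolving `##` token pasting and
/// the builtin __FILE__, __LINE__, __COUNTER__, __DATE__, __TIME__ and __TIMESTAMP__ tokens.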
fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf {
var buf = ExpandBuf.init(pp.gpa);
errdefer buf.deinit();
if (simple_macro.tokens.len == 0) {
try buf.append(.{ .id = .placemarker, .loc = .{ .id = .generated } });
return buf;
}
try buf.ensureTotalCapacity(simple_macro.tokens.len);
// Add all of the simple_macros tokens to the new buffer handling any concats.
var i: usize = 0;
while (i < simple_macro.tokens.len) : (i += 1) {
const raw = simple_macro.tokens[i];
const tok = tokFromRaw(raw);
switch (raw.id) {
.hash_hash => {
var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (true) {
if (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else break;
}
try pp.pasteTokens(&buf, &.{rhs});
},
.whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok),
.macro_file => {
const start = pp.comp.generated_buf.items.len;
const source = pp.comp.getSource(pp.expansion_source_loc.id);
try pp.comp.generated_buf.print(pp.gpa, "\"{f}\"\n", .{fmtEscapes(source.path)});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
.macro_line => {
const start = pp.comp.generated_buf.items.len;
const source = pp.comp.getSource(pp.expansion_source_loc.id);
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{source.physicalLine(pp.expansion_source_loc)});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
},
.macro_counter => {
defer pp.counter += 1;
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{pp.counter});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
},
.macro_date, .macro_time => {
const start = pp.comp.generated_buf.items.len;
const timestamp = switch (pp.source_epoch) {
.system, .provided => |ts| ts,
};
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
.macro_timestamp => {
const start = pp.comp.generated_buf.items.len;
const timestamp = switch (pp.source_epoch) {
.provided => |ts| ts,
.system => try pp.mTime(pp.expansion_source_loc.id),
};
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
else => buf.appendAssumeCapacity(tok),
}
}
return buf;
}
const DateTimeStampKind = enum {
date,
time,
timestamp,
fn fromTokId(tok_id: RawToken.Id) DateTimeStampKind {
return switch (tok_id) {
.macro_date => .date,
.macro_time => .time,
.macro_timestamp => .timestamp,
else => unreachable,
};
}
};
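/// Write the quoted __DATE__ / __TIME__ / __TIMESTAMP__ text for `timestamp` to the
/// generated buffer; e.g. a timestamp of 0 produces "Jan  1 1970", "00:00:00",
/// and "Thu Jan  1 00:00:00 1970" respectively.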
fn writeDateTimeStamp(pp: *Preprocessor, kind: DateTimeStampKind, timestamp: u64) !void {
std.debug.assert(std.time.epoch.Month.jan.numeric() == 1);
const epoch_seconds = std.time.epoch.EpochSeconds{ .secs = timestamp };
const epoch_day = epoch_seconds.getEpochDay();
const day_seconds = epoch_seconds.getDaySeconds();
const year_day = epoch_day.calculateYearDay();
const month_day = year_day.calculateMonthDay();
const day_names = [_][]const u8{ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
const month_names = [_][]const u8{ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
const day_name = day_names[@intCast((epoch_day.day + 3) % 7)];
const month_name = month_names[month_day.month.numeric() - 1];
switch (kind) {
.date => {
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {d: >2} {d}\"", .{
month_name,
month_day.day_index + 1,
year_day.year,
});
},
.time => {
try pp.comp.generated_buf.print(pp.gpa, "\"{d:0>2}:{d:0>2}:{d:0>2}\"", .{
day_seconds.getHoursIntoDay(),
day_seconds.getMinutesIntoHour(),
day_seconds.getSecondsIntoMinute(),
});
},
.timestamp => {
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {s} {d: >2} {d:0>2}:{d:0>2}:{d:0>2} {d}\"", .{
day_name,
month_name,
month_day.day_index + 1,
day_seconds.getHoursIntoDay(),
day_seconds.getMinutesIntoHour(),
day_seconds.getSecondsIntoMinute(),
year_day.year,
});
},
}
}
/// Join a possibly-parenthesized series of string literal tokens into a single string without
/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes.
/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal
/// is encountered, or if no string literals are encountered
/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"')
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 {
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
var unwrapped = toks;
if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) {
unwrapped = toks[1 .. toks.len - 1];
}
if (unwrapped.len == 0) return error.ExpectedStringLiteral;
for (unwrapped) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id != .string_literal) return error.ExpectedStringLiteral;
const str = pp.expandedSlice(tok);
try pp.char_buf.appendSlice(str[1 .. str.len - 1]);
}
return pp.char_buf.items[char_top..];
}
/// Handle the _Pragma operator (implemented as a builtin macro)
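/// e.g. `_Pragma("GCC diagnostic push")` is destringified into
/// `#pragma GCC diagnostic push` in the generated buffer and re-tokenized.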
fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void {
const arg_slice = pp.expandedSlice(arg_tok);
const content = arg_slice[1 .. arg_slice.len - 1];
const directive = "#pragma ";
pp.char_buf.clearRetainingCapacity();
const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline
try pp.char_buf.ensureUnusedCapacity(total_len);
pp.char_buf.appendSliceAssumeCapacity(directive);
pp.destringify(content);
pp.char_buf.appendAssumeCapacity('\n');
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
var tmp_tokenizer = Tokenizer{
.buf = pp.comp.generated_buf.items,
.langopts = pp.comp.langopts,
.index = @intCast(start),
.source = .generated,
.line = pp.generated_line,
};
pp.generated_line += 1;
const hash_tok = tmp_tokenizer.next();
assert(hash_tok.id == .hash);
const pragma_tok = tmp_tokenizer.next();
assert(pragma_tok.id == .keyword_pragma);
try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice());
}
/// Handle Microsoft __pragma operator
fn msPragmaOperator(pp: *Preprocessor, pragma_tok: TokenWithExpansionLocs, args: []const TokenWithExpansionLocs) !void {
if (args.len == 0) {
try pp.err(pragma_tok, .unknown_pragma, .{});
return;
}
{
var copy = try pragma_tok.dupe(pp.gpa);
copy.id = .keyword_pragma;
try pp.addToken(copy);
}
const pragma_start: u32 = @intCast(pp.tokens.len);
for (args) |tok| {
switch (tok.id) {
.macro_ws, .comment => continue,
else => try pp.addToken(try tok.dupe(pp.gpa)),
}
}
try pp.addToken(.{ .id = .nl, .loc = .{ .id = .generated } });
const name = pp.expandedSlice(pp.tokens.get(pragma_start));
if (pp.comp.getPragma(name)) |prag| unknown: {
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
error.UnknownPragma => break :unknown,
else => |e| return e,
};
}
try pp.err(args[0], .unknown_pragma, .{});
}
/// Inverts the output of the preprocessor stringify (#) operation
/// (except all whitespace is condensed to a single space)
/// writes output to pp.char_buf; assumes capacity is sufficient
/// backslash backslash -> backslash
/// backslash doublequote -> doublequote
/// All other characters remain the same
fn destringify(pp: *Preprocessor, str: []const u8) void {
var state: enum { start, backslash_seen } = .start;
for (str) |c| {
switch (c) {
'\\' => {
if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c);
state = if (state == .start) .backslash_seen else .start;
},
else => {
if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\');
pp.char_buf.appendAssumeCapacity(c);
state = .start;
},
}
}
}
/// Stringify `tokens` into pp.char_buf.
/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing
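/// e.g. the tokens `a  +  b` stringify to `"a + b"` plus a trailing newline; interior
/// whitespace collapses to a single space, embedded `"` is escaped, and `\` is escaped
/// only inside string and character literals.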
fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void {
try pp.char_buf.append('"');
var ws_state: enum { start, need, not_needed } = .start;
for (tokens) |tok| {
if (tok.id == .macro_ws) {
if (ws_state == .start) continue;
ws_state = .need;
continue;
}
if (ws_state == .need) try pp.char_buf.append(' ');
ws_state = .not_needed;
// backslashes not inside strings are not escaped
const is_str = switch (tok.id) {
.string_literal,
.string_literal_utf_16,
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
.char_literal,
.char_literal_utf_16,
.char_literal_utf_32,
.char_literal_wide,
=> true,
else => false,
};
for (pp.expandedSlice(tok)) |c| {
if (c == '"')
try pp.char_buf.appendSlice("\\\"")
else if (c == '\\' and is_str)
try pp.char_buf.appendSlice("\\\\")
else
try pp.char_buf.append(c);
}
}
try pp.char_buf.ensureUnusedCapacity(2);
if (pp.char_buf.items[pp.char_buf.items.len - 1] != '\\') {
pp.char_buf.appendSliceAssumeCapacity("\"\n");
return;
}
pp.char_buf.appendAssumeCapacity('"');
var tokenizer: Tokenizer = .{
.buf = pp.char_buf.items,
.index = 0,
.source = .generated,
.langopts = pp.comp.langopts,
.line = 0,
};
const item = tokenizer.next();
if (item.id == .unterminated_string_literal) {
const tok = tokens[tokens.len - 1];
try pp.err(tok, .invalid_pp_stringify_escape, .{});
pp.char_buf.items.len -= 2; // erase unpaired backslash and appended end quote
pp.char_buf.appendAssumeCapacity('"');
}
pp.char_buf.appendAssumeCapacity('\n');
}
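/// Rebuild the filename text of a computed #include / #embed / __has_include argument
/// from its expanded tokens, e.g. `<stdio.h>` or `"config.h"`. If `embed_args` is
/// non-null, the tokens following the filename are passed back through it.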
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 {
if (param_toks.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
// Trim leading/trailing whitespace
var begin: usize = 0;
var end: usize = param_toks.len;
while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {}
while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {}
const params = param_toks[begin..end];
if (params.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
// no string pasting
if (embed_args == null and params[0].id == .string_literal and params.len > 1) {
try pp.err(params[1], .closing_paren, .{});
return null;
}
for (params, 0..) |tok, i| {
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
try pp.char_buf.appendSlice(str);
if (embed_args) |some| {
if ((i == 0 and tok.id == .string_literal) or tok.id == .angle_bracket_right) {
some.* = params[i + 1 ..];
break;
}
}
}
const include_str = pp.char_buf.items[char_top..];
if (include_str.len < 3) {
if (include_str.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
try pp.err(params[0], .empty_filename, .{});
return null;
}
switch (include_str[0]) {
'<' => {
if (include_str[include_str.len - 1] != '>') {
// Ugly hack to find out where the '>' should go, since we don't have the closing ')' location
var closing = params[0];
closing.loc.byte_offset += @as(u32, @intCast(include_str.len)) + 1;
try pp.err(closing, .header_str_closing, .{});
try pp.err(params[0], .header_str_match, .{});
return null;
}
return include_str;
},
'"' => return include_str,
else => {
try pp.err(params[0], .expected_filename, .{});
return null;
},
}
}
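/// Evaluate a builtin feature-check macro (__has_attribute, __has_feature,
/// __has_warning, __has_include, __is_identifier, ...) over its expanded argument
/// tokens, producing the boolean result used for the generated 0/1 token.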
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool {
switch (builtin) {
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
=> {
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
}
if (identifier) |_| invalid = tok else identifier = tok;
}
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
if (invalid) |some| {
try pp.err(some, .feature_check_requires_identifier, .{});
return false;
}
const ident_str = pp.expandedSlice(identifier.?);
return switch (builtin) {
.macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
.macro_param_has_declspec_attribute => {
return if (pp.comp.langopts.declspec_attrs)
Attribute.fromString(.declspec, null, ident_str) != null
else
false;
},
.macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
// If -pedantic-errors is given __has_extension is equivalent to __has_feature
.macro_param_has_extension => if (pp.comp.diagnostics.state.extensions == .@"error")
features.hasFeature(pp.comp, ident_str)
else
features.hasExtension(pp.comp, ident_str),
.macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
else => unreachable,
};
},
.macro_param_has_warning => {
const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) {
error.ExpectedStringLiteral => {
try pp.err(param_toks[0], .expected_str_literal_in, .{"__has_warning"});
return false;
},
else => |e| return e,
};
if (!mem.startsWith(u8, actual_param, "-W")) {
try pp.err(param_toks[0], .malformed_warning_check, .{"__has_warning"});
return false;
}
const warning_name = actual_param[2..];
return Diagnostics.warningExists(warning_name);
},
.macro_param_is_identifier => {
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {
if (identifier) |_| invalid = tok else identifier = tok;
},
};
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
if (invalid) |some| {
try pp.err(some, .builtin_missing_r_paren, .{"builtin feature-check macro"});
return false;
}
const id = identifier.?.id;
return id == .identifier or id == .extended_identifier;
},
.macro_param_has_include, .macro_param_has_include_next => {
const include_str = (try pp.reconstructIncludeString(param_toks, null, param_toks[0])) orelse return false;
const include_type: Compilation.IncludeType = switch (include_str[0]) {
'"' => .quotes,
'<' => .angle_brackets,
else => unreachable,
};
const filename = include_str[1 .. include_str.len - 1];
const res = res: {
if (builtin == .macro_param_has_include or pp.include_depth == 0) {
if (builtin == .macro_param_has_include_next) {
try pp.err(src_loc, .include_next_outside_header, .{});
}
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .first);
}
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .next);
};
if (res) if (pp.dep_file) |dep_file| try dep_file.addDependencyDupe(pp.gpa, pp.comp.arena, filename);
return res;
},
else => unreachable,
}
}
/// Treat whitespace-only paste arguments as empty
fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs {
for (args) |tok| {
if (tok.id != .macro_ws) return args;
}
return &[1]TokenWithExpansionLocs{.{
.id = .placemarker,
.loc = .{ .id = .generated, .byte_offset = 0, .line = 0 },
}};
}
fn expandFuncMacro(
pp: *Preprocessor,
macro_tok: TokenWithExpansionLocs,
func_macro: *const Macro,
args: *const MacroArguments,
expanded_args: *const MacroArguments,
hideset_arg: Hideset.Index,
) MacroError!ExpandBuf {
var hideset = hideset_arg;
var buf = ExpandBuf.init(pp.gpa);
try buf.ensureTotalCapacity(func_macro.tokens.len);
errdefer buf.deinit();
var expanded_variable_arguments = ExpandBuf.init(pp.gpa);
defer expanded_variable_arguments.deinit();
var variable_arguments = ExpandBuf.init(pp.gpa);
defer variable_arguments.deinit();
if (func_macro.var_args) {
var i: usize = func_macro.params.len;
while (i < expanded_args.items.len) : (i += 1) {
try variable_arguments.appendSlice(args.items[i]);
try expanded_variable_arguments.appendSlice(expanded_args.items[i]);
if (i != expanded_args.items.len - 1) {
const comma = TokenWithExpansionLocs{ .id = .comma, .loc = .{ .id = .generated } };
try variable_arguments.append(comma);
try expanded_variable_arguments.append(comma);
}
}
}
// token concatenation and expansion phase
var tok_i: usize = 0;
while (tok_i < func_macro.tokens.len) : (tok_i += 1) {
const raw = func_macro.tokens[tok_i];
switch (raw.id) {
.hash_hash => while (tok_i + 1 < func_macro.tokens.len) {
const raw_next = func_macro.tokens[tok_i + 1];
tok_i += 1;
var va_opt_buf = ExpandBuf.init(pp.gpa);
defer va_opt_buf.deinit();
const next = switch (raw_next.id) {
.macro_ws => continue,
.hash_hash => continue,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
continue
else
&[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]),
.keyword_va_args => variable_arguments.items,
.keyword_va_opt => blk: {
try pp.expandVaOpt(&va_opt_buf, raw_next, variable_arguments.items.len != 0);
if (va_opt_buf.items.len == 0) break;
break :blk va_opt_buf.items;
},
else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
};
try pp.pasteTokens(&buf, next);
if (next.len != 0) break;
},
.macro_param_no_expand => {
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
}
const slice = getPasteArgs(args.items[raw.end]);
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, slice, &.{raw_loc});
},
.macro_param => {
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
}
const arg = expanded_args.items[raw.end];
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, arg, &.{raw_loc});
},
.keyword_va_args => {
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
},
.keyword_va_opt => {
try pp.expandVaOpt(&buf, raw, variable_arguments.items.len != 0);
},
.stringify_param, .stringify_va_args => {
const arg = if (raw.id == .stringify_va_args)
variable_arguments.items
else
args.items[raw.end];
pp.char_buf.clearRetainingCapacity();
try pp.stringify(arg);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
},
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
.macro_param_has_include,
.macro_param_has_include_next,
.macro_param_is_identifier,
=> {
const arg = expanded_args.items[0];
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk false;
} else try pp.handleBuiltinMacro(raw.id, arg, macro_tok.loc);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{}\n", .{@intFromBool(result)});
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_has_c_attribute => {
const arg = expanded_args.items[0];
const not_found = "0\n";
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk not_found;
} else res: {
var invalid: ?TokenWithExpansionLocs = null;
var vendor_ident: ?TokenWithExpansionLocs = null;
var colon_colon: ?TokenWithExpansionLocs = null;
var attr_ident: ?TokenWithExpansionLocs = null;
for (arg) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (tok.id == .colon_colon) {
if (colon_colon != null or attr_ident == null) {
invalid = tok;
break;
}
vendor_ident = attr_ident;
attr_ident = null;
colon_colon = tok;
continue;
}
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
}
if (attr_ident) |_| {
invalid = tok;
break;
} else attr_ident = tok;
}
if (vendor_ident != null and attr_ident == null) {
invalid = vendor_ident;
} else if (attr_ident == null and invalid == null) {
invalid = .{ .id = .eof, .loc = macro_tok.loc };
}
if (invalid) |some| {
try pp.err(some, .feature_check_requires_identifier, .{});
break :res not_found;
}
if (vendor_ident) |some| {
const vendor_str = pp.expandedSlice(some);
const attr_str = pp.expandedSlice(attr_ident.?);
const exists = Attribute.fromString(.gnu, vendor_str, attr_str) != null;
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, if (exists) "1\n" else "0\n");
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
continue;
}
if (!pp.comp.langopts.standard.atLeast(.c23)) break :res not_found;
const attrs = std.StaticStringMap([]const u8).initComptime(.{
.{ "deprecated", "201904L\n" },
.{ "fallthrough", "201904L\n" },
.{ "maybe_unused", "201904L\n" },
.{ "nodiscard", "202003L\n" },
.{ "noreturn", "202202L\n" },
.{ "_Noreturn", "202202L\n" },
.{ "unsequenced", "202207L\n" },
.{ "reproducible", "202207L\n" },
});
const attr_str = Attribute.normalize(pp.expandedSlice(attr_ident.?));
break :res attrs.get(attr_str) orelse not_found;
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, result);
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_has_embed => {
const arg = expanded_args.items[0];
const not_found = "0\n";
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk not_found;
} else res: {
var embed_args: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse
break :res not_found;
var prev = tokFromRaw(raw);
prev.id = .eof;
var it: struct {
i: u32 = 0,
slice: []const TokenWithExpansionLocs,
prev: TokenWithExpansionLocs,
fn next(it: *@This()) TokenWithExpansionLocs {
while (it.i < it.slice.len) switch (it.slice[it.i].id) {
.macro_ws, .whitespace => it.i += 1,
else => break,
} else return it.prev;
defer it.i += 1;
it.prev = it.slice[it.i];
it.prev.id = .eof;
return it.slice[it.i];
}
} = .{ .slice = embed_args, .prev = prev };
while (true) {
const param_first = it.next();
if (param_first.id == .eof) break;
if (param_first.id != .identifier) {
try pp.err(param_first, .malformed_embed_param, .{});
continue;
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
const maybe_colon = it.next();
const param = switch (maybe_colon.id) {
.colon_colon => blk: {
// vendor::param
const param = it.next();
if (param.id != .identifier) {
try pp.err(param, .malformed_embed_param, .{});
continue;
}
const l_paren = it.next();
if (l_paren.id != .l_paren) {
try pp.err(l_paren, .malformed_embed_param, .{});
continue;
}
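// A vendor-prefixed parameter cannot match any standard parameter name below,
// so the check falls through to `not_found`.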
break :blk "doesn't exist";
},
.l_paren => Attribute.normalize(pp.expandedSlice(param_first)),
else => {
try pp.err(maybe_colon, .malformed_embed_param, .{});
continue;
},
};
var arg_count: u32 = 0;
var first_arg: TokenWithExpansionLocs = undefined;
while (true) {
const next = it.next();
if (next.id == .eof) {
try pp.err(param_first, .malformed_embed_limit, .{});
break;
}
if (next.id == .r_paren) break;
arg_count += 1;
if (arg_count == 1) first_arg = next;
}
if (std.mem.eql(u8, param, "limit")) {
if (arg_count != 1) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
if (first_arg.id != .pp_num) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
_ = std.fmt.parseInt(u32, pp.expandedSlice(first_arg), 10) catch {
break :res not_found;
};
} else if (!std.mem.eql(u8, param, "prefix") and !std.mem.eql(u8, param, "suffix") and
!std.mem.eql(u8, param, "if_empty"))
{
break :res not_found;
}
}
const include_type: Compilation.IncludeType = switch (include_str[0]) {
'"' => .quotes,
'<' => .angle_brackets,
else => unreachable,
};
const filename = include_str[1 .. include_str.len - 1];
const contents = (try pp.comp.findEmbed(filename, arg[0].loc.id, include_type, .limited(1), pp.dep_file)) orelse
break :res not_found;
defer pp.comp.gpa.free(contents);
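// 1 (__STDC_EMBED_FOUND__) if the resource is non-empty, 2 (__STDC_EMBED_EMPTY__) if it is empty.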
break :res if (contents.len != 0) "1\n" else "2\n";
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result);
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_pragma_operator => {
// Clang and GCC require exactly one token (so, no parentheses or string pasting)
// even though their error messages indicate otherwise. Ours is slightly more
// descriptive.
var invalid: ?TokenWithExpansionLocs = null;
var string: ?TokenWithExpansionLocs = null;
for (expanded_args.items[0]) |tok| {
switch (tok.id) {
.string_literal => {
if (string) |_| {
invalid = tok;
break;
}
string = tok;
},
.macro_ws => continue,
.comment => continue,
else => {
invalid = tok;
break;
},
}
}
if (string == null and invalid == null) invalid = macro_tok;
if (invalid) |some|
try pp.err(some, .pragma_operator_string_literal, .{})
else
try pp.pragmaOperator(string.?, macro_tok.loc);
},
.macro_param_ms_identifier => blk: {
// Expect '__identifier' '(' macro-identifier ')'
var ident: ?TokenWithExpansionLocs = null;
for (expanded_args.items[0]) |tok| {
switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {},
}
if (ident) |_| {
try pp.err(tok, .builtin_missing_r_paren, .{"identifier"});
break :blk;
} else if (tok.id.isMacroIdentifier()) {
ident = tok;
} else {
try pp.err(tok, .cannot_convert_to_identifier, .{tok.id.symbol()});
break :blk;
}
}
if (ident) |*some| {
some.id = .identifier;
try buf.append(some.*);
} else {
try pp.err(macro_tok, .expected_identifier, .{});
}
},
.macro_param_ms_pragma => {
try pp.msPragmaOperator(macro_tok, expanded_args.items[0]);
},
.comma => {
if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
const hash_hash = func_macro.tokens[tok_i + 1];
var maybe_va_args = func_macro.tokens[tok_i + 2];
var consumed: usize = 2;
if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) {
consumed = 3;
maybe_va_args = func_macro.tokens[tok_i + 3];
}
if (maybe_va_args.id == .keyword_va_args) {
// GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty
tok_i += consumed;
if (func_macro.params.len == expanded_args.items.len) {
// Empty __VA_ARGS__, drop the comma
try pp.err(hash_hash, .comma_deletion_va_args, .{});
} else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) {
// Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted"
if (pp.comp.langopts.standard.isGNU()) {
// GNU standard, drop the comma
try pp.err(hash_hash, .comma_deletion_va_args, .{});
} else {
// C standard, retain the comma
try buf.append(tokFromRaw(raw));
}
} else {
try buf.append(tokFromRaw(raw));
if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) {
try pp.err(hash_hash, .comma_deletion_va_args, .{});
}
const raw_loc = Source.Location{
.id = maybe_va_args.source,
.byte_offset = maybe_va_args.start,
.line = maybe_va_args.line,
};
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
}
continue;
}
}
// Regular comma, no token pasting with __VA_ARGS__
try buf.append(tokFromRaw(raw));
},
else => try buf.append(tokFromRaw(raw)),
}
}
removePlacemarkers(&buf);
const macro_expansion_locs = macro_tok.expansionSlice();
for (buf.items) |*tok| {
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hideset);
try pp.hideset.put(tok.loc, new_hidelist);
}
return buf;
}
fn expandVaOpt(
pp: *Preprocessor,
buf: *ExpandBuf,
raw: RawToken,
should_expand: bool,
) !void {
if (!should_expand) return;
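// The `va_opt` token records the byte range of its contents (set up in `defineFn`);
// re-lex that range to produce its tokens.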
const source = pp.comp.getSource(raw.source);
var tokenizer: Tokenizer = .{
.buf = source.buf,
.index = raw.start,
.source = raw.source,
.langopts = pp.comp.langopts,
.line = raw.line,
};
while (tokenizer.index < raw.end) {
const tok = tokenizer.next();
try buf.append(tokFromRaw(tok));
}
}
fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void {
try buf.ensureUnusedCapacity(tokens.len);
for (tokens) |tok| {
var copy = try tok.dupe(buf.allocator);
errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator);
try copy.addExpansionLocation(buf.allocator, src);
buf.appendAssumeCapacity(copy);
}
}
fn nextBufToken(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: *usize,
end_idx: *usize,
extend_buf: bool,
) Error!TokenWithExpansionLocs {
start_idx.* += 1;
if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
if (extend_buf) {
const raw_tok = tokenizer.next();
if (raw_tok.id.isMacroIdentifier() and
pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null)
try pp.err(raw_tok, .poisoned_identifier, .{});
if (raw_tok.id == .nl) pp.add_expansion_nl += 1;
const new_tok = tokFromRaw(raw_tok);
end_idx.* += 1;
try buf.append(new_tok);
return new_tok;
} else {
return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } };
}
} else {
return buf.items[start_idx.*];
}
}
fn collectMacroFuncArguments(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: *usize,
end_idx: *usize,
extend_buf: bool,
is_builtin: bool,
r_paren: *TokenWithExpansionLocs,
) !MacroArguments {
const name_tok = buf.items[start_idx.*];
const saved_tokenizer = tokenizer.*;
const old_end = end_idx.*;
while (true) {
const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
switch (tok.id) {
.nl, .whitespace, .macro_ws => {},
.l_paren => break,
else => {
if (is_builtin) {
try pp.err(name_tok, .missing_lparen_after_builtin, .{pp.expandedSlice(name_tok)});
}
// Not a macro function call, go over normal identifier, rewind
tokenizer.* = saved_tokenizer;
end_idx.* = old_end;
return error.MissingLParen;
},
}
}
// collect the arguments.
var parens: u32 = 0;
var args = MacroArguments.init(pp.gpa);
errdefer deinitMacroArguments(pp.gpa, &args);
var curArgument = std.array_list.Managed(TokenWithExpansionLocs).init(pp.gpa);
defer curArgument.deinit();
while (true) {
var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
tok.flags.is_macro_arg = true;
switch (tok.id) {
.comma => {
if (parens == 0) {
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
} else {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
}
},
.l_paren => {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens += 1;
},
.r_paren => {
if (parens == 0) {
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
r_paren.* = tok;
break;
} else {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens -= 1;
}
},
.eof => {
{
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
}
tokenizer.* = saved_tokenizer;
try pp.err(name_tok, .unterminated_macro_arg_list, .{});
return error.Unterminated;
},
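// Newlines and other whitespace inside the argument list become macro whitespace tokens.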
.nl, .whitespace => {
try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc });
},
else => {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
},
}
}
return args;
}
fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void {
for (buf.items[start .. start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(start, len, &.{});
moving_end_idx.* -|= len;
}
/// The behavior of `defined` depends on whether we are in a preprocessor
/// expression context (#if or #elif) or not.
/// In a non-expression context it's just an identifier. Within a preprocessor
/// expression it is a unary operator or one-argument function.
const EvalContext = enum {
expr,
non_expr,
};
/// Helper for safely iterating over a slice of tokens while skipping whitespace
const TokenIterator = struct {
toks: []const TokenWithExpansionLocs,
i: usize,
fn init(toks: []const TokenWithExpansionLocs) TokenIterator {
return .{ .toks = toks, .i = 0 };
}
fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs {
while (self.i < self.toks.len) : (self.i += 1) {
const tok = self.toks[self.i];
if (tok.id == .whitespace or tok.id == .macro_ws) continue;
self.i += 1;
return tok;
}
return null;
}
};
fn expandMacroExhaustive(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: usize,
end_idx: usize,
extend_buf: bool,
eval_ctx: EvalContext,
) MacroError!void {
var moving_end_idx = end_idx;
var advance_index: usize = 0;
// rescan loop
var do_rescan = true;
while (do_rescan) {
do_rescan = false;
// expansion loop
var idx: usize = start_idx + advance_index;
while (idx < moving_end_idx) {
const macro_tok = buf.items[idx];
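// Inside #if/#elif expressions the operand of `defined` must not be macro-expanded; skip over it.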
if (macro_tok.id == .keyword_defined and eval_ctx == .expr) {
idx += 1;
var it = TokenIterator.init(buf.items[idx..moving_end_idx]);
if (it.nextNoWS()) |tok| {
switch (tok.id) {
.l_paren => {
_ = it.nextNoWS(); // eat (what should be) identifier
_ = it.nextNoWS(); // eat (what should be) r paren
},
.identifier, .extended_identifier => {},
else => {},
}
}
idx += it.i;
continue;
}
if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) {
idx += 1;
continue;
}
const expanded = pp.expandedSlice(macro_tok);
const macro = pp.defines.getPtr(expanded) orelse {
idx += 1;
continue;
};
const macro_hidelist = pp.hideset.get(macro_tok.loc);
if (pp.hideset.contains(macro_hidelist, expanded)) {
idx += 1;
continue;
}
macro_handler: {
if (macro.is_func) {
var r_paren: TokenWithExpansionLocs = undefined;
// Scan from a copy of `idx` so the original position is preserved in case
// this does not turn out to be a macro call.
var macro_scan_idx = idx;
const args = pp.collectMacroFuncArguments(
tokenizer,
buf,
&macro_scan_idx,
&moving_end_idx,
extend_buf,
macro.is_builtin,
&r_paren,
) catch |er| switch (er) {
error.MissingLParen => {
if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
idx += 1;
break :macro_handler;
},
error.Unterminated => {
if (pp.comp.langopts.emulate == .gcc) idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx);
break :macro_handler;
},
else => |e| return e,
};
assert(r_paren.id == .r_paren);
var free_arg_expansion_locs = false;
defer {
for (args.items) |item| {
if (free_arg_expansion_locs) for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
pp.gpa.free(item);
}
args.deinit();
}
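// The expansion's hideset is the intersection of the macro name's and the
// closing paren's hidesets, plus the macro name itself.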
const r_paren_hidelist = pp.hideset.get(r_paren.loc);
var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist);
hs = try pp.hideset.prepend(macro_tok.loc, hs);
var args_count: u32 = @intCast(args.items.len);
// If the macro takes zero parameters, a call like g() still produces args_count == 1 here.
// An empty token list g() or a whitespace-only token list g( ) counts as zero
// arguments for the purposes of argument-count validation.
if (args_count == 1 and macro.params.len == 0) {
for (args.items[0]) |tok| {
if (tok.id != .macro_ws) break;
} else {
args_count = 0;
}
}
// Validate argument count.
if (macro.var_args and args_count < macro.params.len) {
free_arg_expansion_locs = true;
try pp.err(buf.items[idx], .expected_at_least_arguments, .{ macro.params.len, args_count });
idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
continue;
}
if (!macro.var_args and args_count != macro.params.len) {
free_arg_expansion_locs = true;
try pp.err(buf.items[idx], .expected_arguments, .{ macro.params.len, args_count });
idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
continue;
}
var expanded_args = MacroArguments.init(pp.gpa);
defer deinitMacroArguments(pp.gpa, &expanded_args);
try expanded_args.ensureTotalCapacity(args.items.len);
for (args.items) |arg| {
var expand_buf = ExpandBuf.init(pp.gpa);
errdefer expand_buf.deinit();
try expand_buf.appendSlice(arg);
try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx);
expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice());
}
var res = try pp.expandFuncMacro(macro_tok, macro, &args, &expanded_args, hs);
defer res.deinit();
const tokens_added = res.items.len;
const tokens_removed = macro_scan_idx - idx + 1;
for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(idx, tokens_removed, res.items);
moving_end_idx += tokens_added;
// Overflow here means that we encountered an unterminated argument list
// while expanding the body of this macro.
moving_end_idx -|= tokens_removed;
idx += tokens_added;
do_rescan = true;
} else {
const res = try pp.expandObjMacro(macro);
defer res.deinit();
const hs = try pp.hideset.prepend(macro_tok.loc, macro_hidelist);
const macro_expansion_locs = macro_tok.expansionSlice();
var increment_idx_by = res.items.len;
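// Advance `idx` only up to the first result token that is itself a macro
// name (or `defined`), so that it gets re-examined on the next pass.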
for (res.items, 0..) |*tok, i| {
tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(tok.loc, new_hidelist);
if (tok.id == .keyword_defined and eval_ctx == .expr) {
if (macro.is_func) {
try pp.err(tok, .expansion_to_defined_func, .{});
} else {
try pp.err(tok, .expansion_to_defined_obj, .{});
}
}
if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) {
increment_idx_by = i;
}
}
TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa);
try buf.replaceRange(idx, 1, res.items);
idx += increment_idx_by;
moving_end_idx = moving_end_idx + res.items.len - 1;
do_rescan = true;
}
}
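// A token at the front of the buffer that did not expand can be skipped on
// later rescan passes.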
if (idx - start_idx == advance_index + 1 and !do_rescan) {
advance_index += 1;
}
} // end of replacement phase
}
// end of scanning phase
// trim excess buffer
for (buf.items[moving_end_idx..]) |item| {
TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa);
}
buf.items.len = moving_end_idx;
}
fn unescapeUcn(pp: *Preprocessor, tok: TokenWithExpansionLocs) !TokenWithExpansionLocs {
switch (tok.id) {
.incomplete_ucn => {
@branchHint(.cold);
try pp.err(tok, .incomplete_ucn, .{});
},
.extended_identifier => {
@branchHint(.cold);
const identifier = pp.expandedSlice(tok);
if (mem.indexOfScalar(u8, identifier, '\\') != null) {
@branchHint(.cold);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.ensureUnusedCapacity(pp.gpa, identifier.len + 1);
var identifier_parser: text_literal.Parser = .{
.comp = pp.comp,
.literal = pp.expandedSlice(tok), // re-expand since previous line may have caused a reallocation, invalidating `identifier`
.kind = .utf_8,
.max_codepoint = 0x10ffff,
.loc = tok.loc,
.expansion_locs = tok.expansionSlice(),
.diagnose_incorrect_encoding = false,
};
while (try identifier_parser.next()) |decoded| {
switch (decoded) {
.value => unreachable, // validated by tokenizer
.codepoint => |c| {
var buf: [4]u8 = undefined;
const written = std.unicode.utf8Encode(c, &buf) catch unreachable;
pp.comp.generated_buf.appendSliceAssumeCapacity(buf[0..written]);
},
.improperly_encoded => |bytes| {
pp.comp.generated_buf.appendSliceAssumeCapacity(bytes);
},
.utf8_text => |view| {
pp.comp.generated_buf.appendSliceAssumeCapacity(view.bytes);
},
}
}
pp.comp.generated_buf.appendAssumeCapacity('\n');
defer TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
return pp.makeGeneratedToken(start, .extended_identifier, tok);
}
},
else => {},
}
return tok;
}
/// Try to expand a macro after a possible candidate has been read from the `tokenizer`
/// into the `raw` token passed as argument
fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void {
var source_tok = tokFromRaw(raw);
if (!raw.id.isMacroIdentifier()) {
source_tok.id.simplifyMacroKeyword();
return pp.addToken(source_tok);
}
pp.top_expansion_buf.items.len = 0;
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
for (pp.top_expansion_buf.items) |*tok| {
if (tok.id == .macro_ws and !pp.preserve_whitespace) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .placemarker) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
tok.id.simplifyMacroKeywordExtra(true);
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok.*));
}
if (pp.preserve_whitespace) {
try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl);
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
}
}
fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
var tmp_tokenizer: Tokenizer = .{
.buf = pp.comp.getSource(tok.loc.id).buf,
.langopts = pp.comp.langopts,
.index = tok.loc.byte_offset,
.source = .generated,
};
if (tok.id == .macro_string) {
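// `<...>` header names are a single token that cannot be re-lexed as such; scan
// forward to the closing '>' to recover the full spelling.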
while (true) : (tmp_tokenizer.index += 1) {
if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break;
}
return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1];
}
const res = tmp_tokenizer.next();
return tmp_tokenizer.buf[res.start..res.end];
}
/// Get expanded token source string.
pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
}
/// Concat two tokens and add the result to pp.generated
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void {
const lhs = while (lhs_toks.pop()) |lhs| {
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
(lhs.id != .macro_ws and lhs.id != .comment))
break lhs;
TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
} else {
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
};
var rhs_rest: u32 = 1;
const rhs = for (rhs_toks) |rhs| {
if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or
(rhs.id != .macro_ws and rhs.id != .comment))
break rhs;
rhs_rest += 1;
} else {
return lhs_toks.appendAssumeCapacity(lhs);
};
defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
const start = pp.comp.generated_buf.items.len;
const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len;
try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline
// We cannot reuse the slices from above since `ensureTotalCapacity` may have reallocated the buffer, invalidating them
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs));
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs));
pp.comp.generated_buf.appendAssumeCapacity('\n');
// Try to tokenize the result.
var tmp_tokenizer = Tokenizer{
.buf = pp.comp.generated_buf.items,
.langopts = pp.comp.langopts,
.index = @intCast(start),
.source = .generated,
};
const pasted_token = tmp_tokenizer.nextNoWSComments();
const next = tmp_tokenizer.nextNoWSComments();
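// If anything other than a newline or EOF follows the pasted token, the concatenation did not form a single valid token.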
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
if (next.id != .nl and next.id != .eof) {
try pp.err(lhs, .pasting_formed_invalid, .{pp.comp.generated_buf.items[start..end]});
try lhs_toks.append(tokFromRaw(next));
}
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
}
fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs {
var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{
.id = .generated,
.byte_offset = @intCast(start),
.line = pp.generated_line,
} };
pp.generated_line += 1;
try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc});
try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice());
return pasted_token;
}
/// Defines a new macro and warns if it is a duplicate
fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: TokenWithExpansionLocs, macro: Macro) Error!void {
const name_str = pp.expandedSlice(name_tok);
const gop = try pp.defines.getOrPut(pp.gpa, name_str);
if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) {
const loc = name_tok.loc;
const prev_total = pp.diagnostics.total;
if (gop.value_ptr.is_builtin) {
try pp.err(loc, .builtin_macro_redefined, .{});
} else {
try pp.err(loc, .macro_redefined, .{name_str});
}
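// Only note the previous definition if the redefinition diagnostic was actually emitted.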
if (!gop.value_ptr.is_builtin and pp.diagnostics.total != prev_total) {
try pp.err(gop.value_ptr.loc, .previous_definition, .{});
}
}
if (pp.verbose) {
const raw: RawToken = .{ .id = name_tok.id, .source = name_tok.loc.id, .start = name_tok.loc.byte_offset, .line = name_tok.loc.line };
pp.verboseLog(raw, "macro {s} defined", .{name_str});
}
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(define_tok));
}
gop.value_ptr.* = macro;
}
/// Handle a #define directive.
fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void {
// Get macro name and validate it.
const escaped_macro_name = tokenizer.nextNoWS();
if (escaped_macro_name.id == .keyword_defined) {
try pp.err(escaped_macro_name, .defined_as_macro_name, .{});
return skipToNl(tokenizer);
}
if (!escaped_macro_name.id.isMacroIdentifier()) {
try pp.err(escaped_macro_name, .macro_name_must_be_identifier, .{});
return skipToNl(tokenizer);
}
const macro_name = try pp.unescapeUcn(tokFromRaw(escaped_macro_name));
defer TokenWithExpansionLocs.free(macro_name.expansion_locs, pp.gpa);
var macro_name_token_id = macro_name.id;
macro_name_token_id.simplifyMacroKeyword();
switch (macro_name_token_id) {
.identifier, .extended_identifier => {},
// TODO allow #define <keyword> <keyword> and #define extern|inline|static|const
else => if (macro_name_token_id.isMacroIdentifier() and
!mem.eql(u8, pp.comp.getSource(tokenizer.source).path, "<builtin>"))
{
try pp.err(macro_name, .keyword_macro, .{});
},
}
// Check for function macros and empty defines.
var first = tokenizer.next();
switch (first.id) {
.nl, .eof => return pp.defineMacro(define_tok, macro_name, .{
.params = &.{},
.tokens = &.{},
.var_args = false,
.loc = macro_name.loc,
.is_func = false,
}),
.whitespace => first = tokenizer.next(),
.l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first),
else => try pp.err(first, .whitespace_after_macro_name, .{}),
}
if (first.id == .hash_hash) {
try pp.err(first, .hash_hash_at_start, .{});
return skipToNl(tokenizer);
}
first.id.simplifyMacroKeyword();
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
var need_ws = false;
// Collect the token body and validate any ## found.
var tok = first;
while (true) {
tok.id.simplifyMacroKeyword();
switch (tok.id) {
.hash_hash => {
const next = tokenizer.nextNoWSComments();
switch (next.id) {
.nl, .eof => {
try pp.err(tok, .hash_hash_at_end, .{});
return;
},
.hash_hash => {
try pp.err(next, .hash_hash_at_end, .{});
return;
},
else => {},
}
try pp.token_buf.append(tok);
try pp.token_buf.append(next);
},
.nl, .eof => break,
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.whitespace => need_ws = true,
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id == .incomplete_ucn) {
@branchHint(.cold);
try pp.err(tok, .incomplete_ucn, .{});
}
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
}
tok = tokenizer.next();
}
const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(define_tok, macro_name, .{
.loc = macro_name.loc,
.tokens = list,
.params = &.{},
.is_func = false,
.var_args = false,
});
}
/// Handle a function-like #define directive.
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: TokenWithExpansionLocs, l_paren: RawToken) Error!void {
assert(macro_name.id.isMacroIdentifier());
var params = std.array_list.Managed([]const u8).init(pp.gpa);
defer params.deinit();
// Parse the parameter list.
var gnu_var_args: []const u8 = "";
var var_args = false;
while (true) {
var tok = tokenizer.nextNoWS();
if (tok.id == .r_paren) break;
if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list, .{});
if (tok.id == .ellipsis) {
var_args = true;
const r_paren = tokenizer.nextNoWS();
if (r_paren.id != .r_paren) {
try pp.err(r_paren, .missing_paren_param_list, .{});
try pp.err(l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
}
break;
}
if (!tok.id.isMacroIdentifier()) {
try pp.err(tok, .invalid_token_param_list, .{});
return skipToNl(tokenizer);
}
try params.append(pp.tokSlice(tok));
tok = tokenizer.nextNoWS();
if (tok.id == .ellipsis) {
try pp.err(tok, .gnu_va_macro, .{});
gnu_var_args = params.pop().?;
const r_paren = tokenizer.nextNoWS();
if (r_paren.id != .r_paren) {
try pp.err(r_paren, .missing_paren_param_list, .{});
try pp.err(l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
}
break;
} else if (tok.id == .r_paren) {
break;
} else if (tok.id != .comma) {
try pp.err(tok, .expected_comma_param_list, .{});
return skipToNl(tokenizer);
}
}
var need_ws = false;
// Collect the body tokens and validate # and ##'s found.
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
tok_loop: while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.nl, .eof => break,
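// Whitespace before the first body token is not recorded.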
.whitespace => need_ws = pp.token_buf.items.len != 0,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.hash => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
const param = tokenizer.nextNoWS();
blk: {
if (var_args and param.id == .keyword_va_args) {
tok.id = .stringify_va_args;
try pp.token_buf.append(tok);
continue :tok_loop;
}
if (!param.id.isMacroIdentifier()) break :blk;
const s = pp.tokSlice(param);
if (mem.eql(u8, s, gnu_var_args)) {
tok.id = .stringify_va_args;
try pp.token_buf.append(tok);
continue :tok_loop;
}
for (params.items, 0..) |p, i| {
if (mem.eql(u8, p, s)) {
tok.id = .stringify_param;
tok.end = @intCast(i);
try pp.token_buf.append(tok);
continue :tok_loop;
}
}
}
try pp.err(param, .hash_not_followed_param, .{});
return skipToNl(tokenizer);
},
.hash_hash => {
need_ws = false;
// If ## appears at the beginning of the body the token buf is still empty;
// error out in this case.
if (pp.token_buf.items.len == 0) {
try pp.err(tok, .hash_hash_at_start, .{});
return skipToNl(tokenizer);
}
const saved_tokenizer = tokenizer.*;
const next = tokenizer.nextNoWSComments();
if (next.id == .nl or next.id == .eof) {
try pp.err(tok, .hash_hash_at_end, .{});
return;
}
tokenizer.* = saved_tokenizer;
// convert the previous token to .macro_param_no_expand if it was .macro_param
if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) {
pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand;
}
try pp.token_buf.append(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
if (var_args and tok.id == .keyword_va_args) {
// do nothing
} else if (var_args and tok.id == .keyword_va_opt) {
const opt_l_paren = tokenizer.next();
if (opt_l_paren.id != .l_paren) {
try pp.err(opt_l_paren, .va_opt_lparen, .{});
return skipToNl(tokenizer);
}
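// Record the byte range between the parentheses; `expandVaOpt` re-lexes this range when the macro is expanded.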
tok.start = opt_l_paren.end;
var parens: u32 = 0;
while (true) {
const opt_tok = tokenizer.next();
switch (opt_tok.id) {
.l_paren => parens += 1,
.r_paren => if (parens == 0) {
break;
} else {
parens -= 1;
},
.nl, .eof => {
try pp.err(opt_tok, .va_opt_rparen, .{});
try pp.err(opt_l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
},
.whitespace => {},
else => tok.end = opt_tok.end,
}
}
} else if (tok.id.isMacroIdentifier()) {
tok.id.simplifyMacroKeyword();
const s = pp.tokSlice(tok);
if (mem.eql(u8, gnu_var_args, s)) {
tok.id = .keyword_va_args;
} else for (params.items, 0..) |param, i| {
if (mem.eql(u8, param, s)) {
// NOTE: it makes no difference whether this is .macro_param or .macro_param_no_expand
// when the previous token was ##, because ## processing consumes this token
// with the same semantics either way.
tok.id = .macro_param;
tok.end = @intCast(i);
break;
}
}
}
try pp.token_buf.append(tok);
},
}
}
const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(define_tok, macro_name, .{
.is_func = true,
.params = param_list,
.var_args = var_args or gnu_var_args.len != 0,
.tokens = token_list,
.loc = macro_name.loc,
});
}
/// Handle an #embed directive
/// embedDirective : ("FILENAME" | <FILENAME>) embedParam*
/// embedParam : IDENTIFIER (:: IDENTIFIER)? '(' <tokens> ')'
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
const first = tokenizer.nextNoWS();
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .ignore_trailing_tokens) catch |er| switch (er) {
error.InvalidInclude => return,
else => |e| return e,
};
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename, .{});
return;
}
const filename = tok_slice[1 .. tok_slice.len - 1];
const include_type: Compilation.IncludeType = switch (filename_tok.id) {
.string_literal => .quotes,
.macro_string => .angle_brackets,
else => unreachable,
};
// Index into `token_buf`
const Range = struct {
start: u32,
end: u32,
fn expand(opt_range: ?@This(), pp_: *Preprocessor, tokenizer_: *Tokenizer) !void {
const range = opt_range orelse return;
const slice = pp_.token_buf.items[range.start..range.end];
for (slice) |tok| {
try pp_.expandMacro(tokenizer_, tok);
}
}
};
pp.token_buf.items.len = 0;
var limit: ?std.Io.Limit = null;
var prefix: ?Range = null;
var suffix: ?Range = null;
var if_empty: ?Range = null;
while (true) {
const param_first = tokenizer.nextNoWS();
switch (param_first.id) {
.nl, .eof => break,
.identifier => {},
else => {
try pp.err(param_first, .malformed_embed_param, .{});
continue;
},
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
const maybe_colon = tokenizer.colonColon();
const param = switch (maybe_colon.id) {
.colon_colon => blk: {
// vendor::param
const param = tokenizer.nextNoWS();
if (param.id != .identifier) {
try pp.err(param, .malformed_embed_param, .{});
continue;
}
const l_paren = tokenizer.nextNoWS();
if (l_paren.id != .l_paren) {
try pp.err(l_paren, .malformed_embed_param, .{});
continue;
}
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param_first)));
try pp.char_buf.appendSlice("::");
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param)));
break :blk pp.char_buf.items;
},
.l_paren => Attribute.normalize(pp.tokSlice(param_first)),
else => {
try pp.err(maybe_colon, .malformed_embed_param, .{});
continue;
},
};
const start: u32 = @intCast(pp.token_buf.items.len);
while (true) {
const next = tokenizer.nextNoWS();
if (next.id == .r_paren) break;
if (next.id == .eof) {
try pp.err(maybe_colon, .malformed_embed_param, .{});
break;
}
try pp.token_buf.append(next);
}
const end: u32 = @intCast(pp.token_buf.items.len);
if (std.mem.eql(u8, param, "limit")) {
if (limit != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"limit"});
continue;
}
if (start + 1 != end) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
const limit_tok = pp.token_buf.items[start];
if (limit_tok.id != .pp_num) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
limit = .limited(std.fmt.parseInt(u32, pp.tokSlice(limit_tok), 10) catch {
try pp.err(limit_tok, .malformed_embed_limit, .{});
continue;
});
pp.token_buf.items.len = start;
} else if (std.mem.eql(u8, param, "prefix")) {
if (prefix != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"prefix"});
continue;
}
prefix = .{ .start = start, .end = end };
} else if (std.mem.eql(u8, param, "suffix")) {
if (suffix != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"suffix"});
continue;
}
suffix = .{ .start = start, .end = end };
} else if (std.mem.eql(u8, param, "if_empty")) {
if (if_empty != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"if_empty"});
continue;
}
if_empty = .{ .start = start, .end = end };
} else {
try pp.err(tokFromRaw(param_first), .unsupported_embed_param, .{param});
pp.token_buf.items.len = start;
}
}
const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type, limit orelse .unlimited, pp.dep_file)) orelse
return pp.fatalNotFound(filename_tok, filename);
defer pp.comp.gpa.free(embed_bytes);
try Range.expand(prefix, pp, tokenizer);
if (embed_bytes.len == 0) {
try Range.expand(if_empty, pp, tokenizer);
try Range.expand(suffix, pp, tokenizer);
return;
}
try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas
// TODO: We currently only support systems with CHAR_BIT == 8
// If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes
// and correctly account for the target's endianness
{
const byte = embed_bytes[0];
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{d}", .{byte});
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok));
}
for (embed_bytes[1..]) |byte| {
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, ",{d}", .{byte});
pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } });
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok));
}
try pp.comp.generated_buf.append(pp.gpa, '\n');
try Range.expand(suffix, pp, tokenizer);
}
/// Handle a #include directive.
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
const first = tokenizer.nextNoWS();
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
error.InvalidInclude => return,
else => |e| return e,
};
// Prevent stack overflow
pp.include_depth += 1;
defer pp.include_depth -= 1;
if (pp.include_depth > max_include_depth) {
const loc: Source.Location = .{ .id = first.source, .byte_offset = first.start, .line = first.line };
try pp.err(loc, .too_many_includes, .{});
return error.StopPreprocessing;
}
if (pp.include_guards.get(new_source.id)) |guard| {
if (pp.defines.contains(guard)) return;
}
if (pp.dep_file) |dep| try dep.addDependency(pp.gpa, new_source.path);
if (pp.verbose) {
pp.verboseLog(first, "include file {s}", .{new_source.path});
}
const token_state = pp.getTokenState();
try pp.addIncludeStart(new_source);
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {
for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa);
pp.restoreTokenState(token_state);
return;
},
else => |e| return e,
};
try eof.checkMsEof(new_source, pp.comp);
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
if (pp.linemarkers == .none) return;
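// Skip newline tokens following the #include so the resume linemarker points at the next real token.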
var next = first;
while (true) {
var tmp = tokenizer.*;
next = tmp.nextNoWS();
if (next.id != .nl) break;
tokenizer.* = tmp;
}
try pp.addIncludeResume(next.source, next.end, next.line);
}
/// Tokens that are part of a pragma directive can arise in 3 ways:
/// 1. Directly in the text via `#pragma ...`
/// 2. Via a string literal argument to `_Pragma`
/// 3. Via a stringified macro argument which is used as an argument to `_Pragma`
/// operator_loc: location of `_Pragma`; null if this is from `#pragma`
/// arg_locs: expansion locations of the argument to `_Pragma`; empty if `#pragma` or a raw string literal was used
fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs {
var tok = tokFromRaw(raw);
if (operator_loc) |loc| {
try tok.addExpansionLocation(pp.gpa, &.{loc});
}
try tok.addExpansionLocation(pp.gpa, arg_locs);
return tok;
}
pub fn addToken(pp: *Preprocessor, tok_arg: TokenWithExpansionLocs) !void {
const tok = try pp.unescapeUcn(tok_arg);
if (tok.expansion_locs) |expansion_locs| {
try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
}
try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc });
}
pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void {
if (tok.expansion_locs) |expansion_locs| {
pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
}
pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc });
}
pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
try pp.tokens.ensureTotalCapacity(pp.gpa, capacity);
try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity);
}
pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity);
try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity);
}
/// Handle a pragma directive
fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void {
const name_tok = tokenizer.nextNoWS();
if (name_tok.id == .nl or name_tok.id == .eof) return;
try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs));
const pragma_start: u32 = @intCast(pp.tokens.len);
const name = pp.tokSlice(name_tok);
const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs);
try pp.addToken(pragma_name_tok);
while (true) {
const next_tok = tokenizer.next();
if (next_tok.id == .whitespace) continue;
if (next_tok.id == .eof) {
try pp.addToken(.{
.id = .nl,
.loc = .{ .id = .generated },
});
break;
}
try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs));
if (next_tok.id == .nl) break;
}
if (pp.comp.getPragma(name)) |prag| unknown: {
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
error.UnknownPragma => break :unknown,
else => |e| return e,
};
}
try pp.err(pragma_name_tok, .unknown_pragma, .{});
}
fn findIncludeFilenameToken(
pp: *Preprocessor,
first_token: RawToken,
tokenizer: *Tokenizer,
trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof },
) !TokenWithExpansionLocs {
var first = first_token;
if (first.id == .angle_bracket_left) to_end: {
// The tokenizer does not handle <foo> include strings so do it here.
while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) {
switch (tokenizer.buf[tokenizer.index]) {
'>' => {
tokenizer.index += 1;
first.end = tokenizer.index;
first.id = .macro_string;
break :to_end;
},
'\n' => break,
else => {},
}
}
const loc: Source.Location = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line };
try pp.err(loc, .header_str_closing, .{});
try pp.err(first, .header_str_match, .{});
}
const source_tok = tokFromRaw(first);
const filename_tok, const expanded_trailing = switch (source_tok.id) {
.string_literal, .macro_string => .{ source_tok, false },
else => expanded: {
// Try to expand if the argument is a macro.
pp.top_expansion_buf.items.len = 0;
defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
var trailing_toks: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse {
try pp.expectNl(tokenizer);
return error.InvalidInclude;
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, include_str);
break :expanded .{ try pp.makeGeneratedToken(start, switch (include_str[0]) {
'"' => .string_literal,
'<' => .macro_string,
else => unreachable,
}, pp.top_expansion_buf.items[0]), trailing_toks.len != 0 };
},
};
switch (trailing_token_behavior) {
.expect_nl_eof => {
// Error on extra tokens.
const nl = tokenizer.nextNoWS();
if ((nl.id != .nl and nl.id != .eof) or expanded_trailing) {
skipToNl(tokenizer);
try pp.err(filename_tok, .extra_tokens_directive_end, .{});
}
},
.ignore_trailing_tokens => if (expanded_trailing) {
try pp.err(filename_tok, .extra_tokens_directive_end, .{});
},
}
return filename_tok;
}
fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source {
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename, .{});
return error.InvalidInclude;
}
// Find the file.
const filename = tok_slice[1 .. tok_slice.len - 1];
const include_type: Compilation.IncludeType = switch (filename_tok.id) {
.string_literal => .quotes,
.macro_string => .angle_brackets,
else => unreachable,
};
return (try pp.comp.findInclude(filename, first, include_type, which)) orelse
return pp.fatalNotFound(filename_tok, filename);
}
fn printLinemarker(
pp: *Preprocessor,
w: *std.Io.Writer,
line_no: u32,
source: Source,
start_resume: enum(u8) { start, @"resume", none },
) !void {
try w.writeByte('#');
if (pp.linemarkers == .line_directives) try w.writeAll("line");
try w.print(" {d} \"{f}\"", .{ line_no, fmtEscapes(source.path) });
if (pp.linemarkers == .numeric_directives) {
switch (start_resume) {
.none => {},
.start => try w.writeAll(" 1"),
.@"resume" => try w.writeAll(" 2"),
}
switch (source.kind) {
.user => {},
.system => try w.writeAll(" 3"),
.extern_c_system => try w.writeAll(" 3 4"),
}
}
try w.writeByte('\n');
}
// How many consecutive empty lines are needed before they are collapsed into a linemarker.
const collapse_newlines = 8;
pub const DumpMode = enum {
/// Standard preprocessor output; no macros
result_only,
/// Output only #define directives for all the macros defined during the execution of the preprocessor.
/// Only macros which are still defined at the end of preprocessing are printed.
/// Only the most recent definition is printed.
/// Defines are printed in arbitrary order.
macros_only,
/// Standard preprocessor output, but additionally output #define's and #undef's for macros as they are encountered
macros_and_result,
/// Same as macros_and_result, except only the macro name is printed for #define's
macro_names_and_result,
};
/// Pretty-print the macro define or undef at location `loc`.
/// We re-tokenize the directive because we are printing a macro that may have the same name as one in
/// `pp.defines` but a different definition (due to being #undef'ed and then redefined)
fn prettyPrintMacro(pp: *Preprocessor, w: *std.Io.Writer, loc: Source.Location, parts: enum { name_only, name_and_body }) !void {
const source = pp.comp.getSource(loc.id);
var tokenizer: Tokenizer = .{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
.index = loc.byte_offset,
};
var prev_ws = false; // avoid printing multiple whitespace if /* */ comments are within the macro def
var saw_name = false; // do not print comments before the name token is seen.
while (true) {
const tok = tokenizer.next();
switch (tok.id) {
.comment => {
if (saw_name) {
prev_ws = false;
try w.print("{s}", .{pp.tokSlice(tok)});
}
},
.nl, .eof => break,
.whitespace => {
if (!prev_ws) {
try w.writeByte(' ');
prev_ws = true;
}
},
else => {
prev_ws = false;
try w.print("{s}", .{pp.tokSlice(tok)});
},
}
if (tok.id == .identifier or tok.id == .extended_identifier) {
if (parts == .name_only) break;
saw_name = true;
}
}
}
fn prettyPrintMacrosOnly(pp: *Preprocessor, w: *std.Io.Writer) !void {
for (pp.defines.values()) |macro| {
if (macro.is_builtin) continue;
try w.writeAll("#define ");
try pp.prettyPrintMacro(w, macro.loc, .name_and_body);
try w.writeByte('\n');
}
}
/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: *std.Io.Writer, macro_dump_mode: DumpMode) !void {
if (macro_dump_mode == .macros_only) {
return pp.prettyPrintMacrosOnly(w);
}
const tok_ids = pp.tokens.items(.id);
var i: u32 = 0;
var last_nl = true;
outer: while (true) : (i += 1) {
var cur: Token = pp.tokens.get(i);
switch (cur.id) {
.eof => {
if (!last_nl) try w.writeByte('\n');
try w.flush();
return;
},
.nl => {
var newlines: u32 = 0;
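// Count the run of consecutive newlines; long runs are collapsed and, when
// linemarkers are enabled, replaced with a linemarker.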
for (tok_ids[i..], i..) |id, j| {
if (id == .nl) {
newlines += 1;
} else if (id == .eof) {
if (!last_nl) try w.writeByte('\n');
try w.flush();
return;
} else if (id != .whitespace) {
if (pp.linemarkers == .none) {
if (newlines < 2) break;
} else if (newlines < collapse_newlines) {
break;
}
i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
if (!last_nl) try w.writeAll("\n");
if (pp.linemarkers != .none) {
const next = pp.tokens.get(i);
const source = pp.comp.getSource(next.loc.id);
const line_col = source.lineCol(next.loc);
try pp.printLinemarker(w, line_col.line_no, source, .none);
last_nl = true;
}
continue :outer;
}
}
last_nl = true;
try w.writeAll("\n");
},
.keyword_pragma => {
const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1));
const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1;
const pragma_len = @as(u32, @intCast(end_idx)) - i;
if (pp.comp.getPragma(pragma_name)) |prag| {
if (!prag.shouldPreserveTokens(pp, i + 1)) {
try w.writeByte('\n');
i += pragma_len;
cur = pp.tokens.get(i);
continue;
}
}
try w.writeAll("#pragma");
i += 1;
while (true) : (i += 1) {
cur = pp.tokens.get(i);
if (cur.id == .nl) {
try w.writeByte('\n');
last_nl = true;
break;
}
try w.writeByte(' ');
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
}
},
.whitespace => {
var slice = pp.expandedSlice(cur);
while (mem.indexOfScalar(u8, slice, '\n')) |some| {
if (pp.linemarkers != .none) try w.writeByte('\n');
slice = slice[some + 1 ..];
}
for (slice) |_| try w.writeByte(' ');
last_nl = false;
},
.include_start => {
const source = pp.comp.getSource(cur.loc.id);
try pp.printLinemarker(w, 1, source, .start);
last_nl = true;
},
.include_resume => {
const source = pp.comp.getSource(cur.loc.id);
const line_col = source.lineCol(cur.loc);
if (!last_nl) try w.writeAll("\n");
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
.keyword_define, .keyword_undef => {
switch (macro_dump_mode) {
.macros_and_result, .macro_names_and_result => {
try w.writeByte('#');
try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only);
last_nl = false;
},
.result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output
.macros_only => unreachable, // handled by prettyPrintMacrosOnly
}
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
last_nl = false;
},
}
}
}
/// Like `std.zig.fmtEscapes`, but for C strings. Hex escapes are used for any
/// non-ASCII/unprintable bytes to ensure that the string bytes do not change if
/// the encoding of the file is not UTF-8.
fn fmtEscapes(bytes: []const u8) FmtEscapes {
return .{ .bytes = bytes };
}
const FmtEscapes = struct {
bytes: []const u8,
pub fn format(ctx: FmtEscapes, w: *std.Io.Writer) !void {
for (ctx.bytes) |byte| switch (byte) {
'\n' => try w.writeAll("\\n"),
'\r' => try w.writeAll("\\r"),
'\t' => try w.writeAll("\\t"),
'\\' => try w.writeAll("\\\\"),
'"' => try w.writeAll("\\\""),
' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte),
// Use hex escapes for any non-ASCII/unprintable characters.
// This ensures that the parsed version of this string will end up
// containing the same bytes as the input regardless of encoding.
else => try w.print("\\x{x:0>2}", .{byte}),
};
}
};
test "Preserve pragma tokens sometimes" {
const gpa = std.testing.allocator;
const Test = struct {
fn runPreprocessor(source_text: []const u8) ![]const u8 {
var arena: std.heap.ArenaAllocator = .init(gpa);
defer arena.deinit();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
defer comp.deinit();
try comp.addDefaultPragmaHandlers();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
pp.preserve_whitespace = true;
assert(pp.linemarkers == .none);
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
try pp.addToken(eof);
var allocating: std.Io.Writer.Allocating = .init(gpa);
defer allocating.deinit();
try pp.prettyPrintTokens(&allocating.writer, .result_only);
return allocating.toOwnedSlice();
}
fn check(source_text: []const u8, expected: []const u8) !void {
const output = try runPreprocessor(source_text);
defer gpa.free(output);
try std.testing.expectEqualStrings(expected, output);
}
};
const preserve_gcc_diagnostic =
\\#pragma GCC diagnostic error "-Wnewline-eof"
\\#pragma GCC warning error "-Wnewline-eof"
\\int x;
\\#pragma GCC ignored error "-Wnewline-eof"
\\
;
try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic);
const omit_once =
\\#pragma once
\\int x;
\\#pragma once
\\
;
// TODO should only be one newline afterwards when emulating clang
try Test.check(omit_once, "\nint x;\n\n");
const omit_poison =
\\#pragma GCC poison foobar
\\
;
try Test.check(omit_poison, "\n");
}
test "destringify" {
const gpa = std.testing.allocator;
const Test = struct {
fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void {
pp.char_buf.clearRetainingCapacity();
try pp.char_buf.ensureUnusedCapacity(stringified.len);
pp.destringify(stringified);
try std.testing.expectEqualStrings(destringified, pp.char_buf.items);
}
};
var arena: std.heap.ArenaAllocator = .init(gpa);
defer arena.deinit();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
defer comp.deinit();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n");
try Test.testDestringify(&pp,
\\ \"FOO BAR BAZ\"
,
\\ "FOO BAR BAZ"
);
try Test.testDestringify(&pp,
\\ \\t\\n
\\
,
\\ \t\n
\\
);
}
test "Include guards" {
const Test = struct {
/// This is here so that when #elifdef / #elifndef are added we don't forget
/// to test that they don't accidentally break include guard detection
fn pairsWithIfndef(tok_id: RawToken.Id) bool {
return switch (tok_id) {
.keyword_elif,
.keyword_elifdef,
.keyword_elifndef,
.keyword_else,
=> true,
.keyword_include,
.keyword_include_next,
.keyword_embed,
.keyword_define,
.keyword_defined,
.keyword_undef,
.keyword_ifdef,
.keyword_ifndef,
.keyword_error,
.keyword_warning,
.keyword_pragma,
.keyword_line,
.keyword_endif,
=> false,
else => unreachable,
};
}
fn skippable(tok_id: RawToken.Id) bool {
return switch (tok_id) {
.keyword_defined, .keyword_va_args, .keyword_va_opt, .keyword_endif => true,
else => false,
};
}
fn testIncludeGuard(gpa: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void {
var arena_state: std.heap.ArenaAllocator = .init(gpa);
defer arena_state.deinit();
const arena = arena_state.allocator();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena, &diagnostics, std.fs.cwd());
defer comp.deinit();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
const path = try std.fs.path.join(arena, &.{ ".", "bar.h" });
_ = try comp.addSourceFromBuffer(path, "int bar = 5;\n");
var buf = std.array_list.Managed(u8).init(gpa);
defer buf.deinit();
switch (tok_id) {
.keyword_include, .keyword_include_next => try buf.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }),
.keyword_define, .keyword_undef => try buf.print(template, .{ tok_id.lexeme().?, " BAR" }),
.keyword_ifndef,
.keyword_ifdef,
.keyword_elifdef,
.keyword_elifndef,
=> try buf.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }),
else => try buf.print(template, .{ tok_id.lexeme().?, "" }),
}
const source = try comp.addSourceFromBuffer("test.h", buf.items);
_ = try pp.preprocess(source);
try std.testing.expectEqual(expected_guards, pp.include_guards.count());
}
};
const tags = std.meta.tags(RawToken.Id);
for (tags) |tag| {
if (Test.skippable(tag)) continue;
var copy = tag;
copy.simplifyMacroKeyword();
if (copy != tag or tag == .keyword_else) {
const inside_ifndef_template =
\\//Leading comment (should be ignored)
\\
\\#ifndef FOO
\\#{s}{s}
\\#endif
;
const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1;
try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards);
const outside_ifndef_template =
\\#ifndef FOO
\\#endif
\\#{s}{s}
;
try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0);
}
}
}