const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const assert = std.debug.assert;

const Attribute = @import("Attribute.zig");
const Compilation = @import("Compilation.zig");
const Error = Compilation.Error;
const Diagnostics = @import("Diagnostics.zig");
const DepFile = @import("DepFile.zig");
const features = @import("features.zig");
const Hideset = @import("Hideset.zig");
const Parser = @import("Parser.zig");
const Source = @import("Source.zig");
const text_literal = @import("text_literal.zig");
const Tokenizer = @import("Tokenizer.zig");
const RawToken = Tokenizer.Token;
const SourceEpoch = Compilation.Environment.SourceEpoch;
const Tree = @import("Tree.zig");
const Token = Tree.Token;
const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs;

const DefineMap = std.StringArrayHashMapUnmanaged(Macro);
const RawTokenList = std.array_list.Managed(RawToken);
const max_include_depth = 200;

/// Errors that can be returned when expanding a macro.
/// error.UnknownPragma can occur within Preprocessor.pragma() but
/// it is handled there and doesn't escape that function
const MacroError = Error || error{StopPreprocessing};

const IfContext = struct {
    const Backing = u2;
    const Nesting = enum(Backing) {
        until_else,
        until_endif,
        until_endif_seen_else,
    };

    const buf_size_bits = @bitSizeOf(Backing) * 256;
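    /// Packed 2-bit `Nesting` entries, one per `#if` nesting level; sized for up to 256 levels.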
    kind: [buf_size_bits / std.mem.byte_size_in_bits]u8,
    level: u8,

    fn get(self: *const IfContext) Nesting {
        return @enumFromInt(std.mem.readPackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2));
    }

    fn set(self: *IfContext, context: Nesting) void {
        std.mem.writePackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2, @intFromEnum(context));
    }

    fn increment(self: *IfContext) bool {
        self.level, const overflowed = @addWithOverflow(self.level, 1);
        return overflowed != 0;
    }

    fn decrement(self: *IfContext) void {
        self.level -= 1;
    }

    /// Initialize `kind` to an invalid value since it is an error to read the kind before setting it.
    /// Doing so will trigger safety-checked undefined behavior in `IfContext.get`
    const default: IfContext = .{ .kind = @splat(0xFF), .level = 0 };
};

pub const Macro = struct {
    /// Parameters of the function type macro
    params: []const []const u8,

    /// Tokens constituting the macro body
    tokens: []const RawToken,

    /// Whether the function type macro has a variable number of arguments
    var_args: bool,

    /// Is a function type macro
    is_func: bool,

    /// Is a predefined macro
    is_builtin: bool = false,

    /// Location of macro in the source
    loc: Source.Location,

    fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
        if (a.tokens.len != b.tokens.len) return false;
        if (a.is_builtin != b.is_builtin) return false;
        for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false;

        if (a.is_func and b.is_func) {
            if (a.var_args != b.var_args) return false;
            if (a.params.len != b.params.len) return false;
            for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false;
        }

        return true;
    }

    fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool {
        return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b));
    }
};

const Preprocessor = @This();

const ExpansionEntry = struct {
    idx: Tree.TokenIndex,
    locs: [*]Source.Location,
};

const TokenState = struct {
    tokens_len: usize,
    expansion_entries_len: usize,
};

comp: *Compilation,
diagnostics: *Diagnostics,
gpa: mem.Allocator,

arena: std.heap.ArenaAllocator,
defines: DefineMap = .{},
/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity
tokens: Token.List = .{},
/// Do not directly mutate this; must be kept in sync with `tokens`
expansion_entries: std.MultiArrayList(ExpansionEntry) = .{},
token_buf: RawTokenList,
char_buf: std.array_list.Managed(u8),
/// Counter that is incremented each time preprocess() is called
/// Can be used to distinguish multiple preprocessings of the same file
preprocess_count: u32 = 0,
generated_line: u32 = 1,
add_expansion_nl: u32 = 0,
include_depth: u8 = 0,
counter: u32 = 0,
expansion_source_loc: Source.Location = undefined,
poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},

/// Store `keyword_define` and `keyword_undef` tokens.
/// Used to implement preprocessor debug dump options
/// Must be false unless in -E mode (parser does not handle those token types)
store_macro_tokens: bool = false,

/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,

/// Dump current state to stderr.
verbose: bool = false,
preserve_whitespace: bool = false,

/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,

hideset: Hideset,

/// Epoch used for __DATE__, __TIME__, and possibly __TIMESTAMP__
source_epoch: SourceEpoch,
m_times: std.AutoHashMapUnmanaged(Source.Id, u64) = .{},

/// The dependency file tracking all includes and embeds.
dep_file: ?*DepFile = null,

pub const parse = Parser.parse;

pub const Linemarkers = enum {
    /// No linemarker tokens. Required setting if parser will run
    none,
    /// #line <num> "filename"
    line_directives,
    /// # <num> "filename" flags
    numeric_directives,
};

pub fn init(comp: *Compilation, source_epoch: SourceEpoch) Preprocessor {
    const pp: Preprocessor = .{
        .comp = comp,
        .diagnostics = comp.diagnostics,
        .gpa = comp.gpa,
        .arena = std.heap.ArenaAllocator.init(comp.gpa),
        .token_buf = RawTokenList.init(comp.gpa),
        .char_buf = std.array_list.Managed(u8).init(comp.gpa),
        .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
        .top_expansion_buf = ExpandBuf.init(comp.gpa),
        .hideset = .{ .comp = comp },
        .source_epoch = source_epoch,
    };
    comp.pragmaEvent(.before_preprocess);
    return pp;
}

/// Initialize Preprocessor with builtin macros.
pub fn initDefault(comp: *Compilation) !Preprocessor {
    const source_epoch: SourceEpoch = comp.environment.sourceEpoch() catch |er| switch (er) {
        error.InvalidEpoch => blk: {
            const diagnostic: Diagnostic = .invalid_source_epoch;
            try comp.diagnostics.add(.{ .text = diagnostic.fmt, .kind = diagnostic.kind, .opt = diagnostic.opt, .location = null });
            break :blk .default;
        },
    };

    var pp = init(comp, source_epoch);
    errdefer pp.deinit();
    try pp.addBuiltinMacros();
    return pp;
}

// `param_tok_id` is comptime so that the generated `tokens` list is unique for every macro.
fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, comptime param_tok_id: Token.Id) !void {
    try pp.defines.putNoClobber(pp.gpa, name, .{
        .params = &[1][]const u8{"X"},
        .tokens = &[1]RawToken{.{
            .id = param_tok_id,
            .source = .generated,
        }},
        .var_args = false,
        .is_func = is_func,
        .loc = .{ .id = .generated },
        .is_builtin = true,
    });
}

pub fn addBuiltinMacros(pp: *Preprocessor) !void {
    try pp.addBuiltinMacro("__has_attribute", true, .macro_param_has_attribute);
    try pp.addBuiltinMacro("__has_c_attribute", true, .macro_param_has_c_attribute);
    try pp.addBuiltinMacro("__has_declspec_attribute", true, .macro_param_has_declspec_attribute);
    try pp.addBuiltinMacro("__has_warning", true, .macro_param_has_warning);
    try pp.addBuiltinMacro("__has_feature", true, .macro_param_has_feature);
    try pp.addBuiltinMacro("__has_extension", true, .macro_param_has_extension);
    try pp.addBuiltinMacro("__has_builtin", true, .macro_param_has_builtin);
    try pp.addBuiltinMacro("__has_include", true, .macro_param_has_include);
    try pp.addBuiltinMacro("__has_include_next", true, .macro_param_has_include_next);
    try pp.addBuiltinMacro("__has_embed", true, .macro_param_has_embed);
    try pp.addBuiltinMacro("__is_identifier", true, .macro_param_is_identifier);
    try pp.addBuiltinMacro("_Pragma", true, .macro_param_pragma_operator);

    if (pp.comp.langopts.ms_extensions) {
        try pp.addBuiltinMacro("__identifier", true, .macro_param_ms_identifier);
        try pp.addBuiltinMacro("__pragma", true, .macro_param_ms_pragma);
    }

    try pp.addBuiltinMacro("__FILE__", false, .macro_file);
    try pp.addBuiltinMacro("__LINE__", false, .macro_line);
    try pp.addBuiltinMacro("__COUNTER__", false, .macro_counter);
    try pp.addBuiltinMacro("__DATE__", false, .macro_date);
    try pp.addBuiltinMacro("__TIME__", false, .macro_time);
    try pp.addBuiltinMacro("__TIMESTAMP__", false, .macro_timestamp);
}

pub fn deinit(pp: *Preprocessor) void {
    pp.defines.deinit(pp.gpa);
    pp.tokens.deinit(pp.gpa);
    pp.arena.deinit();
    pp.token_buf.deinit();
    pp.char_buf.deinit();
    pp.poisoned_identifiers.deinit();
    pp.include_guards.deinit(pp.gpa);
    pp.top_expansion_buf.deinit();
    pp.hideset.deinit();
    for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa);
    pp.expansion_entries.deinit(pp.gpa);
    pp.m_times.deinit(pp.gpa);
}

/// Free buffers that are not needed after preprocessing
fn clearBuffers(pp: *Preprocessor) void {
    pp.token_buf.clearAndFree();
    pp.char_buf.clearAndFree();
    pp.top_expansion_buf.clearAndFree();
    pp.hideset.clearAndFree();
}

fn mTime(pp: *Preprocessor, source_id: Source.Id) !u64 {
    const gop = try pp.m_times.getOrPut(pp.gpa, source_id);
    if (!gop.found_existing) {
        gop.value_ptr.* = pp.comp.getSourceMTimeUncached(source_id) orelse 0;
    }
    return gop.value_ptr.*;
}

pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location {
    const S = struct {
        fn orderTokenIndex(context: Tree.TokenIndex, item: Tree.TokenIndex) std.math.Order {
            return std.math.order(context, item);
        }
    };

    const indices = pp.expansion_entries.items(.idx);
    const idx = std.sort.binarySearch(Tree.TokenIndex, indices, tok, S.orderTokenIndex) orelse return &.{};
    const locs = pp.expansion_entries.items(.locs)[idx];
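    // The location list has no stored length; it is terminated by an entry with `id == .unused`.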
    var i: usize = 0;
    while (locs[i].id != .unused) : (i += 1) {}
    return locs[0..i];
}

/// Preprocess a compilation unit of sources into a parsable list of tokens.
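/// The first source is preprocessed last; any additional sources are preprocessed ahead of it.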
pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void {
    assert(sources.len > 1);
    const first = sources[0];
    try pp.addIncludeStart(first);
    for (sources[1..]) |header| {
        try pp.addIncludeStart(header);
        _ = try pp.preprocess(header);
    }
    try pp.addIncludeResume(first.id, 0, 1);
    const eof = try pp.preprocess(first);
    try pp.addToken(eof);
    pp.clearBuffers();
}

/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
    const eof = pp.preprocessExtra(source) catch |er| switch (er) {
        // This cannot occur in the main file and is handled in `include`.
        error.StopPreprocessing => unreachable,
        else => |e| return e,
    };
    try eof.checkMsEof(source, pp.comp);
    return eof;
}

/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token {
    assert(pp.linemarkers == .none);
    assert(pp.preserve_whitespace == false);
    var tokenizer = Tokenizer{
        .buf = source.buf,
        .comp = pp.comp,
        .source = source.id,
    };

    // Estimate how many new tokens this source will contain.
    const estimated_token_count = source.buf.len / 8;
    try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);

    while (true) {
        const tok = tokenizer.next();
        if (tok.id == .eof) return tokFromRaw(tok);
        try pp.addToken(tokFromRaw(tok));
    }
}

pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
    if (pp.linemarkers == .none) return;
    try pp.addToken(.{ .id = .include_start, .loc = .{
        .id = source.id,
        .byte_offset = std.math.maxInt(u32),
        .line = 1,
    } });
}

pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
    if (pp.linemarkers == .none) return;
    try pp.addToken(.{ .id = .include_resume, .loc = .{
        .id = source,
        .byte_offset = offset,
        .line = line,
    } });
}

fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostic {
    return switch (tok_id) {
        .unterminated_string_literal => .unterminated_string_literal_warning,
        .empty_char_literal => .empty_char_literal_warning,
        .unterminated_char_literal => .unterminated_char_literal_warning,
        else => unreachable,
    };
}

/// Return the name of the #ifndef guard macro that starts a source, if any.
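/// Whether the guard actually spans the entire file is checked later, in `preprocessExtra`.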
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
    var tokenizer = Tokenizer{
        .buf = source.buf,
        .langopts = pp.comp.langopts,
        .source = source.id,
    };
    var hash = tokenizer.nextNoWS();
    while (hash.id == .nl) hash = tokenizer.nextNoWS();
    if (hash.id != .hash) return null;
    const ifndef = tokenizer.nextNoWS();
    if (ifndef.id != .keyword_ifndef) return null;
    const guard = tokenizer.nextNoWS();
    if (guard.id != .identifier) return null;
    return pp.tokSlice(guard);
}

fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs {
|
|
var guard_name = pp.findIncludeGuard(source);
|
|
|
|
pp.preprocess_count += 1;
|
|
var tokenizer = Tokenizer{
|
|
.buf = source.buf,
|
|
.langopts = pp.comp.langopts,
|
|
.source = source.id,
|
|
};
|
|
|
|
// Estimate how many new tokens this source will contain.
|
|
const estimated_token_count = source.buf.len / 8;
|
|
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
|
|
|
|
var if_context: IfContext = .default;
|
|
|
|
var start_of_line = true;
|
|
while (true) {
|
|
var tok = tokenizer.next();
|
|
switch (tok.id) {
|
|
.hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else {
|
|
const directive = tokenizer.nextNoWS();
|
|
const directive_loc: Source.Location = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line };
|
|
switch (directive.id) {
|
|
.keyword_error, .keyword_warning => {
|
|
// #error tokens..
|
|
pp.top_expansion_buf.items.len = 0;
|
|
const char_top = pp.char_buf.items.len;
|
|
defer pp.char_buf.items.len = char_top;
|
|
|
|
while (true) {
|
|
tok = tokenizer.next();
|
|
if (tok.id == .nl or tok.id == .eof) break;
|
|
if (tok.id == .whitespace) tok.id = .macro_ws;
|
|
try pp.top_expansion_buf.append(tokFromRaw(tok));
|
|
}
|
|
try pp.stringify(pp.top_expansion_buf.items);
|
|
const slice = pp.char_buf.items[char_top + 1 .. pp.char_buf.items.len - 2];
|
|
|
|
try pp.err(
|
|
directive_loc,
|
|
if (directive.id == .keyword_error) .error_directive else .warning_directive,
|
|
.{slice},
|
|
);
|
|
},
|
|
.keyword_if => {
|
|
const overflowed = if_context.increment();
|
|
if (overflowed)
|
|
return pp.fatal(directive, "too many #if nestings", .{});
|
|
|
|
if (try pp.expr(&tokenizer)) {
|
|
if_context.set(.until_endif);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering then branch of #if", .{});
|
|
}
|
|
} else {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #if", .{});
|
|
}
|
|
}
|
|
},
|
|
.keyword_ifdef => {
|
|
const overflowed = if_context.increment();
|
|
if (overflowed)
|
|
return pp.fatal(directive, "too many #if nestings", .{});
|
|
|
|
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
|
|
try pp.expectNl(&tokenizer);
|
|
if (pp.defines.get(macro_name) != null) {
|
|
if_context.set(.until_endif);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering then branch of #ifdef", .{});
|
|
}
|
|
} else {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #ifdef", .{});
|
|
}
|
|
}
|
|
},
|
|
.keyword_ifndef => {
|
|
const overflowed = if_context.increment();
|
|
if (overflowed)
|
|
return pp.fatal(directive, "too many #if nestings", .{});
|
|
|
|
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
|
|
try pp.expectNl(&tokenizer);
|
|
if (pp.defines.get(macro_name) == null) {
|
|
if_context.set(.until_endif);
|
|
} else {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
}
|
|
},
|
|
.keyword_elif => {
|
|
if (if_context.level == 0) {
|
|
try pp.err(directive, .elif_without_if, .{});
|
|
_ = if_context.increment();
|
|
if_context.set(.until_else);
|
|
} else if (if_context.level == 1) {
|
|
guard_name = null;
|
|
}
|
|
switch (if_context.get()) {
|
|
.until_else => if (try pp.expr(&tokenizer)) {
|
|
if_context.set(.until_endif);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering then branch of #elif", .{});
|
|
}
|
|
} else {
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #elif", .{});
|
|
}
|
|
},
|
|
.until_endif => try pp.skip(&tokenizer, .until_endif),
|
|
.until_endif_seen_else => {
|
|
try pp.err(directive, .elif_after_else, .{});
|
|
skipToNl(&tokenizer);
|
|
},
|
|
}
|
|
},
|
|
.keyword_elifdef => {
|
|
if (if_context.level == 0) {
|
|
try pp.err(directive, .elifdef_without_if, .{});
|
|
_ = if_context.increment();
|
|
if_context.set(.until_else);
|
|
} else if (if_context.level == 1) {
|
|
guard_name = null;
|
|
}
|
|
switch (if_context.get()) {
|
|
.until_else => {
|
|
const macro_name = try pp.expectMacroName(&tokenizer);
|
|
if (macro_name == null) {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
|
|
}
|
|
} else {
|
|
try pp.expectNl(&tokenizer);
|
|
if (pp.defines.get(macro_name.?) != null) {
|
|
if_context.set(.until_endif);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering then branch of #elifdef", .{});
|
|
}
|
|
} else {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
|
|
}
|
|
}
|
|
}
|
|
},
|
|
.until_endif => try pp.skip(&tokenizer, .until_endif),
|
|
.until_endif_seen_else => {
|
|
try pp.err(directive, .elifdef_after_else, .{});
|
|
skipToNl(&tokenizer);
|
|
},
|
|
}
|
|
},
|
|
.keyword_elifndef => {
|
|
if (if_context.level == 0) {
|
|
try pp.err(directive, .elifndef_without_if, .{});
|
|
_ = if_context.increment();
|
|
if_context.set(.until_else);
|
|
} else if (if_context.level == 1) {
|
|
guard_name = null;
|
|
}
|
|
switch (if_context.get()) {
|
|
.until_else => {
|
|
const macro_name = try pp.expectMacroName(&tokenizer);
|
|
if (macro_name == null) {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
|
|
}
|
|
} else {
|
|
try pp.expectNl(&tokenizer);
|
|
if (pp.defines.get(macro_name.?) == null) {
|
|
if_context.set(.until_endif);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering then branch of #elifndef", .{});
|
|
}
|
|
} else {
|
|
if_context.set(.until_else);
|
|
try pp.skip(&tokenizer, .until_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
|
|
}
|
|
}
|
|
}
|
|
},
|
|
.until_endif => try pp.skip(&tokenizer, .until_endif),
|
|
.until_endif_seen_else => {
|
|
try pp.err(directive, .elifdef_after_else, .{});
|
|
skipToNl(&tokenizer);
|
|
},
|
|
}
|
|
},
|
|
.keyword_else => {
|
|
try pp.expectNl(&tokenizer);
|
|
if (if_context.level == 0) {
|
|
try pp.err(directive, .else_without_if, .{});
|
|
continue;
|
|
} else if (if_context.level == 1) {
|
|
guard_name = null;
|
|
}
|
|
switch (if_context.get()) {
|
|
.until_else => {
|
|
if_context.set(.until_endif_seen_else);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(directive, "#else branch here", .{});
|
|
}
|
|
},
|
|
.until_endif => try pp.skip(&tokenizer, .until_endif_seen_else),
|
|
.until_endif_seen_else => {
|
|
try pp.err(directive, .else_after_else, .{});
|
|
skipToNl(&tokenizer);
|
|
},
|
|
}
|
|
},
|
|
.keyword_endif => {
|
|
try pp.expectNl(&tokenizer);
|
|
if (if_context.level == 0) {
|
|
guard_name = null;
|
|
try pp.err(directive, .endif_without_if, .{});
|
|
continue;
|
|
} else if (if_context.level == 1) {
|
|
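// Peek ahead: if anything other than EOF follows this #endif, the opening #ifndef does not guard the whole file.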
const saved_tokenizer = tokenizer;
|
|
defer tokenizer = saved_tokenizer;
|
|
|
|
var next = tokenizer.nextNoWS();
|
|
while (next.id == .nl) : (next = tokenizer.nextNoWS()) {}
|
|
if (next.id != .eof) guard_name = null;
|
|
}
|
|
if_context.decrement();
|
|
},
|
|
.keyword_define => try pp.define(&tokenizer, directive),
|
|
.keyword_undef => {
|
|
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
|
|
if (pp.store_macro_tokens) {
|
|
try pp.addToken(tokFromRaw(directive));
|
|
}
|
|
|
|
_ = pp.defines.orderedRemove(macro_name);
|
|
try pp.expectNl(&tokenizer);
|
|
},
|
|
.keyword_include => {
|
|
try pp.include(&tokenizer, .first);
|
|
continue;
|
|
},
|
|
.keyword_include_next => {
|
|
try pp.err(directive_loc, .include_next, .{});
|
|
|
|
if (pp.include_depth == 0) {
|
|
try pp.err(directive_loc, .include_next_outside_header, .{});
|
|
try pp.include(&tokenizer, .first);
|
|
} else {
|
|
try pp.include(&tokenizer, .next);
|
|
}
|
|
},
|
|
.keyword_embed => try pp.embed(&tokenizer),
|
|
.keyword_pragma => {
|
|
try pp.pragma(&tokenizer, directive, null, &.{});
|
|
continue;
|
|
},
|
|
.keyword_line => {
|
|
// #line number "file"
|
|
const digits = tokenizer.nextNoWS();
|
|
if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit, .{});
|
|
// TODO: validate that the pp_num token is solely digits
|
|
|
|
if (digits.id == .eof or digits.id == .nl) continue;
|
|
const name = tokenizer.nextNoWS();
|
|
if (name.id == .eof or name.id == .nl) continue;
|
|
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
|
|
try pp.expectNl(&tokenizer);
|
|
},
|
|
.pp_num => {
|
|
// # number "file" flags
|
|
// TODO: validate that the pp_num token is solely digits
|
|
// if not, emit `GNU line marker directive requires a simple digit sequence`
|
|
const name = tokenizer.nextNoWS();
|
|
if (name.id == .eof or name.id == .nl) continue;
|
|
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
|
|
|
|
const flag_1 = tokenizer.nextNoWS();
|
|
if (flag_1.id == .eof or flag_1.id == .nl) continue;
|
|
const flag_2 = tokenizer.nextNoWS();
|
|
if (flag_2.id == .eof or flag_2.id == .nl) continue;
|
|
const flag_3 = tokenizer.nextNoWS();
|
|
if (flag_3.id == .eof or flag_3.id == .nl) continue;
|
|
const flag_4 = tokenizer.nextNoWS();
|
|
if (flag_4.id == .eof or flag_4.id == .nl) continue;
|
|
try pp.expectNl(&tokenizer);
|
|
},
|
|
.nl => {},
|
|
.eof => {
|
|
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
|
|
return tokFromRaw(directive);
|
|
},
|
|
else => {
|
|
try pp.err(tok, .invalid_preprocessing_directive, .{});
|
|
skipToNl(&tokenizer);
|
|
},
|
|
}
|
|
if (pp.preserve_whitespace) {
|
|
tok.id = .nl;
|
|
try pp.addToken(tokFromRaw(tok));
|
|
}
|
|
},
|
|
.whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)),
|
|
.nl => {
|
|
start_of_line = true;
|
|
if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok));
|
|
},
|
|
.eof => {
|
|
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
|
|
// The following check needs to occur here and not at the top of the function
|
|
// because a pragma may change the level during preprocessing
|
|
if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') {
|
|
try pp.err(tok, .newline_eof, .{});
|
|
}
|
|
if (guard_name) |name| {
|
|
if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| {
|
|
assert(mem.eql(u8, name, prev.value));
|
|
}
|
|
}
|
|
return tokFromRaw(tok);
|
|
},
|
|
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
|
|
start_of_line = false;
|
|
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
|
|
try pp.expandMacro(&tokenizer, tok);
|
|
},
|
|
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
|
|
else => {
|
|
if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) {
|
|
try pp.err(tok, .poisoned_identifier, .{});
|
|
}
|
|
// Add the token to the buffer doing any necessary expansions.
|
|
start_of_line = false;
|
|
try pp.expandMacro(&tokenizer, tok);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get raw token source string.
|
|
/// Returned slice is invalidated when comp.generated_buf is updated.
|
|
pub fn tokSlice(pp: *const Preprocessor, token: anytype) []const u8 {
|
|
if (token.id.lexeme()) |some| return some;
|
|
const source = pp.comp.getSource(token.source);
|
|
return source.buf[token.start..token.end];
|
|
}
|
|
|
|
/// Convert a token from the Tokenizer into a token used by the parser.
|
|
fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs {
|
|
return .{
|
|
.id = raw.id,
|
|
.loc = .{
|
|
.id = raw.source,
|
|
.byte_offset = raw.start,
|
|
.line = raw.line,
|
|
},
|
|
};
|
|
}
|
|
|
|
pub const Diagnostic = @import("Preprocessor/Diagnostic.zig");
|
|
|
|
fn err(pp: *Preprocessor, loc: anytype, diagnostic: Diagnostic, args: anytype) Compilation.Error!void {
|
|
if (pp.diagnostics.effectiveKind(diagnostic) == .off) return;
|
|
|
|
var sf = std.heap.stackFallback(1024, pp.gpa);
|
|
var allocating: std.Io.Writer.Allocating = .init(sf.get());
|
|
defer allocating.deinit();
|
|
|
|
Diagnostics.formatArgs(&allocating.writer, diagnostic.fmt, args) catch return error.OutOfMemory;
|
|
try pp.diagnostics.addWithLocation(pp.comp, .{
|
|
.kind = diagnostic.kind,
|
|
.text = allocating.getWritten(),
|
|
.opt = diagnostic.opt,
|
|
.extension = diagnostic.extension,
|
|
.location = switch (@TypeOf(loc)) {
|
|
RawToken => (Source.Location{
|
|
.id = loc.source,
|
|
.byte_offset = loc.start,
|
|
.line = loc.line,
|
|
}).expand(pp.comp),
|
|
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.loc.expand(pp.comp),
|
|
Source.Location => loc.expand(pp.comp),
|
|
else => @compileError("invalid token type " ++ @typeName(@TypeOf(loc))),
|
|
},
|
|
}, switch (@TypeOf(loc)) {
|
|
RawToken => &.{},
|
|
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.expansionSlice(),
|
|
Source.Location => &.{},
|
|
else => @compileError("invalid token type"),
|
|
}, true);
|
|
}
|
|
|
|
fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error {
|
|
var sf = std.heap.stackFallback(1024, pp.gpa);
|
|
var allocating: std.Io.Writer.Allocating = .init(sf.get());
|
|
defer allocating.deinit();
|
|
|
|
Diagnostics.formatArgs(&allocating.writer, fmt, args) catch return error.OutOfMemory;
|
|
try pp.diagnostics.add(.{
|
|
.kind = .@"fatal error",
|
|
.text = allocating.getWritten(),
|
|
.location = (Source.Location{
|
|
.id = raw.source,
|
|
.byte_offset = raw.start,
|
|
.line = raw.line,
|
|
}).expand(pp.comp),
|
|
});
|
|
unreachable;
|
|
}
|
|
|
|
fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error {
|
|
const old = pp.diagnostics.state.fatal_errors;
|
|
pp.diagnostics.state.fatal_errors = true;
|
|
defer pp.diagnostics.state.fatal_errors = old;
|
|
|
|
var sf = std.heap.stackFallback(1024, pp.gpa);
|
|
var buf = std.array_list.Managed(u8).init(sf.get());
|
|
defer buf.deinit();
|
|
|
|
try buf.print("'{s}' not found", .{filename});
|
|
try pp.diagnostics.addWithLocation(pp.comp, .{
|
|
.kind = .@"fatal error",
|
|
.text = buf.items,
|
|
.location = tok.loc.expand(pp.comp),
|
|
}, tok.expansionSlice(), true);
|
|
unreachable; // should've returned FatalError
|
|
}
|
|
|
|
fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void {
|
|
@branchHint(.cold);
|
|
const source = pp.comp.getSource(raw.source);
|
|
const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start });
|
|
|
|
var stderr_buffer: [64]u8 = undefined;
|
|
var writer = std.debug.lockStderrWriter(&stderr_buffer);
|
|
defer std.debug.unlockStderrWriter();
|
|
writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return;
|
|
writer.print(fmt, args) catch return;
|
|
writer.writeByte('\n') catch return;
|
|
writer.writeAll(line_col.line) catch return;
|
|
writer.writeByte('\n') catch return;
|
|
}
|
|
|
|
/// Consume next token, error if it is not an identifier.
|
|
fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 {
|
|
const macro_name = tokenizer.nextNoWS();
|
|
if (!macro_name.id.isMacroIdentifier()) {
|
|
try pp.err(macro_name, .macro_name_missing, .{});
|
|
skipToNl(tokenizer);
|
|
return null;
|
|
}
|
|
return pp.tokSlice(macro_name);
|
|
}
|
|
|
|
/// Skip until after a newline, error if extra tokens before it.
|
|
fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
|
|
var sent_err = false;
|
|
while (true) {
|
|
const tok = tokenizer.next();
|
|
if (tok.id == .nl or tok.id == .eof) return;
|
|
if (tok.id == .whitespace or tok.id == .comment) continue;
|
|
if (!sent_err) {
|
|
sent_err = true;
|
|
try pp.err(tok, .extra_tokens_directive_end, .{});
|
|
}
|
|
}
|
|
}
|
|
|
|
fn getTokenState(pp: *const Preprocessor) TokenState {
|
|
return .{
|
|
.tokens_len = pp.tokens.len,
|
|
.expansion_entries_len = pp.expansion_entries.len,
|
|
};
|
|
}
|
|
|
|
fn restoreTokenState(pp: *Preprocessor, state: TokenState) void {
|
|
pp.tokens.len = state.tokens_len;
|
|
pp.expansion_entries.len = state.expansion_entries_len;
|
|
}
|
|
|
|
/// Consume all tokens until a newline and parse the result into a boolean.
|
|
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
|
|
const token_state = pp.getTokenState();
|
|
defer {
|
|
for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
pp.restoreTokenState(token_state);
|
|
}
|
|
|
|
pp.top_expansion_buf.items.len = 0;
|
|
const eof = while (true) {
|
|
const tok = tokenizer.next();
|
|
switch (tok.id) {
|
|
.nl, .eof => break tok,
|
|
.whitespace => if (pp.top_expansion_buf.items.len == 0) continue,
|
|
else => {},
|
|
}
|
|
try pp.top_expansion_buf.append(tokFromRaw(tok));
|
|
} else unreachable;
|
|
if (pp.top_expansion_buf.items.len != 0) {
|
|
pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
|
|
pp.hideset.clearRetainingCapacity();
|
|
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
|
|
}
|
|
for (pp.top_expansion_buf.items) |tok| {
|
|
if (tok.id == .macro_ws) continue;
|
|
if (!tok.id.validPreprocessorExprStart()) {
|
|
try pp.err(tok, .invalid_preproc_expr_start, .{});
|
|
return false;
|
|
}
|
|
break;
|
|
} else {
|
|
try pp.err(eof, .expected_value_in_expr, .{});
|
|
return false;
|
|
}
|
|
|
|
// validate the tokens in the expression
|
|
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
|
|
var i: usize = 0;
|
|
const items = pp.top_expansion_buf.items;
|
|
while (i < items.len) : (i += 1) {
|
|
var tok = items[i];
|
|
switch (tok.id) {
|
|
.string_literal,
|
|
.string_literal_utf_16,
|
|
.string_literal_utf_8,
|
|
.string_literal_utf_32,
|
|
.string_literal_wide,
|
|
=> {
|
|
try pp.err(tok, .string_literal_in_pp_expr, .{});
|
|
return false;
|
|
},
|
|
.plus_plus,
|
|
.minus_minus,
|
|
.plus_equal,
|
|
.minus_equal,
|
|
.asterisk_equal,
|
|
.slash_equal,
|
|
.percent_equal,
|
|
.angle_bracket_angle_bracket_left_equal,
|
|
.angle_bracket_angle_bracket_right_equal,
|
|
.ampersand_equal,
|
|
.caret_equal,
|
|
.pipe_equal,
|
|
.l_bracket,
|
|
.r_bracket,
|
|
.l_brace,
|
|
.r_brace,
|
|
.ellipsis,
|
|
.semicolon,
|
|
.hash,
|
|
.hash_hash,
|
|
.equal,
|
|
.arrow,
|
|
.period,
|
|
=> {
|
|
try pp.err(tok, .invalid_preproc_operator, .{});
|
|
return false;
|
|
},
|
|
.macro_ws, .whitespace => continue,
|
|
.keyword_false => tok.id = .zero,
|
|
.keyword_true => tok.id = .one,
|
|
else => if (tok.id.isMacroIdentifier()) {
|
|
if (tok.id == .keyword_defined) {
|
|
const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof);
|
|
i += tokens_consumed;
|
|
} else {
|
|
try pp.err(tok, .undefined_macro, .{pp.expandedSlice(tok)});
|
|
|
|
if (i + 1 < pp.top_expansion_buf.items.len and
|
|
pp.top_expansion_buf.items[i + 1].id == .l_paren)
|
|
{
|
|
try pp.err(tok, .fn_macro_undefined, .{pp.expandedSlice(tok)});
|
|
return false;
|
|
}
|
|
|
|
tok.id = .zero; // undefined macro
|
|
}
|
|
},
|
|
}
|
|
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok));
|
|
}
|
|
try pp.addToken(.{
|
|
.id = .eof,
|
|
.loc = tokFromRaw(eof).loc,
|
|
});
|
|
|
|
// Actually parse it.
|
|
var parser: Parser = .{
|
|
.pp = pp,
|
|
.comp = pp.comp,
|
|
.diagnostics = pp.diagnostics,
|
|
.gpa = pp.gpa,
|
|
.tok_ids = pp.tokens.items(.id),
|
|
.tok_i = @intCast(token_state.tokens_len),
|
|
.in_macro = true,
|
|
.strings = std.array_list.Managed(u8).init(pp.comp.gpa),
|
|
|
|
.tree = undefined,
|
|
.labels = undefined,
|
|
.decl_buf = undefined,
|
|
.list_buf = undefined,
|
|
.param_buf = undefined,
|
|
.enum_buf = undefined,
|
|
.record_buf = undefined,
|
|
.attr_buf = undefined,
|
|
.string_ids = undefined,
|
|
};
|
|
defer parser.strings.deinit();
|
|
return parser.macroExpr();
|
|
}
|
|
|
|
/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined
|
|
/// Returns the number of tokens consumed
|
|
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize {
|
|
std.debug.assert(macro_tok.id == .keyword_defined);
|
|
var it = TokenIterator.init(tokens);
|
|
const first = it.nextNoWS() orelse {
|
|
try pp.err(eof, .macro_name_missing, .{});
|
|
return it.i;
|
|
};
|
|
switch (first.id) {
|
|
.l_paren => {},
|
|
else => {
|
|
if (!first.id.isMacroIdentifier()) {
|
|
try pp.err(first, .macro_name_must_be_identifier, .{});
|
|
}
|
|
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero;
|
|
return it.i;
|
|
},
|
|
}
|
|
const second = it.nextNoWS() orelse {
|
|
try pp.err(eof, .macro_name_missing, .{});
|
|
return it.i;
|
|
};
|
|
if (!second.id.isMacroIdentifier()) {
|
|
try pp.err(second, .macro_name_must_be_identifier, .{});
|
|
return it.i;
|
|
}
|
|
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero;
|
|
|
|
const last = it.nextNoWS();
|
|
if (last == null or last.?.id != .r_paren) {
|
|
const tok = last orelse tokFromRaw(eof);
|
|
try pp.err(tok, .closing_paren, .{});
|
|
try pp.err(first, .to_match_paren, .{});
|
|
}
|
|
|
|
return it.i;
|
|
}
|
|
|
|
/// Skip until #else #elif #endif, return last directive token id.
|
|
/// Also skips nested #if ... #endifs.
|
|
fn skip(
|
|
pp: *Preprocessor,
|
|
tokenizer: *Tokenizer,
|
|
cont: enum { until_else, until_endif, until_endif_seen_else },
|
|
) Error!void {
|
|
var ifs_seen: u32 = 0;
|
|
var line_start = true;
|
|
while (tokenizer.index < tokenizer.buf.len) {
|
|
if (line_start) {
|
|
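// Save the position at the start of the line so the terminating #else/#elif/#endif can be re-read by the caller.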
const saved_tokenizer = tokenizer.*;
|
|
const hash = tokenizer.nextNoWS();
|
|
if (hash.id == .nl) continue;
|
|
line_start = false;
|
|
if (hash.id != .hash) continue;
|
|
const directive = tokenizer.nextNoWS();
|
|
switch (directive.id) {
|
|
.keyword_else => {
|
|
if (ifs_seen != 0) continue;
|
|
if (cont == .until_endif_seen_else) {
|
|
try pp.err(directive, .else_after_else, .{});
|
|
continue;
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
return;
|
|
},
|
|
.keyword_elif => {
|
|
if (ifs_seen != 0 or cont == .until_endif) continue;
|
|
if (cont == .until_endif_seen_else) {
|
|
try pp.err(directive, .elif_after_else, .{});
|
|
continue;
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
return;
|
|
},
|
|
.keyword_elifdef => {
|
|
if (ifs_seen != 0 or cont == .until_endif) continue;
|
|
if (cont == .until_endif_seen_else) {
|
|
try pp.err(directive, .elifdef_after_else, .{});
|
|
continue;
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
return;
|
|
},
|
|
.keyword_elifndef => {
|
|
if (ifs_seen != 0 or cont == .until_endif) continue;
|
|
if (cont == .until_endif_seen_else) {
|
|
try pp.err(directive, .elifndef_after_else, .{});
|
|
continue;
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
return;
|
|
},
|
|
.keyword_endif => {
|
|
if (ifs_seen == 0) {
|
|
tokenizer.* = saved_tokenizer;
|
|
return;
|
|
}
|
|
ifs_seen -= 1;
|
|
},
|
|
.keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1,
|
|
else => {},
|
|
}
|
|
} else if (tokenizer.buf[tokenizer.index] == '\n') {
|
|
line_start = true;
|
|
tokenizer.index += 1;
|
|
tokenizer.line += 1;
|
|
if (pp.preserve_whitespace) {
|
|
try pp.addToken(.{ .id = .nl, .loc = .{
|
|
.id = tokenizer.source,
|
|
.line = tokenizer.line,
|
|
} });
|
|
}
|
|
} else {
|
|
line_start = false;
|
|
tokenizer.index += 1;
|
|
}
|
|
} else {
|
|
const eof = tokenizer.next();
|
|
return pp.err(eof, .unterminated_conditional_directive, .{});
|
|
}
|
|
}
|
|
|
|
// Skip until newline, ignore other tokens.
|
|
fn skipToNl(tokenizer: *Tokenizer) void {
|
|
while (true) {
|
|
const tok = tokenizer.next();
|
|
if (tok.id == .nl or tok.id == .eof) return;
|
|
}
|
|
}
|
|
|
|
const ExpandBuf = std.array_list.Managed(TokenWithExpansionLocs);
|
|
fn removePlacemarkers(buf: *ExpandBuf) void {
|
|
var i: usize = buf.items.len -% 1;
|
|
while (i < buf.items.len) : (i -%= 1) {
|
|
if (buf.items[i].id == .placemarker) {
|
|
const placemarker = buf.orderedRemove(i);
|
|
TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator);
|
|
}
|
|
}
|
|
}
|
|
|
|
const MacroArguments = std.array_list.Managed([]const TokenWithExpansionLocs);
|
|
fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void {
|
|
for (args.items) |item| {
|
|
for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator);
|
|
allocator.free(item);
|
|
}
|
|
args.deinit();
|
|
}
|
|
|
|
fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf {
|
|
var buf = ExpandBuf.init(pp.gpa);
|
|
errdefer buf.deinit();
|
|
if (simple_macro.tokens.len == 0) {
|
|
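// An empty macro body expands to a single placemarker token (see removePlacemarkers).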
try buf.append(.{ .id = .placemarker, .loc = .{ .id = .generated } });
|
|
return buf;
|
|
}
|
|
try buf.ensureTotalCapacity(simple_macro.tokens.len);
|
|
|
|
// Add all of the simple_macros tokens to the new buffer handling any concats.
|
|
var i: usize = 0;
|
|
while (i < simple_macro.tokens.len) : (i += 1) {
|
|
const raw = simple_macro.tokens[i];
|
|
const tok = tokFromRaw(raw);
|
|
switch (raw.id) {
|
|
.hash_hash => {
|
|
var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
|
|
i += 1;
|
|
while (true) {
|
|
if (rhs.id == .whitespace) {
|
|
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
|
|
i += 1;
|
|
} else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
|
|
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
|
|
i += 1;
|
|
} else break;
|
|
}
|
|
try pp.pasteTokens(&buf, &.{rhs});
|
|
},
|
|
.whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok),
|
|
.macro_file => {
|
|
const start = pp.comp.generated_buf.items.len;
|
|
const source = pp.comp.getSource(pp.expansion_source_loc.id);
|
|
try pp.comp.generated_buf.print(pp.gpa, "\"{f}\"\n", .{fmtEscapes(source.path)});
|
|
|
|
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
|
|
},
|
|
.macro_line => {
|
|
const start = pp.comp.generated_buf.items.len;
|
|
const source = pp.comp.getSource(pp.expansion_source_loc.id);
|
|
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{source.physicalLine(pp.expansion_source_loc)});
|
|
|
|
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
|
|
},
|
|
.macro_counter => {
|
|
defer pp.counter += 1;
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{pp.counter});
|
|
|
|
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
|
|
},
|
|
.macro_date, .macro_time => {
|
|
const start = pp.comp.generated_buf.items.len;
|
|
const timestamp = switch (pp.source_epoch) {
|
|
.system, .provided => |ts| ts,
|
|
};
|
|
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
|
|
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
|
|
},
|
|
.macro_timestamp => {
|
|
const start = pp.comp.generated_buf.items.len;
|
|
const timestamp = switch (pp.source_epoch) {
|
|
.provided => |ts| ts,
|
|
.system => try pp.mTime(pp.expansion_source_loc.id),
|
|
};
|
|
|
|
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
|
|
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
|
|
},
|
|
else => buf.appendAssumeCapacity(tok),
|
|
}
|
|
}
|
|
|
|
return buf;
|
|
}
|
|
|
|
const DateTimeStampKind = enum {
|
|
date,
|
|
time,
|
|
timestamp,
|
|
|
|
fn fromTokId(tok_id: RawToken.Id) DateTimeStampKind {
|
|
return switch (tok_id) {
|
|
.macro_date => .date,
|
|
.macro_time => .time,
|
|
.macro_timestamp => .timestamp,
|
|
else => unreachable,
|
|
};
|
|
}
|
|
};
|
|
|
|
fn writeDateTimeStamp(pp: *Preprocessor, kind: DateTimeStampKind, timestamp: u64) !void {
|
|
std.debug.assert(std.time.epoch.Month.jan.numeric() == 1);
|
|
|
|
const epoch_seconds = std.time.epoch.EpochSeconds{ .secs = timestamp };
|
|
const epoch_day = epoch_seconds.getEpochDay();
|
|
const day_seconds = epoch_seconds.getDaySeconds();
|
|
const year_day = epoch_day.calculateYearDay();
|
|
const month_day = year_day.calculateMonthDay();
|
|
|
|
const day_names = [_][]const u8{ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
|
|
const month_names = [_][]const u8{ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
|
|
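// Day 0 of the Unix epoch (1970-01-01) was a Thursday, which is index 3 in `day_names`.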
const day_name = day_names[@intCast((epoch_day.day + 3) % 7)];
|
|
const month_name = month_names[month_day.month.numeric() - 1];
|
|
|
|
switch (kind) {
|
|
.date => {
|
|
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {d: >2} {d}\"", .{
|
|
month_name,
|
|
month_day.day_index + 1,
|
|
year_day.year,
|
|
});
|
|
},
|
|
.time => {
|
|
try pp.comp.generated_buf.print(pp.gpa, "\"{d:0>2}:{d:0>2}:{d:0>2}\"", .{
|
|
day_seconds.getHoursIntoDay(),
|
|
day_seconds.getMinutesIntoHour(),
|
|
day_seconds.getSecondsIntoMinute(),
|
|
});
|
|
},
|
|
.timestamp => {
|
|
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {s} {d: >2} {d:0>2}:{d:0>2}:{d:0>2} {d}\"", .{
|
|
day_name,
|
|
month_name,
|
|
month_day.day_index + 1,
|
|
day_seconds.getHoursIntoDay(),
|
|
day_seconds.getMinutesIntoHour(),
|
|
day_seconds.getSecondsIntoMinute(),
|
|
year_day.year,
|
|
});
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Join a possibly-parenthesized series of string literal tokens into a single string without
|
|
/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes.
|
|
/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal
|
|
/// is encountered, or if no string literals are encountered
|
|
/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"')
|
|
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 {
|
|
const char_top = pp.char_buf.items.len;
|
|
defer pp.char_buf.items.len = char_top;
|
|
var unwrapped = toks;
|
|
if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) {
|
|
unwrapped = toks[1 .. toks.len - 1];
|
|
}
|
|
if (unwrapped.len == 0) return error.ExpectedStringLiteral;
|
|
|
|
for (unwrapped) |tok| {
|
|
if (tok.id == .macro_ws) continue;
|
|
if (tok.id != .string_literal) return error.ExpectedStringLiteral;
|
|
const str = pp.expandedSlice(tok);
|
|
try pp.char_buf.appendSlice(str[1 .. str.len - 1]);
|
|
}
|
|
return pp.char_buf.items[char_top..];
|
|
}
|
|
|
|
/// Handle the _Pragma operator (implemented as a builtin macro)
|
|
fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void {
|
|
const arg_slice = pp.expandedSlice(arg_tok);
|
|
const content = arg_slice[1 .. arg_slice.len - 1];
|
|
const directive = "#pragma ";
|
|
|
|
pp.char_buf.clearRetainingCapacity();
|
|
const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline
|
|
try pp.char_buf.ensureUnusedCapacity(total_len);
|
|
pp.char_buf.appendSliceAssumeCapacity(directive);
|
|
pp.destringify(content);
|
|
pp.char_buf.appendAssumeCapacity('\n');
|
|
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
|
|
var tmp_tokenizer = Tokenizer{
|
|
.buf = pp.comp.generated_buf.items,
|
|
.langopts = pp.comp.langopts,
|
|
.index = @intCast(start),
|
|
.source = .generated,
|
|
.line = pp.generated_line,
|
|
};
|
|
pp.generated_line += 1;
|
|
const hash_tok = tmp_tokenizer.next();
|
|
assert(hash_tok.id == .hash);
|
|
const pragma_tok = tmp_tokenizer.next();
|
|
assert(pragma_tok.id == .keyword_pragma);
|
|
try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice());
|
|
}
|
|
|
|
/// Handle Microsoft __pragma operator
|
|
fn msPragmaOperator(pp: *Preprocessor, pragma_tok: TokenWithExpansionLocs, args: []const TokenWithExpansionLocs) !void {
|
|
if (args.len == 0) {
|
|
try pp.err(pragma_tok, .unknown_pragma, .{});
|
|
return;
|
|
}
|
|
|
|
{
|
|
var copy = try pragma_tok.dupe(pp.gpa);
|
|
copy.id = .keyword_pragma;
|
|
try pp.addToken(copy);
|
|
}
|
|
|
|
const pragma_start: u32 = @intCast(pp.tokens.len);
|
|
for (args) |tok| {
|
|
switch (tok.id) {
|
|
.macro_ws, .comment => continue,
|
|
else => try pp.addToken(try tok.dupe(pp.gpa)),
|
|
}
|
|
}
|
|
try pp.addToken(.{ .id = .nl, .loc = .{ .id = .generated } });
|
|
|
|
const name = pp.expandedSlice(pp.tokens.get(pragma_start));
|
|
if (pp.comp.getPragma(name)) |prag| unknown: {
|
|
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
|
|
error.UnknownPragma => break :unknown,
|
|
else => |e| return e,
|
|
};
|
|
}
|
|
|
|
try pp.err(args[0], .unknown_pragma, .{});
|
|
}
|
|
|
|
/// Inverts the output of the preprocessor stringify (#) operation
|
|
/// (except all whitespace is condensed to a single space)
|
|
/// writes output to pp.char_buf; assumes capacity is sufficient
|
|
/// backslash backslash -> backslash
|
|
/// backslash doublequote -> doublequote
|
|
/// All other characters remain the same
|
|
fn destringify(pp: *Preprocessor, str: []const u8) void {
|
|
var state: enum { start, backslash_seen } = .start;
|
|
for (str) |c| {
|
|
switch (c) {
|
|
'\\' => {
|
|
if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c);
|
|
state = if (state == .start) .backslash_seen else .start;
|
|
},
|
|
else => {
|
|
if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\');
|
|
pp.char_buf.appendAssumeCapacity(c);
|
|
state = .start;
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Stringify `tokens` into pp.char_buf.
|
|
/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing
|
|
fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void {
|
|
try pp.char_buf.append('"');
|
|
var ws_state: enum { start, need, not_needed } = .start;
|
|
for (tokens) |tok| {
|
|
if (tok.id == .macro_ws) {
|
|
if (ws_state == .start) continue;
|
|
ws_state = .need;
|
|
continue;
|
|
}
|
|
if (ws_state == .need) try pp.char_buf.append(' ');
|
|
ws_state = .not_needed;
|
|
|
|
// backslashes not inside strings are not escaped
|
|
const is_str = switch (tok.id) {
|
|
.string_literal,
|
|
.string_literal_utf_16,
|
|
.string_literal_utf_8,
|
|
.string_literal_utf_32,
|
|
.string_literal_wide,
|
|
.char_literal,
|
|
.char_literal_utf_16,
|
|
.char_literal_utf_32,
|
|
.char_literal_wide,
|
|
=> true,
|
|
else => false,
|
|
};
|
|
|
|
for (pp.expandedSlice(tok)) |c| {
|
|
if (c == '"')
|
|
try pp.char_buf.appendSlice("\\\"")
|
|
else if (c == '\\' and is_str)
|
|
try pp.char_buf.appendSlice("\\\\")
|
|
else
|
|
try pp.char_buf.append(c);
|
|
}
|
|
}
|
|
try pp.char_buf.ensureUnusedCapacity(2);
|
|
if (pp.char_buf.items[pp.char_buf.items.len - 1] != '\\') {
|
|
pp.char_buf.appendSliceAssumeCapacity("\"\n");
|
|
return;
|
|
}
|
|
pp.char_buf.appendAssumeCapacity('"');
|
|
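// The stringified text ends in a backslash; re-lex it to check whether that backslash would escape the closing quote.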
var tokenizer: Tokenizer = .{
|
|
.buf = pp.char_buf.items,
|
|
.index = 0,
|
|
.source = .generated,
|
|
.langopts = pp.comp.langopts,
|
|
.line = 0,
|
|
};
|
|
const item = tokenizer.next();
|
|
if (item.id == .unterminated_string_literal) {
|
|
const tok = tokens[tokens.len - 1];
|
|
try pp.err(tok, .invalid_pp_stringify_escape, .{});
|
|
pp.char_buf.items.len -= 2; // erase unpaired backslash and appended end quote
|
|
pp.char_buf.appendAssumeCapacity('"');
|
|
}
|
|
pp.char_buf.appendAssumeCapacity('\n');
|
|
}
|
|
|
|
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 {
|
|
if (param_toks.len == 0) {
|
|
try pp.err(first, .expected_filename, .{});
|
|
return null;
|
|
}
|
|
|
|
const char_top = pp.char_buf.items.len;
|
|
defer pp.char_buf.items.len = char_top;
|
|
|
|
// Trim leading/trailing whitespace
|
|
var begin: usize = 0;
|
|
var end: usize = param_toks.len;
|
|
while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {}
|
|
while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {}
|
|
const params = param_toks[begin..end];
|
|
|
|
if (params.len == 0) {
|
|
try pp.err(first, .expected_filename, .{});
|
|
return null;
|
|
}
|
|
// no string pasting
|
|
if (embed_args == null and params[0].id == .string_literal and params.len > 1) {
|
|
try pp.err(params[1], .closing_paren, .{});
|
|
return null;
|
|
}
|
|
|
|
for (params, 0..) |tok, i| {
|
|
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
|
|
try pp.char_buf.appendSlice(str);
|
|
if (embed_args) |some| {
|
|
if ((i == 0 and tok.id == .string_literal) or tok.id == .angle_bracket_right) {
|
|
some.* = params[i + 1 ..];
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
const include_str = pp.char_buf.items[char_top..];
|
|
if (include_str.len < 3) {
|
|
if (include_str.len == 0) {
|
|
try pp.err(first, .expected_filename, .{});
|
|
return null;
|
|
}
|
|
try pp.err(params[0], .empty_filename, .{});
|
|
return null;
|
|
}
|
|
|
|
switch (include_str[0]) {
|
|
'<' => {
|
|
if (include_str[include_str.len - 1] != '>') {
|
|
// Ugly hack to find out where the '>' should go, since we don't have the closing ')' location
|
|
var closing = params[0];
|
|
closing.loc.byte_offset += @as(u32, @intCast(include_str.len)) + 1;
|
|
try pp.err(closing, .header_str_closing, .{});
|
|
|
|
try pp.err(params[0], .header_str_match, .{});
|
|
return null;
|
|
}
|
|
return include_str;
|
|
},
|
|
'"' => return include_str,
|
|
else => {
|
|
try pp.err(params[0], .expected_filename, .{});
|
|
return null;
|
|
},
|
|
}
|
|
}
|
|
|
|
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool {
|
|
switch (builtin) {
|
|
.macro_param_has_attribute,
|
|
.macro_param_has_declspec_attribute,
|
|
.macro_param_has_feature,
|
|
.macro_param_has_extension,
|
|
.macro_param_has_builtin,
|
|
=> {
|
|
var invalid: ?TokenWithExpansionLocs = null;
|
|
var identifier: ?TokenWithExpansionLocs = null;
|
|
for (param_toks) |tok| {
|
|
if (tok.id == .macro_ws) continue;
|
|
if (tok.id == .comment) continue;
|
|
if (!tok.id.isMacroIdentifier()) {
|
|
invalid = tok;
|
|
break;
|
|
}
|
|
if (identifier) |_| invalid = tok else identifier = tok;
|
|
}
|
|
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
|
|
if (invalid) |some| {
|
|
try pp.err(some, .feature_check_requires_identifier, .{});
|
|
return false;
|
|
}
|
|
|
|
const ident_str = pp.expandedSlice(identifier.?);
|
|
return switch (builtin) {
|
|
.macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
|
|
.macro_param_has_declspec_attribute => {
|
|
return if (pp.comp.langopts.declspec_attrs)
|
|
Attribute.fromString(.declspec, null, ident_str) != null
|
|
else
|
|
false;
|
|
},
|
|
.macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
|
|
// If -pedantic-errors is given __has_extension is equivalent to __has_feature
|
|
.macro_param_has_extension => if (pp.comp.diagnostics.state.extensions == .@"error")
|
|
features.hasFeature(pp.comp, ident_str)
|
|
else
|
|
features.hasExtension(pp.comp, ident_str),
|
|
.macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
|
|
else => unreachable,
|
|
};
|
|
},
|
|
.macro_param_has_warning => {
|
|
const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) {
|
|
error.ExpectedStringLiteral => {
|
|
try pp.err(param_toks[0], .expected_str_literal_in, .{"__has_warning"});
|
|
return false;
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
if (!mem.startsWith(u8, actual_param, "-W")) {
|
|
try pp.err(param_toks[0], .malformed_warning_check, .{"__has_warning"});
|
|
return false;
|
|
}
|
|
const warning_name = actual_param[2..];
|
|
return Diagnostics.warningExists(warning_name);
|
|
},
|
|
.macro_param_is_identifier => {
|
|
var invalid: ?TokenWithExpansionLocs = null;
|
|
var identifier: ?TokenWithExpansionLocs = null;
|
|
for (param_toks) |tok| switch (tok.id) {
|
|
.macro_ws => continue,
|
|
.comment => continue,
|
|
else => {
|
|
if (identifier) |_| invalid = tok else identifier = tok;
|
|
},
|
|
};
|
|
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
|
|
if (invalid) |some| {
|
|
try pp.err(some, .builtin_missing_r_paren, .{"builtin feature-check macro"});
|
|
return false;
|
|
}
|
|
|
|
const id = identifier.?.id;
|
|
return id == .identifier or id == .extended_identifier;
|
|
},
|
|
.macro_param_has_include, .macro_param_has_include_next => {
|
|
const include_str = (try pp.reconstructIncludeString(param_toks, null, param_toks[0])) orelse return false;
|
|
const include_type: Compilation.IncludeType = switch (include_str[0]) {
|
|
'"' => .quotes,
|
|
'<' => .angle_brackets,
|
|
else => unreachable,
|
|
};
|
|
const filename = include_str[1 .. include_str.len - 1];
|
|
const res = res: {
|
|
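// __has_include_next outside of an included file is diagnosed and falls back to __has_include behavior.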
if (builtin == .macro_param_has_include or pp.include_depth == 0) {
|
|
if (builtin == .macro_param_has_include_next) {
|
|
try pp.err(src_loc, .include_next_outside_header, .{});
|
|
}
|
|
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .first);
|
|
}
|
|
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .next);
|
|
};
|
|
|
|
if (res) if (pp.dep_file) |dep_file| try dep_file.addDependencyDupe(pp.gpa, pp.comp.arena, filename);
|
|
return res;
|
|
},
|
|
else => unreachable,
|
|
}
|
|
}
|
|
|
|
/// Treat whitespace-only paste arguments as empty
|
|
fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs {
|
|
for (args) |tok| {
|
|
if (tok.id != .macro_ws) return args;
|
|
}
|
|
return &[1]TokenWithExpansionLocs{.{
|
|
.id = .placemarker,
|
|
.loc = .{ .id = .generated, .byte_offset = 0, .line = 0 },
|
|
}};
|
|
}
|
|
|
|
fn expandFuncMacro(
|
|
pp: *Preprocessor,
|
|
macro_tok: TokenWithExpansionLocs,
|
|
func_macro: *const Macro,
|
|
args: *const MacroArguments,
|
|
expanded_args: *const MacroArguments,
|
|
hideset_arg: Hideset.Index,
|
|
) MacroError!ExpandBuf {
|
|
var hideset = hideset_arg;
|
|
var buf = ExpandBuf.init(pp.gpa);
|
|
try buf.ensureTotalCapacity(func_macro.tokens.len);
|
|
errdefer buf.deinit();
|
|
|
|
var expanded_variable_arguments = ExpandBuf.init(pp.gpa);
|
|
defer expanded_variable_arguments.deinit();
|
|
var variable_arguments = ExpandBuf.init(pp.gpa);
|
|
defer variable_arguments.deinit();
|
|
|
|
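// Collect the raw and macro-expanded tokens that form __VA_ARGS__, re-inserting commas between the variadic arguments.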
if (func_macro.var_args) {
|
|
var i: usize = func_macro.params.len;
|
|
while (i < expanded_args.items.len) : (i += 1) {
|
|
try variable_arguments.appendSlice(args.items[i]);
|
|
try expanded_variable_arguments.appendSlice(expanded_args.items[i]);
|
|
if (i != expanded_args.items.len - 1) {
|
|
const comma = TokenWithExpansionLocs{ .id = .comma, .loc = .{ .id = .generated } };
|
|
try variable_arguments.append(comma);
|
|
try expanded_variable_arguments.append(comma);
|
|
}
|
|
}
|
|
}
|
|
|
|
// token concatenation and expansion phase
|
|
var tok_i: usize = 0;
|
|
while (tok_i < func_macro.tokens.len) : (tok_i += 1) {
|
|
const raw = func_macro.tokens[tok_i];
|
|
switch (raw.id) {
|
|
.hash_hash => while (tok_i + 1 < func_macro.tokens.len) {
|
|
const raw_next = func_macro.tokens[tok_i + 1];
|
|
tok_i += 1;
|
|
|
|
var va_opt_buf = ExpandBuf.init(pp.gpa);
|
|
defer va_opt_buf.deinit();
|
|
|
|
const next = switch (raw_next.id) {
|
|
.macro_ws => continue,
|
|
.hash_hash => continue,
|
|
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
|
|
continue
|
|
else
|
|
&[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
|
|
.macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]),
|
|
.keyword_va_args => variable_arguments.items,
|
|
.keyword_va_opt => blk: {
|
|
try pp.expandVaOpt(&va_opt_buf, raw_next, variable_arguments.items.len != 0);
|
|
if (va_opt_buf.items.len == 0) break;
|
|
break :blk va_opt_buf.items;
|
|
},
|
|
else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
|
|
};
|
|
try pp.pasteTokens(&buf, next);
|
|
if (next.len != 0) break;
|
|
},
|
|
.macro_param_no_expand => {
|
|
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
|
|
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
|
|
}
|
|
const slice = getPasteArgs(args.items[raw.end]);
|
|
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
|
|
try bufCopyTokens(&buf, slice, &.{raw_loc});
|
|
},
|
|
.macro_param => {
|
|
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
|
|
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
|
|
}
|
|
const arg = expanded_args.items[raw.end];
|
|
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
|
|
try bufCopyTokens(&buf, arg, &.{raw_loc});
|
|
},
|
|
.keyword_va_args => {
|
|
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
|
|
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
|
|
},
|
|
.keyword_va_opt => {
|
|
try pp.expandVaOpt(&buf, raw, variable_arguments.items.len != 0);
|
|
},
|
|
.stringify_param, .stringify_va_args => {
|
|
const arg = if (raw.id == .stringify_va_args)
|
|
variable_arguments.items
|
|
else
|
|
args.items[raw.end];
|
|
|
|
pp.char_buf.clearRetainingCapacity();
|
|
try pp.stringify(arg);
|
|
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
|
|
|
|
try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
|
|
},
|
|
.macro_param_has_attribute,
|
|
.macro_param_has_declspec_attribute,
|
|
.macro_param_has_warning,
|
|
.macro_param_has_feature,
|
|
.macro_param_has_extension,
|
|
.macro_param_has_builtin,
|
|
.macro_param_has_include,
|
|
.macro_param_has_include_next,
|
|
.macro_param_is_identifier,
|
|
=> {
|
|
const arg = expanded_args.items[0];
|
|
const result = if (arg.len == 0) blk: {
|
|
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
|
|
break :blk false;
|
|
} else try pp.handleBuiltinMacro(raw.id, arg, macro_tok.loc);
|
|
const start = pp.comp.generated_buf.items.len;
|
|
|
|
try pp.comp.generated_buf.print(pp.gpa, "{}\n", .{@intFromBool(result)});
|
|
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
|
|
},
|
|
.macro_param_has_c_attribute => {
|
|
const arg = expanded_args.items[0];
|
|
const not_found = "0\n";
|
|
const result = if (arg.len == 0) blk: {
|
|
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
|
|
break :blk not_found;
|
|
} else res: {
|
|
var invalid: ?TokenWithExpansionLocs = null;
|
|
var vendor_ident: ?TokenWithExpansionLocs = null;
|
|
var colon_colon: ?TokenWithExpansionLocs = null;
|
|
var attr_ident: ?TokenWithExpansionLocs = null;
|
|
for (arg) |tok| {
|
|
if (tok.id == .macro_ws) continue;
|
|
if (tok.id == .comment) continue;
|
|
if (tok.id == .colon_colon) {
|
|
if (colon_colon != null or attr_ident == null) {
|
|
invalid = tok;
|
|
break;
|
|
}
|
|
vendor_ident = attr_ident;
|
|
attr_ident = null;
|
|
colon_colon = tok;
|
|
continue;
|
|
}
|
|
if (!tok.id.isMacroIdentifier()) {
|
|
invalid = tok;
|
|
break;
|
|
}
|
|
if (attr_ident) |_| {
|
|
invalid = tok;
|
|
break;
|
|
} else attr_ident = tok;
|
|
}
|
|
if (vendor_ident != null and attr_ident == null) {
|
|
invalid = vendor_ident;
|
|
} else if (attr_ident == null and invalid == null) {
|
|
invalid = .{ .id = .eof, .loc = macro_tok.loc };
|
|
}
|
|
if (invalid) |some| {
|
|
try pp.err(some, .feature_check_requires_identifier, .{});
|
|
break :res not_found;
|
|
}
|
|
if (vendor_ident) |some| {
|
|
const vendor_str = pp.expandedSlice(some);
|
|
const attr_str = pp.expandedSlice(attr_ident.?);
|
|
const exists = Attribute.fromString(.gnu, vendor_str, attr_str) != null;
|
|
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.appendSlice(pp.gpa, if (exists) "1\n" else "0\n");
|
|
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
|
|
continue;
|
|
}
|
|
if (!pp.comp.langopts.standard.atLeast(.c23)) break :res not_found;
|
|
|
|
const attrs = std.StaticStringMap([]const u8).initComptime(.{
|
|
.{ "deprecated", "201904L\n" },
|
|
.{ "fallthrough", "201904L\n" },
|
|
.{ "maybe_unused", "201904L\n" },
|
|
.{ "nodiscard", "202003L\n" },
|
|
.{ "noreturn", "202202L\n" },
|
|
.{ "_Noreturn", "202202L\n" },
|
|
.{ "unsequenced", "202207L\n" },
|
|
.{ "reproducible", "202207L\n" },
|
|
});
|
|
|
|
const attr_str = Attribute.normalize(pp.expandedSlice(attr_ident.?));
|
|
break :res attrs.get(attr_str) orelse not_found;
|
|
};
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.appendSlice(pp.gpa, result);
|
|
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
|
|
},
|
|
.macro_param_has_embed => {
|
|
const arg = expanded_args.items[0];
|
|
const not_found = "0\n";
|
|
const result = if (arg.len == 0) blk: {
|
|
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
|
|
break :blk not_found;
|
|
} else res: {
|
|
var embed_args: []const TokenWithExpansionLocs = &.{};
|
|
const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse
|
|
break :res not_found;
|
|
|
|
var prev = tokFromRaw(raw);
|
|
prev.id = .eof;
|
|
var it: struct {
|
|
i: u32 = 0,
|
|
slice: []const TokenWithExpansionLocs,
|
|
prev: TokenWithExpansionLocs,
|
|
fn next(it: *@This()) TokenWithExpansionLocs {
|
|
while (it.i < it.slice.len) switch (it.slice[it.i].id) {
|
|
.macro_ws, .whitespace => it.i += 1,
|
|
else => break,
|
|
} else return it.prev;
|
|
defer it.i += 1;
|
|
it.prev = it.slice[it.i];
|
|
it.prev.id = .eof;
|
|
return it.slice[it.i];
|
|
}
|
|
} = .{ .slice = embed_args, .prev = prev };
|
|
|
|
while (true) {
|
|
const param_first = it.next();
|
|
if (param_first.id == .eof) break;
|
|
if (param_first.id != .identifier) {
|
|
try pp.err(param_first, .malformed_embed_param, .{});
|
|
continue;
|
|
}
|
|
|
|
const char_top = pp.char_buf.items.len;
|
|
defer pp.char_buf.items.len = char_top;
|
|
|
|
const maybe_colon = it.next();
|
|
const param = switch (maybe_colon.id) {
|
|
.colon_colon => blk: {
|
|
// vendor::param
|
|
const param = it.next();
|
|
if (param.id != .identifier) {
|
|
try pp.err(param, .malformed_embed_param, .{});
|
|
continue;
|
|
}
|
|
const l_paren = it.next();
|
|
if (l_paren.id != .l_paren) {
|
|
try pp.err(l_paren, .malformed_embed_param, .{});
|
|
continue;
|
|
}
|
|
break :blk "doesn't exist";
|
|
},
|
|
.l_paren => Attribute.normalize(pp.expandedSlice(param_first)),
|
|
else => {
|
|
try pp.err(maybe_colon, .malformed_embed_param, .{});
|
|
continue;
|
|
},
|
|
};
|
|
|
|
var arg_count: u32 = 0;
|
|
var first_arg: TokenWithExpansionLocs = undefined;
|
|
while (true) {
|
|
const next = it.next();
|
|
if (next.id == .eof) {
|
|
try pp.err(param_first, .malformed_embed_limit, .{});
|
|
break;
|
|
}
|
|
if (next.id == .r_paren) break;
|
|
arg_count += 1;
|
|
if (arg_count == 1) first_arg = next;
|
|
}
|
|
|
|
if (std.mem.eql(u8, param, "limit")) {
|
|
if (arg_count != 1) {
|
|
try pp.err(param_first, .malformed_embed_limit, .{});
|
|
continue;
|
|
}
|
|
if (first_arg.id != .pp_num) {
|
|
try pp.err(param_first, .malformed_embed_limit, .{});
|
|
continue;
|
|
}
|
|
_ = std.fmt.parseInt(u32, pp.expandedSlice(first_arg), 10) catch {
|
|
break :res not_found;
|
|
};
|
|
} else if (!std.mem.eql(u8, param, "prefix") and !std.mem.eql(u8, param, "suffix") and
|
|
!std.mem.eql(u8, param, "if_empty"))
|
|
{
|
|
break :res not_found;
|
|
}
|
|
}
|
|
|
|
const include_type: Compilation.IncludeType = switch (include_str[0]) {
|
|
'"' => .quotes,
|
|
'<' => .angle_brackets,
|
|
else => unreachable,
|
|
};
|
|
const filename = include_str[1 .. include_str.len - 1];
|
|
const contents = (try pp.comp.findEmbed(filename, arg[0].loc.id, include_type, .limited(1), pp.dep_file)) orelse
|
|
break :res not_found;
|
|
|
|
defer pp.comp.gpa.free(contents);
|
|
break :res if (contents.len != 0) "1\n" else "2\n";
|
|
};
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result);
|
|
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
|
|
},
|
|
.macro_param_pragma_operator => {
|
|
// Clang and GCC require exactly one token (so, no parentheses or string pasting)
|
|
// even though their error messages indicate otherwise. Ours is slightly more
|
|
// descriptive.
|
|
var invalid: ?TokenWithExpansionLocs = null;
|
|
var string: ?TokenWithExpansionLocs = null;
|
|
for (expanded_args.items[0]) |tok| {
|
|
switch (tok.id) {
|
|
.string_literal => {
|
|
if (string) |_| {
|
|
invalid = tok;
|
|
break;
|
|
}
|
|
string = tok;
|
|
},
|
|
.macro_ws => continue,
|
|
.comment => continue,
|
|
else => {
|
|
invalid = tok;
|
|
break;
|
|
},
|
|
}
|
|
}
|
|
if (string == null and invalid == null) invalid = macro_tok;
|
|
if (invalid) |some|
|
|
try pp.err(some, .pragma_operator_string_literal, .{})
|
|
else
|
|
try pp.pragmaOperator(string.?, macro_tok.loc);
|
|
},
|
|
.macro_param_ms_identifier => blk: {
|
|
// Expect '__identifier' '(' macro-identifier ')'
|
|
var ident: ?TokenWithExpansionLocs = null;
|
|
for (expanded_args.items[0]) |tok| {
|
|
switch (tok.id) {
|
|
.macro_ws => continue,
|
|
.comment => continue,
|
|
else => {},
|
|
}
|
|
if (ident) |_| {
|
|
try pp.err(tok, .builtin_missing_r_paren, .{"identifier"});
|
|
break :blk;
|
|
} else if (tok.id.isMacroIdentifier()) {
|
|
ident = tok;
|
|
} else {
|
|
try pp.err(tok, .cannot_convert_to_identifier, .{tok.id.symbol()});
|
|
break :blk;
|
|
}
|
|
}
|
|
if (ident) |*some| {
|
|
some.id = .identifier;
|
|
try buf.append(some.*);
|
|
} else {
|
|
try pp.err(macro_tok, .expected_identifier, .{});
|
|
}
|
|
},
|
|
.macro_param_ms_pragma => {
|
|
try pp.msPragmaOperator(macro_tok, expanded_args.items[0]);
|
|
},
|
|
.comma => {
|
|
if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
|
|
const hash_hash = func_macro.tokens[tok_i + 1];
|
|
var maybe_va_args = func_macro.tokens[tok_i + 2];
|
|
var consumed: usize = 2;
|
|
if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) {
|
|
consumed = 3;
|
|
maybe_va_args = func_macro.tokens[tok_i + 3];
|
|
}
|
|
if (maybe_va_args.id == .keyword_va_args) {
|
|
// GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty
|
|
tok_i += consumed;
|
|
if (func_macro.params.len == expanded_args.items.len) {
|
|
// Empty __VA_ARGS__, drop the comma
|
|
try pp.err(hash_hash, .comma_deletion_va_args, .{});
|
|
} else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) {
|
|
// Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted"
|
|
if (pp.comp.langopts.standard.isGNU()) {
|
|
// GNU standard, drop the comma
|
|
try pp.err(hash_hash, .comma_deletion_va_args, .{});
|
|
} else {
|
|
// C standard, retain the comma
|
|
try buf.append(tokFromRaw(raw));
|
|
}
|
|
} else {
|
|
try buf.append(tokFromRaw(raw));
|
|
if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) {
|
|
try pp.err(hash_hash, .comma_deletion_va_args, .{});
|
|
}
|
|
const raw_loc = Source.Location{
|
|
.id = maybe_va_args.source,
|
|
.byte_offset = maybe_va_args.start,
|
|
.line = maybe_va_args.line,
|
|
};
|
|
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
|
|
}
|
|
continue;
|
|
}
|
|
}
|
|
// Regular comma, no token pasting with __VA_ARGS__
|
|
try buf.append(tokFromRaw(raw));
|
|
},
|
|
else => try buf.append(tokFromRaw(raw)),
|
|
}
|
|
}
|
|
removePlacemarkers(&buf);
|
|
|
|
const macro_expansion_locs = macro_tok.expansionSlice();
|
|
for (buf.items) |*tok| {
|
|
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
|
|
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
|
|
const tok_hidelist = pp.hideset.get(tok.loc);
|
|
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hideset);
|
|
try pp.hideset.put(tok.loc, new_hidelist);
|
|
}
|
|
|
|
return buf;
|
|
}
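
// Example of the GNU `, ##__VA_ARGS__` comma-deletion extension handled above:
//   #define LOG(fmt, ...) printf(fmt, ##__VA_ARGS__)
//   LOG("hi")      // no variadic arguments: the trailing comma is deleted -> printf("hi")
//   LOG("%d", 1)   //                                                      -> printf("%d", 1)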
|
|
|
|
fn expandVaOpt(
    pp: *Preprocessor,
    buf: *ExpandBuf,
    raw: RawToken,
    should_expand: bool,
) !void {
    if (!should_expand) return;

    const source = pp.comp.getSource(raw.source);
    var tokenizer: Tokenizer = .{
        .buf = source.buf,
        .index = raw.start,
        .source = raw.source,
        .langopts = pp.comp.langopts,
        .line = raw.line,
    };
    while (tokenizer.index < raw.end) {
        const tok = tokenizer.next();
        try buf.append(tokFromRaw(tok));
    }
}
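
// Example of the C23 __VA_OPT__ behavior implemented above (standard semantics):
//   #define LOG(fmt, ...) printf(fmt __VA_OPT__(,) __VA_ARGS__)
//   LOG("hi")      // __VA_OPT__(,) expands to nothing   -> printf("hi")
//   LOG("%d", 1)   // __VA_OPT__(,) expands to a comma   -> printf("%d", 1)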
|
|
|
|
fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void {
    try buf.ensureUnusedCapacity(tokens.len);
    for (tokens) |tok| {
        var copy = try tok.dupe(buf.allocator);
        errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator);
        try copy.addExpansionLocation(buf.allocator, src);
        buf.appendAssumeCapacity(copy);
    }
}
|
|
|
|
fn nextBufToken(
|
|
pp: *Preprocessor,
|
|
tokenizer: *Tokenizer,
|
|
buf: *ExpandBuf,
|
|
start_idx: *usize,
|
|
end_idx: *usize,
|
|
extend_buf: bool,
|
|
) Error!TokenWithExpansionLocs {
|
|
start_idx.* += 1;
|
|
if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
|
|
if (extend_buf) {
|
|
const raw_tok = tokenizer.next();
|
|
if (raw_tok.id.isMacroIdentifier() and
|
|
pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null)
|
|
try pp.err(raw_tok, .poisoned_identifier, .{});
|
|
|
|
if (raw_tok.id == .nl) pp.add_expansion_nl += 1;
|
|
|
|
const new_tok = tokFromRaw(raw_tok);
|
|
end_idx.* += 1;
|
|
try buf.append(new_tok);
|
|
return new_tok;
|
|
} else {
|
|
return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } };
|
|
}
|
|
} else {
|
|
return buf.items[start_idx.*];
|
|
}
|
|
}
|
|
|
|
fn collectMacroFuncArguments(
|
|
pp: *Preprocessor,
|
|
tokenizer: *Tokenizer,
|
|
buf: *ExpandBuf,
|
|
start_idx: *usize,
|
|
end_idx: *usize,
|
|
extend_buf: bool,
|
|
is_builtin: bool,
|
|
r_paren: *TokenWithExpansionLocs,
|
|
) !MacroArguments {
|
|
const name_tok = buf.items[start_idx.*];
|
|
const saved_tokenizer = tokenizer.*;
|
|
const old_end = end_idx.*;
|
|
|
|
while (true) {
|
|
const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
|
|
switch (tok.id) {
|
|
.nl, .whitespace, .macro_ws => {},
|
|
.l_paren => break,
|
|
else => {
|
|
if (is_builtin) {
|
|
try pp.err(name_tok, .missing_lparen_after_builtin, .{pp.expandedSlice(name_tok)});
|
|
}
|
|
                // Not a macro function call; treat it as a normal identifier and rewind.
|
|
tokenizer.* = saved_tokenizer;
|
|
end_idx.* = old_end;
|
|
return error.MissingLParen;
|
|
},
|
|
}
|
|
}
|
|
|
|
// collect the arguments.
|
|
var parens: u32 = 0;
|
|
var args = MacroArguments.init(pp.gpa);
|
|
errdefer deinitMacroArguments(pp.gpa, &args);
|
|
var curArgument = std.array_list.Managed(TokenWithExpansionLocs).init(pp.gpa);
|
|
defer curArgument.deinit();
|
|
while (true) {
|
|
var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
|
|
tok.flags.is_macro_arg = true;
|
|
switch (tok.id) {
|
|
.comma => {
|
|
if (parens == 0) {
|
|
const owned = try curArgument.toOwnedSlice();
|
|
errdefer pp.gpa.free(owned);
|
|
try args.append(owned);
|
|
} else {
|
|
const duped = try tok.dupe(pp.gpa);
|
|
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
|
|
try curArgument.append(duped);
|
|
}
|
|
},
|
|
.l_paren => {
|
|
const duped = try tok.dupe(pp.gpa);
|
|
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
|
|
try curArgument.append(duped);
|
|
parens += 1;
|
|
},
|
|
.r_paren => {
|
|
if (parens == 0) {
|
|
const owned = try curArgument.toOwnedSlice();
|
|
errdefer pp.gpa.free(owned);
|
|
try args.append(owned);
|
|
r_paren.* = tok;
|
|
break;
|
|
} else {
|
|
const duped = try tok.dupe(pp.gpa);
|
|
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
|
|
try curArgument.append(duped);
|
|
parens -= 1;
|
|
}
|
|
},
|
|
.eof => {
|
|
{
|
|
const owned = try curArgument.toOwnedSlice();
|
|
errdefer pp.gpa.free(owned);
|
|
try args.append(owned);
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
try pp.err(name_tok, .unterminated_macro_arg_list, .{});
|
|
return error.Unterminated;
|
|
},
|
|
.nl, .whitespace => {
|
|
try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc });
|
|
},
|
|
else => {
|
|
const duped = try tok.dupe(pp.gpa);
|
|
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
|
|
try curArgument.append(duped);
|
|
},
|
|
}
|
|
}
|
|
|
|
return args;
|
|
}
|
|
|
|
fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void {
    for (buf.items[start .. start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
    try buf.replaceRange(start, len, &.{});
    moving_end_idx.* -|= len;
}
|
|
|
|
/// The behavior of `defined` depends on whether we are in a preprocessor
/// expression context (#if or #elif) or not.
/// In a non-expression context it's just an identifier. Within a preprocessor
/// expression it is a unary operator or one-argument function.
const EvalContext = enum {
    expr,
    non_expr,
};
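
// For example, in `#if defined(FOO) || defined BAR` both uses of `defined` are evaluated as
// operators (.expr context), whereas an occurrence of `defined` in ordinary program text that
// is being macro-expanded is just an identifier (.non_expr context).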
|
|
|
|
/// Helper for safely iterating over a slice of tokens while skipping whitespace
const TokenIterator = struct {
    toks: []const TokenWithExpansionLocs,
    i: usize,

    fn init(toks: []const TokenWithExpansionLocs) TokenIterator {
        return .{ .toks = toks, .i = 0 };
    }

    fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs {
        while (self.i < self.toks.len) : (self.i += 1) {
            const tok = self.toks[self.i];
            if (tok.id == .whitespace or tok.id == .macro_ws) continue;

            self.i += 1;
            return tok;
        }
        return null;
    }
};
|
|
|
|
fn expandMacroExhaustive(
|
|
pp: *Preprocessor,
|
|
tokenizer: *Tokenizer,
|
|
buf: *ExpandBuf,
|
|
start_idx: usize,
|
|
end_idx: usize,
|
|
extend_buf: bool,
|
|
eval_ctx: EvalContext,
|
|
) MacroError!void {
|
|
var moving_end_idx = end_idx;
|
|
var advance_index: usize = 0;
|
|
// rescan loop
|
|
var do_rescan = true;
|
|
while (do_rescan) {
|
|
do_rescan = false;
|
|
// expansion loop
|
|
var idx: usize = start_idx + advance_index;
|
|
while (idx < moving_end_idx) {
|
|
const macro_tok = buf.items[idx];
|
|
if (macro_tok.id == .keyword_defined and eval_ctx == .expr) {
|
|
idx += 1;
|
|
var it = TokenIterator.init(buf.items[idx..moving_end_idx]);
|
|
if (it.nextNoWS()) |tok| {
|
|
switch (tok.id) {
|
|
.l_paren => {
|
|
_ = it.nextNoWS(); // eat (what should be) identifier
|
|
_ = it.nextNoWS(); // eat (what should be) r paren
|
|
},
|
|
.identifier, .extended_identifier => {},
|
|
else => {},
|
|
}
|
|
}
|
|
idx += it.i;
|
|
continue;
|
|
}
|
|
if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) {
|
|
idx += 1;
|
|
continue;
|
|
}
|
|
const expanded = pp.expandedSlice(macro_tok);
|
|
const macro = pp.defines.getPtr(expanded) orelse {
|
|
idx += 1;
|
|
continue;
|
|
};
|
|
const macro_hidelist = pp.hideset.get(macro_tok.loc);
|
|
if (pp.hideset.contains(macro_hidelist, expanded)) {
|
|
idx += 1;
|
|
continue;
|
|
}
|
|
|
|
macro_handler: {
|
|
if (macro.is_func) {
|
|
var r_paren: TokenWithExpansionLocs = undefined;
|
|
var macro_scan_idx = idx;
|
|
// to be saved in case this doesn't turn out to be a call
|
|
const args = pp.collectMacroFuncArguments(
|
|
tokenizer,
|
|
buf,
|
|
¯o_scan_idx,
|
|
&moving_end_idx,
|
|
extend_buf,
|
|
macro.is_builtin,
|
|
&r_paren,
|
|
) catch |er| switch (er) {
|
|
error.MissingLParen => {
|
|
if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
|
|
idx += 1;
|
|
break :macro_handler;
|
|
},
|
|
error.Unterminated => {
|
|
if (pp.comp.langopts.emulate == .gcc) idx += 1;
|
|
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx);
|
|
break :macro_handler;
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
assert(r_paren.id == .r_paren);
|
|
var free_arg_expansion_locs = false;
|
|
defer {
|
|
for (args.items) |item| {
|
|
if (free_arg_expansion_locs) for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
pp.gpa.free(item);
|
|
}
|
|
args.deinit();
|
|
}
|
|
const r_paren_hidelist = pp.hideset.get(r_paren.loc);
|
|
var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist);
|
|
hs = try pp.hideset.prepend(macro_tok.loc, hs);
|
|
|
|
var args_count: u32 = @intCast(args.items.len);
|
|
                    // If the macro has zero parameters, a call like g() still yields args_count == 1;
                    // an empty token list g() and a whitespace-only token list g( )
                    // both count as zero arguments for the purposes of argument-count validation.
|
|
if (args_count == 1 and macro.params.len == 0) {
|
|
for (args.items[0]) |tok| {
|
|
if (tok.id != .macro_ws) break;
|
|
} else {
|
|
args_count = 0;
|
|
}
|
|
}
|
|
|
|
// Validate argument count.
|
|
if (macro.var_args and args_count < macro.params.len) {
|
|
free_arg_expansion_locs = true;
|
|
try pp.err(buf.items[idx], .expected_at_least_arguments, .{ macro.params.len, args_count });
|
|
idx += 1;
|
|
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
|
|
continue;
|
|
}
|
|
if (!macro.var_args and args_count != macro.params.len) {
|
|
free_arg_expansion_locs = true;
|
|
try pp.err(buf.items[idx], .expected_arguments, .{ macro.params.len, args_count });
|
|
idx += 1;
|
|
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
|
|
continue;
|
|
}
|
|
var expanded_args = MacroArguments.init(pp.gpa);
|
|
defer deinitMacroArguments(pp.gpa, &expanded_args);
|
|
try expanded_args.ensureTotalCapacity(args.items.len);
|
|
for (args.items) |arg| {
|
|
var expand_buf = ExpandBuf.init(pp.gpa);
|
|
errdefer expand_buf.deinit();
|
|
try expand_buf.appendSlice(arg);
|
|
|
|
try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx);
|
|
|
|
expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice());
|
|
}
|
|
|
|
var res = try pp.expandFuncMacro(macro_tok, macro, &args, &expanded_args, hs);
|
|
defer res.deinit();
|
|
const tokens_added = res.items.len;
|
|
const tokens_removed = macro_scan_idx - idx + 1;
|
|
for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
try buf.replaceRange(idx, tokens_removed, res.items);
|
|
|
|
moving_end_idx += tokens_added;
|
|
// Overflow here means that we encountered an unterminated argument list
|
|
// while expanding the body of this macro.
|
|
moving_end_idx -|= tokens_removed;
|
|
idx += tokens_added;
|
|
do_rescan = true;
|
|
} else {
|
|
const res = try pp.expandObjMacro(macro);
|
|
defer res.deinit();
|
|
|
|
const hs = try pp.hideset.prepend(macro_tok.loc, macro_hidelist);
|
|
|
|
const macro_expansion_locs = macro_tok.expansionSlice();
|
|
var increment_idx_by = res.items.len;
|
|
for (res.items, 0..) |*tok, i| {
|
|
tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
|
|
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
|
|
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
|
|
|
|
const tok_hidelist = pp.hideset.get(tok.loc);
|
|
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
|
|
try pp.hideset.put(tok.loc, new_hidelist);
|
|
|
|
if (tok.id == .keyword_defined and eval_ctx == .expr) {
|
|
if (macro.is_func) {
|
|
try pp.err(tok, .expansion_to_defined_func, .{});
|
|
} else {
|
|
try pp.err(tok, .expansion_to_defined_obj, .{});
|
|
}
|
|
}
|
|
|
|
if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) {
|
|
increment_idx_by = i;
|
|
}
|
|
}
|
|
|
|
TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa);
|
|
try buf.replaceRange(idx, 1, res.items);
|
|
idx += increment_idx_by;
|
|
moving_end_idx = moving_end_idx + res.items.len - 1;
|
|
do_rescan = true;
|
|
}
|
|
}
|
|
if (idx - start_idx == advance_index + 1 and !do_rescan) {
|
|
advance_index += 1;
|
|
}
|
|
} // end of replacement phase
|
|
}
|
|
// end of scanning phase
|
|
|
|
// trim excess buffer
|
|
for (buf.items[moving_end_idx..]) |item| {
|
|
TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa);
|
|
}
|
|
buf.items.len = moving_end_idx;
|
|
}
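
// The hideset bookkeeping above is what stops self-referential macros from expanding forever
// during rescanning, e.g. (standard C behavior):
//   #define FOO FOO + 1
//   FOO            // expands once to `FOO + 1`; the inner FOO is blocked by its hideset
//   #define A B
//   #define B A
//   A              // expands A -> B -> A and then stops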
|
|
|
|
fn unescapeUcn(pp: *Preprocessor, tok: TokenWithExpansionLocs) !TokenWithExpansionLocs {
|
|
switch (tok.id) {
|
|
.incomplete_ucn => {
|
|
@branchHint(.cold);
|
|
try pp.err(tok, .incomplete_ucn, .{});
|
|
},
|
|
.extended_identifier => {
|
|
@branchHint(.cold);
|
|
const identifier = pp.expandedSlice(tok);
|
|
if (mem.indexOfScalar(u8, identifier, '\\') != null) {
|
|
@branchHint(.cold);
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.ensureUnusedCapacity(pp.gpa, identifier.len + 1);
|
|
var identifier_parser: text_literal.Parser = .{
|
|
.comp = pp.comp,
|
|
.literal = pp.expandedSlice(tok), // re-expand since previous line may have caused a reallocation, invalidating `identifier`
|
|
.kind = .utf_8,
|
|
.max_codepoint = 0x10ffff,
|
|
.loc = tok.loc,
|
|
.expansion_locs = tok.expansionSlice(),
|
|
.diagnose_incorrect_encoding = false,
|
|
};
|
|
while (try identifier_parser.next()) |decoded| {
|
|
switch (decoded) {
|
|
.value => unreachable, // validated by tokenizer
|
|
.codepoint => |c| {
|
|
var buf: [4]u8 = undefined;
|
|
const written = std.unicode.utf8Encode(c, &buf) catch unreachable;
|
|
pp.comp.generated_buf.appendSliceAssumeCapacity(buf[0..written]);
|
|
},
|
|
.improperly_encoded => |bytes| {
|
|
pp.comp.generated_buf.appendSliceAssumeCapacity(bytes);
|
|
},
|
|
.utf8_text => |view| {
|
|
pp.comp.generated_buf.appendSliceAssumeCapacity(view.bytes);
|
|
},
|
|
}
|
|
}
|
|
pp.comp.generated_buf.appendAssumeCapacity('\n');
|
|
defer TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
return pp.makeGeneratedToken(start, .extended_identifier, tok);
|
|
}
|
|
},
|
|
else => {},
|
|
}
|
|
return tok;
|
|
}
|
|
|
|
/// Try to expand a macro after a possible candidate has been read from the `tokenizer`
/// into the `raw` token passed as argument
fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void {
|
|
var source_tok = tokFromRaw(raw);
|
|
if (!raw.id.isMacroIdentifier()) {
|
|
source_tok.id.simplifyMacroKeyword();
|
|
return pp.addToken(source_tok);
|
|
}
|
|
pp.top_expansion_buf.items.len = 0;
|
|
try pp.top_expansion_buf.append(source_tok);
|
|
pp.expansion_source_loc = source_tok.loc;
|
|
|
|
pp.hideset.clearRetainingCapacity();
|
|
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
|
|
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
|
|
for (pp.top_expansion_buf.items) |*tok| {
|
|
if (tok.id == .macro_ws and !pp.preserve_whitespace) {
|
|
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
continue;
|
|
}
|
|
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
|
|
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
continue;
|
|
}
|
|
if (tok.id == .placemarker) {
|
|
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
|
|
continue;
|
|
}
|
|
tok.id.simplifyMacroKeywordExtra(true);
|
|
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok.*));
|
|
}
|
|
if (pp.preserve_whitespace) {
|
|
try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl);
|
|
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
|
|
pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{
|
|
.id = tokenizer.source,
|
|
.line = tokenizer.line,
|
|
} });
|
|
}
|
|
}
|
|
}
|
|
|
|
fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
|
|
if (tok.id.lexeme()) |some| {
|
|
if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
|
|
}
|
|
var tmp_tokenizer: Tokenizer = .{
|
|
.buf = pp.comp.getSource(tok.loc.id).buf,
|
|
.langopts = pp.comp.langopts,
|
|
.index = tok.loc.byte_offset,
|
|
.source = .generated,
|
|
};
|
|
if (tok.id == .macro_string) {
|
|
while (true) : (tmp_tokenizer.index += 1) {
|
|
if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break;
|
|
}
|
|
return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1];
|
|
}
|
|
const res = tmp_tokenizer.next();
|
|
return tmp_tokenizer.buf[res.start..res.end];
|
|
}
|
|
|
|
/// Get expanded token source string.
pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 {
    return pp.expandedSliceExtra(tok, .single_macro_ws);
}
|
|
|
|
/// Concatenate two tokens and add the result to pp.generated
|
|
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void {
|
|
const lhs = while (lhs_toks.pop()) |lhs| {
|
|
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
|
|
(lhs.id != .macro_ws and lhs.id != .comment))
|
|
break lhs;
|
|
|
|
TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
|
|
} else {
|
|
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
|
|
};
|
|
|
|
var rhs_rest: u32 = 1;
|
|
const rhs = for (rhs_toks) |rhs| {
|
|
if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or
|
|
(rhs.id != .macro_ws and rhs.id != .comment))
|
|
break rhs;
|
|
|
|
rhs_rest += 1;
|
|
} else {
|
|
return lhs_toks.appendAssumeCapacity(lhs);
|
|
};
|
|
defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
|
|
|
|
const start = pp.comp.generated_buf.items.len;
|
|
const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len;
|
|
try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline
|
|
// We cannot use the same slices here since they might be invalidated by `ensureCapacity`
|
|
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs));
|
|
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs));
|
|
pp.comp.generated_buf.appendAssumeCapacity('\n');
|
|
|
|
// Try to tokenize the result.
|
|
var tmp_tokenizer = Tokenizer{
|
|
.buf = pp.comp.generated_buf.items,
|
|
.langopts = pp.comp.langopts,
|
|
.index = @intCast(start),
|
|
.source = .generated,
|
|
};
|
|
const pasted_token = tmp_tokenizer.nextNoWSComments();
|
|
const next = tmp_tokenizer.nextNoWSComments();
|
|
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
|
|
.placemarker
|
|
else
|
|
pasted_token.id;
|
|
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
|
|
|
|
if (next.id != .nl and next.id != .eof) {
|
|
try pp.err(lhs, .pasting_formed_invalid, .{pp.comp.generated_buf.items[start..end]});
|
|
try lhs_toks.append(tokFromRaw(next));
|
|
}
|
|
|
|
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
|
|
}
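
// Example of an invalid paste diagnosed above (standard C behavior):
//   #define PASTE(a, b) a##b
//   PASTE(1, +)   // "1+" is not a single valid preprocessing token -> pasting_formed_invalid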
|
|
|
|
fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs {
    var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{
        .id = .generated,
        .byte_offset = @intCast(start),
        .line = pp.generated_line,
    } };
    pp.generated_line += 1;
    try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc});
    try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice());
    return pasted_token;
}
|
|
|
|
/// Defines a new macro and warns if it is a duplicate
fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: TokenWithExpansionLocs, macro: Macro) Error!void {
|
|
const name_str = pp.expandedSlice(name_tok);
|
|
const gop = try pp.defines.getOrPut(pp.gpa, name_str);
|
|
if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) {
|
|
const loc = name_tok.loc;
|
|
const prev_total = pp.diagnostics.total;
|
|
if (gop.value_ptr.is_builtin) {
|
|
try pp.err(loc, .builtin_macro_redefined, .{});
|
|
} else {
|
|
try pp.err(loc, .macro_redefined, .{name_str});
|
|
}
|
|
|
|
if (!gop.value_ptr.is_builtin and pp.diagnostics.total != prev_total) {
|
|
try pp.err(gop.value_ptr.loc, .previous_definition, .{});
|
|
}
|
|
}
|
|
if (pp.verbose) {
|
|
const raw: RawToken = .{ .id = name_tok.id, .source = name_tok.loc.id, .start = name_tok.loc.byte_offset, .line = name_tok.loc.line };
|
|
pp.verboseLog(raw, "macro {s} defined", .{name_str});
|
|
}
|
|
if (pp.store_macro_tokens) {
|
|
try pp.addToken(tokFromRaw(define_tok));
|
|
}
|
|
gop.value_ptr.* = macro;
|
|
}
|
|
|
|
/// Handle a #define directive.
fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void {
|
|
// Get macro name and validate it.
|
|
const escaped_macro_name = tokenizer.nextNoWS();
|
|
if (escaped_macro_name.id == .keyword_defined) {
|
|
try pp.err(escaped_macro_name, .defined_as_macro_name, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
if (!escaped_macro_name.id.isMacroIdentifier()) {
|
|
try pp.err(escaped_macro_name, .macro_name_must_be_identifier, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
const macro_name = try pp.unescapeUcn(tokFromRaw(escaped_macro_name));
|
|
defer TokenWithExpansionLocs.free(macro_name.expansion_locs, pp.gpa);
|
|
|
|
var macro_name_token_id = macro_name.id;
|
|
macro_name_token_id.simplifyMacroKeyword();
|
|
switch (macro_name_token_id) {
|
|
.identifier, .extended_identifier => {},
|
|
// TODO allow #define <keyword> <keyword> and #define extern|inline|static|const
|
|
else => if (macro_name_token_id.isMacroIdentifier() and
|
|
!mem.eql(u8, pp.comp.getSource(tokenizer.source).path, "<builtin>"))
|
|
{
|
|
try pp.err(macro_name, .keyword_macro, .{});
|
|
},
|
|
}
|
|
|
|
// Check for function macros and empty defines.
|
|
var first = tokenizer.next();
|
|
switch (first.id) {
|
|
.nl, .eof => return pp.defineMacro(define_tok, macro_name, .{
|
|
.params = &.{},
|
|
.tokens = &.{},
|
|
.var_args = false,
|
|
.loc = macro_name.loc,
|
|
.is_func = false,
|
|
}),
|
|
.whitespace => first = tokenizer.next(),
|
|
.l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first),
|
|
else => try pp.err(first, .whitespace_after_macro_name, .{}),
|
|
}
|
|
if (first.id == .hash_hash) {
|
|
try pp.err(first, .hash_hash_at_start, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
first.id.simplifyMacroKeyword();
|
|
|
|
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
|
|
|
|
var need_ws = false;
|
|
// Collect the token body and validate any ## found.
|
|
var tok = first;
|
|
while (true) {
|
|
tok.id.simplifyMacroKeyword();
|
|
switch (tok.id) {
|
|
.hash_hash => {
|
|
const next = tokenizer.nextNoWSComments();
|
|
switch (next.id) {
|
|
.nl, .eof => {
|
|
try pp.err(tok, .hash_hash_at_end, .{});
|
|
return;
|
|
},
|
|
.hash_hash => {
|
|
try pp.err(next, .hash_hash_at_end, .{});
|
|
return;
|
|
},
|
|
else => {},
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
try pp.token_buf.append(next);
|
|
},
|
|
.nl, .eof => break,
|
|
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
|
|
if (need_ws) {
|
|
need_ws = false;
|
|
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
.whitespace => need_ws = true,
|
|
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
|
|
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
|
|
else => {
|
|
if (tok.id == .incomplete_ucn) {
|
|
@branchHint(.cold);
|
|
try pp.err(tok, .incomplete_ucn, .{});
|
|
}
|
|
if (tok.id != .whitespace and need_ws) {
|
|
need_ws = false;
|
|
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
}
|
|
tok = tokenizer.next();
|
|
}
|
|
|
|
const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
|
|
try pp.defineMacro(define_tok, macro_name, .{
|
|
.loc = macro_name.loc,
|
|
.tokens = list,
|
|
.params = &.{},
|
|
.is_func = false,
|
|
.var_args = false,
|
|
});
|
|
}
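
// Examples of the '##' placement rules enforced above:
//   #define GOOD one ## two    // ok, '##' is recorded together with its operands
//   #define BAD  ## two        // error: '##' cannot appear at the start of a macro body
//   #define BAD2 one ##        // error: '##' cannot appear at the end of a macro body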
|
|
|
|
/// Handle a function-like #define directive.
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: TokenWithExpansionLocs, l_paren: RawToken) Error!void {
|
|
assert(macro_name.id.isMacroIdentifier());
|
|
var params = std.array_list.Managed([]const u8).init(pp.gpa);
|
|
defer params.deinit();
|
|
|
|
// Parse the parameter list.
|
|
var gnu_var_args: []const u8 = "";
|
|
var var_args = false;
|
|
while (true) {
|
|
var tok = tokenizer.nextNoWS();
|
|
if (tok.id == .r_paren) break;
|
|
if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list, .{});
|
|
if (tok.id == .ellipsis) {
|
|
var_args = true;
|
|
const r_paren = tokenizer.nextNoWS();
|
|
if (r_paren.id != .r_paren) {
|
|
try pp.err(r_paren, .missing_paren_param_list, .{});
|
|
try pp.err(l_paren, .to_match_paren, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
break;
|
|
}
|
|
if (!tok.id.isMacroIdentifier()) {
|
|
try pp.err(tok, .invalid_token_param_list, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
|
|
try params.append(pp.tokSlice(tok));
|
|
|
|
tok = tokenizer.nextNoWS();
|
|
if (tok.id == .ellipsis) {
|
|
try pp.err(tok, .gnu_va_macro, .{});
|
|
gnu_var_args = params.pop().?;
|
|
const r_paren = tokenizer.nextNoWS();
|
|
if (r_paren.id != .r_paren) {
|
|
try pp.err(r_paren, .missing_paren_param_list, .{});
|
|
try pp.err(l_paren, .to_match_paren, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
break;
|
|
} else if (tok.id == .r_paren) {
|
|
break;
|
|
} else if (tok.id != .comma) {
|
|
try pp.err(tok, .expected_comma_param_list, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
}
|
|
|
|
var need_ws = false;
|
|
// Collect the body tokens and validate # and ##'s found.
|
|
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
|
|
tok_loop: while (true) {
|
|
var tok = tokenizer.next();
|
|
switch (tok.id) {
|
|
.nl, .eof => break,
|
|
.whitespace => need_ws = pp.token_buf.items.len != 0,
|
|
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
|
|
if (need_ws) {
|
|
need_ws = false;
|
|
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
.hash => {
|
|
if (tok.id != .whitespace and need_ws) {
|
|
need_ws = false;
|
|
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
|
|
}
|
|
const param = tokenizer.nextNoWS();
|
|
blk: {
|
|
if (var_args and param.id == .keyword_va_args) {
|
|
tok.id = .stringify_va_args;
|
|
try pp.token_buf.append(tok);
|
|
continue :tok_loop;
|
|
}
|
|
if (!param.id.isMacroIdentifier()) break :blk;
|
|
const s = pp.tokSlice(param);
|
|
if (mem.eql(u8, s, gnu_var_args)) {
|
|
tok.id = .stringify_va_args;
|
|
try pp.token_buf.append(tok);
|
|
continue :tok_loop;
|
|
}
|
|
for (params.items, 0..) |p, i| {
|
|
if (mem.eql(u8, p, s)) {
|
|
tok.id = .stringify_param;
|
|
tok.end = @intCast(i);
|
|
try pp.token_buf.append(tok);
|
|
continue :tok_loop;
|
|
}
|
|
}
|
|
}
|
|
try pp.err(param, .hash_not_followed_param, .{});
|
|
return skipToNl(tokenizer);
|
|
},
|
|
.hash_hash => {
|
|
need_ws = false;
|
|
// if ## appears at the beginning, the token buf is still empty
|
|
// in this case, error out
|
|
if (pp.token_buf.items.len == 0) {
|
|
try pp.err(tok, .hash_hash_at_start, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
const saved_tokenizer = tokenizer.*;
|
|
const next = tokenizer.nextNoWSComments();
|
|
if (next.id == .nl or next.id == .eof) {
|
|
try pp.err(tok, .hash_hash_at_end, .{});
|
|
return;
|
|
}
|
|
tokenizer.* = saved_tokenizer;
|
|
// convert the previous token to .macro_param_no_expand if it was .macro_param
|
|
if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) {
|
|
pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand;
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
|
|
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
|
|
else => {
|
|
if (tok.id != .whitespace and need_ws) {
|
|
need_ws = false;
|
|
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
|
|
}
|
|
if (var_args and tok.id == .keyword_va_args) {
|
|
// do nothing
|
|
} else if (var_args and tok.id == .keyword_va_opt) {
|
|
const opt_l_paren = tokenizer.next();
|
|
if (opt_l_paren.id != .l_paren) {
|
|
try pp.err(opt_l_paren, .va_opt_lparen, .{});
|
|
return skipToNl(tokenizer);
|
|
}
|
|
tok.start = opt_l_paren.end;
|
|
|
|
var parens: u32 = 0;
|
|
while (true) {
|
|
const opt_tok = tokenizer.next();
|
|
switch (opt_tok.id) {
|
|
.l_paren => parens += 1,
|
|
.r_paren => if (parens == 0) {
|
|
break;
|
|
} else {
|
|
parens -= 1;
|
|
},
|
|
.nl, .eof => {
|
|
try pp.err(opt_tok, .va_opt_rparen, .{});
|
|
try pp.err(opt_l_paren, .to_match_paren, .{});
|
|
return skipToNl(tokenizer);
|
|
},
|
|
.whitespace => {},
|
|
else => tok.end = opt_tok.end,
|
|
}
|
|
}
|
|
} else if (tok.id.isMacroIdentifier()) {
|
|
tok.id.simplifyMacroKeyword();
|
|
const s = pp.tokSlice(tok);
|
|
if (mem.eql(u8, gnu_var_args, s)) {
|
|
tok.id = .keyword_va_args;
|
|
} else for (params.items, 0..) |param, i| {
|
|
if (mem.eql(u8, param, s)) {
|
|
                            // NOTE: it is fine to leave this as .macro_param even if the previous
                            // token was a ##, because ## processing will consume this token with
                            // the same (non-expanding) semantics as .macro_param_no_expand.
|
|
tok.id = .macro_param;
|
|
tok.end = @intCast(i);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
try pp.token_buf.append(tok);
|
|
},
|
|
}
|
|
}
|
|
|
|
const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
|
|
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
|
|
try pp.defineMacro(define_tok, macro_name, .{
|
|
.is_func = true,
|
|
.params = param_list,
|
|
.var_args = var_args or gnu_var_args.len != 0,
|
|
.tokens = token_list,
|
|
.loc = macro_name.loc,
|
|
});
|
|
}
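
// Examples of parameter lists accepted above:
//   #define F(a, b)     a + b                    // two named parameters
//   #define G(fmt, ...) printf(fmt, __VA_ARGS__) // C99/C23 variadic macro
//   #define H(args...)  f(args)                  // GNU named variadic parameter (diagnosed as an extension)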
|
|
|
|
/// Handle an #embed directive
/// embedDirective : ("FILENAME" | <FILENAME>) embedParam*
/// embedParam : IDENTIFIER (:: IDENTIFIER)? '(' <tokens> ')'
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
|
|
const first = tokenizer.nextNoWS();
|
|
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .ignore_trailing_tokens) catch |er| switch (er) {
|
|
error.InvalidInclude => return,
|
|
else => |e| return e,
|
|
};
|
|
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
|
|
|
|
// Check for empty filename.
|
|
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
|
|
if (tok_slice.len < 3) {
|
|
try pp.err(first, .empty_filename, .{});
|
|
return;
|
|
}
|
|
const filename = tok_slice[1 .. tok_slice.len - 1];
|
|
const include_type: Compilation.IncludeType = switch (filename_tok.id) {
|
|
.string_literal => .quotes,
|
|
.macro_string => .angle_brackets,
|
|
else => unreachable,
|
|
};
|
|
|
|
// Index into `token_buf`
|
|
const Range = struct {
|
|
start: u32,
|
|
end: u32,
|
|
|
|
fn expand(opt_range: ?@This(), pp_: *Preprocessor, tokenizer_: *Tokenizer) !void {
|
|
const range = opt_range orelse return;
|
|
const slice = pp_.token_buf.items[range.start..range.end];
|
|
for (slice) |tok| {
|
|
try pp_.expandMacro(tokenizer_, tok);
|
|
}
|
|
}
|
|
};
|
|
pp.token_buf.items.len = 0;
|
|
|
|
var limit: ?std.Io.Limit = null;
|
|
var prefix: ?Range = null;
|
|
var suffix: ?Range = null;
|
|
var if_empty: ?Range = null;
|
|
while (true) {
|
|
const param_first = tokenizer.nextNoWS();
|
|
switch (param_first.id) {
|
|
.nl, .eof => break,
|
|
.identifier => {},
|
|
else => {
|
|
try pp.err(param_first, .malformed_embed_param, .{});
|
|
continue;
|
|
},
|
|
}
|
|
|
|
const char_top = pp.char_buf.items.len;
|
|
defer pp.char_buf.items.len = char_top;
|
|
|
|
const maybe_colon = tokenizer.colonColon();
|
|
const param = switch (maybe_colon.id) {
|
|
.colon_colon => blk: {
|
|
// vendor::param
|
|
const param = tokenizer.nextNoWS();
|
|
if (param.id != .identifier) {
|
|
try pp.err(param, .malformed_embed_param, .{});
|
|
continue;
|
|
}
|
|
const l_paren = tokenizer.nextNoWS();
|
|
if (l_paren.id != .l_paren) {
|
|
try pp.err(l_paren, .malformed_embed_param, .{});
|
|
continue;
|
|
}
|
|
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param_first)));
|
|
try pp.char_buf.appendSlice("::");
|
|
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param)));
|
|
break :blk pp.char_buf.items;
|
|
},
|
|
.l_paren => Attribute.normalize(pp.tokSlice(param_first)),
|
|
else => {
|
|
try pp.err(maybe_colon, .malformed_embed_param, .{});
|
|
continue;
|
|
},
|
|
};
|
|
|
|
const start: u32 = @intCast(pp.token_buf.items.len);
|
|
while (true) {
|
|
const next = tokenizer.nextNoWS();
|
|
if (next.id == .r_paren) break;
|
|
if (next.id == .eof) {
|
|
try pp.err(maybe_colon, .malformed_embed_param, .{});
|
|
break;
|
|
}
|
|
try pp.token_buf.append(next);
|
|
}
|
|
const end: u32 = @intCast(pp.token_buf.items.len);
|
|
|
|
if (std.mem.eql(u8, param, "limit")) {
|
|
if (limit != null) {
|
|
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"limit"});
|
|
continue;
|
|
}
|
|
if (start + 1 != end) {
|
|
try pp.err(param_first, .malformed_embed_limit, .{});
|
|
continue;
|
|
}
|
|
const limit_tok = pp.token_buf.items[start];
|
|
if (limit_tok.id != .pp_num) {
|
|
try pp.err(param_first, .malformed_embed_limit, .{});
|
|
continue;
|
|
}
|
|
limit = .limited(std.fmt.parseInt(u32, pp.tokSlice(limit_tok), 10) catch {
|
|
try pp.err(limit_tok, .malformed_embed_limit, .{});
|
|
continue;
|
|
});
|
|
pp.token_buf.items.len = start;
|
|
} else if (std.mem.eql(u8, param, "prefix")) {
|
|
if (prefix != null) {
|
|
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"prefix"});
|
|
continue;
|
|
}
|
|
prefix = .{ .start = start, .end = end };
|
|
} else if (std.mem.eql(u8, param, "suffix")) {
|
|
if (suffix != null) {
|
|
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"suffix"});
|
|
continue;
|
|
}
|
|
suffix = .{ .start = start, .end = end };
|
|
} else if (std.mem.eql(u8, param, "if_empty")) {
|
|
if (if_empty != null) {
|
|
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"if_empty"});
|
|
continue;
|
|
}
|
|
if_empty = .{ .start = start, .end = end };
|
|
} else {
|
|
try pp.err(tokFromRaw(param_first), .unsupported_embed_param, .{param});
|
|
pp.token_buf.items.len = start;
|
|
}
|
|
}
|
|
|
|
const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type, limit orelse .unlimited, pp.dep_file)) orelse
|
|
return pp.fatalNotFound(filename_tok, filename);
|
|
defer pp.comp.gpa.free(embed_bytes);
|
|
|
|
try Range.expand(prefix, pp, tokenizer);
|
|
|
|
if (embed_bytes.len == 0) {
|
|
try Range.expand(if_empty, pp, tokenizer);
|
|
try Range.expand(suffix, pp, tokenizer);
|
|
return;
|
|
}
|
|
|
|
try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas
|
|
|
|
// TODO: We currently only support systems with CHAR_BIT == 8
|
|
// If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes
|
|
// and correctly account for the target's endianness
|
|
{
|
|
const byte = embed_bytes[0];
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.print(pp.gpa, "{d}", .{byte});
|
|
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok));
|
|
}
|
|
|
|
for (embed_bytes[1..]) |byte| {
|
|
const start = pp.comp.generated_buf.items.len;
|
|
try pp.comp.generated_buf.print(pp.gpa, ",{d}", .{byte});
|
|
pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } });
|
|
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok));
|
|
}
|
|
try pp.comp.generated_buf.append(pp.gpa, '\n');
|
|
|
|
try Range.expand(suffix, pp, tokenizer);
|
|
}
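
// Example of a C23 #embed directive handled above:
//   #embed "data.bin" limit(4) prefix(0x00,) suffix(, 0xFF) if_empty(0)
// The first 4 bytes of data.bin are emitted as a comma-separated list of integer tokens,
// surrounded by the expanded prefix/suffix tokens; if the file is empty, the if_empty
// tokens are emitted instead of the byte list.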
|
|
|
|
/// Handle a #include directive.
|
|
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
|
|
const first = tokenizer.nextNoWS();
|
|
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
|
|
error.InvalidInclude => return,
|
|
else => |e| return e,
|
|
};
|
|
|
|
// Prevent stack overflow
|
|
pp.include_depth += 1;
|
|
defer pp.include_depth -= 1;
|
|
if (pp.include_depth > max_include_depth) {
|
|
const loc: Source.Location = .{ .id = first.source, .byte_offset = first.start, .line = first.line };
|
|
try pp.err(loc, .too_many_includes, .{});
|
|
return error.StopPreprocessing;
|
|
}
|
|
|
|
if (pp.include_guards.get(new_source.id)) |guard| {
|
|
if (pp.defines.contains(guard)) return;
|
|
}
|
|
|
|
if (pp.dep_file) |dep| try dep.addDependency(pp.gpa, new_source.path);
|
|
if (pp.verbose) {
|
|
pp.verboseLog(first, "include file {s}", .{new_source.path});
|
|
}
|
|
|
|
const token_state = pp.getTokenState();
|
|
try pp.addIncludeStart(new_source);
|
|
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
|
|
error.StopPreprocessing => {
|
|
for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa);
|
|
pp.restoreTokenState(token_state);
|
|
return;
|
|
},
|
|
else => |e| return e,
|
|
};
|
|
try eof.checkMsEof(new_source, pp.comp);
|
|
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
|
|
try pp.addToken(.{ .id = .nl, .loc = .{
|
|
.id = tokenizer.source,
|
|
.line = tokenizer.line,
|
|
} });
|
|
}
|
|
if (pp.linemarkers == .none) return;
|
|
var next = first;
|
|
while (true) {
|
|
var tmp = tokenizer.*;
|
|
next = tmp.nextNoWS();
|
|
if (next.id != .nl) break;
|
|
tokenizer.* = tmp;
|
|
}
|
|
try pp.addIncludeResume(next.source, next.end, next.line);
|
|
}
|
|
|
|
/// Tokens that are part of a pragma directive can arise in 3 ways:
/// 1. Directly in the source text via `#pragma ...`
/// 2. Via a string literal argument to `_Pragma`
/// 3. Via a stringified macro argument which is used as an argument to `_Pragma`
/// operator_loc: location of `_Pragma`; null if this is from `#pragma`
/// arg_locs: expansion locations of the argument to `_Pragma`; empty if `#pragma` or a raw string literal was used
fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs {
    var tok = tokFromRaw(raw);
    if (operator_loc) |loc| {
        try tok.addExpansionLocation(pp.gpa, &.{loc});
    }
    try tok.addExpansionLocation(pp.gpa, arg_locs);
    return tok;
}
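
// The three cases from the doc comment above, illustrated:
//   #pragma GCC diagnostic push            // 1. direct directive
//   _Pragma("GCC diagnostic push")         // 2. string literal argument to _Pragma
//   #define P(x) _Pragma(#x)
//   P(GCC diagnostic push)                 // 3. stringified macro argument to _Pragma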
|
|
|
|
pub fn addToken(pp: *Preprocessor, tok_arg: TokenWithExpansionLocs) !void {
    const tok = try pp.unescapeUcn(tok_arg);
    if (tok.expansion_locs) |expansion_locs| {
        try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
    }
    try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc });
}

pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void {
    if (tok.expansion_locs) |expansion_locs| {
        pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
    }
    pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc });
}

pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
    try pp.tokens.ensureTotalCapacity(pp.gpa, capacity);
    try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity);
}

pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
    try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity);
    try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity);
}
|
|
|
|
/// Handle a pragma directive
fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void {
|
|
const name_tok = tokenizer.nextNoWS();
|
|
if (name_tok.id == .nl or name_tok.id == .eof) return;
|
|
|
|
try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs));
|
|
const pragma_start: u32 = @intCast(pp.tokens.len);
|
|
|
|
const name = pp.tokSlice(name_tok);
|
|
const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs);
|
|
try pp.addToken(pragma_name_tok);
|
|
while (true) {
|
|
const next_tok = tokenizer.next();
|
|
if (next_tok.id == .whitespace) continue;
|
|
if (next_tok.id == .eof) {
|
|
try pp.addToken(.{
|
|
.id = .nl,
|
|
.loc = .{ .id = .generated },
|
|
});
|
|
break;
|
|
}
|
|
try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs));
|
|
if (next_tok.id == .nl) break;
|
|
}
|
|
if (pp.comp.getPragma(name)) |prag| unknown: {
|
|
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
|
|
error.UnknownPragma => break :unknown,
|
|
else => |e| return e,
|
|
};
|
|
}
|
|
|
|
try pp.err(pragma_name_tok, .unknown_pragma, .{});
|
|
}
|
|
|
|
fn findIncludeFilenameToken(
    pp: *Preprocessor,
    first_token: RawToken,
    tokenizer: *Tokenizer,
    trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof },
) !TokenWithExpansionLocs {
    var first = first_token;

    if (first.id == .angle_bracket_left) to_end: {
        // The tokenizer does not handle <foo> include strings so do it here.
        while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) {
            switch (tokenizer.buf[tokenizer.index]) {
                '>' => {
                    tokenizer.index += 1;
                    first.end = tokenizer.index;
                    first.id = .macro_string;
                    break :to_end;
                },
                '\n' => break,
                else => {},
            }
        }
        const loc: Source.Location = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line };
        try pp.err(loc, .header_str_closing, .{});
        try pp.err(first, .header_str_match, .{});
    }

    const source_tok = tokFromRaw(first);
    const filename_tok, const expanded_trailing = switch (source_tok.id) {
        .string_literal, .macro_string => .{ source_tok, false },
        else => expanded: {
            // Try to expand if the argument is a macro.
            pp.top_expansion_buf.items.len = 0;
            defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
            try pp.top_expansion_buf.append(source_tok);
            pp.expansion_source_loc = source_tok.loc;

            try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
            var trailing_toks: []const TokenWithExpansionLocs = &.{};
            const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse {
                try pp.expectNl(tokenizer);
                return error.InvalidInclude;
            };
            const start = pp.comp.generated_buf.items.len;
            try pp.comp.generated_buf.appendSlice(pp.gpa, include_str);

            break :expanded .{ try pp.makeGeneratedToken(start, switch (include_str[0]) {
                '"' => .string_literal,
                '<' => .macro_string,
                else => unreachable,
            }, pp.top_expansion_buf.items[0]), trailing_toks.len != 0 };
        },
    };

    switch (trailing_token_behavior) {
        .expect_nl_eof => {
            // Error on extra tokens.
            const nl = tokenizer.nextNoWS();
            if ((nl.id != .nl and nl.id != .eof) or expanded_trailing) {
                skipToNl(tokenizer);
                try pp.err(filename_tok, .extra_tokens_directive_end, .{});
            }
        },
        .ignore_trailing_tokens => if (expanded_trailing) {
            try pp.err(filename_tok, .extra_tokens_directive_end, .{});
        },
    }
    return filename_tok;
}
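
/// Resolve the filename of an #include directive to a `Source`.
/// Errors if the filename is empty or the file cannot be found.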
fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source {
    const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
    defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);

    // Check for empty filename.
    const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
    if (tok_slice.len < 3) {
        try pp.err(first, .empty_filename, .{});
        return error.InvalidInclude;
    }

    // Find the file.
    const filename = tok_slice[1 .. tok_slice.len - 1];
    const include_type: Compilation.IncludeType = switch (filename_tok.id) {
        .string_literal => .quotes,
        .macro_string => .angle_brackets,
        else => unreachable,
    };

    return (try pp.comp.findInclude(filename, first, include_type, which)) orelse
        return pp.fatalNotFound(filename_tok, filename);
}
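
/// Write a linemarker (or a #line directive, depending on `pp.linemarkers`)
/// for `source` at `line_no` to `w`.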
fn printLinemarker(
    pp: *Preprocessor,
    w: *std.Io.Writer,
    line_no: u32,
    source: Source,
    start_resume: enum(u8) { start, @"resume", none },
) !void {
    try w.writeByte('#');
    if (pp.linemarkers == .line_directives) try w.writeAll("line");
    try w.print(" {d} \"{f}\"", .{ line_no, fmtEscapes(source.path) });
    if (pp.linemarkers == .numeric_directives) {
        switch (start_resume) {
            .none => {},
            .start => try w.writeAll(" 1"),
            .@"resume" => try w.writeAll(" 2"),
        }
        switch (source.kind) {
            .user => {},
            .system => try w.writeAll(" 3"),
            .extern_c_system => try w.writeAll(" 3 4"),
        }
    }
    try w.writeByte('\n');
}

// Number of consecutive empty lines required before they are collapsed and replaced with a linemarker.
const collapse_newlines = 8;

pub const DumpMode = enum {
    /// Standard preprocessor output; no macros.
    result_only,
    /// Output only #define directives for all the macros defined during the execution of the preprocessor.
    /// Only macros which are still defined at the end of preprocessing are printed.
    /// Only the most recent definition is printed.
    /// Defines are printed in arbitrary order.
    macros_only,
    /// Standard preprocessor output, but additionally output #define's and #undef's for macros as they are encountered.
    macros_and_result,
    /// Same as macros_and_result, except only the macro name is printed for #define's.
    macro_names_and_result,
};

/// Pretty-print the macro define or undef at location `loc`.
/// We re-tokenize the directive because we are printing a macro that may have the same name as one in
/// `pp.defines` but a different definition (due to being #undef'ed and then redefined).
fn prettyPrintMacro(pp: *Preprocessor, w: *std.Io.Writer, loc: Source.Location, parts: enum { name_only, name_and_body }) !void {
    const source = pp.comp.getSource(loc.id);
    var tokenizer: Tokenizer = .{
        .buf = source.buf,
        .langopts = pp.comp.langopts,
        .source = source.id,
        .index = loc.byte_offset,
    };
    var prev_ws = false; // avoid printing multiple spaces if /* */ comments are within the macro def
    var saw_name = false; // do not print comments before the name token is seen.
    while (true) {
        const tok = tokenizer.next();
        switch (tok.id) {
            .comment => {
                if (saw_name) {
                    prev_ws = false;
                    try w.print("{s}", .{pp.tokSlice(tok)});
                }
            },
            .nl, .eof => break,
            .whitespace => {
                if (!prev_ws) {
                    try w.writeByte(' ');
                    prev_ws = true;
                }
            },
            else => {
                prev_ws = false;
                try w.print("{s}", .{pp.tokSlice(tok)});
            },
        }
        if (tok.id == .identifier or tok.id == .extended_identifier) {
            if (parts == .name_only) break;
            saw_name = true;
        }
    }
}
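
/// Print a #define directive for every non-builtin macro that is still defined.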
fn prettyPrintMacrosOnly(pp: *Preprocessor, w: *std.Io.Writer) !void {
    for (pp.defines.values()) |macro| {
        if (macro.is_builtin) continue;

        try w.writeAll("#define ");
        try pp.prettyPrintMacro(w, macro.loc, .name_and_body);
        try w.writeByte('\n');
    }
}

/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: *std.Io.Writer, macro_dump_mode: DumpMode) !void {
    if (macro_dump_mode == .macros_only) {
        return pp.prettyPrintMacrosOnly(w);
    }

    const tok_ids = pp.tokens.items(.id);

    var i: u32 = 0;
    var last_nl = true;
    outer: while (true) : (i += 1) {
        var cur: Token = pp.tokens.get(i);
        switch (cur.id) {
            .eof => {
                if (!last_nl) try w.writeByte('\n');
                try w.flush();
                return;
            },
            .nl => {
                var newlines: u32 = 0;
                for (tok_ids[i..], i..) |id, j| {
                    if (id == .nl) {
                        newlines += 1;
                    } else if (id == .eof) {
                        if (!last_nl) try w.writeByte('\n');
                        try w.flush();
                        return;
                    } else if (id != .whitespace) {
                        if (pp.linemarkers == .none) {
                            if (newlines < 2) break;
                        } else if (newlines < collapse_newlines) {
                            break;
                        }

                        i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
                        if (!last_nl) try w.writeAll("\n");
                        if (pp.linemarkers != .none) {
                            const next = pp.tokens.get(i);
                            const source = pp.comp.getSource(next.loc.id);
                            const line_col = source.lineCol(next.loc);
                            try pp.printLinemarker(w, line_col.line_no, source, .none);
                            last_nl = true;
                        }
                        continue :outer;
                    }
                }
                last_nl = true;
                try w.writeAll("\n");
            },
            .keyword_pragma => {
                const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1));
                const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1;
                const pragma_len = @as(u32, @intCast(end_idx)) - i;

                if (pp.comp.getPragma(pragma_name)) |prag| {
                    if (!prag.shouldPreserveTokens(pp, i + 1)) {
                        try w.writeByte('\n');
                        i += pragma_len;
                        cur = pp.tokens.get(i);
                        continue;
                    }
                }
                try w.writeAll("#pragma");
                i += 1;
                while (true) : (i += 1) {
                    cur = pp.tokens.get(i);
                    if (cur.id == .nl) {
                        try w.writeByte('\n');
                        last_nl = true;
                        break;
                    }
                    try w.writeByte(' ');
                    const slice = pp.expandedSlice(cur);
                    try w.writeAll(slice);
                }
            },
            .whitespace => {
                var slice = pp.expandedSlice(cur);
                while (mem.indexOfScalar(u8, slice, '\n')) |some| {
                    if (pp.linemarkers != .none) try w.writeByte('\n');
                    slice = slice[some + 1 ..];
                }
                for (slice) |_| try w.writeByte(' ');
                last_nl = false;
            },
            .include_start => {
                const source = pp.comp.getSource(cur.loc.id);

                try pp.printLinemarker(w, 1, source, .start);
                last_nl = true;
            },
            .include_resume => {
                const source = pp.comp.getSource(cur.loc.id);
                const line_col = source.lineCol(cur.loc);
                if (!last_nl) try w.writeAll("\n");

                try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
                last_nl = true;
            },
            .keyword_define, .keyword_undef => {
                switch (macro_dump_mode) {
                    .macros_and_result, .macro_names_and_result => {
                        try w.writeByte('#');
                        try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only);
                        last_nl = false;
                    },
                    .result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output
                    .macros_only => unreachable, // handled by prettyPrintMacrosOnly
                }
            },
            else => {
                const slice = pp.expandedSlice(cur);
                try w.writeAll(slice);
                last_nl = false;
            },
        }
    }
}

/// Like `std.zig.fmtEscapes`, but for C strings. Hex escapes are used for any
/// non-ASCII/unprintable bytes to ensure that the string bytes do not change if
/// the encoding of the file is not UTF-8.
fn fmtEscapes(bytes: []const u8) FmtEscapes {
    return .{ .bytes = bytes };
}
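/// Formatter returned by `fmtEscapes`; used with the "{f}" format specifier.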
const FmtEscapes = struct {
    bytes: []const u8,
    pub fn format(ctx: FmtEscapes, w: *std.Io.Writer) !void {
        for (ctx.bytes) |byte| switch (byte) {
            '\n' => try w.writeAll("\\n"),
            '\r' => try w.writeAll("\\r"),
            '\t' => try w.writeAll("\\t"),
            '\\' => try w.writeAll("\\\\"),
            '"' => try w.writeAll("\\\""),
            ' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte),
            // Use hex escapes for any non-ASCII/unprintable characters.
            // This ensures that the parsed version of this string will end up
            // containing the same bytes as the input regardless of encoding.
            else => try w.print("\\x{x:0>2}", .{byte}),
        };
    }
};
test "Preserve pragma tokens sometimes" {
|
|
const gpa = std.testing.allocator;
|
|
const Test = struct {
|
|
fn runPreprocessor(source_text: []const u8) ![]const u8 {
|
|
var arena: std.heap.ArenaAllocator = .init(gpa);
|
|
defer arena.deinit();
|
|
|
|
var diagnostics: Diagnostics = .{ .output = .ignore };
|
|
var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
|
|
defer comp.deinit();
|
|
|
|
try comp.addDefaultPragmaHandlers();
|
|
|
|
var pp = Preprocessor.init(&comp, .default);
|
|
defer pp.deinit();
|
|
|
|
pp.preserve_whitespace = true;
|
|
assert(pp.linemarkers == .none);
|
|
|
|
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
|
|
const eof = try pp.preprocess(test_runner_macros);
|
|
try pp.addToken(eof);
|
|
|
|
var allocating: std.Io.Writer.Allocating = .init(gpa);
|
|
defer allocating.deinit();
|
|
|
|
try pp.prettyPrintTokens(&allocating.writer, .result_only);
|
|
return allocating.toOwnedSlice();
|
|
}
|
|
|
|
fn check(source_text: []const u8, expected: []const u8) !void {
|
|
const output = try runPreprocessor(source_text);
|
|
defer gpa.free(output);
|
|
|
|
try std.testing.expectEqualStrings(expected, output);
|
|
}
|
|
};
|
|
const preserve_gcc_diagnostic =
|
|
\\#pragma GCC diagnostic error "-Wnewline-eof"
|
|
\\#pragma GCC warning error "-Wnewline-eof"
|
|
\\int x;
|
|
\\#pragma GCC ignored error "-Wnewline-eof"
|
|
\\
|
|
;
|
|
try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic);
|
|
|
|
const omit_once =
|
|
\\#pragma once
|
|
\\int x;
|
|
\\#pragma once
|
|
\\
|
|
;
|
|
// TODO should only be one newline afterwards when emulating clang
|
|
try Test.check(omit_once, "\nint x;\n\n");
|
|
|
|
const omit_poison =
|
|
\\#pragma GCC poison foobar
|
|
\\
|
|
;
|
|
try Test.check(omit_poison, "\n");
|
|
}

test "destringify" {
    const gpa = std.testing.allocator;
    const Test = struct {
        fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void {
            pp.char_buf.clearRetainingCapacity();
            try pp.char_buf.ensureUnusedCapacity(stringified.len);
            pp.destringify(stringified);
            try std.testing.expectEqualStrings(destringified, pp.char_buf.items);
        }
    };
    var arena: std.heap.ArenaAllocator = .init(gpa);
    defer arena.deinit();
    var diagnostics: Diagnostics = .{ .output = .ignore };
    var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
    defer comp.deinit();
    var pp = Preprocessor.init(&comp, .default);
    defer pp.deinit();

    try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n");
    try Test.testDestringify(&pp,
        \\ \"FOO BAR BAZ\"
    ,
        \\ "FOO BAR BAZ"
    );
    try Test.testDestringify(&pp,
        \\ \\t\\n
        \\
    ,
        \\ \t\n
        \\
    );
}
test "Include guards" {
|
|
const Test = struct {
|
|
/// This is here so that when #elifdef / #elifndef are added we don't forget
|
|
/// to test that they don't accidentally break include guard detection
|
|
fn pairsWithIfndef(tok_id: RawToken.Id) bool {
|
|
return switch (tok_id) {
|
|
.keyword_elif,
|
|
.keyword_elifdef,
|
|
.keyword_elifndef,
|
|
.keyword_else,
|
|
=> true,
|
|
|
|
.keyword_include,
|
|
.keyword_include_next,
|
|
.keyword_embed,
|
|
.keyword_define,
|
|
.keyword_defined,
|
|
.keyword_undef,
|
|
.keyword_ifdef,
|
|
.keyword_ifndef,
|
|
.keyword_error,
|
|
.keyword_warning,
|
|
.keyword_pragma,
|
|
.keyword_line,
|
|
.keyword_endif,
|
|
=> false,
|
|
else => unreachable,
|
|
};
|
|
}
|
|
|
|
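
        /// Tokens that the test loop below skips entirely.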
        fn skippable(tok_id: RawToken.Id) bool {
            return switch (tok_id) {
                .keyword_defined, .keyword_va_args, .keyword_va_opt, .keyword_endif => true,
                else => false,
            };
        }
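
        /// Build a source from `template` with the given directive keyword,
        /// preprocess it, and check how many include guards were detected.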
        fn testIncludeGuard(gpa: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void {
            var arena_state: std.heap.ArenaAllocator = .init(gpa);
            defer arena_state.deinit();
            const arena = arena_state.allocator();

            var diagnostics: Diagnostics = .{ .output = .ignore };
            var comp = Compilation.init(gpa, arena, &diagnostics, std.fs.cwd());
            defer comp.deinit();
            var pp = Preprocessor.init(&comp, .default);
            defer pp.deinit();

            const path = try std.fs.path.join(arena, &.{ ".", "bar.h" });

            _ = try comp.addSourceFromBuffer(path, "int bar = 5;\n");

            var buf = std.array_list.Managed(u8).init(gpa);
            defer buf.deinit();

            switch (tok_id) {
                .keyword_include, .keyword_include_next => try buf.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }),
                .keyword_define, .keyword_undef => try buf.print(template, .{ tok_id.lexeme().?, " BAR" }),
                .keyword_ifndef,
                .keyword_ifdef,
                .keyword_elifdef,
                .keyword_elifndef,
                => try buf.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }),
                else => try buf.print(template, .{ tok_id.lexeme().?, "" }),
            }
            const source = try comp.addSourceFromBuffer("test.h", buf.items);
            _ = try pp.preprocess(source);

            try std.testing.expectEqual(expected_guards, pp.include_guards.count());
        }
    };
    const tags = std.meta.tags(RawToken.Id);
    for (tags) |tag| {
        if (Test.skippable(tag)) continue;
        var copy = tag;
        copy.simplifyMacroKeyword();
        if (copy != tag or tag == .keyword_else) {
            const inside_ifndef_template =
                \\//Leading comment (should be ignored)
                \\
                \\#ifndef FOO
                \\#{s}{s}
                \\#endif
            ;
            const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1;
            try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards);

            const outside_ifndef_template =
                \\#ifndef FOO
                \\#endif
                \\#{s}{s}
            ;
            try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0);
        }
    }
}