//! Vendored aro sources: zig/lib/compiler/aro/aro/Preprocessor.zig

const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const assert = std.debug.assert;
const Attribute = @import("Attribute.zig");
const Compilation = @import("Compilation.zig");
const Error = Compilation.Error;
const Diagnostics = @import("Diagnostics.zig");
const DepFile = @import("DepFile.zig");
const features = @import("features.zig");
const Hideset = @import("Hideset.zig");
const Parser = @import("Parser.zig");
const Source = @import("Source.zig");
const text_literal = @import("text_literal.zig");
const Tokenizer = @import("Tokenizer.zig");
const RawToken = Tokenizer.Token;
const SourceEpoch = Compilation.Environment.SourceEpoch;
const Tree = @import("Tree.zig");
const Token = Tree.Token;
const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs;
const DefineMap = std.StringArrayHashMapUnmanaged(Macro);
const RawTokenList = std.array_list.Managed(RawToken);
const max_include_depth = 200;
/// Errors that can be returned when expanding a macro.
/// error.UnknownPragma can occur within Preprocessor.pragma() but
/// it is handled there and doesn't escape that function
const MacroError = Error || error{StopPreprocessing};
const IfContext = struct {
const Backing = u2;
const Nesting = enum(Backing) {
until_else,
until_endif,
until_endif_seen_else,
};
const buf_size_bits = @bitSizeOf(Backing) * 256;
kind: [buf_size_bits / std.mem.byte_size_in_bits]u8,
level: u8,
fn get(self: *const IfContext) Nesting {
return @enumFromInt(std.mem.readPackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2));
}
fn set(self: *IfContext, context: Nesting) void {
std.mem.writePackedIntNative(Backing, &self.kind, @as(usize, self.level) * 2, @intFromEnum(context));
}
fn increment(self: *IfContext) bool {
self.level, const overflowed = @addWithOverflow(self.level, 1);
return overflowed != 0;
}
fn decrement(self: *IfContext) void {
self.level -= 1;
}
/// Initialize `kind` to an invalid value since it is an error to read the kind before setting it.
/// Doing so will trigger safety-checked undefined behavior in `IfContext.get`
const default: IfContext = .{ .kind = @splat(0xFF), .level = 0 };
};
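// Minimal illustration of the packed 2-bit-per-level nesting state above;
// `increment` reports overflow once more than 255 conditionals are nested.
test "IfContext packed nesting state" {
    var ctx: IfContext = .default;
    try std.testing.expect(!ctx.increment()); // enter nesting level 1
    ctx.set(.until_else);
    try std.testing.expectEqual(IfContext.Nesting.until_else, ctx.get());
    ctx.set(.until_endif_seen_else);
    try std.testing.expectEqual(IfContext.Nesting.until_endif_seen_else, ctx.get());
    ctx.decrement();
}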
pub const Macro = struct {
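// For example, `#define SQUARE(x) ((x) * (x))` is stored with params = {"x"},
// is_func = true, var_args = false, and `tokens` holding the raw tokens of `((x) * (x))`.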
/// Parameters of the function type macro
params: []const []const u8,
/// Token constituting the macro body
tokens: []const RawToken,
/// If the function type macro has variable number of arguments
var_args: bool,
/// Is a function type macro
is_func: bool,
/// Is a predefined macro
is_builtin: bool = false,
/// Location of macro in the source
loc: Source.Location,
fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
if (a.tokens.len != b.tokens.len) return false;
if (a.is_builtin != b.is_builtin) return false;
for (a.tokens, b.tokens) |a_tok, b_tok| if (!tokEql(pp, a_tok, b_tok)) return false;
if (a.is_func and b.is_func) {
if (a.var_args != b.var_args) return false;
if (a.params.len != b.params.len) return false;
for (a.params, b.params) |a_param, b_param| if (!mem.eql(u8, a_param, b_param)) return false;
}
return true;
}
fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool {
return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b));
}
};
const Preprocessor = @This();
const ExpansionEntry = struct {
idx: Tree.TokenIndex,
locs: [*]Source.Location,
};
const TokenState = struct {
tokens_len: usize,
expansion_entries_len: usize,
};
comp: *Compilation,
diagnostics: *Diagnostics,
gpa: mem.Allocator,
arena: std.heap.ArenaAllocator,
defines: DefineMap = .{},
/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity
tokens: Token.List = .{},
/// Do not directly mutate this; must be kept in sync with `tokens`
expansion_entries: std.MultiArrayList(ExpansionEntry) = .{},
token_buf: RawTokenList,
char_buf: std.array_list.Managed(u8),
/// Counter that is incremented each time preprocess() is called
/// Can be used to distinguish multiple preprocessings of the same file
preprocess_count: u32 = 0,
generated_line: u32 = 1,
add_expansion_nl: u32 = 0,
include_depth: u8 = 0,
counter: u32 = 0,
expansion_source_loc: Source.Location = undefined,
poisoned_identifiers: std.StringHashMap(void),
/// Map from Source.Id to macro name in the `#ifndef` condition which guards the source, if any
include_guards: std.AutoHashMapUnmanaged(Source.Id, []const u8) = .{},
/// Store `keyword_define` and `keyword_undef` tokens.
/// Used to implement preprocessor debug dump options
/// Must be false unless in -E mode (parser does not handle those token types)
store_macro_tokens: bool = false,
/// Memory is retained to avoid allocation on every single token.
top_expansion_buf: ExpandBuf,
/// Dump current state to stderr.
verbose: bool = false,
preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,
hideset: Hideset,
/// Epoch used for __DATE__, __TIME__, and possibly __TIMESTAMP__
source_epoch: SourceEpoch,
m_times: std.AutoHashMapUnmanaged(Source.Id, u64) = .{},
/// The dependency file tracking all includes and embeds.
dep_file: ?*DepFile = null,
pub const parse = Parser.parse;
pub const Linemarkers = enum {
/// No linemarker tokens. Required setting if parser will run
none,
/// #line <num> "filename"
line_directives,
/// # <num> "filename" flags
numeric_directives,
};
pub fn init(comp: *Compilation, source_epoch: SourceEpoch) Preprocessor {
const pp: Preprocessor = .{
.comp = comp,
.diagnostics = comp.diagnostics,
.gpa = comp.gpa,
.arena = std.heap.ArenaAllocator.init(comp.gpa),
.token_buf = RawTokenList.init(comp.gpa),
.char_buf = std.array_list.Managed(u8).init(comp.gpa),
.poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
.top_expansion_buf = ExpandBuf.init(comp.gpa),
.hideset = .{ .comp = comp },
.source_epoch = source_epoch,
};
comp.pragmaEvent(.before_preprocess);
return pp;
}
/// Initialize Preprocessor with builtin macros.
pub fn initDefault(comp: *Compilation) !Preprocessor {
const source_epoch: SourceEpoch = comp.environment.sourceEpoch() catch |er| switch (er) {
error.InvalidEpoch => blk: {
const diagnostic: Diagnostic = .invalid_source_epoch;
try comp.diagnostics.add(.{ .text = diagnostic.fmt, .kind = diagnostic.kind, .opt = diagnostic.opt, .location = null });
break :blk .default;
},
};
var pp = init(comp, source_epoch);
errdefer pp.deinit();
try pp.addBuiltinMacros();
return pp;
}
// `param_tok_id` is comptime so that the generated `tokens` list is unique for every macro.
fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, comptime param_tok_id: Token.Id) !void {
try pp.defines.putNoClobber(pp.gpa, name, .{
.params = &[1][]const u8{"X"},
.tokens = &[1]RawToken{.{
.id = param_tok_id,
.source = .generated,
}},
.var_args = false,
.is_func = is_func,
.loc = .{ .id = .generated },
.is_builtin = true,
});
}
pub fn addBuiltinMacros(pp: *Preprocessor) !void {
try pp.addBuiltinMacro("__has_attribute", true, .macro_param_has_attribute);
try pp.addBuiltinMacro("__has_c_attribute", true, .macro_param_has_c_attribute);
try pp.addBuiltinMacro("__has_declspec_attribute", true, .macro_param_has_declspec_attribute);
try pp.addBuiltinMacro("__has_warning", true, .macro_param_has_warning);
try pp.addBuiltinMacro("__has_feature", true, .macro_param_has_feature);
try pp.addBuiltinMacro("__has_extension", true, .macro_param_has_extension);
try pp.addBuiltinMacro("__has_builtin", true, .macro_param_has_builtin);
try pp.addBuiltinMacro("__has_include", true, .macro_param_has_include);
try pp.addBuiltinMacro("__has_include_next", true, .macro_param_has_include_next);
try pp.addBuiltinMacro("__has_embed", true, .macro_param_has_embed);
try pp.addBuiltinMacro("__is_identifier", true, .macro_param_is_identifier);
try pp.addBuiltinMacro("_Pragma", true, .macro_param_pragma_operator);
if (pp.comp.langopts.ms_extensions) {
try pp.addBuiltinMacro("__identifier", true, .macro_param_ms_identifier);
try pp.addBuiltinMacro("__pragma", true, .macro_param_ms_pragma);
}
try pp.addBuiltinMacro("__FILE__", false, .macro_file);
try pp.addBuiltinMacro("__LINE__", false, .macro_line);
try pp.addBuiltinMacro("__COUNTER__", false, .macro_counter);
try pp.addBuiltinMacro("__DATE__", false, .macro_date);
try pp.addBuiltinMacro("__TIME__", false, .macro_time);
try pp.addBuiltinMacro("__TIMESTAMP__", false, .macro_timestamp);
}
pub fn deinit(pp: *Preprocessor) void {
pp.defines.deinit(pp.gpa);
pp.tokens.deinit(pp.gpa);
pp.arena.deinit();
pp.token_buf.deinit();
pp.char_buf.deinit();
pp.poisoned_identifiers.deinit();
pp.include_guards.deinit(pp.gpa);
pp.top_expansion_buf.deinit();
pp.hideset.deinit();
for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa);
pp.expansion_entries.deinit(pp.gpa);
pp.m_times.deinit(pp.gpa);
}
/// Free buffers that are not needed after preprocessing
fn clearBuffers(pp: *Preprocessor) void {
pp.token_buf.clearAndFree();
pp.char_buf.clearAndFree();
pp.top_expansion_buf.clearAndFree();
pp.hideset.clearAndFree();
}
fn mTime(pp: *Preprocessor, source_id: Source.Id) !u64 {
const gop = try pp.m_times.getOrPut(pp.gpa, source_id);
if (!gop.found_existing) {
gop.value_ptr.* = pp.comp.getSourceMTimeUncached(source_id) orelse 0;
}
return gop.value_ptr.*;
}
pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location {
const S = struct {
fn orderTokenIndex(context: Tree.TokenIndex, item: Tree.TokenIndex) std.math.Order {
return std.math.order(context, item);
}
};
const indices = pp.expansion_entries.items(.idx);
const idx = std.sort.binarySearch(Tree.TokenIndex, indices, tok, S.orderTokenIndex) orelse return &.{};
const locs = pp.expansion_entries.items(.locs)[idx];
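// Each locs buffer is terminated by a sentinel entry whose id is .unused.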
var i: usize = 0;
while (locs[i].id != .unused) : (i += 1) {}
return locs[0..i];
}
/// Preprocess a compilation unit of sources into a parsable list of tokens.
pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void {
assert(sources.len > 1);
const first = sources[0];
try pp.addIncludeStart(first);
for (sources[1..]) |header| {
try pp.addIncludeStart(header);
_ = try pp.preprocess(header);
}
try pp.addIncludeResume(first.id, 0, 1);
const eof = try pp.preprocess(first);
try pp.addToken(eof);
pp.clearBuffers();
}
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
const eof = pp.preprocessExtra(source) catch |er| switch (er) {
// This cannot occur in the main file and is handled in `include`.
error.StopPreprocessing => unreachable,
else => |e| return e,
};
try eof.checkMsEof(source, pp.comp);
return eof;
}
/// Tokenize a file without any preprocessing, returns eof token.
pub fn tokenize(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
assert(pp.linemarkers == .none);
assert(pp.preserve_whitespace == false);
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
while (true) {
const tok = tokenizer.next();
if (tok.id == .eof) return tokFromRaw(tok);
try pp.addToken(tokFromRaw(tok));
}
}
pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
if (pp.linemarkers == .none) return;
try pp.addToken(.{ .id = .include_start, .loc = .{
.id = source.id,
.byte_offset = std.math.maxInt(u32),
.line = 1,
} });
}
pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
if (pp.linemarkers == .none) return;
try pp.addToken(.{ .id = .include_resume, .loc = .{
.id = source,
.byte_offset = offset,
.line = line,
} });
}
fn invalidTokenDiagnostic(tok_id: Token.Id) Diagnostic {
return switch (tok_id) {
.unterminated_string_literal => .unterminated_string_literal_warning,
.empty_char_literal => .empty_char_literal_warning,
.unterminated_char_literal => .unterminated_char_literal_warning,
else => unreachable,
};
}
/// Return the name of the #ifndef guard macro that starts a source, if any.
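/// e.g. a file beginning with `#ifndef FOO_H` yields "FOO_H"; the guard is later
/// discarded in preprocessExtra if the rest of the file does not fit the guard pattern.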
fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
var hash = tokenizer.nextNoWS();
while (hash.id == .nl) hash = tokenizer.nextNoWS();
if (hash.id != .hash) return null;
const ifndef = tokenizer.nextNoWS();
if (ifndef.id != .keyword_ifndef) return null;
const guard = tokenizer.nextNoWS();
if (guard.id != .identifier) return null;
return pp.tokSlice(guard);
}
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs {
var guard_name = pp.findIncludeGuard(source);
pp.preprocess_count += 1;
var tokenizer = Tokenizer{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
};
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
var if_context: IfContext = .default;
var start_of_line = true;
while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else {
const directive = tokenizer.nextNoWS();
const directive_loc: Source.Location = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line };
switch (directive.id) {
.keyword_error, .keyword_warning => {
// #error tokens..
pp.top_expansion_buf.items.len = 0;
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
while (true) {
tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) break;
if (tok.id == .whitespace) tok.id = .macro_ws;
try pp.top_expansion_buf.append(tokFromRaw(tok));
}
try pp.stringify(pp.top_expansion_buf.items);
const slice = pp.char_buf.items[char_top + 1 .. pp.char_buf.items.len - 2];
try pp.err(
directive_loc,
if (directive.id == .keyword_error) .error_directive else .warning_directive,
.{slice},
);
},
.keyword_if => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
if (try pp.expr(&tokenizer)) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #if", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #if", .{});
}
}
},
.keyword_ifdef => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name) != null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #ifdef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #ifdef", .{});
}
}
},
.keyword_ifndef => {
const overflowed = if_context.increment();
if (overflowed)
return pp.fatal(directive, "too many #if nestings", .{});
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name) == null) {
if_context.set(.until_endif);
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
}
},
.keyword_elif => {
if (if_context.level == 0) {
try pp.err(directive, .elif_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => if (try pp.expr(&tokenizer)) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elif", .{});
}
} else {
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elif", .{});
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elif_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_elifdef => {
if (if_context.level == 0) {
try pp.err(directive, .elifdef_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
const macro_name = try pp.expectMacroName(&tokenizer);
if (macro_name == null) {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
}
} else {
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name.?) != null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elifdef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifdef", .{});
}
}
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elifdef_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_elifndef => {
if (if_context.level == 0) {
try pp.err(directive, .elifndef_without_if, .{});
_ = if_context.increment();
if_context.set(.until_else);
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
const macro_name = try pp.expectMacroName(&tokenizer);
if (macro_name == null) {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
}
} else {
try pp.expectNl(&tokenizer);
if (pp.defines.get(macro_name.?) == null) {
if_context.set(.until_endif);
if (pp.verbose) {
pp.verboseLog(directive, "entering then branch of #elifndef", .{});
}
} else {
if_context.set(.until_else);
try pp.skip(&tokenizer, .until_else);
if (pp.verbose) {
pp.verboseLog(directive, "entering else branch of #elifndef", .{});
}
}
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif),
.until_endif_seen_else => {
try pp.err(directive, .elifndef_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_else => {
try pp.expectNl(&tokenizer);
if (if_context.level == 0) {
try pp.err(directive, .else_without_if, .{});
continue;
} else if (if_context.level == 1) {
guard_name = null;
}
switch (if_context.get()) {
.until_else => {
if_context.set(.until_endif_seen_else);
if (pp.verbose) {
pp.verboseLog(directive, "#else branch here", .{});
}
},
.until_endif => try pp.skip(&tokenizer, .until_endif_seen_else),
.until_endif_seen_else => {
try pp.err(directive, .else_after_else, .{});
skipToNl(&tokenizer);
},
}
},
.keyword_endif => {
try pp.expectNl(&tokenizer);
if (if_context.level == 0) {
guard_name = null;
try pp.err(directive, .endif_without_if, .{});
continue;
} else if (if_context.level == 1) {
const saved_tokenizer = tokenizer;
defer tokenizer = saved_tokenizer;
var next = tokenizer.nextNoWS();
while (next.id == .nl) : (next = tokenizer.nextNoWS()) {}
if (next.id != .eof) guard_name = null;
}
if_context.decrement();
},
.keyword_define => try pp.define(&tokenizer, directive),
.keyword_undef => {
const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue;
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(directive));
}
_ = pp.defines.orderedRemove(macro_name);
try pp.expectNl(&tokenizer);
},
.keyword_include => {
try pp.include(&tokenizer, .first);
continue;
},
.keyword_include_next => {
try pp.err(directive_loc, .include_next, .{});
if (pp.include_depth == 0) {
try pp.err(directive_loc, .include_next_outside_header, .{});
try pp.include(&tokenizer, .first);
} else {
try pp.include(&tokenizer, .next);
}
},
.keyword_embed => try pp.embed(&tokenizer),
.keyword_pragma => {
try pp.pragma(&tokenizer, directive, null, &.{});
continue;
},
.keyword_line => {
// #line number "file"
const digits = tokenizer.nextNoWS();
if (digits.id != .pp_num) try pp.err(digits, .line_simple_digit, .{});
// TODO: validate that the pp_num token is solely digits
if (digits.id == .eof or digits.id == .nl) continue;
const name = tokenizer.nextNoWS();
if (name.id == .eof or name.id == .nl) continue;
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
try pp.expectNl(&tokenizer);
},
.pp_num => {
// # number "file" flags
// TODO: validate that the pp_num token is solely digits
// if not, emit `GNU line marker directive requires a simple digit sequence`
const name = tokenizer.nextNoWS();
if (name.id == .eof or name.id == .nl) continue;
if (name.id != .string_literal) try pp.err(name, .line_invalid_filename, .{});
const flag_1 = tokenizer.nextNoWS();
if (flag_1.id == .eof or flag_1.id == .nl) continue;
const flag_2 = tokenizer.nextNoWS();
if (flag_2.id == .eof or flag_2.id == .nl) continue;
const flag_3 = tokenizer.nextNoWS();
if (flag_3.id == .eof or flag_3.id == .nl) continue;
const flag_4 = tokenizer.nextNoWS();
if (flag_4.id == .eof or flag_4.id == .nl) continue;
try pp.expectNl(&tokenizer);
},
.nl => {},
.eof => {
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
return tokFromRaw(directive);
},
else => {
try pp.err(tok, .invalid_preprocessing_directive, .{});
skipToNl(&tokenizer);
},
}
if (pp.preserve_whitespace) {
tok.id = .nl;
try pp.addToken(tokFromRaw(tok));
}
},
.whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)),
.nl => {
start_of_line = true;
if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok));
},
.eof => {
if (if_context.level != 0) try pp.err(tok, .unterminated_conditional_directive, .{});
// The following check needs to occur here and not at the top of the function
// because a pragma may change the level during preprocessing
if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') {
try pp.err(tok, .newline_eof, .{});
}
if (guard_name) |name| {
if (try pp.include_guards.fetchPut(pp.gpa, source.id, name)) |prev| {
assert(mem.eql(u8, name, prev.value));
}
}
return tokFromRaw(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
start_of_line = false;
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.expandMacro(&tokenizer, tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) {
try pp.err(tok, .poisoned_identifier, .{});
}
// Add the token to the buffer doing any necessary expansions.
start_of_line = false;
try pp.expandMacro(&tokenizer, tok);
},
}
}
}
/// Get raw token source string.
/// Returned slice is invalidated when comp.generated_buf is updated.
pub fn tokSlice(pp: *const Preprocessor, token: anytype) []const u8 {
if (token.id.lexeme()) |some| return some;
const source = pp.comp.getSource(token.source);
return source.buf[token.start..token.end];
}
/// Convert a token from the Tokenizer into a token used by the parser.
fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs {
return .{
.id = raw.id,
.loc = .{
.id = raw.source,
.byte_offset = raw.start,
.line = raw.line,
},
};
}
pub const Diagnostic = @import("Preprocessor/Diagnostic.zig");
fn err(pp: *Preprocessor, loc: anytype, diagnostic: Diagnostic, args: anytype) Compilation.Error!void {
if (pp.diagnostics.effectiveKind(diagnostic) == .off) return;
var sf = std.heap.stackFallback(1024, pp.gpa);
var allocating: std.Io.Writer.Allocating = .init(sf.get());
defer allocating.deinit();
Diagnostics.formatArgs(&allocating.writer, diagnostic.fmt, args) catch return error.OutOfMemory;
try pp.diagnostics.addWithLocation(pp.comp, .{
.kind = diagnostic.kind,
.text = allocating.getWritten(),
.opt = diagnostic.opt,
.extension = diagnostic.extension,
.location = switch (@TypeOf(loc)) {
RawToken => (Source.Location{
.id = loc.source,
.byte_offset = loc.start,
.line = loc.line,
}).expand(pp.comp),
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.loc.expand(pp.comp),
Source.Location => loc.expand(pp.comp),
else => @compileError("invalid token type " ++ @typeName(@TypeOf(loc))),
},
}, switch (@TypeOf(loc)) {
RawToken => &.{},
TokenWithExpansionLocs, *TokenWithExpansionLocs => loc.expansionSlice(),
Source.Location => &.{},
else => @compileError("invalid token type"),
}, true);
}
fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error {
var sf = std.heap.stackFallback(1024, pp.gpa);
var allocating: std.Io.Writer.Allocating = .init(sf.get());
defer allocating.deinit();
Diagnostics.formatArgs(&allocating.writer, fmt, args) catch return error.OutOfMemory;
try pp.diagnostics.add(.{
.kind = .@"fatal error",
.text = allocating.getWritten(),
.location = (Source.Location{
.id = raw.source,
.byte_offset = raw.start,
.line = raw.line,
}).expand(pp.comp),
});
unreachable;
}
fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error {
const old = pp.diagnostics.state.fatal_errors;
pp.diagnostics.state.fatal_errors = true;
defer pp.diagnostics.state.fatal_errors = old;
var sf = std.heap.stackFallback(1024, pp.gpa);
var buf = std.array_list.Managed(u8).init(sf.get());
defer buf.deinit();
try buf.print("'{s}' not found", .{filename});
try pp.diagnostics.addWithLocation(pp.comp, .{
.kind = .@"fatal error",
.text = buf.items,
.location = tok.loc.expand(pp.comp),
}, tok.expansionSlice(), true);
unreachable; // should've returned FatalError
}
fn verboseLog(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) void {
@branchHint(.cold);
const source = pp.comp.getSource(raw.source);
const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start });
var stderr_buffer: [64]u8 = undefined;
var writer = std.debug.lockStderrWriter(&stderr_buffer);
defer std.debug.unlockStderrWriter();
writer.print("{s}:{d}:{d}: ", .{ source.path, line_col.line_no, line_col.col }) catch return;
writer.print(fmt, args) catch return;
writer.writeByte('\n') catch return;
writer.writeAll(line_col.line) catch return;
writer.writeByte('\n') catch return;
}
/// Consume next token, error if it is not an identifier.
fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 {
const macro_name = tokenizer.nextNoWS();
if (!macro_name.id.isMacroIdentifier()) {
try pp.err(macro_name, .macro_name_missing, .{});
skipToNl(tokenizer);
return null;
}
return pp.tokSlice(macro_name);
}
/// Skip until after a newline, error if extra tokens before it.
fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
var sent_err = false;
while (true) {
const tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) return;
if (tok.id == .whitespace or tok.id == .comment) continue;
if (!sent_err) {
sent_err = true;
try pp.err(tok, .extra_tokens_directive_end, .{});
}
}
}
fn getTokenState(pp: *const Preprocessor) TokenState {
return .{
.tokens_len = pp.tokens.len,
.expansion_entries_len = pp.expansion_entries.len,
};
}
fn restoreTokenState(pp: *Preprocessor, state: TokenState) void {
pp.tokens.len = state.tokens_len;
pp.expansion_entries.len = state.expansion_entries_len;
}
/// Consume all tokens until a newline and parse the result into a boolean.
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
const token_state = pp.getTokenState();
defer {
for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
pp.restoreTokenState(token_state);
}
pp.top_expansion_buf.items.len = 0;
const eof = while (true) {
const tok = tokenizer.next();
switch (tok.id) {
.nl, .eof => break tok,
.whitespace => if (pp.top_expansion_buf.items.len == 0) continue,
else => {},
}
try pp.top_expansion_buf.append(tokFromRaw(tok));
} else unreachable;
if (pp.top_expansion_buf.items.len != 0) {
pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
}
for (pp.top_expansion_buf.items) |tok| {
if (tok.id == .macro_ws) continue;
if (!tok.id.validPreprocessorExprStart()) {
try pp.err(tok, .invalid_preproc_expr_start, .{});
return false;
}
break;
} else {
try pp.err(eof, .expected_value_in_expr, .{});
return false;
}
// validate the tokens in the expression
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
var i: usize = 0;
const items = pp.top_expansion_buf.items;
while (i < items.len) : (i += 1) {
var tok = items[i];
switch (tok.id) {
.string_literal,
.string_literal_utf_16,
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
=> {
try pp.err(tok, .string_literal_in_pp_expr, .{});
return false;
},
.plus_plus,
.minus_minus,
.plus_equal,
.minus_equal,
.asterisk_equal,
.slash_equal,
.percent_equal,
.angle_bracket_angle_bracket_left_equal,
.angle_bracket_angle_bracket_right_equal,
.ampersand_equal,
.caret_equal,
.pipe_equal,
.l_bracket,
.r_bracket,
.l_brace,
.r_brace,
.ellipsis,
.semicolon,
.hash,
.hash_hash,
.equal,
.arrow,
.period,
=> {
try pp.err(tok, .invalid_preproc_operator, .{});
return false;
},
.macro_ws, .whitespace => continue,
.keyword_false => tok.id = .zero,
.keyword_true => tok.id = .one,
else => if (tok.id.isMacroIdentifier()) {
if (tok.id == .keyword_defined) {
const tokens_consumed = try pp.handleKeywordDefined(&tok, items[i + 1 ..], eof);
i += tokens_consumed;
} else {
try pp.err(tok, .undefined_macro, .{pp.expandedSlice(tok)});
if (i + 1 < pp.top_expansion_buf.items.len and
pp.top_expansion_buf.items[i + 1].id == .l_paren)
{
try pp.err(tok, .fn_macro_undefined, .{pp.expandedSlice(tok)});
return false;
}
tok.id = .zero; // undefined macro
}
},
}
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok));
}
try pp.addToken(.{
.id = .eof,
.loc = tokFromRaw(eof).loc,
});
// Actually parse it.
var parser: Parser = .{
.pp = pp,
.comp = pp.comp,
.diagnostics = pp.diagnostics,
.gpa = pp.gpa,
.tok_ids = pp.tokens.items(.id),
.tok_i = @intCast(token_state.tokens_len),
.in_macro = true,
.strings = std.array_list.Managed(u8).init(pp.comp.gpa),
.tree = undefined,
.labels = undefined,
.decl_buf = undefined,
.list_buf = undefined,
.param_buf = undefined,
.enum_buf = undefined,
.record_buf = undefined,
.attr_buf = undefined,
.string_ids = undefined,
};
defer parser.strings.deinit();
return parser.macroExpr();
}
/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined
/// Returns the number of tokens consumed
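/// Accepts both the `defined NAME` and `defined(NAME)` forms.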
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize {
std.debug.assert(macro_tok.id == .keyword_defined);
var it = TokenIterator.init(tokens);
const first = it.nextNoWS() orelse {
try pp.err(eof, .macro_name_missing, .{});
return it.i;
};
switch (first.id) {
.l_paren => {},
else => {
if (!first.id.isMacroIdentifier()) {
try pp.err(first, .macro_name_must_be_identifier, .{});
}
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(first))) .one else .zero;
return it.i;
},
}
const second = it.nextNoWS() orelse {
try pp.err(eof, .macro_name_missing, .{});
return it.i;
};
if (!second.id.isMacroIdentifier()) {
try pp.err(second, .macro_name_must_be_identifier, .{});
return it.i;
}
macro_tok.id = if (pp.defines.contains(pp.expandedSlice(second))) .one else .zero;
const last = it.nextNoWS();
if (last == null or last.?.id != .r_paren) {
const tok = last orelse tokFromRaw(eof);
try pp.err(tok, .closing_paren, .{});
try pp.err(first, .to_match_paren, .{});
}
return it.i;
}
/// Skip until #else #elif #endif, return last directive token id.
/// Also skips nested #if ... #endifs.
fn skip(
pp: *Preprocessor,
tokenizer: *Tokenizer,
cont: enum { until_else, until_endif, until_endif_seen_else },
) Error!void {
var ifs_seen: u32 = 0;
var line_start = true;
while (tokenizer.index < tokenizer.buf.len) {
if (line_start) {
const saved_tokenizer = tokenizer.*;
const hash = tokenizer.nextNoWS();
if (hash.id == .nl) continue;
line_start = false;
if (hash.id != .hash) continue;
const directive = tokenizer.nextNoWS();
switch (directive.id) {
.keyword_else => {
if (ifs_seen != 0) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .else_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elif => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elif_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elifdef => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elifdef_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_elifndef => {
if (ifs_seen != 0 or cont == .until_endif) continue;
if (cont == .until_endif_seen_else) {
try pp.err(directive, .elifndef_after_else, .{});
continue;
}
tokenizer.* = saved_tokenizer;
return;
},
.keyword_endif => {
if (ifs_seen == 0) {
tokenizer.* = saved_tokenizer;
return;
}
ifs_seen -= 1;
},
.keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1,
else => {},
}
} else if (tokenizer.buf[tokenizer.index] == '\n') {
line_start = true;
tokenizer.index += 1;
tokenizer.line += 1;
if (pp.preserve_whitespace) {
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
} else {
line_start = false;
tokenizer.index += 1;
}
} else {
const eof = tokenizer.next();
return pp.err(eof, .unterminated_conditional_directive, .{});
}
}
// Skip until newline, ignore other tokens.
fn skipToNl(tokenizer: *Tokenizer) void {
while (true) {
const tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) return;
}
}
const ExpandBuf = std.array_list.Managed(TokenWithExpansionLocs);
fn removePlacemarkers(buf: *ExpandBuf) void {
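// Iterate in reverse so removals don't shift unvisited elements; the wrapping
// subtraction ends the loop once `i` wraps past zero.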
var i: usize = buf.items.len -% 1;
while (i < buf.items.len) : (i -%= 1) {
if (buf.items[i].id == .placemarker) {
const placemarker = buf.orderedRemove(i);
TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator);
}
}
}
const MacroArguments = std.array_list.Managed([]const TokenWithExpansionLocs);
fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void {
for (args.items) |item| {
for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator);
allocator.free(item);
}
args.deinit();
}
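/// Expand an object-like macro into a new buffer, resolving `##` token pasting and
/// the builtin __FILE__, __LINE__, __COUNTER__, __DATE__, __TIME__ and __TIMESTAMP__ tokens.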
fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf {
var buf = ExpandBuf.init(pp.gpa);
errdefer buf.deinit();
if (simple_macro.tokens.len == 0) {
try buf.append(.{ .id = .placemarker, .loc = .{ .id = .generated } });
return buf;
}
try buf.ensureTotalCapacity(simple_macro.tokens.len);
// Add all of the simple_macros tokens to the new buffer handling any concats.
var i: usize = 0;
while (i < simple_macro.tokens.len) : (i += 1) {
const raw = simple_macro.tokens[i];
const tok = tokFromRaw(raw);
switch (raw.id) {
.hash_hash => {
var rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
while (true) {
if (rhs.id == .whitespace) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else if (rhs.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
rhs = tokFromRaw(simple_macro.tokens[i + 1]);
i += 1;
} else break;
}
try pp.pasteTokens(&buf, &.{rhs});
},
.whitespace => if (pp.preserve_whitespace) buf.appendAssumeCapacity(tok),
.macro_file => {
const start = pp.comp.generated_buf.items.len;
const source = pp.comp.getSource(pp.expansion_source_loc.id);
try pp.comp.generated_buf.print(pp.gpa, "\"{f}\"\n", .{fmtEscapes(source.path)});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
.macro_line => {
const start = pp.comp.generated_buf.items.len;
const source = pp.comp.getSource(pp.expansion_source_loc.id);
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{source.physicalLine(pp.expansion_source_loc)});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
},
.macro_counter => {
defer pp.counter += 1;
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{d}\n", .{pp.counter});
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .pp_num, tok));
},
.macro_date, .macro_time => {
const start = pp.comp.generated_buf.items.len;
const timestamp = switch (pp.source_epoch) {
.system, .provided => |ts| ts,
};
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
.macro_timestamp => {
const start = pp.comp.generated_buf.items.len;
const timestamp = switch (pp.source_epoch) {
.provided => |ts| ts,
.system => try pp.mTime(pp.expansion_source_loc.id),
};
try pp.writeDateTimeStamp(.fromTokId(raw.id), timestamp);
buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok));
},
else => buf.appendAssumeCapacity(tok),
}
}
return buf;
}
const DateTimeStampKind = enum {
date,
time,
timestamp,
fn fromTokId(tok_id: RawToken.Id) DateTimeStampKind {
return switch (tok_id) {
.macro_date => .date,
.macro_time => .time,
.macro_timestamp => .timestamp,
else => unreachable,
};
}
};
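/// Write the quoted __DATE__ / __TIME__ / __TIMESTAMP__ text for `timestamp` to the
/// generated buffer; e.g. a timestamp of 0 produces "Jan  1 1970", "00:00:00",
/// and "Thu Jan  1 00:00:00 1970" respectively.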
fn writeDateTimeStamp(pp: *Preprocessor, kind: DateTimeStampKind, timestamp: u64) !void {
std.debug.assert(std.time.epoch.Month.jan.numeric() == 1);
const epoch_seconds = std.time.epoch.EpochSeconds{ .secs = timestamp };
const epoch_day = epoch_seconds.getEpochDay();
const day_seconds = epoch_seconds.getDaySeconds();
const year_day = epoch_day.calculateYearDay();
const month_day = year_day.calculateMonthDay();
const day_names = [_][]const u8{ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
const month_names = [_][]const u8{ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
const day_name = day_names[@intCast((epoch_day.day + 3) % 7)];
const month_name = month_names[month_day.month.numeric() - 1];
switch (kind) {
.date => {
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {d: >2} {d}\"", .{
month_name,
month_day.day_index + 1,
year_day.year,
});
},
.time => {
try pp.comp.generated_buf.print(pp.gpa, "\"{d:0>2}:{d:0>2}:{d:0>2}\"", .{
day_seconds.getHoursIntoDay(),
day_seconds.getMinutesIntoHour(),
day_seconds.getSecondsIntoMinute(),
});
},
.timestamp => {
try pp.comp.generated_buf.print(pp.gpa, "\"{s} {s} {d: >2} {d:0>2}:{d:0>2}:{d:0>2} {d}\"", .{
day_name,
month_name,
month_day.day_index + 1,
day_seconds.getHoursIntoDay(),
day_seconds.getMinutesIntoHour(),
day_seconds.getSecondsIntoMinute(),
year_day.year,
});
},
}
}
/// Join a possibly-parenthesized series of string literal tokens into a single string without
/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes.
/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal
/// is encountered, or if no string literals are encountered
/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"')
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 {
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
var unwrapped = toks;
if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) {
unwrapped = toks[1 .. toks.len - 1];
}
if (unwrapped.len == 0) return error.ExpectedStringLiteral;
for (unwrapped) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id != .string_literal) return error.ExpectedStringLiteral;
const str = pp.expandedSlice(tok);
try pp.char_buf.appendSlice(str[1 .. str.len - 1]);
}
return pp.char_buf.items[char_top..];
}
/// Handle the _Pragma operator (implemented as a builtin macro)
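/// e.g. `_Pragma("GCC diagnostic push")` is destringified into
/// `#pragma GCC diagnostic push` in the generated buffer and re-tokenized.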
fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void {
const arg_slice = pp.expandedSlice(arg_tok);
const content = arg_slice[1 .. arg_slice.len - 1];
const directive = "#pragma ";
pp.char_buf.clearRetainingCapacity();
const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline
try pp.char_buf.ensureUnusedCapacity(total_len);
pp.char_buf.appendSliceAssumeCapacity(directive);
pp.destringify(content);
pp.char_buf.appendAssumeCapacity('\n');
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
var tmp_tokenizer = Tokenizer{
.buf = pp.comp.generated_buf.items,
.langopts = pp.comp.langopts,
.index = @intCast(start),
.source = .generated,
.line = pp.generated_line,
};
pp.generated_line += 1;
const hash_tok = tmp_tokenizer.next();
assert(hash_tok.id == .hash);
const pragma_tok = tmp_tokenizer.next();
assert(pragma_tok.id == .keyword_pragma);
try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice());
}
/// Handle Microsoft __pragma operator
fn msPragmaOperator(pp: *Preprocessor, pragma_tok: TokenWithExpansionLocs, args: []const TokenWithExpansionLocs) !void {
if (args.len == 0) {
try pp.err(pragma_tok, .unknown_pragma, .{});
return;
}
{
var copy = try pragma_tok.dupe(pp.gpa);
copy.id = .keyword_pragma;
try pp.addToken(copy);
}
const pragma_start: u32 = @intCast(pp.tokens.len);
for (args) |tok| {
switch (tok.id) {
.macro_ws, .comment => continue,
else => try pp.addToken(try tok.dupe(pp.gpa)),
}
}
try pp.addToken(.{ .id = .nl, .loc = .{ .id = .generated } });
const name = pp.expandedSlice(pp.tokens.get(pragma_start));
if (pp.comp.getPragma(name)) |prag| unknown: {
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
error.UnknownPragma => break :unknown,
else => |e| return e,
};
}
try pp.err(args[0], .unknown_pragma, .{});
}
/// Inverts the output of the preprocessor stringify (#) operation
/// (except all whitespace is condensed to a single space)
/// writes output to pp.char_buf; assumes capacity is sufficient
/// backslash backslash -> backslash
/// backslash doublequote -> doublequote
/// All other characters remain the same
fn destringify(pp: *Preprocessor, str: []const u8) void {
var state: enum { start, backslash_seen } = .start;
for (str) |c| {
switch (c) {
'\\' => {
if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c);
state = if (state == .start) .backslash_seen else .start;
},
else => {
if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\');
pp.char_buf.appendAssumeCapacity(c);
state = .start;
},
}
}
}
/// Stringify `tokens` into pp.char_buf.
/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing
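/// e.g. the tokens `a  +  b` stringify to `"a + b"` plus a trailing newline; interior
/// whitespace collapses to a single space, embedded `"` is escaped, and `\` is escaped
/// only inside string and character literals.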
fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void {
try pp.char_buf.append('"');
var ws_state: enum { start, need, not_needed } = .start;
for (tokens) |tok| {
if (tok.id == .macro_ws) {
if (ws_state == .start) continue;
ws_state = .need;
continue;
}
if (ws_state == .need) try pp.char_buf.append(' ');
ws_state = .not_needed;
// backslashes not inside strings are not escaped
const is_str = switch (tok.id) {
.string_literal,
.string_literal_utf_16,
.string_literal_utf_8,
.string_literal_utf_32,
.string_literal_wide,
.char_literal,
.char_literal_utf_16,
.char_literal_utf_32,
.char_literal_wide,
=> true,
else => false,
};
for (pp.expandedSlice(tok)) |c| {
if (c == '"')
try pp.char_buf.appendSlice("\\\"")
else if (c == '\\' and is_str)
try pp.char_buf.appendSlice("\\\\")
else
try pp.char_buf.append(c);
}
}
try pp.char_buf.ensureUnusedCapacity(2);
if (pp.char_buf.items[pp.char_buf.items.len - 1] != '\\') {
pp.char_buf.appendSliceAssumeCapacity("\"\n");
return;
}
pp.char_buf.appendAssumeCapacity('"');
var tokenizer: Tokenizer = .{
.buf = pp.char_buf.items,
.index = 0,
.source = .generated,
.langopts = pp.comp.langopts,
.line = 0,
};
const item = tokenizer.next();
if (item.id == .unterminated_string_literal) {
const tok = tokens[tokens.len - 1];
try pp.err(tok, .invalid_pp_stringify_escape, .{});
pp.char_buf.items.len -= 2; // erase unpaired backslash and appended end quote
pp.char_buf.appendAssumeCapacity('"');
}
pp.char_buf.appendAssumeCapacity('\n');
}
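/// Rebuild the filename text of a computed #include / #embed / __has_include argument
/// from its expanded tokens, e.g. `<stdio.h>` or `"config.h"`. If `embed_args` is
/// non-null, the tokens following the filename are passed back through it.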
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 {
if (param_toks.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
// Trim leading/trailing whitespace
var begin: usize = 0;
var end: usize = param_toks.len;
while (begin < end and param_toks[begin].id == .macro_ws) : (begin += 1) {}
while (end > begin and param_toks[end - 1].id == .macro_ws) : (end -= 1) {}
const params = param_toks[begin..end];
if (params.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
// no string pasting
if (embed_args == null and params[0].id == .string_literal and params.len > 1) {
try pp.err(params[1], .closing_paren, .{});
return null;
}
for (params, 0..) |tok, i| {
const str = pp.expandedSliceExtra(tok, .preserve_macro_ws);
try pp.char_buf.appendSlice(str);
if (embed_args) |some| {
if ((i == 0 and tok.id == .string_literal) or tok.id == .angle_bracket_right) {
some.* = params[i + 1 ..];
break;
}
}
}
const include_str = pp.char_buf.items[char_top..];
if (include_str.len < 3) {
if (include_str.len == 0) {
try pp.err(first, .expected_filename, .{});
return null;
}
try pp.err(params[0], .empty_filename, .{});
return null;
}
switch (include_str[0]) {
'<' => {
if (include_str[include_str.len - 1] != '>') {
// Ugly hack to find out where the '>' should go, since we don't have the closing ')' location
var closing = params[0];
closing.loc.byte_offset += @as(u32, @intCast(include_str.len)) + 1;
try pp.err(closing, .header_str_closing, .{});
try pp.err(params[0], .header_str_match, .{});
return null;
}
return include_str;
},
'"' => return include_str,
else => {
try pp.err(params[0], .expected_filename, .{});
return null;
},
}
}
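/// Evaluate a builtin feature-check macro (__has_attribute, __has_feature,
/// __has_warning, __has_include, __is_identifier, ...) over its expanded argument
/// tokens, producing the boolean result used for the generated 0/1 token.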
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool {
switch (builtin) {
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
=> {
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
}
if (identifier) |_| invalid = tok else identifier = tok;
}
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
if (invalid) |some| {
try pp.err(some, .feature_check_requires_identifier, .{});
return false;
}
const ident_str = pp.expandedSlice(identifier.?);
return switch (builtin) {
.macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null,
.macro_param_has_declspec_attribute => {
return if (pp.comp.langopts.declspec_attrs)
Attribute.fromString(.declspec, null, ident_str) != null
else
false;
},
.macro_param_has_feature => features.hasFeature(pp.comp, ident_str),
// If -pedantic-errors is given __has_extension is equivalent to __has_feature
.macro_param_has_extension => if (pp.comp.diagnostics.state.extensions == .@"error")
features.hasFeature(pp.comp, ident_str)
else
features.hasExtension(pp.comp, ident_str),
.macro_param_has_builtin => pp.comp.hasBuiltin(ident_str),
else => unreachable,
};
},
.macro_param_has_warning => {
const actual_param = pp.pasteStringsUnsafe(param_toks) catch |er| switch (er) {
error.ExpectedStringLiteral => {
try pp.err(param_toks[0], .expected_str_literal_in, .{"__has_warning"});
return false;
},
else => |e| return e,
};
if (!mem.startsWith(u8, actual_param, "-W")) {
try pp.err(param_toks[0], .malformed_warning_check, .{"__has_warning"});
return false;
}
const warning_name = actual_param[2..];
return Diagnostics.warningExists(warning_name);
},
.macro_param_is_identifier => {
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {
if (identifier) |_| invalid = tok else identifier = tok;
},
};
if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc };
if (invalid) |some| {
try pp.err(some, .builtin_missing_r_paren, .{"builtin feature-check macro"});
return false;
}
const id = identifier.?.id;
return id == .identifier or id == .extended_identifier;
},
.macro_param_has_include, .macro_param_has_include_next => {
const include_str = (try pp.reconstructIncludeString(param_toks, null, param_toks[0])) orelse return false;
const include_type: Compilation.IncludeType = switch (include_str[0]) {
'"' => .quotes,
'<' => .angle_brackets,
else => unreachable,
};
const filename = include_str[1 .. include_str.len - 1];
const res = res: {
if (builtin == .macro_param_has_include or pp.include_depth == 0) {
if (builtin == .macro_param_has_include_next) {
try pp.err(src_loc, .include_next_outside_header, .{});
}
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .first);
}
break :res try pp.comp.hasInclude(filename, src_loc.id, include_type, .next);
};
if (res) if (pp.dep_file) |dep_file| try dep_file.addDependencyDupe(pp.gpa, pp.comp.arena, filename);
return res;
},
else => unreachable,
}
}
/// Treat whitespace-only paste arguments as empty
fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs {
for (args) |tok| {
if (tok.id != .macro_ws) return args;
}
return &[1]TokenWithExpansionLocs{.{
.id = .placemarker,
.loc = .{ .id = .generated, .byte_offset = 0, .line = 0 },
}};
}
fn expandFuncMacro(
pp: *Preprocessor,
macro_tok: TokenWithExpansionLocs,
func_macro: *const Macro,
args: *const MacroArguments,
expanded_args: *const MacroArguments,
hideset_arg: Hideset.Index,
) MacroError!ExpandBuf {
var hideset = hideset_arg;
var buf = ExpandBuf.init(pp.gpa);
try buf.ensureTotalCapacity(func_macro.tokens.len);
errdefer buf.deinit();
var expanded_variable_arguments = ExpandBuf.init(pp.gpa);
defer expanded_variable_arguments.deinit();
var variable_arguments = ExpandBuf.init(pp.gpa);
defer variable_arguments.deinit();
if (func_macro.var_args) {
var i: usize = func_macro.params.len;
while (i < expanded_args.items.len) : (i += 1) {
try variable_arguments.appendSlice(args.items[i]);
try expanded_variable_arguments.appendSlice(expanded_args.items[i]);
if (i != expanded_args.items.len - 1) {
const comma = TokenWithExpansionLocs{ .id = .comma, .loc = .{ .id = .generated } };
try variable_arguments.append(comma);
try expanded_variable_arguments.append(comma);
}
}
}
// token concatenation and expansion phase
var tok_i: usize = 0;
while (tok_i < func_macro.tokens.len) : (tok_i += 1) {
const raw = func_macro.tokens[tok_i];
switch (raw.id) {
.hash_hash => while (tok_i + 1 < func_macro.tokens.len) {
const raw_next = func_macro.tokens[tok_i + 1];
tok_i += 1;
var va_opt_buf = ExpandBuf.init(pp.gpa);
defer va_opt_buf.deinit();
const next = switch (raw_next.id) {
.macro_ws => continue,
.hash_hash => continue,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
continue
else
&[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]),
.keyword_va_args => variable_arguments.items,
.keyword_va_opt => blk: {
try pp.expandVaOpt(&va_opt_buf, raw_next, variable_arguments.items.len != 0);
if (va_opt_buf.items.len == 0) break;
break :blk va_opt_buf.items;
},
else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
};
try pp.pasteTokens(&buf, next);
if (next.len != 0) break;
},
.macro_param_no_expand => {
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
}
const slice = getPasteArgs(args.items[raw.end]);
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, slice, &.{raw_loc});
},
.macro_param => {
if (tok_i + 1 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
hideset = pp.hideset.get(tokFromRaw(func_macro.tokens[tok_i + 1]).loc);
}
const arg = expanded_args.items[raw.end];
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, arg, &.{raw_loc});
},
.keyword_va_args => {
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
},
.keyword_va_opt => {
try pp.expandVaOpt(&buf, raw, variable_arguments.items.len != 0);
},
.stringify_param, .stringify_va_args => {
const arg = if (raw.id == .stringify_va_args)
variable_arguments.items
else
args.items[raw.end];
pp.char_buf.clearRetainingCapacity();
try pp.stringify(arg);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, pp.char_buf.items);
try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw)));
},
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
.macro_param_has_warning,
.macro_param_has_feature,
.macro_param_has_extension,
.macro_param_has_builtin,
.macro_param_has_include,
.macro_param_has_include_next,
.macro_param_is_identifier,
=> {
const arg = expanded_args.items[0];
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk false;
} else try pp.handleBuiltinMacro(raw.id, arg, macro_tok.loc);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{}\n", .{@intFromBool(result)});
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_has_c_attribute => {
const arg = expanded_args.items[0];
const not_found = "0\n";
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk not_found;
} else res: {
var invalid: ?TokenWithExpansionLocs = null;
var vendor_ident: ?TokenWithExpansionLocs = null;
var colon_colon: ?TokenWithExpansionLocs = null;
var attr_ident: ?TokenWithExpansionLocs = null;
for (arg) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
if (tok.id == .colon_colon) {
if (colon_colon != null or attr_ident == null) {
invalid = tok;
break;
}
vendor_ident = attr_ident;
attr_ident = null;
colon_colon = tok;
continue;
}
if (!tok.id.isMacroIdentifier()) {
invalid = tok;
break;
}
if (attr_ident) |_| {
invalid = tok;
break;
} else attr_ident = tok;
}
if (vendor_ident != null and attr_ident == null) {
invalid = vendor_ident;
} else if (attr_ident == null and invalid == null) {
invalid = .{ .id = .eof, .loc = macro_tok.loc };
}
if (invalid) |some| {
try pp.err(some, .feature_check_requires_identifier, .{});
break :res not_found;
}
if (vendor_ident) |some| {
const vendor_str = pp.expandedSlice(some);
const attr_str = pp.expandedSlice(attr_ident.?);
const exists = Attribute.fromString(.gnu, vendor_str, attr_str) != null;
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, if (exists) "1\n" else "0\n");
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
continue;
}
if (!pp.comp.langopts.standard.atLeast(.c23)) break :res not_found;
const attrs = std.StaticStringMap([]const u8).initComptime(.{
.{ "deprecated", "201904L\n" },
.{ "fallthrough", "201904L\n" },
.{ "maybe_unused", "201904L\n" },
.{ "nodiscard", "202003L\n" },
.{ "noreturn", "202202L\n" },
.{ "_Noreturn", "202202L\n" },
.{ "unsequenced", "202207L\n" },
.{ "reproducible", "202207L\n" },
});
const attr_str = Attribute.normalize(pp.expandedSlice(attr_ident.?));
break :res attrs.get(attr_str) orelse not_found;
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, result);
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_has_embed => {
const arg = expanded_args.items[0];
const not_found = "0\n";
const result = if (arg.len == 0) blk: {
try pp.err(macro_tok, .expected_arguments, .{ 1, 0 });
break :blk not_found;
} else res: {
var embed_args: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse
break :res not_found;
var prev = tokFromRaw(raw);
prev.id = .eof;
var it: struct {
i: u32 = 0,
slice: []const TokenWithExpansionLocs,
prev: TokenWithExpansionLocs,
fn next(it: *@This()) TokenWithExpansionLocs {
while (it.i < it.slice.len) switch (it.slice[it.i].id) {
.macro_ws, .whitespace => it.i += 1,
else => break,
} else return it.prev;
defer it.i += 1;
it.prev = it.slice[it.i];
it.prev.id = .eof;
return it.slice[it.i];
}
} = .{ .slice = embed_args, .prev = prev };
while (true) {
const param_first = it.next();
if (param_first.id == .eof) break;
if (param_first.id != .identifier) {
try pp.err(param_first, .malformed_embed_param, .{});
continue;
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
const maybe_colon = it.next();
const param = switch (maybe_colon.id) {
.colon_colon => blk: {
// vendor::param
const param = it.next();
if (param.id != .identifier) {
try pp.err(param, .malformed_embed_param, .{});
continue;
}
const l_paren = it.next();
if (l_paren.id != .l_paren) {
try pp.err(l_paren, .malformed_embed_param, .{});
continue;
}
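// A vendor-prefixed parameter cannot match any standard parameter name below,
// so the check falls through to `not_found`.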
break :blk "doesn't exist";
},
.l_paren => Attribute.normalize(pp.expandedSlice(param_first)),
else => {
try pp.err(maybe_colon, .malformed_embed_param, .{});
continue;
},
};
var arg_count: u32 = 0;
var first_arg: TokenWithExpansionLocs = undefined;
while (true) {
const next = it.next();
if (next.id == .eof) {
try pp.err(param_first, .malformed_embed_limit, .{});
break;
}
if (next.id == .r_paren) break;
arg_count += 1;
if (arg_count == 1) first_arg = next;
}
if (std.mem.eql(u8, param, "limit")) {
if (arg_count != 1) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
if (first_arg.id != .pp_num) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
_ = std.fmt.parseInt(u32, pp.expandedSlice(first_arg), 10) catch {
break :res not_found;
};
} else if (!std.mem.eql(u8, param, "prefix") and !std.mem.eql(u8, param, "suffix") and
!std.mem.eql(u8, param, "if_empty"))
{
break :res not_found;
}
}
const include_type: Compilation.IncludeType = switch (include_str[0]) {
'"' => .quotes,
'<' => .angle_brackets,
else => unreachable,
};
const filename = include_str[1 .. include_str.len - 1];
const contents = (try pp.comp.findEmbed(filename, arg[0].loc.id, include_type, .limited(1), pp.dep_file)) orelse
break :res not_found;
defer pp.comp.gpa.free(contents);
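// 1 (__STDC_EMBED_FOUND__) if the resource is non-empty, 2 (__STDC_EMBED_EMPTY__) if it is empty.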
break :res if (contents.len != 0) "1\n" else "2\n";
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.comp.gpa, result);
try buf.append(try pp.makeGeneratedToken(start, .pp_num, tokFromRaw(raw)));
},
.macro_param_pragma_operator => {
// Clang and GCC require exactly one token (so, no parentheses or string pasting)
// even though their error messages indicate otherwise. Ours is slightly more
// descriptive.
var invalid: ?TokenWithExpansionLocs = null;
var string: ?TokenWithExpansionLocs = null;
for (expanded_args.items[0]) |tok| {
switch (tok.id) {
.string_literal => {
if (string) |_| {
invalid = tok;
break;
}
string = tok;
},
.macro_ws => continue,
.comment => continue,
else => {
invalid = tok;
break;
},
}
}
if (string == null and invalid == null) invalid = macro_tok;
if (invalid) |some|
try pp.err(some, .pragma_operator_string_literal, .{})
else
try pp.pragmaOperator(string.?, macro_tok.loc);
},
.macro_param_ms_identifier => blk: {
// Expect '__identifier' '(' macro-identifier ')'
var ident: ?TokenWithExpansionLocs = null;
for (expanded_args.items[0]) |tok| {
switch (tok.id) {
.macro_ws => continue,
.comment => continue,
else => {},
}
if (ident) |_| {
try pp.err(tok, .builtin_missing_r_paren, .{"identifier"});
break :blk;
} else if (tok.id.isMacroIdentifier()) {
ident = tok;
} else {
try pp.err(tok, .cannot_convert_to_identifier, .{tok.id.symbol()});
break :blk;
}
}
if (ident) |*some| {
some.id = .identifier;
try buf.append(some.*);
} else {
try pp.err(macro_tok, .expected_identifier, .{});
}
},
.macro_param_ms_pragma => {
try pp.msPragmaOperator(macro_tok, expanded_args.items[0]);
},
.comma => {
if (tok_i + 2 < func_macro.tokens.len and func_macro.tokens[tok_i + 1].id == .hash_hash) {
const hash_hash = func_macro.tokens[tok_i + 1];
var maybe_va_args = func_macro.tokens[tok_i + 2];
var consumed: usize = 2;
if (maybe_va_args.id == .macro_ws and tok_i + 3 < func_macro.tokens.len) {
consumed = 3;
maybe_va_args = func_macro.tokens[tok_i + 3];
}
if (maybe_va_args.id == .keyword_va_args) {
// GNU extension: `, ##__VA_ARGS__` deletes the comma if __VA_ARGS__ is empty
tok_i += consumed;
if (func_macro.params.len == expanded_args.items.len) {
// Empty __VA_ARGS__, drop the comma
try pp.err(hash_hash, .comma_deletion_va_args, .{});
} else if (func_macro.params.len == 0 and expanded_args.items.len == 1 and expanded_args.items[0].len == 0) {
// Ambiguous whether this is "empty __VA_ARGS__" or "__VA_ARGS__ omitted"
if (pp.comp.langopts.standard.isGNU()) {
// GNU standard, drop the comma
try pp.err(hash_hash, .comma_deletion_va_args, .{});
} else {
// C standard, retain the comma
try buf.append(tokFromRaw(raw));
}
} else {
try buf.append(tokFromRaw(raw));
if (expanded_variable_arguments.items.len > 0 or variable_arguments.items.len == func_macro.params.len) {
try pp.err(hash_hash, .comma_deletion_va_args, .{});
}
const raw_loc = Source.Location{
.id = maybe_va_args.source,
.byte_offset = maybe_va_args.start,
.line = maybe_va_args.line,
};
try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc});
}
continue;
}
}
// Regular comma, no token pasting with __VA_ARGS__
try buf.append(tokFromRaw(raw));
},
else => try buf.append(tokFromRaw(raw)),
}
}
removePlacemarkers(&buf);
const macro_expansion_locs = macro_tok.expansionSlice();
for (buf.items) |*tok| {
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hideset);
try pp.hideset.put(tok.loc, new_hidelist);
}
return buf;
}
fn expandVaOpt(
pp: *Preprocessor,
buf: *ExpandBuf,
raw: RawToken,
should_expand: bool,
) !void {
if (!should_expand) return;
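// The `va_opt` token records the byte range of its contents (set up in `defineFn`);
// re-lex that range to produce its tokens.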
const source = pp.comp.getSource(raw.source);
var tokenizer: Tokenizer = .{
.buf = source.buf,
.index = raw.start,
.source = raw.source,
.langopts = pp.comp.langopts,
.line = raw.line,
};
while (tokenizer.index < raw.end) {
const tok = tokenizer.next();
try buf.append(tokFromRaw(tok));
}
}
fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void {
try buf.ensureUnusedCapacity(tokens.len);
for (tokens) |tok| {
var copy = try tok.dupe(buf.allocator);
errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator);
try copy.addExpansionLocation(buf.allocator, src);
buf.appendAssumeCapacity(copy);
}
}
fn nextBufToken(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: *usize,
end_idx: *usize,
extend_buf: bool,
) Error!TokenWithExpansionLocs {
start_idx.* += 1;
if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
if (extend_buf) {
const raw_tok = tokenizer.next();
if (raw_tok.id.isMacroIdentifier() and
pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null)
try pp.err(raw_tok, .poisoned_identifier, .{});
if (raw_tok.id == .nl) pp.add_expansion_nl += 1;
const new_tok = tokFromRaw(raw_tok);
end_idx.* += 1;
try buf.append(new_tok);
return new_tok;
} else {
return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } };
}
} else {
return buf.items[start_idx.*];
}
}
fn collectMacroFuncArguments(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: *usize,
end_idx: *usize,
extend_buf: bool,
is_builtin: bool,
r_paren: *TokenWithExpansionLocs,
) !MacroArguments {
const name_tok = buf.items[start_idx.*];
const saved_tokenizer = tokenizer.*;
const old_end = end_idx.*;
while (true) {
const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
switch (tok.id) {
.nl, .whitespace, .macro_ws => {},
.l_paren => break,
else => {
if (is_builtin) {
try pp.err(name_tok, .missing_lparen_after_builtin, .{pp.expandedSlice(name_tok)});
}
// Not a macro function call, go over normal identifier, rewind
tokenizer.* = saved_tokenizer;
end_idx.* = old_end;
return error.MissingLParen;
},
}
}
// collect the arguments.
var parens: u32 = 0;
var args = MacroArguments.init(pp.gpa);
errdefer deinitMacroArguments(pp.gpa, &args);
var curArgument = std.array_list.Managed(TokenWithExpansionLocs).init(pp.gpa);
defer curArgument.deinit();
while (true) {
var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
tok.flags.is_macro_arg = true;
switch (tok.id) {
.comma => {
if (parens == 0) {
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
} else {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
}
},
.l_paren => {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens += 1;
},
.r_paren => {
if (parens == 0) {
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
r_paren.* = tok;
break;
} else {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens -= 1;
}
},
.eof => {
{
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
}
tokenizer.* = saved_tokenizer;
try pp.err(name_tok, .unterminated_macro_arg_list, .{});
return error.Unterminated;
},
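// Newlines and other whitespace inside the argument list become macro whitespace tokens.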
.nl, .whitespace => {
try curArgument.append(.{ .id = .macro_ws, .loc = tok.loc });
},
else => {
const duped = try tok.dupe(pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
},
}
}
return args;
}
fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void {
for (buf.items[start .. start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(start, len, &.{});
moving_end_idx.* -|= len;
}
/// The behavior of `defined` depends on whether we are in a preprocessor
/// expression context (#if or #elif) or not.
/// In a non-expression context it's just an identifier. Within a preprocessor
/// expression it is a unary operator or one-argument function.
const EvalContext = enum {
expr,
non_expr,
};
/// Helper for safely iterating over a slice of tokens while skipping whitespace
const TokenIterator = struct {
toks: []const TokenWithExpansionLocs,
i: usize,
fn init(toks: []const TokenWithExpansionLocs) TokenIterator {
return .{ .toks = toks, .i = 0 };
}
fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs {
while (self.i < self.toks.len) : (self.i += 1) {
const tok = self.toks[self.i];
if (tok.id == .whitespace or tok.id == .macro_ws) continue;
self.i += 1;
return tok;
}
return null;
}
};
fn expandMacroExhaustive(
pp: *Preprocessor,
tokenizer: *Tokenizer,
buf: *ExpandBuf,
start_idx: usize,
end_idx: usize,
extend_buf: bool,
eval_ctx: EvalContext,
) MacroError!void {
var moving_end_idx = end_idx;
var advance_index: usize = 0;
// rescan loop
var do_rescan = true;
while (do_rescan) {
do_rescan = false;
// expansion loop
var idx: usize = start_idx + advance_index;
while (idx < moving_end_idx) {
const macro_tok = buf.items[idx];
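// Inside #if/#elif expressions the operand of `defined` must not be macro-expanded; skip over it.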
if (macro_tok.id == .keyword_defined and eval_ctx == .expr) {
idx += 1;
var it = TokenIterator.init(buf.items[idx..moving_end_idx]);
if (it.nextNoWS()) |tok| {
switch (tok.id) {
.l_paren => {
_ = it.nextNoWS(); // eat (what should be) identifier
_ = it.nextNoWS(); // eat (what should be) r paren
},
.identifier, .extended_identifier => {},
else => {},
}
}
idx += it.i;
continue;
}
if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) {
idx += 1;
continue;
}
const expanded = pp.expandedSlice(macro_tok);
const macro = pp.defines.getPtr(expanded) orelse {
idx += 1;
continue;
};
const macro_hidelist = pp.hideset.get(macro_tok.loc);
if (pp.hideset.contains(macro_hidelist, expanded)) {
idx += 1;
continue;
}
macro_handler: {
if (macro.is_func) {
var r_paren: TokenWithExpansionLocs = undefined;
// Scan from a copy of `idx` so the original position is preserved in case
// this does not turn out to be a macro call.
var macro_scan_idx = idx;
const args = pp.collectMacroFuncArguments(
tokenizer,
buf,
&macro_scan_idx,
&moving_end_idx,
extend_buf,
macro.is_builtin,
&r_paren,
) catch |er| switch (er) {
error.MissingLParen => {
if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
idx += 1;
break :macro_handler;
},
error.Unterminated => {
if (pp.comp.langopts.emulate == .gcc) idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx, &moving_end_idx);
break :macro_handler;
},
else => |e| return e,
};
assert(r_paren.id == .r_paren);
var free_arg_expansion_locs = false;
defer {
for (args.items) |item| {
if (free_arg_expansion_locs) for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
pp.gpa.free(item);
}
args.deinit();
}
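// The expansion's hideset is the intersection of the macro name's and the
// closing paren's hidesets, plus the macro name itself.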
const r_paren_hidelist = pp.hideset.get(r_paren.loc);
var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist);
hs = try pp.hideset.prepend(macro_tok.loc, hs);
var args_count: u32 = @intCast(args.items.len);
// If the macro takes zero parameters, a call like g() still produces args_count == 1 here.
// An empty token list g() or a whitespace-only token list g( ) counts as zero
// arguments for the purposes of argument-count validation.
if (args_count == 1 and macro.params.len == 0) {
for (args.items[0]) |tok| {
if (tok.id != .macro_ws) break;
} else {
args_count = 0;
}
}
// Validate argument count.
if (macro.var_args and args_count < macro.params.len) {
free_arg_expansion_locs = true;
try pp.err(buf.items[idx], .expected_at_least_arguments, .{ macro.params.len, args_count });
idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
continue;
}
if (!macro.var_args and args_count != macro.params.len) {
free_arg_expansion_locs = true;
try pp.err(buf.items[idx], .expected_arguments, .{ macro.params.len, args_count });
idx += 1;
try pp.removeExpandedTokens(buf, idx, macro_scan_idx - idx + 1, &moving_end_idx);
continue;
}
var expanded_args = MacroArguments.init(pp.gpa);
defer deinitMacroArguments(pp.gpa, &expanded_args);
try expanded_args.ensureTotalCapacity(args.items.len);
for (args.items) |arg| {
var expand_buf = ExpandBuf.init(pp.gpa);
errdefer expand_buf.deinit();
try expand_buf.appendSlice(arg);
try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false, eval_ctx);
expanded_args.appendAssumeCapacity(try expand_buf.toOwnedSlice());
}
var res = try pp.expandFuncMacro(macro_tok, macro, &args, &expanded_args, hs);
defer res.deinit();
const tokens_added = res.items.len;
const tokens_removed = macro_scan_idx - idx + 1;
for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(idx, tokens_removed, res.items);
moving_end_idx += tokens_added;
// Overflow here means that we encountered an unterminated argument list
// while expanding the body of this macro.
moving_end_idx -|= tokens_removed;
idx += tokens_added;
do_rescan = true;
} else {
const res = try pp.expandObjMacro(macro);
defer res.deinit();
const hs = try pp.hideset.prepend(macro_tok.loc, macro_hidelist);
const macro_expansion_locs = macro_tok.expansionSlice();
var increment_idx_by = res.items.len;
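// Advance `idx` only up to the first result token that is itself a macro
// name (or `defined`), so that it gets re-examined on the next pass.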
for (res.items, 0..) |*tok, i| {
tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(tok.loc, new_hidelist);
if (tok.id == .keyword_defined and eval_ctx == .expr) {
if (macro.is_func) {
try pp.err(tok, .expansion_to_defined_func, .{});
} else {
try pp.err(tok, .expansion_to_defined_obj, .{});
}
}
if (i < increment_idx_by and (tok.id == .keyword_defined or pp.defines.contains(pp.expandedSlice(tok.*)))) {
increment_idx_by = i;
}
}
TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa);
try buf.replaceRange(idx, 1, res.items);
idx += increment_idx_by;
moving_end_idx = moving_end_idx + res.items.len - 1;
do_rescan = true;
}
}
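// A token at the front of the buffer that did not expand can be skipped on
// later rescan passes.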
if (idx - start_idx == advance_index + 1 and !do_rescan) {
advance_index += 1;
}
} // end of replacement phase
}
// end of scanning phase
// trim excess buffer
for (buf.items[moving_end_idx..]) |item| {
TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa);
}
buf.items.len = moving_end_idx;
}
fn unescapeUcn(pp: *Preprocessor, tok: TokenWithExpansionLocs) !TokenWithExpansionLocs {
switch (tok.id) {
.incomplete_ucn => {
@branchHint(.cold);
try pp.err(tok, .incomplete_ucn, .{});
},
.extended_identifier => {
@branchHint(.cold);
const identifier = pp.expandedSlice(tok);
if (mem.indexOfScalar(u8, identifier, '\\') != null) {
@branchHint(.cold);
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.ensureUnusedCapacity(pp.gpa, identifier.len + 1);
var identifier_parser: text_literal.Parser = .{
.comp = pp.comp,
.literal = pp.expandedSlice(tok), // re-expand since previous line may have caused a reallocation, invalidating `identifier`
.kind = .utf_8,
.max_codepoint = 0x10ffff,
.loc = tok.loc,
.expansion_locs = tok.expansionSlice(),
.diagnose_incorrect_encoding = false,
};
while (try identifier_parser.next()) |decoded| {
switch (decoded) {
.value => unreachable, // validated by tokenizer
.codepoint => |c| {
var buf: [4]u8 = undefined;
const written = std.unicode.utf8Encode(c, &buf) catch unreachable;
pp.comp.generated_buf.appendSliceAssumeCapacity(buf[0..written]);
},
.improperly_encoded => |bytes| {
pp.comp.generated_buf.appendSliceAssumeCapacity(bytes);
},
.utf8_text => |view| {
pp.comp.generated_buf.appendSliceAssumeCapacity(view.bytes);
},
}
}
pp.comp.generated_buf.appendAssumeCapacity('\n');
defer TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
return pp.makeGeneratedToken(start, .extended_identifier, tok);
}
},
else => {},
}
return tok;
}
/// Try to expand a macro after a possible candidate has been read from the `tokenizer`
/// into the `raw` token passed as argument
fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void {
var source_tok = tokFromRaw(raw);
if (!raw.id.isMacroIdentifier()) {
source_tok.id.simplifyMacroKeyword();
return pp.addToken(source_tok);
}
pp.top_expansion_buf.items.len = 0;
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
for (pp.top_expansion_buf.items) |*tok| {
if (tok.id == .macro_ws and !pp.preserve_whitespace) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .placemarker) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
tok.id.simplifyMacroKeywordExtra(true);
pp.addTokenAssumeCapacity(try pp.unescapeUcn(tok.*));
}
if (pp.preserve_whitespace) {
try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl);
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
}
}
fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
var tmp_tokenizer: Tokenizer = .{
.buf = pp.comp.getSource(tok.loc.id).buf,
.langopts = pp.comp.langopts,
.index = tok.loc.byte_offset,
.source = .generated,
};
if (tok.id == .macro_string) {
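// `<...>` header names are a single token that cannot be re-lexed as such; scan
// forward to the closing '>' to recover the full spelling.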
while (true) : (tmp_tokenizer.index += 1) {
if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break;
}
return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1];
}
const res = tmp_tokenizer.next();
return tmp_tokenizer.buf[res.start..res.end];
}
/// Get expanded token source string.
pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
}
/// Concat two tokens and add the result to pp.generated
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void {
const lhs = while (lhs_toks.pop()) |lhs| {
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
(lhs.id != .macro_ws and lhs.id != .comment))
break lhs;
TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
} else {
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
};
var rhs_rest: u32 = 1;
const rhs = for (rhs_toks) |rhs| {
if ((pp.comp.langopts.preserve_comments_in_macros and rhs.id == .comment) or
(rhs.id != .macro_ws and rhs.id != .comment))
break rhs;
rhs_rest += 1;
} else {
return lhs_toks.appendAssumeCapacity(lhs);
};
defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
const start = pp.comp.generated_buf.items.len;
const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len;
try pp.comp.generated_buf.ensureTotalCapacity(pp.gpa, end + 1); // +1 for a newline
// We cannot reuse the slices from above since `ensureTotalCapacity` may have reallocated the buffer, invalidating them
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs));
pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs));
pp.comp.generated_buf.appendAssumeCapacity('\n');
// Try to tokenize the result.
var tmp_tokenizer = Tokenizer{
.buf = pp.comp.generated_buf.items,
.langopts = pp.comp.langopts,
.index = @intCast(start),
.source = .generated,
};
const pasted_token = tmp_tokenizer.nextNoWSComments();
const next = tmp_tokenizer.nextNoWSComments();
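// If anything other than a newline or EOF follows the pasted token, the concatenation did not form a single valid token.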
const pasted_id = if (lhs.id == .placemarker and rhs.id == .placemarker)
.placemarker
else
pasted_token.id;
try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_id, lhs));
if (next.id != .nl and next.id != .eof) {
try pp.err(lhs, .pasting_formed_invalid, .{pp.comp.generated_buf.items[start..end]});
try lhs_toks.append(tokFromRaw(next));
}
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
}
fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs {
var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{
.id = .generated,
.byte_offset = @intCast(start),
.line = pp.generated_line,
} };
pp.generated_line += 1;
try pasted_token.addExpansionLocation(pp.gpa, &.{source.loc});
try pasted_token.addExpansionLocation(pp.gpa, source.expansionSlice());
return pasted_token;
}
/// Defines a new macro and warns if it is a duplicate
fn defineMacro(pp: *Preprocessor, define_tok: RawToken, name_tok: TokenWithExpansionLocs, macro: Macro) Error!void {
const name_str = pp.expandedSlice(name_tok);
const gop = try pp.defines.getOrPut(pp.gpa, name_str);
if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) {
const loc = name_tok.loc;
const prev_total = pp.diagnostics.total;
if (gop.value_ptr.is_builtin) {
try pp.err(loc, .builtin_macro_redefined, .{});
} else {
try pp.err(loc, .macro_redefined, .{name_str});
}
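// Only note the previous definition if the redefinition diagnostic was actually emitted.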
if (!gop.value_ptr.is_builtin and pp.diagnostics.total != prev_total) {
try pp.err(gop.value_ptr.loc, .previous_definition, .{});
}
}
if (pp.verbose) {
const raw: RawToken = .{ .id = name_tok.id, .source = name_tok.loc.id, .start = name_tok.loc.byte_offset, .line = name_tok.loc.line };
pp.verboseLog(raw, "macro {s} defined", .{name_str});
}
if (pp.store_macro_tokens) {
try pp.addToken(tokFromRaw(define_tok));
}
gop.value_ptr.* = macro;
}
/// Handle a #define directive.
fn define(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken) Error!void {
// Get macro name and validate it.
const escaped_macro_name = tokenizer.nextNoWS();
if (escaped_macro_name.id == .keyword_defined) {
try pp.err(escaped_macro_name, .defined_as_macro_name, .{});
return skipToNl(tokenizer);
}
if (!escaped_macro_name.id.isMacroIdentifier()) {
try pp.err(escaped_macro_name, .macro_name_must_be_identifier, .{});
return skipToNl(tokenizer);
}
const macro_name = try pp.unescapeUcn(tokFromRaw(escaped_macro_name));
defer TokenWithExpansionLocs.free(macro_name.expansion_locs, pp.gpa);
var macro_name_token_id = macro_name.id;
macro_name_token_id.simplifyMacroKeyword();
switch (macro_name_token_id) {
.identifier, .extended_identifier => {},
// TODO allow #define <keyword> <keyword> and #define extern|inline|static|const
else => if (macro_name_token_id.isMacroIdentifier() and
!mem.eql(u8, pp.comp.getSource(tokenizer.source).path, "<builtin>"))
{
try pp.err(macro_name, .keyword_macro, .{});
},
}
// Check for function macros and empty defines.
var first = tokenizer.next();
switch (first.id) {
.nl, .eof => return pp.defineMacro(define_tok, macro_name, .{
.params = &.{},
.tokens = &.{},
.var_args = false,
.loc = macro_name.loc,
.is_func = false,
}),
.whitespace => first = tokenizer.next(),
.l_paren => return pp.defineFn(tokenizer, define_tok, macro_name, first),
else => try pp.err(first, .whitespace_after_macro_name, .{}),
}
if (first.id == .hash_hash) {
try pp.err(first, .hash_hash_at_start, .{});
return skipToNl(tokenizer);
}
first.id.simplifyMacroKeyword();
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
var need_ws = false;
// Collect the token body and validate any ## found.
var tok = first;
while (true) {
tok.id.simplifyMacroKeyword();
switch (tok.id) {
.hash_hash => {
const next = tokenizer.nextNoWSComments();
switch (next.id) {
.nl, .eof => {
try pp.err(tok, .hash_hash_at_end, .{});
return;
},
.hash_hash => {
try pp.err(next, .hash_hash_at_end, .{});
return;
},
else => {},
}
try pp.token_buf.append(tok);
try pp.token_buf.append(next);
},
.nl, .eof => break,
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.whitespace => need_ws = true,
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id == .incomplete_ucn) {
@branchHint(.cold);
try pp.err(tok, .incomplete_ucn, .{});
}
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
}
tok = tokenizer.next();
}
const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(define_tok, macro_name, .{
.loc = macro_name.loc,
.tokens = list,
.params = &.{},
.is_func = false,
.var_args = false,
});
}
/// Handle a function-like #define directive.
fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, define_tok: RawToken, macro_name: TokenWithExpansionLocs, l_paren: RawToken) Error!void {
assert(macro_name.id.isMacroIdentifier());
var params = std.array_list.Managed([]const u8).init(pp.gpa);
defer params.deinit();
// Parse the parameter list.
var gnu_var_args: []const u8 = "";
var var_args = false;
while (true) {
var tok = tokenizer.nextNoWS();
if (tok.id == .r_paren) break;
if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list, .{});
if (tok.id == .ellipsis) {
var_args = true;
const r_paren = tokenizer.nextNoWS();
if (r_paren.id != .r_paren) {
try pp.err(r_paren, .missing_paren_param_list, .{});
try pp.err(l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
}
break;
}
if (!tok.id.isMacroIdentifier()) {
try pp.err(tok, .invalid_token_param_list, .{});
return skipToNl(tokenizer);
}
try params.append(pp.tokSlice(tok));
tok = tokenizer.nextNoWS();
if (tok.id == .ellipsis) {
try pp.err(tok, .gnu_va_macro, .{});
gnu_var_args = params.pop().?;
const r_paren = tokenizer.nextNoWS();
if (r_paren.id != .r_paren) {
try pp.err(r_paren, .missing_paren_param_list, .{});
try pp.err(l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
}
break;
} else if (tok.id == .r_paren) {
break;
} else if (tok.id != .comma) {
try pp.err(tok, .expected_comma_param_list, .{});
return skipToNl(tokenizer);
}
}
var need_ws = false;
// Collect the body tokens and validate # and ##'s found.
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
tok_loop: while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.nl, .eof => break,
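// Whitespace before the first body token is not recorded.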
.whitespace => need_ws = pp.token_buf.items.len != 0,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
if (need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
try pp.token_buf.append(tok);
},
.hash => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
const param = tokenizer.nextNoWS();
blk: {
if (var_args and param.id == .keyword_va_args) {
tok.id = .stringify_va_args;
try pp.token_buf.append(tok);
continue :tok_loop;
}
if (!param.id.isMacroIdentifier()) break :blk;
const s = pp.tokSlice(param);
if (mem.eql(u8, s, gnu_var_args)) {
tok.id = .stringify_va_args;
try pp.token_buf.append(tok);
continue :tok_loop;
}
for (params.items, 0..) |p, i| {
if (mem.eql(u8, p, s)) {
tok.id = .stringify_param;
tok.end = @intCast(i);
try pp.token_buf.append(tok);
continue :tok_loop;
}
}
}
try pp.err(param, .hash_not_followed_param, .{});
return skipToNl(tokenizer);
},
.hash_hash => {
need_ws = false;
// If ## appears at the beginning of the body the token buf is still empty;
// error out in this case.
if (pp.token_buf.items.len == 0) {
try pp.err(tok, .hash_hash_at_start, .{});
return skipToNl(tokenizer);
}
const saved_tokenizer = tokenizer.*;
const next = tokenizer.nextNoWSComments();
if (next.id == .nl or next.id == .eof) {
try pp.err(tok, .hash_hash_at_end, .{});
return;
}
tokenizer.* = saved_tokenizer;
// convert the previous token to .macro_param_no_expand if it was .macro_param
if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) {
pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand;
}
try pp.token_buf.append(tok);
},
.unterminated_string_literal, .unterminated_char_literal, .empty_char_literal => |tag| {
try pp.err(tok, invalidTokenDiagnostic(tag), .{});
try pp.token_buf.append(tok);
},
.unterminated_comment => try pp.err(tok, .unterminated_comment, .{}),
else => {
if (tok.id != .whitespace and need_ws) {
need_ws = false;
try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated });
}
if (var_args and tok.id == .keyword_va_args) {
// do nothing
} else if (var_args and tok.id == .keyword_va_opt) {
const opt_l_paren = tokenizer.next();
if (opt_l_paren.id != .l_paren) {
try pp.err(opt_l_paren, .va_opt_lparen, .{});
return skipToNl(tokenizer);
}
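// Record the byte range between the parentheses; `expandVaOpt` re-lexes this range when the macro is expanded.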
tok.start = opt_l_paren.end;
var parens: u32 = 0;
while (true) {
const opt_tok = tokenizer.next();
switch (opt_tok.id) {
.l_paren => parens += 1,
.r_paren => if (parens == 0) {
break;
} else {
parens -= 1;
},
.nl, .eof => {
try pp.err(opt_tok, .va_opt_rparen, .{});
try pp.err(opt_l_paren, .to_match_paren, .{});
return skipToNl(tokenizer);
},
.whitespace => {},
else => tok.end = opt_tok.end,
}
}
} else if (tok.id.isMacroIdentifier()) {
tok.id.simplifyMacroKeyword();
const s = pp.tokSlice(tok);
if (mem.eql(u8, gnu_var_args, s)) {
tok.id = .keyword_va_args;
} else for (params.items, 0..) |param, i| {
if (mem.eql(u8, param, s)) {
// NOTE: it makes no difference whether this is .macro_param or .macro_param_no_expand
// when the previous token was ##, because ## processing consumes this token
// with the same semantics either way.
tok.id = .macro_param;
tok.end = @intCast(i);
break;
}
}
}
try pp.token_buf.append(tok);
},
}
}
const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(define_tok, macro_name, .{
.is_func = true,
.params = param_list,
.var_args = var_args or gnu_var_args.len != 0,
.tokens = token_list,
.loc = macro_name.loc,
});
}
/// Handle an #embed directive
/// embedDirective : ("FILENAME" | <FILENAME>) embedParam*
/// embedParam : IDENTIFIER (:: IDENTIFIER)? '(' <tokens> ')'
fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
const first = tokenizer.nextNoWS();
const filename_tok = pp.findIncludeFilenameToken(first, tokenizer, .ignore_trailing_tokens) catch |er| switch (er) {
error.InvalidInclude => return,
else => |e| return e,
};
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename, .{});
return;
}
const filename = tok_slice[1 .. tok_slice.len - 1];
const include_type: Compilation.IncludeType = switch (filename_tok.id) {
.string_literal => .quotes,
.macro_string => .angle_brackets,
else => unreachable,
};
// Index into `token_buf`
const Range = struct {
start: u32,
end: u32,
fn expand(opt_range: ?@This(), pp_: *Preprocessor, tokenizer_: *Tokenizer) !void {
const range = opt_range orelse return;
const slice = pp_.token_buf.items[range.start..range.end];
for (slice) |tok| {
try pp_.expandMacro(tokenizer_, tok);
}
}
};
pp.token_buf.items.len = 0;
var limit: ?std.Io.Limit = null;
var prefix: ?Range = null;
var suffix: ?Range = null;
var if_empty: ?Range = null;
while (true) {
const param_first = tokenizer.nextNoWS();
switch (param_first.id) {
.nl, .eof => break,
.identifier => {},
else => {
try pp.err(param_first, .malformed_embed_param, .{});
continue;
},
}
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
const maybe_colon = tokenizer.colonColon();
const param = switch (maybe_colon.id) {
.colon_colon => blk: {
// vendor::param
const param = tokenizer.nextNoWS();
if (param.id != .identifier) {
try pp.err(param, .malformed_embed_param, .{});
continue;
}
const l_paren = tokenizer.nextNoWS();
if (l_paren.id != .l_paren) {
try pp.err(l_paren, .malformed_embed_param, .{});
continue;
}
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param_first)));
try pp.char_buf.appendSlice("::");
try pp.char_buf.appendSlice(Attribute.normalize(pp.tokSlice(param)));
break :blk pp.char_buf.items;
},
.l_paren => Attribute.normalize(pp.tokSlice(param_first)),
else => {
try pp.err(maybe_colon, .malformed_embed_param, .{});
continue;
},
};
const start: u32 = @intCast(pp.token_buf.items.len);
while (true) {
const next = tokenizer.nextNoWS();
if (next.id == .r_paren) break;
if (next.id == .eof) {
try pp.err(maybe_colon, .malformed_embed_param, .{});
break;
}
try pp.token_buf.append(next);
}
const end: u32 = @intCast(pp.token_buf.items.len);
if (std.mem.eql(u8, param, "limit")) {
if (limit != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"limit"});
continue;
}
if (start + 1 != end) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
const limit_tok = pp.token_buf.items[start];
if (limit_tok.id != .pp_num) {
try pp.err(param_first, .malformed_embed_limit, .{});
continue;
}
limit = .limited(std.fmt.parseInt(u32, pp.tokSlice(limit_tok), 10) catch {
try pp.err(limit_tok, .malformed_embed_limit, .{});
continue;
});
pp.token_buf.items.len = start;
} else if (std.mem.eql(u8, param, "prefix")) {
if (prefix != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"prefix"});
continue;
}
prefix = .{ .start = start, .end = end };
} else if (std.mem.eql(u8, param, "suffix")) {
if (suffix != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"suffix"});
continue;
}
suffix = .{ .start = start, .end = end };
} else if (std.mem.eql(u8, param, "if_empty")) {
if (if_empty != null) {
try pp.err(tokFromRaw(param_first), .duplicate_embed_param, .{"if_empty"});
continue;
}
if_empty = .{ .start = start, .end = end };
} else {
try pp.err(tokFromRaw(param_first), .unsupported_embed_param, .{param});
pp.token_buf.items.len = start;
}
}
const embed_bytes = (try pp.comp.findEmbed(filename, first.source, include_type, limit orelse .unlimited, pp.dep_file)) orelse
return pp.fatalNotFound(filename_tok, filename);
defer pp.comp.gpa.free(embed_bytes);
try Range.expand(prefix, pp, tokenizer);
if (embed_bytes.len == 0) {
try Range.expand(if_empty, pp, tokenizer);
try Range.expand(suffix, pp, tokenizer);
return;
}
try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas
// TODO: We currently only support systems with CHAR_BIT == 8
// If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes
// and correctly account for the target's endianness
{
const byte = embed_bytes[0];
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, "{d}", .{byte});
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok));
}
for (embed_bytes[1..]) |byte| {
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.print(pp.gpa, ",{d}", .{byte});
pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } });
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok));
}
try pp.comp.generated_buf.append(pp.gpa, '\n');
try Range.expand(suffix, pp, tokenizer);
}
/// Handle a #include directive.
fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInclude) MacroError!void {
const first = tokenizer.nextNoWS();
const new_source = findIncludeSource(pp, tokenizer, first, which) catch |er| switch (er) {
error.InvalidInclude => return,
else => |e| return e,
};
// Prevent stack overflow
pp.include_depth += 1;
defer pp.include_depth -= 1;
if (pp.include_depth > max_include_depth) {
const loc: Source.Location = .{ .id = first.source, .byte_offset = first.start, .line = first.line };
try pp.err(loc, .too_many_includes, .{});
return error.StopPreprocessing;
}
if (pp.include_guards.get(new_source.id)) |guard| {
if (pp.defines.contains(guard)) return;
}
if (pp.dep_file) |dep| try dep.addDependency(pp.gpa, new_source.path);
if (pp.verbose) {
pp.verboseLog(first, "include file {s}", .{new_source.path});
}
const token_state = pp.getTokenState();
try pp.addIncludeStart(new_source);
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {
for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa);
pp.restoreTokenState(token_state);
return;
},
else => |e| return e,
};
try eof.checkMsEof(new_source, pp.comp);
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
}
if (pp.linemarkers == .none) return;
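// Skip newline tokens following the #include so the resume linemarker points at the next real token.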
var next = first;
while (true) {
var tmp = tokenizer.*;
next = tmp.nextNoWS();
if (next.id != .nl) break;
tokenizer.* = tmp;
}
try pp.addIncludeResume(next.source, next.end, next.line);
}
/// Tokens that are part of a pragma directive can arise in 3 ways:
/// 1. Directly in the text via `#pragma ...`
/// 2. Via a string literal argument to `_Pragma`
/// 3. Via a stringified macro argument which is used as an argument to `_Pragma`
/// operator_loc: location of `_Pragma`; null if this is from `#pragma`
/// arg_locs: expansion locations of the argument to `_Pragma`; empty if `#pragma` or a raw string literal was used
fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs {
var tok = tokFromRaw(raw);
if (operator_loc) |loc| {
try tok.addExpansionLocation(pp.gpa, &.{loc});
}
try tok.addExpansionLocation(pp.gpa, arg_locs);
return tok;
}
pub fn addToken(pp: *Preprocessor, tok_arg: TokenWithExpansionLocs) !void {
const tok = try pp.unescapeUcn(tok_arg);
if (tok.expansion_locs) |expansion_locs| {
try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
}
try pp.tokens.append(pp.gpa, .{ .id = tok.id, .loc = tok.loc });
}
pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void {
if (tok.expansion_locs) |expansion_locs| {
pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
}
pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc });
}
pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
try pp.tokens.ensureTotalCapacity(pp.gpa, capacity);
try pp.expansion_entries.ensureTotalCapacity(pp.gpa, capacity);
}
pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
try pp.tokens.ensureUnusedCapacity(pp.gpa, capacity);
try pp.expansion_entries.ensureUnusedCapacity(pp.gpa, capacity);
}
/// Handle a pragma directive
fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void {
const name_tok = tokenizer.nextNoWS();
if (name_tok.id == .nl or name_tok.id == .eof) return;
try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs));
const pragma_start: u32 = @intCast(pp.tokens.len);
const name = pp.tokSlice(name_tok);
const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs);
try pp.addToken(pragma_name_tok);
while (true) {
const next_tok = tokenizer.next();
if (next_tok.id == .whitespace) continue;
if (next_tok.id == .eof) {
try pp.addToken(.{
.id = .nl,
.loc = .{ .id = .generated },
});
break;
}
try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs));
if (next_tok.id == .nl) break;
}
if (pp.comp.getPragma(name)) |prag| unknown: {
return prag.preprocessorCB(pp, pragma_start) catch |er| switch (er) {
error.UnknownPragma => break :unknown,
else => |e| return e,
};
}
try pp.err(pragma_name_tok, .unknown_pragma, .{});
}
fn findIncludeFilenameToken(
pp: *Preprocessor,
first_token: RawToken,
tokenizer: *Tokenizer,
trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof },
) !TokenWithExpansionLocs {
var first = first_token;
if (first.id == .angle_bracket_left) to_end: {
// The tokenizer does not handle <foo> include strings so do it here.
while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) {
switch (tokenizer.buf[tokenizer.index]) {
'>' => {
tokenizer.index += 1;
first.end = tokenizer.index;
first.id = .macro_string;
break :to_end;
},
'\n' => break,
else => {},
}
}
const loc: Source.Location = .{ .id = first.source, .byte_offset = tokenizer.index, .line = first.line };
try pp.err(loc, .header_str_closing, .{});
try pp.err(first, .header_str_match, .{});
}
const source_tok = tokFromRaw(first);
const filename_tok, const expanded_trailing = switch (source_tok.id) {
.string_literal, .macro_string => .{ source_tok, false },
else => expanded: {
// Try to expand if the argument is a macro.
pp.top_expansion_buf.items.len = 0;
defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
var trailing_toks: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse {
try pp.expectNl(tokenizer);
return error.InvalidInclude;
};
const start = pp.comp.generated_buf.items.len;
try pp.comp.generated_buf.appendSlice(pp.gpa, include_str);
break :expanded .{ try pp.makeGeneratedToken(start, switch (include_str[0]) {
'"' => .string_literal,
'<' => .macro_string,
else => unreachable,
}, pp.top_expansion_buf.items[0]), trailing_toks.len != 0 };
},
};
switch (trailing_token_behavior) {
.expect_nl_eof => {
// Error on extra tokens.
const nl = tokenizer.nextNoWS();
if ((nl.id != .nl and nl.id != .eof) or expanded_trailing) {
skipToNl(tokenizer);
try pp.err(filename_tok, .extra_tokens_directive_end, .{});
}
},
.ignore_trailing_tokens => if (expanded_trailing) {
try pp.err(filename_tok, .extra_tokens_directive_end, .{});
},
}
return filename_tok;
}
fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source {
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
if (tok_slice.len < 3) {
try pp.err(first, .empty_filename, .{});
return error.InvalidInclude;
}
// Find the file.
const filename = tok_slice[1 .. tok_slice.len - 1];
const include_type: Compilation.IncludeType = switch (filename_tok.id) {
.string_literal => .quotes,
.macro_string => .angle_brackets,
else => unreachable,
};
return (try pp.comp.findInclude(filename, first, include_type, which)) orelse
return pp.fatalNotFound(filename_tok, filename);
}
fn printLinemarker(
pp: *Preprocessor,
w: *std.Io.Writer,
line_no: u32,
source: Source,
start_resume: enum(u8) { start, @"resume", none },
) !void {
try w.writeByte('#');
if (pp.linemarkers == .line_directives) try w.writeAll("line");
try w.print(" {d} \"{f}\"", .{ line_no, fmtEscapes(source.path) });
if (pp.linemarkers == .numeric_directives) {
switch (start_resume) {
.none => {},
.start => try w.writeAll(" 1"),
.@"resume" => try w.writeAll(" 2"),
}
switch (source.kind) {
.user => {},
.system => try w.writeAll(" 3"),
.extern_c_system => try w.writeAll(" 3 4"),
}
}
try w.writeByte('\n');
}
// How many consecutive empty lines are needed before they are collapsed into a linemarker.
const collapse_newlines = 8;
pub const DumpMode = enum {
/// Standard preprocessor output; no macros
result_only,
/// Output only #define directives for all the macros defined during the execution of the preprocessor.
/// Only macros which are still defined at the end of preprocessing are printed.
/// Only the most recent definition is printed.
/// Defines are printed in arbitrary order.
macros_only,
/// Standard preprocessor output, but additionally output #define's and #undef's for macros as they are encountered
macros_and_result,
/// Same as macros_and_result, except only the macro name is printed for #define's
macro_names_and_result,
};
/// Pretty-print the macro define or undef at location `loc`.
/// We re-tokenize the directive because we are printing a macro that may have the same name as one in
/// `pp.defines` but a different definition (due to being #undef'ed and then redefined)
fn prettyPrintMacro(pp: *Preprocessor, w: *std.Io.Writer, loc: Source.Location, parts: enum { name_only, name_and_body }) !void {
const source = pp.comp.getSource(loc.id);
var tokenizer: Tokenizer = .{
.buf = source.buf,
.langopts = pp.comp.langopts,
.source = source.id,
.index = loc.byte_offset,
};
var prev_ws = false; // avoid printing multiple whitespace if /* */ comments are within the macro def
var saw_name = false; // do not print comments before the name token is seen.
while (true) {
const tok = tokenizer.next();
switch (tok.id) {
.comment => {
if (saw_name) {
prev_ws = false;
try w.print("{s}", .{pp.tokSlice(tok)});
}
},
.nl, .eof => break,
.whitespace => {
if (!prev_ws) {
try w.writeByte(' ');
prev_ws = true;
}
},
else => {
prev_ws = false;
try w.print("{s}", .{pp.tokSlice(tok)});
},
}
if (tok.id == .identifier or tok.id == .extended_identifier) {
if (parts == .name_only) break;
saw_name = true;
}
}
}
fn prettyPrintMacrosOnly(pp: *Preprocessor, w: *std.Io.Writer) !void {
for (pp.defines.values()) |macro| {
if (macro.is_builtin) continue;
try w.writeAll("#define ");
try pp.prettyPrintMacro(w, macro.loc, .name_and_body);
try w.writeByte('\n');
}
}
/// Pretty print tokens and try to preserve whitespace.
pub fn prettyPrintTokens(pp: *Preprocessor, w: *std.Io.Writer, macro_dump_mode: DumpMode) !void {
if (macro_dump_mode == .macros_only) {
return pp.prettyPrintMacrosOnly(w);
}
const tok_ids = pp.tokens.items(.id);
var i: u32 = 0;
var last_nl = true;
outer: while (true) : (i += 1) {
var cur: Token = pp.tokens.get(i);
switch (cur.id) {
.eof => {
if (!last_nl) try w.writeByte('\n');
try w.flush();
return;
},
.nl => {
var newlines: u32 = 0;
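// Count the run of consecutive newlines; long runs are collapsed and, when
// linemarkers are enabled, replaced with a linemarker.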
for (tok_ids[i..], i..) |id, j| {
if (id == .nl) {
newlines += 1;
} else if (id == .eof) {
if (!last_nl) try w.writeByte('\n');
try w.flush();
return;
} else if (id != .whitespace) {
if (pp.linemarkers == .none) {
if (newlines < 2) break;
} else if (newlines < collapse_newlines) {
break;
}
i = @intCast((j - 1) - @intFromBool(tok_ids[j - 1] == .whitespace));
if (!last_nl) try w.writeAll("\n");
if (pp.linemarkers != .none) {
const next = pp.tokens.get(i);
const source = pp.comp.getSource(next.loc.id);
const line_col = source.lineCol(next.loc);
try pp.printLinemarker(w, line_col.line_no, source, .none);
last_nl = true;
}
continue :outer;
}
}
last_nl = true;
try w.writeAll("\n");
},
.keyword_pragma => {
const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1));
const end_idx = mem.indexOfScalarPos(Token.Id, tok_ids, i, .nl) orelse i + 1;
const pragma_len = @as(u32, @intCast(end_idx)) - i;
if (pp.comp.getPragma(pragma_name)) |prag| {
if (!prag.shouldPreserveTokens(pp, i + 1)) {
try w.writeByte('\n');
i += pragma_len;
cur = pp.tokens.get(i);
continue;
}
}
try w.writeAll("#pragma");
i += 1;
while (true) : (i += 1) {
cur = pp.tokens.get(i);
if (cur.id == .nl) {
try w.writeByte('\n');
last_nl = true;
break;
}
try w.writeByte(' ');
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
}
},
.whitespace => {
var slice = pp.expandedSlice(cur);
while (mem.indexOfScalar(u8, slice, '\n')) |some| {
if (pp.linemarkers != .none) try w.writeByte('\n');
slice = slice[some + 1 ..];
}
for (slice) |_| try w.writeByte(' ');
last_nl = false;
},
.include_start => {
const source = pp.comp.getSource(cur.loc.id);
try pp.printLinemarker(w, 1, source, .start);
last_nl = true;
},
.include_resume => {
const source = pp.comp.getSource(cur.loc.id);
const line_col = source.lineCol(cur.loc);
if (!last_nl) try w.writeAll("\n");
try pp.printLinemarker(w, line_col.line_no, source, .@"resume");
last_nl = true;
},
.keyword_define, .keyword_undef => {
switch (macro_dump_mode) {
.macros_and_result, .macro_names_and_result => {
try w.writeByte('#');
try pp.prettyPrintMacro(w, cur.loc, if (macro_dump_mode == .macros_and_result) .name_and_body else .name_only);
last_nl = false;
},
.result_only => unreachable, // `pp.store_macro_tokens` should be false for standard preprocessor output
.macros_only => unreachable, // handled by prettyPrintMacrosOnly
}
},
else => {
const slice = pp.expandedSlice(cur);
try w.writeAll(slice);
last_nl = false;
},
}
}
}
/// Like `std.zig.fmtEscapes`, but for C strings. Hex escapes are used for any
/// non-ASCII/unprintable bytes to ensure that the string bytes do not change if
/// the encoding of the file is not UTF-8.
fn fmtEscapes(bytes: []const u8) FmtEscapes {
return .{ .bytes = bytes };
}
const FmtEscapes = struct {
bytes: []const u8,
pub fn format(ctx: FmtEscapes, w: *std.Io.Writer) !void {
for (ctx.bytes) |byte| switch (byte) {
'\n' => try w.writeAll("\\n"),
'\r' => try w.writeAll("\\r"),
'\t' => try w.writeAll("\\t"),
'\\' => try w.writeAll("\\\\"),
'"' => try w.writeAll("\\\""),
' ', '!', '#'...'&', '('...'[', ']'...'~' => try w.writeByte(byte),
// Use hex escapes for any non-ASCII/unprintable characters.
// This ensures that the parsed version of this string will end up
// containing the same bytes as the input regardless of encoding.
else => try w.print("\\x{x:0>2}", .{byte}),
};
}
};
test "Preserve pragma tokens sometimes" {
const gpa = std.testing.allocator;
const Test = struct {
fn runPreprocessor(source_text: []const u8) ![]const u8 {
var arena: std.heap.ArenaAllocator = .init(gpa);
defer arena.deinit();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
defer comp.deinit();
try comp.addDefaultPragmaHandlers();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
pp.preserve_whitespace = true;
assert(pp.linemarkers == .none);
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
try pp.addToken(eof);
var allocating: std.Io.Writer.Allocating = .init(gpa);
defer allocating.deinit();
try pp.prettyPrintTokens(&allocating.writer, .result_only);
return allocating.toOwnedSlice();
}
fn check(source_text: []const u8, expected: []const u8) !void {
const output = try runPreprocessor(source_text);
defer gpa.free(output);
try std.testing.expectEqualStrings(expected, output);
}
};
const preserve_gcc_diagnostic =
\\#pragma GCC diagnostic error "-Wnewline-eof"
\\#pragma GCC warning error "-Wnewline-eof"
\\int x;
\\#pragma GCC ignored error "-Wnewline-eof"
\\
;
try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic);
const omit_once =
\\#pragma once
\\int x;
\\#pragma once
\\
;
// TODO should only be one newline afterwards when emulating clang
try Test.check(omit_once, "\nint x;\n\n");
const omit_poison =
\\#pragma GCC poison foobar
\\
;
try Test.check(omit_poison, "\n");
}
test "destringify" {
const gpa = std.testing.allocator;
const Test = struct {
fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void {
pp.char_buf.clearRetainingCapacity();
try pp.char_buf.ensureUnusedCapacity(stringified.len);
pp.destringify(stringified);
try std.testing.expectEqualStrings(destringified, pp.char_buf.items);
}
};
var arena: std.heap.ArenaAllocator = .init(gpa);
defer arena.deinit();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena.allocator(), &diagnostics, std.fs.cwd());
defer comp.deinit();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n");
try Test.testDestringify(&pp,
\\ \"FOO BAR BAZ\"
,
\\ "FOO BAR BAZ"
);
try Test.testDestringify(&pp,
\\ \\t\\n
\\
,
\\ \t\n
\\
);
}
test "Include guards" {
const Test = struct {
/// This is here so that when #elifdef / #elifndef are added we don't forget
/// to test that they don't accidentally break include guard detection
fn pairsWithIfndef(tok_id: RawToken.Id) bool {
return switch (tok_id) {
.keyword_elif,
.keyword_elifdef,
.keyword_elifndef,
.keyword_else,
=> true,
.keyword_include,
.keyword_include_next,
.keyword_embed,
.keyword_define,
.keyword_defined,
.keyword_undef,
.keyword_ifdef,
.keyword_ifndef,
.keyword_error,
.keyword_warning,
.keyword_pragma,
.keyword_line,
.keyword_endif,
=> false,
else => unreachable,
};
}
fn skippable(tok_id: RawToken.Id) bool {
return switch (tok_id) {
.keyword_defined, .keyword_va_args, .keyword_va_opt, .keyword_endif => true,
else => false,
};
}
fn testIncludeGuard(gpa: std.mem.Allocator, comptime template: []const u8, tok_id: RawToken.Id, expected_guards: u32) !void {
var arena_state: std.heap.ArenaAllocator = .init(gpa);
defer arena_state.deinit();
const arena = arena_state.allocator();
var diagnostics: Diagnostics = .{ .output = .ignore };
var comp = Compilation.init(gpa, arena, &diagnostics, std.fs.cwd());
defer comp.deinit();
var pp = Preprocessor.init(&comp, .default);
defer pp.deinit();
const path = try std.fs.path.join(arena, &.{ ".", "bar.h" });
_ = try comp.addSourceFromBuffer(path, "int bar = 5;\n");
var buf = std.array_list.Managed(u8).init(gpa);
defer buf.deinit();
switch (tok_id) {
.keyword_include, .keyword_include_next => try buf.print(template, .{ tok_id.lexeme().?, " \"bar.h\"" }),
.keyword_define, .keyword_undef => try buf.print(template, .{ tok_id.lexeme().?, " BAR" }),
.keyword_ifndef,
.keyword_ifdef,
.keyword_elifdef,
.keyword_elifndef,
=> try buf.print(template, .{ tok_id.lexeme().?, " BAR\n#endif" }),
else => try buf.print(template, .{ tok_id.lexeme().?, "" }),
}
const source = try comp.addSourceFromBuffer("test.h", buf.items);
_ = try pp.preprocess(source);
try std.testing.expectEqual(expected_guards, pp.include_guards.count());
}
};
const tags = std.meta.tags(RawToken.Id);
for (tags) |tag| {
if (Test.skippable(tag)) continue;
var copy = tag;
copy.simplifyMacroKeyword();
if (copy != tag or tag == .keyword_else) {
const inside_ifndef_template =
\\//Leading comment (should be ignored)
\\
\\#ifndef FOO
\\#{s}{s}
\\#endif
;
const expected_guards: u32 = if (Test.pairsWithIfndef(tag)) 0 else 1;
try Test.testIncludeGuard(std.testing.allocator, inside_ifndef_template, tag, expected_guards);
const outside_ifndef_template =
\\#ifndef FOO
\\#endif
\\#{s}{s}
;
try Test.testIncludeGuard(std.testing.allocator, outside_ifndef_template, tag, 0);
}
}
}