diff --git a/lib/std/c.zig b/lib/std/c.zig index c912c72418..4d5e72f502 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -2,6 +2,12 @@ const builtin = @import("builtin"); const std = @import("std"); const page_size = std.mem.page_size; +pub const tokenizer = @import("c/tokenizer.zig"); +pub const Token = tokenizer.Token; +pub const Tokenizer = tokenizer.Tokenizer; +pub const parse = @import("c/parse.zig").parse; +pub const ast = @import("c/ast.zig"); + pub usingnamespace @import("os/bits.zig"); pub usingnamespace switch (builtin.os) { diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig new file mode 100644 index 0000000000..bb8c01f138 --- /dev/null +++ b/lib/std/c/ast.zig @@ -0,0 +1,681 @@ +const std = @import("std"); +const SegmentedList = std.SegmentedList; +const Token = std.c.Token; +const Source = std.c.tokenizer.Source; + +pub const TokenIndex = usize; + +pub const Tree = struct { + tokens: TokenList, + sources: SourceList, + root_node: *Node.Root, + arena_allocator: std.heap.ArenaAllocator, + msgs: MsgList, + + pub const SourceList = SegmentedList(Source, 4); + pub const TokenList = Source.TokenList; + pub const MsgList = SegmentedList(Msg, 0); + + pub fn deinit(self: *Tree) void { + // Here we copy the arena allocator into stack memory, because + // otherwise it would destroy itself while it was still working. 
+ var arena_allocator = self.arena_allocator; + arena_allocator.deinit(); + // self is destroyed + } + + pub fn tokenSlice(tree: *Tree, token: TokenIndex) []const u8 { + return tree.tokens.at(token).slice(); + } + + pub fn tokenEql(tree: *Tree, a: TokenIndex, b: TokenIndex) bool { + const atok = tree.tokens.at(a); + const btok = tree.tokens.at(b); + return atok.eql(btok.*); + } +}; + +pub const Msg = struct { + kind: enum { + Error, + Warning, + Note, + }, + inner: Error, +}; + +pub const Error = union(enum) { + InvalidToken: SingleTokenError("invalid token '{}'"), + ExpectedToken: ExpectedToken, + ExpectedExpr: SingleTokenError("expected expression, found '{}'"), + ExpectedTypeName: SingleTokenError("expected type name, found '{}'"), + ExpectedFnBody: SingleTokenError("expected function body, found '{}'"), + ExpectedDeclarator: SingleTokenError("expected declarator, found '{}'"), + ExpectedInitializer: SingleTokenError("expected initializer, found '{}'"), + ExpectedEnumField: SingleTokenError("expected enum field, found '{}'"), + ExpectedType: SingleTokenError("expected enum field, found '{}'"), + InvalidTypeSpecifier: InvalidTypeSpecifier, + InvalidStorageClass: SingleTokenError("invalid storage class, found '{}'"), + InvalidDeclarator: SimpleError("invalid declarator"), + DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), + DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"), + MustUseKwToRefer: MustUseKwToRefer, + FnSpecOnNonFn: SingleTokenError("function specifier '{}' on non function"), + NothingDeclared: SimpleError("declaration doesn't declare anything"), + QualifierIgnored: SingleTokenError("qualifier '{}' ignored"), + + pub fn render(self: *const Error, tree: *Tree, stream: var) !void { + switch (self.*) { + .InvalidToken => |*x| return x.render(tree, stream), + .ExpectedToken => |*x| return x.render(tree, stream), + .ExpectedExpr => |*x| return x.render(tree, stream), + .ExpectedTypeName => |*x| return 
x.render(tree, stream), + .ExpectedDeclarator => |*x| return x.render(tree, stream), + .ExpectedFnBody => |*x| return x.render(tree, stream), + .ExpectedInitializer => |*x| return x.render(tree, stream), + .ExpectedEnumField => |*x| return x.render(tree, stream), + .ExpectedType => |*x| return x.render(tree, stream), + .InvalidTypeSpecifier => |*x| return x.render(tree, stream), + .InvalidStorageClass => |*x| return x.render(tree, stream), + .InvalidDeclarator => |*x| return x.render(tree, stream), + .DuplicateQualifier => |*x| return x.render(tree, stream), + .DuplicateSpecifier => |*x| return x.render(tree, stream), + .MustUseKwToRefer => |*x| return x.render(tree, stream), + .FnSpecOnNonFn => |*x| return x.render(tree, stream), + .NothingDeclared => |*x| return x.render(tree, stream), + .QualifierIgnored => |*x| return x.render(tree, stream), + } + } + + pub fn loc(self: *const Error) TokenIndex { + switch (self.*) { + .InvalidToken => |x| return x.token, + .ExpectedToken => |x| return x.token, + .ExpectedExpr => |x| return x.token, + .ExpectedTypeName => |x| return x.token, + .ExpectedDeclarator => |x| return x.token, + .ExpectedFnBody => |x| return x.token, + .ExpectedInitializer => |x| return x.token, + .ExpectedEnumField => |x| return x.token, + .ExpectedType => |*x| return x.token, + .InvalidTypeSpecifier => |x| return x.token, + .InvalidStorageClass => |x| return x.token, + .InvalidDeclarator => |x| return x.token, + .DuplicateQualifier => |x| return x.token, + .DuplicateSpecifier => |x| return x.token, + .MustUseKwToRefer => |*x| return x.name, + .FnSpecOnNonFn => |*x| return x.name, + .NothingDeclared => |*x| return x.name, + .QualifierIgnored => |*x| return x.name, + } + } + + pub const ExpectedToken = struct { + token: TokenIndex, + expected_id: @TagType(Token.Id), + + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { + const found_token = tree.tokens.at(self.token); + if (found_token.id == .Invalid) { + return 
stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()}); + } else { + const token_name = found_token.id.symbol(); + return stream.print("expected '{}', found '{}'", .{ self.expected_id.symbol(), token_name }); + } + } + }; + + pub const InvalidTypeSpecifier = struct { + token: TokenIndex, + type_spec: *Node.TypeSpec, + + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { + try stream.write("invalid type specifier '"); + try type_spec.spec.print(tree, stream); + const token_name = tree.tokens.at(self.token).id.symbol(); + return stream.print("{}'", .{token_name}); + } + }; + + pub const MustUseKwToRefer = struct { + kw: TokenIndex, + name: TokenIndex, + + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { + return stream.print("must use '{}' tag to refer to type '{}'", .{ tree.slice(kw), tree.slice(name) }); + } + }; + + fn SingleTokenError(comptime msg: []const u8) type { + return struct { + token: TokenIndex, + + pub fn render(self: *const @This(), tree: *Tree, stream: var) !void { + const actual_token = tree.tokens.at(self.token); + return stream.print(msg, .{actual_token.id.symbol()}); + } + }; + } + + fn SimpleError(comptime msg: []const u8) type { + return struct { + const ThisError = @This(); + + token: TokenIndex, + + pub fn render(self: *const ThisError, tokens: *Tree.TokenList, stream: var) !void { + return stream.write(msg); + } + }; + } +}; + +pub const Type = struct { + pub const TypeList = std.SegmentedList(*Type, 4); + @"const": bool = false, + atomic: bool = false, + @"volatile": bool = false, + restrict: bool = false, + + id: union(enum) { + Int: struct { + id: Id, + is_signed: bool, + + pub const Id = enum { + Char, + Short, + Int, + Long, + LongLong, + }; + }, + Float: struct { + id: Id, + + pub const Id = enum { + Float, + Double, + LongDouble, + }; + }, + Pointer: *Type, + Function: struct { + return_type: *Type, + param_types: TypeList, + }, + Typedef: *Type, + Record: 
*Node.RecordType, + Enum: *Node.EnumType, + + /// Special case for macro parameters that can be any type. + /// Only present if `retain_macros == true`. + Macro, + }, +}; + +pub const Node = struct { + id: Id, + + pub const Id = enum { + Root, + EnumField, + RecordField, + RecordDeclarator, + JumpStmt, + ExprStmt, + LabeledStmt, + CompoundStmt, + IfStmt, + SwitchStmt, + WhileStmt, + DoStmt, + ForStmt, + StaticAssert, + Declarator, + Pointer, + FnDecl, + Typedef, + VarDecl, + }; + + pub const Root = struct { + base: Node = Node{ .id = .Root }, + decls: DeclList, + eof: TokenIndex, + + pub const DeclList = SegmentedList(*Node, 4); + }; + + pub const DeclSpec = struct { + storage_class: union(enum) { + Auto: TokenIndex, + Extern: TokenIndex, + Register: TokenIndex, + Static: TokenIndex, + Typedef: TokenIndex, + None, + } = .None, + thread_local: ?TokenIndex = null, + type_spec: TypeSpec = TypeSpec{}, + fn_spec: union(enum) { + Inline: TokenIndex, + Noreturn: TokenIndex, + None, + } = .None, + align_spec: ?struct { + alignas: TokenIndex, + expr: *Node, + rparen: TokenIndex, + } = null, + }; + + pub const TypeSpec = struct { + qual: TypeQual = TypeQual{}, + spec: union(enum) { + /// error or default to int + None, + Void: TokenIndex, + Char: struct { + sign: ?TokenIndex = null, + char: TokenIndex, + }, + Short: struct { + sign: ?TokenIndex = null, + short: TokenIndex = null, + int: ?TokenIndex = null, + }, + Int: struct { + sign: ?TokenIndex = null, + int: ?TokenIndex = null, + }, + Long: struct { + sign: ?TokenIndex = null, + long: TokenIndex, + longlong: ?TokenIndex = null, + int: ?TokenIndex = null, + }, + Float: struct { + float: TokenIndex, + complex: ?TokenIndex = null, + }, + Double: struct { + long: ?TokenIndex = null, + double: ?TokenIndex, + complex: ?TokenIndex = null, + }, + Bool: TokenIndex, + Atomic: struct { + atomic: TokenIndex, + typename: *Node, + rparen: TokenIndex, + }, + Enum: *EnumType, + Record: *RecordType, + Typedef: struct { + sym: TokenIndex, 
+ sym_type: *Type, + }, + + pub fn print(self: *@This(), self: *const @This(), tree: *Tree, stream: var) !void { + switch (self.spec) { + .None => unreachable, + .Void => |index| try stream.write(tree.slice(index)), + .Char => |char| { + if (char.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(char.char)); + }, + .Short => |short| { + if (short.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(short.short)); + if (short.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Int => |int| { + if (int.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + if (int.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Long => |long| { + if (long.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(long.long)); + if (long.longlong) |l| { + try stream.writeByte(' '); + try stream.write(tree.slice(l)); + } + if (long.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Float => |float| { + try stream.write(tree.slice(float.float)); + if (float.complex) |c| { + try stream.writeByte(' '); + try stream.write(tree.slice(c)); + } + }, + .Double => |double| { + if (double.long) |l| { + try stream.write(tree.slice(l)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(double.double)); + if (double.complex) |c| { + try stream.writeByte(' '); + try stream.write(tree.slice(c)); + } + }, + .Bool => |index| try stream.write(tree.slice(index)), + .Typedef => |typedef| try stream.write(tree.slice(typedef.sym)), + else => try stream.print("TODO print {}", self.spec), + } + } + } = .None, + }; + + pub const EnumType = struct { + tok: TokenIndex, + name: ?TokenIndex, + body: ?struct { + lbrace: TokenIndex, + + /// always EnumField + fields: FieldList, + rbrace: TokenIndex, + }, 
+ + pub const FieldList = Root.DeclList; + }; + + pub const EnumField = struct { + base: Node = Node{ .id = .EnumField }, + name: TokenIndex, + value: ?*Node, + }; + + pub const RecordType = struct { + tok: TokenIndex, + kind: enum { + Struct, + Union, + }, + name: ?TokenIndex, + body: ?struct { + lbrace: TokenIndex, + + /// RecordField or StaticAssert + fields: FieldList, + rbrace: TokenIndex, + }, + + pub const FieldList = Root.DeclList; + }; + + pub const RecordField = struct { + base: Node = Node{ .id = .RecordField }, + type_spec: TypeSpec, + declarators: DeclaratorList, + semicolon: TokenIndex, + + pub const DeclaratorList = Root.DeclList; + }; + + pub const RecordDeclarator = struct { + base: Node = Node{ .id = .RecordDeclarator }, + declarator: ?*Declarator, + bit_field_expr: ?*Expr, + }; + + pub const TypeQual = struct { + @"const": ?TokenIndex = null, + atomic: ?TokenIndex = null, + @"volatile": ?TokenIndex = null, + restrict: ?TokenIndex = null, + }; + + pub const JumpStmt = struct { + base: Node = Node{ .id = .JumpStmt }, + ltoken: TokenIndex, + kind: union(enum) { + Break, + Continue, + Return: ?*Node, + Goto: TokenIndex, + }, + semicolon: TokenIndex, + }; + + pub const ExprStmt = struct { + base: Node = Node{ .id = .ExprStmt }, + expr: ?*Expr, + semicolon: TokenIndex, + }; + + pub const LabeledStmt = struct { + base: Node = Node{ .id = .LabeledStmt }, + kind: union(enum) { + Label: TokenIndex, + Case: TokenIndex, + Default: TokenIndex, + }, + stmt: *Node, + }; + + pub const CompoundStmt = struct { + base: Node = Node{ .id = .CompoundStmt }, + lbrace: TokenIndex, + statements: StmtList, + rbrace: TokenIndex, + + pub const StmtList = Root.DeclList; + }; + + pub const IfStmt = struct { + base: Node = Node{ .id = .IfStmt }, + @"if": TokenIndex, + cond: *Node, + body: *Node, + @"else": ?struct { + tok: TokenIndex, + body: *Node, + }, + }; + + pub const SwitchStmt = struct { + base: Node = Node{ .id = .SwitchStmt }, + @"switch": TokenIndex, + expr: *Expr, + 
rparen: TokenIndex, + stmt: *Node, + }; + + pub const WhileStmt = struct { + base: Node = Node{ .id = .WhileStmt }, + @"while": TokenIndex, + cond: *Expr, + rparen: TokenIndex, + body: *Node, + }; + + pub const DoStmt = struct { + base: Node = Node{ .id = .DoStmt }, + do: TokenIndex, + body: *Node, + @"while": TokenIndex, + cond: *Expr, + semicolon: TokenIndex, + }; + + pub const ForStmt = struct { + base: Node = Node{ .id = .ForStmt }, + @"for": TokenIndex, + init: ?*Node, + cond: ?*Expr, + semicolon: TokenIndex, + incr: ?*Expr, + rparen: TokenIndex, + body: *Node, + }; + + pub const StaticAssert = struct { + base: Node = Node{ .id = .StaticAssert }, + assert: TokenIndex, + expr: *Node, + semicolon: TokenIndex, + }; + + pub const Declarator = struct { + base: Node = Node{ .id = .Declarator }, + pointer: ?*Pointer, + prefix: union(enum) { + None, + Identifer: TokenIndex, + Complex: struct { + lparen: TokenIndex, + inner: *Node, + rparen: TokenIndex, + }, + }, + suffix: union(enum) { + None, + Fn: struct { + lparen: TokenIndex, + params: Params, + rparen: TokenIndex, + }, + Array: Arrays, + }, + + pub const Arrays = std.SegmentedList(*Array, 2); + pub const Params = std.SegmentedList(*Param, 4); + }; + + pub const Array = struct { + lbracket: TokenIndex, + inner: union(enum) { + Inferred, + Unspecified: TokenIndex, + Variable: struct { + asterisk: ?TokenIndex, + static: ?TokenIndex, + qual: TypeQual, + expr: *Expr, + }, + }, + rbracket: TokenIndex, + }; + + pub const Pointer = struct { + base: Node = Node{ .id = .Pointer }, + asterisk: TokenIndex, + qual: TypeQual, + pointer: ?*Pointer, + }; + + pub const Param = struct { + kind: union(enum) { + Variable, + Old: TokenIndex, + Normal: struct { + decl_spec: *DeclSpec, + declarator: *Node, + }, + }, + }; + + pub const FnDecl = struct { + base: Node = Node{ .id = .FnDecl }, + decl_spec: DeclSpec, + declarator: *Declarator, + old_decls: OldDeclList, + body: ?*CompoundStmt, + + pub const OldDeclList = SegmentedList(*Node, 
0); + }; + + pub const Typedef = struct { + base: Node = Node{ .id = .Typedef }, + decl_spec: DeclSpec, + declarators: DeclaratorList, + semicolon: TokenIndex, + + pub const DeclaratorList = Root.DeclList; + }; + + pub const VarDecl = struct { + base: Node = Node{ .id = .VarDecl }, + decl_spec: DeclSpec, + initializers: Initializers, + semicolon: TokenIndex, + + pub const Initializers = Root.DeclList; + }; + + pub const Initialized = struct { + base: Node = Node{ .id = Initialized }, + declarator: *Declarator, + eq: TokenIndex, + init: Initializer, + }; + + pub const Initializer = union(enum) { + list: struct { + initializers: InitializerList, + rbrace: TokenIndex, + }, + expr: *Expr, + pub const InitializerList = std.SegmentedList(*Initializer, 4); + }; + + pub const Macro = struct { + base: Node = Node{ .id = Macro }, + kind: union(enum) { + Undef: []const u8, + Fn: struct { + params: []const []const u8, + expr: *Expr, + }, + Expr: *Expr, + }, + }; +}; + +pub const Expr = struct { + id: Id, + ty: *Type, + value: union(enum) { + None, + }, + + pub const Id = enum { + Infix, + Literal, + }; + + pub const Infix = struct { + base: Expr = Expr{ .id = .Infix }, + lhs: *Expr, + op_token: TokenIndex, + op: Op, + rhs: *Expr, + + pub const Op = enum {}; + }; +}; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig new file mode 100644 index 0000000000..dd646e06d6 --- /dev/null +++ b/lib/std/c/parse.zig @@ -0,0 +1,1431 @@ +const std = @import("std"); +const mem = std.mem; +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ast = std.c.ast; +const Node = ast.Node; +const Type = ast.Type; +const Tree = ast.Tree; +const TokenIndex = ast.TokenIndex; +const Token = std.c.Token; +const TokenIterator = ast.Tree.TokenList.Iterator; + +pub const Error = error{ParseError} || Allocator.Error; + +pub const Options = struct { + // /// Keep simple macros unexpanded and add the definitions to the ast + // retain_macros: bool = false, + /// Warning or error + 
warn_as_err: union(enum) { + /// All warnings are warnings + None, + + /// Some warnings are errors + Some: []@TagType(ast.Error), + + /// All warnings are errors + All, + } = .All, +}; + +/// Result should be freed with tree.deinit() when there are +/// no more references to any of the tokens or nodes. +pub fn parse(allocator: *Allocator, source: []const u8, options: Options) !*Tree { + const tree = blk: { + // This block looks unnecessary, but is a "foot-shield" to prevent the SegmentedLists + // from being initialized with a pointer to this `arena`, which is created on + // the stack. Following code should instead refer to `&tree.arena_allocator`, a + // pointer to data which lives safely on the heap and will outlive `parse`. + var arena = std.heap.ArenaAllocator.init(allocator); + errdefer arena.deinit(); + const tree = try arena.allocator.create(ast.Tree); + tree.* = .{ + .root_node = undefined, + .arena_allocator = arena, + .tokens = undefined, + .sources = undefined, + }; + break :blk tree; + }; + errdefer tree.deinit(); + const arena = &tree.arena_allocator.allocator; + + tree.tokens = ast.Tree.TokenList.init(arena); + tree.sources = ast.Tree.SourceList.init(arena); + + var tokenizer = std.zig.Tokenizer.init(source); + while (true) { + const tree_token = try tree.tokens.addOne(); + tree_token.* = tokenizer.next(); + if (tree_token.id == .Eof) break; + } + // TODO preprocess here + var it = tree.tokens.iterator(0); + + while (true) { + const tok = it.peek().?.id; + switch (id) { + .LineComment, + .MultiLineComment, + => { + _ = it.next(); + }, + else => break, + } + } + + var parse_arena = std.heap.ArenaAllocator.init(allocator); + defer parse_arena.deinit(); + + var parser = Parser{ + .scopes = Parser.SymbolList.init(allocator), + .arena = &parse_arena.allocator, + .it = &it, + .tree = tree, + .options = options, + }; + defer parser.symbols.deinit(); + + tree.root_node = try parser.root(); + return tree; +} + +const Parser = struct { + arena: *Allocator, + 
it: *TokenIterator, + tree: *Tree, + + arena: *Allocator, + scopes: ScopeList, + options: Options, + + const ScopeList = std.SegmentedLists(Scope); + const SymbolList = std.SegmentedLists(Symbol); + + const Scope = struct { + kind: ScopeKind, + syms: SymbolList, + }; + + const Symbol = struct { + name: []const u8, + ty: *Type, + }; + + const ScopeKind = enum { + Block, + Loop, + Root, + Switch, + }; + + fn pushScope(parser: *Parser, kind: ScopeKind) !void { + const new = try parser.scopes.addOne(); + new.* = .{ + .kind = kind, + .syms = SymbolList.init(parser.arena), + }; + } + + fn popScope(parser: *Parser, len: usize) void { + _ = parser.scopes.pop(); + } + + fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Symbol { + const name = parser.tree.tokenSlice(tok); + var scope_it = parser.scopes.iterator(parser.scopes.len); + while (scope_it.prev()) |scope| { + var sym_it = scope.syms.iterator(scope.syms.len); + while (sym_it.prev()) |sym| { + if (mem.eql(u8, sym.name, name)) { + return sym; + } + } + } + return null; + } + + fn declareSymbol(parser: *Parser, type_spec: Node.TypeSpec, dr: *Node.Declarator) Error!void { + return; // TODO + } + + /// Root <- ExternalDeclaration* eof + fn root(parser: *Parser) Allocator.Error!*Node.Root { + try parser.pushScope(.Root); + defer parser.popScope(); + const node = try parser.arena.create(Node.Root); + node.* = .{ + .decls = Node.Root.DeclList.init(parser.arena), + .eof = undefined, + }; + while (parser.externalDeclarations() catch |e| switch (e) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => return node, + }) |decl| { + try node.decls.push(decl); + } + node.eof = parser.eatToken(.Eof) orelse return node; + return node; + } + + /// ExternalDeclaration + /// <- DeclSpec Declarator OldStyleDecl* CompoundStmt + /// / Declaration + /// OldStyleDecl <- DeclSpec Declarator (COMMA Declarator)* SEMICOLON + fn externalDeclarations(parser: *Parser) !?*Node { + return parser.declarationExtra(false); + } + + /// 
Declaration + /// <- DeclSpec DeclInit SEMICOLON + /// / StaticAssert + /// DeclInit <- Declarator (EQUAL Initializer)? (COMMA Declarator (EQUAL Initializer)?)* + fn declaration(parser: *Parser) !?*Node { + return parser.declarationExtra(true); + } + + fn declarationExtra(parser: *Parser, local: bool) !?*Node { + if (try parser.staticAssert()) |decl| return decl; + const begin = parser.it.index + 1; + var ds = Node.DeclSpec{}; + const got_ds = try parser.declSpec(&ds); + if (local and !got_ds) { + // not a declaration + return null; + } + switch (ds.storage_class) { + .Auto, .Register => |tok| return parser.err(.{ + .InvalidStorageClass = .{ .token = tok }, + }), + .Typedef => { + const node = try parser.arena.create(Node.Typedef); + node.* = .{ + .decl_spec = ds, + .declarators = Node.Typedef.DeclaratorList.init(parser.arena), + .semicolon = undefined, + }; + while (true) { + const dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + })); + try parser.declareSymbol(ds.type_spec, dr); + try node.declarators.push(&dr.base); + if (parser.eatToken(.Comma)) |_| {} else break; + } + return &node.base; + }, + else => {}, + } + var first_dr = try parser.declarator(.Must); + if (first_dr != null and declaratorIsFunction(first_dr.?)) { + // TODO typedeffed fn proto-only + const dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); + try parser.declareSymbol(ds.type_spec, dr); + var old_decls = Node.FnDecl.OldDeclList.init(parser.arena); + const body = if (parser.eatToken(.Semicolon)) |_| + null + else blk: { + if (local) { + // TODO nested function warning + } + // TODO first_dr.is_old + // while (true) { + // var old_ds = Node.DeclSpec{}; + // if (!(try parser.declSpec(&old_ds))) { + // // not old decl + // break; + // } + // var old_dr = (try parser.declarator(.Must)); + // // if (old_dr == null) + // // try parser.err(.{ + // // .NoParamName = .{ .token = 
parser.it.index }, + // // }); + // // try old_decls.push(decl); + // } + const body_node = (try parser.compoundStmt()) orelse return parser.err(.{ + .ExpectedFnBody = .{ .token = parser.it.index }, + }); + break :blk @fieldParentPtr(Node.CompoundStmt, "base", body_node); + }; + + const node = try parser.arena.create(Node.FnDecl); + node.* = .{ + .decl_spec = ds, + .declarator = dr, + .old_decls = old_decls, + .body = body, + }; + return &node.base; + } else { + switch (ds.fn_spec) { + .Inline, .Noreturn => |tok| return parser.err(.{ + .FnSpecOnNonFn = .{ .token = tok }, + }), + else => {}, + } + // TODO threadlocal without static or extern on local variable + const node = try parser.arena.create(Node.VarDecl); + node.* = .{ + .decl_spec = ds, + .initializers = Node.VarDecl.Initializers.init(parser.arena), + .semicolon = undefined, + }; + if (first_dr == null) { + node.semicolon = try parser.expectToken(.Semicolon); + const ok = switch (ds.type_spec.spec) { + .Enum => |e| e.name != null, + .Record => |r| r.name != null, + else => false, + }; + const q = ds.type_spec.qual; + if (!ok) + try parser.warn(.{ + .NothingDeclared = .{ .token = begin }, + }) + else if (q.@"const" orelse q.atomic orelse q.@"volatile" orelse q.restrict) |tok| + try parser.warn(.{ + .QualifierIgnored = .{ .token = tok }, + }); + return &node.base; + } + var dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); + while (true) { + try parser.declareSymbol(ds.type_spec, dr); + if (parser.eatToken(.Equal)) |tok| { + try node.initializers.push((try parser.initializer(dr)) orelse return parser.err(.{ + .ExpectedInitializer = .{ .token = parser.it.index }, + })); + } else + try node.initializers.push(&dr.base); + if (parser.eatToken(.Comma) != null) break; + dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + })); + } + node.semicolon = try parser.expectToken(.Semicolon); + return 
&node.base; + } + } + + fn declaratorIsFunction(node: *Node) bool { + if (node.id != .Declarator) return false; + assert(node.id == .Declarator); + const dr = @fieldParentPtr(Node.Declarator, "base", node); + if (dr.suffix != .Fn) return false; + switch (dr.prefix) { + .None, .Identifer => return true, + .Complex => |inner| { + var inner_node = inner.inner; + while (true) { + if (inner_node.id != .Declarator) return false; + assert(inner_node.id == .Declarator); + const inner_dr = @fieldParentPtr(Node.Declarator, "base", inner_node); + if (inner_dr.pointer != null) return false; + switch (inner_dr.prefix) { + .None, .Identifer => return true, + .Complex => |c| inner_node = c.inner, + } + } + }, + } + } + + /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON + fn staticAssert(parser: *Parser) !?*Node { + const tok = parser.eatToken(.Keyword_static_assert) orelse return null; + _ = try parser.expectToken(.LParen); + const const_expr = (try parser.constExpr()) orelse parser.err(.{ + .ExpectedExpr = .{ .token = parser.it.index }, + }); + _ = try parser.expectToken(.Comma); + const str = try parser.expectToken(.StringLiteral); + _ = try parser.expectToken(.RParen); + const node = try parser.arena.create(Node.StaticAssert); + node.* = .{ + .assert = tok, + .expr = const_expr, + .semicolon = try parser.expectToken(.Semicolon), + }; + return &node.base; + } + + /// DeclSpec <- (StorageClassSpec / TypeSpec / FnSpec / AlignSpec)* + /// returns true if any tokens were consumed + fn declSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { + var got = false; + while ((try parser.storageClassSpec(ds)) or (try parser.typeSpec(&ds.type_spec)) or (try parser.fnSpec(ds)) or (try parser.alignSpec(ds))) { + got = true; + } + return got; + } + + /// StorageClassSpec + /// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register + fn storageClassSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { 
+ blk: { + if (parser.eatToken(.Keyword_typedef)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Typedef = tok }; + } else if (parser.eatToken(.Keyword_extern)) |tok| { + if (ds.storage_class != .None) + break :blk; + ds.storage_class = .{ .Extern = tok }; + } else if (parser.eatToken(.Keyword_static)) |tok| { + if (ds.storage_class != .None) + break :blk; + ds.storage_class = .{ .Static = tok }; + } else if (parser.eatToken(.Keyword_thread_local)) |tok| { + switch (ds.storage_class) { + .None, .Extern, .Static => {}, + else => break :blk, + } + ds.thread_local = tok; + } else if (parser.eatToken(.Keyword_auto)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Auto = tok }; + } else if (parser.eatToken(.Keyword_register)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Register = tok }; + } else return false; + return true; + } + try parser.warn(.{ + .DuplicateSpecifier = .{ .token = parser.it.index }, + }); + return true; + } + + /// TypeSpec + /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double + /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / + /// / Keyword_atomic LPAREN TypeName RPAREN + /// / EnumSpec + /// / RecordSpec + /// / IDENTIFIER // typedef name + /// / TypeQual + fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool { + blk: { + if (parser.eatToken(.Keyword_void)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec = .{ .Void = tok }; + } else if (parser.eatToken(.Keyword_char)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Char = .{ + .char = tok, + }, + }; + }, + .Int => |int| { + if (int.int != null) + break :blk; + type_spec.spec = .{ + .Char = .{ + .char = tok, + .sign = int.sign, + }, + }; + }, + else => break 
:blk, + } + } else if (parser.eatToken(.Keyword_short)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Short = .{ + .short = tok, + }, + }; + }, + .Int => |int| { + if (int.int != null) + break :blk; + type_spec.spec = .{ + .Short = .{ + .short = tok, + .sign = int.sign, + }, + }; + }, + else => break :blk, + } + } else if (parser.eatToken(.Keyword_long)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Long = .{ + .long = tok, + }, + }; + }, + .Int => |int| { + type_spec.spec = .{ + .Long = .{ + .long = tok, + .sign = int.sign, + .int = int.int, + }, + }; + }, + .Long => |*long| { + if (long.longlong != null) + break :blk; + long.longlong = tok; + }, + .Double => |*double| { + if (double.long != null) + break :blk; + double.long = tok; + }, + else => break :blk, + } + } else if (parser.eatToken(.Keyword_int)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Int = .{ + .int = tok, + }, + }; + }, + .Short => |*short| { + if (short.int != null) + break :blk; + short.int = tok; + }, + .Int => |*int| { + if (int.int != null) + break :blk; + int.int = tok; + }, + .Long => |*long| { + if (long.int != null) + break :blk; + long.int = tok; + }, + else => break :blk, + } + } else if (parser.eatToken(.Keyword_signed) orelse parser.eatToken(.Keyword_unsigned)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Int = .{ + .sign = tok, + }, + }; + }, + .Char => |*char| { + if (char.sign != null) + break :blk; + char.sign = tok; + }, + .Short => |*short| { + if (short.sign != null) + break :blk; + short.sign = tok; + }, + .Int => |*int| { + if (int.sign != null) + break :blk; + int.sign = tok; + }, + .Long => |*long| { + if (long.sign != null) + break :blk; + long.sign = tok; + }, + else => break :blk, + } + } else if (parser.eatToken(.Keyword_float)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec = .{ + .Float = .{ + .float = tok, + }, + }; + } else if 
(parser.eatToken(.Keyword_double)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec = .{ + .Double = .{ + .double = tok, + }, + }; + } else if (parser.eatToken(.Keyword_complex)) |tok| { + switch (type_spec.spec) { + .None => { + type_spec.spec = .{ + .Double = .{ + .complex = tok, + .double = null, + }, + }; + }, + .Float => |*float| { + if (float.complex != null) + break :blk; + float.complex = tok; + }, + .Double => |*double| { + if (double.complex != null) + break :blk; + double.complex = tok; + }, + else => break :blk, + } + } else if (parser.eatToken(.Keyword_bool)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec = .{ .Bool = tok }; + } else if (parser.eatToken(.Keyword_atomic)) |tok| { + // might be _Atomic qualifier + if (parser.eatToken(.LParen)) |_| { + if (type_spec.spec != .None) + break :blk; + const name = (try parser.typeName()) orelse return parser.err(.{ + .ExpectedTypeName = .{ .token = parser.it.index }, + }); + type_spec.spec.Atomic = .{ + .atomic = tok, + .typename = name, + .rparen = try parser.expectToken(.RParen), + }; + } else { + parser.putBackToken(tok); + } + } else if (parser.eatToken(.Keyword_enum)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec.Enum = try parser.enumSpec(tok); + } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| { + if (type_spec.spec != .None) + break :blk; + type_spec.spec.Record = try parser.recordSpec(tok); + } else if (parser.eatToken(.Identifier)) |tok| { + const ty = parser.getSymbol(tok) orelse { + parser.putBackToken(tok); + return false; + }; + switch (ty.id) { + .Enum => |e| blk: { + if (e.name) |some| + if (!parser.tree.tokenEql(some, tok)) + break :blk; + return parser.err(.{ + .MustUseKwToRefer = .{ .kw = e.tok, .name = tok }, + }); + }, + .Record => |r| blk: { + if (r.name) |some| + if (!parser.tree.tokenEql(some, tok)) + break :blk; + return parser.err(.{ + .MustUseKwToRefer = .{ + .kw = r.tok, + 
.name = tok,
                        },
                    });
                },
                .Typedef => {
                    type_spec.spec = .{
                        .Typedef = .{
                            .sym = tok,
                            .sym_type = ty,
                        },
                    };
                    return true;
                },
                else => {},
            }
            parser.putBackToken(tok);
            return false;
        }
        return parser.typeQual(&type_spec.qual);
    }
    return parser.err(.{
        .InvalidTypeSpecifier = .{
            .token = parser.it.index,
            .type_spec = type_spec,
        },
    });
}

/// TypeQual <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic
/// Records one type qualifier into `qual`; returns false if no qualifier token
/// is present. A repeated qualifier is reported as a (non-fatal) warning.
fn typeQual(parser: *Parser, qual: *Node.TypeQual) !bool {
    blk: {
        if (parser.eatToken(.Keyword_const)) |tok| {
            if (qual.@"const" != null)
                break :blk;
            qual.@"const" = tok;
        } else if (parser.eatToken(.Keyword_restrict)) |tok| {
            // BUG FIX: this branch previously checked and assigned `qual.atomic`,
            // so `restrict` was stored as `_Atomic` and a following `_Atomic`
            // was wrongly reported as a duplicate qualifier.
            if (qual.restrict != null)
                break :blk;
            qual.restrict = tok;
        } else if (parser.eatToken(.Keyword_volatile)) |tok| {
            if (qual.@"volatile" != null)
                break :blk;
            qual.@"volatile" = tok;
        } else if (parser.eatToken(.Keyword_atomic)) |tok| {
            if (qual.atomic != null)
                break :blk;
            qual.atomic = tok;
        } else return false;
        return true;
    }
    try parser.warn(.{
        .DuplicateQualifier = .{ .token = parser.it.index },
    });
    return true;
}

/// FnSpec <- Keyword_inline / Keyword_noreturn
/// Records a function specifier into `ds`; duplicates produce a warning.
fn fnSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
    blk: {
        if (parser.eatToken(.Keyword_inline)) |tok| {
            if (ds.fn_spec != .None)
                break :blk;
            ds.fn_spec = .{ .Inline = tok };
        } else if (parser.eatToken(.Keyword_noreturn)) |tok| {
            if (ds.fn_spec != .None)
                break :blk;
            ds.fn_spec = .{ .Noreturn = tok };
        } else return false;
        return true;
    }
    try parser.warn(.{
        .DuplicateSpecifier = .{ .token = parser.it.index },
    });
    return true;
}

/// AlignSpec <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN
fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
    if (parser.eatToken(.Keyword_alignas)) |tok| {
        _ = try parser.expectToken(.LParen);
        // BUG FIX: `orelse parser.err(...)` did not propagate the error —
        // every other call site uses `orelse return parser.err(...)`.
        const node = (try parser.typeName()) orelse (try parser.constExpr()) orelse return parser.err(.{
            .ExpectedExpr = .{ .token = parser.it.index },
        });
        if (ds.align_spec != null) {
            try parser.warn(.{
                .DuplicateSpecifier = .{ .token = parser.it.index },
            });
        }
        ds.align_spec = .{
            .alignas = tok,
            .expr = node,
            .rparen = try parser.expectToken(.RParen),
        };
        return true;
    }
    return false;
}

/// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
/// A named enum is registered in the symbol table before the body is parsed,
/// so fields may refer back to the tag.
fn enumSpec(parser: *Parser, tok: TokenIndex) !*Node.EnumType {
    const node = try parser.arena.create(Node.EnumType);
    const name = parser.eatToken(.Identifier);
    node.* = .{
        .tok = tok,
        .name = name,
        .body = null,
    };
    const ty = try parser.arena.create(Type);
    ty.* = .{
        .id = .{
            .Enum = node,
        },
    };
    if (name) |some|
        try parser.symbols.append(.{
            .name = parser.tree.tokenSlice(some),
            .ty = ty,
        });
    if (parser.eatToken(.LBrace)) |lbrace| {
        var fields = Node.EnumType.FieldList.init(parser.arena);
        // At least one field is required inside the braces.
        try fields.push((try parser.enumField()) orelse return parser.err(.{
            .ExpectedEnumField = .{ .token = parser.it.index },
        }));
        while (parser.eatToken(.Comma)) |_| {
            // A trailing comma before `}` simply ends the list.
            try fields.push((try parser.enumField()) orelse break);
        }
        node.body = .{
            .lbrace = lbrace,
            .fields = fields,
            .rbrace = try parser.expectToken(.RBrace),
        };
    }
    return node;
}

/// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA?
fn enumField(parser: *Parser) !?*Node {
    const name = parser.eatToken(.Identifier) orelse return null;
    const node = try parser.arena.create(Node.EnumField);
    node.* = .{
        .name = name,
        .value = null,
    };
    if (parser.eatToken(.Equal)) |eq| {
        // BUG FIX: missing `return` before parser.err — the error value was
        // being assigned to `node.value` instead of propagating.
        node.value = (try parser.constExpr()) orelse return parser.err(.{
            .ExpectedExpr = .{ .token = parser.it.index },
        });
    }
    return &node.base;
}

/// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
// Parses a struct/union specifier whose (struct|union) keyword `tok` was
// already consumed. A named record is registered in the symbol table before
// the body is parsed, so members may contain pointers to the record itself.
fn recordSpec(parser: *Parser, tok: TokenIndex) !*Node.RecordType {
    const node = try parser.arena.create(Node.RecordType);
    const name = parser.eatToken(.Identifier);
    // Distinguish `struct` from `union` by the keyword's first byte.
    const is_struct = parser.tree.tokenSlice(tok)[0] == 's';
    node.* = .{
        .tok = tok,
        .kind = if (is_struct) .Struct else .Union,
        .name = name,
        .body = null,
    };
    const ty = try parser.arena.create(Type);
    ty.* = .{
        .id = .{
            .Record = node,
        },
    };
    if (name) |some|
        try parser.symbols.append(.{
            .name = parser.tree.tokenSlice(some),
            .ty = ty,
        });
    if (parser.eatToken(.LBrace)) |lbrace| {
        // Record members live in their own block scope.
        try parser.pushScope(.Block);
        defer parser.popScope();
        var fields = Node.RecordType.FieldList.init(parser.arena);
        while (true) {
            if (parser.eatToken(.RBrace)) |rbrace| {
                node.body = .{
                    .lbrace = lbrace,
                    .fields = fields,
                    .rbrace = rbrace,
                };
                break;
            }
            try fields.push(try parser.recordField());
        }
    }
    return node;
}

/// RecordField
///     <- TypeSpec* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON
///     \ StaticAssert
fn recordField(parser: *Parser) Error!*Node {
    if (try parser.staticAssert()) |decl| return decl;
    var got = false;
    var type_spec = Node.TypeSpec{};
    // Accumulate all leading type specifiers; at least one is required.
    while (try parser.typeSpec(&type_spec)) got = true;
    if (!got)
        return parser.err(.{
            .ExpectedType = .{ .token = parser.it.index },
        });
    const node = try parser.arena.create(Node.RecordField);
    node.* = .{
        .type_spec = type_spec,
        .declarators = Node.RecordField.DeclaratorList.init(parser.arena),
        .semicolon = undefined,
    };
    while (true) {
        const rdr = try parser.recordDeclarator();
        try parser.declareSymbol(type_spec, rdr.declarator);
        try node.declarators.push(&rdr.base);
        // Comma continues the declarator list; anything else ends it.
        if (parser.eatToken(.Comma)) |_| {} else break;
    }

    node.semicolon = try parser.expectToken(.Semicolon);
    return &node.base;
}

/// TypeName <- TypeSpec* AbstractDeclarator?
fn typeName(parser: *Parser) Error!?*Node {
    @panic("TODO");
}

/// RecordDeclarator <- Declarator? (COLON ConstExpr)?
fn recordDeclarator(parser: *Parser) Error!*Node.RecordDeclarator {
    @panic("TODO");
}

/// Pointer <- ASTERISK TypeQual* Pointer?
/// Parses a possibly-nested pointer declarator; returns null if the next
/// token is not `*`.
fn pointer(parser: *Parser) Error!?*Node.Pointer {
    const asterisk = parser.eatToken(.Asterisk) orelse return null;
    const node = try parser.arena.create(Node.Pointer);
    node.* = .{
        .asterisk = asterisk,
        .qual = .{},
        .pointer = null,
    };
    while (try parser.typeQual(&node.qual)) {}
    // Recurse for `**`, `* const *`, etc.
    node.pointer = try parser.pointer();
    return node;
}

// Whether a declarator is required to, allowed to, or must not carry a name.
const Named = enum {
    Must,
    Allowed,
    Forbidden,
};

/// Declarator <- Pointer? DeclaratorSuffix
/// DeclaratorPrefix
///     <- IDENTIFIER // if named != .Forbidden
///      / LPAREN Declarator RPAREN
///      / (none) // if named != .Must
/// DeclaratorSuffix
///     <- DeclaratorPrefix (LBRACKET ArrayDeclarator? RBRACKET)*
///      / DeclaratorPrefix LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN
fn declarator(parser: *Parser, named: Named) Error!?*Node {
    const ptr = try parser.pointer();
    var node: *Node.Declarator = undefined;
    var inner_fn = false;

    // TODO sizof(int (int))
    // prefix
    if (parser.eatToken(.LParen)) |lparen| {
        // NOTE(review): `lparen + 1` assumes token indices are contiguous so
        // the error points just past the `(` — confirm against the iterator.
        const inner = (try parser.declarator(named)) orelse return parser.err(.{
            .ExpectedDeclarator = .{ .token = lparen + 1 },
        });
        inner_fn = declaratorIsFunction(inner);
        node = try parser.arena.create(Node.Declarator);
        node.* = .{
            .pointer = ptr,
            .prefix = .{
                .Complex = .{
                    .lparen = lparen,
                    .inner = inner,
                    .rparen = try parser.expectToken(.RParen),
                },
            },
            .suffix = .None,
        };
    } else if (named != .Forbidden) {
        if (parser.eatToken(.Identifier)) |tok| {
            node = try parser.arena.create(Node.Declarator);
            node.* = .{
                .pointer = ptr,
                // NOTE(review): `.Identifer` spelling must match the tag
                // declared in ast.zig — verify (looks like a typo kept on
                // both sides).
                .prefix = .{ .Identifer = tok },
                .suffix = .None,
            };
        } else if (named == .Must) {
            return parser.err(.{
                .ExpectedToken = .{ .token = parser.it.index, .expected_id = .Identifier },
            });
        } else {
            // Abstract declarator that is only a pointer (e.g. `int *`).
            if (ptr) |some|
                return &some.base;
            return null;
        }
    } else {
        node = try parser.arena.create(Node.Declarator);
        node.* = .{
            .pointer = ptr,
            .prefix = .None,
            .suffix = .None,
        };
    }
    // suffix
    if (parser.eatToken(.LParen)) |lparen| {
        // A function cannot directly return a function.
        if (inner_fn)
            return parser.err(.{
                .InvalidDeclarator = .{ .token = lparen },
            });
        node.suffix = .{
            .Fn = .{
                .lparen = lparen,
                .params = Node.Declarator.Params.init(parser.arena),
                .rparen = undefined,
            },
        };
        try parser.paramDecl(node);
        node.suffix.Fn.rparen = try parser.expectToken(.RParen);
    } else if (parser.eatToken(.LBracket)) |tok| {
        // A function cannot return an array either.
        if (inner_fn)
            return parser.err(.{
                .InvalidDeclarator = .{ .token = tok },
            });
        node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) };
        var lbrace = tok;
        while (true) {
            try node.suffix.Array.push(try parser.arrayDeclarator(lbrace));
            if (parser.eatToken(.LBracket)) |t| lbrace = t else break;
        }
    }
    // A second suffix after the first is malformed (e.g. `a[1](void)`).
    if (parser.eatToken(.LParen) orelse parser.eatToken(.LBracket)) |tok|
        return parser.err(.{
            .InvalidDeclarator = .{ .token = tok },
        });
    return &node.base;
}

/// ArrayDeclarator
///     <- ASTERISK
///      / Keyword_static TypeQual* AssignmentExpr
///      / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr)
///      / TypeQual+ AssignmentExpr?
///      / AssignmentExpr
fn arrayDeclarator(parser: *Parser, lbracket: TokenIndex) !*Node.Array {
    const arr = try parser.arena.create(Node.Array);
    arr.* = .{
        .lbracket = lbracket,
        .inner = .Inferred,
        .rbracket = undefined,
    };
    if (parser.eatToken(.Asterisk)) |tok| {
        // `[*]` — variable length of unspecified size (in prototypes).
        arr.inner = .{ .Unspecified = tok };
    } else {
        // TODO
    }
    arr.rbracket = try parser.expectToken(.RBracket);
    return arr;
}

/// Params <- ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?
/// ParamDecl <- DeclSpec (Declarator / AbstractDeclarator)
/// Parses the parameter list of a function declarator into `dr`.
/// Parsing of the individual parameters is still TODO.
fn paramDecl(parser: *Parser, dr: *Node.Declarator) !void {
    var old_style = false;
    while (true) {
        var ds = Node.DeclSpec{};
        if (try parser.declSpec(&ds)) {
            // TODO build the parameter node
            // TODO try parser.declareSymbol(ds.type_spec, dr);
        } else if (parser.eatToken(.Identifier)) |tok| {
            // K&R-style identifier list.
            _ = tok;
            old_style = true;
        } else if (parser.eatToken(.Ellipsis)) |tok| {
            // TODO record variadic marker; `...` must be the last entry.
            _ = tok;
            break;
        } else {
            // BUG FIX: the loop had no exit path at all — when none of the
            // branches matched (e.g. at `)`), it spun forever.
            break;
        }
        // Parameters are comma separated; anything else ends the list.
        if (parser.eatToken(.Comma) == null) break;
    }
}

/// Expr <- AssignmentExpr (COMMA Expr)*
fn expr(parser: *Parser) Error!?*Expr {
    @panic("TODO");
}

/// AssignmentExpr
///     <- ConditionalExpr // TODO recursive?
///      / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUA /
///      / ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL /
///      / AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr
fn assignmentExpr(parser: *Parser) !?*Expr {
    @panic("TODO");
}

/// ConstExpr <- ConditionalExpr
/// Parses a conditional expression and requires it to have a compile-time
/// value.
fn constExpr(parser: *Parser) Error!?*Expr {
    const start = parser.it.index;
    const expression = try parser.conditionalExpr();
    if (expression != null and expression.?.value == .None)
        // BUG FIX: `.ConsExpr = start` referenced a tag that does not exist in
        // ast.Error (and used the wrong payload shape). Report the offending
        // expression at `start` instead.
        // TODO: add a dedicated "expected constant expression" error kind.
        return parser.err(.{
            .ExpectedExpr = .{ .token = start },
        });
    return expression;
}

/// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)?
fn conditionalExpr(parser: *Parser) Error!?*Expr {
    @panic("TODO");
}

/// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)*
fn logicalOrExpr(parser: *Parser) !*Node {
    // BUG FIX: the previous body bound the lhs and then fell off the end of
    // the function without returning (and `orelse return null` did not match
    // the non-optional `!*Node` return type). The PIPEPIPE chain is TODO,
    // matching the sibling stubs.
    @panic("TODO");
}

/// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)*
fn logicalAndExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)*
fn binOrExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// BinXorExpr <- BinAndExpr (CARET BinXorExpr)*
fn binXorExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)*
fn binAndExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// EqualityExpr <- ComparisionExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)*
fn equalityExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// ComparisionExpr <- ShiftExpr (ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL /ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisionExpr)*
fn comparisionExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// ShiftExpr <- AdditiveExpr (ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)*
fn shiftExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// AdditiveExpr <- MultiplicativeExpr (PLUS / MINUS) AdditiveExpr)*
fn additiveExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// MultiplicativeExpr <- UnaryExpr (ASTERISK / SLASH / PERCENT) MultiplicativeExpr)*
fn multiplicativeExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// UnaryExpr
///     <- LPAREN TypeName RPAREN UnaryExpr
///      / Keyword_sizeof LAPERN TypeName RPAREN
///      / Keyword_sizeof UnaryExpr
///      / Keyword_alignof LAPERN TypeName RPAREN
///      / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr
///      / PrimaryExpr PostFixExpr*
fn unaryExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// PrimaryExpr
///     <- IDENTIFIER
///      / INTEGERLITERAL / FLOATLITERAL / STRINGLITERAL / CHARLITERAL
///      / LPAREN Expr RPAREN
///      / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN
fn primaryExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// Generic
///     <- TypeName COLON AssignmentExpr
///      / Keyword_default COLON AssignmentExpr
fn generic(parser: *Parser) !*Node {
    @panic("TODO");
}

/// PostFixExpr
///     <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE
///      / LBRACKET Expr RBRACKET
///      / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN
///      / (PERIOD / ARROW) IDENTIFIER
///      / (PLUSPLUS / MINUSMINUS)
fn postFixExpr(parser: *Parser) !*Node {
    @panic("TODO");
}

/// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA?
fn initializers(parser: *Parser) !*Node {
    @panic("TODO");
}

/// Initializer
///     <- LBRACE Initializers RBRACE
///      / AssignmentExpr
fn initializer(parser: *Parser, dr: *Node.Declarator) Error!?*Node {
    @panic("TODO");
}

/// Designator
///     <- LBRACKET ConstExpr RBRACKET
///      / PERIOD IDENTIFIER
fn designator(parser: *Parser) !*Node {
    @panic("TODO");
}

/// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE
/// Parses a `{ ... }` block in its own scope; returns null if the next token
/// is not `{`.
fn compoundStmt(parser: *Parser) Error!?*Node {
    const lbrace = parser.eatToken(.LBrace) orelse return null;
    try parser.pushScope(.Block);
    defer parser.popScope();
    const body_node = try parser.arena.create(Node.CompoundStmt);
    body_node.* = .{
        .lbrace = lbrace,
        .statements = Node.CompoundStmt.StmtList.init(parser.arena),
        .rbrace = undefined,
    };
    while (true) {
        // BUG FIX: `.RBRACE` is not a member of Token.Id — the tag declared
        // by the tokenizer is `.RBrace`.
        if (parser.eatToken(.RBrace)) |rbrace| {
            body_node.rbrace = rbrace;
            break;
        }
        try body_node.statements.push((try parser.declaration()) orelse (try parser.stmt()));
    }
    return &body_node.base;
}

/// Stmt
///     <- CompoundStmt
///      / Keyword_if LPAREN Expr RPAREN Stmt (Keyword_ELSE Stmt)?
///      / Keyword_switch LPAREN Expr RPAREN Stmt
///      / Keyword_while LPAREN Expr RPAREN Stmt
///      / Keyword_do statement Keyword_while LPAREN Expr RPAREN SEMICOLON
///      / Keyword_for LPAREN (Declaration / ExprStmt) ExprStmt Expr? RPAREN Stmt
///      / Keyword_default COLON Stmt
///      / Keyword_case ConstExpr COLON Stmt
///      / Keyword_goto IDENTIFIER SEMICOLON
///      / Keyword_continue SEMICOLON
///      / Keyword_break SEMICOLON
///      / Keyword_return Expr? SEMICOLON
///      / IDENTIFIER COLON Stmt
///      / ExprStmt
fn stmt(parser: *Parser) Error!*Node {
    if (try parser.compoundStmt()) |node| return node;
    if (parser.eatToken(.Keyword_if)) |tok| {
        const node = try parser.arena.create(Node.IfStmt);
        _ = try parser.expectToken(.LParen);
        node.* = .{
            .@"if" = tok,
            .cond = (try parser.expr()) orelse return parser.err(.{
                .ExpectedExpr = .{ .token = parser.it.index },
            }),
            .body = undefined,
            .@"else" = null,
        };
        _ = try parser.expectToken(.RParen);
        node.body = try parser.stmt();
        if (parser.eatToken(.Keyword_else)) |else_tok| {
            node.@"else" = .{
                .tok = else_tok,
                .body = try parser.stmt(),
            };
        }
        return &node.base;
    }
    if (parser.eatToken(.Keyword_while)) |tok| {
        try parser.pushScope(.Loop);
        defer parser.popScope();
        _ = try parser.expectToken(.LParen);
        const cond = (try parser.expr()) orelse return parser.err(.{
            .ExpectedExpr = .{ .token = parser.it.index },
        });
        const rparen = try parser.expectToken(.RParen);
        const node = try parser.arena.create(Node.WhileStmt);
        node.* = .{
            .@"while" = tok,
            .cond = cond,
            .rparen = rparen,
            .body = try parser.stmt(),
            // NOTE(review): a `while` statement is not semicolon-terminated in
            // C (the body statement carries its own terminator) — verify
            // Node.WhileStmt; this looks copy-pasted from the do-while case.
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_do)) |tok| {
        try parser.pushScope(.Loop);
        defer parser.popScope();
        const body = try parser.stmt();
        // BUG FIX: the `while` keyword was never consumed, and the field
        // initializer below (`.@"while" = @"while"`) referenced an undefined
        // identifier.
        const while_tok = try parser.expectToken(.Keyword_while);
        _ = try parser.expectToken(.LParen);
        const cond = (try parser.expr()) orelse return parser.err(.{
            .ExpectedExpr = .{ .token = parser.it.index },
        });
        _ = try parser.expectToken(.RParen);
        const node = try parser.arena.create(Node.DoStmt);
        node.* = .{
            .do = tok,
            .body = body,
            .cond = cond,
            .@"while" = while_tok,
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_for)) |tok| {
        try parser.pushScope(.Loop);
        defer parser.popScope();
        _ = try parser.expectToken(.LParen);
        const init = if (try parser.declaration()) |decl| blk: {
            // TODO disallow storage class other than auto and register
            break :blk decl;
        } else try parser.exprStmt();
        const cond = try parser.expr();
        const semicolon = try parser.expectToken(.Semicolon);
        const incr = try parser.expr();
        const rparen = try parser.expectToken(.RParen);
        const node = try parser.arena.create(Node.ForStmt);
        node.* = .{
            .@"for" = tok,
            .init = init,
            .cond = cond,
            .semicolon = semicolon,
            .incr = incr,
            .rparen = rparen,
            .body = try parser.stmt(),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_switch)) |tok| {
        try parser.pushScope(.Switch);
        defer parser.popScope();
        _ = try parser.expectToken(.LParen);
        // NOTE(review): grammar is `switch LPAREN Expr RPAREN`, but exprStmt
        // also consumes a SEMICOLON — confirm this is intended before RParen.
        const switch_expr = try parser.exprStmt();
        const rparen = try parser.expectToken(.RParen);
        const node = try parser.arena.create(Node.SwitchStmt);
        node.* = .{
            .@"switch" = tok,
            .expr = switch_expr,
            .rparen = rparen,
            .body = try parser.stmt(),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_default)) |tok| {
        _ = try parser.expectToken(.Colon);
        const node = try parser.arena.create(Node.LabeledStmt);
        node.* = .{
            .kind = .{ .Default = tok },
            .stmt = try parser.stmt(),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_case)) |tok| {
        // NOTE(review): grammar requires `case ConstExpr COLON`, but no
        // ConstExpr is parsed here — Node.LabeledStmt has nowhere visible to
        // store it; TODO once the node carries the case value.
        _ = try parser.expectToken(.Colon);
        const node = try parser.arena.create(Node.LabeledStmt);
        node.* = .{
            .kind = .{ .Case = tok },
            .stmt = try parser.stmt(),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_goto)) |tok| {
        const node = try parser.arena.create(Node.JumpStmt);
        node.* = .{
            .ltoken = tok,
            // BUG FIX: grammar is `goto IDENTIFIER SEMICOLON`, but the label
            // identifier was never consumed — the goto keyword token itself
            // was stored as the jump target.
            .kind = .{ .Goto = try parser.expectToken(.Identifier) },
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_continue)) |tok| {
        const node = try parser.arena.create(Node.JumpStmt);
        node.* = .{
            .ltoken = tok,
            .kind = .Continue,
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_break)) |tok| {
        const node = try parser.arena.create(Node.JumpStmt);
        node.* = .{
            .ltoken = tok,
            .kind = .Break,
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Keyword_return)) |tok| {
        const node = try parser.arena.create(Node.JumpStmt);
        node.* = .{
            .ltoken = tok,
            .kind = .{ .Return = try parser.expr() },
            .semicolon = try parser.expectToken(.Semicolon),
        };
        return &node.base;
    }
    if (parser.eatToken(.Identifier)) |tok| {
        if (parser.eatToken(.Colon)) |_| {
            const node = try parser.arena.create(Node.LabeledStmt);
            node.* = .{
                .kind = .{ .Label = tok },
                .stmt = try parser.stmt(),
            };
            return &node.base;
        }
        // Not a label — rewind and fall through to an expression statement.
        parser.putBackToken(tok);
    }
    return parser.exprStmt();
}

/// ExprStmt <- Expr?
// SEMICOLON — continuation of the `/// ExprStmt` doc comment above (line-wrap artifact).
fn exprStmt(parser: *Parser) !*Node {
    const node = try parser.arena.create(Node.ExprStmt);
    node.* = .{
        .expr = try parser.expr(),
        .semicolon = try parser.expectToken(.Semicolon),
    };
    return &node.base;
}

/// Consumes and returns the next significant token if it matches `id`,
/// skipping comments and newlines; otherwise rewinds one token and
/// returns null.
fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex {
    while (true) {
        switch ((parser.it.next() orelse return null).id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => |next_id| if (next_id == id) {
                return parser.it.index;
            } else {
                // NOTE(review): only the mismatched token is rewound; any
                // comment/newline tokens skipped above are not — confirm
                // callers never rely on re-reading those.
                _ = parser.it.prev();
                return null;
            },
        }
    }
}

/// Like eatToken but reports ExpectedToken (fatal) on mismatch.
fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex {
    while (true) {
        switch ((parser.it.next() orelse return error.ParseError).id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => |next_id| if (next_id != id) {
                return parser.err(.{
                    .ExpectedToken = .{ .token = parser.it.index, .expected_id = id },
                });
            } else {
                return parser.it.index;
            },
        }
    }
}

/// Rewinds the iterator so that `putting_back` (a token previously returned
/// by eatToken/expectToken) is the next significant token again.
fn putBackToken(parser: *Parser, putting_back: TokenIndex) void {
    while (true) {
        // BUG FIX: this previously called `parser.it.next()`, walking
        // *forwards* and away from the token being put back; rewinding
        // must step backwards over any skipped trivia.
        const prev_tok = parser.it.prev() orelse return;
        switch (prev_tok.id) {
            .LineComment, .MultiLineComment, .Nl => continue,
            else => {
                // The first non-trivia token we step back over must be the
                // one the caller is putting back.
                assert(parser.it.list.at(putting_back) == prev_tok);
                return;
            },
        }
    }
}

/// Records a fatal parse error and returns error.ParseError.
/// NOTE(review): `try` on msgs.push requires its failure (allocation) to be a
/// member of `Error` — confirm Error includes OutOfMemory.
fn err(parser: *Parser, msg: ast.Error) Error {
    try parser.tree.msgs.push(.{
        .kind = .Error,
        .inner = msg,
    });
    return error.ParseError;
}

/// Records a warning; escalates to a fatal error according to
/// `options.warn_as_err`.
fn warn(parser: *Parser, msg: ast.Error) Error!void {
    const is_warning = switch (parser.options.warn_as_err) {
        .None => true,
        // NOTE(review): `item == msg` compares against a full ast.Error
        // union — verify the list elements are comparable (tags vs. values).
        .Some => |list| for (list) |item| (if (item == msg) break false) else true,
        .All => false,
    };
    try parser.tree.msgs.push(.{
        .kind = if (is_warning) .Warning else .Error,
        .inner = msg,
    });
    if (!is_warning) return error.ParseError;
}

/// Records an informational note attached to a previous message.
fn note(parser: *Parser, msg: ast.Error) Error!void {
    try parser.tree.msgs.push(.{
        .kind = .Note,
        .inner = msg,
    });
}
};

diff --git a/lib/std/c/tokenizer.zig
b/lib/std/c/tokenizer.zig new file mode 100644 index 0000000000..a641529502 --- /dev/null +++ b/lib/std/c/tokenizer.zig @@ -0,0 +1,1583 @@ +const std = @import("std"); +const mem = std.mem; + +pub const Source = struct { + buffer: []const u8, + file_name: []const u8, + tokens: TokenList, + + pub const TokenList = std.SegmentedList(Token, 64); +}; + +pub const Token = struct { + id: Id, + start: usize, + end: usize, + source: *Source, + + pub const Id = union(enum) { + Invalid, + Eof, + Nl, + Identifier, + + /// special case for #include <...> + MacroString, + StringLiteral: StrKind, + CharLiteral: StrKind, + IntegerLiteral: NumSuffix, + FloatLiteral: NumSuffix, + Bang, + BangEqual, + Pipe, + PipePipe, + PipeEqual, + Equal, + EqualEqual, + LParen, + RParen, + LBrace, + RBrace, + LBracket, + RBracket, + Period, + Ellipsis, + Caret, + CaretEqual, + Plus, + PlusPlus, + PlusEqual, + Minus, + MinusMinus, + MinusEqual, + Asterisk, + AsteriskEqual, + Percent, + PercentEqual, + Arrow, + Colon, + Semicolon, + Slash, + SlashEqual, + Comma, + Ampersand, + AmpersandAmpersand, + AmpersandEqual, + QuestionMark, + AngleBracketLeft, + AngleBracketLeftEqual, + AngleBracketAngleBracketLeft, + AngleBracketAngleBracketLeftEqual, + AngleBracketRight, + AngleBracketRightEqual, + AngleBracketAngleBracketRight, + AngleBracketAngleBracketRightEqual, + Tilde, + LineComment, + MultiLineComment, + Hash, + HashHash, + + Keyword_auto, + Keyword_break, + Keyword_case, + Keyword_char, + Keyword_const, + Keyword_continue, + Keyword_default, + Keyword_do, + Keyword_double, + Keyword_else, + Keyword_enum, + Keyword_extern, + Keyword_float, + Keyword_for, + Keyword_goto, + Keyword_if, + Keyword_int, + Keyword_long, + Keyword_register, + Keyword_return, + Keyword_short, + Keyword_signed, + Keyword_sizeof, + Keyword_static, + Keyword_struct, + Keyword_switch, + Keyword_typedef, + Keyword_union, + Keyword_unsigned, + Keyword_void, + Keyword_volatile, + Keyword_while, + + // ISO C99 + Keyword_bool, + 
Keyword_complex, + Keyword_imaginary, + Keyword_inline, + Keyword_restrict, + + // ISO C11 + Keyword_alignas, + Keyword_alignof, + Keyword_atomic, + Keyword_generic, + Keyword_noreturn, + Keyword_static_assert, + Keyword_thread_local, + + // Preprocessor directives + Keyword_include, + Keyword_define, + Keyword_ifdef, + Keyword_ifndef, + Keyword_error, + Keyword_pragma, + + pub fn symbol(id: @TagType(Id)) []const u8 { + return switch (id) { + .Invalid => "Invalid", + .Eof => "Eof", + .Nl => "NewLine", + .Identifier => "Identifier", + .MacroString => "MacroString", + .StringLiteral => "StringLiteral", + .CharLiteral => "CharLiteral", + .IntegerLiteral => "IntegerLiteral", + .FloatLiteral => "FloatLiteral", + .LineComment => "LineComment", + .MultiLineComment => "MultiLineComment", + + .Bang => "!", + .BangEqual => "!=", + .Pipe => "|", + .PipePipe => "||", + .PipeEqual => "|=", + .Equal => "=", + .EqualEqual => "==", + .LParen => "(", + .RParen => ")", + .LBrace => "{", + .RBrace => "}", + .LBracket => "[", + .RBracket => "]", + .Period => ".", + .Ellipsis => "...", + .Caret => "^", + .CaretEqual => "^=", + .Plus => "+", + .PlusPlus => "++", + .PlusEqual => "+=", + .Minus => "-", + .MinusMinus => "--", + .MinusEqual => "-=", + .Asterisk => "*", + .AsteriskEqual => "*=", + .Percent => "%", + .PercentEqual => "%=", + .Arrow => "->", + .Colon => ":", + .Semicolon => ";", + .Slash => "/", + .SlashEqual => "/=", + .Comma => ",", + .Ampersand => "&", + .AmpersandAmpersand => "&&", + .AmpersandEqual => "&=", + .QuestionMark => "?", + .AngleBracketLeft => "<", + .AngleBracketLeftEqual => "<=", + .AngleBracketAngleBracketLeft => "<<", + .AngleBracketAngleBracketLeftEqual => "<<=", + .AngleBracketRight => ">", + .AngleBracketRightEqual => ">=", + .AngleBracketAngleBracketRight => ">>", + .AngleBracketAngleBracketRightEqual => ">>=", + .Tilde => "~", + .Hash => "#", + .HashHash => "##", + .Keyword_auto => "auto", + .Keyword_break => "break", + .Keyword_case => "case", + 
.Keyword_char => "char", + .Keyword_const => "const", + .Keyword_continue => "continue", + .Keyword_default => "default", + .Keyword_do => "do", + .Keyword_double => "double", + .Keyword_else => "else", + .Keyword_enum => "enum", + .Keyword_extern => "extern", + .Keyword_float => "float", + .Keyword_for => "for", + .Keyword_goto => "goto", + .Keyword_if => "if", + .Keyword_int => "int", + .Keyword_long => "long", + .Keyword_register => "register", + .Keyword_return => "return", + .Keyword_short => "short", + .Keyword_signed => "signed", + .Keyword_sizeof => "sizeof", + .Keyword_static => "static", + .Keyword_struct => "struct", + .Keyword_switch => "switch", + .Keyword_typedef => "typedef", + .Keyword_union => "union", + .Keyword_unsigned => "unsigned", + .Keyword_void => "void", + .Keyword_volatile => "volatile", + .Keyword_while => "while", + .Keyword_bool => "_Bool", + .Keyword_complex => "_Complex", + .Keyword_imaginary => "_Imaginary", + .Keyword_inline => "inline", + .Keyword_restrict => "restrict", + .Keyword_alignas => "_Alignas", + .Keyword_alignof => "_Alignof", + .Keyword_atomic => "_Atomic", + .Keyword_generic => "_Generic", + .Keyword_noreturn => "_Noreturn", + .Keyword_static_assert => "_Static_assert", + .Keyword_thread_local => "_Thread_local", + .Keyword_include => "include", + .Keyword_define => "define", + .Keyword_ifdef => "ifdef", + .Keyword_ifndef => "ifndef", + .Keyword_error => "error", + .Keyword_pragma => "pragma", + }; + } + }; + + pub fn eql(a: Token, b: Token) bool { + // do we really need this cast here + if (@as(@TagType(Id), a.id) != b.id) return false; + return mem.eql(u8, a.slice(), b.slice()); + } + + pub fn slice(tok: Token) []const u8 { + return tok.source.buffer[tok.start..tok.end]; + } + + pub const Keyword = struct { + bytes: []const u8, + id: Id, + hash: u32, + + fn init(bytes: []const u8, id: Id) Keyword { + @setEvalBranchQuota(2000); + return .{ + .bytes = bytes, + .id = id, + .hash = std.hash_map.hashString(bytes), + }; + 
} + }; + + // TODO extensions + pub const keywords = [_]Keyword{ + Keyword.init("auto", .Keyword_auto), + Keyword.init("break", .Keyword_break), + Keyword.init("case", .Keyword_case), + Keyword.init("char", .Keyword_char), + Keyword.init("const", .Keyword_const), + Keyword.init("continue", .Keyword_continue), + Keyword.init("default", .Keyword_default), + Keyword.init("do", .Keyword_do), + Keyword.init("double", .Keyword_double), + Keyword.init("else", .Keyword_else), + Keyword.init("enum", .Keyword_enum), + Keyword.init("extern", .Keyword_extern), + Keyword.init("float", .Keyword_float), + Keyword.init("for", .Keyword_for), + Keyword.init("goto", .Keyword_goto), + Keyword.init("if", .Keyword_if), + Keyword.init("int", .Keyword_int), + Keyword.init("long", .Keyword_long), + Keyword.init("register", .Keyword_register), + Keyword.init("return", .Keyword_return), + Keyword.init("short", .Keyword_short), + Keyword.init("signed", .Keyword_signed), + Keyword.init("sizeof", .Keyword_sizeof), + Keyword.init("static", .Keyword_static), + Keyword.init("struct", .Keyword_struct), + Keyword.init("switch", .Keyword_switch), + Keyword.init("typedef", .Keyword_typedef), + Keyword.init("union", .Keyword_union), + Keyword.init("unsigned", .Keyword_unsigned), + Keyword.init("void", .Keyword_void), + Keyword.init("volatile", .Keyword_volatile), + Keyword.init("while", .Keyword_while), + + // ISO C99 + Keyword.init("_Bool", .Keyword_bool), + Keyword.init("_Complex", .Keyword_complex), + Keyword.init("_Imaginary", .Keyword_imaginary), + Keyword.init("inline", .Keyword_inline), + Keyword.init("restrict", .Keyword_restrict), + + // ISO C11 + Keyword.init("_Alignas", .Keyword_alignas), + Keyword.init("_Alignof", .Keyword_alignof), + Keyword.init("_Atomic", .Keyword_atomic), + Keyword.init("_Generic", .Keyword_generic), + Keyword.init("_Noreturn", .Keyword_noreturn), + Keyword.init("_Static_assert", .Keyword_static_assert), + Keyword.init("_Thread_local", .Keyword_thread_local), + + // 
Preprocessor directives + Keyword.init("include", .Keyword_include), + Keyword.init("define", .Keyword_define), + Keyword.init("ifdef", .Keyword_ifdef), + Keyword.init("ifndef", .Keyword_ifndef), + Keyword.init("error", .Keyword_error), + Keyword.init("pragma", .Keyword_pragma), + }; + + // TODO perfect hash at comptime + // TODO do this in the preprocessor + pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id { + var hash = std.hash_map.hashString(bytes); + for (keywords) |kw| { + if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) { + switch (kw.id) { + .Keyword_include, + .Keyword_define, + .Keyword_ifdef, + .Keyword_ifndef, + .Keyword_error, + .Keyword_pragma, + => if (!pp_directive) return null, + else => {}, + } + return kw.id; + } + } + return null; + } + + pub const NumSuffix = enum { + None, + F, + L, + U, + LU, + LL, + LLU, + }; + + pub const StrKind = enum { + None, + Wide, + Utf8, + Utf16, + Utf32, + }; +}; + +pub const Tokenizer = struct { + source: *Source, + index: usize = 0, + prev_tok_id: @TagType(Token.Id) = .Invalid, + pp_directive: bool = false, + + pub fn next(self: *Tokenizer) Token { + const start_index = self.index; + var result = Token{ + .id = .Eof, + .start = self.index, + .end = undefined, + .source = self.source, + }; + var state: enum { + Start, + Cr, + BackSlash, + BackSlashCr, + u, + u8, + U, + L, + StringLiteral, + CharLiteralStart, + CharLiteral, + EscapeSequence, + CrEscape, + OctalEscape, + HexEscape, + UnicodeEscape, + Identifier, + Equal, + Bang, + Pipe, + Percent, + Asterisk, + Plus, + + /// special case for #include <...> + MacroString, + AngleBracketLeft, + AngleBracketAngleBracketLeft, + AngleBracketRight, + AngleBracketAngleBracketRight, + Caret, + Period, + Period2, + Minus, + Slash, + Ampersand, + Hash, + LineComment, + MultiLineComment, + MultiLineCommentAsterisk, + Zero, + IntegerLiteralOct, + IntegerLiteralBinary, + IntegerLiteralHex, + IntegerLiteral, + IntegerSuffix, + IntegerSuffixU, + IntegerSuffixL, + 
IntegerSuffixLL,
            IntegerSuffixUL,
            FloatFraction,
            FloatFractionHex,
            FloatExponent,
            FloatExponentDigits,
            FloatSuffix,
        } = .Start;
        // True when the current escape sequence is inside a string literal,
        // false when it is inside a char literal.
        var string = false;
        // Generic digit counter shared by escape-sequence and exponent states.
        var counter: u32 = 0;
        while (self.index < self.source.buffer.len) : (self.index += 1) {
            const c = self.source.buffer[self.index];
            switch (state) {
                .Start => switch (c) {
                    '\n' => {
                        // A newline ends any preprocessor directive.
                        self.pp_directive = false;
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    '\r' => {
                        state = .Cr;
                    },
                    '"' => {
                        result.id = .{ .StringLiteral = .None };
                        state = .StringLiteral;
                    },
                    '\'' => {
                        result.id = .{ .CharLiteral = .None };
                        state = .CharLiteralStart;
                    },
                    // u/U/L may be encoding prefixes of a string/char literal,
                    // so they get their own states instead of .Identifier.
                    'u' => {
                        state = .u;
                    },
                    'U' => {
                        state = .U;
                    },
                    'L' => {
                        state = .L;
                    },
                    'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => {
                        state = .Identifier;
                    },
                    '=' => {
                        state = .Equal;
                    },
                    '!' => {
                        state = .Bang;
                    },
                    '|' => {
                        state = .Pipe;
                    },
                    '(' => {
                        result.id = .LParen;
                        self.index += 1;
                        break;
                    },
                    ')' => {
                        result.id = .RParen;
                        self.index += 1;
                        break;
                    },
                    '[' => {
                        result.id = .LBracket;
                        self.index += 1;
                        break;
                    },
                    ']' => {
                        result.id = .RBracket;
                        self.index += 1;
                        break;
                    },
                    ';' => {
                        result.id = .Semicolon;
                        self.index += 1;
                        break;
                    },
                    ',' => {
                        result.id = .Comma;
                        self.index += 1;
                        break;
                    },
                    '?' => {
                        result.id = .QuestionMark;
                        self.index += 1;
                        break;
                    },
                    ':' => {
                        result.id = .Colon;
                        self.index += 1;
                        break;
                    },
                    '%' => {
                        state = .Percent;
                    },
                    '*' => {
                        state = .Asterisk;
                    },
                    '+' => {
                        state = .Plus;
                    },
                    '<' => {
                        // Directly after `#include`, `<...>` is a macro
                        // string, not a run of operators.
                        if (self.prev_tok_id == .Keyword_include)
                            state = .MacroString
                        else
                            state = .AngleBracketLeft;
                    },
                    '>' => {
                        state = .AngleBracketRight;
                    },
                    '^' => {
                        state = .Caret;
                    },
                    '{' => {
                        result.id = .LBrace;
                        self.index += 1;
                        break;
                    },
                    '}' => {
                        result.id = .RBrace;
                        self.index += 1;
                        break;
                    },
                    '~' => {
                        result.id = .Tilde;
                        self.index += 1;
                        break;
                    },
                    '.' => {
                        state = .Period;
                    },
                    '-' => {
                        state = .Minus;
                    },
                    '/' => {
                        state = .Slash;
                    },
                    '&' => {
                        state = .Ampersand;
                    },
                    '#' => {
                        state = .Hash;
                    },
                    '0' => {
                        state = .Zero;
                    },
                    '1'...'9' => {
                        state = .IntegerLiteral;
                    },
                    '\\' => {
                        state = .BackSlash;
                    },
                    '\t', '\x0B', '\x0C', ' ' => {
                        // Skip whitespace by sliding the token start forward.
                        result.start = self.index + 1;
                    },
                    else => {
                        // TODO handle invalid bytes better
                        result.id = .Invalid;
                        self.index += 1;
                        break;
                    },
                },
                .Cr => switch (c) {
                    '\n' => {
                        self.pp_directive = false;
                        result.id = .Nl;
                        self.index += 1;
                        break;
                    },
                    else => {
                        // A lone carriage return is not a valid line ending.
                        result.id = .Invalid;
                        break;
                    },
                },
                .BackSlash => switch (c) {
                    '\n' => {
                        // Line continuation: splice the lines together.
                        state = .Start;
                    },
                    '\r' => {
                        state = .BackSlashCr;
                    },
                    '\t', '\x0B', '\x0C', ' ' => {
                        // TODO warn
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .BackSlashCr => switch (c) {
                    '\n' => {
                        state = .Start;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .u => switch (c) {
                    '8' => {
                        state = .u8;
                    },
                    '\'' => {
                        result.id = .{ .CharLiteral = .Utf16 };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf16 };
                        state = .StringLiteral;
                    },
                    else => {
                        // Not a literal prefix after all; plain identifier.
                        state = .Identifier;
                    },
                },
                .u8 => switch (c) {
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf8 };
                        state = .StringLiteral;
                    },
                    else => {
                        state = .Identifier;
                    },
                },
                .U => switch (c) {
                    '\'' => {
                        result.id = .{ .CharLiteral = .Utf32 };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Utf32 };
                        state = .StringLiteral;
                    },
                    else => {
                        state = .Identifier;
                    },
                },
                .L => switch (c) {
                    '\'' => {
                        result.id = .{ .CharLiteral = .Wide };
                        state = .CharLiteralStart;
                    },
                    '\"' => {
                        result.id = .{ .StringLiteral = .Wide };
                        state = .StringLiteral;
                    },
                    else => {
                        state = .Identifier;
                    },
                },
                .StringLiteral => switch (c) {
                    '\\' => {
                        string = true;
                        state = .EscapeSequence;
                    },
                    '"' => {
                        self.index
+= 1;
                        break;
                    },
                    '\n', '\r' => {
                        // Unescaped line break inside a string literal.
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                .CharLiteralStart => switch (c) {
                    '\\' => {
                        string = false;
                        state = .EscapeSequence;
                    },
                    '\'', '\n' => {
                        // Empty char literal, or unterminated at end of line.
                        result.id = .Invalid;
                        break;
                    },
                    else => {
                        state = .CharLiteral;
                    },
                },
                .CharLiteral => switch (c) {
                    '\\' => {
                        string = false;
                        state = .EscapeSequence;
                    },
                    '\'' => {
                        self.index += 1;
                        break;
                    },
                    '\n' => {
                        result.id = .Invalid;
                        break;
                    },
                    else => {},
                },
                // Shared by string and char literals; `string` selects which
                // state to return to once the escape is consumed.
                .EscapeSequence => switch (c) {
                    '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', '\n' => {
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                    '\r' => {
                        state = .CrEscape;
                    },
                    '0'...'7' => {
                        // Octal escape: at most three octal digits, one of
                        // which has just been consumed.
                        counter = 1;
                        state = .OctalEscape;
                    },
                    'x' => {
                        state = .HexEscape;
                    },
                    'u' => {
                        // Fix: \u takes exactly 4 hex digits (C11 6.4.3), so
                        // it must enter .UnicodeEscape, which counts the
                        // remaining hex digits down to zero. The previous
                        // .OctalEscape target counts octal digits *up* to 3
                        // and can never terminate correctly from counter = 4;
                        // it also made .UnicodeEscape unreachable.
                        counter = 4;
                        state = .UnicodeEscape;
                    },
                    'U' => {
                        // Fix: \U takes exactly 8 hex digits; see above.
                        counter = 8;
                        state = .UnicodeEscape;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .CrEscape => switch (c) {
                    '\n' => {
                        // \<CR><LF> line continuation inside a literal.
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                    else => {
                        result.id = .Invalid;
                        break;
                    },
                },
                .OctalEscape => switch (c) {
                    '0'...'7' => {
                        counter += 1;
                        if (counter == 3) {
                            // Octal escapes are at most three digits.
                            state = if (string) .StringLiteral else .CharLiteral;
                        }
                    },
                    else => {
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                },
                .HexEscape => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    else => {
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                },
                .UnicodeEscape => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {
                        counter -= 1;
                        if (counter == 0) {
                            state = if (string) .StringLiteral else .CharLiteral;
                        }
                    },
                    else => {
                        if (counter != 0) {
                            // Too few hex digits for \u/\U.
                            result.id = .Invalid;
                            break;
                        }
                        state = if (string) .StringLiteral else .CharLiteral;
                    },
                },
                .Identifier => switch (c) {
                    'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
                    else => {
                        result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id ==
.Hash and !self.pp_directive) orelse .Identifier;
                        // The identifier directly after `#` names the
                        // directive; suppress keyword lookup for the rest of
                        // the line.
                        if (self.prev_tok_id == .Hash)
                            self.pp_directive = true;
                        break;
                    },
                },
                .Equal => switch (c) {
                    '=' => {
                        result.id = .EqualEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Equal;
                        break;
                    },
                },
                .Bang => switch (c) {
                    '=' => {
                        result.id = .BangEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Bang;
                        break;
                    },
                },
                .Pipe => switch (c) {
                    '=' => {
                        result.id = .PipeEqual;
                        self.index += 1;
                        break;
                    },
                    '|' => {
                        result.id = .PipePipe;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Pipe;
                        break;
                    },
                },
                .Percent => switch (c) {
                    '=' => {
                        result.id = .PercentEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Percent;
                        break;
                    },
                },
                .Asterisk => switch (c) {
                    '=' => {
                        result.id = .AsteriskEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Asterisk;
                        break;
                    },
                },
                .Plus => switch (c) {
                    '=' => {
                        result.id = .PlusEqual;
                        self.index += 1;
                        break;
                    },
                    '+' => {
                        result.id = .PlusPlus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Plus;
                        break;
                    },
                },
                // `<...>` after #include: consume everything up to `>`.
                .MacroString => switch (c) {
                    '>' => {
                        result.id = .MacroString;
                        self.index += 1;
                        break;
                    },
                    else => {},
                },
                .AngleBracketLeft => switch (c) {
                    '<' => {
                        state = .AngleBracketAngleBracketLeft;
                    },
                    '=' => {
                        result.id = .AngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketAngleBracketLeft => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketLeftEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketLeft;
                        break;
                    },
                },
                .AngleBracketRight => switch (c) {
                    '>' => {
                        state = .AngleBracketAngleBracketRight;
                    },
                    '=' => {
                        result.id = .AngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketRight;
                        break;
                    },
                },
                .AngleBracketAngleBracketRight => switch (c) {
                    '=' => {
                        result.id = .AngleBracketAngleBracketRightEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .AngleBracketAngleBracketRight;
                        break;
                    },
                },
                .Caret => switch (c) {
                    '=' => {
                        result.id = .CaretEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Caret;
                        break;
                    },
                },
                .Period => switch (c) {
                    '.' => {
                        state = .Period2;
                    },
                    '0'...'9' => {
                        // `.5`-style float literal.
                        state = .FloatFraction;
                    },
                    else => {
                        result.id = .Period;
                        break;
                    },
                },
                .Period2 => switch (c) {
                    '.' => {
                        result.id = .Ellipsis;
                        self.index += 1;
                        break;
                    },
                    else => {
                        // Only two dots: emit `.` and re-scan the second dot.
                        result.id = .Period;
                        self.index -= 1;
                        break;
                    },
                },
                .Minus => switch (c) {
                    '>' => {
                        result.id = .Arrow;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .MinusEqual;
                        self.index += 1;
                        break;
                    },
                    '-' => {
                        result.id = .MinusMinus;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Minus;
                        break;
                    },
                },
                .Slash => switch (c) {
                    '/' => {
                        state = .LineComment;
                    },
                    '*' => {
                        state = .MultiLineComment;
                    },
                    '=' => {
                        result.id = .SlashEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Slash;
                        break;
                    },
                },
                .Ampersand => switch (c) {
                    '&' => {
                        result.id = .AmpersandAmpersand;
                        self.index += 1;
                        break;
                    },
                    '=' => {
                        result.id = .AmpersandEqual;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Ampersand;
                        break;
                    },
                },
                .Hash => switch (c) {
                    '#' => {
                        result.id = .HashHash;
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .Hash;
                        break;
                    },
                },
                .LineComment => switch (c) {
                    '\n' => {
                        // NOTE(review): the newline is consumed into the
                        // comment token here, so no .Nl token follows a line
                        // comment — confirm this is intended.
                        result.id = .LineComment;
                        self.index += 1;
                        break;
                    },
                    else => {},
                },
                .MultiLineComment => switch (c) {
                    '*' => {
                        state = .MultiLineCommentAsterisk;
                    },
                    else => {},
                },
                .MultiLineCommentAsterisk => switch (c) {
                    '/' => {
                        result.id = .MultiLineComment;
                        self.index += 1;
                        break;
                    },
                    else => {
                        state = .MultiLineComment;
                    },
                },
                .Zero => switch (c) {
                    '0'...'9' => {
                        state = .IntegerLiteralOct;
                    },
                    'b', 'B' => {
                        state = .IntegerLiteralBinary;
                    },
                    'x', 'X' => {
                        state = .IntegerLiteralHex;
                    },
                    else => {
                        // Plain `0`; back up so the suffix state re-reads c.
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralOct => switch (c) {
                    '0'...'7' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralBinary => switch (c) {
                    '0', '1' => {},
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteralHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    '.' => {
                        state = .FloatFractionHex;
                    },
                    'p', 'P' => {
                        state = .FloatExponent;
                    },
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerLiteral => switch (c) {
                    '0'...'9' => {},
                    '.' => {
                        state = .FloatFraction;
                    },
                    'e', 'E' => {
                        state = .FloatExponent;
                    },
                    else => {
                        state = .IntegerSuffix;
                        self.index -= 1;
                    },
                },
                .IntegerSuffix => switch (c) {
                    'u', 'U' => {
                        state = .IntegerSuffixU;
                    },
                    'l', 'L' => {
                        state = .IntegerSuffixL;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .None };
                        break;
                    },
                },
                .IntegerSuffixU => switch (c) {
                    'l', 'L' => {
                        state = .IntegerSuffixUL;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .U };
                        break;
                    },
                },
                .IntegerSuffixL => switch (c) {
                    'l', 'L' => {
                        state = .IntegerSuffixLL;
                    },
                    'u', 'U' => {
                        result.id = .{ .IntegerLiteral = .LU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .L };
                        break;
                    },
                },
                .IntegerSuffixLL => switch (c) {
                    'u', 'U' => {
                        result.id = .{ .IntegerLiteral = .LLU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .LL };
                        break;
                    },
                },
                .IntegerSuffixUL => switch (c) {
                    'l', 'L' => {
                        result.id = .{ .IntegerLiteral = .LLU };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .IntegerLiteral = .LU };
                        break;
                    },
                },
                .FloatFraction => switch (c) {
                    '0'...'9' => {},
                    'e', 'E' => {
                        state = .FloatExponent;
                    },
                    else => {
                        self.index -= 1;
                        state = .FloatSuffix;
                    },
                },
                .FloatFractionHex => switch (c) {
                    '0'...'9', 'a'...'f', 'A'...'F' => {},
                    'p', 'P' => {
                        state = .FloatExponent;
                    },
                    else => {
                        // Hex floats require a binary exponent (`p`).
                        result.id = .Invalid;
                        break;
                    },
                },
                .FloatExponent => switch (c) {
                    '+', '-' => {
                        state = .FloatExponentDigits;
                    },
                    else => {
                        self.index -= 1;
                        state = .FloatExponentDigits;
                    },
                },
                .FloatExponentDigits => switch (c) {
                    '0'...'9' => {
                        counter += 1;
                    },
                    else => {
                        if (counter == 0) {
                            // An exponent must contain at least one digit.
                            result.id = .Invalid;
                            break;
                        }
                        state = .FloatSuffix;
                    },
                },
                .FloatSuffix => switch (c) {
                    'l', 'L' => {
                        result.id = .{ .FloatLiteral = .L };
                        self.index += 1;
                        break;
                    },
                    'f', 'F' => {
                        result.id = .{ .FloatLiteral = .F };
                        self.index += 1;
                        break;
                    },
                    else => {
                        result.id = .{ .FloatLiteral = .None };
                        break;
                    },
                },
            }
        } else if (self.index == self.source.buffer.len) {
            // Ran out of input: finish whatever token was in progress.
            switch (state) {
                .Start => {},
                .u, .u8, .U, .L, .Identifier => {
                    result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier;
                },

                // States that cannot legally end at EOF.
                .Cr,
                .BackSlash,
                .BackSlashCr,
                .Period2,
                .StringLiteral,
                .CharLiteralStart,
                .CharLiteral,
                .EscapeSequence,
                .CrEscape,
                .OctalEscape,
                .HexEscape,
                .UnicodeEscape,
                .MultiLineComment,
                .MultiLineCommentAsterisk,
                .FloatFraction,
                .FloatFractionHex,
                .FloatExponent,
                .FloatExponentDigits,
                .MacroString,
                => result.id = .Invalid,

                .IntegerLiteralOct,
                .IntegerLiteralBinary,
                .IntegerLiteralHex,
                .IntegerLiteral,
                .IntegerSuffix,
                .Zero,
                => result.id = .{ .IntegerLiteral = .None },
                .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U },
                .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L },
                .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL },
                .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU },

                .FloatSuffix =>
result.id = .{ .FloatLiteral = .None },
                // Single-character operator states: the char itself is the token.
                .Equal => result.id = .Equal,
                .Bang => result.id = .Bang,
                .Minus => result.id = .Minus,
                .Slash => result.id = .Slash,
                .Ampersand => result.id = .Ampersand,
                .Hash => result.id = .Hash,
                .Period => result.id = .Period,
                .Pipe => result.id = .Pipe,
                .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight,
                .AngleBracketRight => result.id = .AngleBracketRight,
                .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft,
                .AngleBracketLeft => result.id = .AngleBracketLeft,
                .Plus => result.id = .Plus,
                .Percent => result.id = .Percent,
                .Caret => result.id = .Caret,
                .Asterisk => result.id = .Asterisk,
                .LineComment => result.id = .LineComment,
            }
        }

        // Remember the tag for the #-directive / #include<...> special cases.
        self.prev_tok_id = result.id;
        result.end = self.index;
        return result;
    }
};

test "operators" {
    expectTokens(
        \\ ! != | || |= = ==
        \\ ( ) { } [ ] . .. ...
        \\ ^ ^= + ++ += - -- -=
        \\ * *= % %= -> : ; / /=
        \\ , & && &= ? < <= <<
        \\ <<= > >= >> >>= ~ # ##
        \\
    , &[_]Token.Id{
        .Bang,
        .BangEqual,
        .Pipe,
        .PipePipe,
        .PipeEqual,
        .Equal,
        .EqualEqual,
        .Nl,
        .LParen,
        .RParen,
        .LBrace,
        .RBrace,
        .LBracket,
        .RBracket,
        .Period,
        .Period,
        .Period,
        .Ellipsis,
        .Nl,
        .Caret,
        .CaretEqual,
        .Plus,
        .PlusPlus,
        .PlusEqual,
        .Minus,
        .MinusMinus,
        .MinusEqual,
        .Nl,
        .Asterisk,
        .AsteriskEqual,
        .Percent,
        .PercentEqual,
        .Arrow,
        .Colon,
        .Semicolon,
        .Slash,
        .SlashEqual,
        .Nl,
        .Comma,
        .Ampersand,
        .AmpersandAmpersand,
        .AmpersandEqual,
        .QuestionMark,
        .AngleBracketLeft,
        .AngleBracketLeftEqual,
        .AngleBracketAngleBracketLeft,
        .Nl,
        .AngleBracketAngleBracketLeftEqual,
        .AngleBracketRight,
        .AngleBracketRightEqual,
        .AngleBracketAngleBracketRight,
        .AngleBracketAngleBracketRightEqual,
        .Tilde,
        .Hash,
        .HashHash,
        .Nl,
    });
}

test "keywords" {
    expectTokens(
        \\auto break case char const continue default do
        \\double else enum extern float for goto if int
        \\long register return short signed sizeof static
        \\struct switch typedef union unsigned void volatile
        \\while _Bool _Complex _Imaginary inline restrict _Alignas
        \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local
        \\
    , &[_]Token.Id{
        .Keyword_auto,
        .Keyword_break,
        .Keyword_case,
        .Keyword_char,
        .Keyword_const,
        .Keyword_continue,
        .Keyword_default,
        .Keyword_do,
        .Nl,
        .Keyword_double,
        .Keyword_else,
        .Keyword_enum,
        .Keyword_extern,
        .Keyword_float,
        .Keyword_for,
        .Keyword_goto,
        .Keyword_if,
        .Keyword_int,
        .Nl,
        .Keyword_long,
        .Keyword_register,
        .Keyword_return,
        .Keyword_short,
        .Keyword_signed,
        .Keyword_sizeof,
        .Keyword_static,
        .Nl,
        .Keyword_struct,
        .Keyword_switch,
        .Keyword_typedef,
        .Keyword_union,
        .Keyword_unsigned,
        .Keyword_void,
        .Keyword_volatile,
        .Nl,
        .Keyword_while,
        .Keyword_bool,
        .Keyword_complex,
        .Keyword_imaginary,
        .Keyword_inline,
        .Keyword_restrict,
        .Keyword_alignas,
        .Nl,
        .Keyword_alignof,
        .Keyword_atomic,
        .Keyword_generic,
        .Keyword_noreturn,
        .Keyword_static_assert,
        .Keyword_thread_local,
        .Nl,
    });
}

test "preprocessor keywords" {
    expectTokens(
    // Fix: the expected list below contains .MacroString before the first
    // .Nl, which requires a `<...>` after `#include` on the first line; the
    // fixture had a bare `#include` and could never match.
        \\#include <test>
        \\#define #include <1
        \\#ifdef
        \\#ifndef
        \\#error
        \\#pragma
        \\
    , &[_]Token.Id{
        .Hash,
        .Keyword_include,
        .MacroString,
        .Nl,
        .Hash,
        .Keyword_define,
        // `include` here is a plain identifier: pp_directive is already set
        // by `define`, so directive keywords are not recognized.
        .Hash,
        .Identifier,
        .AngleBracketLeft,
        .{ .IntegerLiteral = .None },
        .Nl,
        .Hash,
        .Keyword_ifdef,
        .Nl,
        .Hash,
        .Keyword_ifndef,
        .Nl,
        .Hash,
        .Keyword_error,
        .Nl,
        .Hash,
        .Keyword_pragma,
        .Nl,
    });
}

test "line continuation" {
    expectTokens(
        \\#define foo \
        \\ bar
        \\"foo\
        \\ bar"
        \\#define "foo"
        \\ "bar"
        \\#define "foo" \
        \\ "bar"
    , &[_]Token.Id{
        .Hash,
        .Keyword_define,
        .Identifier,
        .Identifier,
        .Nl,
        .{ .StringLiteral = .None },
        .Nl,
        .Hash,
        .Keyword_define,
        .{ .StringLiteral = .None },
        .Nl,
        .{ .StringLiteral = .None
},
        .Nl,
        .Hash,
        .Keyword_define,
        .{ .StringLiteral = .None },
        .{ .StringLiteral = .None },
    });
}

test "string prefix" {
    expectTokens(
        \\"foo"
        \\u"foo"
        \\u8"foo"
        \\U"foo"
        \\L"foo"
        \\'foo'
        \\u'foo'
        \\U'foo'
        \\L'foo'
        \\
    , &[_]Token.Id{
        .{ .StringLiteral = .None },
        .Nl,
        .{ .StringLiteral = .Utf16 },
        .Nl,
        .{ .StringLiteral = .Utf8 },
        .Nl,
        .{ .StringLiteral = .Utf32 },
        .Nl,
        .{ .StringLiteral = .Wide },
        .Nl,
        .{ .CharLiteral = .None },
        .Nl,
        .{ .CharLiteral = .Utf16 },
        .Nl,
        .{ .CharLiteral = .Utf32 },
        .Nl,
        .{ .CharLiteral = .Wide },
        .Nl,
    });
}

test "num suffixes" {
    expectTokens(
        \\ 1.0f 1.0L 1.0 .0 1.
        \\ 0l 0lu 0ll 0llu 0
        \\ 1u 1ul 1ull 1
        \\
    , &[_]Token.Id{
        .{ .FloatLiteral = .F },
        .{ .FloatLiteral = .L },
        .{ .FloatLiteral = .None },
        .{ .FloatLiteral = .None },
        .{ .FloatLiteral = .None },
        .Nl,
        .{ .IntegerLiteral = .L },
        .{ .IntegerLiteral = .LU },
        .{ .IntegerLiteral = .LL },
        .{ .IntegerLiteral = .LLU },
        .{ .IntegerLiteral = .None },
        .Nl,
        .{ .IntegerLiteral = .U },
        .{ .IntegerLiteral = .LU },
        .{ .IntegerLiteral = .LLU },
        .{ .IntegerLiteral = .None },
        .Nl,
    });
}

/// Test helper: tokenizes `source` and panics on the first token whose id
/// (including literal payload) differs from `expected_tokens`; finally
/// asserts the stream ends with .Eof.
fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void {
    var tokenizer = Tokenizer{
        .source = &Source{
            .buffer = source,
            // file_name/tokens are never read by the tokenizer itself.
            .file_name = undefined,
            .tokens = undefined,
        },
    };
    for (expected_tokens) |expected_token_id| {
        const token = tokenizer.next();
        if (!std.meta.eql(token.id, expected_token_id)) {
            std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) });
        }
    }
    const last_token = tokenizer.next();
    std.testing.expect(last_token.id == .Eof);
}