From 04b7cec42e85a74e5933f13a6d3d10c8e0207066 Mon Sep 17 00:00:00 2001 From: Vexu Date: Fri, 3 Jan 2020 22:34:47 +0200 Subject: [PATCH 01/30] std-c tokenizer base --- lib/std/c/tokenizer.zig | 132 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) create mode 100644 lib/std/c/tokenizer.zig diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig new file mode 100644 index 0000000000..c87c69e209 --- /dev/null +++ b/lib/std/c/tokenizer.zig @@ -0,0 +1,132 @@ +const std = @import("std"); +const expect = std.testing.expect; + +pub const Source = struct { + buffer: []const u8, + file_name: []const u8, +}; + +pub const Token = struct { + id: Id, + num_suffix: NumSuffix = .None, + start: usize, + end: usize, + source: *Source, + + pub const Id = enum { + Invalid, + Eof, + Nl, + Identifier, + StringLiteral, + CharLiteral, + IntegerLiteral, + FloatLiteral, + Bang, + BangEqual, + Pipe, + PipePipe, + PipeEqual, + Equal, + EqualEqual, + EqualAngleBracketRight, + LParen, + RParen, + LBrace, + RBrace, + LBracket, + RBracket, + Period, + PeriodAsterisk, + Ellipsis, + Caret, + CaretEqual, + Plus, + PlusPlus, + PlusEqual, + Minus, + MinusMinus, + MinusEqual, + Asterisk, + AsteriskEqual, + Percent, + PercentEqual, + Arrow, + Colon, + Semicolon, + Slash, + SlashEqual, + Comma, + Ampersand, + AmpersandAmpersand, + AmpersandEqual, + QuestionMark, + AngleBracketLeft, + AngleBracketLeftEqual, + AngleBracketAngleBracketLeft, + AngleBracketAngleBracketLeftEqual, + AngleBracketRight, + AngleBracketRightEqual, + AngleBracketAngleBracketRight, + AngleBracketAngleBracketRightEqual, + Tilde, + LineComment, + MultiLineComment, + Hash, + HashHash, + }; + + pub const NumSuffix = enum { + None, + F, + L, + U, + LU, + LL, + LLU, + }; +}; + +pub const Tokenizer = struct { + source: *Source, + index: usize = 0, + + pub fn next(self: *Tokenizer) Token { + const start_index = self.index; + var result = Token{ + .id = .Eof, + .start = self.index, + .end = undefined, + .source = self.source, + }; + var state: enum { + Start, + } = .Start; + while (self.index < self.source.buffer.len) : (self.index += 1) { + const c = self.source.buffer[self.index]; + switch (state) { + .Start => switch (c) { + else => @panic("TODO"), + }, + else => @panic("TODO"), + } + } + } +}; + +fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { + var tokenizer = Tokenizer{ + .source = .{ + .buffer = source, + .file_name = undefined, + }, + }; + for (expected_tokens) |expected_token_id| { + const token = tokenizer.next(); + if (token.id != expected_token_id) { + std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) }); + } + } + const last_token = tokenizer.next(); + std.testing.expect(last_token.id == .Eof); +} From 05acc0b0c14c19c9776633cd0d1ebbbbc30c3c47 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 00:19:43 +0200 Subject: [PATCH 02/30] std-c tokenizer more stuff --- lib/std/c/tokenizer.zig | 558 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 556 insertions(+), 2 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index c87c69e209..e770357766 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -101,16 +101,570 @@ pub const Tokenizer = struct { }; var state: enum { Start, + Cr, + StringLiteral, + CharLiteral, + Identifier, + Equal, + Bang, + Pipe, + Percent, + Asterisk, + Plus, + AngleBracketLeft, + AngleBracketAngleBracketLeft, + AngleBracketRight, + AngleBracketAngleBracketRight, + Caret, + Period, 
+ Minus, + Slash, + Ampersand, + Zero, + IntegerLiteralOct, + IntegerLiteralBinary, + IntegerLiteralHex, + IntegerLiteral, + IntegerSuffix, + IntegerSuffixU, + IntegerSuffixL, + IntegerSuffixLL, + IntegerSuffixUL, } = .Start; while (self.index < self.source.buffer.len) : (self.index += 1) { const c = self.source.buffer[self.index]; switch (state) { .Start => switch (c) { - else => @panic("TODO"), + '\n' => { + result.id = .Nl; + self.index += 1; + break; + }, + '\r' => { + state = .Cr; + }, + ' ', '\t' => { + result.start = self.index + 1; + }, + '"' => { + state = .StringLiteral; + result.id = .StringLiteral; + }, + '\'' => { + state = .CharLiteral; + }, + 'a'...'z', 'A'...'Z', '_' => { + state = .Identifier; + result.id = .Identifier; + }, + '=' => { + state = .Equal; + }, + '!' => { + state = .Bang; + }, + '|' => { + state = .Pipe; + }, + '(' => { + result.id = .LParen; + self.index += 1; + break; + }, + ')' => { + result.id = .RParen; + self.index += 1; + break; + }, + '[' => { + result.id = .LBracket; + self.index += 1; + break; + }, + ']' => { + result.id = .RBracket; + self.index += 1; + break; + }, + ';' => { + result.id = .Semicolon; + self.index += 1; + break; + }, + ',' => { + result.id = .Comma; + self.index += 1; + break; + }, + '?' => { + result.id = .QuestionMark; + self.index += 1; + break; + }, + ':' => { + result.id = .Colon; + self.index += 1; + break; + }, + '%' => { + state = .Percent; + }, + '*' => { + state = .Asterisk; + }, + '+' => { + state = .Plus; + }, + '<' => { + state = .AngleBracketLeft; + }, + '>' => { + state = .AngleBracketRight; + }, + '^' => { + state = .Caret; + }, + '{' => { + result.id = .LBrace; + self.index += 1; + break; + }, + '}' => { + result.id = .RBrace; + self.index += 1; + break; + }, + '~' => { + result.id = .Tilde; + self.index += 1; + break; + }, + '.' 
=> { + state = .Period; + }, + '-' => { + state = .Minus; + }, + '/' => { + state = .Slash; + }, + '&' => { + state = .Ampersand; + }, + '0' => { + state = .Zero; + result.id = .IntegerLiteral; + }, + '1'...'9' => { + state = .IntegerLiteral; + result.id = .IntegerLiteral; + }, + else => { + result.id = .Invalid; + self.index += 1; + break; + }, }, - else => @panic("TODO"), + .Cr => switch (c) { + '\n' => { + result.id = .Nl; + self.index += 1; + break; + }, + else => { + result.id = .Invalid; + break; + }, + }, + .Identifier => switch (c) { + 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, + else => { + result.id = .Identifier; + break; + }, + }, + .Equal => switch (c) { + '=' => { + result.id = .EqualEqual; + self.index += 1; + break; + }, + else => { + result.id = .Equal; + break; + }, + }, + .Bang => switch (c) { + '=' => { + result.id = .BangEqual; + self.index += 1; + break; + }, + else => { + result.id = .Bang; + break; + }, + }, + .Pipe => switch (c) { + '=' => { + result.id = .PipeEqual; + self.index += 1; + break; + }, + '|' => { + result.id = .PipePipe; + self.index += 1; + break; + }, + else => { + result.id = .Pipe; + break; + }, + }, + .Percent => switch (c) { + '=' => { + result.id = .PercentEqual; + self.index += 1; + break; + }, + else => { + result.id = .Id.Percent; + break; + }, + }, + .Asterisk => switch (c) { + '=' => { + result.id = .AsteriskEqual; + self.index += 1; + break; + }, + else => { + result.id = .Asterisk; + break; + }, + }, + .Plus => switch (c) { + '=' => { + result.id = .PlusEqual; + self.index += 1; + break; + }, + '+' => { + result.id = .PlusPlus; + self.index += 1; + break; + }, + else => { + result.id = .Plus; + break; + }, + }, + .AngleBracketLeft => switch (c) { + '<' => { + state = .AngleBracketAngleBracketLeft; + }, + '=' => { + result.id = .AngleBracketLeftEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketLeft; + break; + }, + }, + .AngleBracketAngleBracketLeft => switch (c) { + '=' => { + result.id = .AngleBracketAngleBracketLeftEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketAngleBracketLeft; + break; + }, + }, + .AngleBracketRight => switch (c) { + '>' => { + state = .AngleBracketAngleBracketRight; + }, + '=' => { + result.id = .AngleBracketRightEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketRight; + break; + }, + }, + .AngleBracketAngleBracketRight => switch (c) { + '=' => { + result.id = .AngleBracketAngleBracketRightEqual; + self.index += 1; + break; + }, + else => { + result.id = .AngleBracketAngleBracketRight; + break; + }, + }, + .Caret => switch (c) { + '=' => { + result.id = .CaretEqual; + self.index += 1; + break; + }, + else => { + result.id = .Caret; + break; + }, + }, + .Period => switch (c) { + '.' => { + state = .Period2; + }, + '0'...'9' => { + state = .FloatFraction; + }, + else => { + result.id = .Period; + break; + }, + }, + .Period2 => switch (c) { + '.' 
=> { + result.id = .Ellipsis; + self.index += 1; + break; + }, + else => { + result.id = .Period; + self.index -= 1; + break; + }, + }, + .Minus => switch (c) { + '>' => { + result.id = .Arrow; + self.index += 1; + break; + }, + '=' => { + result.id = .MinusEqual; + self.index += 1; + break; + }, + '-' => { + result.id = .MinusMinus; + self.index += 1; + break; + }, + else => { + result.id = .Minus; + break; + }, + }, + .Slash => switch (c) { + '/' => { + state = .LineComment; + result.id = .LineComment; + }, + '=' => { + result.id = .SlashEqual; + self.index += 1; + break; + }, + else => { + result.id = .Slash; + break; + }, + }, + .Ampersand => switch (c) { + '&' => { + result.id = .AmpersandAmpersand; + self.index += 1; + break; + }, + '=' => { + result.id = .AmpersandEqual; + self.index += 1; + break; + }, + else => { + result.id = .Ampersand; + break; + }, + }, + .Zero => switch (c) { + '0'...'9' => { + state = .IntegerLiteralOct; + }, + 'b', 'B' => { + state = .IntegerLiteralBinary; + }, + 'x', 'X' => { + state = .IntegerLiteralHex; + }, + else => { + state = .IntegerSuffix; + self.index -= 1; + }, + }, + .IntegerLiteralOct => switch (c) { + '0'...'7' => {}, + else => { + state = .IntegerSuffix; + self.index -= 1; + }, + }, + .IntegerLiteralBinary => switch (c) { + '0', '1' => {}, + else => { + state = .IntegerSuffix; + self.index -= 1; + }, + }, + .IntegerLiteralHex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + '.' => { + state = .FloatFractionHex; + }, + 'p', 'P' => { + state = .FloatExponentUnsignedHex; + }, + else => { + state = .IntegerSuffix; + self.index -= 1; + }, + }, + .IntegerLiteral => switch (c) { + '0'...'9' => {}, + '.' => { + state = .FloatFraction; + }, + 'e', 'E' => { + state = .FloatExponentUnsigned; + }, + else => { + state = .IntegerSuffix; + self.index -= 1; + }, + }, + .IntegerSuffix => switch (c) { + 'u', 'U' => { + state = .IntegerSuffixU; + }, + 'l', 'L' => { + state = .IntegerSuffixL; + }, + else => { + result.id = .IntegerLiteral; + break; + }, + }, + .IntegerSuffixU => switch (c) { + 'l', 'L' => { + state = .IntegerSuffixUL; + }, + else => { + result.id = .IntegerLiteral; + result.num_suffix = .U; + break; + }, + }, + .IntegerSuffixL => switch (c) { + 'l', 'L' => { + state = .IntegerSuffixLL; + }, + 'u', 'U' => { + result.id = .IntegerLiteral; + result.num_suffix = .LU; + self.index += 1; + break; + }, + else => { + result.id = .IntegerLiteral; + result.num_suffix = .L; + break; + }, + }, + .IntegerSuffixLL => switch (c) { + 'u', 'U' => { + result.id = .IntegerLiteral; + result.num_suffix = .LLU; + self.index += 1; + break; + }, + else => { + result.id = .IntegerLiteral; + result.num_suffix = .LL; + break; + }, + }, + .IntegerSuffixUL => switch (c) { + 'l', 'L' => { + result.id = .IntegerLiteral; + result.num_suffix = .LLU; + self.index += 1; + break; + }, + else => { + result.id = .IntegerLiteral; + result.num_suffix = .LU; + break; + }, + }, + } + } else if (self.index == self.source.buffer.len) { + switch (state) { + .Identifier => { + result.id = .Identifier; + }, + .IntegerLiteralOct, + .IntegerLiteralBinary, + .IntegerLiteralHex, + .IntegerLiteral, + .IntegerSuffix, + .Zero => result.id = .IntegerLiteral, + .IntegerSuffixU => { + result.id = .IntegerLiteral; + result.num_suffix = .U; + }, + .IntegerSuffixL => { + result.id = .IntegerLiteral; + result.num_suffix = .L; + }, + .IntegerSuffixLL => { + result.id = .IntegerLiteral; + result.num_suffix = .LL; + }, + .IntegerSuffixUL => { + result.id = .IntegerLiteral; + result.num_suffix = 
.Ul; + }, + + .Equal => result.id = .Equal, + .Bang => result.id = .Bang, + .Minus => result.id = .Minus, + .Slash => result.id = .Slash, + .Ampersand => result.id = .Ampersand, + .Period => result.id = .Period, + .Period2 => result.id = .Invalid, + .Pipe => result.id = .Pipe, + .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight, + .AngleBracketRight => result.id = .AngleBracketRight, + .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft, + .AngleBracketLeft => result.id = .AngleBracketLeft, + .Plus => result.id = .Plus, + .Percent => result.id = .Percent, + .Caret => result.id = .Caret, + .Asterisk => result.id = .Asterisk, } } + + result.end = self.index; + return result; } }; From f14a5287e92755f8d1f7f592caeed77bac940958 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 01:38:26 +0200 Subject: [PATCH 03/30] std-c tokenizer strings, floats and comments --- lib/std/c/tokenizer.zig | 221 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 209 insertions(+), 12 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index e770357766..034b7637fb 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -104,6 +104,10 @@ pub const Tokenizer = struct { Cr, StringLiteral, CharLiteral, + EscapeSequence, + OctalEscape, + HexEscape, + UnicodeEscape, Identifier, Equal, Bang, @@ -117,9 +121,13 @@ pub const Tokenizer = struct { AngleBracketAngleBracketRight, Caret, Period, + Period2, Minus, Slash, Ampersand, + LineComment, + MultiLineComment, + MultiLineCommentAsterisk, Zero, IntegerLiteralOct, IntegerLiteralBinary, @@ -130,7 +138,14 @@ pub const Tokenizer = struct { IntegerSuffixL, IntegerSuffixLL, IntegerSuffixUL, + FloatFraction, + FloatFractionHex, + FloatExponent, + FloatExponentDigits, + FloatSuffix, } = .Start; + var string = false; + var counter: u32 = 0; while (self.index < self.source.buffer.len) : (self.index += 1) { const c = self.source.buffer[self.index]; switch (state) { @@ -276,6 +291,89 @@ pub const Tokenizer = struct { break; }, }, + // TODO l"" u"" U"" u8"" + .StringLiteral => switch (c) { + '\\' => { + string = true; + state = .EscapeSequence; + }, + '"' => { + result.id = .StringLiteral; + self.index += 1; + break; + }, + '\n', '\r' => { + result.id = .Invalid; + break; + }, + else => {}, + }, + // TODO l'' u'' U'' + .CharLiteral => switch (c) { + '\\' => { + string = false; + state = .EscapeSequence; + }, + '\'', '\n' => { + result.id = .Invalid; + break; + }, + else => {}, + }, + .EscapeSequence => switch (c) { + '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v' => {}, + '0'...'7' => { + counter = 1; + state = .OctalEscape; + }, + 'x' => { + state = .HexEscape; + }, + 'u' => { + counter = 4; + state = .OctalEscape; + }, + 'U' => { + counter = 8; + state = .OctalEscape; + }, + else => { + result.id = .Invalid; + break; + }, + }, + .OctalEscape => switch (c) { + '0'...'7' => { + counter += 1; + if (counter == 3) { + state = if (string) .StringLiteral else .CharLiteral; + } + }, + else => { + state = if (string) .StringLiteral else .CharLiteral; + }, + }, + .HexEscape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + else => { + state = if (string) .StringLiteral else .CharLiteral; + }, + }, + .UnicodeEscape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + counter -= 1; + if (counter == 0) { + state = if (string) .StringLiteral else .CharLiteral; + } + }, + else => { + if (counter != 0) { + result.id = .Invalid; + break; + } + state = if (string) .StringLiteral else 
.CharLiteral; + }, + }, .Identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => { @@ -328,7 +426,7 @@ pub const Tokenizer = struct { break; }, else => { - result.id = .Id.Percent; + result.id = .Percent; break; }, }, @@ -468,7 +566,9 @@ pub const Tokenizer = struct { .Slash => switch (c) { '/' => { state = .LineComment; - result.id = .LineComment; + }, + '*' => { + state = .MultiLineComment; }, '=' => { result.id = .SlashEqual; @@ -496,6 +596,30 @@ pub const Tokenizer = struct { break; }, }, + .LineComment => switch (c) { + '\n' => { + result.id = .LineComment; + self.index += 1; + break; + }, + else => {}, + }, + .MultiLineComment => switch (c) { + '*' => { + state = .MultiLineCommentAsterisk; + }, + else => {}, + }, + .MultiLineCommentAsterisk => switch (c) { + '/' => { + result.id = .MultiLineComment; + self.index += 1; + break; + }, + else => { + state = .MultiLineComment; + }, + }, .Zero => switch (c) { '0'...'9' => { state = .IntegerLiteralOct; @@ -531,7 +655,7 @@ pub const Tokenizer = struct { state = .FloatFractionHex; }, 'p', 'P' => { - state = .FloatExponentUnsignedHex; + state = .FloatExponent; }, else => { state = .IntegerSuffix; @@ -544,7 +668,7 @@ pub const Tokenizer = struct { state = .FloatFraction; }, 'e', 'E' => { - state = .FloatExponentUnsigned; + state = .FloatExponent; }, else => { state = .IntegerSuffix; @@ -615,18 +739,90 @@ pub const Tokenizer = struct { break; }, }, + .FloatFraction => switch (c) { + '0'...'9' => {}, + 'e', 'E' => { + state = .FloatExponent; + }, + else => { + self.index -= 1; + state = .FloatSuffix; + }, + }, + .FloatFractionHex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + 'p', 'P' => { + state = .FloatExponent; + }, + else => { + result.id = .Invalid; + break; + }, + }, + .FloatExponent => switch (c) { + '+', '-' => { + state = .FloatExponentDigits; + }, + else => { + self.index -= 1; + state = .FloatExponentDigits; + }, + }, + .FloatExponentDigits => switch (c) { + '0'...'9' => { + counter += 1; + }, + else => { + if (counter == 0) { + result.id = .Invalid; + break; + } + state = .FloatSuffix; + }, + }, + .FloatSuffix => switch (c) { + 'l', 'L' => { + result.id = .FloatLiteral; + result.num_suffix = .L; + self.index += 1; + break; + }, + 'f', 'F' => { + result.id = .FloatLiteral; + result.num_suffix = .F; + self.index += 1; + break; + }, + else => { + result.id = .FloatLiteral; + break; + }, + }, } } else if (self.index == self.source.buffer.len) { switch (state) { + .Start => {}, .Identifier => { result.id = .Identifier; }, - .IntegerLiteralOct, - .IntegerLiteralBinary, - .IntegerLiteralHex, - .IntegerLiteral, - .IntegerSuffix, - .Zero => result.id = .IntegerLiteral, + + .Cr, + .Period2, + .StringLiteral, + .CharLiteral, + .EscapeSequence, + .OctalEscape, + .HexEscape, + .UnicodeEscape, + .MultiLineComment, + .MultiLineCommentAsterisk, + .FloatFraction, + .FloatFractionHex, + .FloatExponent, + .FloatExponentDigits, + => result.id = .Invalid, + + .IntegerLiteralOct, .IntegerLiteralBinary, .IntegerLiteralHex, .IntegerLiteral, .IntegerSuffix, .Zero => result.id = .IntegerLiteral, .IntegerSuffixU => { result.id = .IntegerLiteral; result.num_suffix = .U; @@ -641,16 +837,16 @@ pub const Tokenizer = struct { }, .IntegerSuffixUL => { result.id = .IntegerLiteral; - result.num_suffix = .Ul; + result.num_suffix = .LU; }, + .FloatSuffix => result.id = .FloatLiteral, .Equal => result.id = .Equal, .Bang => result.id = .Bang, .Minus => result.id = .Minus, .Slash => result.id = .Slash, .Ampersand => result.id = 
.Ampersand, .Period => result.id = .Period, - .Period2 => result.id = .Invalid, .Pipe => result.id = .Pipe, .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight, .AngleBracketRight => result.id = .AngleBracketRight, @@ -660,6 +856,7 @@ pub const Tokenizer = struct { .Percent => result.id = .Percent, .Caret => result.id = .Caret, .Asterisk => result.id = .Asterisk, + .LineComment => result.id = .LineComment, } } From 26bf410b061b9d6d18e4945417ddec62d7486e9c Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 02:00:29 +0200 Subject: [PATCH 04/30] std-c finish tokenizer --- lib/std/c/tokenizer.zig | 166 ++++++++++++++++++++++++++-------------- 1 file changed, 108 insertions(+), 58 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 034b7637fb..a5f2ad770d 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -7,21 +7,15 @@ pub const Source = struct { }; pub const Token = struct { - id: Id, - num_suffix: NumSuffix = .None, - start: usize, - end: usize, - source: *Source, - - pub const Id = enum { + id: union(enum) { Invalid, Eof, Nl, Identifier, - StringLiteral, - CharLiteral, - IntegerLiteral, - FloatLiteral, + StringLiteral: StrKind, + CharLiteral: StrKind, + IntegerLiteral: NumSuffix, + FloatLiteral: NumSuffix, Bang, BangEqual, Pipe, @@ -74,7 +68,10 @@ pub const Token = struct { MultiLineComment, Hash, HashHash, - }; + }, + start: usize, + end: usize, + source: *Source, pub const NumSuffix = enum { None, @@ -85,6 +82,14 @@ pub const Token = struct { LL, LLU, }; + + pub const StrKind = enum { + None, + Wide, + Utf8, + Utf16, + Utf32, + }; }; pub const Tokenizer = struct { @@ -102,6 +107,10 @@ pub const Tokenizer = struct { var state: enum { Start, Cr, + u, + u8, + U, + L, StringLiteral, CharLiteral, EscapeSequence, @@ -162,13 +171,23 @@ pub const Tokenizer = struct { result.start = self.index + 1; }, '"' => { + result.id = .{ .StringLiteral = .None }; state = .StringLiteral; - result.id = .StringLiteral; }, '\'' => { + result.id = .{ .CharLiteral = .None }; state = .CharLiteral; }, - 'a'...'z', 'A'...'Z', '_' => { + 'u' => { + state = .u; + }, + 'U' => { + state = .U; + }, + 'L' => { + state = .L; + }, + 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => { state = .Identifier; result.id = .Identifier; }, @@ -268,11 +287,9 @@ pub const Tokenizer = struct { }, '0' => { state = .Zero; - result.id = .IntegerLiteral; }, '1'...'9' => { state = .IntegerLiteral; - result.id = .IntegerLiteral; }, else => { result.id = .Invalid; @@ -291,14 +308,63 @@ pub const Tokenizer = struct { break; }, }, - // TODO l"" u"" U"" u8"" + .u => switch (c) { + '8' => { + state = .u8; + }, + '\'' => { + result.id = .{ .CharLiteral = .Utf16 }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Utf16 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .u8 => switch (c) { + '\"' => { + result.id = .{ .StringLiteral = .Utf8 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .U => switch (c) { + '\'' => { + result.id = .{ .CharLiteral = .Utf32 }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Utf32 }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, + .L => switch (c) { + '\'' => { + result.id = .{ .CharLiteral = .Wide }; + state = .CharLiteral; + }, + '\"' => { + result.id = .{ .StringLiteral = .Wide }; + state = .StringLiteral; + }, + else => { + state = .Identifier; + }, + }, .StringLiteral => switch 
(c) { '\\' => { string = true; state = .EscapeSequence; }, '"' => { - result.id = .StringLiteral; self.index += 1; break; }, @@ -308,7 +374,6 @@ pub const Tokenizer = struct { }, else => {}, }, - // TODO l'' u'' U'' .CharLiteral => switch (c) { '\\' => { string = false; @@ -683,7 +748,7 @@ pub const Tokenizer = struct { state = .IntegerSuffixL; }, else => { - result.id = .IntegerLiteral; + result.id = .{ .IntegerLiteral = .None }; break; }, }, @@ -692,8 +757,7 @@ pub const Tokenizer = struct { state = .IntegerSuffixUL; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .U; + result.id = .{ .IntegerLiteral = .U }; break; }, }, @@ -702,40 +766,34 @@ pub const Tokenizer = struct { state = .IntegerSuffixLL; }, 'u', 'U' => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; + result.id = .{ .IntegerLiteral = .LU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .L; + result.id = .{ .IntegerLiteral = .L }; break; }, }, .IntegerSuffixLL => switch (c) { 'u', 'U' => { - result.id = .IntegerLiteral; - result.num_suffix = .LLU; + result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .LL; + result.id = .{ .IntegerLiteral = .LL }; break; }, }, .IntegerSuffixUL => switch (c) { 'l', 'L' => { - result.id = .IntegerLiteral; - result.num_suffix = .LLU; + result.id = .{ .IntegerLiteral = .LLU }; self.index += 1; break; }, else => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; + result.id = .{ .IntegerLiteral = .LU }; break; }, }, @@ -782,19 +840,17 @@ pub const Tokenizer = struct { }, .FloatSuffix => switch (c) { 'l', 'L' => { - result.id = .FloatLiteral; - result.num_suffix = .L; + result.id = .{ .FloatLiteral = .L }; self.index += 1; break; }, 'f', 'F' => { - result.id = .FloatLiteral; - result.num_suffix = .F; + result.id = .{ .FloatLiteral = .F }; self.index += 1; break; }, else => { - result.id = .FloatLiteral; + result.id = .{ .FloatLiteral = .None }; break; }, }, @@ -802,7 +858,7 @@ pub const Tokenizer = struct { } else if (self.index == self.source.buffer.len) { switch (state) { .Start => {}, - .Identifier => { + .u, .u8, .U, .L, .Identifier => { result.id = .Identifier; }, @@ -822,25 +878,19 @@ pub const Tokenizer = struct { .FloatExponentDigits, => result.id = .Invalid, - .IntegerLiteralOct, .IntegerLiteralBinary, .IntegerLiteralHex, .IntegerLiteral, .IntegerSuffix, .Zero => result.id = .IntegerLiteral, - .IntegerSuffixU => { - result.id = .IntegerLiteral; - result.num_suffix = .U; - }, - .IntegerSuffixL => { - result.id = .IntegerLiteral; - result.num_suffix = .L; - }, - .IntegerSuffixLL => { - result.id = .IntegerLiteral; - result.num_suffix = .LL; - }, - .IntegerSuffixUL => { - result.id = .IntegerLiteral; - result.num_suffix = .LU; - }, + .IntegerLiteralOct, + .IntegerLiteralBinary, + .IntegerLiteralHex, + .IntegerLiteral, + .IntegerSuffix, + .Zero, + => result.id = .{ .IntegerLiteral = .None }, + .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U }, + .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L }, + .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL }, + .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU }, - .FloatSuffix => result.id = .FloatLiteral, + .FloatSuffix => result.id = .{ .FloatLiteral = .None }, .Equal => result.id = .Equal, .Bang => result.id = .Bang, .Minus => result.id = .Minus, From d75697a6a3e2c8d96819c365dcb5690d4d8028e9 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 02:44:23 +0200 
Subject: [PATCH 05/30] std-c tokenizer keywords --- lib/std/c/tokenizer.zig | 194 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 184 insertions(+), 10 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index a5f2ad770d..0a4a217551 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -1,5 +1,5 @@ const std = @import("std"); -const expect = std.testing.expect; +const mem = std.mem; pub const Source = struct { buffer: []const u8, @@ -7,11 +7,19 @@ pub const Source = struct { }; pub const Token = struct { - id: union(enum) { + id: Id, + start: usize, + end: usize, + source: *Source, + + pub const Id = union(enum) { Invalid, Eof, Nl, Identifier, + + /// special case for #include <...> + MacroString, StringLiteral: StrKind, CharLiteral: StrKind, IntegerLiteral: NumSuffix, @@ -68,10 +76,160 @@ pub const Token = struct { MultiLineComment, Hash, HashHash, - }, - start: usize, - end: usize, - source: *Source, + + Keyword_auto, + Keyword_break, + Keyword_case, + Keyword_char, + Keyword_const, + Keyword_continue, + Keyword_default, + Keyword_do, + Keyword_double, + Keyword_else, + Keyword_enum, + Keyword_extern, + Keyword_float, + Keyword_for, + Keyword_goto, + Keyword_if, + Keyword_int, + Keyword_long, + Keyword_register, + Keyword_return, + Keyword_short, + Keyword_signed, + Keyword_sizeof, + Keyword_static, + Keyword_struct, + Keyword_switch, + Keyword_typedef, + Keyword_union, + Keyword_unsigned, + Keyword_void, + Keyword_volatile, + Keyword_while, + + // ISO C99 + Keyword_bool, + Keyword_complex, + Keyword_imaginary, + Keyword_inline, + Keyword_restrict, + + // ISO C11 + Keyword_alignas, + Keyword_alignof, + Keyword_atomic, + Keyword_generic, + Keyword_noreturn, + Keyword_static_assert, + Keyword_thread_local, + + // Preprocessor + Keyword_include, + Keyword_define, + Keyword_ifdef, + Keyword_ifndef, + Keyword_error, + Keyword_pragma, + }; + + pub const Keyword = struct { + bytes: []const u8, + id: Id, + hash: u32, + + fn init(bytes: []const u8, id: Id) Keyword { + @setEvalBranchQuota(2000); + return .{ + .bytes = bytes, + .id = id, + .hash = std.hash_map.hashString(bytes), + }; + } + }; + + // TODO extensions + pub const keywords = [_]Keyword{ + Keyword.init("auto", .Keyword_auto), + Keyword.init("break", .Keyword_break), + Keyword.init("case", .Keyword_case), + Keyword.init("char", .Keyword_char), + Keyword.init("const", .Keyword_const), + Keyword.init("continue", .Keyword_continue), + Keyword.init("default", .Keyword_default), + Keyword.init("do", .Keyword_do), + Keyword.init("double", .Keyword_double), + Keyword.init("else", .Keyword_else), + Keyword.init("enum", .Keyword_enum), + Keyword.init("extern", .Keyword_extern), + Keyword.init("float", .Keyword_float), + Keyword.init("for", .Keyword_for), + Keyword.init("goto", .Keyword_goto), + Keyword.init("if", .Keyword_if), + Keyword.init("int", .Keyword_int), + Keyword.init("long", .Keyword_long), + Keyword.init("register", .Keyword_register), + Keyword.init("return", .Keyword_return), + Keyword.init("short", .Keyword_short), + Keyword.init("signed", .Keyword_signed), + Keyword.init("sizeof", .Keyword_sizeof), + Keyword.init("static", .Keyword_static), + Keyword.init("struct", .Keyword_struct), + Keyword.init("switch", .Keyword_switch), + Keyword.init("typedef", .Keyword_typedef), + Keyword.init("union", .Keyword_union), + Keyword.init("unsigned", .Keyword_unsigned), + Keyword.init("void", .Keyword_void), + Keyword.init("volatile", .Keyword_volatile), + Keyword.init("while", .Keyword_while), + 
+ // ISO C99 + Keyword.init("_Bool", .Keyword_bool), + Keyword.init("_Complex", .Keyword_complex), + Keyword.init("_Imaginary", .Keyword_imaginary), + Keyword.init("inline", .Keyword_inline), + Keyword.init("restrict", .Keyword_restrict), + + // ISO C11 + Keyword.init("_Alignas", .Keyword_alignas), + Keyword.init("_Alignof", .Keyword_alignof), + Keyword.init("_Atomic", .Keyword_atomic), + Keyword.init("_Generic", .Keyword_generic), + Keyword.init("_Noreturn", .Keyword_noreturn), + Keyword.init("_Static_assert", .Keyword_static_assert), + Keyword.init("_Thread_local", .Keyword_thread_local), + + // Preprocessor + Keyword.init("include", .Keyword_include), + Keyword.init("define", .Keyword_define), + Keyword.init("ifdef", .Keyword_ifdef), + Keyword.init("ifndef", .Keyword_ifndef), + Keyword.init("error", .Keyword_error), + Keyword.init("pragma", .Keyword_pragma), + }; + + // TODO perfect hash at comptime + pub fn getKeyword(bytes: []const u8, macro: bool) ?Id { + var hash = std.hash_map.hashString(bytes); + for (keywords) |kw| { + if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) { + switch (kw.id) { + .Keyword_include, + .Keyword_define, + .Keyword_ifdef, + .Keyword_ifndef, + .Keyword_error, + .Keyword_pragma, + => if (!macro) return null, + else => {}, + } + return kw.id; + } + } + return null; + } pub const NumSuffix = enum { None, @@ -95,6 +253,7 @@ pub const Token = struct { pub const Tokenizer = struct { source: *Source, index: usize = 0, + prev_tok_id: @TagType(Token.Id), pub fn next(self: *Tokenizer) Token { const start_index = self.index; @@ -124,6 +283,9 @@ pub const Tokenizer = struct { Percent, Asterisk, Plus, + + /// special case for #include <...> + MacroString, AngleBracketLeft, AngleBracketAngleBracketLeft, AngleBracketRight, @@ -189,7 +351,6 @@ pub const Tokenizer = struct { }, 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => { state = .Identifier; - result.id = .Identifier; }, '=' => { state = .Equal; @@ -250,7 +411,10 @@ pub const Tokenizer = struct { state = .Plus; }, '<' => { - state = .AngleBracketLeft; + if (self.prev_tok_id == .Keyword_include) + state = .MacroString + else + state = .AngleBracketLeft; }, '>' => { state = .AngleBracketRight; @@ -442,7 +606,7 @@ pub const Tokenizer = struct { .Identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => { - result.id = .Identifier; + result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash) orelse .Identifier; break; }, }, @@ -522,6 +686,14 @@ pub const Tokenizer = struct { break; }, }, + .MacroString => switch (c) { + '>' => { + result.id = .MacroString; + self.index += 1; + break; + }, + else => {}, + }, .AngleBracketLeft => switch (c) { '<' => { state = .AngleBracketAngleBracketLeft; @@ -859,7 +1031,7 @@ pub const Tokenizer = struct { switch (state) { .Start => {}, .u, .u8, .U, .L, .Identifier => { - result.id = .Identifier; + result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash) orelse .Identifier; }, .Cr, @@ -876,6 +1048,7 @@ pub const Tokenizer = struct { .FloatFractionHex, .FloatExponent, .FloatExponentDigits, + .MacroString, => result.id = .Invalid, .IntegerLiteralOct, @@ -910,6 +1083,7 @@ pub const Tokenizer = struct { } } + self.prev_tok_id = result.id; result.end = self.index; return result; } From 472ca947c94f703866eec75fc364810e655b4894 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 03:04:02 +0200 Subject: [PATCH 06/30] std-c tokenizer add tests --- 
lib/std/c/tokenizer.zig | 207 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 196 insertions(+), 11 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 0a4a217551..6a94511e60 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -31,7 +31,6 @@ pub const Token = struct { PipeEqual, Equal, EqualEqual, - EqualAngleBracketRight, LParen, RParen, LBrace, @@ -39,7 +38,6 @@ pub const Token = struct { LBracket, RBracket, Period, - PeriodAsterisk, Ellipsis, Caret, CaretEqual, @@ -253,7 +251,7 @@ pub const Token = struct { pub const Tokenizer = struct { source: *Source, index: usize = 0, - prev_tok_id: @TagType(Token.Id), + prev_tok_id: @TagType(Token.Id) = .Invalid, pub fn next(self: *Tokenizer) Token { const start_index = self.index; @@ -296,6 +294,7 @@ pub const Tokenizer = struct { Minus, Slash, Ampersand, + Hash, LineComment, MultiLineComment, MultiLineCommentAsterisk, @@ -329,9 +328,6 @@ pub const Tokenizer = struct { '\r' => { state = .Cr; }, - ' ', '\t' => { - result.start = self.index + 1; - }, '"' => { result.id = .{ .StringLiteral = .None }; state = .StringLiteral; @@ -449,6 +445,9 @@ pub const Tokenizer = struct { '&' => { state = .Ampersand; }, + '#' => { + state = .Hash; + }, '0' => { state = .Zero; }, @@ -456,9 +455,7 @@ pub const Tokenizer = struct { state = .IntegerLiteral; }, else => { - result.id = .Invalid; - self.index += 1; - break; + result.start = self.index + 1; }, }, .Cr => switch (c) { @@ -833,6 +830,17 @@ pub const Tokenizer = struct { break; }, }, + .Hash => switch (c) { + '#' => { + result.id = .HashHash; + self.index += 1; + break; + }, + else => { + result.id = .Hash; + break; + }, + }, .LineComment => switch (c) { '\n' => { result.id = .LineComment; @@ -1069,6 +1077,7 @@ pub const Tokenizer = struct { .Minus => result.id = .Minus, .Slash => result.id = .Slash, .Ampersand => result.id = .Ampersand, + .Hash => result.id = .Hash, .Period => result.id = .Period, .Pipe => result.id = .Pipe, .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight, @@ -1089,16 +1098,192 @@ pub const Tokenizer = struct { } }; +test "operators" { + expectTokens( + \\ ! != | || |= = == + \\ ( ) { } [ ] . .. ... + \\ ^ ^= + ++ += - -- -= + \\ * *= % %= -> : ; / /= + \\ , & && &= ? 
< <= << + \\ <<= > >= >> >>= ~ # ## + \\ + , + &[_]Token.Id{ + .Bang, + .BangEqual, + .Pipe, + .PipePipe, + .PipeEqual, + .Equal, + .EqualEqual, + .Nl, + + .LParen, + .RParen, + .LBrace, + .RBrace, + .LBracket, + .RBracket, + .Period, + .Period, + .Period, + .Ellipsis, + .Nl, + + .Caret, + .CaretEqual, + .Plus, + .PlusPlus, + .PlusEqual, + .Minus, + .MinusMinus, + .MinusEqual, + .Nl, + + .Asterisk, + .AsteriskEqual, + .Percent, + .PercentEqual, + .Arrow, + .Colon, + .Semicolon, + .Slash, + .SlashEqual, + .Nl, + + .Comma, + .Ampersand, + .AmpersandAmpersand, + .AmpersandEqual, + .QuestionMark, + .AngleBracketLeft, + .AngleBracketLeftEqual, + .AngleBracketAngleBracketLeft, + .Nl, + + .AngleBracketAngleBracketLeftEqual, + .AngleBracketRight, + .AngleBracketRightEqual, + .AngleBracketAngleBracketRight, + .AngleBracketAngleBracketRightEqual, + .Tilde, + .Hash, + .HashHash, + .Nl, + }, + ); +} + +test "keywords" { + expectTokens( + \\auto break case char const continue default do + \\double else enum extern float for goto if int + \\long register return short signed sizeof static + \\struct switch typedef union unsigned void volatile + \\while _Bool _Complex _Imaginary inline restrict _Alignas + \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local + \\ + , &[_]Token.Id{ + .Keyword_auto, + .Keyword_break, + .Keyword_case, + .Keyword_char, + .Keyword_const, + .Keyword_continue, + .Keyword_default, + .Keyword_do, + .Nl, + + .Keyword_double, + .Keyword_else, + .Keyword_enum, + .Keyword_extern, + .Keyword_float, + .Keyword_for, + .Keyword_goto, + .Keyword_if, + .Keyword_int, + .Nl, + + .Keyword_long, + .Keyword_register, + .Keyword_return, + .Keyword_short, + .Keyword_signed, + .Keyword_sizeof, + .Keyword_static, + .Nl, + + .Keyword_struct, + .Keyword_switch, + .Keyword_typedef, + .Keyword_union, + .Keyword_unsigned, + .Keyword_void, + .Keyword_volatile, + .Nl, + + .Keyword_while, + .Keyword_bool, + .Keyword_complex, + .Keyword_imaginary, + .Keyword_inline, + .Keyword_restrict, + .Keyword_alignas, + .Nl, + + .Keyword_alignof, + .Keyword_atomic, + .Keyword_generic, + .Keyword_noreturn, + .Keyword_static_assert, + .Keyword_thread_local, + .Nl, + }); +} + +test "preprocessor keywords" { + expectTokens( + \\#include + \\#define + \\#ifdef + \\#ifndef + \\#error + \\#pragma + \\ + , &[_]Token.Id{ + .Hash, + .Keyword_include, + .MacroString, + .Nl, + .Hash, + .Keyword_define, + .Nl, + .Hash, + .Keyword_ifdef, + .Nl, + .Hash, + .Keyword_ifndef, + .Nl, + .Hash, + .Keyword_error, + .Nl, + .Hash, + .Keyword_pragma, + .Nl, + }); +} + fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { var tokenizer = Tokenizer{ - .source = .{ + .source = &Source{ .buffer = source, .file_name = undefined, }, }; for (expected_tokens) |expected_token_id| { const token = tokenizer.next(); - if (token.id != expected_token_id) { + if (!std.meta.eql(token.id, expected_token_id)) { std.debug.panic("expected {}, found {}\n", .{ @tagName(expected_token_id), @tagName(token.id) }); } } From c221593d7d6d441c04c9332aaa6d2be8b3d24bc0 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 10:58:12 +0200 Subject: [PATCH 07/30] std-c tokenizer better special case handling --- lib/std/c/tokenizer.zig | 53 +++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 6a94511e60..58f1e0a153 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -124,7 +124,7 @@ pub const Token = struct { 
Keyword_static_assert, Keyword_thread_local, - // Preprocessor + // Preprocessor directives Keyword_include, Keyword_define, Keyword_ifdef, @@ -199,7 +199,7 @@ pub const Token = struct { Keyword.init("_Static_assert", .Keyword_static_assert), Keyword.init("_Thread_local", .Keyword_thread_local), - // Preprocessor + // Preprocessor directives Keyword.init("include", .Keyword_include), Keyword.init("define", .Keyword_define), Keyword.init("ifdef", .Keyword_ifdef), @@ -209,7 +209,7 @@ pub const Token = struct { }; // TODO perfect hash at comptime - pub fn getKeyword(bytes: []const u8, macro: bool) ?Id { + pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id { var hash = std.hash_map.hashString(bytes); for (keywords) |kw| { if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) { @@ -220,7 +220,7 @@ pub const Token = struct { .Keyword_ifndef, .Keyword_error, .Keyword_pragma, - => if (!macro) return null, + => if (!pp_directive) return null, else => {}, } return kw.id; @@ -252,6 +252,7 @@ pub const Tokenizer = struct { source: *Source, index: usize = 0, prev_tok_id: @TagType(Token.Id) = .Invalid, + pp_directive: bool = false, pub fn next(self: *Tokenizer) Token { const start_index = self.index; @@ -321,11 +322,20 @@ pub const Tokenizer = struct { switch (state) { .Start => switch (c) { '\n' => { + if (!self.pp_directive) { + result.start = self.index + 1; + continue; + } + self.pp_directive = false; result.id = .Nl; self.index += 1; break; }, '\r' => { + if (!self.pp_directive) { + result.start = self.index + 1; + continue; + } state = .Cr; }, '"' => { @@ -460,6 +470,7 @@ pub const Tokenizer = struct { }, .Cr => switch (c) { '\n' => { + self.pp_directive = false; result.id = .Nl; self.index += 1; break; @@ -603,7 +614,9 @@ pub const Tokenizer = struct { .Identifier => switch (c) { 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, else => { - result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash) orelse .Identifier; + result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; + if (self.prev_tok_id == .Hash) + self.pp_directive = true; break; }, }, @@ -1039,7 +1052,7 @@ pub const Tokenizer = struct { switch (state) { .Start => {}, .u, .u8, .U, .L, .Identifier => { - result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash) orelse .Identifier; + result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; }, .Cr, @@ -1116,8 +1129,6 @@ test "operators" { .PipeEqual, .Equal, .EqualEqual, - .Nl, - .LParen, .RParen, .LBrace, @@ -1128,8 +1139,6 @@ test "operators" { .Period, .Period, .Ellipsis, - .Nl, - .Caret, .CaretEqual, .Plus, @@ -1138,8 +1147,6 @@ test "operators" { .Minus, .MinusMinus, .MinusEqual, - .Nl, - .Asterisk, .AsteriskEqual, .Percent, @@ -1149,8 +1156,6 @@ test "operators" { .Semicolon, .Slash, .SlashEqual, - .Nl, - .Comma, .Ampersand, .AmpersandAmpersand, @@ -1159,8 +1164,6 @@ test "operators" { .AngleBracketLeft, .AngleBracketLeftEqual, .AngleBracketAngleBracketLeft, - .Nl, - .AngleBracketAngleBracketLeftEqual, .AngleBracketRight, .AngleBracketRightEqual, @@ -1169,7 +1172,6 @@ test "operators" { .Tilde, .Hash, .HashHash, - .Nl, }, ); } @@ -1192,8 +1194,6 @@ test "keywords" { .Keyword_continue, .Keyword_default, .Keyword_do, - .Nl, - .Keyword_double, .Keyword_else, .Keyword_enum, @@ -1203,8 +1203,6 @@ test "keywords" { 
.Keyword_goto, .Keyword_if, .Keyword_int, - .Nl, - .Keyword_long, .Keyword_register, .Keyword_return, @@ -1212,8 +1210,6 @@ test "keywords" { .Keyword_signed, .Keyword_sizeof, .Keyword_static, - .Nl, - .Keyword_struct, .Keyword_switch, .Keyword_typedef, @@ -1221,8 +1217,6 @@ test "keywords" { .Keyword_unsigned, .Keyword_void, .Keyword_volatile, - .Nl, - .Keyword_while, .Keyword_bool, .Keyword_complex, @@ -1230,22 +1224,19 @@ test "keywords" { .Keyword_inline, .Keyword_restrict, .Keyword_alignas, - .Nl, - .Keyword_alignof, .Keyword_atomic, .Keyword_generic, .Keyword_noreturn, .Keyword_static_assert, .Keyword_thread_local, - .Nl, }); } test "preprocessor keywords" { expectTokens( \\#include - \\#define + \\#define #include <1 \\#ifdef \\#ifndef \\#error @@ -1258,6 +1249,10 @@ test "preprocessor keywords" { .Nl, .Hash, .Keyword_define, + .Hash, + .Identifier, + .AngleBracketLeft, + .{ .IntegerLiteral = .None }, .Nl, .Hash, .Keyword_ifdef, From a5d1fb1e49891c70fd384e1cf38e9d2f4eac6ee9 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 11:23:19 +0200 Subject: [PATCH 08/30] std-c tokenizer line continuation, tests and fixes --- lib/std/c/tokenizer.zig | 141 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 135 insertions(+), 6 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 58f1e0a153..a27a39e6db 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -265,13 +265,17 @@ pub const Tokenizer = struct { var state: enum { Start, Cr, + BackSlash, + BackSlashCr, u, u8, U, L, StringLiteral, + CharLiteralStart, CharLiteral, EscapeSequence, + CrEscape, OctalEscape, HexEscape, UnicodeEscape, @@ -344,7 +348,7 @@ pub const Tokenizer = struct { }, '\'' => { result.id = .{ .CharLiteral = .None }; - state = .CharLiteral; + state = .CharLiteralStart; }, 'u' => { state = .u; @@ -464,6 +468,9 @@ pub const Tokenizer = struct { '1'...'9' => { state = .IntegerLiteral; }, + '\\' => { + state = .BackSlash; + }, else => { result.start = self.index + 1; }, @@ -480,13 +487,34 @@ pub const Tokenizer = struct { break; }, }, + .BackSlash => switch (c) { + '\n' => { + state = .Start; + }, + '\r' => { + state = .BackSlashCr; + }, + else => { + result.id = .Invalid; + break; + }, + }, + .BackSlashCr => switch (c) { + '\n' => { + state = .Start; + }, + else => { + result.id = .Invalid; + break; + }, + }, .u => switch (c) { '8' => { state = .u8; }, '\'' => { result.id = .{ .CharLiteral = .Utf16 }; - state = .CharLiteral; + state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Utf16 }; @@ -508,7 +536,7 @@ pub const Tokenizer = struct { .U => switch (c) { '\'' => { result.id = .{ .CharLiteral = .Utf32 }; - state = .CharLiteral; + state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Utf32 }; @@ -521,7 +549,7 @@ pub const Tokenizer = struct { .L => switch (c) { '\'' => { result.id = .{ .CharLiteral = .Wide }; - state = .CharLiteral; + state = .CharLiteralStart; }, '\"' => { result.id = .{ .StringLiteral = .Wide }; @@ -546,7 +574,7 @@ pub const Tokenizer = struct { }, else => {}, }, - .CharLiteral => switch (c) { + .CharLiteralStart => switch (c) { '\\' => { string = false; state = .EscapeSequence; @@ -555,10 +583,32 @@ pub const Tokenizer = struct { result.id = .Invalid; break; }, + else => { + state = .CharLiteral; + }, + }, + .CharLiteral => switch (c) { + '\\' => { + string = false; + state = .EscapeSequence; + }, + '\'' => { + self.index += 1; + break; + }, + '\n' => { + result.id = .Invalid; + break; + }, else => {}, }, 
.EscapeSequence => switch (c) { - '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v' => {}, + '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', '\n' => { + state = if (string) .StringLiteral else .CharLiteral; + }, + '\r' => { + state = .CrEscape; + }, '0'...'7' => { counter = 1; state = .OctalEscape; @@ -579,6 +629,15 @@ pub const Tokenizer = struct { break; }, }, + .CrEscape => switch (c) { + '\n' => { + state = if (string) .StringLiteral else .CharLiteral; + }, + else => { + result.id = .Invalid; + break; + }, + }, .OctalEscape => switch (c) { '0'...'7' => { counter += 1; @@ -1056,10 +1115,14 @@ pub const Tokenizer = struct { }, .Cr, + .BackSlash, + .BackSlashCr, .Period2, .StringLiteral, + .CharLiteralStart, .CharLiteral, .EscapeSequence, + .CrEscape, .OctalEscape, .HexEscape, .UnicodeEscape, @@ -1269,6 +1332,72 @@ test "preprocessor keywords" { }); } +test "line continuation" { + expectTokens( + \\#define foo \ + \\ bar + \\"foo\ + \\ bar" + \\ + , &[_]Token.Id{ + .Hash, + .Keyword_define, + .Identifier, + .Identifier, + .Nl, + .{ .StringLiteral = .None }, + }); +} + +test "string prefix" { + expectTokens( + \\"foo" + \\u"foo" + \\u8"foo" + \\U"foo" + \\L"foo" + \\'foo' + \\u'foo' + \\U'foo' + \\L'foo' + \\ + , &[_]Token.Id{ + .{ .StringLiteral = .None }, + .{ .StringLiteral = .Utf16 }, + .{ .StringLiteral = .Utf8 }, + .{ .StringLiteral = .Utf32 }, + .{ .StringLiteral = .Wide }, + .{ .CharLiteral = .None }, + .{ .CharLiteral = .Utf16 }, + .{ .CharLiteral = .Utf32 }, + .{ .CharLiteral = .Wide }, + }); +} + +test "num suffixes" { + expectTokens( + \\ 1.0f 1.0L 1.0 .0 1. + \\ 0l 0lu 0ll 0llu 0 + \\ 1u 1ul 1ull 1 + \\ + , &[_]Token.Id{ + .{ .FloatLiteral = .F }, + .{ .FloatLiteral = .L }, + .{ .FloatLiteral = .None }, + .{ .FloatLiteral = .None }, + .{ .FloatLiteral = .None }, + .{ .IntegerLiteral = .L }, + .{ .IntegerLiteral = .LU }, + .{ .IntegerLiteral = .LL }, + .{ .IntegerLiteral = .LLU }, + .{ .IntegerLiteral = .None }, + .{ .IntegerLiteral = .U }, + .{ .IntegerLiteral = .LU }, + .{ .IntegerLiteral = .LLU }, + .{ .IntegerLiteral = .None }, + }); +} + fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { var tokenizer = Tokenizer{ .source = &Source{ From 2183c4bb444d80921783fc5a26d217c0e4a68d31 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 13:16:37 +0200 Subject: [PATCH 09/30] std-c tokenizer string concatenation --- lib/std/c/tokenizer.zig | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index a27a39e6db..1d06c6a523 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -272,6 +272,7 @@ pub const Tokenizer = struct { U, L, StringLiteral, + AfterStringLiteral, CharLiteralStart, CharLiteral, EscapeSequence, @@ -565,8 +566,7 @@ pub const Tokenizer = struct { state = .EscapeSequence; }, '"' => { - self.index += 1; - break; + state = .AfterStringLiteral; }, '\n', '\r' => { result.id = .Invalid; @@ -574,6 +574,15 @@ pub const Tokenizer = struct { }, else => {}, }, + .AfterStringLiteral => switch (c) { + '"' => { + state = .StringLiteral; + }, + '\n'...'\r', ' ' => {}, + else => { + break; + }, + }, .CharLiteralStart => switch (c) { '\\' => { string = false; @@ -1109,6 +1118,7 @@ pub const Tokenizer = struct { } } else if (self.index == self.source.buffer.len) { switch (state) { + .AfterStringLiteral, .Start => {}, .u, .u8, .U, .L, .Identifier => { result.id = Token.getKeyword(self.source.buffer[result.start..self.index], 
self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; @@ -1351,11 +1361,11 @@ test "line continuation" { test "string prefix" { expectTokens( - \\"foo" - \\u"foo" - \\u8"foo" - \\U"foo" - \\L"foo" + \\"foo" "bar" + \\u"foo" "bar" + \\u8"foo" "bar" + \\U"foo" "bar" + \\L"foo" "bar" \\'foo' \\u'foo' \\U'foo' From e1b01d32f0869bae9b770a613f3f02fbda6c6556 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 14:34:00 +0200 Subject: [PATCH 10/30] std-c ast base --- lib/std/c.zig | 5 ++++ lib/std/c/ast.zig | 66 +++++++++++++++++++++++++++++++++++++++++ lib/std/c/tokenizer.zig | 3 ++ 3 files changed, 74 insertions(+) create mode 100644 lib/std/c/ast.zig diff --git a/lib/std/c.zig b/lib/std/c.zig index 684758286b..b0d0e50079 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -2,6 +2,11 @@ const builtin = @import("builtin"); const std = @import("std"); const page_size = std.mem.page_size; +const tokenizer = @import("c/tokenizer.zig"); +pub const Token = tokenizer.Token; +pub const Tokenizer = tokenizer.Tokenizer; +pub const ast = @import("c/ast.zig"); + pub usingnamespace @import("os/bits.zig"); pub usingnamespace switch (builtin.os) { diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig new file mode 100644 index 0000000000..bc992bc549 --- /dev/null +++ b/lib/std/c/ast.zig @@ -0,0 +1,66 @@ +const std = @import("std.zig"); +const SegmentedList = std.SegmentedList; +const Token = std.c.Token; +const Source = std.c.tokenizer.Source; + +pub const TokenIndex = usize; + +pub const Tree = struct { + tokens: TokenList, + sources: SourceList, + root_node: *Node.Root, + arena_allocator: std.heap.ArenaAllocator, + errors: ErrorList, + + pub const SourceList = SegmentedList(Source, 4); + pub const TokenList = Source.TokenList; + pub const ErrorList = SegmentedList(Error, 0); + + pub fn deinit(self: *Tree) void { + // Here we copy the arena allocator into stack memory, because + // otherwise it would destroy itself while it was still working. 
+ var arena_allocator = self.arena_allocator; + arena_allocator.deinit(); + // self is destroyed + } +}; + +pub const Error = union(enum) { + InvalidToken: InvalidToken, + + pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { + switch (self.*) { + .InvalidToken => |*x| return x.render(tokens, stream), + } + } + + pub fn loc(self: *const Error) TokenIndex { + switch (self.*) { + .InvalidToken => |x| return x.token, + } + } + + pub const InvalidToken = SingleTokenError("Invalid token '{}'"); + + fn SingleTokenError(comptime msg: []const u8) type { + return struct { + token: TokenIndex, + + pub fn render(self: *const @This(), tokens: *Tree.TokenList, stream: var) !void { + const actual_token = tokens.at(self.token); + return stream.print(msg, .{actual_token.id.symbol()}); + } + }; + } +}; + +pub const Root = struct { + decls: DeclList, + eof_token: TokenIndex, + + pub const DeclList = SegmentedList(*Decl, 4); +}; + +pub const Decl = struct { + +}; \ No newline at end of file diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 1d06c6a523..b8e515bec9 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -4,6 +4,9 @@ const mem = std.mem; pub const Source = struct { buffer: []const u8, file_name: []const u8, + tokens: TokenList, + + pub const TokenList = SegmentedList(Token, 64); }; pub const Token = struct { From 73a53fa263dd514940715b98e64e96764af91d46 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 20:37:04 +0200 Subject: [PATCH 11/30] std-c outline parser --- lib/std/c.zig | 1 + lib/std/c/parse.zig | 296 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 297 insertions(+) create mode 100644 lib/std/c/parse.zig diff --git a/lib/std/c.zig b/lib/std/c.zig index b0d0e50079..3f339f9d18 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -5,6 +5,7 @@ const page_size = std.mem.page_size; const tokenizer = @import("c/tokenizer.zig"); pub const Token = tokenizer.Token; pub const Tokenizer = tokenizer.Tokenizer; +pub const parse = @import("c/parse.zig").parse; pub const ast = @import("c/ast.zig"); pub usingnamespace @import("os/bits.zig"); diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig new file mode 100644 index 0000000000..d5cadf7412 --- /dev/null +++ b/lib/std/c/parse.zig @@ -0,0 +1,296 @@ +const std = @import("../std.zig"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const ast = std.c.ast; +const Tree = ast.Tree; +const TokenIndex = ast.TokenIndex; +const Token = std.c.Token; +const TokenIterator = ast.Tree.TokenList.Iterator; + +pub const Error = error{ParseError} || Allocator.Error; + +/// Result should be freed with tree.deinit() when there are +/// no more references to any of the tokens or nodes. +pub fn parse(allocator: *Allocator, source: []const u8) !*Tree { + const tree = blk: { + // This block looks unnecessary, but is a "foot-shield" to prevent the SegmentedLists + // from being initialized with a pointer to this `arena`, which is created on + // the stack. Following code should instead refer to `&tree.arena_allocator`, a + // pointer to data which lives safely on the heap and will outlive `parse`. 
+        var arena = std.heap.ArenaAllocator.init(allocator);
+        errdefer arena.deinit();
+        const tree = try arena.allocator.create(ast.Tree);
+        tree.* = .{
+            .root_node = undefined,
+            .arena_allocator = arena,
+            .tokens = undefined,
+            .sources = undefined,
+            .errors = undefined,
+        };
+        break :blk tree;
+    };
+    errdefer tree.deinit();
+    const arena = &tree.arena_allocator.allocator;
+
+    tree.tokens = ast.Tree.TokenList.init(arena);
+    tree.sources = ast.Tree.SourceList.init(arena);
+    tree.errors = ast.Tree.ErrorList.init(arena);
+
+    var tokenizer = std.zig.Tokenizer.init(source);
+    while (true) {
+        const tree_token = try tree.tokens.addOne();
+        tree_token.* = tokenizer.next();
+        if (tree_token.id == .Eof) break;
+    }
+    // TODO preprocess here
+    var it = tree.tokens.iterator(0);
+
+    while (true) {
+        const tok = it.peek().?.id;
+        switch (tok) {
+            .LineComment,
+            .MultiLineComment,
+            => {
+                _ = it.next();
+            },
+            else => break,
+        }
+    }
+
+    tree.root_node = try parseRoot(arena, &it, tree);
+    return tree;
+}
+
+/// Root <- ExternalDeclaration* eof
+fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!*Node {
+    const node = try arena.create(ast.Root);
+    node.* = .{
+        .decls = ast.Node.DeclList.init(arena),
+        .eof_token = undefined,
+    };
+    while (parseExternalDeclarations(arena, it, tree) catch |err| switch (err) {
+        error.OutOfMemory => return error.OutOfMemory,
+        error.ParseError => return node,
+    }) |decl| {
+        try node.decls.push(decl);
+    }
+    node.eof_token = eatToken(it, .Eof) orelse {
+        try tree.errors.push(.{
+            .ExpectedDecl = .{ .token = it.index },
+        });
+        return node;
+    };
+    return node;
+}
+
+/// ExternalDeclaration
+///     <- Declaration
+///     / DeclarationSpecifiers Declarator Declaration* CompoundStmt
+fn parseExternalDeclarations(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {
+    if (try parseDeclaration(arena, it, tree)) |decl| {}
+    return null;
+}
+
+/// Declaration
+///     <- DeclarationSpecifiers (Declarator (EQUAL Initializer)?)* SEMICOLON
+///     / StaticAssertDeclaration
+fn parseDeclaration(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {}
+
+/// StaticAssertDeclaration <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON
+fn parseStaticAssertDeclaration(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {}
+
+/// DeclarationSpecifiers
+///     <- StorageClassSpecifier DeclarationSpecifiers?
+///     / TypeSpecifier DeclarationSpecifiers?
+///     / TypeQualifier DeclarationSpecifiers?
+///     / FunctionSpecifier DeclarationSpecifiers?
+///     / AlignmentSpecifier DeclarationSpecifiers?
+fn parseDeclarationSpecifiers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// StorageClassSpecifier +/// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register +fn parseStorageClassSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// TypeSpecifier +/// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double +/// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / +/// / Keyword_atomic LPAREN TypeName RPAREN +/// / EnumSpecifier +/// / RecordSpecifier +/// / IDENTIFIER // typedef name +fn parseTypeSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// TypeQualifier <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic +fn parseTypeQualifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// FunctionSpecifier <- Keyword_inline / Keyword_noreturn +fn parseFunctionSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// AlignmentSpecifier <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN +fn parseAlignmentSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? +fn parseEnumSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA? +fn parseEnumField(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? +fn parseRecordSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// RecordField +/// <- SpecifierQualifer (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON +/// \ StaticAssertDeclaration +fn parseRecordField(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// TypeName +/// <- SpecifierQualifer AbstractDeclarator? +fn parseTypeName(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// SpecifierQualifer +/// <- TypeSpecifier SpecifierQualifer? +/// / TypeQualifier SpecifierQualifer? +fn parseSpecifierQualifer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// RecordDeclarator <- Declarator? (COLON ConstExpr)? +fn parseRecordDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Declarator <- Pointer? DirectDeclarator +fn parseDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Pointer <- ASTERISK TypeQualifier* Pointer? +fn parsePointer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// DirectDeclarator +/// <- IDENTIFIER +/// / LPAREN Declarator RPAREN +/// / DirectDeclarator LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET +/// / DirectDeclarator LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN +fn parseDirectDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// BracketDeclarator +/// <- Keyword_static TypeQualifier* AssignmentExpr +/// / TypeQualifier+ (ASTERISK / Keyword_static AssignmentExpr) +/// / TypeQualifier+ AssignmentExpr? +/// / AssignmentExpr +fn parseBracketDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// ParamDecl <- DeclarationSpecifiers (Declarator / AbstractDeclarator) +fn parseParamDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// AbstractDeclarator <- Pointer? 
DirectAbstractDeclarator? +fn parseAbstractDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// DirectAbstractDeclarator +/// <- IDENTIFIER +/// / LPAREN DirectAbstractDeclarator RPAREN +/// / DirectAbstractDeclarator? LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET +/// / DirectAbstractDeclarator? LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN +fn parseDirectAbstractDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Expr <- AssignmentExpr (COMMA Expr)* +fn parseExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// AssignmentExpr +/// <- ConditionalExpr +/// / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUA / +/// / ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL / +/// / AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr +fn parseAssignmentExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// ConstExpr <- ConditionalExpr +/// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? +fn parseConditionalExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)* +fn parseLogicalOrExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)* +fn parseLogicalAndExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)* +fn parseBinOrExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// BinXorExpr <- BinAndExpr (CARET BinXorExpr)* +fn parseBinXorExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)* +fn parseBinAndExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// EqualityExpr <- ComparisionExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)* +fn parseEqualityExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// ComparisionExpr <- ShiftExpr (ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL /ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisionExpr)* +fn parseComparisionExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// ShiftExpr <- AdditiveExpr (ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)* +fn parseShiftExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// AdditiveExpr <- MultiplicativeExpr (PLUS / MINUS) AdditiveExpr)* +fn parseAdditiveExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// MultiplicativeExpr <- UnaryExpr (ASTERISK / SLASH / PERCENT) MultiplicativeExpr)* +fn parseMultiplicativeExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// UnaryExpr +/// <- LPAREN TypeName RPAREN UnaryExpr +/// / Keyword_sizeof LAPERN TypeName RPAREN +/// / Keyword_sizeof UnaryExpr +/// / Keyword_alignof LAPERN TypeName RPAREN +/// / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr +/// / PrimaryExpr PostFixExpr* +fn parseUnaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// PrimaryExpr +/// <- IDENTIFIER +/// / INTEGERLITERAL / FLITERAL / STRINGLITERAL / CHARLITERAL +/// / LPAREN Expr RPAREN +/// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN +fn parsePrimaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Generic +/// <- TypeName COLON AssignmentExpr +/// / Keyword_default 
COLON AssignmentExpr +fn parseGeneric(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// PostFixExpr +/// <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE +/// / LBRACKET Expr RBRACKET +/// / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN +/// / (PERIOD / ARROW) IDENTIFIER +/// / (PLUSPLUS / MINUSMINUS) +fn parsePostFixExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA? +fn parseInitializers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Initializer +/// <- LBRACE Initializers RBRACE +/// / AssignmentExpr +fn parseInitializer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Designator +/// <- LBRACKET Initializers RBRACKET +/// / PERIOD IDENTIFIER +fn parseDesignator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE +fn parseCompoundStmt(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// Stmt +/// <- CompoundStmt +/// / Keyword_if LPAREN Expr RPAREN Stmt (Keyword_ELSE Stmt)? +/// / Keyword_switch LPAREN Expr RPAREN Stmt +/// / Keyword_while LPAREN Expr RPAREN Stmt +/// / Keyword_do statement Keyword_while LPAREN Expr RPAREN SEMICOLON +/// / Keyword_for LPAREN (Declaration / ExprStmt) ExprStmt Expr? RPAREN Stmt +/// / Keyword_default COLON Stmt +/// / Keyword_case ConstExpr COLON Stmt +/// / Keyword_goto IDENTIFIER SEMICOLON +/// / Keyword_continue SEMICOLON +/// / Keyword_break SEMICOLON +/// / Keyword_return Expr? SEMICOLON +/// / IDENTIFIER COLON Stmt +/// / ExprStmt +fn parseStmt(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + +/// ExprStmt <- Expr? SEMICOLON +fn parseExprStmt(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} From a20c0b31de2b6d8de568707429754a6d39d3346d Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 21:26:43 +0200 Subject: [PATCH 12/30] std-c parser and ast organization --- lib/std/c/ast.zig | 50 ++++- lib/std/c/parse.zig | 492 +++++++++++++++++++++++++++----------------- 2 files changed, 345 insertions(+), 197 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index bc992bc549..7a9d06af32 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -54,13 +54,47 @@ pub const Error = union(enum) { } }; -pub const Root = struct { - decls: DeclList, - eof_token: TokenIndex, +pub const Node = struct { + id: Id, - pub const DeclList = SegmentedList(*Decl, 4); + pub const Id = enum { + Root, + JumpStmt, + ExprStmt, + Label, + }; + + pub const Root = struct { + base: Node, + decls: DeclList, + eof: TokenIndex, + + pub const DeclList = SegmentedList(*Node, 4); + }; + + pub const JumpStmt = struct { + base: Node = Node{ .id = .JumpStmt}, + ltoken: TokenIndex, + kind: Kind, + semicolon: TokenIndex, + + pub const Kind = union(enum) { + Break, + Continue, + Return: ?*Node, + Goto: TokenIndex, + }; + }; + + pub const ExprStmt = struct { + base: Node = Node{ .id = .ExprStmt}, + expr: ?*Node, + semicolon: TokenIndex, + }; + + pub const Label = struct { + base: Node = Node{ .id = .Label}, + identifier: TokenIndex, + colon: TokenIndex, + }; }; - -pub const Decl = struct { - -}; \ No newline at end of file diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index d5cadf7412..527e03a48b 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -55,242 +55,356 @@ pub fn parse(allocator: *Allocator, source: []const u8) !*Tree { } } - tree.root_node = try 
parseRoot(arena, &it, tree); + var parser = Parser{ + .arena = arena, + .it = &it, + .tree = tree, + }; + + tree.root_node = try parser.root(); return tree; } -/// Root <- ExternalDeclaration* eof -fn parseRoot(arena: *Allocator, it: *TokenIterator, tree: *Tree) Allocator.Error!*Node { - const node = try arena.create(ast.Root); - node.* = .{ - .decls = ast.Node.DeclList.init(arena), - .eof_token = undefined, - }; - while (parseExternalDeclarations(arena, it, tree) catch |err| switch (err) { - error.OutOfMemory => return error.OutOfMemory, - error.ParseError => return node, - }) |decl| { - try node.decls.push(decl); - } - node.eof_token = eatToken(it, .Eof) orelse { - try tree.errors.push(.{ - .ExpectedDecl = .{ .token = it.index }, - }); +const Parser = struct { + arena: *Allocator, + it: *TokenIterator, + tree: *Tree, + + /// Root <- ExternalDeclaration* eof + fn root(parser: *Parser) Allocator.Error!*Node { + const node = try arena.create(ast.Root); + node.* = .{ + .decls = ast.Node.DeclList.init(arena), + .eof = undefined, + }; + while (parser.externalDeclarations() catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.ParseError => return node, + }) |decl| { + try node.decls.push(decl); + } + node.eof = eatToken(it, .Eof) orelse { + try tree.errors.push(.{ + .ExpectedDecl = .{ .token = it.index }, + }); + return node; + }; return node; - }; - return node; -} + } -/// ExternalDeclaration -/// <- Declaration -/// / DeclarationSpecifiers Declarator Declaration* CompoundStmt -fn parseExternalDeclarations(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node { - if (try parseDeclaration(arena, it, tree)) |decl| {} - return null; -} + /// ExternalDeclaration + /// <- Declaration + /// / DeclarationSpecifiers Declarator Declaration* CompoundStmt + fn externalDeclarations(parser: *Parser) !?*Node { + if (try Declaration(parser)) |decl| {} + return null; + } -/// Declaration -/// <- DeclarationSpecifiers (Declarator (EQUAL Initializer)?)* SEMICOLON -/// \ StaticAssertDeclaration -fn parseDeclaration(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {} + /// Declaration + /// <- DeclarationSpecifiers (Declarator (EQUAL Initializer)?)* SEMICOLON + /// \ StaticAssertDeclaration + fn declaration(parser: *Parser) !?*Node {} -/// StaticAssertDeclaration <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON -fn parseStaticAssertDeclaration(arena: *Allocator, it: *TokenIterator, tree: *Tree) !?*Node {} + /// StaticAssertDeclaration <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON + fn staticAssertDeclaration(parser: *Parser) !?*Node {} -/// DeclarationSpecifiers -/// <- StorageClassSpecifier DeclarationSpecifiers? -/// / TypeSpecifier DeclarationSpecifiers? -/// / TypeQualifier DeclarationSpecifiers? -/// / FunctionSpecifier DeclarationSpecifiers? -/// / AlignmentSpecifier DeclarationSpecifiers? -fn parseDeclarationSpecifiers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// DeclarationSpecifiers + /// <- StorageClassSpecifier DeclarationSpecifiers? + /// / TypeSpecifier DeclarationSpecifiers? + /// / TypeQualifier DeclarationSpecifiers? + /// / FunctionSpecifier DeclarationSpecifiers? + /// / AlignmentSpecifier DeclarationSpecifiers? 
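+    // With the rules as Parser methods, the shared state (arena, token
+    // iterator, tree) travels on `parser` instead of being threaded through
+    // every call. A minimal sketch of the resulting calling convention,
+    // assuming the eatToken helper defined at the bottom of this struct:
+    //
+    //     fn typeQualifier(parser: *Parser) !?*Node {
+    //         const tok = parser.eatToken(.Keyword_const) orelse return null;
+    //         // ... wrap tok in a node allocated from parser.arena ...
+    //     }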
+ fn declarationSpecifiers(parser: *Parser) !*Node {} -/// StorageClassSpecifier -/// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register -fn parseStorageClassSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// StorageClassSpecifier + /// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register + fn storageClassSpecifier(parser: *Parser) !*Node {} -/// TypeSpecifier -/// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double -/// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / -/// / Keyword_atomic LPAREN TypeName RPAREN -/// / EnumSpecifier -/// / RecordSpecifier -/// / IDENTIFIER // typedef name -fn parseTypeSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// TypeSpecifier + /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double + /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / + /// / Keyword_atomic LPAREN TypeName RPAREN + /// / EnumSpecifier + /// / RecordSpecifier + /// / IDENTIFIER // typedef name + fn typeSpecifier(parser: *Parser) !*Node {} -/// TypeQualifier <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic -fn parseTypeQualifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// TypeQualifier <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic + fn typeQualifier(parser: *Parser) !*Node {} -/// FunctionSpecifier <- Keyword_inline / Keyword_noreturn -fn parseFunctionSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// FunctionSpecifier <- Keyword_inline / Keyword_noreturn + fn functionSpecifier(parser: *Parser) !*Node {} -/// AlignmentSpecifier <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN -fn parseAlignmentSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// AlignmentSpecifier <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN + fn alignmentSpecifier(parser: *Parser) !*Node {} -/// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? -fn parseEnumSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? + fn enumSpecifier(parser: *Parser) !*Node {} -/// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA? -fn parseEnumField(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA? + fn enumField(parser: *Parser) !*Node {} -/// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? -fn parseRecordSpecifier(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? + fn recordSpecifier(parser: *Parser) !*Node {} -/// RecordField -/// <- SpecifierQualifer (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON -/// \ StaticAssertDeclaration -fn parseRecordField(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// RecordField + /// <- SpecifierQualifer (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON + /// \ StaticAssertDeclaration + fn recordField(parser: *Parser) !*Node {} -/// TypeName -/// <- SpecifierQualifer AbstractDeclarator? 
-fn parseTypeName(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// TypeName + /// <- SpecifierQualifer AbstractDeclarator? + fn typeName(parser: *Parser) !*Node {} -/// SpecifierQualifer -/// <- TypeSpecifier SpecifierQualifer? -/// / TypeQualifier SpecifierQualifer? -fn parseSpecifierQualifer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// SpecifierQualifer + /// <- TypeSpecifier SpecifierQualifer? + /// / TypeQualifier SpecifierQualifer? + fn specifierQualifer(parser: *Parser) !*Node {} -/// RecordDeclarator <- Declarator? (COLON ConstExpr)? -fn parseRecordDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// RecordDeclarator <- Declarator? (COLON ConstExpr)? + fn recordDeclarator(parser: *Parser) !*Node {} -/// Declarator <- Pointer? DirectDeclarator -fn parseDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Declarator <- Pointer? DirectDeclarator + fn declarator(parser: *Parser) !*Node {} -/// Pointer <- ASTERISK TypeQualifier* Pointer? -fn parsePointer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Pointer <- ASTERISK TypeQualifier* Pointer? + fn pointer(parser: *Parser) !*Node {} -/// DirectDeclarator -/// <- IDENTIFIER -/// / LPAREN Declarator RPAREN -/// / DirectDeclarator LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET -/// / DirectDeclarator LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN -fn parseDirectDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// DirectDeclarator + /// <- IDENTIFIER + /// / LPAREN Declarator RPAREN + /// / DirectDeclarator LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET + /// / DirectDeclarator LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN + fn directDeclarator(parser: *Parser) !*Node {} -/// BracketDeclarator -/// <- Keyword_static TypeQualifier* AssignmentExpr -/// / TypeQualifier+ (ASTERISK / Keyword_static AssignmentExpr) -/// / TypeQualifier+ AssignmentExpr? -/// / AssignmentExpr -fn parseBracketDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// BracketDeclarator + /// <- Keyword_static TypeQualifier* AssignmentExpr + /// / TypeQualifier+ (ASTERISK / Keyword_static AssignmentExpr) + /// / TypeQualifier+ AssignmentExpr? + /// / AssignmentExpr + fn bracketDeclarator(parser: *Parser) !*Node {} -/// ParamDecl <- DeclarationSpecifiers (Declarator / AbstractDeclarator) -fn parseParamDecl(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// ParamDecl <- DeclarationSpecifiers (Declarator / AbstractDeclarator) + fn paramDecl(parser: *Parser) !*Node {} -/// AbstractDeclarator <- Pointer? DirectAbstractDeclarator? -fn parseAbstractDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// AbstractDeclarator <- Pointer? DirectAbstractDeclarator? + fn abstractDeclarator(parser: *Parser) !*Node {} -/// DirectAbstractDeclarator -/// <- IDENTIFIER -/// / LPAREN DirectAbstractDeclarator RPAREN -/// / DirectAbstractDeclarator? LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET -/// / DirectAbstractDeclarator? LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN -fn parseDirectAbstractDeclarator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// DirectAbstractDeclarator + /// <- IDENTIFIER + /// / LPAREN DirectAbstractDeclarator RPAREN + /// / DirectAbstractDeclarator? LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET + /// / DirectAbstractDeclarator? 
LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN + fn directAbstractDeclarator(parser: *Parser) !*Node {} -/// Expr <- AssignmentExpr (COMMA Expr)* -fn parseExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Expr <- AssignmentExpr (COMMA Expr)* + fn expr(parser: *Parser) !*Node {} -/// AssignmentExpr -/// <- ConditionalExpr -/// / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUA / -/// / ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL / -/// / AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr -fn parseAssignmentExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// AssignmentExpr + /// <- ConditionalExpr // TODO recursive? + /// / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUA / + /// / ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL / + /// / AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr + fn assignmentExpr(parser: *Parser) !*Node {} -/// ConstExpr <- ConditionalExpr -/// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? -fn parseConditionalExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// ConstExpr <- ConditionalExpr + /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? + fn conditionalExpr(parser: *Parser) !*Node {} -/// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)* -fn parseLogicalOrExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)* + fn logicalOrExpr(parser: *Parser) !*Node {} -/// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)* -fn parseLogicalAndExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)* + fn logicalAndExpr(parser: *Parser) !*Node {} -/// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)* -fn parseBinOrExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)* + fn binOrExpr(parser: *Parser) !*Node {} -/// BinXorExpr <- BinAndExpr (CARET BinXorExpr)* -fn parseBinXorExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// BinXorExpr <- BinAndExpr (CARET BinXorExpr)* + fn binXorExpr(parser: *Parser) !*Node {} -/// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)* -fn parseBinAndExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)* + fn binAndExpr(parser: *Parser) !*Node {} -/// EqualityExpr <- ComparisionExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)* -fn parseEqualityExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// EqualityExpr <- ComparisionExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)* + fn equalityExpr(parser: *Parser) !*Node {} -/// ComparisionExpr <- ShiftExpr (ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL /ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisionExpr)* -fn parseComparisionExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// ComparisionExpr <- ShiftExpr (ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL /ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisionExpr)* + fn comparisionExpr(parser: *Parser) !*Node {} -/// ShiftExpr <- AdditiveExpr (ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)* -fn parseShiftExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// ShiftExpr <- AdditiveExpr 
(ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)* + fn shiftExpr(parser: *Parser) !*Node {} -/// AdditiveExpr <- MultiplicativeExpr (PLUS / MINUS) AdditiveExpr)* -fn parseAdditiveExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// AdditiveExpr <- MultiplicativeExpr (PLUS / MINUS) AdditiveExpr)* + fn additiveExpr(parser: *Parser) !*Node {} -/// MultiplicativeExpr <- UnaryExpr (ASTERISK / SLASH / PERCENT) MultiplicativeExpr)* -fn parseMultiplicativeExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// MultiplicativeExpr <- UnaryExpr (ASTERISK / SLASH / PERCENT) MultiplicativeExpr)* + fn multiplicativeExpr(parser: *Parser) !*Node {} -/// UnaryExpr -/// <- LPAREN TypeName RPAREN UnaryExpr -/// / Keyword_sizeof LAPERN TypeName RPAREN -/// / Keyword_sizeof UnaryExpr -/// / Keyword_alignof LAPERN TypeName RPAREN -/// / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr -/// / PrimaryExpr PostFixExpr* -fn parseUnaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// UnaryExpr + /// <- LPAREN TypeName RPAREN UnaryExpr + /// / Keyword_sizeof LAPERN TypeName RPAREN + /// / Keyword_sizeof UnaryExpr + /// / Keyword_alignof LAPERN TypeName RPAREN + /// / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr + /// / PrimaryExpr PostFixExpr* + fn unaryExpr(parser: *Parser) !*Node {} -/// PrimaryExpr -/// <- IDENTIFIER -/// / INTEGERLITERAL / FLITERAL / STRINGLITERAL / CHARLITERAL -/// / LPAREN Expr RPAREN -/// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN -fn parsePrimaryExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// PrimaryExpr + /// <- IDENTIFIER + /// / INTEGERLITERAL / FLITERAL / STRINGLITERAL / CHARLITERAL + /// / LPAREN Expr RPAREN + /// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN + fn primaryExpr(parser: *Parser) !*Node {} -/// Generic -/// <- TypeName COLON AssignmentExpr -/// / Keyword_default COLON AssignmentExpr -fn parseGeneric(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Generic + /// <- TypeName COLON AssignmentExpr + /// / Keyword_default COLON AssignmentExpr + fn generic(parser: *Parser) !*Node {} -/// PostFixExpr -/// <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE -/// / LBRACKET Expr RBRACKET -/// / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN -/// / (PERIOD / ARROW) IDENTIFIER -/// / (PLUSPLUS / MINUSMINUS) -fn parsePostFixExpr(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// PostFixExpr + /// <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE + /// / LBRACKET Expr RBRACKET + /// / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN + /// / (PERIOD / ARROW) IDENTIFIER + /// / (PLUSPLUS / MINUSMINUS) + fn postFixExpr(parser: *Parser) !*Node {} -/// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA? -fn parseInitializers(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA? 
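+    // For orientation, the C that the rule above is meant to accept is the
+    // C99 designated-initializer form (C99 6.7.8), e.g.
+    //
+    //     int a[3] = { [0] = 1, 2, [2] = 3 };
+    //
+    // where each element may be introduced by one or more designators and
+    // an equals sign, and a trailing comma is allowed.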
+ fn initializers(parser: *Parser) !*Node {} -/// Initializer -/// <- LBRACE Initializers RBRACE -/// / AssignmentExpr -fn parseInitializer(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Initializer + /// <- LBRACE Initializers RBRACE + /// / AssignmentExpr + fn initializer(parser: *Parser) !*Node {} -/// Designator -/// <- LBRACKET Initializers RBRACKET -/// / PERIOD IDENTIFIER -fn parseDesignator(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Designator + /// <- LBRACKET Initializers RBRACKET + /// / PERIOD IDENTIFIER + fn designator(parser: *Parser) !*Node {} -/// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE -fn parseCompoundStmt(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE + fn compoundStmt(parser: *Parser) !?*Node {} -/// Stmt -/// <- CompoundStmt -/// / Keyword_if LPAREN Expr RPAREN Stmt (Keyword_ELSE Stmt)? -/// / Keyword_switch LPAREN Expr RPAREN Stmt -/// / Keyword_while LPAREN Expr RPAREN Stmt -/// / Keyword_do statement Keyword_while LPAREN Expr RPAREN SEMICOLON -/// / Keyword_for LPAREN (Declaration / ExprStmt) ExprStmt Expr? RPAREN Stmt -/// / Keyword_default COLON Stmt -/// / Keyword_case ConstExpr COLON Stmt -/// / Keyword_goto IDENTIFIER SEMICOLON -/// / Keyword_continue SEMICOLON -/// / Keyword_break SEMICOLON -/// / Keyword_return Expr? SEMICOLON -/// / IDENTIFIER COLON Stmt -/// / ExprStmt -fn parseStmt(arena: *Allocator, it: *TokenIterator, tree: *Tree) !*Node {} + /// Stmt + /// <- CompoundStmt + /// / Keyword_if LPAREN Expr RPAREN Stmt (Keyword_ELSE Stmt)? + /// / Keyword_switch LPAREN Expr RPAREN Stmt + /// / Keyword_while LPAREN Expr RPAREN Stmt + /// / Keyword_do statement Keyword_while LPAREN Expr RPAREN SEMICOLON + /// / Keyword_for LPAREN (Declaration / ExprStmt) ExprStmt Expr? RPAREN Stmt + /// / Keyword_default COLON Stmt + /// / Keyword_case ConstExpr COLON Stmt + /// / Keyword_goto IDENTIFIER SEMICOLON + /// / Keyword_continue SEMICOLON + /// / Keyword_break SEMICOLON + /// / Keyword_return Expr? 
SEMICOLON
+    /// / IDENTIFIER COLON Stmt
+    /// / ExprStmt
+    fn stmt(parser: *Parser) !?*Node {
+        if (parser.compoundStmt()) |node| return node;
+        // if (parser.eatToken(.Keyword_if)) |tok| {}
+        // if (parser.eatToken(.Keyword_switch)) |tok| {}
+        // if (parser.eatToken(.Keyword_while)) |tok| {}
+        // if (parser.eatToken(.Keyword_do)) |tok| {}
+        // if (parser.eatToken(.Keyword_for)) |tok| {}
+        // if (parser.eatToken(.Keyword_default)) |tok| {}
+        // if (parser.eatToken(.Keyword_case)) |tok| {}
+        if (parser.eatToken(.Keyword_goto)) |tok| {
+            const node = try parser.arena.create(Node.JumpStmt);
+            node.* = .{
+                .ltoken = tok,
+                // Goto carries the target label's identifier token as payload.
+                .kind = .{ .Goto = try parser.expectToken(.Identifier) },
+                .semicolon = try parser.expectToken(.Semicolon),
+            };
+            return &node.base;
+        }
+        if (parser.eatToken(.Keyword_continue)) |tok| {
+            const node = try parser.arena.create(Node.JumpStmt);
+            node.* = .{
+                .ltoken = tok,
+                .kind = .Continue,
+                .semicolon = try parser.expectToken(.Semicolon),
+            };
+            return &node.base;
+        }
+        if (parser.eatToken(.Keyword_break)) |tok| {
+            const node = try parser.arena.create(Node.JumpStmt);
+            node.* = .{
+                .ltoken = tok,
+                .kind = .Break,
+                .semicolon = try parser.expectToken(.Semicolon),
+            };
+            return &node.base;
+        }
+        if (parser.eatToken(.Keyword_return)) |tok| {
+            const node = try parser.arena.create(Node.JumpStmt);
+            node.* = .{
+                .ltoken = tok,
+                .kind = .{ .Return = try parser.expr() },
+                .semicolon = try parser.expectToken(.Semicolon),
+            };
+            return &node.base;
+        }
+        if (parser.eatToken(.Identifier)) |tok| {
+            if (parser.eatToken(.Colon)) |col| {
+                const node = try parser.arena.create(Node.Label);
+                node.* = .{
+                    .identifier = tok,
+                    // The colon was already consumed by eatToken above.
+                    .colon = col,
+                };
+                return &node.base;
+            }
+            parser.putBackToken(tok);
+        }
+        // ExprStmt always produces a node; its expression part is optional.
+        return try parser.exprStmt();
+    }
 
-/// ExprStmt <- Expr?
SEMICOLON + fn exprStmt(parser: *Parser) !*Node { + const node = try parser.arena.create(Node.ExprStmt); + node.* = .{ + .expr = try parser.expr(), + .semicolon = parser.expectToken(.Semicolon), + }; + return &node.base; + } + + fn eatToken(parser: *Parser, id: Token.Id) ?TokenIndex { + while (true) { + const next_tok = parser.it.next() orelse return null; + if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { + if (next_tok.id == id) { + return parser.it.index; + } + parser.it.prev(); + return null; + } + } + } + + fn expectToken(parser: *Parser, id: Token.Id) Error!TokenIndex { + while (true) { + const next_tok = parser.it.next() orelse return error.ParseError; + if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { + if (next_tok.id != id) { + try tree.errors.push(.{ + .ExpectedToken = .{ .token = parser.it.index, .expected_id = id }, + }); + return error.ParseError; + } + return parser.it.index; + } + } + } + + fn putBackToken(it: *TokenIterator, putting_back: TokenIndex) void { + while (true) { + const prev_tok = it.prev() orelse return; + if (next_tok.id == .LineComment or next_tok.id == .MultiLineComment) continue; + assert(it.list.at(putting_back) == prev_tok); + return; + } + } +}; From dccf1247b21ee2b15f6ec3c01c908b29b5c60f24 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 4 Jan 2020 22:27:05 +0200 Subject: [PATCH 13/30] std-c ifstmt compoundstmt and errors --- lib/std/c/ast.zig | 53 ++++++++++++++++-- lib/std/c/parse.zig | 47 +++++++++++++++- lib/std/c/tokenizer.zig | 121 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 211 insertions(+), 10 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 7a9d06af32..936964fddf 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -26,21 +26,43 @@ pub const Tree = struct { }; pub const Error = union(enum) { - InvalidToken: InvalidToken, + InvalidToken: SingleTokenError("Invalid token '{}'"), + ExpectedToken: ExpectedToken, + ExpectedExpr: SingleTokenError("Expected expression, found '{}'"), + ExpectedStmt: SingleTokenError("Expected statement, found '{}'"), pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { switch (self.*) { .InvalidToken => |*x| return x.render(tokens, stream), + .ExpectedToken => |*x| return x.render(tokens, stream), + .ExpectedExpr => |*x| return x.render(tokens, stream), + .ExpectedStmt => |*x| return x.render(tokens, stream), } } pub fn loc(self: *const Error) TokenIndex { switch (self.*) { .InvalidToken => |x| return x.token, + .ExpectedToken => |x| return x.token, + .ExpectedExpr => |x| return x.token, + .ExpectedStmt => |x| return x.token, } } - pub const InvalidToken = SingleTokenError("Invalid token '{}'"); + pub const ExpectedToken = struct { + token: TokenIndex, + expected_id: @TagType(Token.Id), + + pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void { + const found_token = tokens.at(self.token); + if (found_token.id == .Invalid) { + return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()}); + } else { + const token_name = found_token.id.symbol(); + return stream.print("expected '{}', found '{}'", .{ self.expected_id.symbol(), token_name }); + } + } + }; fn SingleTokenError(comptime msg: []const u8) type { return struct { @@ -62,6 +84,8 @@ pub const Node = struct { JumpStmt, ExprStmt, Label, + CompoundStmt, + IfStmt, }; pub const Root = struct { @@ -73,7 +97,7 @@ pub const Node = struct { }; pub const JumpStmt = struct { - base: Node = Node{ .id = 
.JumpStmt}, + base: Node = Node{ .id = .JumpStmt }, ltoken: TokenIndex, kind: Kind, semicolon: TokenIndex, @@ -87,14 +111,33 @@ pub const Node = struct { }; pub const ExprStmt = struct { - base: Node = Node{ .id = .ExprStmt}, + base: Node = Node{ .id = .ExprStmt }, expr: ?*Node, semicolon: TokenIndex, }; pub const Label = struct { - base: Node = Node{ .id = .Label}, + base: Node = Node{ .id = .Label }, identifier: TokenIndex, colon: TokenIndex, }; + + pub const CompoundStmt = struct { + base: Node = Node{ .id = .CompoundStmt }, + lbrace: TokenIndex, + statements: StmtList, + rbrace: TokenIndex, + + pub const StmtList = Root.DeclList; + }; + + pub const IfStmt = struct { + base: Node = Node{ .id = .IfStmt }, + @"if": TokenIndex, + cond: *Node, + @"else": ?struct { + tok: TokenIndex, + stmt: *Node, + }, + }; }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 527e03a48b..3fcfeaec52 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -284,7 +284,19 @@ const Parser = struct { fn designator(parser: *Parser) !*Node {} /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE - fn compoundStmt(parser: *Parser) !?*Node {} + fn compoundStmt(parser: *Parser) !?*Node { + const lbrace = parser.eatToken(.LBrace) orelse return null; + const node = try parser.arena.create(Node.CompoundStmt); + node.* = .{ + .lbrace = lbrace, + .statements = Node.JumpStmt.StmtList.init(parser.arena), + .rbrace = undefined, + }; + while (parser.declaration() orelse parser.stmt()) |node| + try node.statements.push(node); + node.rbrace = try parser.expectToken(.RBrace); + return &node.base; + } /// Stmt /// <- CompoundStmt @@ -303,7 +315,27 @@ const Parser = struct { /// / ExprStmt fn stmt(parser: *Parser) !?*Node { if (parser.compoundStmt()) |node| return node; - // if (parser.eatToken(.Keyword_if)) |tok| {} + if (parser.eatToken(.Keyword_if)) |tok| { + const node = try parser.arena.create(Node.IfStmt); + _ = try parser.expectToken(.LParen); + node.* = .{ + .@"if" = tok, + .cond = try parser.expect(expr, .{ + .ExpectedExpr = .{ .token = it.index }, + }), + .@"else" = null, + }; + _ = try parser.expectToken(.RParen); + if (parser.eatToken(.Keyword_else)) |else_tok| { + node.@"else" = .{ + .tok = else_tok, + .stmt = try parser.stmt(expr, .{ + .ExpectedStmt = .{ .token = it.index }, + }), + }; + } + return &node.base; + } // if (parser.eatToken(.Keyword_switch)) |tok| {} // if (parser.eatToken(.Keyword_while)) |tok| {} // if (parser.eatToken(.Keyword_do)) |tok| {} @@ -407,4 +439,15 @@ const Parser = struct { return; } } + + fn expect( + parser: *Parser, + parseFn: fn (*Parser) Error!?*Node, + err: ast.Error, // if parsing fails + ) Error!*Node { + return (try parseFn(arena, it, tree)) orelse { + try parser.tree.errors.push(err); + return error.ParseError; + }; + } }; diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index b8e515bec9..f7732b644e 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -6,7 +6,7 @@ pub const Source = struct { file_name: []const u8, tokens: TokenList, - pub const TokenList = SegmentedList(Token, 64); + pub const TokenList = std.SegmentedList(Token, 64); }; pub const Token = struct { @@ -134,6 +134,121 @@ pub const Token = struct { Keyword_ifndef, Keyword_error, Keyword_pragma, + + pub fn symbol(tok: Token) []const u8 { + return switch (tok.id) { + .Invalid => "Invalid", + .Eof => "Eof", + .Nl => "NewLine", + .Identifier => "Identifier", + .MacroString => "MacroString", + .StringLiteral => "StringLiteral", + .CharLiteral => "CharLiteral", + 
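+                // Tokens without a fixed source spelling (identifiers,
+                // literals, comments) render as their tag name; the operators
+                // and keywords below render as the exact text they match.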
.IntegerLiteral => "IntegerLiteral", + .FloatLiteral => "FloatLiteral", + .LineComment => "LineComment", + .MultiLineComment => "MultiLineComment", + + .Bang => "!", + .BangEqual => "!=", + .Pipe => "|", + .PipePipe => "||", + .PipeEqual => "|=", + .Equal => "=", + .EqualEqual => "==", + .LParen => "(", + .RParen => ")", + .LBrace => "{", + .RBrace => "}", + .LBracket => "[", + .RBracket => "]", + .Period => ".", + .Ellipsis => "...", + .Caret => "^", + .CaretEqual => "^=", + .Plus => "+", + .PlusPlus => "++", + .PlusEqual => "+=", + .Minus => "-", + .MinusMinus => "--", + .MinusEqual => "-=", + .Asterisk => "*", + .AsteriskEqual => "*=", + .Percent => "%", + .PercentEqual => "%=", + .Arrow => "->", + .Colon => ":", + .Semicolon => ";", + .Slash => "/", + .SlashEqual => "/=", + .Comma => ",", + .Ampersand => "&", + .AmpersandAmpersand => "&&", + .AmpersandEqual => "&=", + .QuestionMark => "?", + .AngleBracketLeft => "<", + .AngleBracketLeftEqual => "<=", + .AngleBracketAngleBracketLeft => "<<", + .AngleBracketAngleBracketLeftEqual => "<<=", + .AngleBracketRight => ">", + .AngleBracketRightEqual => ">=", + .AngleBracketAngleBracketRight => ">>", + .AngleBracketAngleBracketRightEqual => ">>=", + .Tilde => "~", + .Hash => "#", + .HashHash => "##", + .Keyword_auto => "auto", + .Keyword_break => "break", + .Keyword_case => "case", + .Keyword_char => "char", + .Keyword_const => "const", + .Keyword_continue => "continue", + .Keyword_default => "default", + .Keyword_do => "do", + .Keyword_double => "double", + .Keyword_else => "else", + .Keyword_enum => "enum", + .Keyword_extern => "extern", + .Keyword_float => "float", + .Keyword_for => "for", + .Keyword_goto => "goto", + .Keyword_if => "if", + .Keyword_int => "int", + .Keyword_long => "long", + .Keyword_register => "register", + .Keyword_return => "return", + .Keyword_short => "short", + .Keyword_signed => "signed", + .Keyword_sizeof => "sizeof", + .Keyword_static => "static", + .Keyword_struct => "struct", + .Keyword_switch => "switch", + .Keyword_typedef => "typedef", + .Keyword_union => "union", + .Keyword_unsigned => "unsigned", + .Keyword_void => "void", + .Keyword_volatile => "volatile", + .Keyword_while => "while", + .Keyword_bool => "_Bool", + .Keyword_complex => "_Complex", + .Keyword_imaginary => "_Imaginary", + .Keyword_inline => "inline", + .Keyword_restrict => "restrict", + .Keyword_alignas => "_Alignas", + .Keyword_alignof => "_Alignof", + .Keyword_atomic => "_Atomic", + .Keyword_generic => "_Generic", + .Keyword_noreturn => "_Noreturn", + .Keyword_static_assert => "_Static_assert", + .Keyword_thread_local => "_Thread_local", + .Keyword_include => "include", + .Keyword_define => "define", + .Keyword_ifdef => "ifdef", + .Keyword_ifndef => "ifndef", + .Keyword_error => "error", + .Keyword_pragma => "pragma", + }; + } }; pub const Keyword = struct { @@ -1121,8 +1236,7 @@ pub const Tokenizer = struct { } } else if (self.index == self.source.buffer.len) { switch (state) { - .AfterStringLiteral, - .Start => {}, + .AfterStringLiteral, .Start => {}, .u, .u8, .U, .L, .Identifier => { result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; }, @@ -1416,6 +1530,7 @@ fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void { .source = &Source{ .buffer = source, .file_name = undefined, + .tokens = undefined, }, }; for (expected_tokens) |expected_token_id| { From 25f7f66b8fb882a9b0abcad1baed851fabc464de Mon Sep 17 00:00:00 2001 From: Vexu 
Date: Sun, 5 Jan 2020 00:33:34 +0200 Subject: [PATCH 14/30] std-c type parsing --- lib/std/c/ast.zig | 87 ++++++++++++- lib/std/c/parse.zig | 302 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 360 insertions(+), 29 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 936964fddf..acc20dcfdf 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -26,10 +26,12 @@ pub const Tree = struct { }; pub const Error = union(enum) { - InvalidToken: SingleTokenError("Invalid token '{}'"), + InvalidToken: SingleTokenError("invalid token '{}'"), ExpectedToken: ExpectedToken, - ExpectedExpr: SingleTokenError("Expected expression, found '{}'"), - ExpectedStmt: SingleTokenError("Expected statement, found '{}'"), + ExpectedExpr: SingleTokenError("expected expression, found '{}'"), + ExpectedStmt: SingleTokenError("expected statement, found '{}'"), + InvalidTypeSpecifier: InvalidTypeSpecifier, + DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { switch (self.*) { @@ -37,6 +39,8 @@ pub const Error = union(enum) { .ExpectedToken => |*x| return x.render(tokens, stream), .ExpectedExpr => |*x| return x.render(tokens, stream), .ExpectedStmt => |*x| return x.render(tokens, stream), + .InvalidTypeSpecifier => |*x| return x.render(tokens, stream), + .DuplicateQualifier => |*x| return x.render(tokens, stream), } } @@ -46,6 +50,8 @@ pub const Error = union(enum) { .ExpectedToken => |x| return x.token, .ExpectedExpr => |x| return x.token, .ExpectedStmt => |x| return x.token, + .InvalidTypeSpecifier => |x| return x.token, + .DuplicateQualifier => |x| return x.token, } } @@ -64,6 +70,18 @@ pub const Error = union(enum) { } }; + pub const InvalidTypeSpecifier = struct { + token: TokenIndex, + type: *Node.Type, + + pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void { + try stream.write("invalid type specifier '"); + try type.specifier.print(tokens, stream); + const token_name = tokens.at(self.token).id.symbol(); + return stream.print("{}'", .{ token_name }); + } + }; + fn SingleTokenError(comptime msg: []const u8) type { return struct { token: TokenIndex, @@ -96,6 +114,69 @@ pub const Node = struct { pub const DeclList = SegmentedList(*Node, 4); }; + pub const Type = struct { + qualifiers: Qualifiers, + specifier: union(enum) { + /// error or default to int + None, + Void: TokenIndex, + Char: struct { + sign: ?TokenIndex = null, + char: TokenIndex, + }, + Short: struct { + sign: ?TokenIndex = null, + short: TokenIndex = null, + int: ?TokenIndex = null, + }, + Int: struct { + sign: ?TokenIndex = null, + int: ?TokenIndex = null, + }, + Long: struct { + sign: ?TokenIndex = null, + long: TokenIndex, + longlong: ?TokenIndex = null, + int: ?TokenIndex = null, + }, + Float: struct { + float: TokenIndex, + complex: ?TokenIndex = null, + }, + Double: struct { + long: ?TokenIndex = null, + double: ?TokenIndex, + complex: ?TokenIndex = null, + }, + Bool: TokenIndex, + Atomic: struct { + atomic: TokenIndex, + typename: *Node, + rparen: TokenIndex, + }, + + //todo + // @"enum", + // record, + + Typedef: TokenIndex, + + pub fn print(self: *@This(), self: *const @This(), tokens: *Tree.TokenList, stream: var) !void { + switch (self) { + .None => unreachable, + else => @panic("TODO print type specifier"), + } + } + }, + }; + + pub const Qualifiers = struct { + @"const": ?TokenIndex = null, + atomic: ?TokenIndex = null, + @"volatile": ?TokenIndex = null, + restrict: 
?TokenIndex = null, + }; + pub const JumpStmt = struct { base: Node = Node{ .id = .JumpStmt }, ltoken: TokenIndex, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 3fcfeaec52..02bbd99fde 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -109,35 +109,284 @@ const Parser = struct { fn staticAssertDeclaration(parser: *Parser) !?*Node {} /// DeclarationSpecifiers - /// <- StorageClassSpecifier DeclarationSpecifiers? - /// / TypeSpecifier DeclarationSpecifiers? - /// / TypeQualifier DeclarationSpecifiers? - /// / FunctionSpecifier DeclarationSpecifiers? - /// / AlignmentSpecifier DeclarationSpecifiers? + /// <- (Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register + /// / Type + /// / Keyword_inline / Keyword_noreturn + /// / Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN)* fn declarationSpecifiers(parser: *Parser) !*Node {} - /// StorageClassSpecifier - /// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register - fn storageClassSpecifier(parser: *Parser) !*Node {} - - /// TypeSpecifier + /// Type /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / /// / Keyword_atomic LPAREN TypeName RPAREN /// / EnumSpecifier /// / RecordSpecifier /// / IDENTIFIER // typedef name - fn typeSpecifier(parser: *Parser) !*Node {} + /// / TypeQualifier + fn type(parser: *Parser, type: *Node.Type) !bool { + while (try parser.typeQualifier(type.qualifiers)) {} + blk: { + if (parser.eatToken(.Keyword_void)) |tok| { + if (type.specifier != .None) + break :blk; + type.specifier = .{ .Void = tok }; + return true; + } else if (parser.eatToken(.Keyword_char)) |tok| { + switch (type.specifier) { + .None => { + type.specifier = .{ + .Char = .{ + .char = tok, + }, + }; + }, + .Int => |int| { + if (int.int != null) + break :blk; + type.specifier = .{ + .Char = .{ + .char = tok, + .sign = int.sign, + }, + }; + }, + else => break :blk, + } + return true; + } else if (parser.eatToken(.Keyword_short)) |tok| { + switch (type.specifier) { + .None => { + type.specifier = .{ + .Short = .{ + .short = tok, + }, + }; + }, + .Int => |int| { + if (int.int != null) + break :blk; + type.specifier = .{ + .Short = .{ + .short = tok, + .sign = int.sign, + }, + }; + }, + else => break :blk, + } + return true; + } else if (parser.eatToken(.Keyword_long)) |tok| { + switch (type.specifier) { + .None => { + type.specifier = .{ + .Long = .{ + .long = tok, + }, + }; + }, + .Int => |int| { + type.specifier = .{ + .Long = .{ + .long = tok, + .sign = int.sign, + .int = int.int, + }, + }; + }, + .Long => |*long| { + if (long.longlong != null) + break :blk; + long.longlong = tok; + }, + .Double => |*double| { + if (double.long != null) + break :blk; + double.long = tok; + }, + else => break :blk, + } + return true; + } else if (parser.eatToken(.Keyword_int)) |tok| { + switch (type.specifier) { + .None => { + type.specifier = .{ + .Int = .{ + .int = tok, + }, + }; + }, + .Short => |*short| { + if (short.int != null) + break :blk; + short.int = tok; + }, + .Int => |*int| { + if (int.int != null) + break :blk; + int.int = tok; + }, + .Long => |*long| { + if (long.int != null) + break :blk; + long.int = tok; + }, + else => break :blk, + } + return true; + } else if (parser.eatToken(.Keyword_signed) orelse parser.eatToken(.Keyword_unsigned)) |tok| { + switch 
(type.specifier) { + .None => { + type.specifier = .{ + .Int = .{ + .sign = tok, + }, + }; + }, + .Char => |*char| { + if (char.sign != null) + break :blk; + char.sign = tok; + }, + .Short => |*short| { + if (short.sign != null) + break :blk; + short.sign = tok; + }, + .Int => |*int| { + if (int.sign != null) + break :blk; + int.sign = tok; + }, + .Long => |*long| { + if (long.sign != null) + break :blk; + long.sign = tok; + }, + else => break :blk, + } + return true; + } else if (parser.eatToken(.Keyword_float)) |tok| { + if (type.specifier != .None) + break :blk; + type.specifier = .{ + .Float = .{ + .float = tok, + }, + }; + return true; + } else if (parser.eatToken(.Keyword_double)) |tok| { + if (type.specifier != .None) + break :blk; + type.specifier = .{ + .Double = .{ + .double = tok, + }, + }; + return true; + } else if (parser.eatToken(.Keyword_complex)) |tok| { + switch (type.specifier) { + .None => { + type.specifier = .{ + .Double = .{ + .complex = tok, + .double = null + }, + }; + }, + .Float => |*float| { + if (float.complex != null) + break :blk; + float.complex = tok; + }, + .Double => |*double| { + if (double.complex != null) + break :blk; + double.complex = tok; + }, + else => break :blk, + } + return true; + } if (parser.eatToken(.Keyword_bool)) |tok| { + if (type.specifier != .None) + break :blk; + type.specifier = .{ .Bool = tok }; + return true; + } else if (parser.eatToken(.Keyword_atomic)) |tok| { + if (type.specifier != .None) + break :blk; + _ = try parser.expectToken(.LParen); + const name = try parser.expect(typeName, .{ + .ExpectedTypeName = .{ .tok = it.index }, + }); + type.specifier.Atomic = .{ + .atomic = tok, + .typename = name, + .rparen = try parser.expectToken(.RParen), + }; + return true; + } else if (parser.eatToken(.Keyword_enum)) |tok| { + if (type.specifier != .None) + break :blk; + @panic("TODO enum type"); + // return true; + } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| { + if (type.specifier != .None) + break :blk; + @panic("TODO record type"); + // return true; + } else if (parser.eatToken(.Identifier)) |tok| { + if (!parser.typedefs.contains(tok)) { + parser.putBackToken(tok); + return false; + } + type.specifier = .{ + .Typedef = tok, + }; + return true; + } + } + try parser.tree.errors.push(.{ + .InvalidTypeSpecifier = .{ + .token = parser.it.index, + .type = type, + }, + }); + return error.ParseError; + } /// TypeQualifier <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic - fn typeQualifier(parser: *Parser) !*Node {} + fn typeQualifier(parser: *Parser, qualifiers: *Node.Qualifiers) !bool { + if (parser.eatToken(.Keyword_const)) |tok| { + if (qualifiers.@"const" != null) + return parser.warning(.{ + .DuplicateQualifier = .{ .token = tok }, + }); + qualifiers.@"const" = tok; + } else if (parser.eatToken(.Keyword_restrict)) |tok| { + if (qualifiers.atomic != null) + return parser.warning(.{ + .DuplicateQualifier = .{ .token = tok }, + }); + qualifiers.atomic = tok; + } else if (parser.eatToken(.Keyword_volatile)) |tok| { + if (qualifiers.@"volatile" != null) + return parser.warning(.{ + .DuplicateQualifier = .{ .token = tok }, + }); + qualifiers.@"volatile" = tok; + } else if (parser.eatToken(.Keyword_atomic)) |tok| { + if (qualifiers.atomic != null) + return parser.warning(.{ + .DuplicateQualifier = .{ .token = tok }, + }); + qualifiers.atomic = tok; + } else return false; + return true; + } /// FunctionSpecifier <- Keyword_inline / Keyword_noreturn fn 
functionSpecifier(parser: *Parser) !*Node {} - /// AlignmentSpecifier <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN - fn alignmentSpecifier(parser: *Parser) !*Node {} - /// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? fn enumSpecifier(parser: *Parser) !*Node {} @@ -148,19 +397,14 @@ const Parser = struct { fn recordSpecifier(parser: *Parser) !*Node {} /// RecordField - /// <- SpecifierQualifer (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON + /// <- Type* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON /// \ StaticAssertDeclaration fn recordField(parser: *Parser) !*Node {} /// TypeName - /// <- SpecifierQualifer AbstractDeclarator? + /// <- Type* AbstractDeclarator? fn typeName(parser: *Parser) !*Node {} - /// SpecifierQualifer - /// <- TypeSpecifier SpecifierQualifer? - /// / TypeQualifier SpecifierQualifer? - fn specifierQualifer(parser: *Parser) !*Node {} - /// RecordDeclarator <- Declarator? (COLON ConstExpr)? fn recordDeclarator(parser: *Parser) !*Node {} @@ -329,7 +573,7 @@ const Parser = struct { if (parser.eatToken(.Keyword_else)) |else_tok| { node.@"else" = .{ .tok = else_tok, - .stmt = try parser.stmt(expr, .{ + .stmt = try parser.stmt(expr, .{ .ExpectedStmt = .{ .token = it.index }, }), }; @@ -431,11 +675,11 @@ const Parser = struct { } } - fn putBackToken(it: *TokenIterator, putting_back: TokenIndex) void { + fn putBackToken(parser: *Parser, putting_back: TokenIndex) void { while (true) { - const prev_tok = it.prev() orelse return; + const prev_tok = parser.it.prev() orelse return; if (next_tok.id == .LineComment or next_tok.id == .MultiLineComment) continue; - assert(it.list.at(putting_back) == prev_tok); + assert(parser.it.list.at(putting_back) == prev_tok); return; } } @@ -450,4 +694,10 @@ const Parser = struct { return error.ParseError; }; } + + fn warning(parser: *Parser, err: ast.Error) Error { + // if (parser.warnaserror) + try parser.tree.errors.push(err); + return error.ParseError; + } }; From 46f292982d6035c7d57ae8d637c770a121c40e37 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sun, 5 Jan 2020 13:24:10 +0200 Subject: [PATCH 15/30] std-c parser DeclSpec --- lib/std/c.zig | 2 +- lib/std/c/ast.zig | 46 +++++-- lib/std/c/parse.zig | 285 ++++++++++++++++++++++++++-------------- lib/std/c/tokenizer.zig | 8 +- 4 files changed, 232 insertions(+), 109 deletions(-) diff --git a/lib/std/c.zig b/lib/std/c.zig index 3f339f9d18..2339c8b1a2 100644 --- a/lib/std/c.zig +++ b/lib/std/c.zig @@ -2,7 +2,7 @@ const builtin = @import("builtin"); const std = @import("std"); const page_size = std.mem.page_size; -const tokenizer = @import("c/tokenizer.zig"); +pub const tokenizer = @import("c/tokenizer.zig"); pub const Token = tokenizer.Token; pub const Tokenizer = tokenizer.Tokenizer; pub const parse = @import("c/parse.zig").parse; diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index acc20dcfdf..ea6abfad9d 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -1,4 +1,4 @@ -const std = @import("std.zig"); +const std = @import("std"); const SegmentedList = std.SegmentedList; const Token = std.c.Token; const Source = std.c.tokenizer.Source; @@ -11,6 +11,7 @@ pub const Tree = struct { root_node: *Node.Root, arena_allocator: std.heap.ArenaAllocator, errors: ErrorList, + warnings: ?ErrorList, pub const SourceList = SegmentedList(Source, 4); pub const TokenList = Source.TokenList; @@ -30,8 +31,10 @@ pub const Error = union(enum) { ExpectedToken: ExpectedToken, ExpectedExpr: SingleTokenError("expected expression, found '{}'"), ExpectedStmt: 
SingleTokenError("expected statement, found '{}'"), + ExpectedTypeName: SingleTokenError("expected type name, found '{}'"), InvalidTypeSpecifier: InvalidTypeSpecifier, DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), + DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"), pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { switch (self.*) { @@ -39,8 +42,10 @@ pub const Error = union(enum) { .ExpectedToken => |*x| return x.render(tokens, stream), .ExpectedExpr => |*x| return x.render(tokens, stream), .ExpectedStmt => |*x| return x.render(tokens, stream), + .ExpectedTypeName => |*x| return x.render(tokens, stream), .InvalidTypeSpecifier => |*x| return x.render(tokens, stream), .DuplicateQualifier => |*x| return x.render(tokens, stream), + .DuplicateSpecifier => |*x| return x.render(tokens, stream), } } @@ -50,8 +55,10 @@ pub const Error = union(enum) { .ExpectedToken => |x| return x.token, .ExpectedExpr => |x| return x.token, .ExpectedStmt => |x| return x.token, + .ExpectedTypeName => |x| return x.token, .InvalidTypeSpecifier => |x| return x.token, .DuplicateQualifier => |x| return x.token, + .DuplicateSpecifier => |x| return x.token, } } @@ -72,11 +79,11 @@ pub const Error = union(enum) { pub const InvalidTypeSpecifier = struct { token: TokenIndex, - type: *Node.Type, + type_spec: *Node.TypeSpec, pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void { try stream.write("invalid type specifier '"); - try type.specifier.print(tokens, stream); + try type_spec.spec.print(tokens, stream); const token_name = tokens.at(self.token).id.symbol(); return stream.print("{}'", .{ token_name }); } @@ -114,9 +121,32 @@ pub const Node = struct { pub const DeclList = SegmentedList(*Node, 4); }; - pub const Type = struct { - qualifiers: Qualifiers, - specifier: union(enum) { + pub const DeclSpec = struct { + storage_class: union(enum) { + Auto: TokenIndex, + Extern: TokenIndex, + Register: TokenIndex, + Static: TokenIndex, + Typedef: TokenIndex, + None, + } = .None, + thread_local: ?TokenIndex = null, + type_spec: TypeSpec = TypeSpec{}, + fn_spec: union(enum) { + Inline: TokenIndex, + Noreturn: TokenIndex, + None, + } = .None, + align_spec: ?struct { + alignas: TokenIndex, + expr: *Node, + rparen: TokenIndex, + } = null, + }; + + pub const TypeSpec = struct { + qual: TypeQual = TypeQual{}, + spec: union(enum) { /// error or default to int None, Void: TokenIndex, @@ -167,10 +197,10 @@ pub const Node = struct { else => @panic("TODO print type specifier"), } } - }, + } = .None, }; - pub const Qualifiers = struct { + pub const TypeQual = struct { @"const": ?TokenIndex = null, atomic: ?TokenIndex = null, @"volatile": ?TokenIndex = null, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 02bbd99fde..9cb8e327fd 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -1,7 +1,8 @@ -const std = @import("../std.zig"); +const std = @import("std"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ast = std.c.ast; +const Node = ast.Node; const Tree = ast.Tree; const TokenIndex = ast.TokenIndex; const Token = std.c.Token; @@ -69,6 +70,12 @@ const Parser = struct { arena: *Allocator, it: *TokenIterator, tree: *Tree, + typedefs: std.StringHashMap(void), + + fn isTypedef(parser: *Parser, tok: TokenIndex) bool { + const token = parser.it.list.at(tok); + return parser.typedefs.contains(token.slice()); + } /// Root <- ExternalDeclaration* eof fn root(parser: *Parser) 
Allocator.Error!*Node { @@ -93,48 +100,89 @@ const Parser = struct { } /// ExternalDeclaration - /// <- Declaration - /// / DeclarationSpecifiers Declarator Declaration* CompoundStmt + /// <- DeclSpec Declarator Declaration* CompoundStmt + /// / DeclSpec (Declarator (EQUAL Initializer)?)* SEMICOLON + /// / StaticAssert fn externalDeclarations(parser: *Parser) !?*Node { if (try Declaration(parser)) |decl| {} return null; } /// Declaration - /// <- DeclarationSpecifiers (Declarator (EQUAL Initializer)?)* SEMICOLON - /// \ StaticAssertDeclaration + /// <- DeclSpec (Declarator (EQUAL Initializer)?)* SEMICOLON + /// / StaticAssert fn declaration(parser: *Parser) !?*Node {} - /// StaticAssertDeclaration <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON - fn staticAssertDeclaration(parser: *Parser) !?*Node {} + /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON + fn StaticAssert(parser: *Parser) !?*Node {} - /// DeclarationSpecifiers - /// <- (Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register - /// / Type - /// / Keyword_inline / Keyword_noreturn - /// / Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN)* - fn declarationSpecifiers(parser: *Parser) !*Node {} + /// DeclSpec <- (StorageClassSpec / TypeSpec / FnSpec / AlignSpec)* + fn declSpec(parser: *Parser) !*Node.DeclSpec { + const ds = try parser.arena.create(Node.DeclSpec); + ds.* = .{}; + while ((try parser.storageClassSpec(ds)) or (try parser.typeSpec(&ds.type_spec)) or (try parser.fnSpec(ds)) or (try parser.alignSpec(ds))) {} + return ds; + } - /// Type + /// StorageClassSpec + /// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register + fn storageClassSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { + blk: { + if (parser.eatToken(.Keyword_typedef)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Typedef = tok }; + } else if (parser.eatToken(.Keyword_extern)) |tok| { + if (ds.storage_class != .None) + break :blk; + ds.storage_class = .{ .Extern = tok }; + } else if (parser.eatToken(.Keyword_static)) |tok| { + if (ds.storage_class != .None) + break :blk; + ds.storage_class = .{ .Static = tok }; + } else if (parser.eatToken(.Keyword_thread_local)) |tok| { + switch (ds.storage_class) { + .None, .Extern, .Static => {}, + else => break :blk, + } + ds.thread_local = tok; + } else if (parser.eatToken(.Keyword_auto)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Auto = tok }; + } else if (parser.eatToken(.Keyword_register)) |tok| { + if (ds.storage_class != .None or ds.thread_local != null) + break :blk; + ds.storage_class = .{ .Register = tok }; + } else return false; + return true; + } + try parser.warning(.{ + .DuplicateSpecifier = .{ .token = parser.it.index }, + }); + return true; + } + + /// TypeSpec /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / /// / Keyword_atomic LPAREN TypeName RPAREN /// / EnumSpecifier /// / RecordSpecifier /// / IDENTIFIER // typedef name - /// / TypeQualifier - fn type(parser: *Parser, type: *Node.Type) !bool { - while (try parser.typeQualifier(type.qualifiers)) {} + /// / TypeQual + fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool { + while 
(try parser.typeQual(&type_spec.qual)) {} blk: { if (parser.eatToken(.Keyword_void)) |tok| { - if (type.specifier != .None) + if (type_spec.spec != .None) break :blk; - type.specifier = .{ .Void = tok }; + type_spec.spec = .{ .Void = tok }; return true; } else if (parser.eatToken(.Keyword_char)) |tok| { - switch (type.specifier) { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Char = .{ .char = tok, }, @@ -143,7 +191,7 @@ const Parser = struct { .Int => |int| { if (int.int != null) break :blk; - type.specifier = .{ + type_spec.spec = .{ .Char = .{ .char = tok, .sign = int.sign, @@ -154,9 +202,9 @@ const Parser = struct { } return true; } else if (parser.eatToken(.Keyword_short)) |tok| { - switch (type.specifier) { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Short = .{ .short = tok, }, @@ -165,7 +213,7 @@ const Parser = struct { .Int => |int| { if (int.int != null) break :blk; - type.specifier = .{ + type_spec.spec = .{ .Short = .{ .short = tok, .sign = int.sign, @@ -176,16 +224,16 @@ const Parser = struct { } return true; } else if (parser.eatToken(.Keyword_long)) |tok| { - switch (type.specifier) { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Long = .{ .long = tok, }, }; }, .Int => |int| { - type.specifier = .{ + type_spec.spec = .{ .Long = .{ .long = tok, .sign = int.sign, @@ -207,9 +255,9 @@ const Parser = struct { } return true; } else if (parser.eatToken(.Keyword_int)) |tok| { - switch (type.specifier) { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Int = .{ .int = tok, }, @@ -234,9 +282,9 @@ const Parser = struct { } return true; } else if (parser.eatToken(.Keyword_signed) orelse parser.eatToken(.Keyword_unsigned)) |tok| { - switch (type.specifier) { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Int = .{ .sign = tok, }, @@ -266,30 +314,30 @@ const Parser = struct { } return true; } else if (parser.eatToken(.Keyword_float)) |tok| { - if (type.specifier != .None) + if (type_spec.spec != .None) break :blk; - type.specifier = .{ + type_spec.spec = .{ .Float = .{ .float = tok, }, }; return true; - } else if (parser.eatToken(.Keyword_double)) |tok| { - if (type.specifier != .None) + } else if (parser.eatToken(.Keyword_double)) |tok| { + if (type_spec.spec != .None) break :blk; - type.specifier = .{ + type_spec.spec = .{ .Double = .{ .double = tok, }, }; return true; - } else if (parser.eatToken(.Keyword_complex)) |tok| { - switch (type.specifier) { + } else if (parser.eatToken(.Keyword_complex)) |tok| { + switch (type_spec.spec) { .None => { - type.specifier = .{ + type_spec.spec = .{ .Double = .{ .complex = tok, - .double = null + .double = null, }, }; }, @@ -306,40 +354,41 @@ const Parser = struct { else => break :blk, } return true; - } if (parser.eatToken(.Keyword_bool)) |tok| { - if (type.specifier != .None) + } + if (parser.eatToken(.Keyword_bool)) |tok| { + if (type_spec.spec != .None) break :blk; - type.specifier = .{ .Bool = tok }; + type_spec.spec = .{ .Bool = tok }; return true; } else if (parser.eatToken(.Keyword_atomic)) |tok| { - if (type.specifier != .None) + if (type_spec.spec != .None) break :blk; _ = try parser.expectToken(.LParen); const name = try parser.expect(typeName, .{ - .ExpectedTypeName = .{ .tok = it.index }, + .ExpectedTypeName = .{ .token = parser.it.index }, }); - type.specifier.Atomic = .{ + type_spec.spec.Atomic = .{ .atomic = tok, .typename = name, .rparen = try 
parser.expectToken(.RParen), }; return true; } else if (parser.eatToken(.Keyword_enum)) |tok| { - if (type.specifier != .None) + if (type_spec.spec != .None) break :blk; @panic("TODO enum type"); // return true; } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| { - if (type.specifier != .None) + if (type_spec.spec != .None) break :blk; @panic("TODO record type"); // return true; } else if (parser.eatToken(.Identifier)) |tok| { - if (!parser.typedefs.contains(tok)) { + if (!parser.isTypedef(tok)) { parser.putBackToken(tok); return false; } - type.specifier = .{ + type_spec.spec = .{ .Typedef = tok, }; return true; @@ -348,44 +397,81 @@ const Parser = struct { try parser.tree.errors.push(.{ .InvalidTypeSpecifier = .{ .token = parser.it.index, - .type = type, + .type_spec = type_spec, }, }); return error.ParseError; } - /// TypeQualifier <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic - fn typeQualifier(parser: *Parser, qualifiers: *Node.Qualifiers) !bool { - if (parser.eatToken(.Keyword_const)) |tok| { - if (qualifiers.@"const" != null) - return parser.warning(.{ - .DuplicateQualifier = .{ .token = tok }, - }); - qualifiers.@"const" = tok; - } else if (parser.eatToken(.Keyword_restrict)) |tok| { - if (qualifiers.atomic != null) - return parser.warning(.{ - .DuplicateQualifier = .{ .token = tok }, - }); - qualifiers.atomic = tok; - } else if (parser.eatToken(.Keyword_volatile)) |tok| { - if (qualifiers.@"volatile" != null) - return parser.warning(.{ - .DuplicateQualifier = .{ .token = tok }, - }); - qualifiers.@"volatile" = tok; - } else if (parser.eatToken(.Keyword_atomic)) |tok| { - if (qualifiers.atomic != null) - return parser.warning(.{ - .DuplicateQualifier = .{ .token = tok }, - }); - qualifiers.atomic = tok; - } else return false; + /// TypeQual <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic + fn typeQual(parser: *Parser, qual: *Node.TypeQual) !bool { + blk: { + if (parser.eatToken(.Keyword_const)) |tok| { + if (qual.@"const" != null) + break :blk; + qual.@"const" = tok; + } else if (parser.eatToken(.Keyword_restrict)) |tok| { + if (qual.atomic != null) + break :blk; + qual.atomic = tok; + } else if (parser.eatToken(.Keyword_volatile)) |tok| { + if (qual.@"volatile" != null) + break :blk; + qual.@"volatile" = tok; + } else if (parser.eatToken(.Keyword_atomic)) |tok| { + if (qual.atomic != null) + break :blk; + qual.atomic = tok; + } else return false; + return true; + } + try parser.warning(.{ + .DuplicateQualifier = .{ .token = parser.it.index }, + }); return true; } - /// FunctionSpecifier <- Keyword_inline / Keyword_noreturn - fn functionSpecifier(parser: *Parser) !*Node {} + /// FnSpec <- Keyword_inline / Keyword_noreturn + fn fnSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { + blk: { + if (parser.eatToken(.Keyword_inline)) |tok| { + if (ds.fn_spec != .None) + break :blk; + ds.fn_spec = .{ .Inline = tok }; + } else if (parser.eatToken(.Keyword_noreturn)) |tok| { + if (ds.fn_spec != .None) + break :blk; + ds.fn_spec = .{ .Noreturn = tok }; + } else return false; + return true; + } + try parser.warning(.{ + .DuplicateSpecifier = .{ .token = parser.it.index }, + }); + return true; + } + + /// AlignSpec <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN + fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { + if (parser.eatToken(.Keyword_alignas)) |tok| { + _ = try parser.expectToken(.LParen); + const node = (try parser.typeName()) orelse (try parser.expect(conditionalExpr, .{ + 
.ExpectedExpr = .{ .token = parser.it.index }, + })); + if (ds.align_spec != null) { + try parser.warning(.{ + .DuplicateSpecifier = .{ .token = parser.it.index }, + }); + } + ds.align_spec = .{ + .alignas = tok, + .expr = node, + .rparen = try parser.expectToken(.RParen), + }; + return true; + } + return false; + } /// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? fn enumSpecifier(parser: *Parser) !*Node {} @@ -397,13 +483,13 @@ const Parser = struct { fn recordSpecifier(parser: *Parser) !*Node {} /// RecordField - /// <- Type* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON - /// \ StaticAssertDeclaration + /// <- TypeSpec* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON + /// \ StaticAssert fn recordField(parser: *Parser) !*Node {} /// TypeName - /// <- Type* AbstractDeclarator? - fn typeName(parser: *Parser) !*Node {} + /// <- TypeSpec* AbstractDeclarator? + fn typeName(parser: *Parser) !*Node { /// RecordDeclarator <- Declarator? (COLON ConstExpr)? fn recordDeclarator(parser: *Parser) !*Node {} @@ -411,7 +497,7 @@ const Parser = struct { /// Declarator <- Pointer? DirectDeclarator fn declarator(parser: *Parser) !*Node {} - /// Pointer <- ASTERISK TypeQualifier* Pointer? + /// Pointer <- ASTERISK TypeQual* Pointer? fn pointer(parser: *Parser) !*Node {} /// DirectDeclarator @@ -422,13 +508,13 @@ const Parser = struct { fn directDeclarator(parser: *Parser) !*Node {} /// BracketDeclarator - /// <- Keyword_static TypeQualifier* AssignmentExpr - /// / TypeQualifier+ (ASTERISK / Keyword_static AssignmentExpr) - /// / TypeQualifier+ AssignmentExpr? + /// <- Keyword_static TypeQual* AssignmentExpr + /// / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr) + /// / TypeQual+ AssignmentExpr? /// / AssignmentExpr fn bracketDeclarator(parser: *Parser) !*Node {} - /// ParamDecl <- DeclarationSpecifiers (Declarator / AbstractDeclarator) + /// ParamDecl <- DeclSpec (Declarator / AbstractDeclarator) fn paramDecl(parser: *Parser) !*Node {} /// AbstractDeclarator <- Pointer? DirectAbstractDeclarator? 
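+    /// e.g. the `(*)[3]` in the cast `(int (*)[3])p` is an abstract
+    /// declarator: it omits the identifier but still describes
+    /// "pointer to an array of three ints"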
@@ -647,25 +733,25 @@ const Parser = struct { return &node.base; } - fn eatToken(parser: *Parser, id: Token.Id) ?TokenIndex { + fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex { while (true) { const next_tok = parser.it.next() orelse return null; if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { if (next_tok.id == id) { return parser.it.index; } - parser.it.prev(); + _ = parser.it.prev(); return null; } } } - fn expectToken(parser: *Parser, id: Token.Id) Error!TokenIndex { + fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex { while (true) { const next_tok = parser.it.next() orelse return error.ParseError; if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { if (next_tok.id != id) { - try tree.errors.push(.{ + try parser.tree.errors.push(.{ .ExpectedToken = .{ .token = parser.it.index, .expected_id = id }, }); return error.ParseError; @@ -678,7 +764,7 @@ const Parser = struct { fn putBackToken(parser: *Parser, putting_back: TokenIndex) void { while (true) { const prev_tok = parser.it.prev() orelse return; - if (next_tok.id == .LineComment or next_tok.id == .MultiLineComment) continue; + if (prev_tok.id == .LineComment or prev_tok.id == .MultiLineComment) continue; assert(parser.it.list.at(putting_back) == prev_tok); return; } @@ -689,14 +775,17 @@ const Parser = struct { parseFn: fn (*Parser) Error!?*Node, err: ast.Error, // if parsing fails ) Error!*Node { - return (try parseFn(arena, it, tree)) orelse { + return (try parseFn(parser)) orelse { try parser.tree.errors.push(err); return error.ParseError; }; } - fn warning(parser: *Parser, err: ast.Error) Error { - // if (parser.warnaserror) + fn warning(parser: *Parser, err: ast.Error) Error!void { + if (parser.tree.warnings) |*w| { + try w.push(err); + return; + } try parser.tree.errors.push(err); return error.ParseError; } diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index f7732b644e..4e74b97018 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -135,8 +135,8 @@ pub const Token = struct { Keyword_error, Keyword_pragma, - pub fn symbol(tok: Token) []const u8 { - return switch (tok.id) { + pub fn symbol(id: @TagType(Id)) []const u8 { + return switch (id) { .Invalid => "Invalid", .Eof => "Eof", .Nl => "NewLine", @@ -347,6 +347,10 @@ pub const Token = struct { return null; } + pub fn slice(tok: Token) []const u8 { + return tok.source.buffer[tok.start..tok.end]; + } + pub const NumSuffix = enum { None, F, From f934f9b41938ee6208d7cdd8a26687bffbe171cf Mon Sep 17 00:00:00 2001 From: Vexu Date: Sun, 5 Jan 2020 15:15:55 +0200 Subject: [PATCH 16/30] std-c parser fndef and static assert --- lib/std/c/ast.zig | 30 ++++++++- lib/std/c/parse.zig | 148 +++++++++++++++++++++++++++++++------------- 2 files changed, 132 insertions(+), 46 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index ea6abfad9d..0a600eb7f3 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -32,6 +32,8 @@ pub const Error = union(enum) { ExpectedExpr: SingleTokenError("expected expression, found '{}'"), ExpectedStmt: SingleTokenError("expected statement, found '{}'"), ExpectedTypeName: SingleTokenError("expected type name, found '{}'"), + ExpectedFnBody: SingleTokenError("expected function body, found '{}'"), + ExpectedInitializer: SingleTokenError("expected initializer, found '{}'"), InvalidTypeSpecifier: InvalidTypeSpecifier, DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), DuplicateSpecifier: SingleTokenError("duplicate 
declaration specifier '{}'"), @@ -43,6 +45,8 @@ pub const Error = union(enum) { .ExpectedExpr => |*x| return x.render(tokens, stream), .ExpectedStmt => |*x| return x.render(tokens, stream), .ExpectedTypeName => |*x| return x.render(tokens, stream), + .ExpectedDeclarator => |*x| return x.render(tokens, stream), + .ExpectedFnBody => |*x| return x.render(tokens, stream), .InvalidTypeSpecifier => |*x| return x.render(tokens, stream), .DuplicateQualifier => |*x| return x.render(tokens, stream), .DuplicateSpecifier => |*x| return x.render(tokens, stream), @@ -56,6 +60,8 @@ pub const Error = union(enum) { .ExpectedExpr => |x| return x.token, .ExpectedStmt => |x| return x.token, .ExpectedTypeName => |x| return x.token, + .ExpectedDeclarator => |x| return x.token, + .ExpectedFnBody => |x| return x.token, .InvalidTypeSpecifier => |x| return x.token, .DuplicateQualifier => |x| return x.token, .DuplicateSpecifier => |x| return x.token, @@ -85,7 +91,7 @@ pub const Error = union(enum) { try stream.write("invalid type specifier '"); try type_spec.spec.print(tokens, stream); const token_name = tokens.at(self.token).id.symbol(); - return stream.print("{}'", .{ token_name }); + return stream.print("{}'", .{token_name}); } }; @@ -111,10 +117,12 @@ pub const Node = struct { Label, CompoundStmt, IfStmt, + StaticAssert, + FnDef, }; pub const Root = struct { - base: Node, + base: Node = Node{ .id = .Root }, decls: DeclList, eof: TokenIndex, @@ -230,7 +238,6 @@ pub const Node = struct { pub const Label = struct { base: Node = Node{ .id = .Label }, identifier: TokenIndex, - colon: TokenIndex, }; pub const CompoundStmt = struct { @@ -251,4 +258,21 @@ pub const Node = struct { stmt: *Node, }, }; + + pub const StaticAssert = struct { + base: Node = Node{ .id = .StaticAssert }, + assert: TokenIndex, + expr: *Node, + semicolon: TokenIndex, + }; + + pub const FnDef = struct { + base: Node = Node{ .id = .FnDef }, + decl_spec: *DeclSpec, + declarator: *Node, + old_decls: OldDeclList, + body: *CompoundStmt, + + pub const OldDeclList = SegmentedList(*Node, 0); + }; }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 9cb8e327fd..79790fd0ff 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -78,10 +78,10 @@ const Parser = struct { } /// Root <- ExternalDeclaration* eof - fn root(parser: *Parser) Allocator.Error!*Node { - const node = try arena.create(ast.Root); + fn root(parser: *Parser) Allocator.Error!*Node.Root { + const node = try parser.arena.create(Node.Root); node.* = .{ - .decls = ast.Node.DeclList.init(arena), + .decls = Node.Root.DeclList.init(parser.arena), .eof = undefined, }; while (parser.externalDeclarations() catch |err| switch (err) { @@ -90,31 +90,87 @@ const Parser = struct { }) |decl| { try node.decls.push(decl); } - node.eof = eatToken(it, .Eof) orelse { - try tree.errors.push(.{ - .ExpectedDecl = .{ .token = it.index }, - }); - return node; - }; + node.eof = parser.eatToken(.Eof) orelse return node; return node; } /// ExternalDeclaration /// <- DeclSpec Declarator Declaration* CompoundStmt - /// / DeclSpec (Declarator (EQUAL Initializer)?)* SEMICOLON - /// / StaticAssert + /// / Declaration fn externalDeclarations(parser: *Parser) !?*Node { - if (try Declaration(parser)) |decl| {} - return null; + if (try parser.staticAssert()) |decl| return decl; + const ds = try parser.declSpec(); + const dr = (try parser.declarator()); + if (dr == null) + try parser.warning(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + }); + // TODO disallow auto and register + const next_tok = 
parser.it.peek().?; + switch (next_tok.id) { + .Semicolon, + .Equal, + .Comma, + .Eof, + => return parser.declarationExtra(ds, dr, false), + else => {}, + } + var old_decls = Node.FnDef.OldDeclList.init(parser.arena); + while (try parser.declaration()) |decl| { + // validate declaration + try old_decls.push(decl); + } + const body = try parser.expect(compoundStmt, .{ + .ExpectedFnBody = .{ .token = parser.it.index }, + }); + + const node = try parser.arena.create(Node.FnDef); + node.* = .{ + .decl_spec = ds, + .declarator = dr orelse return null, + .old_decls = old_decls, + .body = @fieldParentPtr(Node.CompoundStmt, "base", body), + }; + return &node.base; } /// Declaration - /// <- DeclSpec (Declarator (EQUAL Initializer)?)* SEMICOLON + /// <- DeclSpec (Declarator (EQUAL Initializer)? COMMA)* SEMICOLON /// / StaticAssert - fn declaration(parser: *Parser) !?*Node {} + fn declaration(parser: *Parser) !?*Node { + if (try parser.staticAssert()) |decl| return decl; + const ds = try parser.declSpec(); + const dr = (try parser.declarator()); + if (dr == null) + try parser.warning(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + }); + // TODO disallow threadlocal without static or extern + return parser.declarationExtra(ds, dr, true); + } + + fn declarationExtra(parser: *Parser, ds: *Node.DeclSpec, dr: ?*Node, local: bool) !?*Node { + } /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON - fn StaticAssert(parser: *Parser) !?*Node {} + fn staticAssert(parser: *Parser) !?*Node { + const tok = parser.eatToken(.Keyword_static_assert) orelse return null; + _ = try parser.expectToken(.LParen); + const const_expr = try parser.expect(constExpr, .{ + .ExpectedExpr = .{ .token = parser.it.index }, + }); + _ = try parser.expectToken(.Comma); + const str = try parser.expectToken(.StringLiteral); + _ = try parser.expectToken(.RParen); + const semicolon = try parser.expectToken(.Semicolon); + const node = try parser.arena.create(Node.StaticAssert); + node.* = .{ + .assert = tok, + .expr = const_expr, + .semicolon = semicolon, + }; + return &node.base; + } /// DeclSpec <- (StorageClassSpec / TypeSpec / FnSpec / AlignSpec)* fn declSpec(parser: *Parser) !*Node.DeclSpec { @@ -455,7 +511,7 @@ const Parser = struct { fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { if (parser.eatToken(.Keyword_alignas)) |tok| { _ = try parser.expectToken(.LParen); - const node = (try parser.typeName()) orelse (try parser.expect(conditionalExpr, .{ + const node = (try parser.typeName()) orelse (try parser.expect(constExpr, .{ .ExpectedExpr = .{ .token = parser.it.index }, })); if (ds.align_spec != null) { @@ -538,6 +594,8 @@ const Parser = struct { fn assignmentExpr(parser: *Parser) !*Node {} /// ConstExpr <- ConditionalExpr + const constExpr = conditionalExpr; + /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? 
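+    /// e.g. `a ? b : c ? d : e` groups as `a ? b : (c ? d : e)`, since the
+    /// grammar recurses into ConditionalExpr after the COLON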
fn conditionalExpr(parser: *Parser) !*Node {} @@ -613,19 +671,19 @@ const Parser = struct { /// / PERIOD IDENTIFIER fn designator(parser: *Parser) !*Node {} - /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE - fn compoundStmt(parser: *Parser) !?*Node { + /// CompoundStmt <- LBRACE (Stmt / Declaration)* RBRACE + fn compoundStmt(parser: *Parser) Error!?*Node { const lbrace = parser.eatToken(.LBrace) orelse return null; - const node = try parser.arena.create(Node.CompoundStmt); - node.* = .{ + const body_node = try parser.arena.create(Node.CompoundStmt); + body_node.* = .{ .lbrace = lbrace, - .statements = Node.JumpStmt.StmtList.init(parser.arena), + .statements = Node.CompoundStmt.StmtList.init(parser.arena), .rbrace = undefined, }; - while (parser.declaration() orelse parser.stmt()) |node| - try node.statements.push(node); - node.rbrace = try parser.expectToken(.RBrace); - return &node.base; + while ((try parser.stmt()) orelse (try parser.declaration())) |node| + try body_node.statements.push(node); + body_node.rbrace = try parser.expectToken(.RBrace); + return &body_node.base; } /// Stmt @@ -643,15 +701,15 @@ const Parser = struct { /// / Keyword_return Expr? SEMICOLON /// / IDENTIFIER COLON Stmt /// / ExprStmt - fn stmt(parser: *Parser) !?*Node { - if (parser.compoundStmt()) |node| return node; + fn stmt(parser: *Parser) Error!?*Node { + if (try parser.compoundStmt()) |node| return node; if (parser.eatToken(.Keyword_if)) |tok| { const node = try parser.arena.create(Node.IfStmt); _ = try parser.expectToken(.LParen); node.* = .{ .@"if" = tok, .cond = try parser.expect(expr, .{ - .ExpectedExpr = .{ .token = it.index }, + .ExpectedExpr = .{ .token = parser.it.index }, }), .@"else" = null, }; @@ -659,8 +717,8 @@ const Parser = struct { if (parser.eatToken(.Keyword_else)) |else_tok| { node.@"else" = .{ .tok = else_tok, - .stmt = try parser.stmt(expr, .{ - .ExpectedStmt = .{ .token = it.index }, + .stmt = try parser.expect(stmt, .{ + .ExpectedStmt = .{ .token = parser.it.index }, }), }; } @@ -676,8 +734,8 @@ const Parser = struct { const node = try parser.arena.create(Node.JumpStmt); node.* = .{ .ltoken = tok, - .kind = .Goto, - .semicolon = parser.expectToken(.Semicolon), + .kind = .{ .Goto = tok }, + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } @@ -686,7 +744,7 @@ const Parser = struct { node.* = .{ .ltoken = tok, .kind = .Continue, - .semicolon = parser.expectToken(.Semicolon), + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } @@ -695,7 +753,7 @@ const Parser = struct { node.* = .{ .ltoken = tok, .kind = .Break, - .semicolon = parser.expectToken(.Semicolon), + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } @@ -704,31 +762,35 @@ const Parser = struct { node.* = .{ .ltoken = tok, .kind = .{ .Return = try parser.expr() }, - .semicolon = parser.expectToken(.Semicolon), + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } if (parser.eatToken(.Identifier)) |tok| { - if (parser.eatToken(.Colon)) |col| { + if (parser.eatToken(.Colon)) |_| { const node = try parser.arena.create(Node.Label); node.* = .{ .identifier = tok, - .semicolon = parser.expectToken(.Colon), }; return &node.base; } - putBackToken(tok); + parser.putBackToken(tok); } - if (parser.exprStmt()) |node| return node; + if (try parser.exprStmt()) |node| return node; return null; } /// ExprStmt <- Expr? 
SEMICOLON - fn exprStmt(parser: *Parser) !*Node { + fn exprStmt(parser: *Parser) !?*Node { const node = try parser.arena.create(Node.ExprStmt); + const expr_node = try parser.expr(); + const semicolon = if (expr_node != null) + try parser.expectToken(.Semicolon) + else + parser.eatToken(.Semicolon) orelse return null; node.* = .{ - .expr = try parser.expr(), - .semicolon = parser.expectToken(.Semicolon), + .expr = expr_node, + .semicolon = semicolon, }; return &node.base; } From 795a5039995a1a23ba00d15488565f1a79d3f25b Mon Sep 17 00:00:00 2001 From: Vexu Date: Sun, 5 Jan 2020 19:28:14 +0200 Subject: [PATCH 17/30] std-c tokenizer always add newline token --- lib/std/c/parse.zig | 38 +++++----- lib/std/c/tokenizer.zig | 162 ++++++++++++++++++++++++---------------- 2 files changed, 117 insertions(+), 83 deletions(-) diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 79790fd0ff..e5082d06c0 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -797,38 +797,42 @@ const Parser = struct { fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex { while (true) { - const next_tok = parser.it.next() orelse return null; - if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { - if (next_tok.id == id) { + switch (parser.it.next() orelse return null) { + .LineComment, .MultiLineComment, .Nl => continue, + else => |next_id| if (next_id == id) { return parser.it.index; - } - _ = parser.it.prev(); - return null; + } else { + _ = parser.it.prev(); + return null; + }, } } } fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex { while (true) { - const next_tok = parser.it.next() orelse return error.ParseError; - if (next_tok.id != .LineComment and next_tok.id != .MultiLineComment) { - if (next_tok.id != id) { - try parser.tree.errors.push(.{ + switch (parser.it.next() orelse return null) { + .LineComment, .MultiLineComment, .Nl => continue, + else => |next_id| if (next_id != id) { + return parser.err(.{ .ExpectedToken = .{ .token = parser.it.index, .expected_id = id }, }); - return error.ParseError; - } - return parser.it.index; + } else { + return parser.it.index; + }, } } } fn putBackToken(parser: *Parser, putting_back: TokenIndex) void { while (true) { - const prev_tok = parser.it.prev() orelse return; - if (prev_tok.id == .LineComment or prev_tok.id == .MultiLineComment) continue; - assert(parser.it.list.at(putting_back) == prev_tok); - return; + switch (parser.it.next() orelse return null) { + .LineComment, .MultiLineComment, .Nl => continue, + else => |next_id| { + assert(parser.it.list.at(putting_back) == prev_tok); + return; + }, + } } } diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 4e74b97018..92c139f3c2 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -449,20 +449,12 @@ pub const Tokenizer = struct { switch (state) { .Start => switch (c) { '\n' => { - if (!self.pp_directive) { - result.start = self.index + 1; - continue; - } self.pp_directive = false; result.id = .Nl; self.index += 1; break; }, '\r' => { - if (!self.pp_directive) { - result.start = self.index + 1; - continue; - } state = .Cr; }, '"' => { @@ -612,11 +604,14 @@ pub const Tokenizer = struct { }, .BackSlash => switch (c) { '\n' => { - state = .Start; + state = if (string) .AfterStringLiteral else .Start; }, '\r' => { state = .BackSlashCr; }, + '\t', '\x0B', '\x0C', ' ' => { + // TODO warn + }, else => { result.id = .Invalid; break; @@ -624,7 +619,7 @@ pub const Tokenizer = struct { }, .BackSlashCr => switch (c) { '\n' => { - state 
= .Start; + state = if (string) .AfterStringLiteral else .Start; }, else => { result.id = .Invalid; @@ -700,7 +695,14 @@ pub const Tokenizer = struct { '"' => { state = .StringLiteral; }, - '\n'...'\r', ' ' => {}, + '\\' => { + state = .BackSlash; + }, + '\n', '\r' => { + if (self.pp_directive) + break; + }, + '\t', '\x0B', '\x0C', ' ' => {}, else => { break; }, @@ -1314,60 +1316,64 @@ test "operators" { \\ , & && &= ? < <= << \\ <<= > >= >> >>= ~ # ## \\ - , - &[_]Token.Id{ - .Bang, - .BangEqual, - .Pipe, - .PipePipe, - .PipeEqual, - .Equal, - .EqualEqual, - .LParen, - .RParen, - .LBrace, - .RBrace, - .LBracket, - .RBracket, - .Period, - .Period, - .Period, - .Ellipsis, - .Caret, - .CaretEqual, - .Plus, - .PlusPlus, - .PlusEqual, - .Minus, - .MinusMinus, - .MinusEqual, - .Asterisk, - .AsteriskEqual, - .Percent, - .PercentEqual, - .Arrow, - .Colon, - .Semicolon, - .Slash, - .SlashEqual, - .Comma, - .Ampersand, - .AmpersandAmpersand, - .AmpersandEqual, - .QuestionMark, - .AngleBracketLeft, - .AngleBracketLeftEqual, - .AngleBracketAngleBracketLeft, - .AngleBracketAngleBracketLeftEqual, - .AngleBracketRight, - .AngleBracketRightEqual, - .AngleBracketAngleBracketRight, - .AngleBracketAngleBracketRightEqual, - .Tilde, - .Hash, - .HashHash, - }, - ); + , &[_]Token.Id{ + .Bang, + .BangEqual, + .Pipe, + .PipePipe, + .PipeEqual, + .Equal, + .EqualEqual, + .Nl, + .LParen, + .RParen, + .LBrace, + .RBrace, + .LBracket, + .RBracket, + .Period, + .Period, + .Period, + .Ellipsis, + .Nl, + .Caret, + .CaretEqual, + .Plus, + .PlusPlus, + .PlusEqual, + .Minus, + .MinusMinus, + .MinusEqual, + .Nl, + .Asterisk, + .AsteriskEqual, + .Percent, + .PercentEqual, + .Arrow, + .Colon, + .Semicolon, + .Slash, + .SlashEqual, + .Nl, + .Comma, + .Ampersand, + .AmpersandAmpersand, + .AmpersandEqual, + .QuestionMark, + .AngleBracketLeft, + .AngleBracketLeftEqual, + .AngleBracketAngleBracketLeft, + .Nl, + .AngleBracketAngleBracketLeftEqual, + .AngleBracketRight, + .AngleBracketRightEqual, + .AngleBracketAngleBracketRight, + .AngleBracketAngleBracketRightEqual, + .Tilde, + .Hash, + .HashHash, + .Nl, + }); } test "keywords" { @@ -1388,6 +1394,7 @@ test "keywords" { .Keyword_continue, .Keyword_default, .Keyword_do, + .Nl, .Keyword_double, .Keyword_else, .Keyword_enum, @@ -1397,6 +1404,7 @@ test "keywords" { .Keyword_goto, .Keyword_if, .Keyword_int, + .Nl, .Keyword_long, .Keyword_register, .Keyword_return, @@ -1404,6 +1412,7 @@ test "keywords" { .Keyword_signed, .Keyword_sizeof, .Keyword_static, + .Nl, .Keyword_struct, .Keyword_switch, .Keyword_typedef, @@ -1411,6 +1420,7 @@ test "keywords" { .Keyword_unsigned, .Keyword_void, .Keyword_volatile, + .Nl, .Keyword_while, .Keyword_bool, .Keyword_complex, @@ -1418,12 +1428,14 @@ test "keywords" { .Keyword_inline, .Keyword_restrict, .Keyword_alignas, + .Nl, .Keyword_alignof, .Keyword_atomic, .Keyword_generic, .Keyword_noreturn, .Keyword_static_assert, .Keyword_thread_local, + .Nl, }); } @@ -1469,7 +1481,10 @@ test "line continuation" { \\ bar \\"foo\ \\ bar" - \\ + \\#define "foo" + \\ "bar" + \\#define "foo" \ + \\ "bar" , &[_]Token.Id{ .Hash, .Keyword_define, @@ -1477,6 +1492,14 @@ test "line continuation" { .Identifier, .Nl, .{ .StringLiteral = .None }, + .Hash, + .Keyword_define, + .{ .StringLiteral = .None }, + .Nl, + .{ .StringLiteral = .None }, + .Hash, + .Keyword_define, + .{ .StringLiteral = .None }, }); } @@ -1499,9 +1522,13 @@ test "string prefix" { .{ .StringLiteral = .Utf32 }, .{ .StringLiteral = .Wide }, .{ .CharLiteral = .None }, + .Nl, .{ .CharLiteral = .Utf16 }, + .Nl, 
.{ .CharLiteral = .Utf32 }, + .Nl, .{ .CharLiteral = .Wide }, + .Nl, }); } @@ -1517,15 +1544,18 @@ test "num suffixes" { .{ .FloatLiteral = .None }, .{ .FloatLiteral = .None }, .{ .FloatLiteral = .None }, + .Nl, .{ .IntegerLiteral = .L }, .{ .IntegerLiteral = .LU }, .{ .IntegerLiteral = .LL }, .{ .IntegerLiteral = .LLU }, .{ .IntegerLiteral = .None }, + .Nl, .{ .IntegerLiteral = .U }, .{ .IntegerLiteral = .LU }, .{ .IntegerLiteral = .LLU }, .{ .IntegerLiteral = .None }, + .Nl, }); } From 5feeff71236eb7bf8257b247660b6e9c33495ee8 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sun, 5 Jan 2020 20:19:17 +0200 Subject: [PATCH 18/30] std-c improve error reporting and decl parsing --- lib/std/c/ast.zig | 16 ++- lib/std/c/parse.zig | 239 +++++++++++++++++++++++++++----------------- 2 files changed, 157 insertions(+), 98 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 0a600eb7f3..8801bbfc48 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -10,12 +10,11 @@ pub const Tree = struct { sources: SourceList, root_node: *Node.Root, arena_allocator: std.heap.ArenaAllocator, - errors: ErrorList, - warnings: ?ErrorList, + msgs: MsgList, pub const SourceList = SegmentedList(Source, 4); pub const TokenList = Source.TokenList; - pub const ErrorList = SegmentedList(Error, 0); + pub const MsgList = SegmentedList(Msg, 0); pub fn deinit(self: *Tree) void { // Here we copy the arena allocator into stack memory, because @@ -26,6 +25,15 @@ pub const Tree = struct { } }; +pub const Msg = struct { + kind: enum { + Error, + Warning, + Note, + }, + inner: Error, +}; + pub const Error = union(enum) { InvalidToken: SingleTokenError("invalid token '{}'"), ExpectedToken: ExpectedToken, @@ -268,7 +276,7 @@ pub const Node = struct { pub const FnDef = struct { base: Node = Node{ .id = .FnDef }, - decl_spec: *DeclSpec, + decl_spec: DeclSpec, declarator: *Node, old_decls: OldDeclList, body: *CompoundStmt, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index e5082d06c0..37e9814f7e 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -70,11 +70,21 @@ const Parser = struct { arena: *Allocator, it: *TokenIterator, tree: *Tree, - typedefs: std.StringHashMap(void), - fn isTypedef(parser: *Parser, tok: TokenIndex) bool { - const token = parser.it.list.at(tok); - return parser.typedefs.contains(token.slice()); + /// only used for scopes + arena_allocator: std.heap.ArenaAllocator, + // scopes: std.SegmentedLists(Scope), + warnings: bool = true, + + // const Scope = struct { + // types: + // syms: + // }; + + fn getTypeDef(parser: *Parser, tok: TokenIndex) bool { + return false; // TODO + // const token = parser.it.list.at(tok); + // return parser.typedefs.contains(token.slice()); } /// Root <- ExternalDeclaration* eof @@ -84,7 +94,7 @@ const Parser = struct { .decls = Node.Root.DeclList.init(parser.arena), .eof = undefined, }; - while (parser.externalDeclarations() catch |err| switch (err) { + while (parser.externalDeclarations() catch |e| switch (e) { error.OutOfMemory => return error.OutOfMemory, error.ParseError => return node, }) |decl| { @@ -95,70 +105,99 @@ const Parser = struct { } /// ExternalDeclaration - /// <- DeclSpec Declarator Declaration* CompoundStmt + /// <- DeclSpec Declarator OldStyleDecl* CompoundStmt /// / Declaration + /// OldStyleDecl <- DeclSpec Declarator (COMMA Declarator)* SEMICOLON fn externalDeclarations(parser: *Parser) !?*Node { + return parser.declarationExtra(false); + } + + /// Declaration + /// <- DeclSpec DeclInit SEMICOLON + /// / StaticAssert + /// DeclInit <- 
Declarator (EQUAL Initializer)? (COMMA Declarator (EQUAL Initializer)?)* + fn declaration(parser: *Parser) !?*Node { + return parser.declarationExtra(true); + } + + fn declarationExtra(parser: *Parser, local: bool) !?*Node { if (try parser.staticAssert()) |decl| return decl; - const ds = try parser.declSpec(); - const dr = (try parser.declarator()); - if (dr == null) - try parser.warning(.{ - .ExpectedDeclarator = .{ .token = parser.it.index }, - }); + var ds = Node.DeclSpec{}; + const got_ds = try parser.declSpec(&ds); + if (local and !got_ds) { + // not a declaration + return null; + } + var dr = try parser.declarator(); // TODO disallow auto and register const next_tok = parser.it.peek().?; + if (next_tok.id == .Eof and !got_ds and dr == null) { + return null; + } switch (next_tok.id) { .Semicolon, .Equal, .Comma, .Eof, - => return parser.declarationExtra(ds, dr, false), - else => {}, + => { + while (dr != null) { + if (parser.eatToken(.Equal)) |tok| { + // TODO typedef + // dr.?.init = try parser.expect(initializer, .{ + // .ExpectedInitializer = .{ .token = parser.it.index }, + // }); + } + if (parser.eatToken(.Comma) != null) break; + dr = (try parser.declarator()) orelse return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + }); + // .push(dr); + } + const semicolon = try parser.expectToken(.Semicolon); + + // TODO VarDecl, TypeDecl, TypeDef + return null; + }, + else => { + if (dr == null) + return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + }); + var old_decls = Node.FnDef.OldDeclList.init(parser.arena); + while (true) { + var old_ds = Node.DeclSpec{}; + if (!(try parser.declSpec(&old_ds))) { + // not old decl + break; + } + var old_dr = (try parser.declarator()); + // if (old_dr == null) + // try parser.err(.{ + // .NoParamName = .{ .token = parser.it.index }, + // }); + // try old_decls.push(decl); + } + const body = (try parser.compoundStmt()) orelse return parser.err(.{ + .ExpectedFnBody = .{ .token = parser.it.index }, + }); + + const node = try parser.arena.create(Node.FnDef); + node.* = .{ + .decl_spec = ds, + .declarator = dr orelse return null, + .old_decls = old_decls, + .body = @fieldParentPtr(Node.CompoundStmt, "base", body), + }; + return &node.base; + }, } - var old_decls = Node.FnDef.OldDeclList.init(parser.arena); - while (try parser.declaration()) |decl| { - // validate declaration - try old_decls.push(decl); - } - const body = try parser.expect(compoundStmt, .{ - .ExpectedFnBody = .{ .token = parser.it.index }, - }); - - const node = try parser.arena.create(Node.FnDef); - node.* = .{ - .decl_spec = ds, - .declarator = dr orelse return null, - .old_decls = old_decls, - .body = @fieldParentPtr(Node.CompoundStmt, "base", body), - }; - return &node.base; - } - - /// Declaration - /// <- DeclSpec (Declarator (EQUAL Initializer)? 
COMMA)* SEMICOLON - /// / StaticAssert - fn declaration(parser: *Parser) !?*Node { - if (try parser.staticAssert()) |decl| return decl; - const ds = try parser.declSpec(); - const dr = (try parser.declarator()); - if (dr == null) - try parser.warning(.{ - .ExpectedDeclarator = .{ .token = parser.it.index }, - }); - // TODO disallow threadlocal without static or extern - return parser.declarationExtra(ds, dr, true); - } - - fn declarationExtra(parser: *Parser, ds: *Node.DeclSpec, dr: ?*Node, local: bool) !?*Node { } /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON fn staticAssert(parser: *Parser) !?*Node { const tok = parser.eatToken(.Keyword_static_assert) orelse return null; _ = try parser.expectToken(.LParen); - const const_expr = try parser.expect(constExpr, .{ - .ExpectedExpr = .{ .token = parser.it.index }, - }); + const const_expr = try parser.constExpr(); _ = try parser.expectToken(.Comma); const str = try parser.expectToken(.StringLiteral); _ = try parser.expectToken(.RParen); @@ -173,11 +212,13 @@ const Parser = struct { } /// DeclSpec <- (StorageClassSpec / TypeSpec / FnSpec / AlignSpec)* - fn declSpec(parser: *Parser) !*Node.DeclSpec { - const ds = try parser.arena.create(Node.DeclSpec); - ds.* = .{}; - while ((try parser.storageClassSpec(ds)) or (try parser.typeSpec(&ds.type_spec)) or (try parser.fnSpec(ds)) or (try parser.alignSpec(ds))) {} - return ds; + /// returns true if any tokens were consumed + fn declSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { + var got = false; + while ((try parser.storageClassSpec(ds)) or (try parser.typeSpec(&ds.type_spec)) or (try parser.fnSpec(ds)) or (try parser.alignSpec(ds))) { + got = true; + } + return got; } /// StorageClassSpec @@ -213,7 +254,7 @@ const Parser = struct { } else return false; return true; } - try parser.warning(.{ + try parser.warn(.{ .DuplicateSpecifier = .{ .token = parser.it.index }, }); return true; @@ -420,7 +461,7 @@ const Parser = struct { if (type_spec.spec != .None) break :blk; _ = try parser.expectToken(.LParen); - const name = try parser.expect(typeName, .{ + const name = (try parser.typeName()) orelse return parser.err(.{ .ExpectedTypeName = .{ .token = parser.it.index }, }); type_spec.spec.Atomic = .{ @@ -440,7 +481,7 @@ const Parser = struct { @panic("TODO record type"); // return true; } else if (parser.eatToken(.Identifier)) |tok| { - if (!parser.isTypedef(tok)) { + if (!parser.getTypeDef(tok)) { parser.putBackToken(tok); return false; } @@ -450,13 +491,12 @@ const Parser = struct { return true; } } - try parser.tree.errors.push(.{ + return parser.err(.{ .InvalidTypeSpecifier = .{ .token = parser.it.index, .type_spec = type_spec, }, }); - return error.ParseError; } /// TypeQual <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic @@ -481,7 +521,7 @@ const Parser = struct { } else return false; return true; } - try parser.warning(.{ + try parser.warn(.{ .DuplicateQualifier = .{ .token = parser.it.index }, }); return true; @@ -501,7 +541,7 @@ const Parser = struct { } else return false; return true; } - try parser.warning(.{ + try parser.warn(.{ .DuplicateSpecifier = .{ .token = parser.it.index }, }); return true; @@ -511,11 +551,9 @@ const Parser = struct { fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool { if (parser.eatToken(.Keyword_alignas)) |tok| { _ = try parser.expectToken(.LParen); - const node = (try parser.typeName()) orelse (try parser.expect(constExpr, .{ - .ExpectedExpr = .{ .token = parser.it.index }, - })); + const 
node = (try parser.typeName()) orelse (try parser.constExpr()); if (ds.align_spec != null) { - try parser.warning(.{ + try parser.warn(.{ .DuplicateSpecifier = .{ .token = parser.it.index }, }); } @@ -594,7 +632,16 @@ const Parser = struct { fn assignmentExpr(parser: *Parser) !*Node {} /// ConstExpr <- ConditionalExpr - const constExpr = conditionalExpr; + fn constExpr(parser: *Parser) Error!*Node { + const start = parser.it.index; + const expression = try parser.conditionalExpr(); + // TODO + // if (expression == nullor expression.?.value == null) + // return parser.err(.{ + // .ConsExpr = start, + // }); + return expression.?; + } /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? fn conditionalExpr(parser: *Parser) !*Node {} @@ -671,7 +718,7 @@ const Parser = struct { /// / PERIOD IDENTIFIER fn designator(parser: *Parser) !*Node {} - /// CompoundStmt <- LBRACE (Stmt / Declaration)* RBRACE + /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE fn compoundStmt(parser: *Parser) Error!?*Node { const lbrace = parser.eatToken(.LBrace) orelse return null; const body_node = try parser.arena.create(Node.CompoundStmt); @@ -680,7 +727,7 @@ const Parser = struct { .statements = Node.CompoundStmt.StmtList.init(parser.arena), .rbrace = undefined, }; - while ((try parser.stmt()) orelse (try parser.declaration())) |node| + while ((try parser.declaration()) orelse (try parser.stmt())) |node| try body_node.statements.push(node); body_node.rbrace = try parser.expectToken(.RBrace); return &body_node.base; @@ -708,7 +755,7 @@ const Parser = struct { _ = try parser.expectToken(.LParen); node.* = .{ .@"if" = tok, - .cond = try parser.expect(expr, .{ + .cond = (try parser.expr()) orelse return parser.err(.{ .ExpectedExpr = .{ .token = parser.it.index }, }), .@"else" = null, @@ -717,7 +764,7 @@ const Parser = struct { if (parser.eatToken(.Keyword_else)) |else_tok| { node.@"else" = .{ .tok = else_tok, - .stmt = try parser.expect(stmt, .{ + .stmt = (try parser.stmt()) orelse return parser.err(.{ .ExpectedStmt = .{ .token = parser.it.index }, }), }; @@ -797,7 +844,7 @@ const Parser = struct { fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex { while (true) { - switch (parser.it.next() orelse return null) { + switch ((parser.it.next() orelse return null).id) { .LineComment, .MultiLineComment, .Nl => continue, else => |next_id| if (next_id == id) { return parser.it.index; @@ -811,7 +858,7 @@ const Parser = struct { fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex { while (true) { - switch (parser.it.next() orelse return null) { + switch ((parser.it.next() orelse return error.ParseError).id) { .LineComment, .MultiLineComment, .Nl => continue, else => |next_id| if (next_id != id) { return parser.err(.{ @@ -826,9 +873,10 @@ const Parser = struct { fn putBackToken(parser: *Parser, putting_back: TokenIndex) void { while (true) { - switch (parser.it.next() orelse return null) { + const prev_tok = parser.it.next() orelse return; + switch (prev_tok.id) { .LineComment, .MultiLineComment, .Nl => continue, - else => |next_id| { + else => { assert(parser.it.list.at(putting_back) == prev_tok); return; }, @@ -836,23 +884,26 @@ const Parser = struct { } } - fn expect( - parser: *Parser, - parseFn: fn (*Parser) Error!?*Node, - err: ast.Error, // if parsing fails - ) Error!*Node { - return (try parseFn(parser)) orelse { - try parser.tree.errors.push(err); - return error.ParseError; - }; - } - - fn warning(parser: *Parser, err: ast.Error) Error!void { - if 
(parser.tree.warnings) |*w| { - try w.push(err); - return; - } - try parser.tree.errors.push(err); + fn err(parser: *Parser, msg: ast.Error) Error { + try parser.tree.msgs.push(.{ + .kind = .Error, + .inner = msg, + }); return error.ParseError; } + + fn warn(parser: *Parser, msg: ast.Error) Error!void { + try parser.tree.msgs.push(.{ + .kind = if (parser.warnings) .Warning else .Error, + .inner = msg, + }); + if (!parser.warnings) return error.ParseError; + } + + fn note(parser: *Parser, msg: ast.Error) Error!void { + try parser.tree.msgs.push(.{ + .kind = .Note, + .inner = msg, + }); + } }; From d5d52af26ec3d3e6a171564112978a7d8c96aba4 Mon Sep 17 00:00:00 2001 From: Vexu Date: Mon, 6 Jan 2020 00:06:33 +0200 Subject: [PATCH 19/30] std-c parse pointer --- lib/std/c/ast.zig | 84 ++++++++++++++++++++++++++++++++++++++++++--- lib/std/c/parse.zig | 21 ++++++++---- 2 files changed, 93 insertions(+), 12 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 8801bbfc48..5d34d26fe2 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -126,7 +126,9 @@ pub const Node = struct { CompoundStmt, IfStmt, StaticAssert, - FnDef, + Fn, + Typedef, + Var, }; pub const Root = struct { @@ -274,13 +276,85 @@ pub const Node = struct { semicolon: TokenIndex, }; - pub const FnDef = struct { - base: Node = Node{ .id = .FnDef }, + pub const Declarator = struct { + pointer: *Pointer, + identifier: ?TokenIndex, + kind: union(enum) { + Simple, + Complex: struct { + lparen: TokenIndex, + inner: *Declarator, + rparen: TokenIndex, + }, + Fn: ParamList, + Array: ArrayList, + }, + + pub const ArrayList = std.SegmentedList(*Array, 2); + pub const ParamList = std.SegmentedList(*Param, 4); + }; + + pub const Array = union(enum) { + Unspecified, + Variable: TokenIndex, + Known: *Expr, + }; + + pub const Pointer = struct { + asterisk: TokenIndex, + qual: TypeQual, + pointer: ?*Pointer, + }; + + pub const Param = struct { + kind: union(enum) { + Variable, + Old: TokenIndex, + Normal: struct { + decl_spec: *DeclSpec, + declarator: *Declarator, + }, + }, + }; + + pub const Fn = struct { + base: Node = Node{ .id = .Fn }, decl_spec: DeclSpec, - declarator: *Node, + declarator: *Declarator, old_decls: OldDeclList, - body: *CompoundStmt, + body: ?*CompoundStmt, pub const OldDeclList = SegmentedList(*Node, 0); }; + + pub const Typedef = struct { + base: Node = Node{ .id = .Typedef }, + decl_spec: DeclSpec, + declarators: DeclaratorList, + + pub const DeclaratorList = std.SegmentedList(*Declarator, 2); + }; + + pub const Var = struct { + base: Node = Node{ .id = .Var }, + decl_spec: DeclSpec, + initializers: Initializers, + + pub const Initializers = std.SegmentedList(*Initialized, 2); + }; + + pub const Initialized = struct { + declarator: *Declarator, + eq: TokenIndex, + init: Initializer, + }; + + pub const Initializer = union(enum) { + list: struct { + initializers: InitializerList, + rbrace: TokenIndex, + }, + expr: *Expr, + pub const InitializerList = std.SegmentedList(*Initializer, 4); + }; }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 37e9814f7e..0925d65d8f 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -588,12 +588,19 @@ const Parser = struct { /// RecordDeclarator <- Declarator? (COLON ConstExpr)? fn recordDeclarator(parser: *Parser) !*Node {} - /// Declarator <- Pointer? DirectDeclarator - fn declarator(parser: *Parser) !*Node {} - /// Pointer <- ASTERISK TypeQual* Pointer? 
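+    /// e.g. in `int *const *p;` the first ASTERISK becomes the outer node
+    /// carrying the `const` qualifier, and its `pointer` field holds the
+    /// second, unqualified pointer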
- fn pointer(parser: *Parser) !*Node {} - + fn pointer(parser: *Parser) !?*Node.Pointer { + const asterisk = parser.eatToken(.Asterisk) orelse return null; + const node = try parser.arena.create(Node.Pointer); + node.* = .{ + .asterisk = asterisk, + .qual = .{}, + .pointer = null, + }; + while (try parser.typeQual(&node.qual)) {} + node.pointer = try parser.pointer(); + return node; + } /// DirectDeclarator /// <- IDENTIFIER /// / LPAREN Declarator RPAREN @@ -687,7 +694,7 @@ const Parser = struct { /// PrimaryExpr /// <- IDENTIFIER - /// / INTEGERLITERAL / FLITERAL / STRINGLITERAL / CHARLITERAL + /// / INTEGERLITERAL / FLOATLITERAL / STRINGLITERAL / CHARLITERAL /// / LPAREN Expr RPAREN /// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN fn primaryExpr(parser: *Parser) !*Node {} @@ -714,7 +721,7 @@ const Parser = struct { fn initializer(parser: *Parser) !*Node {} /// Designator - /// <- LBRACKET Initializers RBRACKET + /// <- LBRACKET ConstExpr RBRACKET /// / PERIOD IDENTIFIER fn designator(parser: *Parser) !*Node {} From 3ed6d7d24589aa295409880239833fdc8d6be9d6 Mon Sep 17 00:00:00 2001 From: Vexu Date: Mon, 6 Jan 2020 14:41:53 +0200 Subject: [PATCH 20/30] std-c parser declarator --- lib/std/c/ast.zig | 53 +++++++++++------ lib/std/c/parse.zig | 137 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 153 insertions(+), 37 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 5d34d26fe2..b248ce2fbc 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -41,6 +41,7 @@ pub const Error = union(enum) { ExpectedStmt: SingleTokenError("expected statement, found '{}'"), ExpectedTypeName: SingleTokenError("expected type name, found '{}'"), ExpectedFnBody: SingleTokenError("expected function body, found '{}'"), + ExpectedDeclarator: SingleTokenError("expected declarator, found '{}'"), ExpectedInitializer: SingleTokenError("expected initializer, found '{}'"), InvalidTypeSpecifier: InvalidTypeSpecifier, DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), @@ -55,6 +56,7 @@ pub const Error = union(enum) { .ExpectedTypeName => |*x| return x.render(tokens, stream), .ExpectedDeclarator => |*x| return x.render(tokens, stream), .ExpectedFnBody => |*x| return x.render(tokens, stream), + .ExpectedInitializer => |*x| return x.render(tokens, stream), .InvalidTypeSpecifier => |*x| return x.render(tokens, stream), .DuplicateQualifier => |*x| return x.render(tokens, stream), .DuplicateSpecifier => |*x| return x.render(tokens, stream), @@ -70,6 +72,7 @@ pub const Error = union(enum) { .ExpectedTypeName => |x| return x.token, .ExpectedDeclarator => |x| return x.token, .ExpectedFnBody => |x| return x.token, + .ExpectedInitializer => |x| return x.token, .InvalidTypeSpecifier => |x| return x.token, .DuplicateQualifier => |x| return x.token, .DuplicateSpecifier => |x| return x.token, @@ -277,30 +280,48 @@ pub const Node = struct { }; pub const Declarator = struct { + base: Node = Node{ .id = .Declarator }, pointer: *Pointer, - identifier: ?TokenIndex, - kind: union(enum) { - Simple, + prefix: union(enum) { + None, + Identifer: TokenIndex, Complex: struct { lparen: TokenIndex, - inner: *Declarator, + inner: *Node, rparen: TokenIndex, }, - Fn: ParamList, - Array: ArrayList, + }, + suffix: union(enum) { + None, + Fn: struct { + lparen: TokenIndex, + params: Params, + rparen: TokenIndex, + }, + Array: Arrays, }, - pub const ArrayList = std.SegmentedList(*Array, 2); - pub const ParamList = std.SegmentedList(*Param, 4); + pub const Arrays = 
std.SegmentedList(*Array, 2);
+        pub const Params = std.SegmentedList(*Param, 4);
     };
 
-    pub const Array = union(enum) {
-        Unspecified,
-        Variable: TokenIndex,
-        Known: *Expr,
+    pub const Array = struct {
+        lbracket: TokenIndex,
+        inner: union(enum) {
+            Inferred,
+            Unspecified: TokenIndex,
+            Variable: struct {
+                asterisk: ?TokenIndex,
+                static: ?TokenIndex,
+                qual: TypeQual,
+                expr: *Expr,
+            },
+        },
+        rbracket: TokenIndex,
     };
 
     pub const Pointer = struct {
+        base: Node = Node{ .id = .Pointer },
         asterisk: TokenIndex,
         qual: TypeQual,
         pointer: ?*Pointer,
     };
 
     pub const Param = struct {
         kind: union(enum) {
             Variable,
             Old: TokenIndex,
             Normal: struct {
                 decl_spec: *DeclSpec,
-                declarator: *Declarator,
+                declarator: *Node,
             },
         },
     };
 
     pub const Fn = struct {
         base: Node = Node{ .id = .Fn },
         decl_spec: DeclSpec,
-        declarator: *Declarator,
+        declarator: *Node,
         old_decls: OldDeclList,
         body: ?*CompoundStmt,
 
         pub const OldDeclList = SegmentedList(*Node, 0);
     };
 
     pub const Typedef = struct {
         base: Node = Node{ .id = .Typedef },
         decl_spec: DeclSpec,
         declarators: DeclaratorList,
 
-        pub const DeclaratorList = std.SegmentedList(*Declarator, 2);
+        pub const DeclaratorList = Root.DeclList;
     };
 
     pub const Var = struct {
         base: Node = Node{ .id = .Var },
         decl_spec: DeclSpec,
         initializers: Initializers,
 
         pub const Initializers = std.SegmentedList(*Initialized, 2);
     };
 
     pub const Initialized = struct {
-        declarator: *Declarator,
+        declarator: *Node,
         eq: TokenIndex,
         init: Initializer,
     };
diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig
index 0925d65d8f..016de8826e 100644
--- a/lib/std/c/parse.zig
+++ b/lib/std/c/parse.zig
@@ -589,7 +589,7 @@ const Parser = struct {
     fn recordDeclarator(parser: *Parser) !*Node {}
 
     /// Pointer <- ASTERISK TypeQual* Pointer?
-    fn pointer(parser: *Parser) !?*Node.Pointer {
+    fn pointer(parser: *Parser) Error!?*Node {
         const asterisk = parser.eatToken(.Asterisk) orelse return null;
         const node = try parser.arena.create(Node.Pointer);
         node.* = .{
@@ -599,34 +599,129 @@ const Parser = struct {
         };
         while (try parser.typeQual(&node.qual)) {}
         node.pointer = try parser.pointer();
-        return node;
+        return &node.base;
     }
+
+    const Named = enum {
+        Must,
+        Allowed,
+        Forbidden,
+    };
+
+    /// Declarator <- Pointer? DeclaratorSuffix
+    /// DeclaratorPrefix
+    /// <- IDENTIFIER // if named != .Forbidden
+    /// / LPAREN Declarator RPAREN
+    /// / (none) // if named != .Must
+    /// DeclaratorSuffix
+    /// <- DeclaratorPrefix (LBRACKET ArrayDeclarator? RBRACKET)*
+    /// / DeclaratorPrefix LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN
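+    /// e.g. for `(*fp)(void)` the prefix is the parenthesized inner
+    /// declarator `*fp` and the suffix is the function parameter list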
+    fn declarator(parser: *Parser, named: Named) Error!?*Node {
+        const ptr = try parser.pointer();
+        var node: *Node.Declarator = undefined;
+        // prefix
+        if (parser.eatToken(.LParen)) |lparen| {
+            const inner = (try parser.declarator(named)) orelse return parser.err(.{
+                .ExpectedDeclarator = .{ .token = lparen + 1 },
+            });
+            node = try parser.arena.create(Node.Declarator);
+            node.* = .{
+                .pointer = ptr,
+                .prefix = .{
+                    .Complex = .{
+                        .lparen = lparen,
+                        .inner = inner,
+                        .rparen = try parser.expectToken(.RParen),
+                    },
+                },
+                .suffix = .None,
+            };
+        } else if (named != .Forbidden) {
+            if (parser.eatToken(.Identifier)) |tok| {
+                node = try parser.arena.create(Node.Declarator);
+                node.* = .{
+                    .pointer = ptr,
+                    .prefix = .{ .Identifer = tok },
+                    .suffix = .None,
+                };
+            } else if (named == .Must) {
+                return parser.err(.{
+                    .ExpectedToken = .{ .token = parser.it.index, .expected_id = .Identifier },
+                });
+            } else {
+                return ptr;
+            }
+        } else {
+            node = try parser.arena.create(Node.Declarator);
+            node.* = .{
+                .pointer = ptr,
+                .prefix = .None,
+                .suffix = .None,
+            };
+        }
+        // suffix
+        if (parser.eatToken(.LParen)) |lparen| {
+            node.suffix = .{
+                .Fn = .{
+                    .lparen = lparen,
+                    .params = Node.Declarator.Params.init(parser.arena),
+                    .rparen = undefined,
+                },
+            };
+            try parser.paramDecl(node);
+            node.suffix.Fn.rparen = try parser.expectToken(.RParen);
+        } else {
+            while (try parser.arrayDeclarator(node)) |arr| {
+                if (node.suffix == .None)
+                    node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) };
+                try node.suffix.Array.push(arr);
+            }
+        }
+        if (parser.eatToken(.LParen) orelse parser.eatToken(.LBracket)) |tok|
+            return parser.err(.{
+                .InvalidDeclarator = .{ .token = tok },
+            });
+        return &node.base;
+    }
 
-    /// DirectDeclarator
-    /// <- IDENTIFIER
-    /// / LPAREN Declarator RPAREN
-    /// / DirectDeclarator LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET
-    /// / DirectDeclarator LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN
-    fn directDeclarator(parser: *Parser) !*Node {}
-
-    /// BracketDeclarator
-    /// <- Keyword_static TypeQual* AssignmentExpr
+    /// ArrayDeclarator
+    /// <- ASTERISK
+    /// / Keyword_static TypeQual* AssignmentExpr
     /// / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr)
     /// / TypeQual+ AssignmentExpr?
     /// / AssignmentExpr
-    fn bracketDeclarator(parser: *Parser) !*Node {}
+    fn arrayDeclarator(parser: *Parser, dr: *Node.Declarator) !?*Node.Array {
+        const lbracket = parser.eatToken(.LBracket) orelse return null;
+        const arr = try parser.arena.create(Node.Array);
+        arr.* = .{
+            .lbracket = lbracket,
+            .inner = .Inferred,
+            .rbracket = undefined,
+        };
+        if (parser.eatToken(.Asterisk)) |tok| {
+            arr.inner = .{ .Unspecified = tok };
+        } else {
+            // TODO
+        }
+        arr.rbracket = try parser.expectToken(.RBracket);
+        return arr;
+    }
 
+    /// Params <- ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?
     /// ParamDecl <- DeclSpec (Declarator / AbstractDeclarator)
-    fn paramDecl(parser: *Parser) !*Node {}
+    fn paramDecl(parser: *Parser, dr: *Node.Declarator) !void {
+        var old_style = false;
+        while (true) {
+            var ds = Node.DeclSpec{};
+            if (try parser.declSpec(&ds)) {
+                //TODO
+            } else if (parser.eatToken(.Identifier) != null) {
+                old_style = true;
+            } else if (parser.eatToken(.Ellipsis) != null) {
+                // TODO
+            } else break;
+        }
+    }
 
-    /// AbstractDeclarator <- Pointer? DirectAbstractDeclarator?
-    fn abstractDeclarator(parser: *Parser) !*Node {}
-
-    /// DirectAbstractDeclarator
-    /// <- IDENTIFIER
-    /// / LPAREN DirectAbstractDeclarator RPAREN
-    /// / DirectAbstractDeclarator? LBRACKET (ASTERISK / BracketDeclarator)? RBRACKET
-    /// / DirectAbstractDeclarator? LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? 
RPAREN - fn directAbstractDeclarator(parser: *Parser) !*Node {} + fn paramDecl(parser: *Parser, dr: *Node.Declarator) !void { + var old_style = false; + while (true) { + var ds = Node.DeclSpec; + if (try parser.declSpec(&ds)) { + //TODO + } else if (parser.eatToken(.Identifier)) { + old_style = true; + } else if (parser.eatToken(.Ellipsis)) { + // TODO + } + } + } /// Expr <- AssignmentExpr (COMMA Expr)* fn expr(parser: *Parser) !*Node {} From df12c1328eb9c2af006dfd9e5cf69046ad6fc235 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 7 Jan 2020 16:05:13 +0200 Subject: [PATCH 21/30] std-c parser typing improvements --- lib/std/c/ast.zig | 212 ++++++++++++++++++++++++++++++++++------ lib/std/c/parse.zig | 94 +++++++++++++----- lib/std/c/tokenizer.zig | 5 +- 3 files changed, 254 insertions(+), 57 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index b248ce2fbc..41315466eb 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -23,6 +23,11 @@ pub const Tree = struct { arena_allocator.deinit(); // self is destroyed } + + pub fn slice(tree: *Tree, token: TokenIndex) []const u8 { + const tok = tree.tokens.at(token); + return tok.source.buffer[tok.start..tok.end]; + } }; pub const Msg = struct { @@ -47,19 +52,19 @@ pub const Error = union(enum) { DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"), - pub fn render(self: *const Error, tokens: *Tree.TokenList, stream: var) !void { + pub fn render(self: *const Error, tree: *Tree, stream: var) !void { switch (self.*) { - .InvalidToken => |*x| return x.render(tokens, stream), - .ExpectedToken => |*x| return x.render(tokens, stream), - .ExpectedExpr => |*x| return x.render(tokens, stream), - .ExpectedStmt => |*x| return x.render(tokens, stream), - .ExpectedTypeName => |*x| return x.render(tokens, stream), - .ExpectedDeclarator => |*x| return x.render(tokens, stream), - .ExpectedFnBody => |*x| return x.render(tokens, stream), - .ExpectedInitializer => |*x| return x.render(tokens, stream), - .InvalidTypeSpecifier => |*x| return x.render(tokens, stream), - .DuplicateQualifier => |*x| return x.render(tokens, stream), - .DuplicateSpecifier => |*x| return x.render(tokens, stream), + .InvalidToken => |*x| return x.render(tree, stream), + .ExpectedToken => |*x| return x.render(tree, stream), + .ExpectedExpr => |*x| return x.render(tree, stream), + .ExpectedStmt => |*x| return x.render(tree, stream), + .ExpectedTypeName => |*x| return x.render(tree, stream), + .ExpectedDeclarator => |*x| return x.render(tree, stream), + .ExpectedFnBody => |*x| return x.render(tree, stream), + .ExpectedInitializer => |*x| return x.render(tree, stream), + .InvalidTypeSpecifier => |*x| return x.render(tree, stream), + .DuplicateQualifier => |*x| return x.render(tree, stream), + .DuplicateSpecifier => |*x| return x.render(tree, stream), } } @@ -83,8 +88,8 @@ pub const Error = union(enum) { token: TokenIndex, expected_id: @TagType(Token.Id), - pub fn render(self: *const ExpectedToken, tokens: *Tree.TokenList, stream: var) !void { - const found_token = tokens.at(self.token); + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { + const found_token = tree.tokens.at(self.token); if (found_token.id == .Invalid) { return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()}); } else { @@ -98,10 +103,10 @@ pub const Error = union(enum) { token: TokenIndex, type_spec: *Node.TypeSpec, - pub fn render(self: *const ExpectedToken, 
tokens: *Tree.TokenList, stream: var) !void { + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { try stream.write("invalid type specifier '"); - try type_spec.spec.print(tokens, stream); - const token_name = tokens.at(self.token).id.symbol(); + try type_spec.spec.print(tree, stream); + const token_name = tree.tokens.at(self.token).id.symbol(); return stream.print("{}'", .{token_name}); } }; @@ -110,14 +115,59 @@ pub const Error = union(enum) { return struct { token: TokenIndex, - pub fn render(self: *const @This(), tokens: *Tree.TokenList, stream: var) !void { - const actual_token = tokens.at(self.token); + pub fn render(self: *const @This(), tree: *Tree, stream: var) !void { + const actual_token = tree.tokens.at(self.token); return stream.print(msg, .{actual_token.id.symbol()}); } }; } }; +pub const Type = struct { + pub const TypeList = std.SegmentedList(*Type, 4); + @"const": bool, + atomic: bool, + @"volatile": bool, + restrict: bool, + + id: union(enum) { + Int: struct { + quals: Qualifiers, + id: Id, + is_signed: bool, + + pub const Id = enum { + Char, + Short, + Int, + Long, + LongLong, + }; + }, + Float: struct { + quals: Qualifiers, + id: Id, + + pub const Id = enum { + Float, + Double, + LongDouble, + }; + }, + Pointer: struct { + quals: Qualifiers, + child_type: *Type, + }, + Function: struct { + return_type: *Type, + param_types: TypeList, + }, + Typedef: *Type, + Record: *Node.RecordType, + Enum: *Node.EnumType, + }, +}; + pub const Node = struct { id: Id, @@ -205,22 +255,128 @@ pub const Node = struct { typename: *Node, rparen: TokenIndex, }, + Enum: *EnumType, + Record: *RecordType, + Typedef: struct { + sym: TokenIndex, + sym_type: *Type, + }, - //todo - // @"enum", - // record, - - Typedef: TokenIndex, - - pub fn print(self: *@This(), self: *const @This(), tokens: *Tree.TokenList, stream: var) !void { - switch (self) { + pub fn print(self: *@This(), self: *const @This(), tree: *Tree, stream: var) !void { + switch (self.spec) { .None => unreachable, - else => @panic("TODO print type specifier"), + .Void => |index| try stream.write(tree.slice(index)), + .Char => |char| { + if (char.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(char.char)); + }, + .Short => |short| { + if (short.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(short.short)); + if (short.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Int => |int| { + if (int.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + if (int.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Long => |long| { + if (long.sign) |s| { + try stream.write(tree.slice(s)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(long.long)); + if (long.longlong) |l| { + try stream.writeByte(' '); + try stream.write(tree.slice(l)); + } + if (long.int) |i| { + try stream.writeByte(' '); + try stream.write(tree.slice(i)); + } + }, + .Float => |float| { + try stream.write(tree.slice(float.float)); + if (float.complex) |c| { + try stream.writeByte(' '); + try stream.write(tree.slice(c)); + } + }, + .Double => |double| { + if (double.long) |l| { + try stream.write(tree.slice(l)); + try stream.writeByte(' '); + } + try stream.write(tree.slice(double.double)); + if (double.complex) |c| { + try stream.writeByte(' '); + try stream.write(tree.slice(c)); + } + }, + .Bool => |index| try 
stream.write(tree.slice(index)), + .Typedef => |typedef| try stream.write(tree.slice(typedef.sym)), + else => try stream.print("TODO print {}", self.spec), } } } = .None, }; + pub const EnumType = struct { + tok: TokenIndex, + name: ?TokenIndex, + body: ?struct { + lbrace: TokenIndex, + + /// always EnumField + fields: FieldList, + rbrace: TokenIndex, + }, + + pub const FieldList = Root.DeclList; + }; + + pub const EnumField = struct { + base: Node = Node{ .id = EnumField }, + name: TokenIndex, + value: ?*Node, + }; + + pub const RecordType = struct { + kind: union(enum) { + Struct: TokenIndex, + Union: TokenIndex, + }, + name: ?TokenIndex, + body: ?struct { + lbrace: TokenIndex, + + /// RecordField or StaticAssert + fields: FieldList, + rbrace: TokenIndex, + }, + + pub const FieldList = Root.DeclList; + }; + + pub const RecordField = struct { + base: Node = Node{ .id = RecordField }, + // TODO + }; + pub const TypeQual = struct { @"const": ?TokenIndex = null, atomic: ?TokenIndex = null, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 016de8826e..736c25133a 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -3,6 +3,7 @@ const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ast = std.c.ast; const Node = ast.Node; +const Type = ast.Type; const Tree = ast.Tree; const TokenIndex = ast.TokenIndex; const Token = std.c.Token; @@ -57,10 +58,12 @@ pub fn parse(allocator: *Allocator, source: []const u8) !*Tree { } var parser = Parser{ + .symbols = Parser.SymbolList.init(allocator), .arena = arena, .it = &it, .tree = tree, }; + defer parser.symbols.deinit(); tree.root_node = try parser.root(); return tree; @@ -72,19 +75,35 @@ const Parser = struct { tree: *Tree, /// only used for scopes - arena_allocator: std.heap.ArenaAllocator, - // scopes: std.SegmentedLists(Scope), + symbols: SymbolList, warnings: bool = true, - // const Scope = struct { - // types: - // syms: - // }; + const SymbolList = std.ArrayList(Symbol); - fn getTypeDef(parser: *Parser, tok: TokenIndex) bool { - return false; // TODO - // const token = parser.it.list.at(tok); - // return parser.typedefs.contains(token.slice()); + const Symbol = struct { + name: []const u8, + ty: *Type, + }; + + fn pushScope(parser: *Parser) usize { + return parser.symbols.len; + } + + fn popScope(parser: *Parser, len: usize) void { + parser.symbols.resize(len) catch unreachable; + } + + fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type { + const token = parser.it.list.at(tok); + const name = parser.tree.slice(token); + const syms = parser.symbols.toSliceConst(); + var i = syms.len; + while (i > 0) : (i -= 1) { + if (mem.eql(u8, name, syms[i].name)) { + return syms[i].ty; + } + } + return null; } /// Root <- ExternalDeclaration* eof @@ -264,8 +283,8 @@ const Parser = struct { /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary / /// / Keyword_atomic LPAREN TypeName RPAREN - /// / EnumSpecifier - /// / RecordSpecifier + /// / EnumSpec + /// / RecordSpec /// / IDENTIFIER // typedef name /// / TypeQual fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool { @@ -473,22 +492,48 @@ const Parser = struct { } else if (parser.eatToken(.Keyword_enum)) |tok| { if (type_spec.spec != .None) break :blk; - @panic("TODO enum type"); - // return true; + type_spec.Enum = try parser.enumSpec(tok); + return true; } else if (parser.eatToken(.Keyword_union) orelse 
parser.eatToken(.Keyword_struct)) |tok| { if (type_spec.spec != .None) break :blk; - @panic("TODO record type"); - // return true; + type_spec.Record = try parser.recordSpec(); + return true; } else if (parser.eatToken(.Identifier)) |tok| { - if (!parser.getTypeDef(tok)) { + const ty = parser.getSymbol(tok) orelse { parser.putBackToken(tok); return false; - } - type_spec.spec = .{ - .Typedef = tok, }; - return true; + switch (ty) { + .Enum => |e| { + return parser.err(.{ + .MustUseKwToRefer = .{ .kw = e.identifier, .sym = tok }, + }); + }, + .Record => |r| { + return parser.err(.{ + .MustUseKwToRefer = .{ + .kw = switch (r.kind) { + .Struct, .Union => |kw| kw, + }, + .sym = tok, + }, + }); + }, + .Typedef => { + type_spec.spec = .{ + .Typedef = .{ + .sym = tok, + .sym_type = ty, + }, + }; + return true; + }, + else => { + parser.putBackToken(tok); + return false; + }, + } } } return parser.err(.{ @@ -567,13 +612,13 @@ const Parser = struct { return false; } - /// EnumSpecifier <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? + /// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? fn enumSpecifier(parser: *Parser) !*Node {} /// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA? fn enumField(parser: *Parser) !*Node {} - /// RecordSpecifier <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? + /// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? fn recordSpecifier(parser: *Parser) !*Node {} /// RecordField @@ -581,8 +626,7 @@ const Parser = struct { /// \ StaticAssert fn recordField(parser: *Parser) !*Node {} - /// TypeName - /// <- TypeSpec* AbstractDeclarator? + /// TypeName <- TypeSpec* AbstractDeclarator? fn typeName(parser: *Parser) !*Node { /// RecordDeclarator <- Declarator? (COLON ConstExpr)? 
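// A minimal standalone sketch (not part of this patch) of the scope scheme the
// new symbol list enables: a "scope" is just the list's length on entry, and
// popping truncates back to it, so inner declarations shadow outer ones. Note
// the `i - 1` indexing: `i` starts at `syms.len`, so indexing with `i` itself
// would be out of bounds on the first pass of the reverse scan.
const std = @import("std");

const Sym = struct {
    name: []const u8,
    value: u32,
};

fn lookup(syms: []const Sym, name: []const u8) ?u32 {
    var i = syms.len;
    // Scan from the end so the innermost declaration wins.
    while (i > 0) : (i -= 1) {
        if (std.mem.eql(u8, name, syms[i - 1].name)) return syms[i - 1].value;
    }
    return null;
}

test "scopes shadow like a stack" {
    var syms = std.ArrayList(Sym).init(std.heap.page_allocator);
    defer syms.deinit();

    try syms.append(.{ .name = "x", .value = 1 });
    const scope = syms.len; // pushScope: remember the current length
    try syms.append(.{ .name = "x", .value = 2 }); // inner x shadows the outer one
    std.testing.expect(lookup(syms.toSliceConst(), "x").? == 2);
    syms.resize(scope) catch unreachable; // popScope: truncate back
    std.testing.expect(lookup(syms.toSliceConst(), "x").? == 1);
}
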
diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 92c139f3c2..d3c8490c07 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -327,6 +327,7 @@ pub const Token = struct { }; // TODO perfect hash at comptime + // TODO do this in the preprocessor pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id { var hash = std.hash_map.hashString(bytes); for (keywords) |kw| { @@ -347,10 +348,6 @@ pub const Token = struct { return null; } - pub fn slice(tok: Token) []const u8 { - return tok.source.buffer[tok.start..tok.end]; - } - pub const NumSuffix = enum { None, F, From 4184d4c66a26ba10fbc78cc21f2c73db8f0cfcb2 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 7 Jan 2020 19:05:46 +0200 Subject: [PATCH 22/30] std-c parser record and enum specifiers --- lib/std/c/ast.zig | 52 ++++++++++++------ lib/std/c/parse.zig | 130 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 151 insertions(+), 31 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 41315466eb..13c9699e74 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -48,9 +48,12 @@ pub const Error = union(enum) { ExpectedFnBody: SingleTokenError("expected function body, found '{}'"), ExpectedDeclarator: SingleTokenError("expected declarator, found '{}'"), ExpectedInitializer: SingleTokenError("expected initializer, found '{}'"), + ExpectedEnumField: SingleTokenError("expected enum field, found '{}'"), + ExpectedType: SingleTokenError("expected enum field, found '{}'"), InvalidTypeSpecifier: InvalidTypeSpecifier, DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"), + MustUseKwToRefer: MustUseKwToRefer, pub fn render(self: *const Error, tree: *Tree, stream: var) !void { switch (self.*) { @@ -62,9 +65,12 @@ pub const Error = union(enum) { .ExpectedDeclarator => |*x| return x.render(tree, stream), .ExpectedFnBody => |*x| return x.render(tree, stream), .ExpectedInitializer => |*x| return x.render(tree, stream), + .ExpectedEnumField => |*x| return x.render(tree, stream), + .ExpectedType => |*x| return x.render(tree, stream), .InvalidTypeSpecifier => |*x| return x.render(tree, stream), .DuplicateQualifier => |*x| return x.render(tree, stream), .DuplicateSpecifier => |*x| return x.render(tree, stream), + .MustUseKwToRefer => |*x| return x.render(tree, stream), } } @@ -78,9 +84,12 @@ pub const Error = union(enum) { .ExpectedDeclarator => |x| return x.token, .ExpectedFnBody => |x| return x.token, .ExpectedInitializer => |x| return x.token, + .ExpectedEnumField => |x| return x.token, + .ExpectedType => |*x| return x.token, .InvalidTypeSpecifier => |x| return x.token, .DuplicateQualifier => |x| return x.token, .DuplicateSpecifier => |x| return x.token, + .MustUseKwToRefer => |*x| return x.name, } } @@ -111,6 +120,15 @@ pub const Error = union(enum) { } }; + pub const MustUseKwToRefer = struct { + kw: TokenIndex, + name: TokenIndex, + + pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { + return stream.print("must use '{}' tag to refer to type '{}'", .{tree.slice(kw), tree.slice(name)}); + } + }; + fn SingleTokenError(comptime msg: []const u8) type { return struct { token: TokenIndex, @@ -125,14 +143,13 @@ pub const Error = union(enum) { pub const Type = struct { pub const TypeList = std.SegmentedList(*Type, 4); - @"const": bool, - atomic: bool, - @"volatile": bool, - restrict: bool, + @"const": bool = false, + atomic: bool = false, + @"volatile": bool = false, + restrict: 
bool = false, id: union(enum) { Int: struct { - quals: Qualifiers, id: Id, is_signed: bool, @@ -145,7 +162,6 @@ pub const Type = struct { }; }, Float: struct { - quals: Qualifiers, id: Id, pub const Id = enum { @@ -154,10 +170,7 @@ pub const Type = struct { LongDouble, }; }, - Pointer: struct { - quals: Qualifiers, - child_type: *Type, - }, + Pointer: *Type, Function: struct { return_type: *Type, param_types: TypeList, @@ -173,6 +186,8 @@ pub const Node = struct { pub const Id = enum { Root, + EnumField, + RecordField, JumpStmt, ExprStmt, Label, @@ -350,15 +365,16 @@ pub const Node = struct { }; pub const EnumField = struct { - base: Node = Node{ .id = EnumField }, + base: Node = Node{ .id = .EnumField }, name: TokenIndex, value: ?*Node, }; pub const RecordType = struct { - kind: union(enum) { - Struct: TokenIndex, - Union: TokenIndex, + tok: TokenIndex, + kind: enum { + Struct, + Union, }, name: ?TokenIndex, body: ?struct { @@ -373,8 +389,12 @@ pub const Node = struct { }; pub const RecordField = struct { - base: Node = Node{ .id = RecordField }, - // TODO + base: Node = Node{ .id = .RecordField }, + type_spec: TypeSpec, + declarators: DeclaratorList, + semicolon: TokenIndex, + + pub const DeclaratorList = Root.DeclList; }; pub const TypeQual = struct { diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 736c25133a..3b30fc8a48 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -94,12 +94,11 @@ const Parser = struct { } fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type { - const token = parser.it.list.at(tok); - const name = parser.tree.slice(token); + const name = parser.tree.slice(tok); const syms = parser.symbols.toSliceConst(); var i = syms.len; while (i > 0) : (i -= 1) { - if (mem.eql(u8, name, syms[i].name)) { + if (std.mem.eql(u8, name, syms[i].name)) { return syms[i].ty; } } @@ -492,31 +491,29 @@ const Parser = struct { } else if (parser.eatToken(.Keyword_enum)) |tok| { if (type_spec.spec != .None) break :blk; - type_spec.Enum = try parser.enumSpec(tok); + type_spec.spec.Enum = try parser.enumSpec(tok); return true; } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| { if (type_spec.spec != .None) break :blk; - type_spec.Record = try parser.recordSpec(); + type_spec.spec.Record = try parser.recordSpec(tok); return true; } else if (parser.eatToken(.Identifier)) |tok| { const ty = parser.getSymbol(tok) orelse { parser.putBackToken(tok); return false; }; - switch (ty) { + switch (ty.id) { .Enum => |e| { return parser.err(.{ - .MustUseKwToRefer = .{ .kw = e.identifier, .sym = tok }, + .MustUseKwToRefer = .{ .kw = e.tok, .name = tok }, }); }, .Record => |r| { return parser.err(.{ .MustUseKwToRefer = .{ - .kw = switch (r.kind) { - .Struct, .Union => |kw| kw, - }, - .sym = tok, + .kw = r.tok, + .name = tok, }, }); }, @@ -613,18 +610,121 @@ const Parser = struct { } /// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)? 
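// Illustrative C inputs for the EnumSpec rule above:
//
//   enum Color;                      // reference form: name only, body == null
//   enum Color { Red, Green = 2 };   // named, with a field list
//   enum { A, B, };                  // anonymous; a trailing comma is allowed
//
// A hedged sketch of walking the resulting node, assuming the EnumType and
// EnumField shapes introduced in this patch (dumpEnum itself is illustrative,
// not part of the std.c API):
fn dumpEnum(tree: *Tree, node: *Node.EnumType) void {
    if (node.name) |name| std.debug.warn("enum {}\n", .{tree.slice(name)});
    if (node.body) |*body| {
        var i: usize = 0;
        while (i < body.fields.len) : (i += 1) {
            // The field list always holds EnumField nodes.
            const field = @fieldParentPtr(Node.EnumField, "base", body.fields.at(i).*);
            std.debug.warn("  {}\n", .{tree.slice(field.name)});
        }
    }
}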
- fn enumSpecifier(parser: *Parser) !*Node {} + fn enumSpec(parser: *Parser, tok: TokenIndex) !*Node.EnumType { + const node = try parser.arena.create(Node.EnumType); + const name = parser.eatToken(.Identifier); + node.* = .{ + .tok = tok, + .name = name, + .body = null, + }; + const ty = try parser.arena.create(Type); + ty.* = .{ + .id = .{ + .Enum = node, + }, + }; + if (name) |some| + try parser.symbols.append(.{ + .name = parser.tree.slice(some), + .ty = ty, + }); + if (parser.eatToken(.LBrace)) |lbrace| { + var fields = Node.EnumType.FieldList.init(parser.arena); + try fields.push((try parser.enumField()) orelse return parser.err(.{ + .ExpectedEnumField = .{ .token = parser.it.index }, + })); + while (parser.eatToken(.Comma)) |_| { + try fields.push((try parser.enumField()) orelse break); + } + node.body = .{ + .lbrace = lbrace, + .fields = fields, + .rbrace = try parser.expectToken(.RBrace), + }; + } + return node; + } /// EnumField <- IDENTIFIER (EQUAL ConstExpr)? (COMMA EnumField) COMMA? - fn enumField(parser: *Parser) !*Node {} + fn enumField(parser: *Parser) !?*Node { + const name = parser.eatToken(.Identifier) orelse return null; + const node = try parser.arena.create(Node.EnumField); + node.* = .{ + .name = name, + .value = null, + }; + if (parser.eatToken(.Equal)) |eq| { + node.value = try parser.constExpr(); + } + return &node.base; + } /// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)? - fn recordSpecifier(parser: *Parser) !*Node {} + fn recordSpec(parser: *Parser, tok: TokenIndex) !*Node.RecordType { + const node = try parser.arena.create(Node.RecordType); + const name = parser.eatToken(.Identifier); + const is_struct = parser.tree.slice(tok)[0] == 's'; + node.* = .{ + .tok = tok, + .kind = if (is_struct) .Struct else .Union, + .name = name, + .body = null, + }; + const ty = try parser.arena.create(Type); + ty.* = .{ + .id = .{ + .Record = node, + }, + }; + if (name) |some| + try parser.symbols.append(.{ + .name = parser.tree.slice(some), + .ty = ty, + }); + if (parser.eatToken(.LBrace)) |lbrace| { + var fields = Node.RecordType.FieldList.init(parser.arena); + while (true) { + if (parser.eatToken(.RBrace)) |rbrace| { + node.body = .{ + .lbrace = lbrace, + .fields = fields, + .rbrace = rbrace, + }; + break; + } + try fields.push(try parser.recordField()); + } + } + return node; + } /// RecordField /// <- TypeSpec* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON /// \ StaticAssert - fn recordField(parser: *Parser) !*Node {} + fn recordField(parser: *Parser) Error!*Node { + if (try parser.staticAssert()) |decl| return decl; + var got = false; + var type_spec = Node.TypeSpec{}; + while (try parser.typeSpec(&type_spec)) got = true; + if (!got) + return parser.err(.{ + .ExpectedType = .{ .token = parser.it.index }, + }); + const node = try parser.arena.create(Node.RecordField); + node.* = .{ + .type_spec = type_spec, + .declarators = Node.RecordField.DeclaratorList.init(parser.arena), + .semicolon = undefined, + }; + while (true) { + try node.declarators.push(try parser.recordDeclarator()); + if (parser.eatToken(.Comma)) |_| {} else break; + } + + node.semicolon = try parser.expectToken(.Semicolon); + return &node.base; + } /// TypeName <- TypeSpec* AbstractDeclarator? 
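// The TypeName rule is what casts, sizeof, and _Atomic(...) consume, e.g.
// `int`, `const char *`, `int [4]`, or `int (*)(void)`. A hedged sketch of the
// shape this stub will likely take, composing typeSpec with an unnamed
// (abstract) declarator; how the two get packed into one node is left open:
//
//     var type_spec = Node.TypeSpec{};
//     var got = false;
//     while (try parser.typeSpec(&type_spec)) got = true;
//     if (!got)
//         return parser.err(.{
//             .ExpectedType = .{ .token = parser.it.index },
//         });
//     // named == .Forbidden rejects `int x` here but accepts `int (*)(void)`.
//     const abstract = try parser.declarator(.Forbidden);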
fn typeName(parser: *Parser) !*Node { From dbc045706809e7135fdb55ebd7c4a7383f40bf0f Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 7 Jan 2020 20:15:57 +0200 Subject: [PATCH 23/30] std-c declaration parsing --- lib/std/c/ast.zig | 58 ++++++++++--- lib/std/c/parse.zig | 200 ++++++++++++++++++++++++++++---------------- 2 files changed, 173 insertions(+), 85 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 13c9699e74..c17bc2443f 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -51,9 +51,14 @@ pub const Error = union(enum) { ExpectedEnumField: SingleTokenError("expected enum field, found '{}'"), ExpectedType: SingleTokenError("expected enum field, found '{}'"), InvalidTypeSpecifier: InvalidTypeSpecifier, + InvalidStorageClass: SingleTokenError("invalid storage class, found '{}'"), + InvalidDeclarator: SimpleError("invalid declarator"), DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"), DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"), MustUseKwToRefer: MustUseKwToRefer, + FnSpecOnNonFn: SingleTokenError("function specifier '{}' on non function"), + NothingDeclared: SimpleError("declaration doesn't declare anything"), + QualifierIgnored: SingleTokenError("qualifier '{}' ignored"), pub fn render(self: *const Error, tree: *Tree, stream: var) !void { switch (self.*) { @@ -68,9 +73,14 @@ pub const Error = union(enum) { .ExpectedEnumField => |*x| return x.render(tree, stream), .ExpectedType => |*x| return x.render(tree, stream), .InvalidTypeSpecifier => |*x| return x.render(tree, stream), + .InvalidStorageClass => |*x| return x.render(tree, stream), + .InvalidDeclarator => |*x| return x.render(tree, stream), .DuplicateQualifier => |*x| return x.render(tree, stream), .DuplicateSpecifier => |*x| return x.render(tree, stream), .MustUseKwToRefer => |*x| return x.render(tree, stream), + .FnSpecOnNonFn => |*x| return x.render(tree, stream), + .NothingDeclared => |*x| return x.render(tree, stream), + .QualifierIgnored => |*x| return x.render(tree, stream), } } @@ -87,9 +97,14 @@ pub const Error = union(enum) { .ExpectedEnumField => |x| return x.token, .ExpectedType => |*x| return x.token, .InvalidTypeSpecifier => |x| return x.token, + .InvalidStorageClass => |x| return x.token, + .InvalidDeclarator => |x| return x.token, .DuplicateQualifier => |x| return x.token, .DuplicateSpecifier => |x| return x.token, .MustUseKwToRefer => |*x| return x.name, + .FnSpecOnNonFn => |*x| return x.name, + .NothingDeclared => |*x| return x.name, + .QualifierIgnored => |*x| return x.name, } } @@ -125,7 +140,7 @@ pub const Error = union(enum) { name: TokenIndex, pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void { - return stream.print("must use '{}' tag to refer to type '{}'", .{tree.slice(kw), tree.slice(name)}); + return stream.print("must use '{}' tag to refer to type '{}'", .{ tree.slice(kw), tree.slice(name) }); } }; @@ -139,6 +154,18 @@ pub const Error = union(enum) { } }; } + + fn SimpleError(comptime msg: []const u8) type { + return struct { + const ThisError = @This(); + + token: TokenIndex, + + pub fn render(self: *const ThisError, tokens: *Tree.TokenList, stream: var) !void { + return stream.write(msg); + } + }; + } }; pub const Type = struct { @@ -194,9 +221,11 @@ pub const Node = struct { CompoundStmt, IfStmt, StaticAssert, - Fn, + Declarator, + Pointer, + FnDecl, Typedef, - Var, + VarDecl, }; pub const Root = struct { @@ -457,7 +486,7 @@ pub const Node = struct { pub const Declarator = struct { base: Node = Node{ 
.id = .Declarator }, - pointer: *Pointer, + pointer: ?*Pointer, prefix: union(enum) { None, Identifer: TokenIndex, @@ -482,7 +511,7 @@ pub const Node = struct { }; pub const Array = struct { - rbracket: TokenIndex, + lbracket: TokenIndex, inner: union(enum) { Inferred, Unspecified: TokenIndex, @@ -490,7 +519,7 @@ pub const Node = struct { asterisk: ?TokenIndex, static: ?TokenIndex, qual: TypeQual, - expr: *Expr, + // expr: *Expr, }, }, rbracket: TokenIndex, @@ -514,10 +543,10 @@ pub const Node = struct { }, }; - pub const Fn = struct { - base: Node = Node{ .id = .Fn }, + pub const FnDecl = struct { + base: Node = Node{ .id = .FnDecl }, decl_spec: DeclSpec, - declarator: *Node, + declarator: *Declarator, old_decls: OldDeclList, body: ?*CompoundStmt, @@ -528,20 +557,23 @@ pub const Node = struct { base: Node = Node{ .id = .Typedef }, decl_spec: DeclSpec, declarators: DeclaratorList, + semicolon: TokenIndex, pub const DeclaratorList = Root.DeclList; }; - pub const Var = struct { - base: Node = Node{ .id = .Var }, + pub const VarDecl = struct { + base: Node = Node{ .id = .VarDecl }, decl_spec: DeclSpec, initializers: Initializers, + semicolon: TokenIndex, - pub const Initializers = std.SegmentedList(*Initialized, 2); + pub const Initializers = Root.DeclList; }; pub const Initialized = struct { - declarator: *Node, + base: Node = Node{ .id = Initialized }, + declarator: *Declarator, eq: TokenIndex, init: Initializer, }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 3b30fc8a48..ca768017a2 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -105,6 +105,10 @@ const Parser = struct { return null; } + fn declareSymbol(parser: *Parser, decl_spec: *Node.DeclSpec, dr: *Node.Declarator) Error!void { + return; // TODO + } + /// Root <- ExternalDeclaration* eof fn root(parser: *Parser) Allocator.Error!*Node.Root { const node = try parser.arena.create(Node.Root); @@ -140,75 +144,125 @@ const Parser = struct { fn declarationExtra(parser: *Parser, local: bool) !?*Node { if (try parser.staticAssert()) |decl| return decl; + const begin = parser.it.index + 1; var ds = Node.DeclSpec{}; const got_ds = try parser.declSpec(&ds); if (local and !got_ds) { // not a declaration return null; } - var dr = try parser.declarator(); - // TODO disallow auto and register - const next_tok = parser.it.peek().?; - if (next_tok.id == .Eof and !got_ds and dr == null) { - return null; - } - switch (next_tok.id) { - .Semicolon, - .Equal, - .Comma, - .Eof, - => { - while (dr != null) { - if (parser.eatToken(.Equal)) |tok| { - // TODO typedef - // dr.?.init = try parser.expect(initializer, .{ - // .ExpectedInitializer = .{ .token = parser.it.index }, - // }); - } - if (parser.eatToken(.Comma) != null) break; - dr = (try parser.declarator()) orelse return parser.err(.{ - .ExpectedDeclarator = .{ .token = parser.it.index }, - }); - // .push(dr); - } - const semicolon = try parser.expectToken(.Semicolon); - - // TODO VarDecl, TypeDecl, TypeDef - return null; - }, - else => { - if (dr == null) - return parser.err(.{ - .ExpectedDeclarator = .{ .token = parser.it.index }, - }); - var old_decls = Node.FnDef.OldDeclList.init(parser.arena); - while (true) { - var old_ds = Node.DeclSpec{}; - if (!(try parser.declSpec(&old_ds))) { - // not old decl - break; - } - var old_dr = (try parser.declarator()); - // if (old_dr == null) - // try parser.err(.{ - // .NoParamName = .{ .token = parser.it.index }, - // }); - // try old_decls.push(decl); - } - const body = (try parser.compoundStmt()) orelse return parser.err(.{ - 
.ExpectedFnBody = .{ .token = parser.it.index }, - }); - - const node = try parser.arena.create(Node.FnDef); + switch (ds.storage_class) { + .Auto, .Register => |tok| return parser.err(.{ + .InvalidStorageClass = .{ .token = tok }, + }), + .Typedef => { + const node = try parser.arena.create(Node.Typedef); node.* = .{ .decl_spec = ds, - .declarator = dr orelse return null, - .old_decls = old_decls, - .body = @fieldParentPtr(Node.CompoundStmt, "base", body), + .declarators = Node.Typedef.DeclaratorList.init(parser.arena), + .semicolon = undefined, }; + while (true) { + const dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + })); + try parser.declareSymbol(&ds, dr); + try node.declarators.push(&dr.base); + if (parser.eatToken(.Comma)) |_| {} else break; + } return &node.base; }, + else => {}, } + var first_dr = try parser.declarator(.Must); + if (first_dr != null and declaratorIsFunction(first_dr.?)) { + const dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); + try parser.declareSymbol(&ds, dr); + var old_decls = Node.FnDecl.OldDeclList.init(parser.arena); + const body = if (parser.eatToken(.Semicolon)) |_| + null + else blk: { + // TODO first_dr.is_old + // while (true) { + // var old_ds = Node.DeclSpec{}; + // if (!(try parser.declSpec(&old_ds))) { + // // not old decl + // break; + // } + // var old_dr = (try parser.declarator(.Must)); + // // if (old_dr == null) + // // try parser.err(.{ + // // .NoParamName = .{ .token = parser.it.index }, + // // }); + // // try old_decls.push(decl); + // } + const body_node = (try parser.compoundStmt()) orelse return parser.err(.{ + .ExpectedFnBody = .{ .token = parser.it.index }, + }); + break :blk @fieldParentPtr(Node.CompoundStmt, "base", body_node); + }; + + const node = try parser.arena.create(Node.FnDecl); + node.* = .{ + .decl_spec = ds, + .declarator = dr, + .old_decls = old_decls, + .body = body, + }; + return &node.base; + } else { + switch (ds.fn_spec) { + .Inline, .Noreturn => |tok| return parser.err(.{ + .FnSpecOnNonFn = .{ .token = tok }, + }), + else => {}, + } + // TODO threadlocal without static or extern on local variable + const node = try parser.arena.create(Node.VarDecl); + node.* = .{ + .decl_spec = ds, + .initializers = Node.VarDecl.Initializers.init(parser.arena), + .semicolon = undefined, + }; + if (first_dr == null) { + node.semicolon = try parser.expectToken(.Semicolon); + const ok = switch (ds.type_spec.spec) { + .Enum => |e| e.name != null, + .Record => |r| r.name != null, + else => false, + }; + const q = ds.type_spec.qual; + if (!ok) + try parser.warn(.{ + .NothingDeclared = .{ .token = begin }, + }) + else if (q.@"const" orelse q.atomic orelse q.@"volatile" orelse q.restrict) |tok| + try parser.warn(.{ + .QualifierIgnored = .{ .token = tok }, + }); + return &node.base; + } + var dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); + while (true) { + try parser.declareSymbol(&ds, dr); + if (parser.eatToken(.Equal)) |tok| { + try node.initializers.push((try parser.initializer(dr)) orelse return parser.err(.{ + .ExpectedInitializer = .{ .token = parser.it.index }, + })); + } else + try node.initializers.push(&dr.base); + if (parser.eatToken(.Comma) != null) break; + dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ + .ExpectedDeclarator = .{ .token = parser.it.index }, + })); + } + node.semicolon = try parser.expectToken(.Semicolon); + return 
&node.base; + } + } + + fn declaratorIsFunction(dr: *Node) bool { + return false; // TODO } /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON @@ -733,7 +787,7 @@ const Parser = struct { fn recordDeclarator(parser: *Parser) !*Node {} /// Pointer <- ASTERISK TypeQual* Pointer? - fn pointer(parser: *Parser) Error!?*Node { + fn pointer(parser: *Parser) Error!?*Node.Pointer { const asterisk = parser.eatToken(.Asterisk) orelse return null; const node = try parser.arena.create(Node.Pointer); node.* = .{ @@ -743,7 +797,7 @@ const Parser = struct { }; while (try parser.typeQual(&node.qual)) {} node.pointer = try parser.pointer(); - return &node.base; + return node; } const Named = enum { @@ -772,7 +826,7 @@ const Parser = struct { node.* = .{ .pointer = ptr, .prefix = .{ - .Comples = .{ + .Complex = .{ .lparen = lparen, .inner = inner, .rparen = try parser.expectToken(.RParen), @@ -785,7 +839,7 @@ const Parser = struct { node = try parser.arena.create(Node.Declarator); node.* = .{ .pointer = ptr, - .prefix = .{ .Simple = tok }, + .prefix = .{ .Identifer = tok }, .suffix = .None, }; } else if (named == .Must) { @@ -793,7 +847,9 @@ const Parser = struct { .ExpectedToken = .{ .token = parser.it.index, .expected_id = .Identifier }, }); } else { - return ptr; + if (ptr) |some| + return &some.base; + return null; } } else { node = try parser.arena.create(Node.Declarator); @@ -808,16 +864,16 @@ const Parser = struct { node.suffix = .{ .Fn = .{ .lparen = lparen, - .params = .Node.Declarator.Params.init(parser.arena), + .params = Node.Declarator.Params.init(parser.arena), .rparen = undefined, }, }; - try parser.ParamDecl(node); + try parser.paramDecl(node); node.suffix.Fn.rparen = try parser.expectToken(.RParen); } else { - while (parser.arrayDeclarator()) |arr| { + while (try parser.arrayDeclarator()) |arr| { if (node.suffix == .None) - node.suffix = .{ .Array = .Node.Declarator.Arrays.init(parser.arena) }; + node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) }; try node.suffix.Array.push(arr); } } @@ -825,7 +881,7 @@ const Parser = struct { return parser.err(.{ .InvalidDeclarator = .{ .token = tok }, }); - return node; + return &node.base; } /// ArrayDeclarator @@ -834,11 +890,11 @@ const Parser = struct { /// / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr) /// / TypeQual+ AssignmentExpr? 
/// / AssignmentExpr - fn arrayDeclarator(parser: *Parser, dr: *Node.Declarator) !?*Node.Array { + fn arrayDeclarator(parser: *Parser) !?*Node.Array { const lbracket = parser.eatToken(.LBracket) orelse return null; const arr = try parser.arena.create(Node.Array); arr.* = .{ - .lbracket = lbarcket, + .lbracket = lbracket, .inner = .Inferred, .rbracket = undefined, }; @@ -856,12 +912,12 @@ const Parser = struct { fn paramDecl(parser: *Parser, dr: *Node.Declarator) !void { var old_style = false; while (true) { - var ds = Node.DeclSpec; + var ds = Node.DeclSpec{}; if (try parser.declSpec(&ds)) { //TODO - } else if (parser.eatToken(.Identifier)) { + } else if (parser.eatToken(.Identifier)) |tok| { old_style = true; - } else if (parser.eatToken(.Ellipsis)) { + } else if (parser.eatToken(.Ellipsis)) |tok| { // TODO } } From 8b713ce88959a953c6d41b5d1372e9b3e666512f Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 7 Jan 2020 22:43:44 +0200 Subject: [PATCH 24/30] std-c parser add options --- lib/std/c/ast.zig | 27 ++++++++++++++++-- lib/std/c/parse.zig | 61 ++++++++++++++++++++++++++++++----------- lib/std/c/tokenizer.zig | 10 +++++++ 3 files changed, 79 insertions(+), 19 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index c17bc2443f..f23570b0b0 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -24,9 +24,14 @@ pub const Tree = struct { // self is destroyed } - pub fn slice(tree: *Tree, token: TokenIndex) []const u8 { - const tok = tree.tokens.at(token); - return tok.source.buffer[tok.start..tok.end]; + pub fn tokenSlice(tree: *Tree, token: TokenIndex) []const u8 { + return tree.tokens.at(token).slice(); + } + + pub fn tokenEql(tree: *Tree, a: TokenIndex, b: TokenIndex) bool { + const atok = tree.tokens.at(a); + const btok = tree.tokens.at(b); + return atok.eql(btok.*); } }; @@ -205,6 +210,10 @@ pub const Type = struct { Typedef: *Type, Record: *Node.RecordType, Enum: *Node.EnumType, + + /// Special case for macro parameters that can be any type. + /// Only present if `retain_macros == true`. + Macro, }, }; @@ -586,4 +595,16 @@ pub const Node = struct { expr: *Expr, pub const InitializerList = std.SegmentedList(*Initializer, 4); }; + + pub const Macro = struct { + base: Node = Node{ .id = Macro }, + kind: union(enum) { + Undef: []const u8, + Fn: struct { + params: []const []const u8, + expr: *Expr, + }, + Expr: *Expr, + }, + }; }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index ca768017a2..cf59743364 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const mem = std.mem; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ast = std.c.ast; @@ -11,9 +12,26 @@ const TokenIterator = ast.Tree.TokenList.Iterator; pub const Error = error{ParseError} || Allocator.Error; +pub const Options = struct { + /// Keep simple macros unexpanded and add the definitions to the ast + retain_macros: bool = false, + + /// Warning or error + warn_as_err: union(enum) { + /// All warnings are warnings + None, + + /// Some warnings are errors + Some: []@TagType(ast.Error), + + /// All warnings are errors + All, + } = .All, +}; + /// Result should be freed with tree.deinit() when there are /// no more references to any of the tokens or nodes. 
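// A hedged usage sketch for the new Options parameter (the call below assumes
// this file's updated parse signature; the source string is illustrative, and
// the two values simply spell out the defaults):
//
//     const tree = try parse(allocator, "int x = 2;", .{
//         .retain_macros = false, // expand macros rather than keep Macro nodes
//         .warn_as_err = .All,    // promote every warning to a hard error
//     });
//     defer tree.deinit();
//
// Passing `.{}` keeps the defaults; `.warn_as_err = .None` reports warnings
// without failing the parse.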
-pub fn parse(allocator: *Allocator, source: []const u8) !*Tree { +pub fn parse(allocator: *Allocator, source: []const u8, options: Options) !*Tree { const tree = blk: { // This block looks unnecessary, but is a "foot-shield" to prevent the SegmentedLists // from being initialized with a pointer to this `arena`, which is created on @@ -62,6 +80,7 @@ pub fn parse(allocator: *Allocator, source: []const u8) !*Tree { .arena = arena, .it = &it, .tree = tree, + .options = options, }; defer parser.symbols.deinit(); @@ -76,7 +95,7 @@ const Parser = struct { /// only used for scopes symbols: SymbolList, - warnings: bool = true, + options: Options, const SymbolList = std.ArrayList(Symbol); @@ -94,11 +113,11 @@ const Parser = struct { } fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type { - const name = parser.tree.slice(tok); + const name = parser.tree.tokenSlice(tok); const syms = parser.symbols.toSliceConst(); var i = syms.len; while (i > 0) : (i -= 1) { - if (std.mem.eql(u8, name, syms[i].name)) { + if (mem.eql(u8, name, syms[i].name)) { return syms[i].ty; } } @@ -249,7 +268,7 @@ const Parser = struct { try node.initializers.push((try parser.initializer(dr)) orelse return parser.err(.{ .ExpectedInitializer = .{ .token = parser.it.index }, })); - } else + } else try node.initializers.push(&dr.base); if (parser.eatToken(.Comma) != null) break; dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ @@ -558,12 +577,18 @@ const Parser = struct { return false; }; switch (ty.id) { - .Enum => |e| { + .Enum => |e| blk: { + if (e.name) |some| + if (!parser.tree.tokenEql(some, tok)) + break :blk; return parser.err(.{ .MustUseKwToRefer = .{ .kw = e.tok, .name = tok }, }); }, - .Record => |r| { + .Record => |r| blk: { + if (r.name) |some| + if (!parser.tree.tokenEql(some, tok)) + break :blk; return parser.err(.{ .MustUseKwToRefer = .{ .kw = r.tok, @@ -580,11 +605,10 @@ const Parser = struct { }; return true; }, - else => { - parser.putBackToken(tok); - return false; - }, + else => {}, } + parser.putBackToken(tok); + return false; } } return parser.err(.{ @@ -680,7 +704,7 @@ const Parser = struct { }; if (name) |some| try parser.symbols.append(.{ - .name = parser.tree.slice(some), + .name = parser.tree.tokenSlice(some), .ty = ty, }); if (parser.eatToken(.LBrace)) |lbrace| { @@ -718,7 +742,7 @@ const Parser = struct { fn recordSpec(parser: *Parser, tok: TokenIndex) !*Node.RecordType { const node = try parser.arena.create(Node.RecordType); const name = parser.eatToken(.Identifier); - const is_struct = parser.tree.slice(tok)[0] == 's'; + const is_struct = parser.tree.tokenSlice(tok)[0] == 's'; node.* = .{ .tok = tok, .kind = if (is_struct) .Struct else .Union, @@ -733,7 +757,7 @@ const Parser = struct { }; if (name) |some| try parser.symbols.append(.{ - .name = parser.tree.slice(some), + .name = parser.tree.tokenSlice(some), .ty = ty, }); if (parser.eatToken(.LBrace)) |lbrace| { @@ -1195,11 +1219,16 @@ const Parser = struct { } fn warn(parser: *Parser, msg: ast.Error) Error!void { + const is_warning = switch (parser.options.warn_as_err) { + .None => true, + .Some => |list| for (list) |item| (if (item == msg) break false) else true, + .All => false, + }; try parser.tree.msgs.push(.{ - .kind = if (parser.warnings) .Warning else .Error, + .kind = if (is_warning) .Warning else .Error, .inner = msg, }); - if (!parser.warnings) return error.ParseError; + if (!is_warning) return error.ParseError; } fn note(parser: *Parser, msg: ast.Error) Error!void { diff --git 
a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index d3c8490c07..fa76fb42af 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -251,6 +251,16 @@ pub const Token = struct { } }; + pub fn eql(a: Token, b: Token) bool { + // do we really need this cast here + if (@as(@TagType(Id), a.id) != b.id) return false; + return mem.eql(u8, a.slice(), b.slice()); + } + + pub fn slice(tok: Token) []const u8 { + return tok.source.buffer[tok.start..tok.end]; + } + pub const Keyword = struct { bytes: []const u8, id: Id, From 83b4163591982e66e7abeb2816706f0cd537633f Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 7 Jan 2020 23:25:38 +0200 Subject: [PATCH 25/30] std-c parser declaratorIsFunction and small fixes --- lib/std/c/ast.zig | 6 ++++ lib/std/c/parse.zig | 69 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index f23570b0b0..2f75e9b455 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -435,6 +435,12 @@ pub const Node = struct { pub const DeclaratorList = Root.DeclList; }; + pub const RecordDeclarator = struct { + base: Node = Node{ .id = .RecordField }, + declarator: *Declarator, + // bit_field_expr: ?*Expr, + }; + pub const TypeQual = struct { @"const": ?TokenIndex = null, atomic: ?TokenIndex = null, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index cf59743364..a38717e94b 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -124,7 +124,7 @@ const Parser = struct { return null; } - fn declareSymbol(parser: *Parser, decl_spec: *Node.DeclSpec, dr: *Node.Declarator) Error!void { + fn declareSymbol(parser: *Parser, type_spec: Node.TypeSpec, dr: *Node.Declarator) Error!void { return; // TODO } @@ -185,7 +185,7 @@ const Parser = struct { const dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{ .ExpectedDeclarator = .{ .token = parser.it.index }, })); - try parser.declareSymbol(&ds, dr); + try parser.declareSymbol(ds.type_spec, dr); try node.declarators.push(&dr.base); if (parser.eatToken(.Comma)) |_| {} else break; } @@ -196,7 +196,7 @@ const Parser = struct { var first_dr = try parser.declarator(.Must); if (first_dr != null and declaratorIsFunction(first_dr.?)) { const dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); - try parser.declareSymbol(&ds, dr); + try parser.declareSymbol(ds.type_spec, dr); var old_decls = Node.FnDecl.OldDeclList.init(parser.arena); const body = if (parser.eatToken(.Semicolon)) |_| null @@ -263,7 +263,7 @@ const Parser = struct { } var dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); while (true) { - try parser.declareSymbol(&ds, dr); + try parser.declareSymbol(ds.type_spec, dr); if (parser.eatToken(.Equal)) |tok| { try node.initializers.push((try parser.initializer(dr)) orelse return parser.err(.{ .ExpectedInitializer = .{ .token = parser.it.index }, @@ -280,8 +280,27 @@ const Parser = struct { } } - fn declaratorIsFunction(dr: *Node) bool { - return false; // TODO + fn declaratorIsFunction(node: *Node) bool { + if (node.id != .Declarator) return false; + assert(node.id == .Declarator); + const dr = @fieldParentPtr(Node.Declarator, "base", node); + if (dr.suffix != .Fn) return false; + switch (dr.prefix) { + .None, .Identifer => return true, + .Complex => |inner| { + var inner_node = inner.inner; + while (true) { + if (inner_node.id != .Declarator) return false; + assert(inner_node.id == .Declarator); + const inner_dr = @fieldParentPtr(Node.Declarator, "base", 
inner_node); + if (inner_dr.pointer != null) return false; + switch (inner_dr.prefix) { + .None, .Identifer => return true, + .Complex => |c| inner_node = c.inner, + } + } + }, + } } /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON @@ -292,12 +311,11 @@ const Parser = struct { _ = try parser.expectToken(.Comma); const str = try parser.expectToken(.StringLiteral); _ = try parser.expectToken(.RParen); - const semicolon = try parser.expectToken(.Semicolon); const node = try parser.arena.create(Node.StaticAssert); node.* = .{ .assert = tok, .expr = const_expr, - .semicolon = semicolon, + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } @@ -761,6 +779,8 @@ const Parser = struct { .ty = ty, }); if (parser.eatToken(.LBrace)) |lbrace| { + const scope = parser.pushScope(); + defer parser.popScope(scope); var fields = Node.RecordType.FieldList.init(parser.arena); while (true) { if (parser.eatToken(.RBrace)) |rbrace| { @@ -796,7 +816,9 @@ const Parser = struct { .semicolon = undefined, }; while (true) { - try node.declarators.push(try parser.recordDeclarator()); + const rdr = try parser.recordDeclarator(); + try parser.declareSymbol(type_spec, rdr.declarator); + try node.declarators.push(&rdr.base); if (parser.eatToken(.Comma)) |_| {} else break; } @@ -836,16 +858,19 @@ const Parser = struct { /// / LPAREN Declarator RPAREN /// / (none) // if named != .Must /// DeclaratorSuffix - /// <. DeclaratorPrefix (LBRACKET ArrayDeclarator? RBRACKET)* + /// <- DeclaratorPrefix (LBRACKET ArrayDeclarator? RBRACKET)* /// / DeclaratorPrefix LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN fn declarator(parser: *Parser, named: Named) Error!?*Node { const ptr = try parser.pointer(); var node: *Node.Declarator = undefined; + var inner_fn = false; + // prefix if (parser.eatToken(.LParen)) |lparen| { const inner = (try parser.declarator(named)) orelse return parser.err(.{ .ExpectedDeclarator = .{ .token = lparen + 1 }, }); + inner_fn = declaratorIsFunction(inner); node = try parser.arena.create(Node.Declarator); node.* = .{ .pointer = ptr, @@ -885,6 +910,10 @@ const Parser = struct { } // suffix if (parser.eatToken(.LParen)) |lparen| { + if (inner_fn) + return parser.err(.{ + .InvalidDeclarator = .{ .token = lparen }, + }); node.suffix = .{ .Fn = .{ .lparen = lparen, @@ -894,11 +923,16 @@ const Parser = struct { }; try parser.paramDecl(node); node.suffix.Fn.rparen = try parser.expectToken(.RParen); - } else { - while (try parser.arrayDeclarator()) |arr| { - if (node.suffix == .None) - node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) }; - try node.suffix.Array.push(arr); + } else if (parser.eatToken(.LBracket)) |tok| { + if (inner_fn) + return parser.err(.{ + .InvalidDeclarator = .{ .token = tok }, + }); + node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) }; + var lbrace = tok; + while (true) { + try node.suffix.Array.push(try parser.arrayDeclarator(lbrace)); + if (parser.eatToken(.LBracket)) |t| lbrace = t else break; } } if (parser.eatToken(.LParen) orelse parser.eatToken(.LBracket)) |tok| @@ -914,8 +948,7 @@ const Parser = struct { /// / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr) /// / TypeQual+ AssignmentExpr? 
/// / AssignmentExpr - fn arrayDeclarator(parser: *Parser) !?*Node.Array { - const lbracket = parser.eatToken(.LBracket) orelse return null; + fn arrayDeclarator(parser: *Parser, lbracket: TokenIndex) !*Node.Array { const arr = try parser.arena.create(Node.Array); arr.* = .{ .lbracket = lbracket, @@ -1046,6 +1079,8 @@ const Parser = struct { /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE fn compoundStmt(parser: *Parser) Error!?*Node { + const scope = parser.pushScope(); + defer parser.popScope(scope); const lbrace = parser.eatToken(.LBrace) orelse return null; const body_node = try parser.arena.create(Node.CompoundStmt); body_node.* = .{ From e21ea5bd9583e583c1691b05df682178f1bea10f Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 8 Jan 2020 00:00:14 +0200 Subject: [PATCH 26/30] std-c parser loops --- lib/std/c/ast.zig | 36 ++++++++++++++++++++-- lib/std/c/parse.zig | 73 ++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 103 insertions(+), 6 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 2f75e9b455..7e91a28f0c 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -229,6 +229,9 @@ pub const Node = struct { Label, CompoundStmt, IfStmt, + WhileStmt, + DoStmt, + ForStmt, StaticAssert, Declarator, Pointer, @@ -438,7 +441,7 @@ pub const Node = struct { pub const RecordDeclarator = struct { base: Node = Node{ .id = .RecordField }, declarator: *Declarator, - // bit_field_expr: ?*Expr, + bit_field_expr: ?*Expr, }; pub const TypeQual = struct { @@ -486,12 +489,41 @@ pub const Node = struct { base: Node = Node{ .id = .IfStmt }, @"if": TokenIndex, cond: *Node, + body: *Node, @"else": ?struct { tok: TokenIndex, - stmt: *Node, + body: *Node, }, }; + pub const WhileStmt = struct { + base: Node = Node{ .id = .WhileStmt }, + @"while": TokenIndex, + cond: *Expr, + rparen: TokenIndex, + body: *Node, + }; + + pub const DoStmt = struct { + base: Node = Node{ .id = .DoStmt }, + do: TokenIndex, + body: *Node, + @"while": TokenIndex, + cond: *Expr, + semicolon: TokenIndex, + }; + + pub const ForStmt = struct { + base: Node = Node{ .id = .ForStmt }, + @"for": TokenIndex, + init: ?*Node, + cond: ?*Expr, + semicolon: TokenIndex, + incr: ?*Expr, + rparen: TokenIndex, + body: *Node, + }; + pub const StaticAssert = struct { base: Node = Node{ .id = .StaticAssert }, assert: TokenIndex, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index a38717e94b..5afbc88327 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -1119,23 +1119,88 @@ const Parser = struct { .cond = (try parser.expr()) orelse return parser.err(.{ .ExpectedExpr = .{ .token = parser.it.index }, }), + .body = undefined, .@"else" = null, }; _ = try parser.expectToken(.RParen); + node.body = (try parser.stmt()) orelse return parser.err(.{ + .ExpectedStmt = .{ .token = parser.it.index }, + }); if (parser.eatToken(.Keyword_else)) |else_tok| { node.@"else" = .{ .tok = else_tok, - .stmt = (try parser.stmt()) orelse return parser.err(.{ + .body = (try parser.stmt()) orelse return parser.err(.{ .ExpectedStmt = .{ .token = parser.it.index }, }), }; } return &node.base; } + + // TODO loop scope + if (parser.eatToken(.Keyword_while)) |tok| { + _ = try parser.expectToken(.LParen); + const cond = (try parser.expr()) orelse return parser.err(.{ + .ExpectedExpr = .{ .token = parser.it.index }, + }); + const rparen = try parser.expectToken(.RParen); + const node = try parser.arena.create(Node.WhileStmt); + node.* = .{ + .@"while" = tok, + .cond = cond, + .rparen = rparen, + .body = (try parser.stmt()) orelse 
return parser.err(.{ + .ExpectedStmt = .{ .token = parser.it.index }, + }), + .semicolon = try parser.expectToken(.Semicolon), + }; + return &node.base; + } + if (parser.eatToken(.Keyword_do)) |tok| { + const body = (try parser.stmt()) orelse return parser.err(.{ + .ExpectedStmt = .{ .token = parser.it.index }, + }); + const @"while" = try parser.expectToken(.Keyword_while); + _ = try parser.expectToken(.LParen); + const cond = (try parser.expr()) orelse return parser.err(.{ + .ExpectedExpr = .{ .token = parser.it.index }, + }); + _ = try parser.expectToken(.RParen); + const node = try parser.arena.create(Node.DoStmt); + node.* = .{ + .do = tok, + .body = body, + .cond = cond, + .@"while" = @"while", + .semicolon = try parser.expectToken(.Semicolon), + }; + return &node.base; + } + if (parser.eatToken(.Keyword_for)) |tok| { + _ = try parser.expectToken(.LParen); + const init = if (try parser.declaration()) |decl| blk:{ + // TODO disallow storage class other than auto and register + break :blk decl; + } else try parser.exprStmt(); + const cond = try parser.expr(); + const semicolon = try parser.expectToken(.Semicolon); + const incr = try parser.expr(); + const rparen = try parser.expectToken(.RParen); + const node = try parser.arena.create(Node.ForStmt); + node.* = .{ + .@"for" = tok, + .init = init, + .cond = cond, + .semicolon = semicolon, + .incr = incr, + .rparen = rparen, + .body = (try parser.stmt()) orelse return parser.err(.{ + .ExpectedStmt = .{ .token = parser.it.index }, + }), + }; + return &node.base; + } // if (parser.eatToken(.Keyword_switch)) |tok| {} - // if (parser.eatToken(.Keyword_while)) |tok| {} - // if (parser.eatToken(.Keyword_do)) |tok| {} - // if (parser.eatToken(.Keyword_for)) |tok| {} // if (parser.eatToken(.Keyword_default)) |tok| {} // if (parser.eatToken(.Keyword_case)) |tok| {} if (parser.eatToken(.Keyword_goto)) |tok| { From 4c0776b2a5a8a92e770fe0b08a15e06671000cb6 Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 11 Jan 2020 21:46:36 +0200 Subject: [PATCH 27/30] std-c parse switch --- lib/std/c/ast.zig | 35 +++++++++----- lib/std/c/parse.zig | 113 ++++++++++++++++++++++++++++---------------- 2 files changed, 94 insertions(+), 54 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 7e91a28f0c..093ea4cc1b 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -48,7 +48,6 @@ pub const Error = union(enum) { InvalidToken: SingleTokenError("invalid token '{}'"), ExpectedToken: ExpectedToken, ExpectedExpr: SingleTokenError("expected expression, found '{}'"), - ExpectedStmt: SingleTokenError("expected statement, found '{}'"), ExpectedTypeName: SingleTokenError("expected type name, found '{}'"), ExpectedFnBody: SingleTokenError("expected function body, found '{}'"), ExpectedDeclarator: SingleTokenError("expected declarator, found '{}'"), @@ -70,7 +69,6 @@ pub const Error = union(enum) { .InvalidToken => |*x| return x.render(tree, stream), .ExpectedToken => |*x| return x.render(tree, stream), .ExpectedExpr => |*x| return x.render(tree, stream), - .ExpectedStmt => |*x| return x.render(tree, stream), .ExpectedTypeName => |*x| return x.render(tree, stream), .ExpectedDeclarator => |*x| return x.render(tree, stream), .ExpectedFnBody => |*x| return x.render(tree, stream), @@ -94,7 +92,6 @@ pub const Error = union(enum) { .InvalidToken => |x| return x.token, .ExpectedToken => |x| return x.token, .ExpectedExpr => |x| return x.token, - .ExpectedStmt => |x| return x.token, .ExpectedTypeName => |x| return x.token, .ExpectedDeclarator => |x| return x.token, 
.ExpectedFnBody => |x| return x.token, @@ -226,9 +223,10 @@ pub const Node = struct { RecordField, JumpStmt, ExprStmt, - Label, + LabeledStmt, CompoundStmt, IfStmt, + SwitchStmt, WhileStmt, DoStmt, ForStmt, @@ -454,26 +452,29 @@ pub const Node = struct { pub const JumpStmt = struct { base: Node = Node{ .id = .JumpStmt }, ltoken: TokenIndex, - kind: Kind, - semicolon: TokenIndex, - - pub const Kind = union(enum) { + kind: union(enum) { Break, Continue, Return: ?*Node, Goto: TokenIndex, - }; + }, + semicolon: TokenIndex, }; pub const ExprStmt = struct { base: Node = Node{ .id = .ExprStmt }, - expr: ?*Node, + expr: ?*Expr, semicolon: TokenIndex, }; - pub const Label = struct { - base: Node = Node{ .id = .Label }, - identifier: TokenIndex, + pub const LabeledStmt = struct { + base: Node = Node{ .id = .LabeledStmt }, + kind: union(enum) { + Label: TokenIndex, + Case: TokenIndex, + Default: TokenIndex, + }, + stmt: *Node, }; pub const CompoundStmt = struct { @@ -496,6 +497,14 @@ pub const Node = struct { }, }; + pub const SwitchStmt = struct { + base: Node = Node{ .id = .SwitchStmt }, + @"switch": TokenIndex, + expr: *Expr, + rparen: TokenIndex, + stmt: *Node, + }; + pub const WhileStmt = struct { base: Node = Node{ .id = .WhileStmt }, @"while": TokenIndex, diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 5afbc88327..7473cf0ac6 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -104,7 +104,14 @@ const Parser = struct { ty: *Type, }; - fn pushScope(parser: *Parser) usize { + const ScopeKind = enum { + Block, + Loop, + Root, + Switch, + }; + + fn pushScope(parser: *Parser, kind: ScopeKind) usize { return parser.symbols.len; } @@ -130,6 +137,8 @@ const Parser = struct { /// Root <- ExternalDeclaration* eof fn root(parser: *Parser) Allocator.Error!*Node.Root { + const scope = parser.pushScope(.Root); + defer parser.popScope(scope); const node = try parser.arena.create(Node.Root); node.* = .{ .decls = Node.Root.DeclList.init(parser.arena), @@ -779,7 +788,7 @@ const Parser = struct { .ty = ty, }); if (parser.eatToken(.LBrace)) |lbrace| { - const scope = parser.pushScope(); + const scope = parser.pushScope(.Block); defer parser.popScope(scope); var fields = Node.RecordType.FieldList.init(parser.arena); while (true) { @@ -1079,18 +1088,22 @@ const Parser = struct { /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE fn compoundStmt(parser: *Parser) Error!?*Node { - const scope = parser.pushScope(); - defer parser.popScope(scope); const lbrace = parser.eatToken(.LBrace) orelse return null; + const scope = parser.pushScope(.Block); + defer parser.popScope(scope); const body_node = try parser.arena.create(Node.CompoundStmt); body_node.* = .{ .lbrace = lbrace, .statements = Node.CompoundStmt.StmtList.init(parser.arena), .rbrace = undefined, }; - while ((try parser.declaration()) orelse (try parser.stmt())) |node| - try body_node.statements.push(node); - body_node.rbrace = try parser.expectToken(.RBrace); + while (true) { + if (parser.eatToken(.RBRACE)) |rbrace| { + body_node.rbrace = rbrace; + break; + } + try body_node.statements.push((try parser.declaration()) orelse (try parser.stmt())); + } return &body_node.base; } @@ -1109,7 +1122,7 @@ const Parser = struct { /// / Keyword_return Expr? 
SEMICOLON /// / IDENTIFIER COLON Stmt /// / ExprStmt - fn stmt(parser: *Parser) Error!?*Node { + fn stmt(parser: *Parser) Error!*Node { if (try parser.compoundStmt()) |node| return node; if (parser.eatToken(.Keyword_if)) |tok| { const node = try parser.arena.create(Node.IfStmt); @@ -1123,22 +1136,18 @@ const Parser = struct { .@"else" = null, }; _ = try parser.expectToken(.RParen); - node.body = (try parser.stmt()) orelse return parser.err(.{ - .ExpectedStmt = .{ .token = parser.it.index }, - }); + node.body = try parser.stmt(); if (parser.eatToken(.Keyword_else)) |else_tok| { node.@"else" = .{ .tok = else_tok, - .body = (try parser.stmt()) orelse return parser.err(.{ - .ExpectedStmt = .{ .token = parser.it.index }, - }), + .body = try parser.stmt(), }; } return &node.base; } - - // TODO loop scope if (parser.eatToken(.Keyword_while)) |tok| { + const scope = parser.pushScope(.Loop); + defer parser.popScope(scope); _ = try parser.expectToken(.LParen); const cond = (try parser.expr()) orelse return parser.err(.{ .ExpectedExpr = .{ .token = parser.it.index }, @@ -1149,18 +1158,15 @@ const Parser = struct { .@"while" = tok, .cond = cond, .rparen = rparen, - .body = (try parser.stmt()) orelse return parser.err(.{ - .ExpectedStmt = .{ .token = parser.it.index }, - }), + .body = try parser.stmt(), .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } if (parser.eatToken(.Keyword_do)) |tok| { - const body = (try parser.stmt()) orelse return parser.err(.{ - .ExpectedStmt = .{ .token = parser.it.index }, - }); - const @"while" = try parser.expectToken(.Keyword_while); + const scope = parser.pushScope(.Loop); + defer parser.popScope(scope); + const body = try parser.stmt(); _ = try parser.expectToken(.LParen); const cond = (try parser.expr()) orelse return parser.err(.{ .ExpectedExpr = .{ .token = parser.it.index }, @@ -1177,6 +1183,8 @@ const Parser = struct { return &node.base; } if (parser.eatToken(.Keyword_for)) |tok| { + const scope = parser.pushScope(.Loop); + defer parser.popScope(scope); _ = try parser.expectToken(.LParen); const init = if (try parser.declaration()) |decl| blk:{ // TODO disallow storage class other than auto and register @@ -1194,15 +1202,43 @@ const Parser = struct { .semicolon = semicolon, .incr = incr, .rparen = rparen, - .body = (try parser.stmt()) orelse return parser.err(.{ - .ExpectedStmt = .{ .token = parser.it.index }, - }), + .body = try parser.stmt(), + }; + return &node.base; + } + if (parser.eatToken(.Keyword_switch)) |tok| { + const scope = parser.pushScope(.Switch); + defer parser.popScope(scope); + _ = try parser.expectToken(.LParen); + const switch_expr = try parser.exprStmt(); + const rparen = try parser.expectToken(.RParen); + const node = try parser.arena.create(Node.SwitchStmt); + node.* = .{ + .@"switch" = tok, + .expr = switch_expr, + .rparen = rparen, + .body = try parser.stmt(), + }; + return &node.base; + } + if (parser.eatToken(.Keyword_default)) |tok| { + _ = try parser.expectToken(.Colon); + const node = try parser.arena.create(Node.LabeledStmt); + node.* = .{ + .kind = .{.Default = tok }, + .stmt = try parser.stmt(), + }; + return &node.base; + } + if (parser.eatToken(.Keyword_case)) |tok| { + _ = try parser.expectToken(.Colon); + const node = try parser.arena.create(Node.LabeledStmt); + node.* = .{ + .kind = .{.Case = tok }, + .stmt = try parser.stmt(), }; return &node.base; } - // if (parser.eatToken(.Keyword_switch)) |tok| {} - // if (parser.eatToken(.Keyword_default)) |tok| {} - // if (parser.eatToken(.Keyword_case)) |tok| 
{} if (parser.eatToken(.Keyword_goto)) |tok| { const node = try parser.arena.create(Node.JumpStmt); node.* = .{ @@ -1241,29 +1277,24 @@ const Parser = struct { } if (parser.eatToken(.Identifier)) |tok| { if (parser.eatToken(.Colon)) |_| { - const node = try parser.arena.create(Node.Label); + const node = try parser.arena.create(Node.LabeledStmt); node.* = .{ - .identifier = tok, + .kind = .{.Label = tok }, + .stmt = try parser.stmt(), }; return &node.base; } parser.putBackToken(tok); } - if (try parser.exprStmt()) |node| return node; - return null; + return parser.exprStmt(); } /// ExprStmt <- Expr? SEMICOLON - fn exprStmt(parser: *Parser) !?*Node { + fn exprStmt(parser: *Parser) !*Node { const node = try parser.arena.create(Node.ExprStmt); - const expr_node = try parser.expr(); - const semicolon = if (expr_node != null) - try parser.expectToken(.Semicolon) - else - parser.eatToken(.Semicolon) orelse return null; node.* = .{ - .expr = expr_node, - .semicolon = semicolon, + .expr = try parser.expr(), + .semicolon = try parser.expectToken(.Semicolon), }; return &node.base; } From 28daddae81f354da89f917528dab2e5d87bca829 Mon Sep 17 00:00:00 2001 From: Vexu Date: Tue, 14 Jan 2020 16:18:32 +0200 Subject: [PATCH 28/30] std-c todos and small fixes --- lib/std/c/ast.zig | 5 ++-- lib/std/c/parse.zig | 54 +++++++++++++++++++---------------------- lib/std/c/tokenizer.zig | 8 +++++- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 093ea4cc1b..bb6eb54d21 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -221,6 +221,7 @@ pub const Node = struct { Root, EnumField, RecordField, + RecordDeclarator, JumpStmt, ExprStmt, LabeledStmt, @@ -437,8 +438,8 @@ pub const Node = struct { }; pub const RecordDeclarator = struct { - base: Node = Node{ .id = .RecordField }, - declarator: *Declarator, + base: Node = Node{ .id = .RecordDeclarator }, + declarator: ?*Declarator, bit_field_expr: ?*Expr, }; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 7473cf0ac6..d3a96e9aa2 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -13,8 +13,8 @@ const TokenIterator = ast.Tree.TokenList.Iterator; pub const Error = error{ParseError} || Allocator.Error; pub const Options = struct { - /// Keep simple macros unexpanded and add the definitions to the ast - retain_macros: bool = false, + // /// Keep simple macros unexpanded and add the definitions to the ast + // retain_macros: bool = false, /// Warning or error warn_as_err: union(enum) { @@ -204,12 +204,16 @@ const Parser = struct { } var first_dr = try parser.declarator(.Must); if (first_dr != null and declaratorIsFunction(first_dr.?)) { + // TODO typedeffed fn proto-only const dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?); try parser.declareSymbol(ds.type_spec, dr); var old_decls = Node.FnDecl.OldDeclList.init(parser.arena); const body = if (parser.eatToken(.Semicolon)) |_| null else blk: { + if (local) { + // TODO nested function warning + } // TODO first_dr.is_old // while (true) { // var old_ds = Node.DeclSpec{}; @@ -387,13 +391,11 @@ const Parser = struct { /// / IDENTIFIER // typedef name /// / TypeQual fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool { - while (try parser.typeQual(&type_spec.qual)) {} blk: { if (parser.eatToken(.Keyword_void)) |tok| { if (type_spec.spec != .None) break :blk; type_spec.spec = .{ .Void = tok }; - return true; } else if (parser.eatToken(.Keyword_char)) |tok| { switch (type_spec.spec) { .None => { @@ -415,7 +417,6 @@ const Parser = 
struct { }, else => break :blk, } - return true; } else if (parser.eatToken(.Keyword_short)) |tok| { switch (type_spec.spec) { .None => { @@ -437,7 +438,6 @@ const Parser = struct { }, else => break :blk, } - return true; } else if (parser.eatToken(.Keyword_long)) |tok| { switch (type_spec.spec) { .None => { @@ -468,7 +468,6 @@ const Parser = struct { }, else => break :blk, } - return true; } else if (parser.eatToken(.Keyword_int)) |tok| { switch (type_spec.spec) { .None => { @@ -495,7 +494,6 @@ const Parser = struct { }, else => break :blk, } - return true; } else if (parser.eatToken(.Keyword_signed) orelse parser.eatToken(.Keyword_unsigned)) |tok| { switch (type_spec.spec) { .None => { @@ -527,7 +525,6 @@ const Parser = struct { }, else => break :blk, } - return true; } else if (parser.eatToken(.Keyword_float)) |tok| { if (type_spec.spec != .None) break :blk; @@ -536,7 +533,6 @@ const Parser = struct { .float = tok, }, }; - return true; } else if (parser.eatToken(.Keyword_double)) |tok| { if (type_spec.spec != .None) break :blk; @@ -545,7 +541,6 @@ const Parser = struct { .double = tok, }, }; - return true; } else if (parser.eatToken(.Keyword_complex)) |tok| { switch (type_spec.spec) { .None => { @@ -568,36 +563,34 @@ const Parser = struct { }, else => break :blk, } - return true; - } - if (parser.eatToken(.Keyword_bool)) |tok| { + } else if (parser.eatToken(.Keyword_bool)) |tok| { if (type_spec.spec != .None) break :blk; type_spec.spec = .{ .Bool = tok }; - return true; } else if (parser.eatToken(.Keyword_atomic)) |tok| { - if (type_spec.spec != .None) - break :blk; - _ = try parser.expectToken(.LParen); - const name = (try parser.typeName()) orelse return parser.err(.{ - .ExpectedTypeName = .{ .token = parser.it.index }, - }); - type_spec.spec.Atomic = .{ - .atomic = tok, - .typename = name, - .rparen = try parser.expectToken(.RParen), - }; - return true; + // might be _Atomic qualifier + if (parser.eatToken(.LParen)) |_| { + if (type_spec.spec != .None) + break :blk; + const name = (try parser.typeName()) orelse return parser.err(.{ + .ExpectedTypeName = .{ .token = parser.it.index }, + }); + type_spec.spec.Atomic = .{ + .atomic = tok, + .typename = name, + .rparen = try parser.expectToken(.RParen), + }; + } else { + parser.putBackToken(tok); + } } else if (parser.eatToken(.Keyword_enum)) |tok| { if (type_spec.spec != .None) break :blk; type_spec.spec.Enum = try parser.enumSpec(tok); - return true; } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| { if (type_spec.spec != .None) break :blk; type_spec.spec.Record = try parser.recordSpec(tok); - return true; } else if (parser.eatToken(.Identifier)) |tok| { const ty = parser.getSymbol(tok) orelse { parser.putBackToken(tok); @@ -637,6 +630,7 @@ const Parser = struct { parser.putBackToken(tok); return false; } + return parser.typeQual(&type_spec.qual); } return parser.err(.{ .InvalidTypeSpecifier = .{ @@ -874,6 +868,7 @@ const Parser = struct { var node: *Node.Declarator = undefined; var inner_fn = false; + // TODO sizof(int (int)) // prefix if (parser.eatToken(.LParen)) |lparen| { const inner = (try parser.declarator(named)) orelse return parser.err(.{ @@ -981,6 +976,7 @@ const Parser = struct { var ds = Node.DeclSpec{}; if (try parser.declSpec(&ds)) { //TODO + // TODO try parser.declareSymbol(ds.type_spec, dr); } else if (parser.eatToken(.Identifier)) |tok| { old_style = true; } else if (parser.eatToken(.Ellipsis)) |tok| { diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 
fa76fb42af..7da2e18320 100644
--- a/lib/std/c/tokenizer.zig
+++ b/lib/std/c/tokenizer.zig
@@ -593,9 +593,15 @@ pub const Tokenizer = struct {
                     '\\' => {
                         state = .BackSlash;
                     },
-                    else => {
+                    '\t', '\x0B', '\x0C', ' ' => {
                         result.start = self.index + 1;
                     },
+                    else => {
+                        // TODO handle invalid bytes better
+                        result.id = .Invalid;
+                        self.index += 1;
+                        break;
+                    },
                 },
                 .Cr => switch (c) {
                     '\n' => {

From ad327fed05d2d809dfef612e3f4abbb5a9a5ed71 Mon Sep 17 00:00:00 2001
From: Vexu
Date: Sun, 19 Jan 2020 20:41:44 +0200
Subject: [PATCH 29/30] std-c redo scoping, do string concatenation in parser

---
 lib/std/c/ast.zig       |  2 +-
 lib/std/c/parse.zig     | 84 ++++++++++++++++++++++++-----------------
 lib/std/c/tokenizer.zig | 46 ++++++++++------------
 3 files changed, 69 insertions(+), 63 deletions(-)

diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig
index bb6eb54d21..4bb42647cb 100644
--- a/lib/std/c/ast.zig
+++ b/lib/std/c/ast.zig
@@ -576,7 +576,7 @@ pub const Node = struct {
                 asterisk: ?TokenIndex,
                 static: ?TokenIndex,
                 qual: TypeQual,
-                // expr: *Expr,
+                expr: *Expr,
             },
         },
         rbracket: TokenIndex,
diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig
index d3a96e9aa2..253df5981b 100644
--- a/lib/std/c/parse.zig
+++ b/lib/std/c/parse.zig
@@ -75,9 +75,12 @@ pub fn parse(allocator: *Allocator, source: []const u8, options: Options) !*Tree
         }
     }
 
+    var parse_arena = std.heap.ArenaAllocator.init(allocator);
+    defer parse_arena.deinit();
+
     var parser = Parser{
-        .symbols = Parser.SymbolList.init(allocator),
-        .arena = arena,
+        .scopes = Parser.ScopeList.init(allocator),
+        .arena = &parse_arena.allocator,
         .it = &it,
         .tree = tree,
         .options = options,
@@ -93,11 +96,17 @@ const Parser = struct {
     it: *TokenIterator,
     tree: *Tree,
-    /// only used for scopes
-    symbols: SymbolList,
+    arena: *Allocator,
+    scopes: ScopeList,
     options: Options,
 
-    const SymbolList = std.ArrayList(Symbol);
+    const ScopeList = std.SegmentedList(Scope);
+    const SymbolList = std.SegmentedList(Symbol);
+
+    const Scope = struct {
+        kind: ScopeKind,
+        syms: SymbolList,
+    };
 
     const Symbol = struct {
         name: []const u8,
@@ -111,21 +120,27 @@ const Parser = struct {
         Switch,
     };
 
-    fn pushScope(parser: *Parser, kind: ScopeKind) usize {
-        return parser.symbols.len;
+    fn pushScope(parser: *Parser, kind: ScopeKind) !void {
+        const new = try parser.scopes.addOne();
+        new.* = .{
+            .kind = kind,
+            .syms = SymbolList.init(parser.arena),
+        };
     }
 
-    fn popScope(parser: *Parser, len: usize) void {
-        parser.symbols.resize(len) catch unreachable;
+    fn popScope(parser: *Parser) void {
+        _ = parser.scopes.pop();
     }
 
-    fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Type {
+    fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Symbol {
         const name = parser.tree.tokenSlice(tok);
-        const syms = parser.symbols.toSliceConst();
-        var i = syms.len;
-        while (i > 0) : (i -= 1) {
-            if (mem.eql(u8, name, syms[i].name)) {
-                return syms[i].ty;
+        var scope_it = parser.scopes.iterator(parser.scopes.len);
+        while (scope_it.prev()) |scope| {
+            var sym_it = scope.syms.iterator(scope.syms.len);
+            while (sym_it.prev()) |sym| {
+                if (mem.eql(u8, sym.name, name)) {
+                    return sym;
+                }
             }
         }
         return null;
@@ -137,8 +152,8 @@ const Parser = struct {
 
     /// Root <- ExternalDeclaration* eof
    fn root(parser: *Parser) Allocator.Error!*Node.Root {
-        const scope = parser.pushScope(.Root);
-        defer parser.popScope(scope);
+        try parser.pushScope(.Root);
+        defer parser.popScope();
         const node = try parser.arena.create(Node.Root);
         node.* = .{
             .decls = Node.Root.DeclList.init(parser.arena),
@@ -782,8 +797,8 @@ const Parser = struct {
             .ty = ty,
         });
         if
(parser.eatToken(.LBrace)) |lbrace| { - const scope = parser.pushScope(.Block); - defer parser.popScope(scope); + try parser.pushScope(.Block); + defer parser.popScope(); var fields = Node.RecordType.FieldList.init(parser.arena); while (true) { if (parser.eatToken(.RBrace)) |rbrace| { @@ -996,15 +1011,14 @@ const Parser = struct { fn assignmentExpr(parser: *Parser) !*Node {} /// ConstExpr <- ConditionalExpr - fn constExpr(parser: *Parser) Error!*Node { + fn constExpr(parser: *Parser) Error!?*Expr { const start = parser.it.index; const expression = try parser.conditionalExpr(); - // TODO - // if (expression == nullor expression.?.value == null) - // return parser.err(.{ - // .ConsExpr = start, - // }); - return expression.?; + if (expression != null and expression.?.value == .None) + return parser.err(.{ + .ConsExpr = start, + }); + return expression; } /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)? @@ -1085,8 +1099,8 @@ const Parser = struct { /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE fn compoundStmt(parser: *Parser) Error!?*Node { const lbrace = parser.eatToken(.LBrace) orelse return null; - const scope = parser.pushScope(.Block); - defer parser.popScope(scope); + try parser.pushScope(.Block); + defer parser.popScope(); const body_node = try parser.arena.create(Node.CompoundStmt); body_node.* = .{ .lbrace = lbrace, @@ -1142,8 +1156,8 @@ const Parser = struct { return &node.base; } if (parser.eatToken(.Keyword_while)) |tok| { - const scope = parser.pushScope(.Loop); - defer parser.popScope(scope); + try parser.pushScope(.Loop); + defer parser.popScope(); _ = try parser.expectToken(.LParen); const cond = (try parser.expr()) orelse return parser.err(.{ .ExpectedExpr = .{ .token = parser.it.index }, @@ -1160,8 +1174,8 @@ const Parser = struct { return &node.base; } if (parser.eatToken(.Keyword_do)) |tok| { - const scope = parser.pushScope(.Loop); - defer parser.popScope(scope); + try parser.pushScope(.Loop); + defer parser.popScope(); const body = try parser.stmt(); _ = try parser.expectToken(.LParen); const cond = (try parser.expr()) orelse return parser.err(.{ @@ -1179,8 +1193,8 @@ const Parser = struct { return &node.base; } if (parser.eatToken(.Keyword_for)) |tok| { - const scope = parser.pushScope(.Loop); - defer parser.popScope(scope); + try parser.pushScope(.Loop); + defer parser.popScope(); _ = try parser.expectToken(.LParen); const init = if (try parser.declaration()) |decl| blk:{ // TODO disallow storage class other than auto and register @@ -1203,8 +1217,8 @@ const Parser = struct { return &node.base; } if (parser.eatToken(.Keyword_switch)) |tok| { - const scope = parser.pushScope(.Switch); - defer parser.popScope(scope); + try parser.pushScope(.Switch); + defer parser.popScope(); _ = try parser.expectToken(.LParen); const switch_expr = try parser.exprStmt(); const rparen = try parser.expectToken(.RParen); diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index 7da2e18320..a641529502 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -401,7 +401,6 @@ pub const Tokenizer = struct { U, L, StringLiteral, - AfterStringLiteral, CharLiteralStart, CharLiteral, EscapeSequence, @@ -617,7 +616,7 @@ pub const Tokenizer = struct { }, .BackSlash => switch (c) { '\n' => { - state = if (string) .AfterStringLiteral else .Start; + state = .Start; }, '\r' => { state = .BackSlashCr; @@ -632,7 +631,7 @@ pub const Tokenizer = struct { }, .BackSlashCr => switch (c) { '\n' => { - state = if (string) .AfterStringLiteral else 
.Start; + state = .Start; }, else => { result.id = .Invalid; @@ -696,7 +695,8 @@ pub const Tokenizer = struct { state = .EscapeSequence; }, '"' => { - state = .AfterStringLiteral; + self.index += 1; + break; }, '\n', '\r' => { result.id = .Invalid; @@ -704,22 +704,6 @@ pub const Tokenizer = struct { }, else => {}, }, - .AfterStringLiteral => switch (c) { - '"' => { - state = .StringLiteral; - }, - '\\' => { - state = .BackSlash; - }, - '\n', '\r' => { - if (self.pp_directive) - break; - }, - '\t', '\x0B', '\x0C', ' ' => {}, - else => { - break; - }, - }, .CharLiteralStart => switch (c) { '\\' => { string = false; @@ -1255,7 +1239,7 @@ pub const Tokenizer = struct { } } else if (self.index == self.source.buffer.len) { switch (state) { - .AfterStringLiteral, .Start => {}, + .Start => {}, .u, .u8, .U, .L, .Identifier => { result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier; }, @@ -1322,7 +1306,7 @@ pub const Tokenizer = struct { test "operators" { expectTokens( - \\ ! != | || |= = == + \\ ! != | || |= = == \\ ( ) { } [ ] . .. ... \\ ^ ^= + ++ += - -- -= \\ * *= % %= -> : ; / /= @@ -1505,24 +1489,27 @@ test "line continuation" { .Identifier, .Nl, .{ .StringLiteral = .None }, + .Nl, .Hash, .Keyword_define, .{ .StringLiteral = .None }, .Nl, .{ .StringLiteral = .None }, + .Nl, .Hash, .Keyword_define, .{ .StringLiteral = .None }, + .{ .StringLiteral = .None }, }); } test "string prefix" { expectTokens( - \\"foo" "bar" - \\u"foo" "bar" - \\u8"foo" "bar" - \\U"foo" "bar" - \\L"foo" "bar" + \\"foo" + \\u"foo" + \\u8"foo" + \\U"foo" + \\L"foo" \\'foo' \\u'foo' \\U'foo' @@ -1530,10 +1517,15 @@ test "string prefix" { \\ , &[_]Token.Id{ .{ .StringLiteral = .None }, + .Nl, .{ .StringLiteral = .Utf16 }, + .Nl, .{ .StringLiteral = .Utf8 }, + .Nl, .{ .StringLiteral = .Utf32 }, + .Nl, .{ .StringLiteral = .Wide }, + .Nl, .{ .CharLiteral = .None }, .Nl, .{ .CharLiteral = .Utf16 }, From abd1a7c91c611b35754e5d22a8755cfbebc65861 Mon Sep 17 00:00:00 2001 From: Vexu Date: Wed, 29 Jan 2020 12:12:28 +0200 Subject: [PATCH 30/30] std-c add todos to unfinished parsing functioins --- lib/std/c/ast.zig | 23 +++++++++ lib/std/c/parse.zig | 110 ++++++++++++++++++++++++++++++++------------ 2 files changed, 103 insertions(+), 30 deletions(-) diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig index 4bb42647cb..bb8c01f138 100644 --- a/lib/std/c/ast.zig +++ b/lib/std/c/ast.zig @@ -656,3 +656,26 @@ pub const Node = struct { }, }; }; + +pub const Expr = struct { + id: Id, + ty: *Type, + value: union(enum) { + None, + }, + + pub const Id = enum { + Infix, + Literal, + }; + + pub const Infix = struct { + base: Expr = Expr{ .id = .Infix }, + lhs: *Expr, + op_token: TokenIndex, + op: Op, + rhs: *Expr, + + pub const Op = enum {}; + }; +}; diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig index 253df5981b..dd646e06d6 100644 --- a/lib/std/c/parse.zig +++ b/lib/std/c/parse.zig @@ -15,7 +15,6 @@ pub const Error = error{ParseError} || Allocator.Error; pub const Options = struct { // /// Keep simple macros unexpanded and add the definitions to the ast // retain_macros: bool = false, - /// Warning or error warn_as_err: union(enum) { /// All warnings are warnings @@ -335,7 +334,9 @@ const Parser = struct { fn staticAssert(parser: *Parser) !?*Node { const tok = parser.eatToken(.Keyword_static_assert) orelse return null; _ = try parser.expectToken(.LParen); - const const_expr = try parser.constExpr(); + const const_expr = (try parser.constExpr()) 
orelse return parser.err(.{
+            .ExpectedExpr = .{ .token = parser.it.index },
+        });
         _ = try parser.expectToken(.Comma);
         const str = try parser.expectToken(.StringLiteral);
         _ = try parser.expectToken(.RParen);
@@ -707,7 +708,9 @@ const Parser = struct {
     fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
         if (parser.eatToken(.Keyword_alignas)) |tok| {
             _ = try parser.expectToken(.LParen);
-            const node = (try parser.typeName()) orelse (try parser.constExpr());
+            const node = (try parser.typeName()) orelse (try parser.constExpr()) orelse return parser.err(.{
+                .ExpectedExpr = .{ .token = parser.it.index },
+            });
             if (ds.align_spec != null) {
                 try parser.warn(.{
                     .DuplicateSpecifier = .{ .token = parser.it.index },
@@ -769,7 +772,9 @@ const Parser = struct {
             .value = null,
         };
         if (parser.eatToken(.Equal)) |eq| {
-            node.value = try parser.constExpr();
+            node.value = (try parser.constExpr()) orelse return parser.err(.{
+                .ExpectedExpr = .{ .token = parser.it.index },
+            });
         }
         return &node.base;
     }
@@ -845,10 +850,14 @@ const Parser = struct {
     }
 
     /// TypeName <- TypeSpec* AbstractDeclarator?
-    fn typeName(parser: *Parser) !*Node {
+    fn typeName(parser: *Parser) Error!?*Node {
+        @panic("TODO");
+    }
 
     /// RecordDeclarator <- Declarator? (COLON ConstExpr)?
-    fn recordDeclarator(parser: *Parser) !*Node {}
+    fn recordDeclarator(parser: *Parser) Error!*Node.RecordDeclarator {
+        @panic("TODO");
+    }
 
     /// Pointer <- ASTERISK TypeQual* Pointer?
     fn pointer(parser: *Parser) Error!?*Node.Pointer {
@@ -1001,14 +1010,18 @@ const Parser = struct {
     }
 
     /// Expr <- AssignmentExpr (COMMA Expr)*
-    fn expr(parser: *Parser) !*Node {}
+    fn expr(parser: *Parser) Error!?*Expr {
+        @panic("TODO");
+    }
 
     /// AssignmentExpr
     /// <- ConditionalExpr // TODO recursive?
     /// / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUAL /
     /// / ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL /
     /// / AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr
-    fn assignmentExpr(parser: *Parser) !*Node {}
+    fn assignmentExpr(parser: *Parser) !?*Expr {
+        @panic("TODO");
+    }
 
     /// ConstExpr <- ConditionalExpr
     fn constExpr(parser: *Parser) Error!?*Expr {
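
(Sketch, not part of the patch: the hunk below reduces every remaining
expression level to an `@panic("TODO")` stub. For orientation only, one way a
single precedence level might later be filled in, assuming the sibling levels
are converted to `Error!?*Expr` like `expr`/`constExpr` above, and assuming
`Expr.Infix.Op` gains a `BitOr` variant; patch 30 leaves `Op` empty:)

    fn binOrExprSketch(parser: *Parser) Error!?*Expr {
        // parse the higher-precedence operand first
        var lhs = (try parser.binXorExpr()) orelse return null;
        // then fold any `a | b | c` chain into left-leaning Infix nodes
        while (parser.eatToken(.Pipe)) |op_tok| {
            const rhs = (try parser.binXorExpr()) orelse return parser.err(.{
                .ExpectedExpr = .{ .token = parser.it.index },
            });
            const node = try parser.arena.create(Expr.Infix);
            node.* = .{
                .lhs = lhs,
                .op_token = op_tok,
                .op = .BitOr, // assumed Op variant, not in this series
                .rhs = rhs,
            };
            lhs = &node.base;
        }
        return lhs;
    }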
@@ -1022,37 +1035,59 @@ const Parser = struct {
     }
 
     /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)?
-    fn conditionalExpr(parser: *Parser) !*Node {}
+    fn conditionalExpr(parser: *Parser) Error!?*Expr {
+        @panic("TODO");
+    }
 
     /// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)*
-    fn logicalOrExpr(parser: *Parser) !*Node {}
+    fn logicalOrExpr(parser: *Parser) !*Node {
+        // const lhs = (try parser.logicalAndExpr()) orelse return null;
+        @panic("TODO");
+    }
 
     /// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)*
-    fn logicalAndExpr(parser: *Parser) !*Node {}
+    fn logicalAndExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)*
-    fn binOrExpr(parser: *Parser) !*Node {}
+    fn binOrExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// BinXorExpr <- BinAndExpr (CARET BinXorExpr)*
-    fn binXorExpr(parser: *Parser) !*Node {}
+    fn binXorExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)*
-    fn binAndExpr(parser: *Parser) !*Node {}
+    fn binAndExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// EqualityExpr <- ComparisonExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)*
-    fn equalityExpr(parser: *Parser) !*Node {}
+    fn equalityExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// ComparisonExpr <- ShiftExpr ((ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL / ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisonExpr)*
-    fn comparisionExpr(parser: *Parser) !*Node {}
+    fn comparisonExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// ShiftExpr <- AdditiveExpr ((ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)*
-    fn shiftExpr(parser: *Parser) !*Node {}
+    fn shiftExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// AdditiveExpr <- MultiplicativeExpr ((PLUS / MINUS) AdditiveExpr)*
-    fn additiveExpr(parser: *Parser) !*Node {}
+    fn additiveExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// MultiplicativeExpr <- UnaryExpr ((ASTERISK / SLASH / PERCENT) MultiplicativeExpr)*
-    fn multiplicativeExpr(parser: *Parser) !*Node {}
+    fn multiplicativeExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// UnaryExpr
     /// <- LPAREN TypeName RPAREN UnaryExpr
     /// / Keyword_sizeof UnaryExpr
     /// / Keyword_alignof LPAREN TypeName RPAREN
     /// / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr
     /// / PrimaryExpr PostFixExpr*
-    fn unaryExpr(parser: *Parser) !*Node {}
+    fn unaryExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// PrimaryExpr
     /// <- IDENTIFIER
     /// / INTEGERLITERAL / FLOATLITERAL / STRINGLITERAL / CHARLITERAL
     /// / LPAREN Expr RPAREN
     /// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN
-    fn primaryExpr(parser: *Parser) !*Node {}
+    fn primaryExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// Generic
     /// <- TypeName COLON AssignmentExpr
     /// / Keyword_default COLON AssignmentExpr
-    fn generic(parser: *Parser) !*Node {}
+    fn generic(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// PostFixExpr
     /// <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE
     /// / LBRACKET Expr RBRACKET
     /// / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN
     /// / (PERIOD / ARROW) IDENTIFIER
     /// / (PLUSPLUS / MINUSMINUS)
-    fn postFixExpr(parser: *Parser) !*Node {}
+    fn postFixExpr(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
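
(Sketch, not part of the patch: the initializer rules that follow are stubbed
the same way. A possible shape for `designator`, assuming a hypothetical
`Node.Designator` with `Index`/`Field` variants; nothing of the sort exists in
the series yet:)

    fn designatorSketch(parser: *Parser) Error!?*Node {
        if (parser.eatToken(.LBracket)) |lbracket| {
            // LBRACKET ConstExpr RBRACKET
            const index_expr = (try parser.constExpr()) orelse return parser.err(.{
                .ExpectedExpr = .{ .token = parser.it.index },
            });
            const rbracket = try parser.expectToken(.RBracket);
            // ... build and return a Node.Designator{ .Index = ... } from
            // lbracket/index_expr/rbracket here
        }
        if (parser.eatToken(.Period)) |period| {
            // PERIOD IDENTIFIER
            const identifier = try parser.expectToken(.Identifier);
            // ... build and return a Node.Designator{ .Field = ... } from
            // period/identifier here
        }
        return null;
    }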
     /// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA?
-    fn initializers(parser: *Parser) !*Node {}
+    fn initializers(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// Initializer
     /// <- LBRACE Initializers RBRACE
     /// / AssignmentExpr
-    fn initializer(parser: *Parser) !*Node {}
+    fn initializer(parser: *Parser, dr: *Node.Declarator) Error!?*Node {
+        @panic("TODO");
+    }
 
     /// Designator
     /// <- LBRACKET ConstExpr RBRACKET
     /// / PERIOD IDENTIFIER
-    fn designator(parser: *Parser) !*Node {}
+    fn designator(parser: *Parser) !*Node {
+        @panic("TODO");
+    }
 
     /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE
     fn compoundStmt(parser: *Parser) Error!?*Node {
@@ -1196,7 +1245,7 @@ const Parser = struct {
             try parser.pushScope(.Loop);
             defer parser.popScope();
             _ = try parser.expectToken(.LParen);
-            const init = if (try parser.declaration()) |decl| blk:{
+            const init = if (try parser.declaration()) |decl| blk: {
                 // TODO disallow storage class other than auto and register
                 break :blk decl;
             } else try parser.exprStmt();
@@ -1235,7 +1284,7 @@ const Parser = struct {
             _ = try parser.expectToken(.Colon);
             const node = try parser.arena.create(Node.LabeledStmt);
             node.* = .{
-                .kind = .{.Default = tok },
+                .kind = .{ .Default = tok },
                 .stmt = try parser.stmt(),
             };
             return &node.base;
@@ -1244,7 +1293,7 @@ const Parser = struct {
             _ = try parser.expectToken(.Colon);
             const node = try parser.arena.create(Node.LabeledStmt);
             node.* = .{
-                .kind = .{.Case = tok },
+                .kind = .{ .Case = tok },
                 .stmt = try parser.stmt(),
             };
             return &node.base;
@@ -1289,7 +1338,7 @@ const Parser = struct {
             if (parser.eatToken(.Colon)) |_| {
                 const node = try parser.arena.create(Node.LabeledStmt);
                 node.* = .{
-                    .kind = .{.Label = tok },
+                    .kind = .{ .Label = tok },
                     .stmt = try parser.stmt(),
                 };
                 return &node.base;
@@ -1379,3 +1428,4 @@ const Parser = struct {
         });
     }
 };
+
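
(Sketch, not part of the series: after patch 30 the statement grammar is in
place while expressions remain stubs. Consumers can already walk the new
LabeledStmt node the same way the parser casts nodes, via `@fieldParentPtr`
on `base`; `printStmt` below is a hypothetical sibling helper:)

    fn printLabeledStmt(node: *Node) void {
        const labeled = @fieldParentPtr(Node.LabeledStmt, "base", node);
        switch (labeled.kind) {
            .Label => |tok| std.debug.warn("label at token {}\n", .{tok}),
            .Case => |tok| std.debug.warn("case at token {}\n", .{tok}),
            .Default => |tok| std.debug.warn("default at token {}\n", .{tok}),
        }
        // a labeled statement always owns the statement it prefixes
        printStmt(labeled.stmt);
    }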