From 821907317eb77a96dc53adf20ac705b4501e2ab8 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 27 Nov 2015 00:40:26 -0700 Subject: [PATCH] support C-style comments, plus nesting --- README.md | 2 +- src/tokenizer.cpp | 103 ++++++++++++++++++++++++++++++++++++++++++-- src/tokenizer.hpp | 8 ---- test/standalone.cpp | 25 ++++++++++- 4 files changed, 124 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index bc92675ce5..f735a21402 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ readable, safe, optimal, and concise code to solve any computing problem. ## Roadmap - * C style comments. + * empty function and return with no expression * Simple .so library * Multiple files * figure out integers diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index 0e83f9a5cb..f56c1d8fb9 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -87,6 +87,19 @@ case DIGIT: \ case '_' +enum TokenizeState { + TokenizeStateStart, + TokenizeStateSymbol, + TokenizeStateNumber, + TokenizeStateString, + TokenizeStateSawDash, + TokenizeStateSawSlash, + TokenizeStateLineComment, + TokenizeStateMultiLineComment, + TokenizeStateMultiLineCommentSlash, + TokenizeStateMultiLineCommentStar, +}; + struct Tokenize { Buf *buf; @@ -96,6 +109,7 @@ struct Tokenize { int line; int column; Token *cur_tok; + int multi_line_comment_count; }; __attribute__ ((format (printf, 2, 3))) @@ -222,10 +236,80 @@ ZigList *tokenize(Buf *buf) { begin_token(&t, TokenIdNumberSign); end_token(&t); break; + case '/': + t.state = TokenizeStateSawSlash; + break; default: tokenize_error(&t, "invalid character: '%c'", c); } break; + case TokenizeStateSawSlash: + switch (c) { + case '/': + t.state = TokenizeStateLineComment; + break; + case '*': + t.state = TokenizeStateMultiLineComment; + t.multi_line_comment_count = 1; + break; + default: + tokenize_error(&t, "invalid character: '%c'", c); + break; + } + break; + case TokenizeStateLineComment: + switch (c) { + case '\n': + t.state = TokenizeStateStart; + break; + default: + // do nothing + break; + } + break; + case TokenizeStateMultiLineComment: + switch (c) { + case '*': + t.state = TokenizeStateMultiLineCommentStar; + break; + case '/': + t.state = TokenizeStateMultiLineCommentSlash; + break; + default: + // do nothing + break; + } + break; + case TokenizeStateMultiLineCommentSlash: + switch (c) { + case '*': + t.state = TokenizeStateMultiLineComment; + t.multi_line_comment_count += 1; + break; + case '/': + break; + default: + t.state = TokenizeStateMultiLineComment; + break; + } + break; + case TokenizeStateMultiLineCommentStar: + switch (c) { + case '/': + t.multi_line_comment_count -= 1; + if (t.multi_line_comment_count == 0) { + t.state = TokenizeStateStart; + } else { + t.state = TokenizeStateMultiLineComment; + } + break; + case '*': + break; + default: + t.state = TokenizeStateMultiLineComment; + break; + } + break; case TokenizeStateSymbol: switch (c) { case SYMBOL_CHAR: @@ -295,7 +379,18 @@ ZigList *tokenize(Buf *buf) { case TokenizeStateSawDash: end_token(&t); break; + case TokenizeStateSawSlash: + tokenize_error(&t, "unexpected EOF"); + break; + case TokenizeStateLineComment: + break; + case TokenizeStateMultiLineComment: + case TokenizeStateMultiLineCommentSlash: + case TokenizeStateMultiLineCommentStar: + tokenize_error(&t, "unterminated multi-line comment"); + break; } + t.pos = -1; begin_token(&t, TokenIdEof); end_token(&t); assert(!t.cur_tok); @@ -333,9 +428,11 @@ static const char * token_name(Token *token) { void print_tokens(Buf *buf, ZigList *tokens) { for (int i = 0; i < tokens->length; i += 1) { Token *token = &tokens->at(i); - printf("%s ", token_name(token)); - fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stdout); - printf("\n"); + fprintf(stderr, "%s ", token_name(token)); + if (token->start_pos >= 0) { + fwrite(buf_ptr(buf) + token->start_pos, 1, token->end_pos - token->start_pos, stderr); + } + fprintf(stderr, "\n"); } } diff --git a/src/tokenizer.hpp b/src/tokenizer.hpp index e2337aea8d..fbd085cecf 100644 --- a/src/tokenizer.hpp +++ b/src/tokenizer.hpp @@ -43,14 +43,6 @@ struct Token { int start_column; }; -enum TokenizeState { - TokenizeStateStart, - TokenizeStateSymbol, - TokenizeStateNumber, - TokenizeStateString, - TokenizeStateSawDash, -}; - ZigList *tokenize(Buf *buf); void print_tokens(Buf *buf, ZigList *tokens); diff --git a/test/standalone.cpp b/test/standalone.cpp index e6dbf75186..caeb926a06 100644 --- a/test/standalone.cpp +++ b/test/standalone.cpp @@ -75,6 +75,26 @@ static void add_all_test_cases(void) { exit(0); } )SOURCE", "OK\n"); + + add_simple_case("comments", R"SOURCE( + #link("c") + extern { + fn puts(s: *mut u8) -> i32; + fn exit(code: i32) -> unreachable; + } + + /** + * multi line doc comment + */ + fn another_function() -> i32 { return 0; } + + /// this is a documentation comment + /// doc comment line 2 + fn _start() -> unreachable { + puts(/* mid-line comment /* nested */ */ "OK"); + exit(0); + } + )SOURCE", "OK\n"); } static void run_test(TestCase *test_case) { @@ -83,11 +103,12 @@ static void run_test(TestCase *test_case) { Buf zig_stderr = BUF_INIT; Buf zig_stdout = BUF_INIT; int return_code; - os_exec_process("./zig", test_case->compiler_args, &return_code, &zig_stderr, &zig_stdout); + static const char *zig_exe = "./zig"; + os_exec_process(zig_exe, test_case->compiler_args, &return_code, &zig_stderr, &zig_stdout); if (return_code != 0) { printf("\nCompile failed with return code %d:\n", return_code); - printf("zig"); + printf("%s", zig_exe); for (int i = 0; i < test_case->compiler_args.length; i += 1) { printf(" %s", test_case->compiler_args.at(i)); }