diff --git a/README.md b/README.md index 072d0688af..488cb1ccac 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,32 @@ # zig lang -C upgrade. +An experiment in writing a low-level programming language with the intent to +replace C. Zig intends to be a small language, yet powerful enough to write +readable, safe, optimal, and concise code to solve any computing problem. -Start with C. +## Goals + + * Ability to run arbitrary code at compile time and generate code. + * Completely compatible with C libraries with no wrapper necessary. + * Creating a C library should be a primary use case. Should be easy to export + an auto-generated .h file. + * Generics such as containers. + * Do not depend on libc. + * First-class error code support. + * Include documentation generator. + * Eliminate the need for make, cmake, etc. + * Friendly toward package maintainers. + * Eliminate the need for C headers (when using zig internally). + * Ability to declare dependencies as Git URLs with commit locking (can + provide a tag or sha1). + * Rust-style enums. + * Opinionated when it makes life easier. + - Tab character in source code is a compile error. + - Whitespace at the end of line is a compile error. + * Resilient to parsing errors to make IDE integration work well. + * Source code is UTF-8. + +## Roadmap + + * Hello, world. + * How should the Widget use case be solved? In Genesis I'm using C++ and inheritance. diff --git a/src/buffer.cpp b/src/buffer.cpp index 7bcd1b3783..ab22a87afd 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -23,3 +23,24 @@ Buf *buf_sprintf(const char *format, ...) { return buf; } + +void buf_appendf(Buf *buf, const char *format, ...) 
{ + va_list ap, ap2; + va_start(ap, format); + va_copy(ap2, ap); + + int len1 = vsnprintf(nullptr, 0, format, ap); + assert(len1 >= 0); + + size_t required_size = len1 + 1; + + int orig_len = buf_len(buf); + + buf_resize(buf, orig_len + required_size); + + int len2 = vsnprintf(buf_ptr(buf) + orig_len, required_size, format, ap2); + assert(len2 == len1); + + va_end(ap2); + va_end(ap); +} diff --git a/src/buffer.hpp b/src/buffer.hpp index ace195f3ec..529a6c11d1 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -93,6 +93,9 @@ static inline void buf_append_char(Buf *buf, uint8_t c) { buf_append_mem(buf, (const char *)&c, 1); } +void buf_appendf(Buf *buf, const char *format, ...) + __attribute__ ((format (printf, 2, 3))); + static inline bool buf_eql_mem(Buf *buf, const char *mem, int mem_len) { if (buf_len(buf) != mem_len) return false; diff --git a/src/main.cpp b/src/main.cpp index b8823c340f..34e5474ba3 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -129,7 +129,6 @@ static Buf *fetch_file(FILE *f) { enum TokenId { - TokenIdDirective, TokenIdSymbol, TokenIdLParen, TokenIdRParen, @@ -141,6 +140,9 @@ enum TokenId { TokenIdSemicolon, TokenIdNumberLiteral, TokenIdPlus, + TokenIdColon, + TokenIdArrow, + TokenIdDash, }; struct Token { @@ -153,14 +155,10 @@ struct Token { enum TokenizeState { TokenizeStateStart, - TokenizeStateDirective, - TokenizeStateDirectiveName, - TokenizeStateIncludeQuote, - TokenizeStateDirectiveEnd, - TokenizeStateInclude, TokenizeStateSymbol, - TokenizeStateString, TokenizeStateNumber, + TokenizeStateString, + TokenizeStateSawDash, }; struct Tokenize { @@ -171,11 +169,7 @@ struct Tokenize { int line; int column; Token *cur_tok; - Buf *directive_name; Buf *cur_dir_path; - uint8_t unquote_char; - int quote_start_pos; - Buf *include_path; ZigList *include_paths; }; @@ -217,68 +211,6 @@ static void end_token(Tokenize *t) { t->cur_tok = nullptr; } -static void put_back(Tokenize *t, int count) { - t->pos -= count; -} - -static void 
begin_directive(Tokenize *t) { - t->state = TokenizeStateDirective; - begin_token(t, TokenIdDirective); - assert(!t->directive_name); - t->directive_name = buf_alloc(); -} - -static bool find_and_include_file(Tokenize *t, char *dir_path, char *file_path) { - Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path); - - FILE *f = fopen(buf_ptr(full_path), "rb"); - if (!f) - return false; - - Buf *contents = fetch_file(f); - - buf_splice_buf(t->buf, t->pos, t->pos, contents); - - return true; -} - -static void render_include(Tokenize *t, Buf *target_path, char unquote_char) { - if (unquote_char == '"') { - if (find_and_include_file(t, buf_ptr(t->cur_dir_path), buf_ptr(target_path))) - return; - } - for (int i = 0; i < t->include_paths->length; i += 1) { - char *include_path = t->include_paths->at(i); - if (find_and_include_file(t, include_path, buf_ptr(target_path))) - return; - } - tokenize_error(t, "include path \"%s\" not found", buf_ptr(target_path)); -} - -static void end_directive(Tokenize *t) { - end_token(t); - if (t->include_path) { - render_include(t, t->include_path, t->unquote_char); - t->include_path = nullptr; - } - t->state = TokenizeStateStart; -} - -static void end_directive_name(Tokenize *t) { - if (buf_eql_str(t->directive_name, "include")) { - t->state = TokenizeStateInclude; - t->directive_name = nullptr; - } else { - tokenize_error(t, "invalid directive name: \"%s\"", buf_ptr(t->directive_name)); - } -} - -static void end_symbol(Tokenize *t) { - put_back(t, 1); - end_token(t); - t->state = TokenizeStateStart; -} - static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *cur_dir_path) { Tokenize t = {0}; t.tokens = allocate>(1); @@ -300,9 +232,6 @@ static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *c t.state = TokenizeStateNumber; begin_token(&t, TokenIdNumberLiteral); break; - case '#': - begin_directive(&t); - break; case '"': begin_token(&t, TokenIdStringLiteral); t.state = TokenizeStateString; @@ -335,88 +264,31 @@ 
static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *c begin_token(&t, TokenIdSemicolon); end_token(&t); break; + case ':': + begin_token(&t, TokenIdColon); + end_token(&t); + break; case '+': begin_token(&t, TokenIdPlus); end_token(&t); break; + case '-': + begin_token(&t, TokenIdDash); + t.state = TokenizeStateSawDash; + break; default: tokenize_error(&t, "invalid character: '%c'", c); } break; - case TokenizeStateDirective: - switch (c) { - case '\n': - end_directive_name(&t); - end_directive(&t); - break; - case ' ': - case '\t': - case '\f': - case '\r': - case 0xb: - break; - case SYMBOL_CHAR: - t.state = TokenizeStateDirectiveName; - buf_append_char(t.directive_name, c); - break; - default: - tokenize_error(&t, "invalid directive character: '%c'", c); - break; - } - break; - case TokenizeStateDirectiveName: - switch (c) { - case WHITESPACE: - end_directive_name(&t); - break; - case SYMBOL_CHAR: - buf_append_char(t.directive_name, c); - break; - default: - tokenize_error(&t, "invalid directive name character: '%c'", c); - break; - } - break; - case TokenizeStateInclude: - switch (c) { - case WHITESPACE: - break; - case '<': - case '"': - t.state = TokenizeStateIncludeQuote; - t.quote_start_pos = t.pos; - t.unquote_char = (c == '<') ? 
'>' : '"'; - break; - } - break; - case TokenizeStateIncludeQuote: - if (c == t.unquote_char) { - t.include_path = buf_slice(t.buf, t.quote_start_pos + 1, t.pos); - t.state = TokenizeStateDirectiveEnd; - } - break; - case TokenizeStateDirectiveEnd: - switch (c) { - case '\n': - end_directive(&t); - break; - case ' ': - case '\t': - case '\f': - case '\r': - case 0xb: - break; - default: - tokenize_error(&t, "expected whitespace or newline: '%c'", c); - } - break; case TokenizeStateSymbol: switch (c) { case SYMBOL_CHAR: break; default: - end_symbol(&t); - break; + t.pos -= 1; + end_token(&t); + t.state = TokenizeStateStart; + continue; } break; case TokenizeStateString: @@ -434,7 +306,22 @@ static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *c case DIGIT: break; default: - end_symbol(&t); + t.pos -= 1; + end_token(&t); + t.state = TokenizeStateStart; + continue; + } + break; + case TokenizeStateSawDash: + switch (c) { + case '>': + t.cur_tok->id = TokenIdArrow; + end_token(&t); + t.state = TokenizeStateStart; + break; + default: + end_token(&t); + t.state = TokenizeStateStart; break; } break; @@ -450,30 +337,17 @@ static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *c switch (t.state) { case TokenizeStateStart: break; - case TokenizeStateDirective: - end_directive(&t); - break; - case TokenizeStateDirectiveName: - end_directive_name(&t); - end_directive(&t); - break; - case TokenizeStateInclude: - tokenize_error(&t, "missing include path"); - break; case TokenizeStateSymbol: - end_symbol(&t); + end_token(&t); break; case TokenizeStateString: tokenize_error(&t, "unterminated string"); break; case TokenizeStateNumber: - end_symbol(&t); + end_token(&t); break; - case TokenizeStateIncludeQuote: - tokenize_error(&t, "unterminated include path"); - break; - case TokenizeStateDirectiveEnd: - end_directive(&t); + case TokenizeStateSawDash: + end_token(&t); break; } assert(!t.cur_tok); @@ -482,7 +356,6 @@ static ZigList *tokenize(Buf *buf, ZigList 
*include_paths, Buf *c static const char * token_name(Token *token) { switch (token->id) { - case TokenIdDirective: return "Directive"; case TokenIdSymbol: return "Symbol"; case TokenIdLParen: return "LParen"; case TokenIdRParen: return "RParen"; @@ -494,6 +367,9 @@ static const char * token_name(Token *token) { case TokenIdSemicolon: return "Semicolon"; case TokenIdNumberLiteral: return "NumberLiteral"; case TokenIdPlus: return "Plus"; + case TokenIdColon: return "Colon"; + case TokenIdArrow: return "Arrow"; + case TokenIdDash: return "Dash"; } return "(invalid token)"; } @@ -507,6 +383,83 @@ static void print_tokens(Buf *buf, ZigList *tokens) { } } +enum NodeType { + NodeTypeRoot, +}; + +struct AstNode { + enum NodeType type; + ZigList children; +}; + +enum AstState { + AstStateStart, +}; + +struct BuildAst { + Buf *buf; + AstNode *root; + AstState state; + int line; + int column; +}; + +__attribute__ ((format (printf, 2, 3))) +static void ast_error(BuildAst *b, const char *format, ...) 
{ + int line = b->line + 1; + int column = b->column + 1; + + va_list ap; + va_start(ap, format); + fprintf(stderr, "Error: Line %d, column %d: ", line, column); + vfprintf(stderr, format, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(EXIT_FAILURE); +} + +static inline bool mem_eql_str(const char *mem, size_t mem_len, const char *str) { + size_t str_len = strlen(str); + if (str_len != mem_len) + return false; + return memcmp(mem, str, mem_len) == 0; +} + + +static AstNode *build_ast(Buf *buf, ZigList *tokens) { + BuildAst b = {0}; + b.buf = buf; + b.root = allocate(1); + b.root->type = NodeTypeRoot; + + for (int i = 0; i < tokens->length; i += 1) { + Token *token = &tokens->at(i); + const char *token_str = buf_ptr(buf) + token->start_pos; + int token_len = token->end_pos - token->start_pos; + b.line = token->start_line; + b.column = token->start_column; + switch (b.state) { + case AstStateStart: + if (mem_eql_str(token_str, token_len, "fn")) { + zig_panic("TODO fn"); + } else { + Buf msg = {0}; + buf_append_str(&msg, "unexpected symbol: '"); + buf_append_mem(&msg, token_str, token_len); + buf_append_str(&msg, "'"); + ast_error(&b, "%s", buf_ptr(&msg)); + } + break; + } + } + + return b.root; +} + +static void print_ast(AstNode *node) { + zig_panic("TODO"); +} + char cur_dir[1024]; int main(int argc, char **argv) { @@ -559,18 +512,18 @@ int main(int argc, char **argv) { Buf *in_data = fetch_file(in_f); - fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data)); + fprintf(stderr, "Original source:\n"); + fprintf(stderr, "----------------\n"); + fprintf(stderr, "%s\n", buf_ptr(in_data)); ZigList *tokens = tokenize(in_data, &include_paths, cur_dir_path); fprintf(stderr, "\nTokens:\n"); + fprintf(stderr, "---------\n"); print_tokens(in_data, tokens); - /* - Buf *preprocessed_source = preprocess(in_data, tokens, &include_paths, cur_dir_path); - - fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source)); - */ + AstNode *root = 
build_ast(in_data, tokens); + print_ast(root); return EXIT_SUCCESS; diff --git a/test/add.zig b/test/add.zig index 59aab49836..b25094575d 100644 --- a/test/add.zig +++ b/test/add.zig @@ -1,3 +1,3 @@ -int add(int a, int b) { - return a + b; +pub fn add(a: int, b: int) -> int { + a + b } diff --git a/test/hello.zig b/test/hello.zig index f18ba2558b..8fbde414c6 100644 --- a/test/hello.zig +++ b/test/hello.zig @@ -1,6 +1,6 @@ -#include -#include "add.h" -int main(int argc, char **argv) { - fprintf(stderr, "hello: %d", add(1, 2)); + +fn main(argc: int, argv: *mut char) -> int { + puts("Hello, world!\n"); + return 0; }