diff --git a/README.md b/README.md index 60449464fb..85becaae6e 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ readable, safe, optimal, and concise code to solve any computing problem. * Resilient to parsing errors to make IDE integration work well. * Source code is UTF-8. * Shebang line OK so language can be used for "scripting" as well. + * Ability to mark functions as test and automatically run them in test mode. + * Memory zeroed by default, unless you initialize with "uninitialized". ## Roadmap diff --git a/src/buffer.cpp b/src/buffer.cpp index ab22a87afd..87d134b351 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -36,7 +36,7 @@ void buf_appendf(Buf *buf, const char *format, ...) { int orig_len = buf_len(buf); - buf_resize(buf, orig_len + required_size); + buf_resize(buf, orig_len + len1); int len2 = vsnprintf(buf_ptr(buf) + orig_len, required_size, format, ap2); assert(len2 == len1); diff --git a/src/buffer.hpp b/src/buffer.hpp index 47975f5fe9..88d9415d34 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -55,6 +55,14 @@ static inline void buf_init_from_mem(Buf *buf, const char *ptr, int len) { buf->list.at(buf_len(buf)) = 0; } +static inline void buf_init_from_str(Buf *buf, const char *str) { + buf_init_from_mem(buf, str, strlen(str)); +} + +static inline void buf_init_from_buf(Buf *buf, Buf *other) { + buf_init_from_mem(buf, buf_ptr(other), buf_len(other)); +} + static inline Buf *buf_create_from_mem(const char *ptr, int len) { Buf *buf = allocate(1); buf_init_from_mem(buf, ptr, len); diff --git a/src/main.cpp b/src/main.cpp index 248d4d2770..1f28ce41bc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,7 +28,6 @@ static int usage(const char *arg0) { fprintf(stderr, "Usage: %s [command] [options] target\n" "Commands:\n" " build create an executable from target\n" - " link turn a .o file into an executable\n" "Options:\n" " --output output file\n" " --version print version number and exit\n" diff --git a/src/parser.cpp b/src/parser.cpp index f5da8addeb..dd54168b4b 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -78,3 +78,7 @@ void ast_print(AstNode *node, int indent) { break; } } + +AstNode *ast_create_root(Token *token) { + return nullptr; +} diff --git a/src/parser.hpp b/src/parser.hpp index db2f5eb623..008d478f4f 100644 --- a/src/parser.hpp +++ b/src/parser.hpp @@ -89,4 +89,6 @@ const char *node_type_str(NodeType node_type); void ast_print(AstNode *node, int indent); +AstNode *ast_create_root(Token *token); + #endif diff --git a/src/parsergen.cpp b/src/parsergen.cpp index a812f990c0..31bb1a8493 100644 --- a/src/parsergen.cpp +++ b/src/parsergen.cpp @@ -187,29 +187,39 @@ struct RuleNode { }; -enum ParserStateType { - ParserStateTypeError, - ParserStateTypeOk, - ParserStateTypeCapture, +enum CodeGenType { + CodeGenTypeTransition, + CodeGenTypeError, + CodeGenTypeSave, + CodeGenTypePushNode, + CodeGenTypeCapture, + CodeGenTypePopNode, + CodeGenTypeEatToken, }; -struct ParserStateError { +struct CodeGenError { Buf *msg; }; -struct ParserStateCapture { +struct CodeGenCapture { Buf *body; + bool is_root; + Buf *field_names; +}; + +struct CodeGen { + CodeGenType type; + union { + CodeGenError error; + CodeGenCapture capture; + }; }; struct ParserState { - ParserStateType type; + ZigList code_gen_list; // One for each token ID. ParserState **transition; int index; - union { - ParserStateError error; - ParserStateCapture capture; - }; }; enum LexState { @@ -234,6 +244,7 @@ struct Gen { ZigList transition_table; ZigList tokens; RuleNode *root; + int biggest_tuple_len; Buf *in_buf; LexState lex_state; @@ -249,9 +260,8 @@ struct Gen { int lex_body_end; }; -static ParserState *create_state(Gen *g, ParserStateType type) { +static ParserState *create_state(Gen *g) { ParserState *state = allocate(1); - state->type = type; state->index = g->transition_table.length; state->transition = allocate(g->tokens.length); g->transition_table.append(state); @@ -264,28 +274,93 @@ static void fill_state_with_transition(Gen *g, ParserState *source, ParserState } } -static void gen(Gen *g, RuleNode *node) { +static void state_add_code(ParserState *state, CodeGen *code) { + state->code_gen_list.append(code); +} + +static void state_add_save_token(ParserState *state) { + CodeGen *code = allocate(1); + code->type = CodeGenTypeSave; + state_add_code(state, code); +} + +static void state_add_error(ParserState *state, Buf *msg) { + CodeGen *code = allocate(1); + code->type = CodeGenTypeError; + code->error.msg = msg; + state_add_code(state, code); +} + +static void state_add_transition(ParserState *state) { + CodeGen *code = allocate(1); + code->type = CodeGenTypeTransition; + state_add_code(state, code); +} + +static void state_add_push_node(ParserState *state) { + CodeGen *code = allocate(1); + code->type = CodeGenTypePushNode; + state_add_code(state, code); +} + +static CodeGen *codegen_create_capture(Buf *body, bool is_root, int field_name_count) { + CodeGen *code = allocate(1); + code->type = CodeGenTypeCapture; + code->capture.body = body; + code->capture.is_root = is_root; + code->capture.field_names = allocate(field_name_count); + return code; +} + +static void state_add_pop_node(ParserState *state) { + CodeGen *code = allocate(1); + code->type = CodeGenTypePopNode; + state_add_code(state, code); +} + +static void state_add_eat_token(ParserState *state) { + CodeGen *code = allocate(1); + code->type = CodeGenTypeEatToken; + state_add_code(state, code); +} + +static void gen(Gen *g, RuleNode *node, Buf *out_field_name) { switch (node->type) { case RuleNodeTypeToken: { - ParserState *ok_state = create_state(g, ParserStateTypeOk); - ParserState *err_state = create_state(g, ParserStateTypeError); + buf_init_from_str(out_field_name, "token"); - err_state->error.msg = buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name)); + state_add_save_token(g->cur_state); + + ParserState *ok_state = create_state(g); + ParserState *err_state = create_state(g); + state_add_error(err_state, buf_sprintf("expected token '%s'", buf_ptr(&node->token.token->name))); fill_state_with_transition(g, g->cur_state, err_state); g->cur_state->transition[node->token.token->id] = ok_state; + state_add_transition(g->cur_state); + state_add_eat_token(g->cur_state); + g->cur_state = ok_state; } break; case RuleNodeTypeTuple: { + buf_init_from_str(out_field_name, "node"); + + state_add_push_node(g->cur_state); + + bool is_root = (node == g->root); + int field_name_count = node->tuple.children.length; + CodeGen *code = codegen_create_capture(&node->tuple.body, is_root, field_name_count); + for (int i = 0; i < node->tuple.children.length; i += 1) { RuleNode *child = node->tuple.children.at(i); - gen(g, child); + gen(g, child, &code->capture.field_names[i]); } - g->cur_state->type = ParserStateTypeCapture; - g->cur_state->capture.body = &node->tuple.body; + state_add_code(g->cur_state, code); + + state_add_pop_node(g->cur_state); } break; case RuleNodeTypeMany: @@ -301,7 +376,10 @@ static void gen(Gen *g, RuleNode *node) { zig_panic("TODO"); break; case RuleNodeTypeSubRule: - zig_panic("TODO"); + { + RuleNode *child = node->sub_rule.child; + gen(g, child, out_field_name); + } break; } } @@ -407,6 +485,9 @@ static void end_token_name(Gen *g) { assert(g->lex_cur_rule->type == RuleNodeTypeTuple); g->lex_cur_rule->tuple.children.append(node); + g->biggest_tuple_len = max(g->biggest_tuple_len, g->lex_cur_rule->tuple.children.length); + + lex_pop_stack(g); } @@ -571,6 +652,76 @@ static void initialize_rules(Gen *g) { } } +enum TemplateState { + TemplateStateStart, + TemplateStateDollar, + TemplateStateNumber, +}; + +static Buf *fill_template(Buf *body, const char *result_name, Buf *field_names) { + //fprintf(stderr, "fill template input:\n%s\n", buf_ptr(body)); + Buf *result = buf_alloc(); + TemplateState state = TemplateStateStart; + int digit_start; + for (int i = 0; i < buf_len(body); i += 1) { + uint8_t c = buf_ptr(body)[i]; + switch (state) { + case TemplateStateStart: + switch (c) { + case '$': + state = TemplateStateDollar; + break; + default: + buf_append_char(result, c); + break; + } + break; + case TemplateStateDollar: + switch (c) { + case '$': + buf_append_str(result, result_name); + state = TemplateStateStart; + break; + case DIGIT: + digit_start = i; + state = TemplateStateNumber; + break; + default: + buf_append_char(result, '$'); + buf_append_char(result, c); + state = TemplateStateStart; + break; + } + break; + case TemplateStateNumber: + switch (c) { + case DIGIT: + // nothing + break; + default: + { + Buf *num_buf = buf_create_from_mem(&buf_ptr(body)[digit_start], i - digit_start); + int index = atoi(buf_ptr(num_buf)) - 1; + buf_appendf(result, "(top_node->data[%d].%s)%c", + index, buf_ptr(&field_names[index]), c); + + state = TemplateStateStart; + } + break; + } + break; + } + } + switch (state) { + case TemplateStateStart: + // OK + break; + default: + zig_panic("unable to fill grammar template"); + } + //fprintf(stderr, "fill template output:\n%s\n", buf_ptr(result)); + return result; +} int main(int argc, char **argv) { const char *in_filename = argv[1]; @@ -603,8 +754,9 @@ int main(int argc, char **argv) { g.root = g.rules.at(0); - g.cur_state = create_state(&g, ParserStateTypeOk); - gen(&g, g.root); + g.cur_state = create_state(&g); + Buf root_field_name = {0}; + gen(&g, g.root, &root_field_name); fprintf(out_f, "/* This file is generated by parsergen.cpp */\n"); fprintf(out_f, "\n"); @@ -627,15 +779,14 @@ int main(int argc, char **argv) { } fprintf(out_f, "\n"); - /* TODO - fprintf(out_f, "struct ParserGenNode{\n"); + fprintf(out_f, "struct ParserGenNode {\n"); + fprintf(out_f, " int next_index;\n"); fprintf(out_f, " union {\n"); - fprintf(out_f, " [%d];\n", biggest_tuple_len); fprintf(out_f, " Token *token;\n"); - fprintf(out_f, " };\n"); + fprintf(out_f, " AstNode *node;\n"); + fprintf(out_f, " } data[%d];\n", g.biggest_tuple_len); fprintf(out_f, "};\n"); fprintf(out_f, "\n"); - */ fprintf(out_f, "AstNode * ast_parse(Buf *buf, ZigList *tokens) {\n"); @@ -655,39 +806,66 @@ int main(int argc, char **argv) { fprintf(out_f, " int state = 0;\n"); + fprintf(out_f, " int token_index = 0;\n"); + fprintf(out_f, " Token *token = &tokens->at(token_index);\n"); fprintf(out_f, " AstNode *root = nullptr;\n"); + fprintf(out_f, " ZigList stack = {0};\n"); + fprintf(out_f, " ParserGenNode *top_node = nullptr;\n"); - fprintf(out_f, " for (int i = 0; i < tokens->length; i += 1) {\n"); - fprintf(out_f, " Token *token = &tokens->at(i);\n"); + fprintf(out_f, " for (;;) {\n"); fprintf(out_f, " switch (state) {\n"); - for (int i = 0; i < g.transition_table.length; i += 1) { - ParserState *state = g.transition_table.at(i); - fprintf(out_f, " case %d:\n", i); - switch (state->type) { - case ParserStateTypeError: - fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(state->error.msg)); - break; - case ParserStateTypeOk: - fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index); - fprintf(out_f, " assert(transition[%d][token->id] < %d);\n", - state->index, g.transition_table.length); - fprintf(out_f, " state = transition[%d][token->id];\n", state->index); - break; - case ParserStateTypeCapture: - // TODO fprintf(out_f, " %s\n", buf_ptr(state->capture.body)); - fprintf(out_f, " state = transition[%d][token->id];\n", state->index); - break; + for (int state_i = 0; state_i < g.transition_table.length; state_i += 1) { + ParserState *state = g.transition_table.at(state_i); + fprintf(out_f, " case %d: {\n", state_i); + for (int code_i = 0; code_i < state->code_gen_list.length; code_i += 1) { + CodeGen *code = state->code_gen_list.at(code_i); + switch (code->type) { + case CodeGenTypeTransition: + fprintf(out_f, " assert(transition[%d][token->id] >= 0);\n", state->index); + fprintf(out_f, " assert(transition[%d][token->id] < %d);\n", + state->index, g.transition_table.length); + fprintf(out_f, " state = transition[%d][token->id];\n", state->index); + break; + case CodeGenTypeError: + fprintf(out_f, " ast_error(token, \"%s\");\n", buf_ptr(code->error.msg)); + break; + case CodeGenTypeSave: + fprintf(out_f, " top_node->data[top_node->next_index++].token = token;\n"); + break; + case CodeGenTypePushNode: + fprintf(out_f, " top_node = allocate(1);\n"); + fprintf(out_f, " stack.append(top_node);\n"); + break; + case CodeGenTypeCapture: + if (code->capture.is_root) { + Buf *code_text = fill_template(code->capture.body, "root", code->capture.field_names); + fprintf(out_f, "%s\n", buf_ptr(code_text)); + fprintf(out_f, " return root;\n"); + } else { + zig_panic("TODO capture non-root"); + } + break; + case CodeGenTypePopNode: + fprintf(out_f, " stack.pop();\n"); + fprintf(out_f, " top_node = stack.length ? stack.last() : nullptr;\n"); + break; + case CodeGenTypeEatToken: + fprintf(out_f, " token_index += 1;\n"); + fprintf(out_f, " token = (token_index < tokens->length) ? &tokens->at(token_index) : nullptr;\n"); + break; + } } fprintf(out_f, " break;\n"); + fprintf(out_f, " }\n"); } fprintf(out_f, " default:\n"); fprintf(out_f, " zig_panic(\"unreachable\");\n"); fprintf(out_f, " }\n"); fprintf(out_f, " }\n"); - fprintf(out_f, " return root;\n"); + fprintf(out_f, " zig_panic(\"unreachable\");\n"); fprintf(out_f, "}\n"); - + return 0; }