parsing hello.zig example with recursive descent

that was easy
This commit is contained in:
Andrew Kelley 2015-11-23 21:30:12 -07:00
parent 6b911f1e94
commit 1b24f4c73c
7 changed files with 379 additions and 104 deletions

View file

@ -69,3 +69,31 @@ zig | C equivalent | Description
f128 | long double | 128-bit IEEE 754 floating point f128 | long double | 128-bit IEEE 754 floating point
isize | ssize_t | signed pointer sized integer isize | ssize_t | signed pointer sized integer
usize | size_t | unsigned pointer sized integer usize | size_t | unsigned pointer sized integer
### Grammar
```
Root : many(FnDecl) token(EOF);
FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen);
ParamDecl : token(Symbol) token(Colon) Type;
Type : token(Symbol) | PointerType;
PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
Block : token(LBrace) many(Statement) token(RBrace);
Statement : ExpressionStatement | ReturnStatement ;
ExpressionStatement : Expression token(Semicolon) ;
ReturnStatement : token(Return) Expression token(Semicolon) ;
Expression : token(Number) | token(String) | FnCall ;
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
```

View file

@ -1,57 +0,0 @@
Root<node> : many(FnDecl) token(EOF) {
$$ = ast_create_root($1);
};
FnDecl<node> : token(Fn) token(Symbol) token(LParen) list(ParamDecl, token(Comma)) token(RParen) token(Arrow) Type Block {
$$ = ast_create_fn_decl($2, $4, $7, $8);
} | token(Fn) token(Symbol) token(LParen) list(ParamDecl, token(Comma)) token(RParen) Block {
$$ = ast_create_void_fn_decl($2, $4, $6);
};
ParamDecl<node> : token(Symbol) token(Colon) Type {
$$ = ast_create_param_decl($1, $2);
};
Type<node> : token(Symbol) {
$$ = ast_create_symbol_type($1);
} | PointerType {
$$ = $1;
};
PointerType<node> : token(Star) token(Const) Type {
$$ = ast_create_pointer_type($2, $3);
} | token(Star) token(Mut) Type {
$$ = ast_create_pointer_type($2, $3);
};
Block<node> : token(LBrace) many(Statement) Expression token(RBrace) {
$$ = ast_create_expr_block($2, $3);
} | token(LBrace) many(Statement) token(RBrace) {
$$ = ast_create_block($2);
};
Statement<node> : ExpressionStatement {
$$ = $1;
} | ReturnStatement {
$$ = $1;
};
ExpressionStatement<node> : Expression token(Semicolon) {
$$ = ast_create_expression_statement($1);
};
ReturnStatement<node> : token(Return) Expression token(Semicolon) {
$$ = ast_create_return_statement($2);
};
Expression<node> : token(Number) {
$$ = ast_create_number($1);
} | token(String) {
$$ = ast_create_string($1);
} | FnCall {
$$ = $1;
};
FnCall<node> : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) {
$$ = ast_create_fn_call($1, $3);
};

View file

@ -89,6 +89,8 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
AstNode *root = ast_parse(in_data, tokens); AstNode *root = ast_parse(in_data, tokens);
assert(root); assert(root);
fprintf(stderr, "\nAST:\n");
fprintf(stderr, "------\n");
ast_print(root, 0); ast_print(root, 0);
@ -135,7 +137,7 @@ int main(int argc, char **argv) {
} else { } else {
switch (cmd) { switch (cmd) {
case CmdNone: case CmdNone:
zig_panic("unreachable"); zig_unreachable();
case CmdBuild: case CmdBuild:
if (!in_file) { if (!in_file) {
in_file = arg; in_file = arg;
@ -154,6 +156,6 @@ int main(int argc, char **argv) {
return build(arg0, in_file, out_file, &include_paths); return build(arg0, in_file, out_file, &include_paths);
} }
zig_panic("unreachable"); zig_unreachable();
} }

View file

@ -41,7 +41,7 @@ const char *node_type_str(NodeType node_type) {
case NodeTypeFnCall: case NodeTypeFnCall:
return "FnCall"; return "FnCall";
} }
zig_panic("unreachable"); zig_unreachable();
} }
void ast_print(AstNode *node, int indent) { void ast_print(AstNode *node, int indent) {
@ -82,28 +82,321 @@ void ast_print(AstNode *node, int indent) {
struct ParseContext { struct ParseContext {
Buf *buf; Buf *buf;
AstNode *root; AstNode *root;
ZigList<Token> *tokens;
}; };
AstNode *ast_create_root(void) { static AstNode *ast_create_node(NodeType type) {
zig_panic("TODO create root"); AstNode *node = allocate<AstNode>(1);
node->type = type;
return node;
} }
void ast_invalid_token_error(Buf *buf, Token *token) { static void ast_buf_from_token(ParseContext *pc, Token *token, Buf *buf) {
buf_init_from_mem(buf, buf_ptr(pc->buf) + token->start_pos, token->end_pos - token->start_pos);
}
static void ast_invalid_token_error(ParseContext *pc, Token *token) {
Buf token_value = {0}; Buf token_value = {0};
buf_init_from_mem(&token_value, buf_ptr(buf) + token->start_pos, token->end_pos - token->start_pos); ast_buf_from_token(pc, token, &token_value);
ast_error(token, "invalid token: '%s'", buf_ptr(&token_value)); ast_error(token, "invalid token: '%s'", buf_ptr(&token_value));
} }
void ast_parse_fn_decls(ParseContext *pc, ZigList<AstNode *> *fn_decls) { static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index);
zig_panic("TODO parse fn decls");
/*
 * Checks that `token` carries the id the grammar requires at this point.
 * A mismatching token is handed to ast_invalid_token_error() for reporting.
 */
static void ast_expect_token(ParseContext *pc, Token *token, TokenId token_id) {
    if (token->id == token_id)
        return;

    ast_invalid_token_error(pc, token);
}
/*
Type : token(Symbol) | PointerType;
PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
*/
/*
 * Parses one Type production beginning at `token_index`.
 *
 * A Symbol yields a primitive type whose name is copied from the source
 * buffer. A Star must be followed by `mut` or `const` and then, recursively,
 * by the pointee Type. Any other leading token is reported as invalid.
 *
 * On return `*new_token_index` holds the index of the first token that was
 * not consumed. The freshly created NodeTypeType node is returned.
 */
static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *type_node = ast_create_node(NodeTypeType);

    Token *first = &pc->tokens->at(token_index);
    token_index += 1;

    switch (first->id) {
        case TokenIdSymbol:
            type_node->data.type.type = AstNodeTypeTypePrimitive;
            ast_buf_from_token(pc, first, &type_node->data.type.primitive_name);
            break;

        case TokenIdStar: {
            Token *qualifier = &pc->tokens->at(token_index);
            token_index += 1;

            switch (qualifier->id) {
                case TokenIdKeywordMut:
                    type_node->data.type.is_const = false;
                    break;
                case TokenIdKeywordConst:
                    type_node->data.type.is_const = true;
                    break;
                default:
                    ast_invalid_token_error(pc, qualifier);
                    break;
            }

            // NOTE(review): this branch never assigns data.type.type, so the
            // node keeps whatever tag ast_create_node() left in place --
            // confirm whether a pointer tag should be stored here.
            type_node->data.type.child_type = ast_parse_type(pc, token_index, &token_index);
            break;
        }

        default:
            ast_invalid_token_error(pc, first);
            break;
    }

    *new_token_index = token_index;
    return type_node;
}
/*
ParamDecl<node> : token(Symbol) token(Colon) Type {
};
*/
/*
 * Parses a single `name : Type` parameter declaration starting at
 * `token_index`. The parameter name is copied into the node and the type is
 * parsed by ast_parse_type(). `*new_token_index` receives the index of the
 * first token after the declaration.
 */
static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *decl = ast_create_node(NodeTypeParamDecl);

    // Parameter name.
    Token *name_token = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, name_token, TokenIdSymbol);
    ast_buf_from_token(pc, name_token, &decl->data.param_decl.name);

    // Separator between the name and the type.
    Token *colon_token = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, colon_token, TokenIdColon);

    // Parameter type; advances token_index past everything it consumes.
    decl->data.param_decl.type = ast_parse_type(pc, token_index, &token_index);

    *new_token_index = token_index;
    return decl;
}
/*
 * ParamDeclList : token(LParen) list(ParamDecl, token(Comma)) token(RParen);
 *
 * Parses a parenthesized, comma-separated list of parameter declarations
 * starting at `token_index` and appends each parsed node to `params`.
 * `*new_token_index` receives the index of the token following the closing
 * parenthesis.
 */
static void ast_parse_param_decl_list(ParseContext *pc, int token_index, int *new_token_index,
        ZigList<AstNode *> *params)
{
    Token *open_paren = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, open_paren, TokenIdLParen);

    // Empty list: "()" -- consume the closing parenthesis and stop.
    Token *lookahead = &pc->tokens->at(token_index);
    if (lookahead->id == TokenIdRParen) {
        *new_token_index = token_index + 1;
        return;
    }

    while (true) {
        AstNode *decl = ast_parse_param_decl(pc, token_index, &token_index);
        params->append(decl);

        // Every declaration is followed by either ")" (end) or "," (more).
        Token *separator = &pc->tokens->at(token_index);
        token_index += 1;
        if (separator->id == TokenIdRParen)
            break;
        ast_expect_token(pc, separator, TokenIdComma);
    }

    *new_token_index = token_index;
}
/*
 * Parses the argument list of a function call:
 *   token(LParen) list(Expression, token(Comma)) token(RParen)
 *
 * Each argument expression is appended to `params`. `*new_token_index`
 * receives the index of the token following the closing parenthesis.
 */
static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int *new_token_index,
        ZigList<AstNode*> *params)
{
    Token *open_paren = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, open_paren, TokenIdLParen);

    // Call without arguments: "()" -- consume the closing parenthesis and stop.
    Token *lookahead = &pc->tokens->at(token_index);
    if (lookahead->id == TokenIdRParen) {
        *new_token_index = token_index + 1;
        return;
    }

    while (true) {
        AstNode *argument = ast_parse_expression(pc, token_index, &token_index);
        params->append(argument);

        // Every argument is followed by either ")" (end) or "," (more).
        Token *separator = &pc->tokens->at(token_index);
        token_index += 1;
        if (separator->id == TokenIdRParen)
            break;
        ast_expect_token(pc, separator, TokenIdComma);
    }

    *new_token_index = token_index;
}
/*
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
*/
/*
 * Parses a function call starting at `token_index`: the callee name (a
 * Symbol) followed by a parenthesized argument list. The name is copied into
 * the node and the arguments are collected in data.fn_call.params.
 * `*new_token_index` receives the index of the first unconsumed token.
 */
static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *call = ast_create_node(NodeTypeFnCall);

    Token *callee = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, callee, TokenIdSymbol);
    ast_buf_from_token(pc, callee, &call->data.fn_call.name);

    ast_parse_fn_call_param_list(pc, token_index, &token_index, &call->data.fn_call.params);

    *new_token_index = token_index;
    return call;
}
/*
 * Expression : token(Number) | token(String) | FnCall ;
 *
 * Parses one expression starting at `token_index`. The first token selects
 * the alternative: a Symbol starts a function call (parsed by
 * ast_parse_fn_call(), which consumes the symbol itself), while number and
 * string literals are copied from the source buffer and consume one token.
 * Any other token is reported as invalid. `*new_token_index` receives the
 * index of the first unconsumed token.
 */
static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *expr = ast_create_node(NodeTypeExpression);
    Token *first = &pc->tokens->at(token_index);

    switch (first->id) {
        case TokenIdSymbol:
            expr->data.expression.type = AstNodeExpressionTypeFnCall;
            expr->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
            break;

        case TokenIdNumberLiteral:
            expr->data.expression.type = AstNodeExpressionTypeNumber;
            ast_buf_from_token(pc, first, &expr->data.expression.data.number);
            token_index += 1;
            break;

        case TokenIdStringLiteral:
            expr->data.expression.type = AstNodeExpressionTypeString;
            ast_buf_from_token(pc, first, &expr->data.expression.data.string);
            token_index += 1;
            break;

        default:
            ast_invalid_token_error(pc, first);
            break;
    }

    *new_token_index = token_index;
    return expr;
}
/*
Statement : ExpressionStatement | ReturnStatement ;
ExpressionStatement : Expression token(Semicolon) ;
ReturnStatement : token(Return) Expression token(Semicolon) ;
Expression : token(Number) | token(String) | FnCall ;
FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
*/
/*
 * Parses one statement starting at `token_index`.
 *
 * A leading `return` keyword produces a return statement; a leading Symbol,
 * string literal or number literal produces an expression statement. Both
 * forms must be terminated by a semicolon. Any other leading token is
 * reported as invalid and nothing further is consumed.
 * `*new_token_index` receives the index of the first unconsumed token.
 */
static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *stmt = ast_create_node(NodeTypeStatement);
    Token *first = &pc->tokens->at(token_index);

    switch (first->id) {
        case TokenIdKeywordReturn:
            token_index += 1; // skip the `return` keyword itself
            stmt->data.statement.type = AstNodeStatementTypeReturn;
            stmt->data.statement.data.retrn.expression =
                ast_parse_expression(pc, token_index, &token_index);
            break;

        case TokenIdSymbol:
        case TokenIdStringLiteral:
        case TokenIdNumberLiteral:
            stmt->data.statement.type = AstNodeStatementTypeExpression;
            stmt->data.statement.data.expr.expression =
                ast_parse_expression(pc, token_index, &token_index);
            break;

        default:
            // Not the start of any statement: report it and leave the
            // token position untouched.
            ast_invalid_token_error(pc, first);
            *new_token_index = token_index;
            return stmt;
    }

    // Both statement forms end with a semicolon.
    Token *terminator = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, terminator, TokenIdSemicolon);

    *new_token_index = token_index;
    return stmt;
}
/*
Block : token(LBrace) many(Statement) token(RBrace);
*/
/*
 * Parses a brace-delimited block starting at `token_index`. Statements are
 * parsed one after another and appended to data.block.statements until the
 * closing brace is seen. `*new_token_index` receives the index of the token
 * following the closing brace.
 */
static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *block = ast_create_node(NodeTypeBlock);

    Token *open_brace = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, open_brace, TokenIdLBrace);

    // Keep parsing statements until the lookahead token closes the block.
    while (pc->tokens->at(token_index).id != TokenIdRBrace) {
        AstNode *stmt = ast_parse_statement(pc, token_index, &token_index);
        block->data.block.statements.append(stmt);
    }

    // Step over the closing brace.
    *new_token_index = token_index + 1;
    return block;
}
/*
FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
*/
/*
 * Parses one function declaration starting at `token_index`.
 *
 * Consumes the `fn` keyword, the function name, the parameter list, an
 * optional `-> Type` return type, and the body block. When no arrow is
 * present the return type is recorded as nullptr.
 * `*new_token_index` receives the index of the first token after the body.
 */
static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
    AstNode *node = ast_create_node(NodeTypeFnDecl);

    Token *fn_token = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, fn_token, TokenIdKeywordFn);

    Token *fn_name = &pc->tokens->at(token_index);
    token_index += 1;
    ast_expect_token(pc, fn_name, TokenIdSymbol);
    ast_buf_from_token(pc, fn_name, &node->data.fn_decl.name);

    ast_parse_param_decl_list(pc, token_index, &token_index, &node->data.fn_decl.params);

    // Peek at the token after the parameter list. Only the arrow is consumed
    // here: the opening brace belongs to the Block production, and
    // ast_parse_block() checks for it itself. Consuming it at this point
    // would make every function without a return type fail to parse.
    Token *arrow = &pc->tokens->at(token_index);
    if (arrow->id == TokenIdArrow) {
        token_index += 1;
        node->data.fn_decl.return_type = ast_parse_type(pc, token_index, &token_index);
    } else if (arrow->id == TokenIdLBrace) {
        node->data.fn_decl.return_type = nullptr;
    } else {
        ast_invalid_token_error(pc, arrow);
    }

    node->data.fn_decl.body = ast_parse_block(pc, token_index, &token_index);

    *new_token_index = token_index;
    return node;
}
/*
 * many(FnDecl): parses function declarations back to back, starting at
 * token_index, and appends each parsed node to fn_decls.
 *
 * Parsing stops at the first token that is not the `fn` keyword; that token
 * is not consumed and its index is stored in *new_token_index.
 *
 * Note the parameter order: unlike the other ast_parse_* helpers in this
 * file, new_token_index is the last parameter here.
 */
static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList<AstNode *> *fn_decls,
int *new_token_index)
{
for (;;) {
Token *token = &pc->tokens->at(token_index);
if (token->id == TokenIdKeywordFn) {
// ast_parse_fn_decl advances token_index past the whole declaration.
AstNode *fn_decl_node = ast_parse_fn_decl(pc, token_index, &token_index);
fn_decls->append(fn_decl_node);
} else {
// Not a function declaration: hand the position back to the caller.
*new_token_index = token_index;
return;
}
}
// The loop above only exits via return.
zig_unreachable();
} }
AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) { AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
ParseContext pc = {0}; ParseContext pc = {0};
pc.buf = buf; pc.buf = buf;
pc.root = ast_create_root(); pc.root = ast_create_node(NodeTypeRoot);
pc.tokens = tokens;
ast_parse_fn_decls(&pc, &pc.root->data.root.fn_decls); int new_token_index;
ast_parse_fn_decl_list(&pc, 0, &pc.root->data.root.fn_decls, &new_token_index);
if (new_token_index != tokens->length - 1) {
ast_invalid_token_error(&pc, &tokens->at(new_token_index));
}
return pc.root; return pc.root;
} }

View file

@ -44,20 +44,49 @@ enum AstNodeTypeType {
struct AstNodeType { struct AstNodeType {
AstNodeTypeType type; AstNodeTypeType type;
AstNode *child; Buf primitive_name;
}; AstNode *child_type;
bool is_const;
struct AstNodePointerType {
AstNode *const_or_mut;
AstNode *type;
}; };
struct AstNodeBlock { struct AstNodeBlock {
ZigList<AstNode *> expressions; ZigList<AstNode *> statements;
};
// Discriminator for AstNodeStatement: selects which member of its `data`
// union is in use.
enum AstNodeStatementType {
// Expression followed by a semicolon; uses AstNodeStatement::data.expr.
AstNodeStatementTypeExpression,
// `return` expression followed by a semicolon; uses AstNodeStatement::data.retrn.
AstNodeStatementTypeReturn,
};
// Payload of an expression statement (AstNodeStatementTypeExpression).
struct AstNodeStatementExpression {
// The expression node that makes up the statement.
AstNode *expression;
};
// Payload of a return statement (AstNodeStatementTypeReturn).
struct AstNodeStatementReturn {
// The expression node following the `return` keyword.
AstNode *expression;
};
// A statement node: a tag plus the payload for that kind of statement.
struct AstNodeStatement {
// Selects the active member of `data`.
AstNodeStatementType type;
union {
// Active when type == AstNodeStatementTypeExpression.
AstNodeStatementExpression expr;
// Active when type == AstNodeStatementTypeReturn.
// (Spelled `retrn`, presumably because `return` is a keyword.)
AstNodeStatementReturn retrn;
} data;
};
// Discriminator for AstNodeExpression: selects which member of its `data`
// union is in use.
enum AstNodeExpressionType {
// Number literal; text stored in AstNodeExpression::data.number.
AstNodeExpressionTypeNumber,
// String literal; text stored in AstNodeExpression::data.string.
AstNodeExpressionTypeString,
// Function call; node stored in AstNodeExpression::data.fn_call.
AstNodeExpressionTypeFnCall,
}; };
struct AstNodeExpression { struct AstNodeExpression {
AstNode *child; AstNodeExpressionType type;
union {
Buf number;
Buf string;
AstNode *fn_call;
} data;
}; };
struct AstNodeFnCall { struct AstNodeFnCall {
@ -74,13 +103,14 @@ struct AstNode {
AstNodeType type; AstNodeType type;
AstNodeParamDecl param_decl; AstNodeParamDecl param_decl;
AstNodeBlock block; AstNodeBlock block;
AstNodeStatement statement;
AstNodeExpression expression; AstNodeExpression expression;
AstNodeFnCall fn_call; AstNodeFnCall fn_call;
} data; } data;
}; };
__attribute__ ((format (printf, 2, 3))) __attribute__ ((format (printf, 2, 3)))
void ast_error(Token *token, const char *format, ...); void ast_token_error(Token *token, const char *format, ...);
void ast_invalid_token_error(Buf *buf, Token *token); void ast_invalid_token_error(Buf *buf, Token *token);
@ -91,6 +121,4 @@ const char *node_type_str(NodeType node_type);
void ast_print(AstNode *node, int indent); void ast_print(AstNode *node, int indent);
AstNode *ast_create_root(void);
#endif #endif

View file

@ -10,7 +10,6 @@
#include "buffer.hpp" #include "buffer.hpp"
/*
enum TokenId { enum TokenId {
TokenIdEof, TokenIdEof,
TokenIdSymbol, TokenIdSymbol,
@ -32,30 +31,6 @@ enum TokenId {
TokenIdArrow, TokenIdArrow,
TokenIdDash, TokenIdDash,
}; };
*/
// TODO: debug delete this
enum TokenId {
TokenIdLParen = 0,
TokenIdRParen = 1,
TokenIdEof = 2,
TokenIdStar = 3,
TokenIdPlus = 4,
TokenIdSymbol,
TokenIdKeywordFn,
TokenIdKeywordReturn,
TokenIdKeywordMut,
TokenIdKeywordConst,
TokenIdComma,
TokenIdLBrace,
TokenIdRBrace,
TokenIdStringLiteral,
TokenIdSemicolon,
TokenIdNumberLiteral,
TokenIdColon,
TokenIdArrow,
TokenIdDash,
};
struct Token { struct Token {
TokenId id; TokenId id;

View file

@ -20,6 +20,12 @@ void zig_panic(const char *format, ...)
__attribute__ ((noreturn)) __attribute__ ((noreturn))
__attribute__ ((format (printf, 1, 2))); __attribute__ ((format (printf, 1, 2)));
// Marks a code path that must never execute. Reaching it calls
// zig_panic("unreachable"); the function is declared noreturn so callers
// need no dummy return value after it.
__attribute__((cold, noreturn))
static inline void zig_unreachable(void) {
    zig_panic("unreachable");
}
template<typename T> template<typename T>
__attribute__((malloc)) static inline T *allocate_nonzero(size_t count) { __attribute__((malloc)) static inline T *allocate_nonzero(size_t count) {
T *ptr = reinterpret_cast<T*>(malloc(count * sizeof(T))); T *ptr = reinterpret_cast<T*>(malloc(count * sizeof(T)));