From 9e0ff6faa2458a431875b38d24f754def450117e Mon Sep 17 00:00:00 2001 From: Josh Wolfe Date: Mon, 30 Nov 2015 09:14:58 -0700 Subject: [PATCH] factor analysis code out of codegen --- CMakeLists.txt | 1 + src/analyze.cpp | 498 ++++++++++++++++++++++++++++++++++++ src/analyze.hpp | 15 ++ src/buffer.cpp | 17 ++ src/buffer.hpp | 17 +- src/codegen.cpp | 581 +----------------------------------------- src/codegen.hpp | 2 - src/main.cpp | 1 + src/semantic_info.hpp | 106 ++++++++ 9 files changed, 643 insertions(+), 595 deletions(-) create mode 100644 src/analyze.cpp create mode 100644 src/analyze.hpp create mode 100644 src/semantic_info.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 15e7858352..44e7c43170 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ include_directories( ) set(ZIG_SOURCES + "${CMAKE_SOURCE_DIR}/src/analyze.cpp" "${CMAKE_SOURCE_DIR}/src/buffer.cpp" "${CMAKE_SOURCE_DIR}/src/error.cpp" "${CMAKE_SOURCE_DIR}/src/main.cpp" diff --git a/src/analyze.cpp b/src/analyze.cpp new file mode 100644 index 0000000000..2e456a1d19 --- /dev/null +++ b/src/analyze.cpp @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2015 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#include "analyze.hpp" +#include "semantic_info.hpp" +#include "error.hpp" +#include "zig_llvm.hpp" + +static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) { + g->errors.add_one(); + ErrorMsg *last_msg = &g->errors.last(); + last_msg->line_start = node->line; + last_msg->column_start = node->column; + last_msg->line_end = -1; + last_msg->column_end = -1; + last_msg->msg = msg; +} + +static int parse_version_string(Buf *buf, int *major, int *minor, int *patch) { + char *dot1 = strstr(buf_ptr(buf), "."); + if (!dot1) + return ErrorInvalidFormat; + char *dot2 = strstr(dot1 + 1, "."); + if (!dot2) + return ErrorInvalidFormat; + + *major = (int)strtol(buf_ptr(buf), nullptr, 10); + *minor = (int)strtol(dot1 + 1, nullptr, 10); + *patch = (int)strtol(dot2 + 1, nullptr, 10); + + return ErrorNone; +} + +static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node) { + int err; + if ((err = parse_version_string(version_buf, &g->version_major, &g->version_minor, &g->version_patch))) { + add_node_error(g, node, + buf_sprintf("invalid version string")); + } +} + +static void find_declarations(CodeGen *g, AstNode *node); + +static void resolve_type_and_recurse(CodeGen *g, AstNode *node) { + assert(!node->codegen_node); + node->codegen_node = allocate(1); + TypeNode *type_node = &node->codegen_node->data.type_node; + switch (node->data.type.type) { + case AstNodeTypeTypePrimitive: + { + Buf *name = &node->data.type.primitive_name; + auto table_entry = g->type_table.maybe_get(name); + if (table_entry) { + type_node->entry = table_entry->value; + } else { + add_node_error(g, node, + buf_sprintf("invalid type name: '%s'", buf_ptr(name))); + type_node->entry = g->invalid_type_entry; + } + break; + } + case AstNodeTypeTypePointer: + { + find_declarations(g, node->data.type.child_type); + TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node; + if (child_type_node->entry->id == TypeIdUnreachable) { + add_node_error(g, node, + buf_create_from_str("pointer to unreachable not allowed")); + } + TypeTableEntry **parent_pointer = node->data.type.is_const ? + &child_type_node->entry->pointer_const_parent : + &child_type_node->entry->pointer_mut_parent; + const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut"; + if (*parent_pointer) { + type_node->entry = *parent_pointer; + } else { + TypeTableEntry *entry = allocate(1); + entry->id = TypeIdPointer; + entry->type_ref = LLVMPointerType(child_type_node->entry->type_ref, 0); + buf_resize(&entry->name, 0); + buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type_node->entry->name)); + entry->di_type = g->dbuilder->createPointerType(child_type_node->entry->di_type, + g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name)); + g->type_table.put(&entry->name, entry); + type_node->entry = entry; + *parent_pointer = entry; + } + break; + } + } +} + +static void find_declarations(CodeGen *g, AstNode *node) { + switch (node->type) { + case NodeTypeExternBlock: + for (int i = 0; i < node->data.extern_block.directives->length; i += 1) { + AstNode *directive_node = node->data.extern_block.directives->at(i); + Buf *name = &directive_node->data.directive.name; + Buf *param = &directive_node->data.directive.param; + if (buf_eql_str(name, "link")) { + g->link_table.put(param, true); + } else { + add_node_error(g, directive_node, + buf_sprintf("invalid directive: '%s'", buf_ptr(name))); + } + } + + for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) { + AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i); + assert(fn_decl->type == NodeTypeFnDecl); + AstNode *fn_proto = fn_decl->data.fn_decl.fn_proto; + find_declarations(g, fn_proto); + Buf *name = &fn_proto->data.fn_proto.name; + + FnTableEntry *fn_table_entry = allocate(1); + fn_table_entry->proto_node = fn_proto; + fn_table_entry->is_extern = true; + fn_table_entry->calling_convention = LLVMCCallConv; + g->fn_table.put(name, fn_table_entry); + } + break; + case NodeTypeFnDef: + { + AstNode *proto_node = node->data.fn_def.fn_proto; + assert(proto_node->type == NodeTypeFnProto); + Buf *proto_name = &proto_node->data.fn_proto.name; + auto entry = g->fn_table.maybe_get(proto_name); + if (entry) { + add_node_error(g, node, + buf_sprintf("redefinition of '%s'", buf_ptr(proto_name))); + assert(!node->codegen_node); + node->codegen_node = allocate(1); + node->codegen_node->data.fn_def_node.skip = true; + } else { + FnTableEntry *fn_table_entry = allocate(1); + fn_table_entry->proto_node = proto_node; + fn_table_entry->fn_def_node = node; + fn_table_entry->internal_linkage = proto_node->data.fn_proto.visib_mod != FnProtoVisibModExport; + if (fn_table_entry->internal_linkage) { + fn_table_entry->calling_convention = LLVMFastCallConv; + } else { + fn_table_entry->calling_convention = LLVMCCallConv; + } + g->fn_table.put(proto_name, fn_table_entry); + g->fn_defs.append(fn_table_entry); + + find_declarations(g, proto_node); + } + break; + } + case NodeTypeFnProto: + { + for (int i = 0; i < node->data.fn_proto.directives->length; i += 1) { + AstNode *directive_node = node->data.fn_proto.directives->at(i); + Buf *name = &directive_node->data.directive.name; + add_node_error(g, directive_node, + buf_sprintf("invalid directive: '%s'", buf_ptr(name))); + } + for (int i = 0; i < node->data.fn_proto.params.length; i += 1) { + AstNode *child = node->data.fn_proto.params.at(i); + find_declarations(g, child); + } + find_declarations(g, node->data.fn_proto.return_type); + break; + } + break; + case NodeTypeParamDecl: + find_declarations(g, node->data.param_decl.type); + break; + case NodeTypeType: + resolve_type_and_recurse(g, node); + break; + case NodeTypeDirective: + // we handled directives in the parent function + break; + case NodeTypeRootExportDecl: + for (int i = 0; i < node->data.root_export_decl.directives->length; i += 1) { + AstNode *directive_node = node->data.root_export_decl.directives->at(i); + Buf *name = &directive_node->data.directive.name; + Buf *param = &directive_node->data.directive.param; + if (buf_eql_str(name, "version")) { + set_root_export_version(g, param, directive_node); + } else { + add_node_error(g, directive_node, + buf_sprintf("invalid directive: '%s'", buf_ptr(name))); + } + } + break; + case NodeTypeFnDecl: + case NodeTypeReturnExpr: + case NodeTypeRoot: + case NodeTypeBlock: + case NodeTypeBinOpExpr: + case NodeTypeFnCallExpr: + case NodeTypeNumberLiteral: + case NodeTypeStringLiteral: + case NodeTypeUnreachable: + case NodeTypeSymbol: + case NodeTypeCastExpr: + case NodeTypePrefixOpExpr: + zig_unreachable(); + } +} + +static void check_fn_def_control_flow(CodeGen *g, AstNode *node) { + // Follow the execution flow and make sure the code returns appropriately. + // * A `return` statement in an unreachable type function should be an error. + // * Control flow should not be able to reach the end of an unreachable type function. + // * Functions that have a type other than void should not return without a value. + // * void functions without explicit return statements at the end need the + // add_implicit_return flag set on the codegen node. + assert(node->type == NodeTypeFnDef); + AstNode *proto_node = node->data.fn_def.fn_proto; + assert(proto_node->type == NodeTypeFnProto); + AstNode *return_type_node = proto_node->data.fn_proto.return_type; + assert(return_type_node->type == NodeTypeType); + + node->codegen_node = allocate(1); + FnDefNode *codegen_fn_def = &node->codegen_node->data.fn_def_node; + + assert(return_type_node->codegen_node); + TypeTableEntry *type_entry = return_type_node->codegen_node->data.type_node.entry; + assert(type_entry); + TypeId type_id = type_entry->id; + + AstNode *body_node = node->data.fn_def.body; + assert(body_node->type == NodeTypeBlock); + + // TODO once we understand types, do this pass after type checking, and + // if an expression has an unreachable value then stop looking at statements after + // it. then we can remove the check to `unreachable` in the end of this function. + bool prev_statement_return = false; + for (int i = 0; i < body_node->data.block.statements.length; i += 1) { + AstNode *statement_node = body_node->data.block.statements.at(i); + if (statement_node->type == NodeTypeReturnExpr) { + if (type_id == TypeIdUnreachable) { + add_node_error(g, statement_node, + buf_sprintf("return statement in function with unreachable return type")); + return; + } else { + prev_statement_return = true; + } + } else if (prev_statement_return) { + add_node_error(g, statement_node, + buf_sprintf("unreachable code")); + } + } + + if (!prev_statement_return) { + if (type_id == TypeIdVoid) { + codegen_fn_def->add_implicit_return = true; + } else if (type_id != TypeIdUnreachable) { + add_node_error(g, node, + buf_sprintf("control reaches end of non-void function")); + } + } +} + +static void analyze_node(CodeGen *g, AstNode *node) { + switch (node->type) { + case NodeTypeRoot: + { + // Iterate once over the top level declarations to build the function table + for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) { + AstNode *child = node->data.root.top_level_decls.at(i); + find_declarations(g, child); + } + for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) { + AstNode *child = node->data.root.top_level_decls.at(i); + analyze_node(g, child); + } + if (!g->out_name) { + add_node_error(g, node, + buf_sprintf("missing export declaration and output name not provided")); + } else if (g->out_type == OutTypeUnknown) { + add_node_error(g, node, + buf_sprintf("missing export declaration and export type not provided")); + } + break; + } + case NodeTypeRootExportDecl: + if (g->root_export_decl) { + add_node_error(g, node, + buf_sprintf("only one root export declaration allowed")); + } else { + g->root_export_decl = node; + + if (!g->out_name) + g->out_name = &node->data.root_export_decl.name; + + Buf *out_type = &node->data.root_export_decl.type; + OutType export_out_type; + if (buf_eql_str(out_type, "executable")) { + export_out_type = OutTypeExe; + } else if (buf_eql_str(out_type, "library")) { + export_out_type = OutTypeLib; + } else if (buf_eql_str(out_type, "object")) { + export_out_type = OutTypeObj; + } else { + add_node_error(g, node, + buf_sprintf("invalid export type: '%s'", buf_ptr(out_type))); + } + if (g->out_type == OutTypeUnknown) + g->out_type = export_out_type; + } + break; + case NodeTypeExternBlock: + for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) { + AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i); + analyze_node(g, fn_decl); + } + break; + case NodeTypeFnDef: + { + if (node->codegen_node && node->codegen_node->data.fn_def_node.skip) { + // we detected an error with this function definition which prevents us + // from further analyzing it. + break; + } + + AstNode *proto_node = node->data.fn_def.fn_proto; + assert(proto_node->type == NodeTypeFnProto); + analyze_node(g, proto_node); + + check_fn_def_control_flow(g, node); + analyze_node(g, node->data.fn_def.body); + break; + } + case NodeTypeFnDecl: + { + AstNode *proto_node = node->data.fn_decl.fn_proto; + assert(proto_node->type == NodeTypeFnProto); + analyze_node(g, proto_node); + break; + } + case NodeTypeFnProto: + { + for (int i = 0; i < node->data.fn_proto.params.length; i += 1) { + AstNode *child = node->data.fn_proto.params.at(i); + analyze_node(g, child); + } + analyze_node(g, node->data.fn_proto.return_type); + break; + } + case NodeTypeParamDecl: + analyze_node(g, node->data.param_decl.type); + break; + + case NodeTypeType: + // ignore; we handled types with find_declarations + break; + case NodeTypeBlock: + for (int i = 0; i < node->data.block.statements.length; i += 1) { + AstNode *child = node->data.block.statements.at(i); + analyze_node(g, child); + } + break; + case NodeTypeReturnExpr: + if (node->data.return_expr.expr) { + analyze_node(g, node->data.return_expr.expr); + } + break; + case NodeTypeBinOpExpr: + analyze_node(g, node->data.bin_op_expr.op1); + analyze_node(g, node->data.bin_op_expr.op2); + break; + case NodeTypeFnCallExpr: + { + Buf *name = hack_get_fn_call_name(g, node->data.fn_call_expr.fn_ref_expr); + + auto entry = g->fn_table.maybe_get(name); + if (!entry) { + add_node_error(g, node, + buf_sprintf("undefined function: '%s'", buf_ptr(name))); + } else { + FnTableEntry *fn_table_entry = entry->value; + assert(fn_table_entry->proto_node->type == NodeTypeFnProto); + int expected_param_count = fn_table_entry->proto_node->data.fn_proto.params.length; + int actual_param_count = node->data.fn_call_expr.params.length; + if (expected_param_count != actual_param_count) { + add_node_error(g, node, + buf_sprintf("wrong number of arguments. Expected %d, got %d.", + expected_param_count, actual_param_count)); + } + } + + for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) { + AstNode *child = node->data.fn_call_expr.params.at(i); + analyze_node(g, child); + } + break; + } + case NodeTypeDirective: + // we looked at directives in the parent node + break; + case NodeTypeCastExpr: + zig_panic("TODO"); + break; + case NodeTypePrefixOpExpr: + zig_panic("TODO"); + break; + case NodeTypeNumberLiteral: + case NodeTypeStringLiteral: + case NodeTypeUnreachable: + case NodeTypeSymbol: + // nothing to do + break; + } +} + +static void add_types(CodeGen *g) { + { + TypeTableEntry *entry = allocate(1); + entry->id = TypeIdU8; + entry->type_ref = LLVMInt8Type(); + buf_init_from_str(&entry->name, "u8"); + entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 8, 8, llvm::dwarf::DW_ATE_unsigned); + g->type_table.put(&entry->name, entry); + } + { + TypeTableEntry *entry = allocate(1); + entry->id = TypeIdI32; + entry->type_ref = LLVMInt32Type(); + buf_init_from_str(&entry->name, "i32"); + entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 32, 32, + llvm::dwarf::DW_ATE_signed); + g->type_table.put(&entry->name, entry); + } + { + TypeTableEntry *entry = allocate(1); + entry->id = TypeIdVoid; + entry->type_ref = LLVMVoidType(); + buf_init_from_str(&entry->name, "void"); + entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 0, 0, + llvm::dwarf::DW_ATE_unsigned); + g->type_table.put(&entry->name, entry); + + // invalid types are void + g->invalid_type_entry = entry; + } + { + TypeTableEntry *entry = allocate(1); + entry->id = TypeIdUnreachable; + entry->type_ref = LLVMVoidType(); + buf_init_from_str(&entry->name, "unreachable"); + entry->di_type = g->invalid_type_entry->di_type; + g->type_table.put(&entry->name, entry); + } +} + + +void semantic_analyze(CodeGen *g) { + LLVMInitializeAllTargets(); + LLVMInitializeAllTargetMCs(); + LLVMInitializeAllAsmPrinters(); + LLVMInitializeAllAsmParsers(); + LLVMInitializeNativeTarget(); + + g->is_native_target = true; + char *native_triple = LLVMGetDefaultTargetTriple(); + + LLVMTargetRef target_ref; + char *err_msg = nullptr; + if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) { + zig_panic("unable to get target from triple: %s", err_msg); + } + + char *native_cpu = LLVMZigGetHostCPUName(); + char *native_features = LLVMZigGetNativeFeatures(); + + LLVMCodeGenOptLevel opt_level = (g->build_type == CodeGenBuildTypeDebug) ? + LLVMCodeGenLevelNone : LLVMCodeGenLevelAggressive; + + LLVMRelocMode reloc_mode = g->is_static ? LLVMRelocStatic : LLVMRelocPIC; + + g->target_machine = LLVMCreateTargetMachine(target_ref, native_triple, + native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault); + + g->target_data_ref = LLVMGetTargetMachineData(g->target_machine); + + + g->module = LLVMModuleCreateWithName("ZigModule"); + + g->pointer_size_bytes = LLVMPointerSize(g->target_data_ref); + + g->builder = LLVMCreateBuilder(); + g->dbuilder = new llvm::DIBuilder(*llvm::unwrap(g->module), true); + + + add_types(g); + + analyze_node(g, g->root); +} + diff --git a/src/analyze.hpp b/src/analyze.hpp new file mode 100644 index 0000000000..0dca23194d --- /dev/null +++ b/src/analyze.hpp @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2015 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#ifndef ZIG_ANALYZE_HPP +#define ZIG_ANALYZE_HPP + +struct CodeGen; + +void semantic_analyze(CodeGen *g); + +#endif diff --git a/src/buffer.cpp b/src/buffer.cpp index 6607bb389c..1978371034 100644 --- a/src/buffer.cpp +++ b/src/buffer.cpp @@ -45,3 +45,20 @@ void buf_appendf(Buf *buf, const char *format, ...) { va_end(ap2); va_end(ap); } + +// these functions are not static inline so they can be better used as template parameters +bool buf_eql_buf(Buf *buf, Buf *other) { + assert(buf->list.length); + return buf_eql_mem(buf, buf_ptr(other), buf_len(other)); +} + +uint32_t buf_hash(Buf *buf) { + assert(buf->list.length); + // FNV 32-bit hash + uint32_t h = 2166136261; + for (int i = 0; i < buf_len(buf); i += 1) { + h = h ^ ((uint8_t)buf->list.at(i)); + h = h * 16777619; + } + return h; +} diff --git a/src/buffer.hpp b/src/buffer.hpp index d95036b2b0..f9b2497548 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -132,21 +132,8 @@ static inline bool buf_eql_str(Buf *buf, const char *str) { return buf_eql_mem(buf, str, strlen(str)); } -static inline bool buf_eql_buf(Buf *buf, Buf *other) { - assert(buf->list.length); - return buf_eql_mem(buf, buf_ptr(other), buf_len(other)); -} - -static inline uint32_t buf_hash(Buf *buf) { - assert(buf->list.length); - // FNV 32-bit hash - uint32_t h = 2166136261; - for (int i = 0; i < buf_len(buf); i += 1) { - h = h ^ ((uint8_t)buf->list.at(i)); - h = h * 16777619; - } - return h; -} +bool buf_eql_buf(Buf *buf, Buf *other); +uint32_t buf_hash(Buf *buf); static inline void buf_upcase(Buf *buf) { for (int i = 0; i < buf_len(buf); i += 1) { diff --git a/src/codegen.cpp b/src/codegen.cpp index a768d52122..572769817a 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -12,6 +12,8 @@ #include "config.h" #include "error.hpp" +#include "semantic_info.hpp" + #include #include @@ -21,88 +23,6 @@ #include #include -struct FnTableEntry { - LLVMValueRef fn_value; - AstNode *proto_node; - AstNode *fn_def_node; - bool is_extern; - bool internal_linkage; - unsigned calling_convention; -}; - -enum TypeId { - TypeIdUserDefined, - TypeIdPointer, - TypeIdU8, - TypeIdI32, - TypeIdVoid, - TypeIdUnreachable, -}; - -struct TypeTableEntry { - TypeId id; - LLVMTypeRef type_ref; - llvm::DIType *di_type; - - TypeTableEntry *pointer_child; - bool pointer_is_const; - int user_defined_id; - Buf name; - TypeTableEntry *pointer_const_parent; - TypeTableEntry *pointer_mut_parent; -}; - -struct CodeGen { - LLVMModuleRef module; - AstNode *root; - ZigList errors; - LLVMBuilderRef builder; - llvm::DIBuilder *dbuilder; - llvm::DICompileUnit *compile_unit; - HashMap fn_table; - HashMap str_table; - HashMap type_table; - HashMap link_table; - TypeTableEntry *invalid_type_entry; - LLVMTargetDataRef target_data_ref; - unsigned pointer_size_bytes; - bool is_static; - bool strip_debug_symbols; - CodeGenBuildType build_type; - LLVMTargetMachineRef target_machine; - bool is_native_target; - Buf in_file; - Buf in_dir; - ZigList block_scopes; - llvm::DIFile *di_file; - ZigList fn_defs; - Buf *out_name; - OutType out_type; - FnTableEntry *cur_fn; - bool c_stdint_used; - AstNode *root_export_decl; - int version_major; - int version_minor; - int version_patch; -}; - -struct TypeNode { - TypeTableEntry *entry; -}; - -struct FnDefNode { - bool add_implicit_return; - bool skip; - LLVMValueRef *params; -}; - -struct CodeGenNode { - union { - TypeNode type_node; // for NodeTypeType - FnDefNode fn_def_node; // for NodeTypeFnDef - } data; -}; - CodeGen *create_codegen(AstNode *root, Buf *in_full_path) { CodeGen *g = allocate(1); g->root = root; @@ -140,15 +60,7 @@ void codegen_set_out_name(CodeGen *g, Buf *out_name) { g->out_name = out_name; } -static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) { - g->errors.add_one(); - ErrorMsg *last_msg = &g->errors.last(); - last_msg->line_start = node->line; - last_msg->column_start = node->column; - last_msg->line_end = -1; - last_msg->column_end = -1; - last_msg->msg = msg; -} +static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node); static LLVMTypeRef to_llvm_type(AstNode *type_node) { assert(type_node->type == NodeTypeType); @@ -166,7 +78,6 @@ static llvm::DIType *to_llvm_debug_type(AstNode *type_node) { return type_node->codegen_node->data.type_node.entry->di_type; } - static bool type_is_unreachable(AstNode *type_node) { assert(type_node->type == NodeTypeType); assert(type_node->codegen_node); @@ -174,492 +85,6 @@ static bool type_is_unreachable(AstNode *type_node) { return type_node->codegen_node->data.type_node.entry->id == TypeIdUnreachable; } - -static int parse_version_string(Buf *buf, int *major, int *minor, int *patch) { - char *dot1 = strstr(buf_ptr(buf), "."); - if (!dot1) - return ErrorInvalidFormat; - char *dot2 = strstr(dot1 + 1, "."); - if (!dot2) - return ErrorInvalidFormat; - - *major = (int)strtol(buf_ptr(buf), nullptr, 10); - *minor = (int)strtol(dot1 + 1, nullptr, 10); - *patch = (int)strtol(dot2 + 1, nullptr, 10); - - return ErrorNone; -} - -static void set_root_export_version(CodeGen *g, Buf *version_buf, AstNode *node) { - int err; - if ((err = parse_version_string(version_buf, &g->version_major, &g->version_minor, &g->version_patch))) { - add_node_error(g, node, - buf_sprintf("invalid version string")); - } -} - -static void find_declarations(CodeGen *g, AstNode *node); - -static void resolve_type_and_recurse(CodeGen *g, AstNode *node) { - assert(!node->codegen_node); - node->codegen_node = allocate(1); - TypeNode *type_node = &node->codegen_node->data.type_node; - switch (node->data.type.type) { - case AstNodeTypeTypePrimitive: - { - Buf *name = &node->data.type.primitive_name; - auto table_entry = g->type_table.maybe_get(name); - if (table_entry) { - type_node->entry = table_entry->value; - } else { - add_node_error(g, node, - buf_sprintf("invalid type name: '%s'", buf_ptr(name))); - type_node->entry = g->invalid_type_entry; - } - break; - } - case AstNodeTypeTypePointer: - { - find_declarations(g, node->data.type.child_type); - TypeNode *child_type_node = &node->data.type.child_type->codegen_node->data.type_node; - if (child_type_node->entry->id == TypeIdUnreachable) { - add_node_error(g, node, - buf_create_from_str("pointer to unreachable not allowed")); - } - TypeTableEntry **parent_pointer = node->data.type.is_const ? - &child_type_node->entry->pointer_const_parent : - &child_type_node->entry->pointer_mut_parent; - const char *const_or_mut_str = node->data.type.is_const ? "const" : "mut"; - if (*parent_pointer) { - type_node->entry = *parent_pointer; - } else { - TypeTableEntry *entry = allocate(1); - entry->id = TypeIdPointer; - entry->type_ref = LLVMPointerType(child_type_node->entry->type_ref, 0); - buf_resize(&entry->name, 0); - buf_appendf(&entry->name, "*%s %s", const_or_mut_str, buf_ptr(&child_type_node->entry->name)); - entry->di_type = g->dbuilder->createPointerType(child_type_node->entry->di_type, - g->pointer_size_bytes * 8, g->pointer_size_bytes * 8, buf_ptr(&entry->name)); - g->type_table.put(&entry->name, entry); - type_node->entry = entry; - *parent_pointer = entry; - } - break; - } - } -} - -static void find_declarations(CodeGen *g, AstNode *node) { - switch (node->type) { - case NodeTypeExternBlock: - for (int i = 0; i < node->data.extern_block.directives->length; i += 1) { - AstNode *directive_node = node->data.extern_block.directives->at(i); - Buf *name = &directive_node->data.directive.name; - Buf *param = &directive_node->data.directive.param; - if (buf_eql_str(name, "link")) { - g->link_table.put(param, true); - } else { - add_node_error(g, directive_node, - buf_sprintf("invalid directive: '%s'", buf_ptr(name))); - } - } - - for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) { - AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i); - assert(fn_decl->type == NodeTypeFnDecl); - AstNode *fn_proto = fn_decl->data.fn_decl.fn_proto; - find_declarations(g, fn_proto); - Buf *name = &fn_proto->data.fn_proto.name; - - FnTableEntry *fn_table_entry = allocate(1); - fn_table_entry->proto_node = fn_proto; - fn_table_entry->is_extern = true; - fn_table_entry->calling_convention = LLVMCCallConv; - g->fn_table.put(name, fn_table_entry); - } - break; - case NodeTypeFnDef: - { - AstNode *proto_node = node->data.fn_def.fn_proto; - assert(proto_node->type == NodeTypeFnProto); - Buf *proto_name = &proto_node->data.fn_proto.name; - auto entry = g->fn_table.maybe_get(proto_name); - if (entry) { - add_node_error(g, node, - buf_sprintf("redefinition of '%s'", buf_ptr(proto_name))); - assert(!node->codegen_node); - node->codegen_node = allocate(1); - node->codegen_node->data.fn_def_node.skip = true; - } else { - FnTableEntry *fn_table_entry = allocate(1); - fn_table_entry->proto_node = proto_node; - fn_table_entry->fn_def_node = node; - fn_table_entry->internal_linkage = proto_node->data.fn_proto.visib_mod != FnProtoVisibModExport; - if (fn_table_entry->internal_linkage) { - fn_table_entry->calling_convention = LLVMFastCallConv; - } else { - fn_table_entry->calling_convention = LLVMCCallConv; - } - g->fn_table.put(proto_name, fn_table_entry); - g->fn_defs.append(fn_table_entry); - - find_declarations(g, proto_node); - } - break; - } - case NodeTypeFnProto: - { - for (int i = 0; i < node->data.fn_proto.directives->length; i += 1) { - AstNode *directive_node = node->data.fn_proto.directives->at(i); - Buf *name = &directive_node->data.directive.name; - add_node_error(g, directive_node, - buf_sprintf("invalid directive: '%s'", buf_ptr(name))); - } - for (int i = 0; i < node->data.fn_proto.params.length; i += 1) { - AstNode *child = node->data.fn_proto.params.at(i); - find_declarations(g, child); - } - find_declarations(g, node->data.fn_proto.return_type); - break; - } - break; - case NodeTypeParamDecl: - find_declarations(g, node->data.param_decl.type); - break; - case NodeTypeType: - resolve_type_and_recurse(g, node); - break; - case NodeTypeDirective: - // we handled directives in the parent function - break; - case NodeTypeRootExportDecl: - for (int i = 0; i < node->data.root_export_decl.directives->length; i += 1) { - AstNode *directive_node = node->data.root_export_decl.directives->at(i); - Buf *name = &directive_node->data.directive.name; - Buf *param = &directive_node->data.directive.param; - if (buf_eql_str(name, "version")) { - set_root_export_version(g, param, directive_node); - } else { - add_node_error(g, directive_node, - buf_sprintf("invalid directive: '%s'", buf_ptr(name))); - } - } - break; - case NodeTypeFnDecl: - case NodeTypeReturnExpr: - case NodeTypeRoot: - case NodeTypeBlock: - case NodeTypeBinOpExpr: - case NodeTypeFnCallExpr: - case NodeTypeNumberLiteral: - case NodeTypeStringLiteral: - case NodeTypeUnreachable: - case NodeTypeSymbol: - case NodeTypeCastExpr: - case NodeTypePrefixOpExpr: - zig_unreachable(); - } -} - -static void check_fn_def_control_flow(CodeGen *g, AstNode *node) { - // Follow the execution flow and make sure the code returns appropriately. - // * A `return` statement in an unreachable type function should be an error. - // * Control flow should not be able to reach the end of an unreachable type function. - // * Functions that have a type other than void should not return without a value. - // * void functions without explicit return statements at the end need the - // add_implicit_return flag set on the codegen node. - assert(node->type == NodeTypeFnDef); - AstNode *proto_node = node->data.fn_def.fn_proto; - assert(proto_node->type == NodeTypeFnProto); - AstNode *return_type_node = proto_node->data.fn_proto.return_type; - assert(return_type_node->type == NodeTypeType); - - node->codegen_node = allocate(1); - FnDefNode *codegen_fn_def = &node->codegen_node->data.fn_def_node; - - assert(return_type_node->codegen_node); - TypeTableEntry *type_entry = return_type_node->codegen_node->data.type_node.entry; - assert(type_entry); - TypeId type_id = type_entry->id; - - AstNode *body_node = node->data.fn_def.body; - assert(body_node->type == NodeTypeBlock); - - // TODO once we understand types, do this pass after type checking, and - // if an expression has an unreachable value then stop looking at statements after - // it. then we can remove the check to `unreachable` in the end of this function. - bool prev_statement_return = false; - for (int i = 0; i < body_node->data.block.statements.length; i += 1) { - AstNode *statement_node = body_node->data.block.statements.at(i); - if (statement_node->type == NodeTypeReturnExpr) { - if (type_id == TypeIdUnreachable) { - add_node_error(g, statement_node, - buf_sprintf("return statement in function with unreachable return type")); - return; - } else { - prev_statement_return = true; - } - } else if (prev_statement_return) { - add_node_error(g, statement_node, - buf_sprintf("unreachable code")); - } - } - - if (!prev_statement_return) { - if (type_id == TypeIdVoid) { - codegen_fn_def->add_implicit_return = true; - } else if (type_id != TypeIdUnreachable) { - add_node_error(g, node, - buf_sprintf("control reaches end of non-void function")); - } - } -} - -static Buf *hack_get_fn_call_name(CodeGen *g, AstNode *node) { - // Assume that the expression evaluates to a simple name and return the buf - // TODO after type checking works we should be able to remove this hack - assert(node->type == NodeTypeSymbol); - return &node->data.symbol; -} - -static void analyze_node(CodeGen *g, AstNode *node) { - switch (node->type) { - case NodeTypeRoot: - { - // Iterate once over the top level declarations to build the function table - for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) { - AstNode *child = node->data.root.top_level_decls.at(i); - find_declarations(g, child); - } - for (int i = 0; i < node->data.root.top_level_decls.length; i += 1) { - AstNode *child = node->data.root.top_level_decls.at(i); - analyze_node(g, child); - } - if (!g->out_name) { - add_node_error(g, node, - buf_sprintf("missing export declaration and output name not provided")); - } else if (g->out_type == OutTypeUnknown) { - add_node_error(g, node, - buf_sprintf("missing export declaration and export type not provided")); - } - break; - } - case NodeTypeRootExportDecl: - if (g->root_export_decl) { - add_node_error(g, node, - buf_sprintf("only one root export declaration allowed")); - } else { - g->root_export_decl = node; - - if (!g->out_name) - g->out_name = &node->data.root_export_decl.name; - - Buf *out_type = &node->data.root_export_decl.type; - OutType export_out_type; - if (buf_eql_str(out_type, "executable")) { - export_out_type = OutTypeExe; - } else if (buf_eql_str(out_type, "library")) { - export_out_type = OutTypeLib; - } else if (buf_eql_str(out_type, "object")) { - export_out_type = OutTypeObj; - } else { - add_node_error(g, node, - buf_sprintf("invalid export type: '%s'", buf_ptr(out_type))); - } - if (g->out_type == OutTypeUnknown) - g->out_type = export_out_type; - } - break; - case NodeTypeExternBlock: - for (int fn_decl_i = 0; fn_decl_i < node->data.extern_block.fn_decls.length; fn_decl_i += 1) { - AstNode *fn_decl = node->data.extern_block.fn_decls.at(fn_decl_i); - analyze_node(g, fn_decl); - } - break; - case NodeTypeFnDef: - { - if (node->codegen_node && node->codegen_node->data.fn_def_node.skip) { - // we detected an error with this function definition which prevents us - // from further analyzing it. - break; - } - - AstNode *proto_node = node->data.fn_def.fn_proto; - assert(proto_node->type == NodeTypeFnProto); - analyze_node(g, proto_node); - - check_fn_def_control_flow(g, node); - analyze_node(g, node->data.fn_def.body); - break; - } - case NodeTypeFnDecl: - { - AstNode *proto_node = node->data.fn_decl.fn_proto; - assert(proto_node->type == NodeTypeFnProto); - analyze_node(g, proto_node); - break; - } - case NodeTypeFnProto: - { - for (int i = 0; i < node->data.fn_proto.params.length; i += 1) { - AstNode *child = node->data.fn_proto.params.at(i); - analyze_node(g, child); - } - analyze_node(g, node->data.fn_proto.return_type); - break; - } - case NodeTypeParamDecl: - analyze_node(g, node->data.param_decl.type); - break; - - case NodeTypeType: - // ignore; we handled types with find_declarations - break; - case NodeTypeBlock: - for (int i = 0; i < node->data.block.statements.length; i += 1) { - AstNode *child = node->data.block.statements.at(i); - analyze_node(g, child); - } - break; - case NodeTypeReturnExpr: - if (node->data.return_expr.expr) { - analyze_node(g, node->data.return_expr.expr); - } - break; - case NodeTypeBinOpExpr: - analyze_node(g, node->data.bin_op_expr.op1); - analyze_node(g, node->data.bin_op_expr.op2); - break; - case NodeTypeFnCallExpr: - { - Buf *name = hack_get_fn_call_name(g, node->data.fn_call_expr.fn_ref_expr); - - auto entry = g->fn_table.maybe_get(name); - if (!entry) { - add_node_error(g, node, - buf_sprintf("undefined function: '%s'", buf_ptr(name))); - } else { - FnTableEntry *fn_table_entry = entry->value; - assert(fn_table_entry->proto_node->type == NodeTypeFnProto); - int expected_param_count = fn_table_entry->proto_node->data.fn_proto.params.length; - int actual_param_count = node->data.fn_call_expr.params.length; - if (expected_param_count != actual_param_count) { - add_node_error(g, node, - buf_sprintf("wrong number of arguments. Expected %d, got %d.", - expected_param_count, actual_param_count)); - } - } - - for (int i = 0; i < node->data.fn_call_expr.params.length; i += 1) { - AstNode *child = node->data.fn_call_expr.params.at(i); - analyze_node(g, child); - } - break; - } - case NodeTypeDirective: - // we looked at directives in the parent node - break; - case NodeTypeCastExpr: - zig_panic("TODO"); - break; - case NodeTypePrefixOpExpr: - zig_panic("TODO"); - break; - case NodeTypeNumberLiteral: - case NodeTypeStringLiteral: - case NodeTypeUnreachable: - case NodeTypeSymbol: - // nothing to do - break; - } -} - -static void add_types(CodeGen *g) { - { - TypeTableEntry *entry = allocate(1); - entry->id = TypeIdU8; - entry->type_ref = LLVMInt8Type(); - buf_init_from_str(&entry->name, "u8"); - entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 8, 8, llvm::dwarf::DW_ATE_unsigned); - g->type_table.put(&entry->name, entry); - } - { - TypeTableEntry *entry = allocate(1); - entry->id = TypeIdI32; - entry->type_ref = LLVMInt32Type(); - buf_init_from_str(&entry->name, "i32"); - entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 32, 32, - llvm::dwarf::DW_ATE_signed); - g->type_table.put(&entry->name, entry); - } - { - TypeTableEntry *entry = allocate(1); - entry->id = TypeIdVoid; - entry->type_ref = LLVMVoidType(); - buf_init_from_str(&entry->name, "void"); - entry->di_type = g->dbuilder->createBasicType(buf_ptr(&entry->name), 0, 0, - llvm::dwarf::DW_ATE_unsigned); - g->type_table.put(&entry->name, entry); - - // invalid types are void - g->invalid_type_entry = entry; - } - { - TypeTableEntry *entry = allocate(1); - entry->id = TypeIdUnreachable; - entry->type_ref = LLVMVoidType(); - buf_init_from_str(&entry->name, "unreachable"); - entry->di_type = g->invalid_type_entry->di_type; - g->type_table.put(&entry->name, entry); - } -} - - -void semantic_analyze(CodeGen *g) { - LLVMInitializeAllTargets(); - LLVMInitializeAllTargetMCs(); - LLVMInitializeAllAsmPrinters(); - LLVMInitializeAllAsmParsers(); - LLVMInitializeNativeTarget(); - - g->is_native_target = true; - char *native_triple = LLVMGetDefaultTargetTriple(); - - LLVMTargetRef target_ref; - char *err_msg = nullptr; - if (LLVMGetTargetFromTriple(native_triple, &target_ref, &err_msg)) { - zig_panic("unable to get target from triple: %s", err_msg); - } - - char *native_cpu = LLVMZigGetHostCPUName(); - char *native_features = LLVMZigGetNativeFeatures(); - - LLVMCodeGenOptLevel opt_level = (g->build_type == CodeGenBuildTypeDebug) ? - LLVMCodeGenLevelNone : LLVMCodeGenLevelAggressive; - - LLVMRelocMode reloc_mode = g->is_static ? LLVMRelocStatic : LLVMRelocPIC; - - g->target_machine = LLVMCreateTargetMachine(target_ref, native_triple, - native_cpu, native_features, opt_level, reloc_mode, LLVMCodeModelDefault); - - g->target_data_ref = LLVMGetTargetMachineData(g->target_machine); - - - g->module = LLVMModuleCreateWithName("ZigModule"); - - g->pointer_size_bytes = LLVMPointerSize(g->target_data_ref); - - g->builder = LLVMCreateBuilder(); - g->dbuilder = new llvm::DIBuilder(*llvm::unwrap(g->module), true); - - - add_types(g); - - analyze_node(g, g->root); -} - -static LLVMValueRef gen_expr(CodeGen *g, AstNode *expr_node); - static void add_debug_source_node(CodeGen *g, AstNode *node) { llvm::unwrap(g->builder)->SetCurrentDebugLocation(llvm::DebugLoc::get( node->line + 1, node->column + 1, diff --git a/src/codegen.hpp b/src/codegen.hpp index ee2557f7b7..7795a3d3d9 100644 --- a/src/codegen.hpp +++ b/src/codegen.hpp @@ -41,8 +41,6 @@ void codegen_set_strip(CodeGen *codegen, bool strip); void codegen_set_out_type(CodeGen *codegen, OutType out_type); void codegen_set_out_name(CodeGen *codegen, Buf *out_name); -void semantic_analyze(CodeGen *g); - void code_gen_optimize(CodeGen *g); void code_gen(CodeGen *g); diff --git a/src/main.cpp b/src/main.cpp index c16ab50b2f..a39bb8feb5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,6 +13,7 @@ #include "tokenizer.hpp" #include "error.hpp" #include "codegen.hpp" +#include "analyze.hpp" #include #include diff --git a/src/semantic_info.hpp b/src/semantic_info.hpp new file mode 100644 index 0000000000..be9e419558 --- /dev/null +++ b/src/semantic_info.hpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2015 Andrew Kelley + * + * This file is part of zig, which is MIT licensed. + * See http://opensource.org/licenses/MIT + */ + +#ifndef ZIG_SEMANTIC_INFO_HPP +#define ZIG_SEMANTIC_INFO_HPP + +#include "codegen.hpp" +#include "hash_map.hpp" + +#include +#include + +struct FnTableEntry { + LLVMValueRef fn_value; + AstNode *proto_node; + AstNode *fn_def_node; + bool is_extern; + bool internal_linkage; + unsigned calling_convention; +}; + +enum TypeId { + TypeIdUserDefined, + TypeIdPointer, + TypeIdU8, + TypeIdI32, + TypeIdVoid, + TypeIdUnreachable, +}; + +struct TypeTableEntry { + TypeId id; + LLVMTypeRef type_ref; + llvm::DIType *di_type; + + TypeTableEntry *pointer_child; + bool pointer_is_const; + int user_defined_id; + Buf name; + TypeTableEntry *pointer_const_parent; + TypeTableEntry *pointer_mut_parent; +}; + +struct CodeGen { + LLVMModuleRef module; + AstNode *root; + ZigList errors; + LLVMBuilderRef builder; + llvm::DIBuilder *dbuilder; + llvm::DICompileUnit *compile_unit; + HashMap fn_table; + HashMap str_table; + HashMap type_table; + HashMap link_table; + TypeTableEntry *invalid_type_entry; + LLVMTargetDataRef target_data_ref; + unsigned pointer_size_bytes; + bool is_static; + bool strip_debug_symbols; + CodeGenBuildType build_type; + LLVMTargetMachineRef target_machine; + bool is_native_target; + Buf in_file; + Buf in_dir; + ZigList block_scopes; + llvm::DIFile *di_file; + ZigList fn_defs; + Buf *out_name; + OutType out_type; + FnTableEntry *cur_fn; + bool c_stdint_used; + AstNode *root_export_decl; + int version_major; + int version_minor; + int version_patch; +}; + +struct TypeNode { + TypeTableEntry *entry; +}; + +struct FnDefNode { + bool add_implicit_return; + bool skip; + LLVMValueRef *params; +}; + +struct CodeGenNode { + union { + TypeNode type_node; // for NodeTypeType + FnDefNode fn_def_node; // for NodeTypeFnDef + } data; +}; + +static inline Buf *hack_get_fn_call_name(CodeGen *g, AstNode *node) { + // Assume that the expression evaluates to a simple name and return the buf + // TODO after type checking works we should be able to remove this hack + assert(node->type == NodeTypeSymbol); + return &node->data.symbol; +} + +#endif