Rename GrooveList to ZigList, drop the error-code returns from its
append/resize/capacity-growing code, and add a first tokenizer (plus a token
dump) to main.cpp. util.hpp gains max, min and clamp helpers.

Notes on this copy of the patch:
- The header names of the unchanged system "#include" context lines (one in
  src/list.hpp, three in src/main.cpp) are missing from the source text and
  are left blank rather than guessed.
- Relative to the patch as found, the tokenizer emits RParen for ')', no
  longer runs a put-back character through the line/column accounting twice,
  and closes (or rejects) a token that is still open at end of input. Doc
  comments were added as well; the hunk headers account for the extra lines,
  the "index" lines are kept from the original patch.

diff --git a/README.md b/README.md
index 514042d881..072d0688af 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,5 @@
 # zig lang
+
+C upgrade.
+
+Start with C.
diff --git a/src/list.hpp b/src/list.hpp
index 35c632823b..434ff9ffe6 100644
--- a/src/list.hpp
+++ b/src/list.hpp
@@ -5,24 +5,21 @@
  * See http://opensource.org/licenses/MIT
  */
 
-#ifndef GROOVE_LIST_HPP
-#define GROOVE_LIST_HPP
+#ifndef ZIG_LIST_HPP
+#define ZIG_LIST_HPP
 
 #include "util.hpp"
 
 #include
 
 template<typename T>
-struct GrooveList {
+struct ZigList {
     void deinit() {
         deallocate(items);
     }
     void append(T item) {
-        int err = ensure_capacity(length + 1);
-        if (err)
-            return err;
+        ensure_capacity(length + 1);
         items[length++] = item;
-        return 0;
     }
     // remember that the pointer to this item is invalid after you
     // modify the length of the list
@@ -57,11 +54,8 @@ struct GrooveList {
 
     void resize(int new_length) {
         assert(new_length >= 0);
-        int err = ensure_capacity(new_length);
-        if (err)
-            return err;
+        ensure_capacity(new_length);
         length = new_length;
-        return 0;
     }
 
     void clear() {
@@ -76,7 +70,6 @@ struct GrooveList {
             items = reallocate_nonzero(items, better_capacity);
             capacity = better_capacity;
         }
-        return 0;
     }
 
     T * items;
diff --git a/src/main.cpp b/src/main.cpp
index dce825841d..7393952e05 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -7,6 +7,7 @@
 
 #include "config.h"
 #include "util.hpp"
+#include "list.hpp"
 #include
 #include
 #include
@@ -30,6 +31,13 @@ static Buf *alloc_buf(int size) {
     return buf;
 }
 
+/*
+static void fprint_buf(FILE *f, Buf *buf) {
+    if (fwrite(buf->ptr, 1, buf->len, f))
+        zig_panic("error writing: %s", strerror(errno));
+}
+*/
+
 static int usage(char *arg0) {
     fprintf(stderr, "Usage: %s --output outfile code.zig\n"
         "Other options:\n"
@@ -56,6 +64,321 @@ static struct Buf *fetch_file(FILE *f) {
     return buf;
 }
 
+// WHITESPACE, DIGIT and ALPHA each expand to a run of case labels without
+// the leading "case" and the trailing ':', so that "case DIGIT:" in a switch
+// matches every character of that class.
+#define WHITESPACE \
+    ' ': \
+    case '\t': \
+    case '\n': \
+    case '\f': \
+    case '\r': \
+    case 0xb
+
+#define DIGIT \
+    '0': \
+    case '1': \
+    case '2': \
+    case '3': \
+    case '4': \
+    case '5': \
+    case '6': \
+    case '7': \
+    case '8': \
+    case '9'
+
+#define ALPHA \
+    'a': \
+    case 'b': \
+    case 'c': \
+    case 'd': \
+    case 'e': \
+    case 'f': \
+    case 'g': \
+    case 'h': \
+    case 'i': \
+    case 'j': \
+    case 'k': \
+    case 'l': \
+    case 'm': \
+    case 'n': \
+    case 'o': \
+    case 'p': \
+    case 'q': \
+    case 'r': \
+    case 's': \
+    case 't': \
+    case 'u': \
+    case 'v': \
+    case 'w': \
+    case 'x': \
+    case 'y': \
+    case 'z': \
+    case 'A': \
+    case 'B': \
+    case 'C': \
+    case 'D': \
+    case 'E': \
+    case 'F': \
+    case 'G': \
+    case 'H': \
+    case 'I': \
+    case 'J': \
+    case 'K': \
+    case 'L': \
+    case 'M': \
+    case 'N': \
+    case 'O': \
+    case 'P': \
+    case 'Q': \
+    case 'R': \
+    case 'S': \
+    case 'T': \
+    case 'U': \
+    case 'V': \
+    case 'W': \
+    case 'X': \
+    case 'Y': \
+    case 'Z'
+
+enum TokenId {
+    TokenIdDirective,
+    TokenIdSymbol,
+    TokenIdLParen,
+    TokenIdRParen,
+    TokenIdComma,
+    TokenIdStar,
+    TokenIdLBrace,
+    TokenIdRBrace,
+    TokenIdStringLiteral,
+    TokenIdSemicolon,
+    TokenIdNumberLiteral,
+    TokenIdPlus,
+};
+
+// One token; start_pos and end_pos are offsets into the tokenized buffer,
+// with end_pos one past the token's last byte.
+struct Token {
+    TokenId id;
+    int start_pos;
+    int end_pos;
+};
+
+enum TokenizeState {
+    TokenizeStateStart,
+    TokenizeStateDirective,
+    TokenizeStateSymbol,
+    TokenizeStateString,
+    TokenizeStateNumber,
+};
+
+// Tokenizer state shared by the helpers below: pos is the buffer offset of
+// the byte being examined, line/column are zero-based and only used for
+// error messages, and cur_tok is the token under construction (or null).
+struct Tokenize {
+    int pos;
+    TokenizeState state;
+    ZigList<Token> *tokens;
+    int line;
+    int column;
+    Token *cur_tok;
+};
+
+// Prints "Error. Line L, column C: " plus the printf-style message to stderr
+// and terminates the process with EXIT_FAILURE.
+__attribute__ ((format (printf, 2, 3)))
+static void tokenize_error(Tokenize *t, const char *format, ...) {
+    va_list ap;
+    va_start(ap, format);
+    fprintf(stderr, "Error. Line %d, column %d: ", t->line + 1, t->column + 1);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+    exit(EXIT_FAILURE);
+}
+
+// Appends a new token of kind `id` starting at the current position and
+// makes it the token under construction.
+static void begin_token(Tokenize *t, TokenId id) {
+    assert(!t->cur_tok);
+    t->tokens->add_one();
+    Token *token = &t->tokens->last();
+    token->id = id;
+    token->start_pos = t->pos;
+    t->cur_tok = token;
+}
+
+// Finishes the token under construction; the byte at the current position
+// is its last byte.
+static void end_token(Tokenize *t) {
+    assert(t->cur_tok);
+    t->cur_tok->end_pos = t->pos + 1;
+    t->cur_tok = nullptr;
+}
+
+// Moves the position back `count` bytes so they are examined again.
+static void put_back(Tokenize *t, int count) {
+    t->pos -= count;
+}
+
+// Splits buf into tokens with a byte-at-a-time state machine and returns the
+// newly allocated token list. Exits via tokenize_error on invalid input.
+static ZigList<Token> *tokenize(Buf *buf) {
+    Tokenize t = {0};
+    t.tokens = allocate<ZigList<Token>>(1);
+    for (t.pos = 0; t.pos < buf->len; t.pos += 1) {
+        uint8_t c = buf->ptr[t.pos];
+        switch (t.state) {
+            case TokenizeStateStart:
+                switch (c) {
+                    case WHITESPACE:
+                        break;
+                    case ALPHA:
+                        t.state = TokenizeStateSymbol;
+                        begin_token(&t, TokenIdSymbol);
+                        break;
+                    case DIGIT:
+                        t.state = TokenizeStateNumber;
+                        begin_token(&t, TokenIdNumberLiteral);
+                        break;
+                    case '#':
+                        t.state = TokenizeStateDirective;
+                        begin_token(&t, TokenIdDirective);
+                        break;
+                    case '(':
+                        begin_token(&t, TokenIdLParen);
+                        end_token(&t);
+                        break;
+                    case ')':
+                        begin_token(&t, TokenIdRParen);
+                        end_token(&t);
+                        break;
+                    case ',':
+                        begin_token(&t, TokenIdComma);
+                        end_token(&t);
+                        break;
+                    case '*':
+                        begin_token(&t, TokenIdStar);
+                        end_token(&t);
+                        break;
+                    case '{':
+                        begin_token(&t, TokenIdLBrace);
+                        end_token(&t);
+                        break;
+                    case '}':
+                        begin_token(&t, TokenIdRBrace);
+                        end_token(&t);
+                        break;
+                    case '"':
+                        begin_token(&t, TokenIdStringLiteral);
+                        t.state = TokenizeStateString;
+                        break;
+                    case ';':
+                        begin_token(&t, TokenIdSemicolon);
+                        end_token(&t);
+                        break;
+                    case '+':
+                        begin_token(&t, TokenIdPlus);
+                        end_token(&t);
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid character: '%c'", c);
+                }
+                break;
+            case TokenizeStateDirective:
+                if (c == '\n') {
+                    assert(t.cur_tok);
+                    t.cur_tok->end_pos = t.pos;
+                    t.cur_tok = nullptr;
+                    t.state = TokenizeStateStart;
+                }
+                break;
+            case TokenizeStateSymbol:
+                switch (c) {
+                    case ALPHA:
+                    case DIGIT:
+                    case '_':
+                        break;
+                    default:
+                        put_back(&t, 1);
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        // c is examined again in the start state; skip the
+                        // line/column update below so it is counted once.
+                        continue;
+                }
+                break;
+            case TokenizeStateString:
+                switch (c) {
+                    case '"':
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        break;
+                    default:
+                        break;
+                }
+                break;
+            case TokenizeStateNumber:
+                switch (c) {
+                    case DIGIT:
+                        break;
+                    default:
+                        put_back(&t, 1);
+                        end_token(&t);
+                        t.state = TokenizeStateStart;
+                        // c is examined again in the start state; skip the
+                        // line/column update below so it is counted once.
+                        continue;
+                }
+                break;
+        }
+        if (c == '\n') {
+            t.line += 1;
+            t.column = 0;
+        } else {
+            t.column += 1;
+        }
+    }
+    // The input can end while a token is still open. Close it at the end of
+    // the buffer, except for a string literal, which needs its closing quote.
+    if (t.cur_tok) {
+        if (t.state == TokenizeStateString)
+            tokenize_error(&t, "unterminated string literal");
+        t.cur_tok->end_pos = t.pos;
+        t.cur_tok = nullptr;
+    }
+    return t.tokens;
+}
+
+// Returns a human-readable name for the token's kind.
+static const char * token_name(Token *token) {
+    switch (token->id) {
+        case TokenIdDirective: return "Directive";
+        case TokenIdSymbol: return "Symbol";
+        case TokenIdLParen: return "LParen";
+        case TokenIdRParen: return "RParen";
+        case TokenIdComma: return "Comma";
+        case TokenIdStar: return "Star";
+        case TokenIdLBrace: return "LBrace";
+        case TokenIdRBrace: return "RBrace";
+        case TokenIdStringLiteral: return "StringLiteral";
+        case TokenIdSemicolon: return "Semicolon";
+        case TokenIdNumberLiteral: return "NumberLiteral";
+        case TokenIdPlus: return "Plus";
+    }
+    return "(invalid token)";
+}
+
+// Writes one line per token to stdout: the kind name followed by the
+// token's source text.
+static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
+    for (int i = 0; i < tokens->length; i += 1) {
+        Token *token = &tokens->at(i);
+        printf("%s ", token_name(token));
+        fwrite(buf->ptr + token->start_pos, 1, token->end_pos - token->start_pos, stdout);
+        printf("\n");
+    }
+}
+
 int main(int argc, char **argv) {
     char *arg0 = argv[0];
     char *in_file = NULL;
@@ -99,7 +422,9 @@ int main(int argc, char **argv) {
 
     fprintf(stderr, "%s\n", in_data->ptr);
 
-    //tokenize(in_data);
+    ZigList<Token> *tokens = tokenize(in_data);
+
+    print_tokens(in_data, tokens);
 
 
     return EXIT_SUCCESS;
diff --git a/src/util.hpp b/src/util.hpp
index 1702c4e90d..5571bc3306 100644
--- a/src/util.hpp
+++ b/src/util.hpp
@@ -48,4 +48,23 @@ template
 constexpr long array_length(const T (&)[n]) {
     return n;
 }
+
+// Returns the larger of a and b (a when they compare equal).
+template <typename T>
+static inline T max(T a, T b) {
+    return (a >= b) ? a : b;
+}
+
+// Returns the smaller of a and b (a when they compare equal).
+template <typename T>
+static inline T min(T a, T b) {
+    return (a <= b) ? a : b;
+}
+
+// Limits value to the range [min_value, max_value]; if the bounds are
+// inverted, min_value wins.
+template <typename T>
+static inline T clamp(T min_value, T value, T max_value) {
+    return max(min(value, max_value), min_value);
+}
 #endif