From 5f48463bdd843a9d8dfd55cfc389637b39fc1074 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 23 Aug 2015 22:45:20 -0700 Subject: [PATCH] =?UTF-8?q?some=20half=20done=20thing=20I=20forgot=20about?= =?UTF-8?q?=20=C2=AF\=5F(=E3=83=84)=5F/=C2=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/buffer.hpp | 44 +++++-- src/main.cpp | 303 ++++++++++++++++++++++++------------------------- 2 files changed, 187 insertions(+), 160 deletions(-) diff --git a/src/buffer.hpp b/src/buffer.hpp index 76115706c0..ace195f3ec 100644 --- a/src/buffer.hpp +++ b/src/buffer.hpp @@ -73,18 +73,48 @@ static inline Buf *buf_slice(Buf *in_buf, int start, int end) { return out_buf; } -static inline void buf_append_str(Buf *buf, const char *str, int str_len) { - if (str_len == -1) - str_len = strlen(str); - +static inline void buf_append_mem(Buf *buf, const char *mem, int mem_len) { + assert(mem_len >= 0); int old_len = buf_len(buf); - buf_resize(buf, old_len + str_len); - memcpy(buf_ptr(buf) + old_len, str, str_len); + buf_resize(buf, old_len + mem_len); + memcpy(buf_ptr(buf) + old_len, mem, mem_len); buf->list.at(buf_len(buf)) = 0; } +static inline void buf_append_str(Buf *buf, const char *str) { + buf_append_mem(buf, str, strlen(str)); +} + static inline void buf_append_buf(Buf *buf, Buf *append_buf) { - buf_append_str(buf, buf_ptr(append_buf), buf_len(append_buf)); + buf_append_mem(buf, buf_ptr(append_buf), buf_len(append_buf)); +} + +static inline void buf_append_char(Buf *buf, uint8_t c) { + buf_append_mem(buf, (const char *)&c, 1); +} + +static inline bool buf_eql_mem(Buf *buf, const char *mem, int mem_len) { + if (buf_len(buf) != mem_len) + return false; + return memcmp(buf_ptr(buf), mem, mem_len) == 0; +} + +static inline bool buf_eql_str(Buf *buf, const char *str) { + return buf_eql_mem(buf, str, strlen(str)); +} + +static inline bool buf_eql_buf(Buf *buf, Buf *other) { + return buf_eql_mem(buf, buf_ptr(other), buf_len(other)); +} + +static inline void buf_splice_buf(Buf *buf, int start, int end, Buf *other) { + if (start != end) + zig_panic("TODO buf_splice_buf"); + + int old_buf_len = buf_len(buf); + buf_resize(buf, old_buf_len + buf_len(other)); + memmove(buf_ptr(buf) + start + buf_len(other), buf_ptr(buf) + start, old_buf_len - start); + memcpy(buf_ptr(buf) + start, buf_ptr(other), buf_len(other)); } // TODO this method needs work diff --git a/src/main.cpp b/src/main.cpp index 5d1d088321..b8823c340f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -154,18 +154,29 @@ struct Token { enum TokenizeState { TokenizeStateStart, TokenizeStateDirective, + TokenizeStateDirectiveName, + TokenizeStateIncludeQuote, + TokenizeStateDirectiveEnd, + TokenizeStateInclude, TokenizeStateSymbol, TokenizeStateString, TokenizeStateNumber, }; struct Tokenize { + Buf *buf; int pos; TokenizeState state; ZigList *tokens; int line; int column; Token *cur_tok; + Buf *directive_name; + Buf *cur_dir_path; + uint8_t unquote_char; + int quote_start_pos; + Buf *include_path; + ZigList *include_paths; }; __attribute__ ((format (printf, 2, 3))) @@ -210,24 +221,72 @@ static void put_back(Tokenize *t, int count) { t->pos -= count; } +static void begin_directive(Tokenize *t) { + t->state = TokenizeStateDirective; + begin_token(t, TokenIdDirective); + assert(!t->directive_name); + t->directive_name = buf_alloc(); +} + +static bool find_and_include_file(Tokenize *t, char *dir_path, char *file_path) { + Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path); + + FILE *f = fopen(buf_ptr(full_path), "rb"); + if (!f) + return false; + + Buf *contents = fetch_file(f); + + buf_splice_buf(t->buf, t->pos, t->pos, contents); + + return true; +} + +static void render_include(Tokenize *t, Buf *target_path, char unquote_char) { + if (unquote_char == '"') { + if (find_and_include_file(t, buf_ptr(t->cur_dir_path), buf_ptr(target_path))) + return; + } + for (int i = 0; i < t->include_paths->length; i += 1) { + char *include_path = t->include_paths->at(i); + if (find_and_include_file(t, include_path, buf_ptr(target_path))) + return; + } + tokenize_error(t, "include path \"%s\" not found", buf_ptr(target_path)); +} + static void end_directive(Tokenize *t) { - assert(t->cur_tok); - t->cur_tok->end_pos = t->pos; - t->cur_tok = nullptr; + end_token(t); + if (t->include_path) { + render_include(t, t->include_path, t->unquote_char); + t->include_path = nullptr; + } t->state = TokenizeStateStart; } +static void end_directive_name(Tokenize *t) { + if (buf_eql_str(t->directive_name, "include")) { + t->state = TokenizeStateInclude; + t->directive_name = nullptr; + } else { + tokenize_error(t, "invalid directive name: \"%s\"", buf_ptr(t->directive_name)); + } +} + static void end_symbol(Tokenize *t) { put_back(t, 1); end_token(t); t->state = TokenizeStateStart; } -static ZigList *tokenize(Buf *buf) { +static ZigList *tokenize(Buf *buf, ZigList *include_paths, Buf *cur_dir_path) { Tokenize t = {0}; t.tokens = allocate>(1); - for (t.pos = 0; t.pos < buf_len(buf); t.pos += 1) { - uint8_t c = buf_ptr(buf)[t.pos]; + t.buf = buf; + t.cur_dir_path = cur_dir_path; + t.include_paths = include_paths; + for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) { + uint8_t c = buf_ptr(t.buf)[t.pos]; switch (t.state) { case TokenizeStateStart: switch (c) { @@ -242,8 +301,11 @@ static ZigList *tokenize(Buf *buf) { begin_token(&t, TokenIdNumberLiteral); break; case '#': - t.state = TokenizeStateDirective; - begin_token(&t, TokenIdDirective); + begin_directive(&t); + break; + case '"': + begin_token(&t, TokenIdStringLiteral); + t.state = TokenizeStateString; break; case '(': begin_token(&t, TokenIdLParen); @@ -269,10 +331,6 @@ static ZigList *tokenize(Buf *buf) { begin_token(&t, TokenIdRBrace); end_token(&t); break; - case '"': - begin_token(&t, TokenIdStringLiteral); - t.state = TokenizeStateString; - break; case ';': begin_token(&t, TokenIdSemicolon); end_token(&t); @@ -286,8 +344,70 @@ static ZigList *tokenize(Buf *buf) { } break; case TokenizeStateDirective: - if (c == '\n') { - end_directive(&t); + switch (c) { + case '\n': + end_directive_name(&t); + end_directive(&t); + break; + case ' ': + case '\t': + case '\f': + case '\r': + case 0xb: + break; + case SYMBOL_CHAR: + t.state = TokenizeStateDirectiveName; + buf_append_char(t.directive_name, c); + break; + default: + tokenize_error(&t, "invalid directive character: '%c'", c); + break; + } + break; + case TokenizeStateDirectiveName: + switch (c) { + case WHITESPACE: + end_directive_name(&t); + break; + case SYMBOL_CHAR: + buf_append_char(t.directive_name, c); + break; + default: + tokenize_error(&t, "invalid directive name character: '%c'", c); + break; + } + break; + case TokenizeStateInclude: + switch (c) { + case WHITESPACE: + break; + case '<': + case '"': + t.state = TokenizeStateIncludeQuote; + t.quote_start_pos = t.pos; + t.unquote_char = (c == '<') ? '>' : '"'; + break; + } + break; + case TokenizeStateIncludeQuote: + if (c == t.unquote_char) { + t.include_path = buf_slice(t.buf, t.quote_start_pos + 1, t.pos); + t.state = TokenizeStateDirectiveEnd; + } + break; + case TokenizeStateDirectiveEnd: + switch (c) { + case '\n': + end_directive(&t); + break; + case ' ': + case '\t': + case '\f': + case '\r': + case 0xb: + break; + default: + tokenize_error(&t, "expected whitespace or newline: '%c'", c); } break; case TokenizeStateSymbol: @@ -333,6 +453,13 @@ static ZigList *tokenize(Buf *buf) { case TokenizeStateDirective: end_directive(&t); break; + case TokenizeStateDirectiveName: + end_directive_name(&t); + end_directive(&t); + break; + case TokenizeStateInclude: + tokenize_error(&t, "missing include path"); + break; case TokenizeStateSymbol: end_symbol(&t); break; @@ -342,6 +469,12 @@ static ZigList *tokenize(Buf *buf) { case TokenizeStateNumber: end_symbol(&t); break; + case TokenizeStateIncludeQuote: + tokenize_error(&t, "unterminated include path"); + break; + case TokenizeStateDirectiveEnd: + end_directive(&t); + break; } assert(!t.cur_tok); return t.tokens; @@ -374,144 +507,6 @@ static void print_tokens(Buf *buf, ZigList *tokens) { } } -struct Preprocess { - Buf *out_buf; - Buf *in_buf; - Token *token; - ZigList *include_paths; - Buf *cur_dir_path; -}; - -__attribute__ ((format (printf, 2, 3))) -static void preprocess_error(Preprocess *p, const char *format, ...) { - va_list ap; - va_start(ap, format); - fprintf(stderr, "Error: Line %d, column %d: ", p->token->start_line + 1, p->token->start_column + 1); - vfprintf(stderr, format, ap); - fprintf(stderr, "\n"); - va_end(ap); - exit(EXIT_FAILURE); -} - -enum IncludeState { - IncludeStateStart, - IncludeStateQuote, -}; - -static Buf *find_include_file(Preprocess *p, char *dir_path, char *file_path) { - Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path); - - FILE *f = fopen(buf_ptr(full_path), "rb"); - if (!f) - return nullptr; - - return fetch_file(f); -} - -static void render_include(Preprocess *p, Buf *target_path, char unquote_char) { - if (unquote_char == '"') { - Buf *file_contents = find_include_file(p, buf_ptr(p->cur_dir_path), buf_ptr(target_path)); - if (file_contents) { - buf_append_buf(p->out_buf, file_contents); - return; - } - } - for (int i = 0; i < p->include_paths->length; i += 1) { - char *include_path = p->include_paths->at(i); - Buf *file_contents = find_include_file(p, include_path, buf_ptr(target_path)); - if (file_contents) { - buf_append_buf(p->out_buf, file_contents); - return; - } - } - preprocess_error(p, "include path \"%s\" not found", buf_ptr(target_path)); -} - -static void parse_and_render_include(Preprocess *p, Buf *directive_buf, int pos) { - int state = IncludeStateStart; - char unquote_char; - int quote_start_pos; - for (; pos < buf_len(directive_buf); pos += 1) { - uint8_t c = buf_ptr(directive_buf)[pos]; - switch (state) { - case IncludeStateStart: - switch (c) { - case WHITESPACE: - break; - case '<': - case '"': - state = IncludeStateQuote; - quote_start_pos = pos; - unquote_char = (c == '<') ? '>' : '"'; - break; - - } - break; - case IncludeStateQuote: - if (c == unquote_char) { - Buf *include_path = buf_slice(directive_buf, quote_start_pos + 1, pos); - render_include(p, include_path, unquote_char); - return; - } - break; - } - } - preprocess_error(p, "include directive missing path"); -} - -static void render_directive(Preprocess *p, Buf *directive_buf) { - for (int pos = 1; pos < buf_len(directive_buf); pos += 1) { - uint8_t c = buf_ptr(directive_buf)[pos]; - switch (c) { - case SYMBOL_CHAR: - break; - default: - pos -= 1; - Buf *directive_name = buf_from_mem(buf_ptr(directive_buf) + 1, pos); - if (strcmp(buf_ptr(directive_name), "include") == 0) { - parse_and_render_include(p, directive_buf, pos); - } else { - preprocess_error(p, "invalid directive: \"%s\"", buf_ptr(directive_name)); - } - return; - } - } -} - -static void render_token(Preprocess *p) { - Buf *token_buf = buf_slice(p->in_buf, p->token->start_pos, p->token->end_pos); - switch (p->token->id) { - case TokenIdDirective: - render_directive(p, token_buf); - break; - default: - buf_append_buf(p->out_buf, token_buf); - if (p->token->id == TokenIdSemicolon || - p->token->id == TokenIdLBrace || - p->token->id == TokenIdRBrace) - { - buf_append_str(p->out_buf, "\n", -1); - } else { - buf_append_str(p->out_buf, " ", -1); - } - } -} - -static Buf *preprocess(Buf *in_buf, ZigList *tokens, - ZigList *include_paths, Buf *cur_dir_path) -{ - Preprocess p = {0}; - p.out_buf = buf_alloc(); - p.in_buf = in_buf; - p.include_paths = include_paths; - p.cur_dir_path = cur_dir_path; - for (int i = 0; i < tokens->length; i += 1) { - p.token = &tokens->at(i); - render_token(&p); - } - return p.out_buf; -} - char cur_dir[1024]; int main(int argc, char **argv) { @@ -566,14 +561,16 @@ int main(int argc, char **argv) { fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data)); - ZigList *tokens = tokenize(in_data); + ZigList *tokens = tokenize(in_data, &include_paths, cur_dir_path); fprintf(stderr, "\nTokens:\n"); print_tokens(in_data, tokens); + /* Buf *preprocessed_source = preprocess(in_data, tokens, &include_paths, cur_dir_path); fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source)); + */ return EXIT_SUCCESS;