some half done thing I forgot about ¯\_(ツ)_/¯

Andrew Kelley 2015-08-23 22:45:20 -07:00
parent d519ce87dd
commit 5f48463bdd
2 changed files with 187 additions and 160 deletions

View file

@@ -73,18 +73,48 @@ static inline Buf *buf_slice(Buf *in_buf, int start, int end) {
     return out_buf;
 }
 
-static inline void buf_append_str(Buf *buf, const char *str, int str_len) {
-    if (str_len == -1)
-        str_len = strlen(str);
+static inline void buf_append_mem(Buf *buf, const char *mem, int mem_len) {
+    assert(mem_len >= 0);
     int old_len = buf_len(buf);
-    buf_resize(buf, old_len + str_len);
-    memcpy(buf_ptr(buf) + old_len, str, str_len);
+    buf_resize(buf, old_len + mem_len);
+    memcpy(buf_ptr(buf) + old_len, mem, mem_len);
     buf->list.at(buf_len(buf)) = 0;
 }
 
+static inline void buf_append_str(Buf *buf, const char *str) {
+    buf_append_mem(buf, str, strlen(str));
+}
+
 static inline void buf_append_buf(Buf *buf, Buf *append_buf) {
-    buf_append_str(buf, buf_ptr(append_buf), buf_len(append_buf));
+    buf_append_mem(buf, buf_ptr(append_buf), buf_len(append_buf));
+}
+
+static inline void buf_append_char(Buf *buf, uint8_t c) {
+    buf_append_mem(buf, (const char *)&c, 1);
+}
+
+static inline bool buf_eql_mem(Buf *buf, const char *mem, int mem_len) {
+    if (buf_len(buf) != mem_len)
+        return false;
+    return memcmp(buf_ptr(buf), mem, mem_len) == 0;
+}
+
+static inline bool buf_eql_str(Buf *buf, const char *str) {
+    return buf_eql_mem(buf, str, strlen(str));
+}
+
+static inline bool buf_eql_buf(Buf *buf, Buf *other) {
+    return buf_eql_mem(buf, buf_ptr(other), buf_len(other));
+}
+
+static inline void buf_splice_buf(Buf *buf, int start, int end, Buf *other) {
+    if (start != end)
+        zig_panic("TODO buf_splice_buf");
+    int old_buf_len = buf_len(buf);
+    buf_resize(buf, old_buf_len + buf_len(other));
+    memmove(buf_ptr(buf) + start + buf_len(other), buf_ptr(buf) + start, old_buf_len - start);
+    memcpy(buf_ptr(buf) + start, buf_ptr(other), buf_len(other));
 }
 
 // TODO this method needs work
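The new buf_splice_buf above only supports the start == end case, i.e. pure insertion at a position: grow the buffer, shift the tail right with memmove (safe for overlapping ranges), then memcpy the new bytes into the gap. Below is a minimal standalone sketch of that same insertion logic, using plain malloc'd memory instead of the repo's Buf type; all names here are illustrative, not from the codebase.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Illustrative stand-in for buf_splice_buf(buf, pos, pos, other): insert
// ins_len bytes into *buf (current length *len) at position pos.
static void splice_insert(char **buf, int *len, int pos, const char *ins, int ins_len) {
    assert(pos >= 0 && pos <= *len && ins_len >= 0);
    int old_len = *len;
    *buf = (char *)realloc(*buf, old_len + ins_len + 1);      // grow first, like buf_resize
    memmove(*buf + pos + ins_len, *buf + pos, old_len - pos); // shift tail right; ranges overlap
    memcpy(*buf + pos, ins, ins_len);                          // fill the gap; no overlap now
    *len = old_len + ins_len;
    (*buf)[*len] = 0;                                          // keep the buffer null terminated
}

int main(void) {
    const char *src = "int main() {}";
    int len = (int)strlen(src);
    char *buf = (char *)malloc(len + 1);
    memcpy(buf, src, len + 1);

    splice_insert(&buf, &len, 0, "/* included */ ", 15);
    printf("%s\n", buf);  // prints: /* included */ int main() {}

    free(buf);
    return 0;
}

The memmove-before-memcpy ordering matters: the tail is moved out of the way first, so the copy into the gap never reads bytes it has already overwritten.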

View file

@@ -154,18 +154,29 @@ struct Token {
 enum TokenizeState {
     TokenizeStateStart,
     TokenizeStateDirective,
+    TokenizeStateDirectiveName,
+    TokenizeStateIncludeQuote,
+    TokenizeStateDirectiveEnd,
+    TokenizeStateInclude,
     TokenizeStateSymbol,
     TokenizeStateString,
     TokenizeStateNumber,
 };
 
 struct Tokenize {
+    Buf *buf;
     int pos;
     TokenizeState state;
     ZigList<Token> *tokens;
     int line;
     int column;
     Token *cur_tok;
+    Buf *directive_name;
+    Buf *cur_dir_path;
+    uint8_t unquote_char;
+    int quote_start_pos;
+    Buf *include_path;
+    ZigList<char *> *include_paths;
 };
 
 __attribute__ ((format (printf, 2, 3)))
@@ -210,24 +221,72 @@ static void put_back(Tokenize *t, int count) {
     t->pos -= count;
 }
 
+static void begin_directive(Tokenize *t) {
+    t->state = TokenizeStateDirective;
+    begin_token(t, TokenIdDirective);
+    assert(!t->directive_name);
+    t->directive_name = buf_alloc();
+}
+
+static bool find_and_include_file(Tokenize *t, char *dir_path, char *file_path) {
+    Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path);
+    FILE *f = fopen(buf_ptr(full_path), "rb");
+    if (!f)
+        return false;
+    Buf *contents = fetch_file(f);
+    buf_splice_buf(t->buf, t->pos, t->pos, contents);
+    return true;
+}
+
+static void render_include(Tokenize *t, Buf *target_path, char unquote_char) {
+    if (unquote_char == '"') {
+        if (find_and_include_file(t, buf_ptr(t->cur_dir_path), buf_ptr(target_path)))
+            return;
+    }
+    for (int i = 0; i < t->include_paths->length; i += 1) {
+        char *include_path = t->include_paths->at(i);
+        if (find_and_include_file(t, include_path, buf_ptr(target_path)))
+            return;
+    }
+    tokenize_error(t, "include path \"%s\" not found", buf_ptr(target_path));
+}
+
 static void end_directive(Tokenize *t) {
-    assert(t->cur_tok);
-    t->cur_tok->end_pos = t->pos;
-    t->cur_tok = nullptr;
+    end_token(t);
+    if (t->include_path) {
+        render_include(t, t->include_path, t->unquote_char);
+        t->include_path = nullptr;
+    }
     t->state = TokenizeStateStart;
 }
 
+static void end_directive_name(Tokenize *t) {
+    if (buf_eql_str(t->directive_name, "include")) {
+        t->state = TokenizeStateInclude;
+        t->directive_name = nullptr;
+    } else {
+        tokenize_error(t, "invalid directive name: \"%s\"", buf_ptr(t->directive_name));
+    }
+}
+
 static void end_symbol(Tokenize *t) {
     put_back(t, 1);
     end_token(t);
     t->state = TokenizeStateStart;
 }
 
-static ZigList<Token> *tokenize(Buf *buf) {
+static ZigList<Token> *tokenize(Buf *buf, ZigList<char *> *include_paths, Buf *cur_dir_path) {
     Tokenize t = {0};
     t.tokens = allocate<ZigList<Token>>(1);
-    for (t.pos = 0; t.pos < buf_len(buf); t.pos += 1) {
-        uint8_t c = buf_ptr(buf)[t.pos];
+    t.buf = buf;
+    t.cur_dir_path = cur_dir_path;
+    t.include_paths = include_paths;
+    for (t.pos = 0; t.pos < buf_len(t.buf); t.pos += 1) {
+        uint8_t c = buf_ptr(t.buf)[t.pos];
         switch (t.state) {
             case TokenizeStateStart:
                 switch (c) {
@@ -242,8 +301,11 @@ static ZigList<Token> *tokenize(Buf *buf) {
                         begin_token(&t, TokenIdNumberLiteral);
                         break;
                     case '#':
-                        t.state = TokenizeStateDirective;
-                        begin_token(&t, TokenIdDirective);
+                        begin_directive(&t);
+                        break;
+                    case '"':
+                        begin_token(&t, TokenIdStringLiteral);
+                        t.state = TokenizeStateString;
                         break;
                     case '(':
                         begin_token(&t, TokenIdLParen);
@@ -269,10 +331,6 @@ static ZigList<Token> *tokenize(Buf *buf) {
                         begin_token(&t, TokenIdRBrace);
                         end_token(&t);
                         break;
-                    case '"':
-                        begin_token(&t, TokenIdStringLiteral);
-                        t.state = TokenizeStateString;
-                        break;
                     case ';':
                         begin_token(&t, TokenIdSemicolon);
                         end_token(&t);
@@ -286,8 +344,70 @@ static ZigList<Token> *tokenize(Buf *buf) {
                 }
                 break;
             case TokenizeStateDirective:
-                if (c == '\n') {
-                    end_directive(&t);
+                switch (c) {
+                    case '\n':
+                        end_directive_name(&t);
+                        end_directive(&t);
+                        break;
+                    case ' ':
+                    case '\t':
+                    case '\f':
+                    case '\r':
+                    case 0xb:
+                        break;
+                    case SYMBOL_CHAR:
+                        t.state = TokenizeStateDirectiveName;
+                        buf_append_char(t.directive_name, c);
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid directive character: '%c'", c);
+                        break;
+                }
+                break;
+            case TokenizeStateDirectiveName:
+                switch (c) {
+                    case WHITESPACE:
+                        end_directive_name(&t);
+                        break;
+                    case SYMBOL_CHAR:
+                        buf_append_char(t.directive_name, c);
+                        break;
+                    default:
+                        tokenize_error(&t, "invalid directive name character: '%c'", c);
+                        break;
+                }
+                break;
+            case TokenizeStateInclude:
+                switch (c) {
+                    case WHITESPACE:
+                        break;
+                    case '<':
+                    case '"':
+                        t.state = TokenizeStateIncludeQuote;
+                        t.quote_start_pos = t.pos;
+                        t.unquote_char = (c == '<') ? '>' : '"';
+                        break;
+                }
+                break;
+            case TokenizeStateIncludeQuote:
+                if (c == t.unquote_char) {
+                    t.include_path = buf_slice(t.buf, t.quote_start_pos + 1, t.pos);
+                    t.state = TokenizeStateDirectiveEnd;
+                }
+                break;
+            case TokenizeStateDirectiveEnd:
+                switch (c) {
+                    case '\n':
+                        end_directive(&t);
+                        break;
+                    case ' ':
+                    case '\t':
+                    case '\f':
+                    case '\r':
+                    case 0xb:
+                        break;
+                    default:
+                        tokenize_error(&t, "expected whitespace or newline: '%c'", c);
                 }
                 break;
             case TokenizeStateSymbol:
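The case WHITESPACE: and case SYMBOL_CHAR: labels above are presumably multi-character case macros defined elsewhere in this file (they are not part of this diff). Below is a hedged sketch of that preprocessor trick, with an illustrative macro body that may not match the real character set.

#include <stdio.h>

// Illustrative only: the real WHITESPACE / SYMBOL_CHAR macros live elsewhere
// in this file and may cover a different character set. The macro body
// supplies everything after the first `case` keyword, so a single
// `case WHITESPACE:` label expands into a chain of case labels sharing a body.
#define WHITESPACE \
         ' ': \
    case '\t': \
    case '\n': \
    case '\r'

static int is_whitespace_via_switch(char c) {
    switch (c) {
        case WHITESPACE:
            return 1;
        default:
            return 0;
    }
}

int main(void) {
    printf("%d %d\n", is_whitespace_via_switch(' '), is_whitespace_via_switch('x'));  // 1 0
    return 0;
}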
@@ -333,6 +453,13 @@ static ZigList<Token> *tokenize(Buf *buf) {
         case TokenizeStateDirective:
             end_directive(&t);
             break;
+        case TokenizeStateDirectiveName:
+            end_directive_name(&t);
+            end_directive(&t);
+            break;
+        case TokenizeStateInclude:
+            tokenize_error(&t, "missing include path");
+            break;
         case TokenizeStateSymbol:
             end_symbol(&t);
             break;
@@ -342,6 +469,12 @@ static ZigList<Token> *tokenize(Buf *buf) {
         case TokenizeStateNumber:
             end_symbol(&t);
             break;
+        case TokenizeStateIncludeQuote:
+            tokenize_error(&t, "unterminated include path");
+            break;
+        case TokenizeStateDirectiveEnd:
+            end_directive(&t);
+            break;
     }
     assert(!t.cur_tok);
     return t.tokens;
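With this change the separate preprocess pass (removed below) goes away: when end_directive fires on an #include, find_and_include_file reads the target file and buf_splice_buf injects its contents into t->buf at t->pos, so the tokenizer's main loop simply keeps scanning into the included text. Below is a standalone sketch of that splice-at-the-cursor idea, using std::string instead of Buf; the '$' marker syntax and all names are illustrative only.

#include <iostream>
#include <string>

// Illustrative only: scan a string one character at a time and, when a '$'
// marker is seen, splice replacement text into the buffer at the cursor.
// The loop condition re-reads buf.size(), so scanning continues straight
// into the spliced-in text, like the tokenizer walking into an included file.
int main() {
    std::string buf = "a$b";
    const std::string included = "XYZ";
    std::string out;

    size_t pos = 0;
    while (pos < buf.size()) {
        char c = buf[pos];
        if (c == '$') {
            buf.replace(pos, 1, included);  // splice at the cursor; do not advance
            continue;                       // next iteration sees the first spliced char
        }
        out += c;
        pos += 1;
    }

    std::cout << out << "\n";  // prints: aXYZb
    return 0;
}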
@@ -374,144 +507,6 @@ static void print_tokens(Buf *buf, ZigList<Token> *tokens) {
     }
 }
 
-struct Preprocess {
-    Buf *out_buf;
-    Buf *in_buf;
-    Token *token;
-    ZigList<char *> *include_paths;
-    Buf *cur_dir_path;
-};
-
-__attribute__ ((format (printf, 2, 3)))
-static void preprocess_error(Preprocess *p, const char *format, ...) {
-    va_list ap;
-    va_start(ap, format);
-    fprintf(stderr, "Error: Line %d, column %d: ", p->token->start_line + 1, p->token->start_column + 1);
-    vfprintf(stderr, format, ap);
-    fprintf(stderr, "\n");
-    va_end(ap);
-    exit(EXIT_FAILURE);
-}
-
-enum IncludeState {
-    IncludeStateStart,
-    IncludeStateQuote,
-};
-
-static Buf *find_include_file(Preprocess *p, char *dir_path, char *file_path) {
-    Buf *full_path = buf_sprintf("%s/%s", dir_path, file_path);
-    FILE *f = fopen(buf_ptr(full_path), "rb");
-    if (!f)
-        return nullptr;
-    return fetch_file(f);
-}
-
-static void render_include(Preprocess *p, Buf *target_path, char unquote_char) {
-    if (unquote_char == '"') {
-        Buf *file_contents = find_include_file(p, buf_ptr(p->cur_dir_path), buf_ptr(target_path));
-        if (file_contents) {
-            buf_append_buf(p->out_buf, file_contents);
-            return;
-        }
-    }
-    for (int i = 0; i < p->include_paths->length; i += 1) {
-        char *include_path = p->include_paths->at(i);
-        Buf *file_contents = find_include_file(p, include_path, buf_ptr(target_path));
-        if (file_contents) {
-            buf_append_buf(p->out_buf, file_contents);
-            return;
-        }
-    }
-    preprocess_error(p, "include path \"%s\" not found", buf_ptr(target_path));
-}
-
-static void parse_and_render_include(Preprocess *p, Buf *directive_buf, int pos) {
-    int state = IncludeStateStart;
-    char unquote_char;
-    int quote_start_pos;
-    for (; pos < buf_len(directive_buf); pos += 1) {
-        uint8_t c = buf_ptr(directive_buf)[pos];
-        switch (state) {
-            case IncludeStateStart:
-                switch (c) {
-                    case WHITESPACE:
-                        break;
-                    case '<':
-                    case '"':
-                        state = IncludeStateQuote;
-                        quote_start_pos = pos;
-                        unquote_char = (c == '<') ? '>' : '"';
-                        break;
-                }
-                break;
-            case IncludeStateQuote:
-                if (c == unquote_char) {
-                    Buf *include_path = buf_slice(directive_buf, quote_start_pos + 1, pos);
-                    render_include(p, include_path, unquote_char);
-                    return;
-                }
-                break;
-        }
-    }
-    preprocess_error(p, "include directive missing path");
-}
-
-static void render_directive(Preprocess *p, Buf *directive_buf) {
-    for (int pos = 1; pos < buf_len(directive_buf); pos += 1) {
-        uint8_t c = buf_ptr(directive_buf)[pos];
-        switch (c) {
-            case SYMBOL_CHAR:
-                break;
-            default:
-                pos -= 1;
-                Buf *directive_name = buf_from_mem(buf_ptr(directive_buf) + 1, pos);
-                if (strcmp(buf_ptr(directive_name), "include") == 0) {
-                    parse_and_render_include(p, directive_buf, pos);
-                } else {
-                    preprocess_error(p, "invalid directive: \"%s\"", buf_ptr(directive_name));
-                }
-                return;
-        }
-    }
-}
-
-static void render_token(Preprocess *p) {
-    Buf *token_buf = buf_slice(p->in_buf, p->token->start_pos, p->token->end_pos);
-    switch (p->token->id) {
-        case TokenIdDirective:
-            render_directive(p, token_buf);
-            break;
-        default:
-            buf_append_buf(p->out_buf, token_buf);
-            if (p->token->id == TokenIdSemicolon ||
-                p->token->id == TokenIdLBrace ||
-                p->token->id == TokenIdRBrace)
-            {
-                buf_append_str(p->out_buf, "\n", -1);
-            } else {
-                buf_append_str(p->out_buf, " ", -1);
-            }
-    }
-}
-
-static Buf *preprocess(Buf *in_buf, ZigList<Token> *tokens,
-        ZigList<char *> *include_paths, Buf *cur_dir_path)
-{
-    Preprocess p = {0};
-    p.out_buf = buf_alloc();
-    p.in_buf = in_buf;
-    p.include_paths = include_paths;
-    p.cur_dir_path = cur_dir_path;
-    for (int i = 0; i < tokens->length; i += 1) {
-        p.token = &tokens->at(i);
-        render_token(&p);
-    }
-    return p.out_buf;
-}
-
 char cur_dir[1024];
 
 int main(int argc, char **argv) {
@@ -566,14 +561,16 @@ int main(int argc, char **argv) {
     fprintf(stderr, "Original source:\n%s\n", buf_ptr(in_data));
 
-    ZigList<Token> *tokens = tokenize(in_data);
+    ZigList<Token> *tokens = tokenize(in_data, &include_paths, cur_dir_path);
 
     fprintf(stderr, "\nTokens:\n");
     print_tokens(in_data, tokens);
 
+    /*
     Buf *preprocessed_source = preprocess(in_data, tokens, &include_paths, cur_dir_path);
     fprintf(stderr, "\nPreprocessed source:\n%s\n", buf_ptr(preprocessed_source));
+    */
 
     return EXIT_SUCCESS;