/* Copyright: Boaz Segev, 2018-2019 License: MIT Feel free to copy, use and enjoy according to the license provided. */ #ifndef H_HTTP_MIME_PARSER_H #define H_HTTP_MIME_PARSER_H #include #include #include /* ***************************************************************************** Known Limitations: - Doesn't support nested multipart form structures (i.e., multi-file selection). See: https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2 To circumvent limitation, initialize a new parser to parse nested multiparts. ***************************************************************************** */ /* ***************************************************************************** The HTTP MIME Multipart Form Parser Type ***************************************************************************** */ /** all data id read-only / for internal use */ typedef struct { char *boundary; size_t boundary_len; uint8_t in_obj; uint8_t done; uint8_t error; } http_mime_parser_s; /* ***************************************************************************** Callbacks to be implemented. ***************************************************************************** */ /** Called when all the data is available at once. */ static void http_mime_parser_on_data(http_mime_parser_s *parser, void *name, size_t name_len, void *filename, size_t filename_len, void *mimetype, size_t mimetype_len, void *value, size_t value_len); /** Called when the data didn't fit in the buffer. Data will be streamed. */ static void http_mime_parser_on_partial_start( http_mime_parser_s *parser, void *name, size_t name_len, void *filename, size_t filename_len, void *mimetype, size_t mimetype_len); /** Called when partial data is available. */ static void http_mime_parser_on_partial_data(http_mime_parser_s *parser, void *value, size_t value_len); /** Called when the partial data is complete. */ static void http_mime_parser_on_partial_end(http_mime_parser_s *parser); /** * Called when URL decoding is required. * * Should support inplace decoding (`dest == encoded`). * * Should return the length of the decoded string. */ static size_t http_mime_decode_url(char *dest, const char *encoded, size_t length); /* ***************************************************************************** API ***************************************************************************** */ /** * Takes the HTTP Content-Type header and initializes the parser data. * * Note: the Content-Type header should persist in memory while the parser is in * use. */ static int http_mime_parser_init(http_mime_parser_s *parser, char *content_type, size_t len); /** * Consumes data from a streaming buffer. * * The data might be partially consumed, in which case the unconsumed data * should be resent to the parser as more data becomes available. * * Note: test the `parser->done` and `parser->error` flags between iterations. */ static size_t http_mime_parse(http_mime_parser_s *parser, void *buffer, size_t length); /* ***************************************************************************** Implementations ***************************************************************************** */ /** takes the HTTP Content-Type header and initializes the parser data. */ static int http_mime_parser_init(http_mime_parser_s *parser, char *content_type, size_t len) { *parser = (http_mime_parser_s){.done = 0}; if (len < 14 || strncasecmp("multipart/form", content_type, 14)) return -1; char *cut = memchr(content_type, ';', len); while (cut) { ++cut; len -= (size_t)(cut - content_type); while (len && cut[0] == ' ') { --len; ++cut; } if (len <= 9) return -1; if (strncasecmp("boundary=", cut, 9)) { content_type = cut; cut = memchr(cut, ';', len); continue; } cut += 9; len -= 9; content_type = cut; parser->boundary = content_type; if ((cut = memchr(content_type, ';', len))) parser->boundary_len = (size_t)(cut - content_type); else parser->boundary_len = len; return 0; } return -1; } /** * Consumes data from a streaming buffer. * * The data might be partially consumed, in which case the unconsumed data * should be resent to the parser as more data becomes available. * * Note: test the `parser->done` and `parser->error` flags between iterations. */ static size_t http_mime_parse(http_mime_parser_s *parser, void *buffer, size_t length) { int first_run = 1; char *pos = buffer; const char *stop = pos + length; if (!length) goto end_of_data; consume_partial: if (parser->in_obj) { /* we're in an object longer than the buffer */ char *start = pos; char *end = start; do { end = memchr(end, '\n', (size_t)(stop - end)); } while (end && ++end && (size_t)(stop - end) >= (4 + parser->boundary_len) && (end[0] != '-' || end[1] != '-' || memcmp(end + 2, parser->boundary, parser->boundary_len))); if (!end) { end = (char *)stop; pos = end; if (end - start) http_mime_parser_on_partial_data(parser, start, (size_t)(end - start)); goto end_of_data; } else if (end + 4 + parser->boundary_len >= stop) { end -= 2; if (end[0] == '\r') --end; pos = end; if (end - start) http_mime_parser_on_partial_data(parser, start, (size_t)(end - start)); goto end_of_data; } size_t len = (end - start) - 1; if (start[len - 1] == '\r') --len; if (len) http_mime_parser_on_partial_data(parser, start, len); http_mime_parser_on_partial_end(parser); pos = end; parser->in_obj = 0; first_run = 0; } else if (length < (4 + parser->boundary_len) || pos[0] != '-' || pos[1] != '-' || memcmp(pos + 2, parser->boundary, parser->boundary_len)) goto error; /* We're at a boundary */ while (pos < stop) { char *start; char *end; char *name = NULL; uint32_t name_len = 0; char *value = NULL; uint32_t value_len = 0; char *filename = NULL; uint32_t filename_len = 0; char *mime = NULL; uint32_t mime_len = 0; uint8_t header_count = 0; /* test for ending */ if (pos[2 + parser->boundary_len] == '-' && pos[3 + parser->boundary_len] == '-') { pos += 5 + parser->boundary_len; if (pos > stop) pos = (char *)stop; else if (pos < stop && pos[0] == '\n') ++pos; goto done; } start = pos + 3 + parser->boundary_len; if (start[0] == '\n') { /* should be true, unless new line marker was just '\n' */ ++start; } /* consume headers */ while (start + 4 < stop && start[0] != '\n' && start[1] != '\n') { end = memchr(start, '\n', (size_t)(stop - start)); if (!end) { if (first_run) goto error; goto end_of_data; } if (end - start > 29 && !strncasecmp(start, "content-disposition:", 20)) { /* content-disposition header */ start = memchr(start + 20, ';', end - (start + 20)); // if (!start) // start = end + 1; while (start) { ++start; if (start[0] == ' ') ++start; if (start + 6 < end && !strncasecmp(start, "name=", 5)) { name = start + 5; if (name[0] == '"') ++name; start = memchr(name, ';', (size_t)(end - start)); if (!start) { name_len = (size_t)(end - name); if (name[name_len - 1] == '\r') --name_len; } else { name_len = (size_t)(start - name); } if (name[name_len - 1] == '"') --name_len; } else if (start + 9 < end && !strncasecmp(start, "filename", 8)) { uint8_t encoded = 0; start += 8; if (start[0] == '*') { encoded = 1; ++start; } if (start[0] != '=') goto error; ++start; if (start[0] == ' ') ++start; if (start[0] == '"') ++start; if (filename && !encoded) { /* prefer URL encoded version */ start = memchr(filename, ';', (size_t)(end - start)); continue; } filename = start; start = memchr(filename, ';', (size_t)(end - start)); if (!start) { filename_len = (size_t)((end - filename)); if (filename[filename_len - 1] == '\r') { --filename_len; } } else { filename_len = (size_t)(start - filename); } if (filename[filename_len - 1] == '"') --filename_len; if (encoded) { ssize_t new_len = http_mime_decode_url(filename, filename, filename_len); if (new_len > 0) filename_len = new_len; } } else { start = memchr(start, ';', (size_t)(end - start)); } } } else if (end - start > 14 && !strncasecmp(start, "content-type:", 13)) { /* content-type header */ start += 13; if (start[0] == ' ') ++start; mime = start; start = memchr(start, ';', (size_t)(end - start)); if (!start) { mime_len = (size_t)(end - mime); if (mime[mime_len - 1] == '\r') --mime_len; } else { mime_len = (size_t)(start - mime); } } start = end + 1; if (header_count++ > 4) goto error; } if (!name) { if (start + 4 >= stop) goto end_of_data; goto error; } /* advance to end of boundry */ ++start; if (start[0] == '\n') ++start; value = start; end = start; do { end = memchr(end, '\n', (size_t)(stop - end)); } while (end && ++end && (size_t)(stop - end) >= (4 + parser->boundary_len) && (end[0] != '-' || end[1] != '-' || memcmp(end + 2, parser->boundary, parser->boundary_len))); if (!end || end + 4 + parser->boundary_len >= stop) { if (first_run) { http_mime_parser_on_partial_start(parser, name, name_len, filename, filename_len, mime, mime_len); parser->in_obj = 1; pos = value; goto consume_partial; } goto end_of_data; } value_len = (size_t)((end - value) - 1); if (value[value_len - 1] == '\r') --value_len; pos = end; http_mime_parser_on_data(parser, name, name_len, filename, filename_len, mime, mime_len, value, value_len); first_run = 0; } end_of_data: return (size_t)((uintptr_t)pos - (uintptr_t)buffer); done: parser->done = 1; parser->error = 0; return (size_t)((uintptr_t)pos - (uintptr_t)buffer); error: parser->done = 0; parser->error = 1; return (size_t)((uintptr_t)pos - (uintptr_t)buffer); } #endif