mirror of
https://github.com/zigzap/zap.git
synced 2025-10-24 00:44:09 +00:00
350 lines
11 KiB
C
350 lines
11 KiB
C
/*
Copyright: Boaz Segev, 2018-2019
License: MIT

Feel free to copy, use and enjoy according to the license provided.
*/
|
|
#ifndef H_HTTP_MIME_PARSER_H
|
|
#define H_HTTP_MIME_PARSER_H
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/* *****************************************************************************
Known Limitations:

- Doesn't support nested multipart form structures (i.e., multi-file selection).
  See: https://www.w3.org/TR/html401/interact/forms.html#h-17.13.4.2

To circumvent this limitation, initialize a new parser to parse nested
multiparts.
***************************************************************************** */
|
|
|
|
/* *****************************************************************************
|
|
The HTTP MIME Multipart Form Parser Type
|
|
***************************************************************************** */
|
|
|
|
/**
 * The multipart form parser state.
 *
 * All data is read-only / for internal use.
 */
typedef struct {
  /** Start of the boundary text; points into the Content-Type header passed to
   * `http_mime_parser_init` (the text is not copied). */
  char *boundary;
  /** Number of bytes in `boundary`. */
  size_t boundary_len;
  /** Non-zero while a part that did not fit in one buffer is being streamed. */
  uint8_t in_obj;
  /** Set to 1 once the closing delimiter was consumed. */
  uint8_t done;
  /** Set to 1 when the data could not be parsed. */
  uint8_t error;
} http_mime_parser_s;
|
|
|
|
/* *****************************************************************************
|
|
Callbacks to be implemented.
|
|
***************************************************************************** */
|
|
|
|
/** Called when all the data is available at once. */
|
|
static void http_mime_parser_on_data(http_mime_parser_s *parser, void *name,
|
|
size_t name_len, void *filename,
|
|
size_t filename_len, void *mimetype,
|
|
size_t mimetype_len, void *value,
|
|
size_t value_len);
|
|
|
|
/** Called when the data didn't fit in the buffer. Data will be streamed. */
|
|
static void http_mime_parser_on_partial_start(
|
|
http_mime_parser_s *parser, void *name, size_t name_len, void *filename,
|
|
size_t filename_len, void *mimetype, size_t mimetype_len);
|
|
|
|
/** Called when partial data is available. */
|
|
static void http_mime_parser_on_partial_data(http_mime_parser_s *parser,
|
|
void *value, size_t value_len);
|
|
|
|
/** Called when the partial data is complete. */
|
|
static void http_mime_parser_on_partial_end(http_mime_parser_s *parser);
|
|
|
|
/**
 * Callback (to be implemented by the including file): URL-decodes `length`
 * bytes from `encoded` into `dest`.
 *
 * The parser calls it with `dest == encoded`, so in-place decoding must be
 * supported.
 *
 * Should return the length of the decoded string.
 */
static size_t http_mime_decode_url(char *dest,
                                   const char *encoded,
                                   size_t length);
|
|
|
|
/* *****************************************************************************
|
|
API
|
|
***************************************************************************** */
|
|
|
|
/**
|
|
* Takes the HTTP Content-Type header and initializes the parser data.
|
|
*
|
|
* Note: the Content-Type header should persist in memory while the parser is in
|
|
* use.
|
|
*/
|
|
static int http_mime_parser_init(http_mime_parser_s *parser, char *content_type,
|
|
size_t len);
|
|
|
|
/**
|
|
* Consumes data from a streaming buffer.
|
|
*
|
|
* The data might be partially consumed, in which case the unconsumed data
|
|
* should be resent to the parser as more data becomes available.
|
|
*
|
|
* Note: test the `parser->done` and `parser->error` flags between iterations.
|
|
*/
|
|
static size_t http_mime_parse(http_mime_parser_s *parser, void *buffer,
|
|
size_t length);
|
|
|
|
/* *****************************************************************************
|
|
Implementations
|
|
***************************************************************************** */
|
|
|
|
/** takes the HTTP Content-Type header and initializes the parser data. */
|
|
static int http_mime_parser_init(http_mime_parser_s *parser, char *content_type,
|
|
size_t len) {
|
|
*parser = (http_mime_parser_s){.done = 0};
|
|
if (len < 14 || strncasecmp("multipart/form", content_type, 14))
|
|
return -1;
|
|
char *cut = memchr(content_type, ';', len);
|
|
while (cut) {
|
|
++cut;
|
|
len -= (size_t)(cut - content_type);
|
|
while (len && cut[0] == ' ') {
|
|
--len;
|
|
++cut;
|
|
}
|
|
if (len <= 9)
|
|
return -1;
|
|
if (strncasecmp("boundary=", cut, 9)) {
|
|
content_type = cut;
|
|
cut = memchr(cut, ';', len);
|
|
continue;
|
|
}
|
|
cut += 9;
|
|
len -= 9;
|
|
content_type = cut;
|
|
parser->boundary = content_type;
|
|
if ((cut = memchr(content_type, ';', len)))
|
|
parser->boundary_len = (size_t)(cut - content_type);
|
|
else
|
|
parser->boundary_len = len;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* Consumes data from a streaming buffer.
|
|
*
|
|
* The data might be partially consumed, in which case the unconsumed data
|
|
* should be resent to the parser as more data becomes available.
|
|
*
|
|
* Note: test the `parser->done` and `parser->error` flags between iterations.
|
|
*/
|
|
static size_t http_mime_parse(http_mime_parser_s *parser, void *buffer,
|
|
size_t length) {
|
|
int first_run = 1;
|
|
char *pos = buffer;
|
|
const char *stop = pos + length;
|
|
if (!length)
|
|
goto end_of_data;
|
|
consume_partial:
|
|
if (parser->in_obj) {
|
|
/* we're in an object longer than the buffer */
|
|
char *start = pos;
|
|
char *end = start;
|
|
do {
|
|
end = memchr(end, '\n', (size_t)(stop - end));
|
|
} while (end && ++end &&
|
|
(size_t)(stop - end) >= (4 + parser->boundary_len) &&
|
|
(end[0] != '-' || end[1] != '-' ||
|
|
memcmp(end + 2, parser->boundary, parser->boundary_len)));
|
|
if (!end) {
|
|
end = (char *)stop;
|
|
pos = end;
|
|
if (end - start)
|
|
http_mime_parser_on_partial_data(parser, start, (size_t)(end - start));
|
|
goto end_of_data;
|
|
} else if (end + 4 + parser->boundary_len >= stop) {
|
|
end -= 2;
|
|
if (end[0] == '\r')
|
|
--end;
|
|
pos = end;
|
|
if (end - start)
|
|
http_mime_parser_on_partial_data(parser, start, (size_t)(end - start));
|
|
goto end_of_data;
|
|
}
|
|
size_t len = (end - start) - 1;
|
|
if (start[len - 1] == '\r')
|
|
--len;
|
|
if (len)
|
|
http_mime_parser_on_partial_data(parser, start, len);
|
|
http_mime_parser_on_partial_end(parser);
|
|
pos = end;
|
|
parser->in_obj = 0;
|
|
first_run = 0;
|
|
} else if (length < (4 + parser->boundary_len) || pos[0] != '-' ||
|
|
pos[1] != '-' ||
|
|
memcmp(pos + 2, parser->boundary, parser->boundary_len))
|
|
goto error;
|
|
/* We're at a boundary */
|
|
while (pos < stop) {
|
|
char *start;
|
|
char *end;
|
|
char *name = NULL;
|
|
uint32_t name_len = 0;
|
|
char *value = NULL;
|
|
uint32_t value_len = 0;
|
|
char *filename = NULL;
|
|
uint32_t filename_len = 0;
|
|
char *mime = NULL;
|
|
uint32_t mime_len = 0;
|
|
uint8_t header_count = 0;
|
|
/* test for ending */
|
|
if (pos[2 + parser->boundary_len] == '-' &&
|
|
pos[3 + parser->boundary_len] == '-') {
|
|
pos += 5 + parser->boundary_len;
|
|
if (pos > stop)
|
|
pos = (char *)stop;
|
|
else if (pos < stop && pos[0] == '\n')
|
|
++pos;
|
|
goto done;
|
|
}
|
|
start = pos + 3 + parser->boundary_len;
|
|
if (start[0] == '\n') {
|
|
/* should be true, unless new line marker was just '\n' */
|
|
++start;
|
|
}
|
|
/* consume headers */
|
|
while (start + 4 < stop && start[0] != '\n' && start[1] != '\n') {
|
|
end = memchr(start, '\n', (size_t)(stop - start));
|
|
if (!end) {
|
|
if (first_run)
|
|
goto error;
|
|
goto end_of_data;
|
|
}
|
|
if (end - start > 29 && !strncasecmp(start, "content-disposition:", 20)) {
|
|
/* content-disposition header */
|
|
start = memchr(start + 20, ';', end - (start + 20));
|
|
// if (!start)
|
|
// start = end + 1;
|
|
while (start) {
|
|
++start;
|
|
if (start[0] == ' ')
|
|
++start;
|
|
if (start + 6 < end && !strncasecmp(start, "name=", 5)) {
|
|
name = start + 5;
|
|
if (name[0] == '"')
|
|
++name;
|
|
start = memchr(name, ';', (size_t)(end - start));
|
|
if (!start) {
|
|
name_len = (size_t)(end - name);
|
|
if (name[name_len - 1] == '\r')
|
|
--name_len;
|
|
} else {
|
|
name_len = (size_t)(start - name);
|
|
}
|
|
if (name[name_len - 1] == '"')
|
|
--name_len;
|
|
} else if (start + 9 < end && !strncasecmp(start, "filename", 8)) {
|
|
uint8_t encoded = 0;
|
|
start += 8;
|
|
if (start[0] == '*') {
|
|
encoded = 1;
|
|
++start;
|
|
}
|
|
if (start[0] != '=')
|
|
goto error;
|
|
++start;
|
|
if (start[0] == ' ')
|
|
++start;
|
|
if (start[0] == '"')
|
|
++start;
|
|
if (filename && !encoded) {
|
|
/* prefer URL encoded version */
|
|
start = memchr(filename, ';', (size_t)(end - start));
|
|
continue;
|
|
}
|
|
filename = start;
|
|
start = memchr(filename, ';', (size_t)(end - start));
|
|
if (!start) {
|
|
filename_len = (size_t)((end - filename));
|
|
if (filename[filename_len - 1] == '\r') {
|
|
--filename_len;
|
|
}
|
|
} else {
|
|
filename_len = (size_t)(start - filename);
|
|
}
|
|
if (filename[filename_len - 1] == '"')
|
|
--filename_len;
|
|
if (encoded) {
|
|
ssize_t new_len =
|
|
http_mime_decode_url(filename, filename, filename_len);
|
|
if (new_len > 0)
|
|
filename_len = new_len;
|
|
}
|
|
} else {
|
|
start = memchr(start, ';', (size_t)(end - start));
|
|
}
|
|
}
|
|
} else if (end - start > 14 && !strncasecmp(start, "content-type:", 13)) {
|
|
/* content-type header */
|
|
start += 13;
|
|
if (start[0] == ' ')
|
|
++start;
|
|
mime = start;
|
|
start = memchr(start, ';', (size_t)(end - start));
|
|
if (!start) {
|
|
mime_len = (size_t)(end - mime);
|
|
if (mime[mime_len - 1] == '\r')
|
|
--mime_len;
|
|
} else {
|
|
mime_len = (size_t)(start - mime);
|
|
}
|
|
}
|
|
start = end + 1;
|
|
if (header_count++ > 4)
|
|
goto error;
|
|
}
|
|
if (!name) {
|
|
if (start + 4 >= stop)
|
|
goto end_of_data;
|
|
goto error;
|
|
}
|
|
|
|
/* advance to end of boundry */
|
|
++start;
|
|
if (start[0] == '\n')
|
|
++start;
|
|
value = start;
|
|
end = start;
|
|
do {
|
|
end = memchr(end, '\n', (size_t)(stop - end));
|
|
} while (end && ++end &&
|
|
(size_t)(stop - end) >= (4 + parser->boundary_len) &&
|
|
(end[0] != '-' || end[1] != '-' ||
|
|
memcmp(end + 2, parser->boundary, parser->boundary_len)));
|
|
if (!end || end + 4 + parser->boundary_len >= stop) {
|
|
if (first_run) {
|
|
http_mime_parser_on_partial_start(parser, name, name_len, filename,
|
|
filename_len, mime, mime_len);
|
|
parser->in_obj = 1;
|
|
pos = value;
|
|
goto consume_partial;
|
|
}
|
|
goto end_of_data;
|
|
}
|
|
value_len = (size_t)((end - value) - 1);
|
|
if (value[value_len - 1] == '\r')
|
|
--value_len;
|
|
pos = end;
|
|
http_mime_parser_on_data(parser, name, name_len, filename, filename_len,
|
|
mime, mime_len, value, value_len);
|
|
first_run = 0;
|
|
}
|
|
end_of_data:
|
|
return (size_t)((uintptr_t)pos - (uintptr_t)buffer);
|
|
done:
|
|
parser->done = 1;
|
|
parser->error = 0;
|
|
return (size_t)((uintptr_t)pos - (uintptr_t)buffer);
|
|
error:
|
|
parser->done = 0;
|
|
parser->error = 1;
|
|
return (size_t)((uintptr_t)pos - (uintptr_t)buffer);
|
|
}
|
|
#endif
|