From c95a5479596e95b7cdd88ca9d7b32fd3afb4379c Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Mon, 16 Aug 2010 00:50:17 -0700 Subject: [PATCH] event stream --- http_parser.c | 556 ++++++++++++++++++++++++++++++++++++-------------- http_parser.h | 158 +++++++++++--- test.c | 95 +++++---- 3 files changed, 578 insertions(+), 231 deletions(-) diff --git a/http_parser.c b/http_parser.c index 5a0972a..c64dde0 100644 --- a/http_parser.c +++ b/http_parser.c @@ -28,41 +28,45 @@ #endif -#define CALLBACK2(FOR) \ -do { \ - if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser)) return (p - data); \ - } \ +#define MARK(FOR) (FOR##_mark = p) + +#define RECORD_MARK_NOCLEAR(FOR) \ +do { \ + if (FOR##_mark) { \ + assert(data_index + 1 < data_len); \ + data[data_index].payload.string.p = FOR##_mark; \ + data[data_index].payload.string.len = p - FOR##_mark; \ + data[data_index].type = HTTP_##FOR; \ + data_index++; \ + } \ } while (0) - -#define MARK(FOR) \ -do { \ - FOR##_mark = p; \ +#define RECORD_BODY(LEN) \ +do { \ + assert(data_index + 1 < data_len); \ + data[data_index].payload.string.p = p; \ + data[data_index].payload.string.len = (LEN); \ + data[data_index].type = HTTP_BODY; \ + data_index++; \ } while (0) -#define CALLBACK_NOCLEAR(FOR) \ -do { \ - if (FOR##_mark) { \ - if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser, \ - FOR##_mark, \ - p - FOR##_mark)) \ - { \ - return (p - data); \ - } \ - } \ - } \ +#define RECORD_MARK(FOR) \ +do { \ + RECORD_MARK_NOCLEAR(FOR); \ + FOR##_mark = NULL; \ } while (0) - -#define CALLBACK(FOR) \ -do { \ - CALLBACK_NOCLEAR(FOR); \ - FOR##_mark = NULL; \ +#define RECORD(FOR) \ +do { \ + assert(data_index + 1 < data_len); \ + data[data_index].payload.string.p = p+1; \ + data[data_index].payload.string.len = 0; \ + data[data_index].type = HTTP_##FOR; \ + data_index++; \ } while (0) + #define PROXY_CONNECTION "proxy-connection" #define CONNECTION "connection" #define CONTENT_LENGTH "content-length" @@ -245,6 +249,10 @@ 
enum state * states beyond this must be 'body' states. It is used for overflow * checking. See the PARSING_HEADER() macro. */ + + /* Fake state for responses which get punted out of http_parser_execute2 */ + , s_decide_body + , s_chunk_size_start , s_chunk_size , s_chunk_size_almost_done @@ -316,57 +324,118 @@ enum flags #endif -size_t http_parser_execute (http_parser *parser, - const http_parser_settings *settings, - const char *data, - size_t len) +static inline +int body_logic (http_parser *parser, + const char *p, + http_parser_data data[], + int data_len, + int *data_index_) +{ + int state; + int data_index = *data_index_; + + if (parser->flags & F_SKIPBODY) { + RECORD(MESSAGE_END); + state = NEW_MESSAGE(); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + RECORD(MESSAGE_END); + state = NEW_MESSAGE(); + } else if (parser->content_length > 0) { + /* Content-Length header given and non-zero */ + state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { + /* Assume content-length 0 - read the next */ + RECORD(MESSAGE_END); + state = NEW_MESSAGE(); + } else { + /* Read body until EOF */ + state = s_body_identity_eof; + } + } + } + + *data_index_ = data_index; + return state; +} + + +int http_parser_execute2(http_parser *parser, + const char *buf, + size_t buf_len, + http_parser_data data[], + int data_len) { char c, ch; - const char *p = data, *pe; + const char *p = buf, *pe; int64_t to_read; + int data_index = 0; enum state state = (enum state) parser->state; enum header_states header_state = (enum header_states) parser->header_state; uint64_t index = parser->index; uint64_t nread = parser->nread; - if (len == 0) { + if (data_len < 1) { + assert(0 && "Must supply at least a few http_parser_data objects"); + return 0; + } 
+ + if (state == s_decide_body) { + assert(parser->type == HTTP_RESPONSE); + state = body_logic(parser, p, data, data_len, &data_index); + } else if (buf_len == 0) { if (state == s_body_identity_eof) { - CALLBACK2(message_complete); + RECORD(MESSAGE_END); } - return 0; + goto exit; } - /* technically we could combine all of these (except for url_mark) into one + + /* technically we could combine all of these (except for URL_mark) into one variable, saving stack space, but it seems more clear to have them separated. */ - const char *header_field_mark = 0; - const char *header_value_mark = 0; - const char *fragment_mark = 0; - const char *query_string_mark = 0; - const char *path_mark = 0; - const char *url_mark = 0; + const char *HEADER_FIELD_mark = 0; + const char *HEADER_VALUE_mark = 0; + const char *FRAGMENT_mark = 0; + const char *QUERY_STRING_mark = 0; + const char *PATH_mark = 0; + const char *URL_mark = 0; if (state == s_header_field) - header_field_mark = data; + HEADER_FIELD_mark = buf; if (state == s_header_value) - header_value_mark = data; + HEADER_VALUE_mark = buf; if (state == s_req_fragment) - fragment_mark = data; + FRAGMENT_mark = buf; if (state == s_req_query_string) - query_string_mark = data; + QUERY_STRING_mark = buf; if (state == s_req_path) - path_mark = data; + PATH_mark = buf; if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash || state == s_req_schema_slash_slash || state == s_req_port || state == s_req_query_string_start || state == s_req_query_string || state == s_req_host || state == s_req_fragment_start || state == s_req_fragment) - url_mark = data; + URL_mark = buf; - for (p=data, pe=data+len; p != pe; p++) { + for (p = buf, pe = buf + buf_len; p != pe; p++) { ch = *p; + // We always need at least enough space to put two elements on the data. 
+ if (data_index + 2 >= data_len) { + data[data_index].payload.string.p = p; + data[data_index].payload.string.len = 0; + data[data_index].type = HTTP_NEEDS_DATA_ELEMENTS; + data_index++; + goto exit; + } + if (PARSING_HEADER(state)) { ++nread; /* Buffer overflow attack */ @@ -388,8 +457,6 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); - if (ch == 'H') state = s_res_or_resp_H; else { @@ -417,8 +484,6 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); - switch (ch) { case 'H': state = s_res_H; @@ -517,6 +582,22 @@ size_t http_parser_execute (http_parser *parser, case s_res_status_code: { if (ch < '0' || ch > '9') { + + /* assert the last data element, if any, was not an HTTP_VERSION. */ + assert(data_index == 0 || data[data_index-1].type != HTTP_VERSION); + + /* START OF RESPONSE */ + assert(data_index + 2 < data_len); + data[data_index].type = HTTP_RES_MESSAGE_START; + data[data_index].payload.status_code = parser->status_code; + data_index++; + + data[data_index].type = HTTP_VERSION; + data[data_index].payload.version.major = parser->http_major; + data[data_index].payload.version.minor = parser->http_minor; + data_index++; + + switch (ch) { case ' ': state = s_res_status; @@ -566,8 +647,6 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); - if (ch < 'A' || 'Z' < ch) goto error; start_req_method_assign: @@ -601,6 +680,13 @@ size_t http_parser_execute (http_parser *parser, const char *matcher = method_strings[parser->method]; if (ch == ' ' && matcher[index] == '\0') { state = s_req_spaces_before_url; + + /* START OF REQUEST */ + assert(data_index + 1 < data_len); + data[data_index].type = HTTP_REQ_MESSAGE_START; + data[data_index].payload.method = parser->method; + data_index++; + } else if (ch == matcher[index]) { ; /* nada */ } else 
if (parser->method == HTTP_CONNECT) { @@ -639,8 +725,8 @@ size_t http_parser_execute (http_parser *parser, if (ch == ' ') break; if (ch == '/' || ch == '*') { - MARK(url); - MARK(path); + MARK(URL); + MARK(PATH); state = s_req_path; break; } @@ -648,7 +734,7 @@ size_t http_parser_execute (http_parser *parser, c = LOWER(ch); if (c >= 'a' && c <= 'z') { - MARK(url); + MARK(URL); state = s_req_schema; break; } @@ -696,7 +782,7 @@ size_t http_parser_execute (http_parser *parser, state = s_req_port; break; case '/': - MARK(path); + MARK(PATH); state = s_req_path; break; case ' ': @@ -704,7 +790,7 @@ size_t http_parser_execute (http_parser *parser, * "GET http://foo.bar.com HTTP/1.1" * That is, there is no path. */ - CALLBACK(url); + RECORD_MARK(URL); state = s_req_http_start; break; default: @@ -718,7 +804,7 @@ size_t http_parser_execute (http_parser *parser, if (ch >= '0' && ch <= '9') break; switch (ch) { case '/': - MARK(path); + MARK(PATH); state = s_req_path; break; case ' ': @@ -726,7 +812,7 @@ size_t http_parser_execute (http_parser *parser, * "GET http://foo.bar.com:1234 HTTP/1.1" * That is, there is no path. 
*/ - CALLBACK(url); + RECORD_MARK(URL); state = s_req_http_start; break; default: @@ -741,30 +827,30 @@ size_t http_parser_execute (http_parser *parser, switch (ch) { case ' ': - CALLBACK(url); - CALLBACK(path); + RECORD_MARK(URL); + RECORD_MARK(PATH); state = s_req_http_start; break; case CR: - CALLBACK(url); - CALLBACK(path); + RECORD_MARK(URL); + RECORD_MARK(PATH); parser->http_major = 0; parser->http_minor = 9; state = s_req_line_almost_done; break; case LF: - CALLBACK(url); - CALLBACK(path); + RECORD_MARK(URL); + RECORD_MARK(PATH); parser->http_major = 0; parser->http_minor = 9; state = s_header_field_start; break; case '?': - CALLBACK(path); + RECORD_MARK(PATH); state = s_req_query_string_start; break; case '#': - CALLBACK(path); + RECORD_MARK(PATH); state = s_req_fragment_start; break; default: @@ -776,7 +862,7 @@ size_t http_parser_execute (http_parser *parser, case s_req_query_string_start: { if (normal_url_char[(unsigned char)ch]) { - MARK(query_string); + MARK(QUERY_STRING); state = s_req_query_string; break; } @@ -785,17 +871,17 @@ size_t http_parser_execute (http_parser *parser, case '?': break; /* XXX ignore extra '?' ... is this right? */ case ' ': - CALLBACK(url); + RECORD_MARK(URL); state = s_req_http_start; break; case CR: - CALLBACK(url); + RECORD_MARK(URL); parser->http_major = 0; parser->http_minor = 9; state = s_req_line_almost_done; break; case LF: - CALLBACK(url); + RECORD_MARK(URL); parser->http_major = 0; parser->http_minor = 9; state = s_header_field_start; @@ -818,26 +904,26 @@ size_t http_parser_execute (http_parser *parser, /* allow extra '?' 
in query string */ break; case ' ': - CALLBACK(url); - CALLBACK(query_string); + RECORD_MARK(URL); + RECORD_MARK(QUERY_STRING); state = s_req_http_start; break; case CR: - CALLBACK(url); - CALLBACK(query_string); + RECORD_MARK(URL); + RECORD_MARK(QUERY_STRING); parser->http_major = 0; parser->http_minor = 9; state = s_req_line_almost_done; break; case LF: - CALLBACK(url); - CALLBACK(query_string); + RECORD_MARK(URL); + RECORD_MARK(QUERY_STRING); parser->http_major = 0; parser->http_minor = 9; state = s_header_field_start; break; case '#': - CALLBACK(query_string); + RECORD_MARK(QUERY_STRING); state = s_req_fragment_start; break; default: @@ -849,30 +935,30 @@ size_t http_parser_execute (http_parser *parser, case s_req_fragment_start: { if (normal_url_char[(unsigned char)ch]) { - MARK(fragment); + MARK(FRAGMENT); state = s_req_fragment; break; } switch (ch) { case ' ': - CALLBACK(url); + RECORD_MARK(URL); state = s_req_http_start; break; case CR: - CALLBACK(url); + RECORD_MARK(URL); parser->http_major = 0; parser->http_minor = 9; state = s_req_line_almost_done; break; case LF: - CALLBACK(url); + RECORD_MARK(URL); parser->http_major = 0; parser->http_minor = 9; state = s_header_field_start; break; case '?': - MARK(fragment); + RECORD_MARK(FRAGMENT); state = s_req_fragment; break; case '#': @@ -889,20 +975,20 @@ size_t http_parser_execute (http_parser *parser, switch (ch) { case ' ': - CALLBACK(url); - CALLBACK(fragment); + RECORD_MARK(URL); + RECORD_MARK(FRAGMENT); state = s_req_http_start; break; case CR: - CALLBACK(url); - CALLBACK(fragment); + RECORD_MARK(URL); + RECORD_MARK(FRAGMENT); parser->http_major = 0; parser->http_minor = 9; state = s_req_line_almost_done; break; case LF: - CALLBACK(url); - CALLBACK(fragment); + RECORD_MARK(URL); + RECORD_MARK(FRAGMENT); parser->http_major = 0; parser->http_minor = 9; state = s_header_field_start; @@ -982,6 +1068,19 @@ size_t http_parser_execute (http_parser *parser, /* minor HTTP version or end of request line */ case 
s_req_http_minor: { + if (data_index && data[data_index-1].type == HTTP_VERSION) { + /* only in the case of a second digit to http_minor */ + assert(0); // mostly should happen. REMOVEME + assert(parser->http_minor > 10); + data[data_index-1].payload.version.minor = parser->http_minor; + } else { + assert(data_index + 1 < data_len); + data[data_index].type = HTTP_VERSION; + data[data_index].payload.version.major = parser->http_major; + data[data_index].payload.version.minor = parser->http_minor; + data_index++; + } + if (ch == CR) { state = s_req_line_almost_done; break; @@ -1029,7 +1128,7 @@ size_t http_parser_execute (http_parser *parser, if (!c) goto error; - MARK(header_field); + MARK(HEADER_FIELD); index = 0; state = s_header_field; @@ -1167,19 +1266,19 @@ size_t http_parser_execute (http_parser *parser, } if (ch == ':') { - CALLBACK(header_field); + RECORD_MARK(HEADER_FIELD); state = s_header_value_start; break; } if (ch == CR) { state = s_header_almost_done; - CALLBACK(header_field); + RECORD_MARK(HEADER_FIELD); break; } if (ch == LF) { - CALLBACK(header_field); + RECORD_MARK(HEADER_FIELD); state = s_header_field_start; break; } @@ -1191,7 +1290,7 @@ size_t http_parser_execute (http_parser *parser, { if (ch == ' ') break; - MARK(header_value); + MARK(HEADER_VALUE); state = s_header_value; index = 0; @@ -1199,14 +1298,14 @@ size_t http_parser_execute (http_parser *parser, c = LOWER(ch); if (ch == CR) { - CALLBACK(header_value); + RECORD_MARK(HEADER_VALUE); header_state = h_general; state = s_header_almost_done; break; } if (ch == LF) { - CALLBACK(header_value); + RECORD_MARK(HEADER_VALUE); state = s_header_field_start; break; } @@ -1255,13 +1354,13 @@ size_t http_parser_execute (http_parser *parser, c = LOWER(ch); if (ch == CR) { - CALLBACK(header_value); + RECORD_MARK(HEADER_VALUE); state = s_header_almost_done; break; } if (ch == LF) { - CALLBACK(header_value); + RECORD_MARK(HEADER_VALUE); goto header_almost_done; } @@ -1357,7 +1456,7 @@ size_t 
http_parser_execute (http_parser *parser, if (parser->flags & F_TRAILING) { /* End of a chunked request */ - CALLBACK2(message_complete); + RECORD(MESSAGE_END); state = NEW_MESSAGE(); break; } @@ -1368,69 +1467,55 @@ size_t http_parser_execute (http_parser *parser, parser->upgrade = 1; } - /* Here we call the headers_complete callback. This is somewhat - * different than other callbacks because if the user returns 1, we - * will interpret that as saying that this message has no body. This - * is needed for the annoying case of recieving a response to a HEAD - * request. - */ - if (settings->on_headers_complete) { - switch (settings->on_headers_complete(parser)) { - case 0: - break; - case 1: - parser->flags |= F_SKIPBODY; - break; + // RECORD HEADERS_END + assert(data_index + 1 < data_len); + data[data_index].type = HTTP_HEADERS_END; + data[data_index].payload.flags = parser->flags; + data_index++; - default: - return p - data; /* Error */ - } - } /* Exit, the rest of the connect is in a different protocol. */ if (parser->upgrade) { - CALLBACK2(message_complete); - return (p - data); + RECORD(MESSAGE_END); + goto exit; } - if (parser->flags & F_SKIPBODY) { - CALLBACK2(message_complete); - state = NEW_MESSAGE(); - } else if (parser->flags & F_CHUNKED) { - /* chunked encoding - ignore Content-Length header */ - state = s_chunk_size_start; + state = s_decide_body; + + if (parser->type == HTTP_RESPONSE) { + /* RESPONSE PARSING: We need to exit the function and get + * information before knowing how to proceed. This could be a + * response to a HEAD request. We'll do body_logic() next time we + * enter http_parser_execute2(). 
+ */ + RECORD(NEEDS_INPUT); + goto exit; + } else { - if (parser->content_length == 0) { - /* Content-Length header given but zero: Content-Length: 0\r\n */ - CALLBACK2(message_complete); - state = NEW_MESSAGE(); - } else if (parser->content_length > 0) { - /* Content-Length header given and non-zero */ - state = s_body_identity; - } else { - if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { - /* Assume content-length 0 - read the next */ - CALLBACK2(message_complete); - state = NEW_MESSAGE(); - } else { - /* Read body until EOF */ - state = s_body_identity_eof; - } - } + /* REQUEST PARSING: Just make a decision about how to proceed on + * the body... + */ + state = body_logic(parser, p, data, data_len, &data_index); } break; } + case s_decide_body: + assert(0 && "Should not reach this state"); + break; + case s_body_identity: to_read = MIN(pe - p, (int64_t)parser->content_length); if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); + RECORD_BODY(to_read); p += to_read - 1; + parser->content_length -= to_read; + if (parser->content_length == 0) { - CALLBACK2(message_complete); + RECORD(MESSAGE_END); state = NEW_MESSAGE(); } } @@ -1440,7 +1525,7 @@ size_t http_parser_execute (http_parser *parser, case s_body_identity_eof: to_read = pe - p; if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); + RECORD_BODY(to_read); p += to_read - 1; } break; @@ -1512,7 +1597,7 @@ size_t http_parser_execute (http_parser *parser, to_read = MIN(pe - p, (int64_t)(parser->content_length)); if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); + RECORD_BODY(to_read); p += to_read - 1; } @@ -1542,23 +1627,182 @@ size_t http_parser_execute (http_parser *parser, } } - CALLBACK_NOCLEAR(header_field); - CALLBACK_NOCLEAR(header_value); - CALLBACK_NOCLEAR(fragment); - CALLBACK_NOCLEAR(query_string); - CALLBACK_NOCLEAR(path); - CALLBACK_NOCLEAR(url); + RECORD_MARK_NOCLEAR(HEADER_FIELD); + 
RECORD_MARK_NOCLEAR(HEADER_VALUE); + RECORD_MARK_NOCLEAR(FRAGMENT); + RECORD_MARK_NOCLEAR(QUERY_STRING); + RECORD_MARK_NOCLEAR(PATH); + RECORD_MARK_NOCLEAR(URL); +exit: parser->state = state; parser->header_state = header_state; parser->index = index; parser->nread = nread; - - return len; + return data_index; error: + RECORD(PARSER_ERROR); parser->state = s_dead; - return (p - data); + return data_index; +} + + + +#define CALLBACK(FOR) \ +do { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(&fake_parser, \ + data[i].payload.string.p, \ + data[i].payload.string.len)) { \ + return (data[i].payload.string.p - buf); \ + } \ + } \ +} while (0) + + +#define CALLBACK2(FOR) \ +do { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(&fake_parser)) \ + return (data[i].payload.string.p - buf); \ + } \ +} while (0) + + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *buf, + size_t buf_len) +{ +# define DATA_SIZE 50 + http_parser_data data[DATA_SIZE]; + int i, ndata; + size_t read = 0; + + /* Fake the parser object being passed to the callbacks */ + http_parser fake_parser; + fake_parser.data = parser->data; + fake_parser.type = parser->type; + + while (1) { + ndata = http_parser_execute2(parser, + buf + read, + buf_len - read, + data, + DATA_SIZE); + + for (i = 0; i < ndata; i++) { + switch (data[i].type) { + case HTTP_PARSER_ERROR: + // HTTP_PARSER_ERROR should always be the last element of 'data' + assert(ndata - 1 == i); + return data[i].payload.string.p - buf; + + case HTTP_NEEDS_INPUT: + // HTTP_NEEDS_INPUT should always be the last element of 'data' + assert(ndata - 1 == i); + // Ignore.. we handled this in HTTP_HEADERS_END + break; + + case HTTP_NEEDS_DATA_ELEMENTS: + // HTTP_NEEDS_DATA_ELEMENTS is the last element of 'data' + assert(ndata - 1 == i); + // Go around the while loop again. 
+ break; + + case HTTP_REQ_MESSAGE_START: + fake_parser.http_major = 0; + fake_parser.http_minor = 9; + fake_parser.method = data[i].payload.method; + CALLBACK2(message_begin); + break; + + case HTTP_RES_MESSAGE_START: + fake_parser.status_code = data[i].payload.status_code; + CALLBACK2(message_begin); + break; + + case HTTP_VERSION: + fake_parser.http_major = data[i].payload.version.major; + fake_parser.http_minor = data[i].payload.version.minor; + break; + + case HTTP_PATH: + CALLBACK(path); + break; + + case HTTP_QUERY_STRING: + CALLBACK(query_string); + break; + + case HTTP_URL: + CALLBACK(url); + break; + + case HTTP_FRAGMENT: + CALLBACK(fragment); + break; + + case HTTP_HEADER_FIELD: + CALLBACK(header_field); + break; + + case HTTP_HEADER_VALUE: + CALLBACK(header_value); + break; + + case HTTP_HEADERS_END: + fake_parser.flags = data[i].payload.flags; + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(&fake_parser)) { + case 0: + http_parser_has_body(&fake_parser, 1); + break; + + case 1: + http_parser_has_body(&fake_parser, 0); + break; + + default: + return data[i].payload.string.p - buf; /* Error */ + } + } + break; + + case HTTP_BODY: + CALLBACK(body); + break; + + case HTTP_MESSAGE_END: + CALLBACK2(message_complete); + break; + } + } + + /* If the last data element is NEEDS_INPUT or NEEDS_DATA_ELEMENTS + * Go round the loop again. (Note to self: This API isn't very nice...) + */ + if (ndata > 0 && (data[ndata - 1].type == HTTP_NEEDS_INPUT || + data[ndata - 1].type == HTTP_NEEDS_DATA_ELEMENTS)) { + /* We've parsed only as far as the data point */ + read += data[ndata - 1].payload.string.p - (buf+read); + + } else { + /* We've parsed the whole thing that was passed in. 
*/ + read += buf_len - read; + if (read >= buf_len) break; + } + } + return read; +} + + +void http_parser_has_body (http_parser *parser, int has) +{ + if (!has) { + parser->flags |= F_SKIPBODY; + } } diff --git a/http_parser.h b/http_parser.h index c03ec05..c8bfb89 100644 --- a/http_parser.h +++ b/http_parser.h @@ -57,24 +57,6 @@ typedef int ssize_t; typedef struct http_parser http_parser; -typedef struct http_parser_settings http_parser_settings; - - -/* Callbacks should return non-zero to indicate an error. The parser will - * then halt execution. - * - * The one exception is on_headers_complete. In a HTTP_RESPONSE parser - * returning '1' from on_headers_complete will tell the parser that it - * should not expect a body. This is used when receiving a response to a - * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: - * chunked' headers that indicate the presence of a body. - * - * http_data_cb does not return data chunks. It will be call arbitrarally - * many times for each string. E.G. you might get 10 callbacks for "on_path" - * each providing just a few characters more data. - */ -typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); -typedef int (*http_cb) (http_parser*); /* Request Methods */ @@ -141,6 +123,45 @@ struct http_parser { }; +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(http_parser *parser); + + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method); + + + + +/********* Parser Interface 1 *********/ +/* For those who like callbacks */ + + +/* Callbacks should return non-zero to indicate an error. 
The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be called arbitrarily + * many times for each string. E.g. you might get 10 callbacks for "on_path" + * each providing just a few characters more data. + */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); +typedef struct http_parser_settings http_parser_settings; + struct http_parser_settings { http_cb on_message_begin; http_data_cb on_path; @@ -154,26 +175,101 @@ struct http_parser_settings { http_cb on_message_complete; }; +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *buf, + size_t buf_len); -void http_parser_init(http_parser *parser, enum http_parser_type type); -size_t http_parser_execute(http_parser *parser, - const http_parser_settings *settings, - const char *data, - size_t len); +/********** Parser Interface 2 **********/ +/** For those who don't like callbacks **/ +/****************************************/ -/* If http_should_keep_alive() in the on_headers_complete or - * on_message_complete callback returns true, then this will be should be - * the last message on the connection. - * If you are the server, respond with the "Connection: close" header. - * If you are the client, close the connection. 
+ +typedef struct http_parser_data http_parser_data; + +struct http_parser_data { + enum { HTTP_PARSER_ERROR = 0 + , HTTP_NEEDS_INPUT + , HTTP_NEEDS_DATA_ELEMENTS + , HTTP_REQ_MESSAGE_START /* payload.method */ + , HTTP_RES_MESSAGE_START /* payload.status_code */ + , HTTP_VERSION /* payload.version */ + , HTTP_PATH /* payload.string */ + , HTTP_QUERY_STRING /* payload.string */ + , HTTP_URL /* payload.string */ + , HTTP_FRAGMENT /* payload.string */ + , HTTP_HEADER_FIELD /* payload.string */ + , HTTP_HEADER_VALUE /* payload.string */ + , HTTP_HEADERS_END /* payload.flags */ + , HTTP_BODY /* payload.string */ + , HTTP_MESSAGE_END /* payload.string */ + } type; + + union { + struct { + const char *p; + size_t len; + } string; + + /* For HTTP_RES_MESSAGE_START */ + unsigned short status_code; + + /* For HTTP_REQ_MESSAGE_START */ + unsigned char method; + + /* For HTTP_VERSION */ + struct { + unsigned short major; + unsigned short minor; + } version; + + /* For HTTP_HEADERS_END */ + unsigned char flags : 6; + + } payload; + +}; + +/* Returns the number of elements filled into `data`. + * + * Normally `http_parser_execute2` will parse the entire `buf` and fill + * `data` with elements. Under several conditions `http_parser_execute2` may + * drop out early. + * + * 1. A parse error was encountered. The last element of data will be + * HTTP_PARSER_ERROR. The parser cannot continue further. The + * connection to the peer should be severed. + * + * 2. The parser still has more of `buf` to parse but it has run out of + * space in the user-supplied `http_parser_data` array. The last + * element of `data` will be HTTP_NEEDS_DATA_ELEMENTS. Restart + * http_parser_execute2() with a fresh array of elements starting at + * the place that HTTP_NEEDS_DATA_ELEMENTS pointed to. + * + * 3. The parser cannot continue until http_parser_has_body(parser, 1) + * or http_parser_has_body(parser, 0) is called. This is required for + * all HTTP responses. 
For the parser it is unclear from the headers if + * a response message has a body or not. For example, if the message is + * a response to a HEAD request, then it MUST NOT have a body but + * nevertheless may contain "Content-Length" or + * "Transfer-Encoding: chunked" headers (which normally indicate the + * presence of a body to the parser). + * + * The last element of `data` will be HTTP_NEEDS_INPUT. The user must + * call http_parser_has_body() and then restart http_parser_execute2 + * with a fresh array of `data` elements and starting at the place + * HTTP_NEEDS_INPUT pointed to. */ -int http_should_keep_alive(http_parser *parser); +int http_parser_execute2(http_parser* parser, + const char* buf, + size_t buf_len, + http_parser_data data[], + int data_len); -/* Returns a string version of the HTTP method. */ -const char *http_method_str(enum http_method); +void http_parser_has_body(http_parser* parser, int); #ifdef __cplusplus } diff --git a/test.c b/test.c index e5699aa..6f6c6b2 100644 --- a/test.c +++ b/test.c @@ -32,6 +32,12 @@ #define FALSE 0 #define MAX_HEADERS 13 + +#define EXIT() do { \ + assert(0); \ + exit(1); \ +} while (0) + #define MAX_ELEMENT_SIZE 500 #define MIN(a,b) ((a) < (b) ? 
(a) : (b)) @@ -794,7 +800,8 @@ const struct message responses[] = "Server: DCLK-AdSvr\r\n" "Content-Type: text/xml\r\n" "Content-Length: 0\r\n" - "DCLK_imp: v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o\r\n\r\n" + "DCLK_imp: v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o\r\n" + "\r\n" ,.should_keep_alive= TRUE ,.message_complete_on_eof= FALSE ,.http_major= 1 @@ -950,7 +957,7 @@ const struct message responses[] = int request_path_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); strncat(messages[num_messages].request_path, buf, len); return 0; } @@ -958,7 +965,7 @@ request_path_cb (http_parser *p, const char *buf, size_t len) int request_url_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); strncat(messages[num_messages].request_url, buf, len); return 0; } @@ -966,7 +973,7 @@ request_url_cb (http_parser *p, const char *buf, size_t len) int query_string_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); strncat(messages[num_messages].query_string, buf, len); return 0; } @@ -974,7 +981,7 @@ query_string_cb (http_parser *p, const char *buf, size_t len) int fragment_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); strncat(messages[num_messages].fragment, buf, len); return 0; } @@ -982,7 +989,7 @@ fragment_cb (http_parser *p, const char *buf, size_t len) int header_field_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); struct message *m = &messages[num_messages]; if (m->last_header_element != FIELD) @@ -998,7 +1005,7 @@ header_field_cb (http_parser *p, const char *buf, size_t len) int header_value_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); struct 
message *m = &messages[num_messages]; strncat(m->headers[m->num_headers-1][1], buf, len); @@ -1011,7 +1018,7 @@ header_value_cb (http_parser *p, const char *buf, size_t len) int body_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); strncat(messages[num_messages].body, buf, len); messages[num_messages].body_size += len; // printf("body_cb: '%s'\n", requests[num_messages].body); @@ -1021,7 +1028,7 @@ body_cb (http_parser *p, const char *buf, size_t len) int count_body_cb (http_parser *p, const char *buf, size_t len) { - assert(p == parser); + assert(p->data == parser->data); assert(buf); messages[num_messages].body_size += len; return 0; @@ -1030,7 +1037,7 @@ count_body_cb (http_parser *p, const char *buf, size_t len) int message_begin_cb (http_parser *p) { - assert(p == parser); + assert(p->data == parser->data); messages[num_messages].message_begin_cb_called = TRUE; return 0; } @@ -1038,27 +1045,27 @@ message_begin_cb (http_parser *p) int headers_complete_cb (http_parser *p) { - assert(p == parser); - messages[num_messages].method = parser->method; - messages[num_messages].status_code = parser->status_code; - messages[num_messages].http_major = parser->http_major; - messages[num_messages].http_minor = parser->http_minor; + assert(p->data == parser->data); + messages[num_messages].method = p->method; + messages[num_messages].status_code = p->status_code; + messages[num_messages].http_major = p->http_major; + messages[num_messages].http_minor = p->http_minor; messages[num_messages].headers_complete_cb_called = TRUE; - messages[num_messages].should_keep_alive = http_should_keep_alive(parser); + messages[num_messages].should_keep_alive = http_should_keep_alive(p); return 0; } int message_complete_cb (http_parser *p) { - assert(p == parser); - if (messages[num_messages].should_keep_alive != http_should_keep_alive(parser)) + assert(p->data == parser->data); + /* http_should_keep_alive() doesn't work with 
event_stream yet */ + if (messages[num_messages].should_keep_alive != http_should_keep_alive(p)) { fprintf(stderr, "\n\n *** Error http_should_keep_alive() should have same " "value in both on_message_complete and on_headers_complete " "but it doesn't! ***\n\n"); - assert(0); - exit(1); + EXIT(); } messages[num_messages].message_complete_cb_called = TRUE; @@ -1289,7 +1296,7 @@ test_message (const struct message *message) if (read != msg1len) { print_error(msg1, read); - exit(1); + EXIT(); } } @@ -1300,7 +1307,7 @@ test_message (const struct message *message) if (read != msg2len) { print_error(msg2, read); - exit(1); + EXIT(); } read = parse(NULL, 0); @@ -1309,17 +1316,17 @@ test_message (const struct message *message) if (read != 0) { print_error(message->raw, read); - exit(1); + EXIT(); } test: if (num_messages != 1) { printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); - exit(1); + EXIT(); } - if(!message_eq(0, message)) exit(1); + if(!message_eq(0, message)) EXIT(); parser_free(); } @@ -1340,7 +1347,7 @@ test_message_count_body (const struct message *message) read = parse_count_body(message->raw + i, toread); if (read != toread) { print_error(message->raw, read); - exit(1); + EXIT(); } } @@ -1348,15 +1355,15 @@ test_message_count_body (const struct message *message) read = parse_count_body(NULL, 0); if (read != 0) { print_error(message->raw, read); - exit(1); + EXIT(); } if (num_messages != 1) { printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); - exit(1); + EXIT(); } - if(!message_eq(0, message)) exit(1); + if(!message_eq(0, message)) EXIT(); parser_free(); } @@ -1377,7 +1384,7 @@ test_simple (const char *buf, int should_pass) if (pass != should_pass) { fprintf(stderr, "\n*** test_simple expected %s ***\n\n%s", should_pass ? 
"success" : "error", buf); - exit(1); + EXIT(); } } @@ -1402,7 +1409,7 @@ test_header_overflow_error (int req) } fprintf(stderr, "\n*** Error expected but none in header overflow test ***\n"); - exit(1); + EXIT(); } void @@ -1412,7 +1419,7 @@ test_no_overflow_long_body (int req, size_t length) http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); size_t parsed; size_t i; - char buf1[3000]; + char buf1[300]; size_t buf1len = sprintf(buf1, "%s\r\nConnection: Keep-Alive\r\nContent-Length: %zu\r\n\r\n", req ? "POST / HTTP/1.0" : "HTTP/1.0 200 OK", length); parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); @@ -1435,7 +1442,7 @@ test_no_overflow_long_body (int req, size_t length) "\n*** error in test_no_overflow_long_body %s of length %zu ***\n", req ? "REQUEST" : "RESPONSE", length); - exit(1); + EXIT(); } void @@ -1461,15 +1468,17 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct parser_init(r1->type); + size_t total_len = strlen(total); + size_t read; - read = parse(total, strlen(total)); + read = parse(total, total_len); if (has_upgrade && parser->upgrade) goto test; - if (read != strlen(total)) { + if (read != total_len) { print_error(total, read); - exit(1); + EXIT(); } read = parse(NULL, 0); @@ -1478,21 +1487,21 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct if (read != 0) { print_error(total, read); - exit(1); + EXIT(); } test: if (message_count != num_messages) { fprintf(stderr, "\n\n*** Parser didn't see 3 messages only %d *** \n", num_messages); - exit(1); + EXIT(); } - if (!message_eq(0, r1)) exit(1); + if (!message_eq(0, r1)) EXIT(); if (message_count > 1) { - if (!message_eq(1, r2)) exit(1); + if (!message_eq(1, r2)) EXIT(); if (message_count > 2) { - if (!message_eq(2, r3)) exit(1); + if (!message_eq(2, r3)) EXIT(); } } @@ -1612,7 +1621,7 @@ test: fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1); fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned 
int)buf2_len , buf2); fprintf(stderr, "buf3 (%u) %s\n", (unsigned int)buf3_len, buf3); - exit(1); + EXIT(); } // user required to free the result @@ -1717,7 +1726,6 @@ main (void) } - printf("response scan 1/2 "); test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] , &responses[NO_HEADERS_NO_BODY_404] @@ -1729,7 +1737,6 @@ main (void) , &responses[UNDERSTORE_HEADER_KEY] , &responses[NO_CARRIAGE_RET] ); - puts("responses okay");