From d0bb867d1b58db453dd2a0845ec6b4d347645bdb Mon Sep 17 00:00:00 2001 From: Peter Griess Date: Fri, 6 Jan 2012 18:13:41 -0600 Subject: [PATCH] Implement http_parser_pause(). Summary: - Add http_parser_pause() API. A callback may invoke this at any time. This will cause http_parser_parse() to return indicating that it parsed less than the number of requested bytes and set an error to HBE_PAUSED. A paused parser with fail with HBE_PAUSED until it is un-paused with http_parser_pause(). - Stop using 'state', 'header_state', 'index', and 'nread' shadow variables and then updating their http_parser fields when we're done. Instead, update the live values as we go. This will make it possible to return from anywhere in the parser (say, due to EPAUSED) and have valid/expected state. - Update state before making callbacks so that if the want to pause, we'll know the correct state already. - Make sure that every callback has a state that uniquely identifies the next step so that we can resume in the right place if we were suppoed to be paused. - Clean and re-factor up CALLBACK() macros. - Use CALLBACK() macros for (almost) all callbacks; on_headers_complete is still a special case. This includes on_body which we used to invoke manually with a long run of bytes. We now use a 'body' mark and hit its callback just like every other data callback. - Clean up (most) gotos and replace with real states. - Add some unit tests. Fixes #70 --- http_parser.c | 640 +++++++++++++++++++++++++++++--------------------- http_parser.h | 20 +- test.c | 212 ++++++++++++++++- 3 files changed, 595 insertions(+), 277 deletions(-) diff --git a/http_parser.c b/http_parser.c index 5ab2be2..5b2b6a4 100644 --- a/http_parser.c +++ b/http_parser.c @@ -47,37 +47,64 @@ do { \ #endif -#define CALLBACK2(FOR) \ +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ if (settings->on_##FOR) { \ if (0 != settings->on_##FOR(parser)) { \ SET_ERRNO(HPE_CB_##FOR); \ - return (p - data); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ } \ } \ } while (0) +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) -#define MARK(FOR) \ -do { \ - FOR##_mark = p; \ -} while (0) +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) -#define CALLBACK(FOR) \ +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ if (FOR##_mark) { \ if (settings->on_##FOR) { \ - if (0 != settings->on_##FOR(parser, \ - FOR##_mark, \ - p - FOR##_mark)) \ - { \ + if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ SET_ERRNO(HPE_CB_##FOR); \ - return (p - data); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ } \ } \ FOR##_mark = NULL; \ } \ } while (0) + +/* Run the data callback FOR and consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) #define PROXY_CONNECTION "proxy-connection" @@ -265,7 +292,9 @@ enum state , s_chunk_size_almost_done , s_headers_almost_done - /* Important: 's_headers_almost_done' must be the last 'header' state. All + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All * states beyond this must be 'body' states. It is used for overflow * checking. See the PARSING_HEADER() macro. */ @@ -276,10 +305,12 @@ enum state , s_body_identity , s_body_identity_eof + + , s_message_done }; -#define PARSING_HEADER(state) (state <= s_headers_almost_done) +#define PARSING_HEADER(state) (state <= s_headers_done) enum header_states @@ -538,32 +569,24 @@ size_t http_parser_execute (http_parser *parser, { char c, ch; int8_t unhex_val; - const char *p = data, *pe; - int64_t to_read; - enum state state; - enum header_states header_state; - uint64_t index = parser->index; - uint64_t nread = parser->nread; - - /* technically we could combine all of these (except for url_mark) into one - variable, saving stack space, but it seems more clear to have them - separated. */ + const char *p = data; const char *header_field_mark = 0; const char *header_value_mark = 0; const char *url_mark = 0; + const char *body_mark = 0; /* We're in an error state. Don't bother doing anything. */ if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { return 0; } - state = (enum state) parser->state; - header_state = (enum header_states) parser->header_state; - if (len == 0) { - switch (state) { + switch (parser->state) { case s_body_identity_eof: - CALLBACK2(message_complete); + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. + */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); return 0; case s_dead: @@ -579,30 +602,36 @@ size_t http_parser_execute (http_parser *parser, } - if (state == s_header_field) + if (parser->state == s_header_field) header_field_mark = data; - if (state == s_header_value) + if (parser->state == s_header_value) header_value_mark = data; - if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash - || state == s_req_schema_slash_slash || state == s_req_port - || state == s_req_query_string_start || state == s_req_query_string - || state == s_req_host - || state == s_req_fragment_start || state == s_req_fragment) + if (parser->state == s_req_path || + parser->state == s_req_schema || + parser->state == s_req_schema_slash || + parser->state == s_req_schema_slash_slash || + parser->state == s_req_port || + parser->state == s_req_query_string_start || + parser->state == s_req_query_string || + parser->state == s_req_host || + parser->state == s_req_fragment_start || + parser->state == s_req_fragment) url_mark = data; - for (p=data, pe=data+len; p != pe; p++) { + for (p=data; p != data + len; p++) { ch = *p; - if (PARSING_HEADER(state)) { - ++nread; + if (PARSING_HEADER(parser->state)) { + ++parser->nread; /* Buffer overflow attack */ - if (nread > HTTP_MAX_HEADER_SIZE) { + if (parser->nread > HTTP_MAX_HEADER_SIZE) { SET_ERRNO(HPE_HEADER_OVERFLOW); goto error; } } - switch (state) { + reexecute_byte: + switch (parser->state) { case s_dead: /* this state is used after a 'Connection: close' message @@ -618,21 +647,23 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); + if (ch == 'H') { + parser->state = s_res_or_resp_H; - if (ch == 'H') - state = s_res_or_resp_H; - else { + CALLBACK_NOTIFY(message_begin); + } else { parser->type = HTTP_REQUEST; - goto start_req_method_assign; + parser->state = s_start_req; + goto reexecute_byte; } + break; } case s_res_or_resp_H: if (ch == 'T') { parser->type = HTTP_RESPONSE; - state = s_res_HT; + parser->state = s_res_HT; } else { if (ch != 'E') { SET_ERRNO(HPE_INVALID_CONSTANT); @@ -641,8 +672,8 @@ size_t http_parser_execute (http_parser *parser, parser->type = HTTP_REQUEST; parser->method = HTTP_HEAD; - index = 2; - state = s_req_method; + parser->index = 2; + parser->state = s_req_method; } break; @@ -651,11 +682,9 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); - switch (ch) { case 'H': - state = s_res_H; + parser->state = s_res_H; break; case CR: @@ -666,27 +695,29 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_CONSTANT); goto error; } + + CALLBACK_NOTIFY(message_begin); break; } case s_res_H: STRICT_CHECK(ch != 'T'); - state = s_res_HT; + parser->state = s_res_HT; break; case s_res_HT: STRICT_CHECK(ch != 'T'); - state = s_res_HTT; + parser->state = s_res_HTT; break; case s_res_HTT: STRICT_CHECK(ch != 'P'); - state = s_res_HTTP; + parser->state = s_res_HTTP; break; case s_res_HTTP: STRICT_CHECK(ch != '/'); - state = s_res_first_http_major; + parser->state = s_res_first_http_major; break; case s_res_first_http_major: @@ -696,14 +727,14 @@ size_t http_parser_execute (http_parser *parser, } parser->http_major = ch - '0'; - state = s_res_http_major; + parser->state = s_res_http_major; break; /* major HTTP version or dot */ case s_res_http_major: { if (ch == '.') { - state = s_res_first_http_minor; + parser->state = s_res_first_http_minor; break; } @@ -731,14 +762,14 @@ size_t http_parser_execute (http_parser *parser, } parser->http_minor = ch - '0'; - state = s_res_http_minor; + parser->state = s_res_http_minor; break; /* minor HTTP version or end of request line */ case s_res_http_minor: { if (ch == ' ') { - state = s_res_first_status_code; + parser->state = s_res_first_status_code; break; } @@ -769,7 +800,7 @@ size_t http_parser_execute (http_parser *parser, goto error; } parser->status_code = ch - '0'; - state = s_res_status_code; + parser->state = s_res_status_code; break; } @@ -778,13 +809,13 @@ size_t http_parser_execute (http_parser *parser, if (!IS_NUM(ch)) { switch (ch) { case ' ': - state = s_res_status; + parser->state = s_res_status; break; case CR: - state = s_res_line_almost_done; + parser->state = s_res_line_almost_done; break; case LF: - state = s_header_field_start; + parser->state = s_header_field_start; break; default: SET_ERRNO(HPE_INVALID_STATUS); @@ -808,19 +839,19 @@ size_t http_parser_execute (http_parser *parser, /* the human readable status. e.g. "NOT FOUND" * we are not humans so just ignore this */ if (ch == CR) { - state = s_res_line_almost_done; + parser->state = s_res_line_almost_done; break; } if (ch == LF) { - state = s_header_field_start; + parser->state = s_header_field_start; break; } break; case s_res_line_almost_done: STRICT_CHECK(ch != LF); - state = s_header_field_start; + parser->state = s_header_field_start; break; case s_start_req: @@ -830,16 +861,13 @@ size_t http_parser_execute (http_parser *parser, parser->flags = 0; parser->content_length = -1; - CALLBACK2(message_begin); - if (!IS_ALPHA(ch)) { SET_ERRNO(HPE_INVALID_METHOD); goto error; } - start_req_method_assign: parser->method = (enum http_method) 0; - index = 1; + parser->index = 1; switch (ch) { case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; case 'D': parser->method = HTTP_DELETE; break; @@ -860,7 +888,10 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_METHOD); goto error; } - state = s_req_method; + parser->state = s_req_method; + + CALLBACK_NOTIFY(message_begin); + break; } @@ -873,31 +904,31 @@ size_t http_parser_execute (http_parser *parser, } matcher = method_strings[parser->method]; - if (ch == ' ' && matcher[index] == '\0') { - state = s_req_spaces_before_url; - } else if (ch == matcher[index]) { + if (ch == ' ' && matcher[parser->index] == '\0') { + parser->state = s_req_spaces_before_url; + } else if (ch == matcher[parser->index]) { ; /* nada */ } else if (parser->method == HTTP_CONNECT) { - if (index == 1 && ch == 'H') { + if (parser->index == 1 && ch == 'H') { parser->method = HTTP_CHECKOUT; - } else if (index == 2 && ch == 'P') { + } else if (parser->index == 2 && ch == 'P') { parser->method = HTTP_COPY; } else { goto error; } } else if (parser->method == HTTP_MKCOL) { - if (index == 1 && ch == 'O') { + if (parser->index == 1 && ch == 'O') { parser->method = HTTP_MOVE; - } else if (index == 1 && ch == 'E') { + } else if (parser->index == 1 && ch == 'E') { parser->method = HTTP_MERGE; - } else if (index == 1 && ch == '-') { + } else if (parser->index == 1 && ch == '-') { parser->method = HTTP_MSEARCH; - } else if (index == 2 && ch == 'A') { + } else if (parser->index == 2 && ch == 'A') { parser->method = HTTP_MKACTIVITY; } else { goto error; } - } else if (index == 1 && parser->method == HTTP_POST) { + } else if (parser->index == 1 && parser->method == HTTP_POST) { if (ch == 'R') { parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ } else if (ch == 'U') { @@ -907,26 +938,28 @@ size_t http_parser_execute (http_parser *parser, } else { goto error; } - } else if (index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') { + } else if (parser->index == 2 && parser->method == HTTP_UNLOCK && ch == 'S') { parser->method = HTTP_UNSUBSCRIBE; - } else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { parser->method = HTTP_PROPPATCH; } else { SET_ERRNO(HPE_INVALID_METHOD); goto error; } - ++index; + ++parser->index; break; } + case s_req_spaces_before_url: { if (ch == ' ') break; MARK(url); - state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); - if (state == s_dead) { + parser->state = parse_url_char( + parser->state, ch, parser->method == HTTP_CONNECT); + if (parser->state == s_dead) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -946,8 +979,9 @@ size_t http_parser_execute (http_parser *parser, SET_ERRNO(HPE_INVALID_URL); goto error; default: - state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); - if (state == s_dead) { + parser->state = parse_url_char( + parser->state, ch, parser->method == HTTP_CONNECT); + if (parser->state == s_dead) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -971,21 +1005,22 @@ size_t http_parser_execute (http_parser *parser, */ switch (ch) { case ' ': - CALLBACK(url); - state = s_req_http_start; + parser->state = s_req_http_start; + CALLBACK_DATA(url); break; case CR: case LF: - CALLBACK(url); parser->http_major = 0; parser->http_minor = 9; - state = (ch == CR) ? + parser->state = (ch == CR) ? s_req_line_almost_done : s_header_field_start; + CALLBACK_DATA(url); break; default: - state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); - if (state == s_dead) { + parser->state = parse_url_char( + parser->state, ch, parser->method == HTTP_CONNECT); + if (parser->state == s_dead) { SET_ERRNO(HPE_INVALID_URL); goto error; } @@ -996,7 +1031,7 @@ size_t http_parser_execute (http_parser *parser, case s_req_http_start: switch (ch) { case 'H': - state = s_req_http_H; + parser->state = s_req_http_H; break; case ' ': break; @@ -1008,22 +1043,22 @@ size_t http_parser_execute (http_parser *parser, case s_req_http_H: STRICT_CHECK(ch != 'T'); - state = s_req_http_HT; + parser->state = s_req_http_HT; break; case s_req_http_HT: STRICT_CHECK(ch != 'T'); - state = s_req_http_HTT; + parser->state = s_req_http_HTT; break; case s_req_http_HTT: STRICT_CHECK(ch != 'P'); - state = s_req_http_HTTP; + parser->state = s_req_http_HTTP; break; case s_req_http_HTTP: STRICT_CHECK(ch != '/'); - state = s_req_first_http_major; + parser->state = s_req_first_http_major; break; /* first digit of major HTTP version */ @@ -1034,14 +1069,14 @@ size_t http_parser_execute (http_parser *parser, } parser->http_major = ch - '0'; - state = s_req_http_major; + parser->state = s_req_http_major; break; /* major HTTP version or dot */ case s_req_http_major: { if (ch == '.') { - state = s_req_first_http_minor; + parser->state = s_req_first_http_minor; break; } @@ -1069,19 +1104,19 @@ size_t http_parser_execute (http_parser *parser, } parser->http_minor = ch - '0'; - state = s_req_http_minor; + parser->state = s_req_http_minor; break; /* minor HTTP version or end of request line */ case s_req_http_minor: { if (ch == CR) { - state = s_req_line_almost_done; + parser->state = s_req_line_almost_done; break; } if (ch == LF) { - state = s_header_field_start; + parser->state = s_header_field_start; break; } @@ -1111,23 +1146,22 @@ size_t http_parser_execute (http_parser *parser, goto error; } - state = s_header_field_start; + parser->state = s_header_field_start; break; } case s_header_field_start: - header_field_start: { if (ch == CR) { - state = s_headers_almost_done; + parser->state = s_headers_almost_done; break; } if (ch == LF) { /* they might be just sending \n instead of \r\n so this would be * the second \n to denote the end of headers*/ - state = s_headers_almost_done; - goto headers_almost_done; + parser->state = s_headers_almost_done; + goto reexecute_byte; } c = TOKEN(ch); @@ -1139,28 +1173,28 @@ size_t http_parser_execute (http_parser *parser, MARK(header_field); - index = 0; - state = s_header_field; + parser->index = 0; + parser->state = s_header_field; switch (c) { case 'c': - header_state = h_C; + parser->header_state = h_C; break; case 'p': - header_state = h_matching_proxy_connection; + parser->header_state = h_matching_proxy_connection; break; case 't': - header_state = h_matching_transfer_encoding; + parser->header_state = h_matching_transfer_encoding; break; case 'u': - header_state = h_matching_upgrade; + parser->header_state = h_matching_upgrade; break; default: - header_state = h_general; + parser->header_state = h_general; break; } break; @@ -1171,31 +1205,31 @@ size_t http_parser_execute (http_parser *parser, c = TOKEN(ch); if (c) { - switch (header_state) { + switch (parser->header_state) { case h_general: break; case h_C: - index++; - header_state = (c == 'o' ? h_CO : h_general); + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); break; case h_CO: - index++; - header_state = (c == 'n' ? h_CON : h_general); + parser->index++; + parser->header_state = (c == 'n' ? h_CON : h_general); break; case h_CON: - index++; + parser->index++; switch (c) { case 'n': - header_state = h_matching_connection; + parser->header_state = h_matching_connection; break; case 't': - header_state = h_matching_content_length; + parser->header_state = h_matching_content_length; break; default: - header_state = h_general; + parser->header_state = h_general; break; } break; @@ -1203,60 +1237,60 @@ size_t http_parser_execute (http_parser *parser, /* connection */ case h_matching_connection: - index++; - if (index > sizeof(CONNECTION)-1 - || c != CONNECTION[index]) { - header_state = h_general; - } else if (index == sizeof(CONNECTION)-2) { - header_state = h_connection; + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; } break; /* proxy-connection */ case h_matching_proxy_connection: - index++; - if (index > sizeof(PROXY_CONNECTION)-1 - || c != PROXY_CONNECTION[index]) { - header_state = h_general; - } else if (index == sizeof(PROXY_CONNECTION)-2) { - header_state = h_connection; + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; } break; /* content-length */ case h_matching_content_length: - index++; - if (index > sizeof(CONTENT_LENGTH)-1 - || c != CONTENT_LENGTH[index]) { - header_state = h_general; - } else if (index == sizeof(CONTENT_LENGTH)-2) { - header_state = h_content_length; + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; } break; /* transfer-encoding */ case h_matching_transfer_encoding: - index++; - if (index > sizeof(TRANSFER_ENCODING)-1 - || c != TRANSFER_ENCODING[index]) { - header_state = h_general; - } else if (index == sizeof(TRANSFER_ENCODING)-2) { - header_state = h_transfer_encoding; + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; } break; /* upgrade */ case h_matching_upgrade: - index++; - if (index > sizeof(UPGRADE)-1 - || c != UPGRADE[index]) { - header_state = h_general; - } else if (index == sizeof(UPGRADE)-2) { - header_state = h_upgrade; + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; } break; @@ -1264,7 +1298,7 @@ size_t http_parser_execute (http_parser *parser, case h_content_length: case h_transfer_encoding: case h_upgrade: - if (ch != ' ') header_state = h_general; + if (ch != ' ') parser->header_state = h_general; break; default: @@ -1275,20 +1309,20 @@ size_t http_parser_execute (http_parser *parser, } if (ch == ':') { - CALLBACK(header_field); - state = s_header_value_start; + parser->state = s_header_value_start; + CALLBACK_DATA(header_field); break; } if (ch == CR) { - state = s_header_almost_done; - CALLBACK(header_field); + parser->state = s_header_almost_done; + CALLBACK_DATA(header_field); break; } if (ch == LF) { - CALLBACK(header_field); - state = s_header_field_start; + parser->state = s_header_field_start; + CALLBACK_DATA(header_field); break; } @@ -1302,36 +1336,36 @@ size_t http_parser_execute (http_parser *parser, MARK(header_value); - state = s_header_value; - index = 0; + parser->state = s_header_value; + parser->index = 0; if (ch == CR) { - CALLBACK(header_value); - header_state = h_general; - state = s_header_almost_done; + parser->header_state = h_general; + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); break; } if (ch == LF) { - CALLBACK(header_value); - state = s_header_field_start; + parser->state = s_header_field_start; + CALLBACK_DATA(header_value); break; } c = LOWER(ch); - switch (header_state) { + switch (parser->header_state) { case h_upgrade: parser->flags |= F_UPGRADE; - header_state = h_general; + parser->header_state = h_general; break; case h_transfer_encoding: /* looking for 'Transfer-Encoding: chunked' */ if ('c' == c) { - header_state = h_matching_transfer_encoding_chunked; + parser->header_state = h_matching_transfer_encoding_chunked; } else { - header_state = h_general; + parser->header_state = h_general; } break; @@ -1347,17 +1381,17 @@ size_t http_parser_execute (http_parser *parser, case h_connection: /* looking for 'Connection: keep-alive' */ if (c == 'k') { - header_state = h_matching_connection_keep_alive; + parser->header_state = h_matching_connection_keep_alive; /* looking for 'Connection: close' */ } else if (c == 'c') { - header_state = h_matching_connection_close; + parser->header_state = h_matching_connection_close; } else { - header_state = h_general; + parser->header_state = h_general; } break; default: - header_state = h_general; + parser->header_state = h_general; break; } break; @@ -1367,19 +1401,20 @@ size_t http_parser_execute (http_parser *parser, { if (ch == CR) { - CALLBACK(header_value); - state = s_header_almost_done; + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); break; } if (ch == LF) { - CALLBACK(header_value); - goto header_almost_done; + parser->state = s_header_almost_done; + CALLBACK_DATA_NOADVANCE(header_value); + goto reexecute_byte; } c = LOWER(ch); - switch (header_state) { + switch (parser->header_state) { case h_general: break; @@ -1401,58 +1436,57 @@ size_t http_parser_execute (http_parser *parser, /* Transfer-Encoding: chunked */ case h_matching_transfer_encoding_chunked: - index++; - if (index > sizeof(CHUNKED)-1 - || c != CHUNKED[index]) { - header_state = h_general; - } else if (index == sizeof(CHUNKED)-2) { - header_state = h_transfer_encoding_chunked; + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + parser->header_state = h_transfer_encoding_chunked; } break; /* looking for 'Connection: keep-alive' */ case h_matching_connection_keep_alive: - index++; - if (index > sizeof(KEEP_ALIVE)-1 - || c != KEEP_ALIVE[index]) { - header_state = h_general; - } else if (index == sizeof(KEEP_ALIVE)-2) { - header_state = h_connection_keep_alive; + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + parser->header_state = h_connection_keep_alive; } break; /* looking for 'Connection: close' */ case h_matching_connection_close: - index++; - if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) { - header_state = h_general; - } else if (index == sizeof(CLOSE)-2) { - header_state = h_connection_close; + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CLOSE)-2) { + parser->header_state = h_connection_close; } break; case h_transfer_encoding_chunked: case h_connection_keep_alive: case h_connection_close: - if (ch != ' ') header_state = h_general; + if (ch != ' ') parser->header_state = h_general; break; default: - state = s_header_value; - header_state = h_general; + parser->state = s_header_value; + parser->header_state = h_general; break; } break; } case s_header_almost_done: - header_almost_done: { STRICT_CHECK(ch != LF); - state = s_header_value_lws; + parser->state = s_header_value_lws; - switch (header_state) { + switch (parser->header_state) { case h_connection_keep_alive: parser->flags |= F_CONNECTION_KEEP_ALIVE; break; @@ -1465,44 +1499,47 @@ size_t http_parser_execute (http_parser *parser, default: break; } + break; } case s_header_value_lws: { if (ch == ' ' || ch == '\t') - state = s_header_value_start; + parser->state = s_header_value_start; else { - state = s_header_field_start; - goto header_field_start; + parser->state = s_header_field_start; + goto reexecute_byte; } break; } case s_headers_almost_done: - headers_almost_done: { STRICT_CHECK(ch != LF); if (parser->flags & F_TRAILING) { /* End of a chunked request */ - CALLBACK2(message_complete); - state = NEW_MESSAGE(); + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); break; } - nread = 0; + parser->state = s_headers_done; - if (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT) { - parser->upgrade = 1; - } + /* Set this here so that on_headers_complete() callbacks can see it */ + parser->upgrade = + (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); /* Here we call the headers_complete callback. This is somewhat * different than other callbacks because if the user returns 1, we * will interpret that as saying that this message has no body. This * is needed for the annoying case of recieving a response to a HEAD * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. */ if (settings->on_headers_complete) { switch (settings->on_headers_complete(parser)) { @@ -1514,40 +1551,53 @@ size_t http_parser_execute (http_parser *parser, break; default: - parser->state = state; SET_ERRNO(HPE_CB_headers_complete); return p - data; /* Error */ } } + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return p - data; + } + + goto reexecute_byte; + } + + case s_headers_done: + { + STRICT_CHECK(ch != LF); + + parser->nread = 0; + /* Exit, the rest of the connect is in a different protocol. */ if (parser->upgrade) { - CALLBACK2(message_complete); + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); return (p - data) + 1; } if (parser->flags & F_SKIPBODY) { - CALLBACK2(message_complete); - state = NEW_MESSAGE(); + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); } else if (parser->flags & F_CHUNKED) { /* chunked encoding - ignore Content-Length header */ - state = s_chunk_size_start; + parser->state = s_chunk_size_start; } else { if (parser->content_length == 0) { /* Content-Length header given but zero: Content-Length: 0\r\n */ - CALLBACK2(message_complete); - state = NEW_MESSAGE(); + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); } else if (parser->content_length > 0) { /* Content-Length header given and non-zero */ - state = s_body_identity; + parser->state = s_body_identity; } else { if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { /* Assume content-length 0 - read the next */ - CALLBACK2(message_complete); - state = NEW_MESSAGE(); + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); } else { /* Read body until EOF */ - state = s_body_identity_eof; + parser->state = s_body_identity_eof; } } } @@ -1556,30 +1606,54 @@ size_t http_parser_execute (http_parser *parser, } case s_body_identity: - to_read = MIN(pe - p, (int64_t)parser->content_length); - if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); - p += to_read - 1; - parser->content_length -= to_read; - if (parser->content_length == 0) { - CALLBACK2(message_complete); - state = NEW_MESSAGE(); - } + { + uint64_t to_read = MIN(parser->content_length, (data + len) - p); + + assert(parser->content_length > 0); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_message_done; + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + goto reexecute_byte; } + break; + } /* read until EOF */ case s_body_identity_eof: - to_read = pe - p; - if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); - p += to_read - 1; - } + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); break; case s_chunk_size_start: { - assert(nread == 1); + assert(parser->nread == 1); assert(parser->flags & F_CHUNKED); unhex_val = unhex[(unsigned char)ch]; @@ -1589,7 +1663,7 @@ size_t http_parser_execute (http_parser *parser, } parser->content_length = unhex_val; - state = s_chunk_size; + parser->state = s_chunk_size; break; } @@ -1598,7 +1672,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); if (ch == CR) { - state = s_chunk_size_almost_done; + parser->state = s_chunk_size_almost_done; break; } @@ -1606,7 +1680,7 @@ size_t http_parser_execute (http_parser *parser, if (unhex_val == -1) { if (ch == ';' || ch == ' ') { - state = s_chunk_parameters; + parser->state = s_chunk_parameters; break; } @@ -1624,7 +1698,7 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); /* just ignore this shit. TODO check for overflow */ if (ch == CR) { - state = s_chunk_size_almost_done; + parser->state = s_chunk_size_almost_done; break; } break; @@ -1635,46 +1709,51 @@ size_t http_parser_execute (http_parser *parser, assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != LF); - nread = 0; + parser->nread = 0; if (parser->content_length == 0) { parser->flags |= F_TRAILING; - state = s_header_field_start; + parser->state = s_header_field_start; } else { - state = s_chunk_data; + parser->state = s_chunk_data; } break; } case s_chunk_data: { - assert(parser->flags & F_CHUNKED); + uint64_t to_read = MIN(parser->content_length, (data + len) - p); - to_read = MIN(pe - p, (int64_t)(parser->content_length)); + assert(parser->flags & F_CHUNKED); + assert(parser->content_length > 0); - if (to_read > 0) { - if (settings->on_body) settings->on_body(parser, p, to_read); - p += to_read - 1; - } + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; - if (to_read == parser->content_length) { - state = s_chunk_data_almost_done; + if (parser->content_length == 0) { + parser->state = s_chunk_data_almost_done; } - parser->content_length -= to_read; break; } case s_chunk_data_almost_done: assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); STRICT_CHECK(ch != CR); - state = s_chunk_data_done; + parser->state = s_chunk_data_done; + CALLBACK_DATA(body); break; case s_chunk_data_done: assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != LF); - state = s_chunk_size_start; + parser->nread = 0; + parser->state = s_chunk_size_start; break; default: @@ -1684,14 +1763,25 @@ size_t http_parser_execute (http_parser *parser, } } - CALLBACK(header_field); - CALLBACK(header_value); - CALLBACK(url); + /* Run callbacks for any marks that we have leftover after we ran our of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 1 : 0) + + (body_mark ? 1 : 0)) <= 1); - parser->state = state; - parser->header_state = header_state; - parser->index = index; - parser->nread = nread; + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); return len; @@ -1751,7 +1841,7 @@ http_parser_init (http_parser *parser, enum http_parser_type t) parser->upgrade = 0; parser->flags = 0; parser->method = 0; - parser->http_errno = 0; + parser->http_errno = HPE_OK; } const char * @@ -1860,3 +1950,17 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect, return 0; } + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} diff --git a/http_parser.h b/http_parser.h index f80ecff..2a8ae7d 100644 --- a/http_parser.h +++ b/http_parser.h @@ -177,6 +177,7 @@ enum flags XX(INVALID_CONSTANT, "invalid constant string") \ XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ XX(UNKNOWN, "an unknown error occurred") @@ -201,20 +202,20 @@ enum http_errno { struct http_parser { /** PRIVATE **/ - unsigned char type : 2; - unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ - unsigned char state; - unsigned char header_state; - unsigned char index; + unsigned char type : 2; /* enum http_parser_type */ + unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ + unsigned char state; /* enum state from http_parser.c */ + unsigned char header_state; /* enum header_state from http_parser.c */ + unsigned char index; /* index into current matcher */ - uint32_t nread; - int64_t content_length; + uint32_t nread; /* # bytes read in various scenarios */ + int64_t content_length; /* # bytes in body (0 if no Content-Length header) */ /** READ-ONLY **/ unsigned short http_major; unsigned short http_minor; unsigned short status_code; /* responses only */ - unsigned char method; /* requests only */ + unsigned char method; /* requests only */ unsigned char http_errno : 7; /* 1 = Upgrade header was present and the parser has exited because of that. @@ -304,6 +305,9 @@ int http_parser_parse_url(const char *buf, size_t buflen, int is_connect, struct http_parser_url *u); +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + #ifdef __cplusplus } #endif diff --git a/test.c b/test.c index 807921f..701180f 100644 --- a/test.c +++ b/test.c @@ -71,6 +71,7 @@ static int currently_parsing_eof; static struct message messages[5]; static int num_messages; +static http_parser_settings *current_pause_parser; /* * R E Q U E S T S * */ const struct message requests[] = @@ -1290,6 +1291,146 @@ message_complete_cb (http_parser *p) return 0; } +/* These dontcall_* callbacks exist so that we can verify that when we're + * paused, no additional callbacks are invoked */ +int +dontcall_message_begin_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_begin() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_field() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_value() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_request_url_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_request_url() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_body_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_body_cb() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_headers_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_headers_complete() called on paused " + "parser ***\n\n"); + exit(1); +} + +int +dontcall_message_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_complete() called on paused " + "parser ***\n\n"); + exit(1); +} + +static http_parser_settings settings_dontcall = + {.on_message_begin = dontcall_message_begin_cb + ,.on_header_field = dontcall_header_field_cb + ,.on_header_value = dontcall_header_value_cb + ,.on_url = dontcall_request_url_cb + ,.on_body = dontcall_body_cb + ,.on_headers_complete = dontcall_headers_complete_cb + ,.on_message_complete = dontcall_message_complete_cb + }; + +/* These pause_* callbacks always pause the parser and just invoke the regular + * callback that tracks content. Before returning, we overwrite the parser + * settings to point to the _dontcall variety so that we can verify that + * the pause actually did, you know, pause. */ +int +pause_message_begin_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_begin_cb(p); +} + +int +pause_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_field_cb(p, buf, len); +} + +int +pause_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_value_cb(p, buf, len); +} + +int +pause_request_url_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return request_url_cb(p, buf, len); +} + +int +pause_body_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return body_cb(p, buf, len); +} + +int +pause_headers_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return headers_complete_cb(p); +} + +int +pause_message_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_complete_cb(p); +} + +static http_parser_settings settings_pause = + {.on_message_begin = pause_message_begin_cb + ,.on_header_field = pause_header_field_cb + ,.on_header_value = pause_header_value_cb + ,.on_url = pause_request_url_cb + ,.on_body = pause_body_cb + ,.on_headers_complete = pause_headers_complete_cb + ,.on_message_complete = pause_message_complete_cb + }; + static http_parser_settings settings = {.on_message_begin = message_begin_cb ,.on_header_field = header_field_cb @@ -1359,6 +1500,17 @@ size_t parse_count_body (const char *buf, size_t len) return nparsed; } +size_t parse_pause (const char *buf, size_t len) +{ + size_t nparsed; + http_parser_settings s = settings_pause; + + currently_parsing_eof = (len == 0); + current_pause_parser = &s; + nparsed = http_parser_execute(parser, current_pause_parser, buf, len); + return nparsed; +} + static inline int check_str_eq (const struct message *m, const char *prop, @@ -1982,6 +2134,58 @@ create_large_chunked_message (int body_size_in_kb, const char* headers) return buf; } +/* Verify that we can pause parsing at any of the bytes in the + * message and still get the result that we're expecting. */ +void +test_message_pause (const struct message *msg) +{ + char *buf = (char*) msg->raw; + size_t buflen = strlen(msg->raw); + size_t nread; + + parser_init(msg->type); + + do { + nread = parse_pause(buf, buflen); + + // We can only set the upgrade buffer once we've gotten our message + // completion callback. + if (messages[0].message_complete_cb_called && + msg->upgrade && + parser->upgrade) { + messages[0].upgrade = buf + nread; + goto test; + } + + if (nread < buflen) { + + // Not much do to if we failed a strict-mode check + if (HTTP_PARSER_ERRNO(parser) == HPE_STRICT) { + parser_free(); + return; + } + + assert (HTTP_PARSER_ERRNO(parser) == HPE_PAUSED); + } + + buf += nread; + buflen -= nread; + http_parser_pause(parser, 0); + } while (buflen > 0); + + nread = parse_pause(NULL, 0); + assert (nread == 0); + +test: + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", msg->name); + exit(1); + } + + if(!message_eq(0, msg)) exit(1); + + parser_free(); +} int main (void) @@ -2012,6 +2216,10 @@ main (void) test_message(&responses[i]); } + for (i = 0; i < response_count; i++) { + test_message_pause(&responses[i]); + } + for (i = 0; i < response_count; i++) { if (!responses[i].should_keep_alive) continue; for (j = 0; j < response_count; j++) { @@ -2187,7 +2395,9 @@ main (void) test_message(&requests[i]); } - + for (i = 0; i < request_count; i++) { + test_message_pause(&requests[i]); + } for (i = 0; i < request_count; i++) { if (!requests[i].should_keep_alive) continue;