From fb6dc67b05c83da9022040dc23dfe66d6ba82f32 Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Fri, 20 Nov 2009 14:24:05 +0100 Subject: [PATCH] strict check --- Makefile | 12 ++- README.md | 14 ++-- http_parser.c | 213 +++++++++++++++++++++++++------------------------- http_parser.h | 23 ++++-- 4 files changed, 137 insertions(+), 125 deletions(-) diff --git a/Makefile b/Makefile index fc0dbde..3c62766 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,15 @@ -OPT=-O0 -g -Wall -Wextra -Werror -#OPT=-O2 +#OPT=-O0 -g -Wall -Wextra -Werror +OPT=-O3 -DHTTP_PARSER_STRICT=0 test: http_parser.o test.c gcc $(OPT) http_parser.o test.c -o $@ +test-run: test + ./test + +test-run-timed: test + time ./test > /dev/null + http_parser.o: http_parser.c http_parser.h Makefile gcc $(OPT) -c http_parser.c @@ -21,4 +27,4 @@ package: http_parser.c tar -cf http_parser.tar http_parser/ @echo /tmp/http_parser.tar -.PHONY: clean package +.PHONY: clean package test-run test-run-timed diff --git a/README.md b/README.md index c8c1c74..ed25150 100644 --- a/README.md +++ b/README.md @@ -32,14 +32,14 @@ using `http_parser_init()` and set the callbacks. That might look something like this: http_parser *parser = malloc(sizeof(http_parser)); - http_parser_init(parser, HTTP_REQUEST); + http_parser_init(parser); parser->on_path = my_path_callback; parser->on_header_field = my_header_field_callback; parser->data = my_socket; When data is received on the socket execute the parser and check for errors. - size_t len = 80*1024; + size_t len = 80*1024, nparsed; char buf[len]; ssize_t recved; @@ -50,19 +50,19 @@ When data is received on the socket execute the parser and check for errors. } /* Start up / continue the parser. - * Note we pass the recved==0 to http_parser_execute to signal + * Note we pass the recved==0 to http_parse_requests to signal * that EOF has been recieved. 
*/ - http_parser_execute(parser, buf, recved); + nparsed = http_parse_requests(parser, buf, recved); - if (http_parser_has_error(parser)) { + if (nparsed != recved) { /* Handle error. Usually just close the connection. */ } HTTP needs to know where the end of the stream is. For example, sometimes servers send responses without Content-Length and expect the client to consume input (for the body) until EOF. To tell http_parser about EOF, give -`0` as the third parameter to `http_parser_execute()`. Callbacks and errors +`0` as the third parameter to `http_parse_requests()`. Callbacks and errors can still be encountered during an EOF, so one must still be prepared to receive them. @@ -84,7 +84,7 @@ parser, for example, would not want such a feature. Callbacks --------- -During the `http_parser_execute()` call, the callbacks set in `http_parser` +During the `http_parse_requests()` call, the callbacks set in `http_parser` will be executed. The parser maintains state and never looks behind, so buffering the data is not necessary. If you need to save certain data for later usage, you can do that from the callbacks. 
diff --git a/http_parser.c b/http_parser.c index b9b36a9..1f67f6b 100644 --- a/http_parser.c +++ b/http_parser.c @@ -58,26 +58,6 @@ do { \ if (0 != FOR##_callback(parser)) return (p - data); \ } while (0) - -#if 0 -do { \ - if (parser->FOR##_mark) { \ - parser->FOR##_size += p - parser->FOR##_mark; \ - if (parser->FOR##_size > MAX_FIELD_SIZE) { \ - return ERROR; \ - } \ - if (parser->on_##FOR) { \ - if (0 != parser->on_##FOR(parser, \ - parser->FOR##_mark, \ - p - parser->FOR##_mark)) \ - { \ - return ERROR; \ - } \ - } \ - } \ -} while(0) -#endif - #define DEFINE_CALLBACK(FOR) \ static inline int FOR##_callback (http_parser *parser, const char *p) \ { \ @@ -135,7 +115,7 @@ static const unsigned char lowcase[] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; -static int unhex[] = +static const int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 @@ -166,7 +146,7 @@ static const uint32_t usual[] = { }; enum state - { s_start_res = 1 + { s_start_res = 1 /* important that this is > 0 */ , s_res_H , s_res_HT , s_res_HTT @@ -211,10 +191,10 @@ enum state , s_req_http_HT , s_req_http_HTT , s_req_http_HTTP - , s_req_first_major_digit - , s_req_major_digit - , s_req_first_minor_digit - , s_req_minor_digit + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor , s_req_line_almost_done , s_header_field_start @@ -269,11 +249,17 @@ enum flags , F_TRAILING = 0x0010 }; -#define ERROR (p - data); +#define ERROR (p - data) #define CR '\r' #define LF '\n' #define LOWER(c) (unsigned char)(c | 0x20) +#if HTTP_PARSER_STRICT /* strict: a true cond returns ERROR from the enclosing function */ +# define STRICT_CHECK(cond) do { if (cond) return ERROR; } while (0) +#else /* lenient: the check compiles away and cond is never evaluated */ +# define STRICT_CHECK(cond) ((void)0) +#endif + static inline size_t parse (http_parser *parser, const char *data, size_t len, int start_state) { @@ -325,22 +311,22 @@ size_t parse 
(http_parser *parser, const char *data, size_t len, int start_state } case s_res_H: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); state = s_res_HT; break; case s_res_HT: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); state = s_res_HTT; break; case s_res_HTT: - if (ch != 'P') return ERROR; + STRICT_CHECK(ch != 'P'); state = s_res_HTTP; break; case s_res_HTTP: - if (ch != '/') return ERROR; + STRICT_CHECK(ch != '/'); state = s_res_first_http_major; break; @@ -425,6 +411,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state parser->status_code *= 10; parser->status_code += ch - '0'; + if (parser->status_code > 999) return ERROR; break; } @@ -444,7 +431,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; case s_res_line_almost_done: - if (ch != LF) return ERROR; + STRICT_CHECK(ch != LF); state = s_header_field_start; break; @@ -489,12 +476,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* GET */ case s_req_method_G: - if (ch != 'E') return ERROR; + STRICT_CHECK(ch != 'E'); state = s_req_method_GE; break; case s_req_method_GE: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); parser->method = HTTP_GET; state = s_req_spaces_before_url; break; @@ -502,17 +489,17 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* HEAD */ case s_req_method_H: - if (ch != 'E') return ERROR; + STRICT_CHECK(ch != 'E'); state = s_req_method_HE; break; case s_req_method_HE: - if (ch != 'A') return ERROR; + STRICT_CHECK(ch != 'A'); state = s_req_method_HEA; break; case s_req_method_HEA: - if (ch != 'D') return ERROR; + STRICT_CHECK(ch != 'D'); parser->method = HTTP_HEAD; state = s_req_spaces_before_url; break; @@ -537,7 +524,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* PUT */ case s_req_method_PU: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); parser->method = 
HTTP_PUT; state = s_req_spaces_before_url; break; @@ -545,12 +532,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* POST */ case s_req_method_PO: - if (ch != 'S') return ERROR; + STRICT_CHECK(ch != 'S'); state = s_req_method_POS; break; case s_req_method_POS: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); parser->method = HTTP_POST; state = s_req_spaces_before_url; break; @@ -558,27 +545,27 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* DELETE */ case s_req_method_D: - if (ch != 'E') return ERROR; + STRICT_CHECK(ch != 'E'); state = s_req_method_DE; break; case s_req_method_DE: - if (ch != 'L') return ERROR; + STRICT_CHECK(ch != 'L'); state = s_req_method_DEL; break; case s_req_method_DEL: - if (ch != 'E') return ERROR; + STRICT_CHECK(ch != 'E'); state = s_req_method_DELE; break; case s_req_method_DELE: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); state = s_req_method_DELET; break; case s_req_method_DELET: - if (ch != 'E') return ERROR; + STRICT_CHECK(ch != 'E'); parser->method = HTTP_DELETE; state = s_req_spaces_before_url; break; @@ -621,12 +608,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state } case s_req_schema_slash: - if (ch != '/') return ERROR; + STRICT_CHECK(ch != '/'); state = s_req_schema_slash_slash; break; case s_req_schema_slash_slash: - if (ch != '/') return ERROR; + STRICT_CHECK(ch != '/'); state = s_req_host; break; @@ -860,37 +847,37 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; case s_req_http_H: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); state = s_req_http_HT; break; case s_req_http_HT: - if (ch != 'T') return ERROR; + STRICT_CHECK(ch != 'T'); state = s_req_http_HTT; break; case s_req_http_HTT: - if (ch != 'P') return ERROR; + STRICT_CHECK(ch != 'P'); state = s_req_http_HTTP; break; case s_req_http_HTTP: - if (ch != '/') return ERROR; - state = 
s_req_first_major_digit; + STRICT_CHECK(ch != '/'); + state = s_req_first_http_major; break; /* first digit of major HTTP version */ - case s_req_first_major_digit: + case s_req_first_http_major: if (ch < '1' || ch > '9') return ERROR; parser->http_major = ch - '0'; - state = s_req_major_digit; + state = s_req_http_major; break; /* major HTTP version or dot */ - case s_req_major_digit: + case s_req_http_major: { if (ch == '.') { - state = s_req_first_minor_digit; + state = s_req_first_http_minor; break; } @@ -904,14 +891,14 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state } /* first digit of minor HTTP version */ - case s_req_first_minor_digit: + case s_req_first_http_minor: if (ch < '0' || ch > '9') return ERROR; parser->http_minor = ch - '0'; - state = s_req_minor_digit; + state = s_req_http_minor; break; /* minor HTTP version or end of request line */ - case s_req_minor_digit: + case s_req_http_minor: { if (ch == CR) { state = s_req_line_almost_done; @@ -1083,17 +1070,6 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state { if (ch == ' ') break; - if (ch == CR) { - header_state = h_general; - state = s_header_almost_done; - break; - } - - if (ch == LF) { - state = s_header_field_start; - break; - } - MARK(header_value); state = s_header_value; @@ -1102,39 +1078,51 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state c = lowcase[(int)ch]; if (!c) { + if (ch == CR) { + header_state = h_general; + state = s_header_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + header_state = h_general; - } else { - switch (header_state) { - case h_transfer_encoding: - /* looking for 'Transfer-Encoding: chunked' */ - if ('c' == c) { - header_state = h_matching_transfer_encoding_chunked; - } else { - header_state = h_general; - } - break; + break; + } - case h_content_length: - if (ch < '0' || ch > '9') return ERROR; - parser->content_length = 
ch - '0'; - break; + switch (header_state) { + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + header_state = h_matching_transfer_encoding_chunked; + } else { + header_state = h_general; + } + break; - case h_connection: - /* looking for 'Connection: keep-alive' */ - if (c == 'k') { - header_state = h_matching_connection_keep_alive; - /* looking for 'Connection: close' */ - } else if (c == 'c') { - header_state = h_matching_connection_close; - } else { - header_state = h_general; - } - break; + case h_content_length: + if (ch < '0' || ch > '9') return ERROR; + parser->content_length = ch - '0'; + break; - default: + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + header_state = h_matching_connection_close; + } else { header_state = h_general; - break; - } + } + break; + + default: + header_state = h_general; + break; } break; } @@ -1219,7 +1207,8 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state } case s_header_almost_done: - if (ch != LF) return ERROR; + { + STRICT_CHECK(ch != LF); state = s_header_field_start; @@ -1237,9 +1226,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; } break; + } case s_headers_almost_done: - if (ch != LF) return ERROR; + { + STRICT_CHECK(ch != LF); if (parser->flags & F_TRAILING) { /* End of a chunked request */ @@ -1270,6 +1261,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state } } break; + } case s_body_identity: to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read)); @@ -1302,6 +1294,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; case s_chunk_size: + { if (ch == CR) { state = s_chunk_size_almost_done; break; @@ -1320,10 +1313,10 @@ size_t parse (http_parser 
*parser, const char *data, size_t len, int start_state parser->chunk_size *= 16; parser->chunk_size += c; break; + } case s_chunk_parameters: - /* just ignore this shit */ - /* TODO check for overflow */ + /* just ignore this shit. TODO check for overflow */ if (ch == CR) { state = s_chunk_size_almost_done; break; @@ -1331,7 +1324,8 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; case s_chunk_size_almost_done: - if (ch != LF) return ERROR; + { + STRICT_CHECK(ch != LF); if (parser->chunk_size == 0) { parser->flags |= F_TRAILING; state = s_header_field_start; @@ -1339,8 +1333,10 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state state = s_chunk_data; } break; + } case s_chunk_data: + { to_read = MIN(pe - p, (ssize_t)(parser->chunk_size)); if (to_read > 0) { @@ -1354,14 +1350,15 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state parser->chunk_size -= to_read; break; + } case s_chunk_data_almost_done: - if (ch != CR) return ERROR; + STRICT_CHECK(ch != CR); state = s_chunk_data_done; break; case s_chunk_data_done: - if (ch != LF) return ERROR; + STRICT_CHECK(ch != LF); state = s_chunk_size_start; break; @@ -1386,14 +1383,16 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state } -size_t http_parse_requests (http_parser *parser, const char *data, size_t len) +size_t +http_parse_requests (http_parser *parser, const char *data, size_t len) { if (!parser->state) parser->state = s_start_req; return parse(parser, data, len, s_start_req); } -size_t http_parse_responses (http_parser *parser, const char *data, size_t len) +size_t +http_parse_responses (http_parser *parser, const char *data, size_t len) { if (!parser->state) parser->state = s_start_res; return parse(parser, data, len, s_start_res); diff --git a/http_parser.h b/http_parser.h index 4080689..a245765 100644 --- a/http_parser.h +++ b/http_parser.h @@ -29,6 +29,15 @@ extern "C" { 
#endif #include +/* Compile with -DHTTP_PARSER_STRICT=0 to make fewer checks, but run + * faster. Strict checking is the default when the build does not define + * the macro; a value supplied with -D (0 or 1) is used as given instead + * of being overridden here. + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + typedef struct http_parser http_parser; /* Callbacks should return non-zero to indicate an error. The parse will @@ -52,14 +61,15 @@ enum http_method struct http_parser { /** PRIVATE **/ - int state; - int header_state; + unsigned short state; + unsigned short header_state; size_t header_index; - size_t chunk_size; char flags; + size_t chunk_size; ssize_t body_read; + ssize_t content_length; const char *header_field_mark; size_t header_field_size; @@ -77,11 +87,8 @@ struct http_parser { /** READ-ONLY **/ unsigned short status_code; /* responses only */ enum http_method method; /* requests only */ - - int http_major; - int http_minor; - - ssize_t content_length; + unsigned short http_major; + unsigned short http_minor; /** PUBLIC **/ void *data; /* A pointer to get hook to the "connection" or "socket" object */