From dbd2dad46177a43c68f644af94d9646995e98505 Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Sat, 27 Feb 2010 20:23:29 -0800 Subject: [PATCH] Introduce http_parser_settings --- README.md | 29 ++++++++++++++++------------- http_parser.c | 33 ++++++++++++--------------------- http_parser.h | 8 +++++++- test.c | 39 +++++++++++++++++++++------------------ 4 files changed, 56 insertions(+), 53 deletions(-) diff --git a/README.md b/README.md index 6684cfe..959117e 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,9 @@ HTTP Parser This is a parser for HTTP messages written in C. It parses both requests and responses. The parser is designed to be used in performance HTTP applications. It does not make any allocations, it does not buffer data, and -it can be interrupted at anytime. It only requires about 136 bytes of data -per message stream (in a web server that is per connection). +it can be interrupted at anytime. Depending on your architecture, it only +requires between 100 and 200 bytes of data per message stream (in a web +server that is per connection). Features: @@ -31,12 +32,14 @@ One `http_parser` object is used per TCP connection. Initialize the struct using `http_parser_init()` and set the callbacks. That might look something like this for a request parser: + http_parser_settings settings; + settings.on_path = my_path_callback; + settings.on_header_field = my_header_field_callback; + /* ... */ + settings.data = my_socket; + http_parser *parser = malloc(sizeof(http_parser)); http_parser_init(parser, HTTP_REQUEST); - parser->on_path = my_path_callback; - parser->on_header_field = my_header_field_callback; - /* ... */ - parser->data = my_socket; When data is received on the socket execute the parser and check for errors. @@ -54,7 +57,7 @@ When data is received on the socket execute the parser and check for errors. * Note we pass the recved==0 to http_parse_requests to signal * that EOF has been recieved. 
*/ - nparsed = http_parser_execute(parser, buf, recved); + nparsed = http_parser_execute(parser, settings, buf, recved); if (nparsed != recved) { /* Handle error. Usually just close the connection. */ @@ -63,13 +66,13 @@ When data is received on the socket execute the parser and check for errors. HTTP needs to know where the end of the stream is. For example, sometimes servers send responses without Content-Length and expect the client to consume input (for the body) until EOF. To tell http_parser about EOF, give -`0` as the third parameter to `http_parser_execute()`. Callbacks and errors +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors can still be encountered during an EOF, so one must still be prepared to receive them. Scalar valued message information such as `status_code`, `method`, and the HTTP version are stored in the parser structure. This data is only -temporarlly stored in `http_parser` and gets reset on each new message. If +temporarily stored in `http_parser` and gets reset on each new message. If this information is needed later, copy it out of the structure during the `headers_complete` callback. @@ -85,10 +88,10 @@ parser, for example, would not want such a feature. Callbacks --------- -During the `http_parser_execute()` call, the callbacks set in `http_parser` -will be executed. The parser maintains state and never looks behind, so -buffering the data is not necessary. If you need to save certain data for -later usage, you can do that from the callbacks. +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. 
There are two types of callbacks: diff --git a/http_parser.c b/http_parser.c index ff440ba..a7d01cc 100644 --- a/http_parser.c +++ b/http_parser.c @@ -38,10 +38,10 @@ #define CALLBACK2(FOR) \ do { \ - if (parser->on_##FOR) { \ - if (0 != parser->on_##FOR(parser)) return (p - data); \ + if (settings.on_##FOR) { \ + if (0 != settings.on_##FOR(parser)) return (p - data); \ } \ -} while (0) +} while (0) #define MARK(FOR) \ do { \ @@ -54,10 +54,11 @@ do { \ if (parser->FOR##_mark) { \ parser->FOR##_size += p - parser->FOR##_mark; \ if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \ - if (parser->on_##FOR) { \ - if (0 != parser->on_##FOR(parser, \ - parser->FOR##_mark, \ - p - parser->FOR##_mark)) { \ + if (settings.on_##FOR) { \ + if (0 != settings.on_##FOR(parser, \ + parser->FOR##_mark, \ + p - parser->FOR##_mark)) \ + { \ return (p - data); \ } \ } \ @@ -236,6 +237,7 @@ enum flags #endif size_t http_parser_execute (http_parser *parser, + http_parser_settings settings, const char *data, size_t len) { @@ -1285,7 +1287,7 @@ size_t http_parser_execute (http_parser *parser, case s_body_identity: to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read)); if (to_read > 0) { - if (parser->on_body) parser->on_body(parser, p, to_read); + if (settings.on_body) settings.on_body(parser, p, to_read); p += to_read - 1; parser->body_read += to_read; if (parser->body_read == parser->content_length) { @@ -1299,7 +1301,7 @@ size_t http_parser_execute (http_parser *parser, case s_body_identity_eof: to_read = pe - p; if (to_read > 0) { - if (parser->on_body) parser->on_body(parser, p, to_read); + if (settings.on_body) settings.on_body(parser, p, to_read); p += to_read - 1; parser->body_read += to_read; } @@ -1372,7 +1374,7 @@ size_t http_parser_execute (http_parser *parser, to_read = MIN(pe - p, (ssize_t)(parser->content_length)); if (to_read > 0) { - if (parser->on_body) parser->on_body(parser, p, to_read); + if (settings.on_body) settings.on_body(parser, 
p, to_read); p += to_read - 1; } @@ -1449,17 +1451,6 @@ http_parser_init (http_parser *parser, enum http_parser_type t) parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res); parser->nread = 0; - parser->on_message_begin = NULL; - parser->on_path = NULL; - parser->on_query_string = NULL; - parser->on_url = NULL; - parser->on_fragment = NULL; - parser->on_header_field = NULL; - parser->on_header_value = NULL; - parser->on_headers_complete = NULL; - parser->on_body = NULL; - parser->on_message_complete = NULL; - parser->header_field_mark = NULL; parser->header_value_mark = NULL; parser->query_string_mark = NULL; diff --git a/http_parser.h b/http_parser.h index f6a30b0..482209a 100644 --- a/http_parser.h +++ b/http_parser.h @@ -42,6 +42,7 @@ extern "C" { #define HTTP_MAX_HEADER_SIZE (80*1024) typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; /* Callbacks should return non-zero to indicate an error. The parser will * then halt execution. 
@@ -114,7 +115,9 @@ struct http_parser { /** PUBLIC **/ void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; +struct http_parser_settings { /* an ordered list of callbacks */ http_cb on_message_begin; @@ -134,7 +137,10 @@ struct http_parser { void http_parser_init(http_parser *parser, enum http_parser_type type); -size_t http_parser_execute(http_parser *parser, const char *data, size_t len); +size_t http_parser_execute(http_parser *parser, + http_parser_settings settings, + const char *data, + size_t len); /* If http_should_keep_alive() in the on_headers_complete or * on_message_complete callback returns true, then this will be should be diff --git a/test.c b/test.c index 3216dc8..4b1e870 100644 --- a/test.c +++ b/test.c @@ -63,14 +63,6 @@ struct message { static int currently_parsing_eof; -inline size_t parse (const char *buf, size_t len) -{ - size_t nparsed; - currently_parsing_eof = (len == 0); - nparsed = http_parser_execute(parser, buf, len); - return nparsed; -} - static struct message messages[5]; static int num_messages; @@ -802,6 +794,19 @@ message_complete_cb (http_parser *p) return 0; } +static http_parser_settings settings = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_path = request_path_cb + ,.on_url = request_url_cb + ,.on_fragment = fragment_cb + ,.on_query_string = query_string_cb + ,.on_body = body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + }; + void parser_init (enum http_parser_type type) { @@ -815,16 +820,6 @@ parser_init (enum http_parser_type type) memset(&messages, 0, sizeof messages); - parser->on_message_begin = message_begin_cb; - parser->on_header_field = header_field_cb; - parser->on_header_value = header_value_cb; - parser->on_path = request_path_cb; - parser->on_url = request_url_cb; - parser->on_fragment = fragment_cb; - parser->on_query_string = query_string_cb; - 
parser->on_body = body_cb; - parser->on_headers_complete = headers_complete_cb; - parser->on_message_complete = message_complete_cb; } void @@ -835,6 +830,14 @@ parser_free () parser = NULL; } +inline size_t parse (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(parser, settings, buf, len); + return nparsed; +} + static inline int check_str_eq (const struct message *m, const char *prop,