From 1a677040c01a371239597a8fa288dc2accae02fa Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Fri, 8 Jan 2010 21:38:17 -0800 Subject: [PATCH] API: Define parser type in http_parser_init() That is, for a request parser do this: http_parser_init(my_parser, HTTP_REQUEST) for a response parser do this: http_parser_init(my_parser, HTTP_RESPONSE) Then http_parse_requests() and http_parse_responses() both turn into http_parser_execute(). --- README.md | 10 +++---- http_parser.c | 30 ++++++------------- http_parser.h | 8 +++-- test.c | 83 +++++++++++++++++++++++++-------------------------- 4 files changed, 59 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index bca5aea..6684cfe 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,10 @@ Usage One `http_parser` object is used per TCP connection. Initialize the struct using `http_parser_init()` and set the callbacks. That might look something -like this: +like this for a request parser: http_parser *parser = malloc(sizeof(http_parser)); - http_parser_init(parser); + http_parser_init(parser, HTTP_REQUEST); parser->on_path = my_path_callback; parser->on_header_field = my_header_field_callback; /* ... */ @@ -54,7 +54,7 @@ When data is received on the socket execute the parser and check for errors. * Note we pass the recved==0 to http_parse_requests to signal * that EOF has been recieved. */ - nparsed = http_parse_requests(parser, buf, recved); + nparsed = http_parser_execute(parser, buf, recved); if (nparsed != recved) { /* Handle error. Usually just close the connection. */ @@ -63,7 +63,7 @@ When data is received on the socket execute the parser and check for errors. HTTP needs to know where the end of the stream is. For example, sometimes servers send responses without Content-Length and expect the client to consume input (for the body) until EOF. To tell http_parser about EOF, give -`0` as the third parameter to `http_parse_requests()`. 
Callbacks and errors +`0` as the third parameter to `http_parser_execute()`. Callbacks and errors can still be encountered during an EOF, so one must still be prepared to receive them. @@ -85,7 +85,7 @@ parser, for example, would not want such a feature. Callbacks --------- -During the `http_parse_requests()` call, the callbacks set in `http_parser` +During the `http_parser_execute()` call, the callbacks set in `http_parser` will be executed. The parser maintains state and never looks behind, so buffering the data is not necessary. If you need to save certain data for later usage, you can do that from the callbacks. diff --git a/http_parser.c b/http_parser.c index 9088af7..a4ed4bb 100644 --- a/http_parser.c +++ b/http_parser.c @@ -245,6 +245,8 @@ enum flags #define LF '\n' #define LOWER(c) (unsigned char)(c | 0x20) +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + #if HTTP_PARSER_STRICT # define STRICT_CHECK(cond) if (cond) goto error # define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) @@ -253,8 +255,9 @@ enum flags # define NEW_MESSAGE() start_state #endif -static inline -size_t parse (http_parser *parser, const char *data, size_t len, int start_state) +size_t http_parser_execute (http_parser *parser, + const char *data, + size_t len) { char c, ch; const char *p, *pe; @@ -1256,7 +1259,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state /* Content-Length header given and non-zero */ state = s_body_identity; } else { - if (start_state == s_start_req || http_should_keep_alive(parser)) { + if (parser->type == HTTP_REQUEST || http_should_keep_alive(parser)) { /* Assume content-length 0 - read the next */ CALLBACK2(message_complete); state = NEW_MESSAGE(); @@ -1408,22 +1411,6 @@ error: } -size_t -http_parse_requests (http_parser *parser, const char *data, size_t len) -{ - if (!parser->state) parser->state = s_start_req; - return parse(parser, data, len, s_start_req); -} - - -size_t -http_parse_responses (http_parser *parser, const char *data, size_t len) -{ - if (!parser->state) parser->state = s_start_res; - return parse(parser, data, len, s_start_res); -} - - int http_should_keep_alive (http_parser *parser) { @@ -1446,9 +1433,10 @@ http_should_keep_alive (http_parser *parser) void -http_parser_init (http_parser *parser) +http_parser_init (http_parser *parser, enum http_parser_type t) { - parser->state = 0; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? 
s_start_req : s_start_res); parser->on_message_begin = NULL; parser->on_path = NULL; parser->on_query_string = NULL; diff --git a/http_parser.h b/http_parser.h index 5855e89..3a00e53 100644 --- a/http_parser.h +++ b/http_parser.h @@ -74,8 +74,11 @@ enum http_method , HTTP_UNLOCK = 0x4000 }; +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE }; + struct http_parser { /** PRIVATE **/ + enum http_parser_type type; unsigned short state; unsigned short header_state; size_t index; @@ -125,9 +128,8 @@ struct http_parser { http_cb on_message_complete; }; -void http_parser_init(http_parser *parser); -size_t http_parse_requests(http_parser *parser, const char *data, size_t len); -size_t http_parse_responses(http_parser *parser, const char *data, size_t len); +void http_parser_init(http_parser *parser, enum http_parser_type type); +size_t http_parser_execute(http_parser *parser, const char *data, size_t len); /* Call this in the on_headers_complete or on_message_complete callback to * determine if this will be the last message on the connection. * If you are the server, respond with the "Connection: close" header diff --git a/test.c b/test.c index 5f4c418..ad98f69 100644 --- a/test.c +++ b/test.c @@ -34,14 +34,12 @@ #define MAX_HEADERS 10 #define MAX_ELEMENT_SIZE 500 -enum message_type { REQUEST, RESPONSE }; - static http_parser *parser; struct message { const char *name; // for debugging purposes const char *raw; - enum message_type type; + enum http_parser_type type; enum http_method method; int status_code; char request_path[MAX_ELEMENT_SIZE]; @@ -65,12 +63,11 @@ struct message { static int currently_parsing_eof; -inline size_t parse (enum message_type t, const char *buf, size_t len) +inline size_t parse (const char *buf, size_t len) { size_t nparsed; currently_parsing_eof = (len == 0); - nparsed = (t == REQUEST ? 
http_parse_requests(parser, buf, len) - : http_parse_responses(parser, buf, len)); + nparsed = http_parser_execute(parser, buf, len); return nparsed; } @@ -81,7 +78,7 @@ static int num_messages; const struct message requests[] = #define CURL_GET 0 { {.name= "curl get" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /test HTTP/1.1\r\n" "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n" "Host: 0.0.0.0=5000\r\n" @@ -107,7 +104,7 @@ const struct message requests[] = #define FIREFOX_GET 1 , {.name= "firefox get" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /favicon.ico HTTP/1.1\r\n" "Host: 0.0.0.0=5000\r\n" "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0\r\n" @@ -143,7 +140,7 @@ const struct message requests[] = #define DUMBFUCK 2 , {.name= "dumbfuck" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /dumbfuck HTTP/1.1\r\n" "aaaaaaaaaaaaa:++++++++++\r\n" "\r\n" @@ -165,7 +162,7 @@ const struct message requests[] = #define FRAGMENT_IN_URI 3 , {.name= "fragment in url" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n" "\r\n" ,.should_keep_alive= TRUE @@ -184,7 +181,7 @@ const struct message requests[] = #define GET_NO_HEADERS_NO_BODY 4 , {.name= "get no headers no body" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /get_no_headers_no_body/world HTTP/1.1\r\n" "\r\n" ,.should_keep_alive= TRUE @@ -202,7 +199,7 @@ const struct message requests[] = #define GET_ONE_HEADER_NO_BODY 5 , {.name= "get one header no body" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /get_one_header_no_body HTTP/1.1\r\n" "Accept: */*\r\n" "\r\n" @@ -224,7 +221,7 @@ const struct message requests[] = #define GET_FUNKY_CONTENT_LENGTH 6 , {.name= "get funky content length body hello" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /get_funky_content_length_body_hello HTTP/1.0\r\n" "conTENT-Length: 5\r\n" "\r\n" @@ 
-247,7 +244,7 @@ const struct message requests[] = #define POST_IDENTITY_BODY_WORLD 7 , {.name= "post identity body world" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n" "Accept: */*\r\n" "Transfer-Encoding: identity\r\n" @@ -274,7 +271,7 @@ const struct message requests[] = #define POST_CHUNKED_ALL_YOUR_BASE 8 , {.name= "post - chunked body: all your base are belong to us" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "POST /post_chunked_all_your_base HTTP/1.1\r\n" "Transfer-Encoding: chunked\r\n" "\r\n" @@ -299,7 +296,7 @@ const struct message requests[] = #define TWO_CHUNKS_MULT_ZERO_END 9 , {.name= "two chunks ; triple zero ending" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "POST /two_chunks_mult_zero_end HTTP/1.1\r\n" "Transfer-Encoding: chunked\r\n" "\r\n" @@ -325,7 +322,7 @@ const struct message requests[] = #define CHUNKED_W_TRAILING_HEADERS 10 , {.name= "chunked with trailing headers. blech." - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "POST /chunked_w_trailing_headers HTTP/1.1\r\n" "Transfer-Encoding: chunked\r\n" "\r\n" @@ -355,7 +352,7 @@ const struct message requests[] = #define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 , {.name= "with bullshit after the length" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n" "Transfer-Encoding: chunked\r\n" "\r\n" @@ -381,7 +378,7 @@ const struct message requests[] = #define WITH_QUOTES 12 , {.name= "with quotes" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /with_\"stupid\"_quotes?foo=\"bar\" HTTP/1.1\r\n\r\n" ,.should_keep_alive= TRUE ,.message_complete_on_eof= FALSE @@ -404,7 +401,7 @@ const struct message requests[] = * Compare with NO_CONTENT_LENGTH_RESPONSE. 
*/ , {.name = "apachebench get" - ,.type= REQUEST + ,.type= HTTP_REQUEST ,.raw= "GET /test HTTP/1.0\r\n" "Host: 0.0.0.0:5000\r\n" "User-Agent: ApacheBench/2.3\r\n" @@ -433,7 +430,7 @@ const struct message requests[] = const struct message responses[] = #define GOOGLE_301 0 { {.name= "google 301" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" "Location: http://www.google.com/\r\n" "Content-Type: text/html; charset=UTF-8\r\n" @@ -479,7 +476,7 @@ const struct message responses[] = * Compare with APACHEBENCH_GET */ , {.name= "no content-length response" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 200 OK\r\n" "Date: Tue, 04 Aug 2009 07:59:32 GMT\r\n" "Server: Apache\r\n" @@ -522,7 +519,7 @@ const struct message responses[] = #define NO_HEADERS_NO_BODY_404 2 , {.name= "404 no headers no body" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 404 Not Found\r\n\r\n" ,.should_keep_alive= TRUE ,.message_complete_on_eof= FALSE @@ -536,7 +533,7 @@ const struct message responses[] = #define NO_REASON_PHRASE 3 , {.name= "301 no response phrase" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 301\r\n\r\n" ,.should_keep_alive = TRUE ,.message_complete_on_eof= FALSE @@ -550,7 +547,7 @@ const struct message responses[] = #define TRAILING_SPACE_ON_CHUNKED_BODY 4 , {.name="200 trailing space on chunked body" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 200 OK\r\n" "Content-Type: text/plain\r\n" "Transfer-Encoding: chunked\r\n" @@ -581,7 +578,7 @@ const struct message responses[] = #define NO_CARRIAGE_RET 5 , {.name="no carriage ret" - ,.type= RESPONSE + ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 200 OK\n" "Content-Type: text/html; charset=utf-8\n" "Connection: close\n" @@ -715,7 +712,7 @@ message_complete_cb (http_parser *p) } void -parser_init () +parser_init (enum http_parser_type type) { num_messages = 0; @@ -723,7 +720,7 @@ parser_init () parser = malloc(sizeof(http_parser)); - 
http_parser_init(parser); + http_parser_init(parser, type); memset(&messages, 0, sizeof messages); @@ -791,7 +788,7 @@ message_eq (int index, const struct message *expected) MESSAGE_CHECK_NUM_EQ(expected, m, http_major); MESSAGE_CHECK_NUM_EQ(expected, m, http_minor); - if (expected->type == REQUEST) { + if (expected->type == HTTP_REQUEST) { MESSAGE_CHECK_NUM_EQ(expected, m, method); } else { MESSAGE_CHECK_NUM_EQ(expected, m, status_code); @@ -869,17 +866,17 @@ print_error (const char *raw, size_t error_location) void test_message (const struct message *message) { - parser_init(); + parser_init(message->type); size_t read; - read = parse(message->type, message->raw, strlen(message->raw)); + read = parse(message->raw, strlen(message->raw)); if (read != strlen(message->raw)) { print_error(message->raw, read); exit(1); } - read = parse(message->type, NULL, 0); + read = parse(NULL, 0); if (read != 0) { print_error(message->raw, read); exit(1); @@ -898,13 +895,13 @@ test_message (const struct message *message) void test_error (const char *buf) { - parser_init(); + parser_init(HTTP_REQUEST); size_t parsed; - parsed = parse(REQUEST, buf, strlen(buf)); + parsed = parse(buf, strlen(buf)); if (parsed != strlen(buf)) goto out; - parsed = parse(REQUEST, NULL, 0); + parsed = parse(NULL, 0); if (parsed != 0) goto out; fprintf(stderr, "\n*** Error expected but none found ***\n\n%s", buf); @@ -929,17 +926,17 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct strcat(total, r2->raw); strcat(total, r3->raw); - parser_init(); + parser_init(r1->type); size_t read; - read = parse(r1->type, total, strlen(total)); + read = parse(total, strlen(total)); if (read != strlen(total)) { print_error(total, read); exit(1); } - read = parse(REQUEST, NULL, 0); + read = parse(NULL, 0); if (read != 0) { print_error(total, read); exit(1); @@ -992,7 +989,7 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess } ops += 1; - parser_init(); + 
parser_init(r1->type); buf1_len = i; strncpy(buf1, total, buf1_len); @@ -1006,25 +1003,25 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess strncpy(buf3, total+j, buf3_len); buf3[buf3_len] = 0; - read = parse(r1->type, buf1, buf1_len); + read = parse(buf1, buf1_len); if (read != buf1_len) { print_error(buf1, read); goto error; } - read = parse(r1->type, buf2, buf2_len); + read = parse(buf2, buf2_len); if (read != buf2_len) { print_error(buf2, read); goto error; } - read = parse(r1->type, buf3, buf3_len); + read = parse(buf3, buf3_len); if (read != buf3_len) { print_error(buf3, read); goto error; } - parse(r1->type, NULL, 0); + parse(NULL, 0); if (3 != num_messages) { fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);