diff --git a/README.md b/README.md index 405dd5f..700c3ac 100644 --- a/README.md +++ b/README.md @@ -164,6 +164,13 @@ and apply following logic: ------------------------ ------------ -------------------------------------------- +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. +Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. + See examples of reading in headers: * [partial example](http://gist.github.com/155877) in C diff --git a/http_parser.c b/http_parser.c index f87dd17..5ab2be2 100644 --- a/http_parser.c +++ b/http_parser.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #ifndef MIN @@ -261,7 +263,7 @@ enum state , s_chunk_size , s_chunk_parameters , s_chunk_size_almost_done - + , s_headers_almost_done /* Important: 's_headers_almost_done' must be the last 'header' state. All * states beyond this must be 'body' states. It is used for overflow @@ -356,6 +358,178 @@ static struct { }; #undef HTTP_STRERROR_GEN +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions into URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static inline enum state +parse_url_char(enum state s, const char ch, int is_connect) +{ + assert(!isspace(ch)); + + switch (s) { + case s_req_spaces_before_url: + if (ch == '/' || ch == '*') { + return s_req_path; + } + + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * CONNECT is followed by a hostname, which begins with alphanum. 
+ * All other methods are followed by '/' or '*' (handled above). + */ + if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) { + return (is_connect) ? s_req_host : s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_host; + } + + break; + + case s_req_host: + if (IS_HOST_CHAR(ch)) { + return s; + } + + switch (ch) { + case ':': + return s_req_port; + + case '/': + return s_req_path; + + case '?': + return s_req_query_string_start; + } + + break; + + case s_req_port: + if (IS_NUM(ch)) { + return s; + } + + switch (ch) { + case '/': + return s_req_path; + + case '?': + return s_req_query_string_start; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* XXX ignore extra '?' ... is this right? */ + return s; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} size_t http_parser_execute (http_parser *parser, const http_parser_settings *settings, @@ -749,269 +923,72 @@ size_t http_parser_execute (http_parser *parser, { if (ch == ' ') break; - if (ch == '/' || ch == '*') { - MARK(url); - state = s_req_path; - break; - } + MARK(url); - /* Proxied requests are followed by scheme of an absolute URI (alpha). - * CONNECT is followed by a hostname, which begins with alphanum. - * All other methods are followed by '/' or '*' (handled above). - */ - if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) { - MARK(url); - state = (parser->method == HTTP_CONNECT) ? 
s_req_host : s_req_schema; - break; + state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); + if (state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; } - SET_ERRNO(HPE_INVALID_URL); - goto error; + break; } case s_req_schema: - { - if (IS_ALPHA(ch)) break; - - if (ch == ':') { - state = s_req_schema_slash; - break; - } - - SET_ERRNO(HPE_INVALID_URL); - goto error; - } - case s_req_schema_slash: - STRICT_CHECK(ch != '/'); - state = s_req_schema_slash_slash; - break; - case s_req_schema_slash_slash: - STRICT_CHECK(ch != '/'); - state = s_req_host; - break; - - case s_req_host: - { - if (IS_HOST_CHAR(ch)) break; - switch (ch) { - case ':': - state = s_req_port; - break; - case '/': - state = s_req_path; - break; - case ' ': - /* The request line looks like: - * "GET http://foo.bar.com HTTP/1.1" - * That is, there is no path. - */ - CALLBACK(url); - state = s_req_http_start; - break; - case '?': - state = s_req_query_string_start; - break; - default: - SET_ERRNO(HPE_INVALID_HOST); - goto error; - } - break; - } - - case s_req_port: { - if (IS_NUM(ch)) break; - switch (ch) { - case '/': - state = s_req_path; - break; - case ' ': - /* The request line looks like: - * "GET http://foo.bar.com:1234 HTTP/1.1" - * That is, there is no path. 
- */ - CALLBACK(url); - state = s_req_http_start; - break; - case '?': - state = s_req_query_string_start; - break; - default: - SET_ERRNO(HPE_INVALID_PORT); - goto error; - } - break; - } - - case s_req_path: - { - if (IS_URL_CHAR(ch)) break; - switch (ch) { + /* No whitespace allowed here */ case ' ': - CALLBACK(url); - state = s_req_http_start; - break; case CR: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_req_line_almost_done; - break; case LF: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_header_field_start; - break; - case '?': - state = s_req_query_string_start; - break; - case '#': - state = s_req_fragment_start; - break; - default: - SET_ERRNO(HPE_INVALID_PATH); + SET_ERRNO(HPE_INVALID_URL); goto error; - } - break; - } - - case s_req_query_string_start: - { - if (IS_URL_CHAR(ch)) { - state = s_req_query_string; - break; - } - - switch (ch) { - case '?': - break; /* XXX ignore extra '?' ... is this right? */ - case ' ': - CALLBACK(url); - state = s_req_http_start; - break; - case CR: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_req_line_almost_done; - break; - case LF: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_header_field_start; - break; - case '#': - state = s_req_fragment_start; - break; default: - SET_ERRNO(HPE_INVALID_QUERY_STRING); - goto error; + state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); + if (state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } } - break; - } - case s_req_query_string: - { - if (IS_URL_CHAR(ch)) break; - - switch (ch) { - case '?': - /* allow extra '?' 
in query string */ - break; - case ' ': - CALLBACK(url); - state = s_req_http_start; - break; - case CR: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_req_line_almost_done; - break; - case LF: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_header_field_start; - break; - case '#': - state = s_req_fragment_start; - break; - default: - SET_ERRNO(HPE_INVALID_QUERY_STRING); - goto error; - } break; } + case s_req_host: + case s_req_port: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: case s_req_fragment_start: - { - if (IS_URL_CHAR(ch)) { - state = s_req_fragment; - break; - } - - switch (ch) { - case ' ': - CALLBACK(url); - state = s_req_http_start; - break; - case CR: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_req_line_almost_done; - break; - case LF: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_header_field_start; - break; - case '?': - state = s_req_fragment; - break; - case '#': - break; - default: - SET_ERRNO(HPE_INVALID_FRAGMENT); - goto error; - } - break; - } - case s_req_fragment: { - if (IS_URL_CHAR(ch)) break; - + /* XXX: There is a bug here where if we're on the first character + * of s_req_host (e.g. our URL is 'http://' and we see a whitespace + * character), we'll consider this a valid URL. This seems incorrect, + * but at least it's bug-compatible with what we had before. + */ switch (ch) { case ' ': CALLBACK(url); state = s_req_http_start; break; case CR: - CALLBACK(url); - parser->http_major = 0; - parser->http_minor = 9; - state = s_req_line_almost_done; - break; case LF: CALLBACK(url); parser->http_major = 0; parser->http_minor = 9; - state = s_header_field_start; - break; - case '?': - case '#': + state = (ch == CR) ? 
+ s_req_line_almost_done : + s_header_field_start; break; default: - SET_ERRNO(HPE_INVALID_FRAGMENT); - goto error; + state = parse_url_char(state, ch, parser->method == HTTP_CONNECT); + if (state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } } break; } @@ -1788,3 +1765,98 @@ http_errno_description(enum http_errno err) { assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); return http_strerror_tab[err].description; } + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + + u->port = u->field_set = 0; + s = s_req_spaces_before_url; + uf = old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + if ((s = parse_url_char(s, *p, is_connect)) == s_dead) { + return 1; + } + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + uf = UF_SCHEMA; + break; + + case s_req_host: + uf = UF_HOST; + break; + + case s_req_port: + uf = UF_PORT; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string_start: + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment_start: + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + /* We ignore the first character in some fields; without this, we end up + * with the query being "?foo=bar" rather than "foo=bar". Callers probably + * don't want this. 
+ */ + switch (uf) { + case UF_QUERY: + case UF_FRAGMENT: + case UF_PORT: + u->field_data[uf].off = p - buf + 1; + u->field_data[uf].len = 0; + break; + + default: + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + break; + } + + u->field_set |= (1 << uf); + old_uf = uf; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 - 1 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} diff --git a/http_parser.h b/http_parser.h index 69f66d6..f80ecff 100644 --- a/http_parser.h +++ b/http_parser.h @@ -244,6 +244,35 @@ struct http_parser_settings { }; +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_MAX = 6 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. 
+ */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + void http_parser_init(http_parser *parser, enum http_parser_type type); @@ -270,6 +299,11 @@ const char *http_errno_name(enum http_errno err); /* Return a string description of the given error */ const char *http_errno_description(enum http_errno err); +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + #ifdef __cplusplus } #endif diff --git a/test.c b/test.c index 3810283..807921f 100644 --- a/test.c +++ b/test.c @@ -44,9 +44,13 @@ struct message { enum http_parser_type type; enum http_method method; int status_code; + char request_path[MAX_ELEMENT_SIZE]; char request_url[MAX_ELEMENT_SIZE]; + char fragment[MAX_ELEMENT_SIZE]; + char query_string[MAX_ELEMENT_SIZE]; char body[MAX_ELEMENT_SIZE]; size_t body_size; + uint16_t port; int num_headers; enum { NONE=0, FIELD, VALUE } last_header_element; char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; @@ -83,6 +87,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" ,.request_url= "/test" ,.num_headers= 3 ,.headers= @@ -111,6 +118,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/favicon.ico" ,.request_url= "/favicon.ico" ,.num_headers= 8 ,.headers= @@ -137,6 +147,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/dumbfuck" ,.request_url= "/dumbfuck" ,.num_headers= 1 ,.headers= @@ -155,6 +168,9 @@ const struct message requests[] = ,.http_major= 1 
,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "page=1" + ,.fragment= "posts-17408" + ,.request_path= "/forums/1/topics/2375" /* XXX request url does include fragment? */ ,.request_url= "/forums/1/topics/2375?page=1#posts-17408" ,.num_headers= 0 @@ -171,6 +187,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_no_headers_no_body/world" ,.request_url= "/get_no_headers_no_body/world" ,.num_headers= 0 ,.body= "" @@ -187,6 +206,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_one_header_no_body" ,.request_url= "/get_one_header_no_body" ,.num_headers= 1 ,.headers= @@ -207,6 +229,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 0 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_funky_content_length_body_hello" ,.request_url= "/get_funky_content_length_body_hello" ,.num_headers= 1 ,.headers= @@ -229,6 +254,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_POST + ,.query_string= "q=search" + ,.fragment= "hey" + ,.request_path= "/post_identity_body_world" ,.request_url= "/post_identity_body_world?q=search#hey" ,.num_headers= 3 ,.headers= @@ -253,6 +281,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/post_chunked_all_your_base" ,.request_url= "/post_chunked_all_your_base" ,.num_headers= 1 ,.headers= @@ -276,6 +307,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/two_chunks_mult_zero_end" ,.request_url= "/two_chunks_mult_zero_end" ,.num_headers= 1 ,.headers= @@ -301,6 +335,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_POST + ,.query_string= "" + 
,.fragment= "" + ,.request_path= "/chunked_w_trailing_headers" ,.request_url= "/chunked_w_trailing_headers" ,.num_headers= 3 ,.headers= @@ -326,6 +363,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_bullshit_after_length" ,.request_url= "/chunked_w_bullshit_after_length" ,.num_headers= 1 ,.headers= @@ -343,6 +383,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "foo=\"bar\"" + ,.fragment= "" + ,.request_path= "/with_\"stupid\"_quotes" ,.request_url= "/with_\"stupid\"_quotes?foo=\"bar\"" ,.num_headers= 0 ,.headers= { } @@ -366,6 +409,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 0 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" ,.request_url= "/test" ,.num_headers= 3 ,.headers= { { "Host", "0.0.0.0:5000" } @@ -386,6 +432,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "foo=bar?baz" + ,.fragment= "" + ,.request_path= "/test.cgi" ,.request_url= "/test.cgi?foo=bar?baz" ,.num_headers= 0 ,.headers= {} @@ -404,6 +453,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" ,.request_url= "/test" ,.num_headers= 0 ,.headers= { } @@ -428,6 +480,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" ,.request_url= "/demo" ,.num_headers= 7 ,.upgrade="Hot diggity dogg" @@ -456,6 +511,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 0 ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" ,.request_url= "0-home0.netscape.com:443" ,.num_headers= 2 ,.upgrade="some data\r\nand yet even more data" @@ -475,6 +533,9 @@ const struct message requests[] = 
,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_REPORT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" ,.request_url= "/test" ,.num_headers= 0 ,.headers= {} @@ -491,6 +552,9 @@ const struct message requests[] = ,.http_major= 0 ,.http_minor= 9 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" ,.request_url= "/" ,.num_headers= 0 ,.headers= {} @@ -510,6 +574,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_MSEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "*" ,.request_url= "*" ,.num_headers= 3 ,.headers= { { "HOST", "239.255.255.250:1900" } @@ -536,6 +603,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" ,.request_url= "/" ,.num_headers= 2 ,.headers= { { "Line1", "abcdefghijklmno qrs" } @@ -555,6 +625,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" ,.request_url= "http://hypnotoad.org?hail=all" ,.num_headers= 0 ,.headers= { } @@ -571,7 +644,11 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" ,.request_url= "http://hypnotoad.org:1234?hail=all" + ,.port= 1234 ,.num_headers= 0 ,.headers= { } ,.body= "" @@ -587,7 +664,11 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" ,.request_url= "http://hypnotoad.org:1234" + ,.port= 1234 ,.num_headers= 0 ,.headers= { } ,.body= "" @@ -608,6 +689,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_PATCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/file.txt" ,.request_url= "/file.txt" ,.num_headers= 4 ,.headers= { { "Host", "www.example.com" } @@ -630,6 +714,9 @@ const struct message 
requests[] = ,.http_major= 1 ,.http_minor= 0 ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" ,.request_url= "HOME0.NETSCAPE.COM:443" ,.num_headers= 2 ,.upgrade="" @@ -651,6 +738,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 1 ,.method= HTTP_GET + ,.query_string= "q=1" + ,.fragment= "narf" + ,.request_path= "/δ¶/δt/pope" ,.request_url= "/δ¶/δt/pope?q=1#narf" ,.num_headers= 1 ,.headers= { {"Host", "github.com" } @@ -670,6 +760,9 @@ const struct message requests[] = ,.http_major= 1 ,.http_minor= 0 ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" ,.request_url= "home_0.netscape.com:443" ,.num_headers= 2 ,.upgrade="" @@ -1306,6 +1399,20 @@ check_num_eq (const struct message *m, #define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \ if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0 +#define MESSAGE_CHECK_URL_EQ(u, expected, found, prop, fn) \ +do { \ + char ubuf[256]; \ + \ + if ((u)->field_set & (1 << (fn))) { \ + memcpy(ubuf, (found)->request_url + (u)->field_data[(fn)].off, \ + (u)->field_data[(fn)].len); \ + ubuf[(u)->field_data[(fn)].len] = '\0'; \ + } else { \ + ubuf[0] = '\0'; \ + } \ + \ + check_str_eq(expected, #prop, expected->prop, ubuf); \ +} while(0) int message_eq (int index, const struct message *expected) @@ -1331,6 +1438,28 @@ message_eq (int index, const struct message *expected) MESSAGE_CHECK_STR_EQ(expected, m, request_url); + + /* Check URL components; we can't do this w/ CONNECT since it doesn't + * send us a well-formed URL. + */ + if (*m->request_url && m->method != HTTP_CONNECT) { + struct http_parser_url u; + + if (http_parser_parse_url(m->request_url, strlen(m->request_url), 0, &u)) { + fprintf(stderr, "\n\n*** failed to parse URL %s ***\n\n", + m->request_url); + exit(1); + } + + m->port = (u.field_set & (1 << UF_PORT)) ? 
+ u.port : 0; + + MESSAGE_CHECK_URL_EQ(&u, expected, m, query_string, UF_QUERY); + MESSAGE_CHECK_URL_EQ(&u, expected, m, fragment, UF_FRAGMENT); + MESSAGE_CHECK_URL_EQ(&u, expected, m, request_path, UF_PATH); + MESSAGE_CHECK_NUM_EQ(expected, m, port); + } + if (expected->body_size) { MESSAGE_CHECK_NUM_EQ(expected, m, body_size); } else {