Merge pull request #58 from pgriess/parse_url

Add http_parser_parse_url().
v0.10
Peter Griess 13 years ago
commit c48351fbde

README.md
@@ -164,6 +164,13 @@ and apply following logic:
------------------------ ------------ --------------------------------------------
Parsing URLs
------------
A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
Users of this library may wish to use it to parse URLs constructed from
consecutive `on_url` callbacks.
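As an illustration only (this snippet is not part of the commit; the URL literal and function name are made up), a minimal sketch of that workflow might look like:

#include <stdio.h>
#include <string.h>
#include <http_parser.h>

/* Hypothetical buffer accumulated from consecutive on_url callbacks. */
static const char url[] = "http://example.org:8080/docs?x=1#top";

static int print_url_parts(void)
{
  struct http_parser_url u;

  /* is_connect = 0 because this is not a CONNECT request target */
  if (http_parser_parse_url(url, strlen(url), 0, &u) != 0) {
    fprintf(stderr, "invalid URL\n");
    return -1;
  }

  if (u.field_set & (1 << UF_PATH)) {
    /* field_data holds offsets/lengths into the original buffer (zero-copy) */
    printf("path: %.*s\n", (int) u.field_data[UF_PATH].len,
           url + u.field_data[UF_PATH].off);
  }
  if (u.field_set & (1 << UF_PORT)) {
    printf("port: %u\n", (unsigned) u.port);
  }
  return 0;
}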
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C

http_parser.c
@@ -24,6 +24,8 @@
#include <http_parser.h>
#include <assert.h>
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
#ifndef MIN
@@ -261,7 +263,7 @@ enum state
, s_chunk_size
, s_chunk_parameters
, s_chunk_size_almost_done
, s_headers_almost_done
/* Important: 's_headers_almost_done' must be the last 'header' state. All
 * states beyond this must be 'body' states. It is used for overflow
@@ -356,6 +358,178 @@ static struct {
};
#undef HTTP_STRERROR_GEN
/* Our URL parser.
*
* This is designed to be shared by http_parser_execute() for URL validation,
* hence it has a state transition + byte-for-byte interface. In addition, it
* is meant to be embedded in http_parser_parse_url(), which does the dirty
* work of turning state transitions into URL components for its API.
*
* This function should only be invoked with non-space characters. It is
* assumed that the caller cares about (and can detect) the transition between
* URL and non-URL states by looking for these.
*/
static inline enum state
parse_url_char(enum state s, const char ch, int is_connect)
{
assert(!isspace(ch));
switch (s) {
case s_req_spaces_before_url:
if (ch == '/' || ch == '*') {
return s_req_path;
}
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* CONNECT is followed by a hostname, which begins with alphanum.
* All other methods are followed by '/' or '*' (handled above).
*/
if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
return (is_connect) ? s_req_host : s_req_schema;
}
break;
case s_req_schema:
if (IS_ALPHA(ch)) {
return s;
}
if (ch == ':') {
return s_req_schema_slash;
}
break;
case s_req_schema_slash:
if (ch == '/') {
return s_req_schema_slash_slash;
}
break;
case s_req_schema_slash_slash:
if (ch == '/') {
return s_req_host;
}
break;
case s_req_host:
if (IS_HOST_CHAR(ch)) {
return s;
}
switch (ch) {
case ':':
return s_req_port;
case '/':
return s_req_path;
case '?':
return s_req_query_string_start;
}
break;
case s_req_port:
if (IS_NUM(ch)) {
return s;
}
switch (ch) {
case '/':
return s_req_path;
case '?':
return s_req_query_string_start;
}
break;
case s_req_path:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
return s_req_query_string_start;
case '#':
return s_req_fragment_start;
}
break;
case s_req_query_string_start:
if (IS_URL_CHAR(ch)) {
return s_req_query_string;
}
switch (ch) {
case '?':
/* XXX ignore extra '?' ... is this right? */
return s;
case '#':
return s_req_fragment_start;
}
break;
case s_req_query_string:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
/* allow extra '?' in query string */
return s;
case '#':
return s_req_fragment_start;
}
break;
case s_req_fragment_start:
if (IS_URL_CHAR(ch)) {
return s_req_fragment;
}
switch (ch) {
case '?':
return s_req_fragment;
case '#':
return s;
}
break;
case s_req_fragment:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
case '#':
return s;
}
break;
default:
break;
}
/* We should never fall out of the switch above unless there's an error */
return s_dead;
}
size_t http_parser_execute (http_parser *parser,
                            const http_parser_settings *settings,
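As a sketch of the byte-for-byte interface described in the comment on parse_url_char() above (illustrative only; parse_url_char() is static, so a helper like this would have to live in http_parser.c, and the name validate_url is made up):

/* Feed a buffer through parse_url_char() one byte at a time, the same way
 * http_parser_parse_url() does. Returns nonzero on the first character that
 * drives the machine into s_dead. Callers must not pass whitespace;
 * parse_url_char() asserts !isspace(ch).
 */
static int
validate_url(const char *buf, size_t buflen, int is_connect)
{
  enum state s = s_req_spaces_before_url;
  size_t i;

  for (i = 0; i < buflen; i++) {
    s = parse_url_char(s, buf[i], is_connect);
    if (s == s_dead) {
      return 1;
    }
  }

  return 0;
}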
@@ -749,269 +923,72 @@ size_t http_parser_execute (http_parser *parser,
      {
        if (ch == ' ') break;

        MARK(url);

        state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
        if (state == s_dead) {
          SET_ERRNO(HPE_INVALID_URL);
          goto error;
        }

        break;
      }

      case s_req_schema:
      case s_req_schema_slash:
      case s_req_schema_slash_slash:
      {
        switch (ch) {
          /* No whitespace allowed here */
          case ' ':
          case CR:
          case LF:
            SET_ERRNO(HPE_INVALID_URL);
            goto error;
          default:
            state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
            if (state == s_dead) {
              SET_ERRNO(HPE_INVALID_URL);
              goto error;
            }
        }

        break;
      }

      case s_req_host:
      case s_req_port:
      case s_req_path:
      case s_req_query_string_start:
      case s_req_query_string:
      case s_req_fragment_start:
      case s_req_fragment:
      {
        /* XXX: There is a bug here where if we're on the first character
         * of s_req_host (e.g. our URL is 'http://' and we see a whitespace
         * character, we'll consider this a valid URL. This seems incorrect,
         * but at least it's bug-compatible with what we had before.
         */
        switch (ch) {
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;

          case CR:
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = (ch == CR) ?
              s_req_line_almost_done :
              s_header_field_start;
            break;

          default:
            state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
            if (state == s_dead) {
              SET_ERRNO(HPE_INVALID_URL);
              goto error;
            }
        }
        break;
      }
@@ -1788,3 +1765,98 @@ http_errno_description(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
return http_strerror_tab[err].description;
}
int
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
struct http_parser_url *u)
{
enum state s;
const char *p;
enum http_parser_url_fields uf, old_uf;
u->port = u->field_set = 0;
s = s_req_spaces_before_url;
uf = old_uf = UF_MAX;
for (p = buf; p < buf + buflen; p++) {
if ((s = parse_url_char(s, *p, is_connect)) == s_dead) {
return 1;
}
/* Figure out the next field that we're operating on */
switch (s) {
case s_req_schema:
case s_req_schema_slash:
case s_req_schema_slash_slash:
uf = UF_SCHEMA;
break;
case s_req_host:
uf = UF_HOST;
break;
case s_req_port:
uf = UF_PORT;
break;
case s_req_path:
uf = UF_PATH;
break;
case s_req_query_string_start:
case s_req_query_string:
uf = UF_QUERY;
break;
case s_req_fragment_start:
case s_req_fragment:
uf = UF_FRAGMENT;
break;
default:
assert(!"Unexpected state");
return 1;
}
/* Nothing's changed; soldier on */
if (uf == old_uf) {
u->field_data[uf].len++;
continue;
}
/* We ignore the first character in some fields; without this, we end up
* with the query being "?foo=bar" rather than "foo=bar". Callers probably
* don't want this.
*/
switch (uf) {
case UF_QUERY:
case UF_FRAGMENT:
case UF_PORT:
u->field_data[uf].off = p - buf + 1;
u->field_data[uf].len = 0;
break;
default:
u->field_data[uf].off = p - buf;
u->field_data[uf].len = 1;
break;
}
u->field_set |= (1 << uf);
old_uf = uf;
}
if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
/* Ports have a max value of 2^16 */
if (v > 0xffff) {
return 1;
}
u->port = (uint16_t) v;
}
return 0;
}
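To make the offset/length convention above concrete, here is an illustrative expectation (not a test from this commit; the buffer is made up) showing that the leading '?' and '#' are excluded from the query and fragment runs:

#include <assert.h>
#include <string.h>

static void example_offsets(void)
{
  static const char buf[] = "/a?b=1#c";
  struct http_parser_url u;

  assert(http_parser_parse_url(buf, strlen(buf), 0, &u) == 0);

  /* "/a" */
  assert(u.field_data[UF_PATH].off == 0 && u.field_data[UF_PATH].len == 2);
  /* "b=1", not "?b=1" */
  assert(u.field_data[UF_QUERY].off == 3 && u.field_data[UF_QUERY].len == 3);
  /* "c", not "#c" */
  assert(u.field_data[UF_FRAGMENT].off == 7 && u.field_data[UF_FRAGMENT].len == 1);
}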

http_parser.h
@@ -244,6 +244,35 @@
};
enum http_parser_url_fields
{ UF_SCHEMA = 0
, UF_HOST = 1
, UF_PORT = 2
, UF_PATH = 3
, UF_QUERY = 4
, UF_FRAGMENT = 5
, UF_MAX = 6
};
/* Result structure for http_parser_parse_url().
*
* Callers should index into field_data[] with UF_* values iff field_set
* has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
* because we probably have padding left over), we convert any port to
* a uint16_t.
*/
struct http_parser_url {
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
uint16_t port; /* Converted UF_PORT string */
struct {
uint16_t off; /* Offset into buffer in which field starts */
uint16_t len; /* Length of run in buffer */
} field_data[UF_MAX];
};
void http_parser_init(http_parser *parser, enum http_parser_type type);
@@ -270,6 +299,11 @@ const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
/* Parse a URL; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect,
struct http_parser_url *u);
#ifdef __cplusplus
}
#endif
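A sketch of the field_set / field_data contract declared above, mirroring how test.c's MESSAGE_CHECK_URL_EQ macro copies components out of the original buffer (the helper name is made up):

#include <string.h>
#include <http_parser.h>

/* Copy one URL component into out[], or leave the empty string if the
 * component was not present in the parsed URL (or does not fit).
 */
static void
copy_url_field(const char *buf, const struct http_parser_url *u,
               enum http_parser_url_fields f, char *out, size_t outlen)
{
  out[0] = '\0';

  if ((u->field_set & (1 << f)) && u->field_data[f].len < outlen) {
    memcpy(out, buf + u->field_data[f].off, u->field_data[f].len);
    out[u->field_data[f].len] = '\0';
  }
}

For example, after a successful http_parser_parse_url() call, copy_url_field(buf, &u, UF_HOST, host, sizeof(host)) yields the bare hostname.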

test.c

@@ -44,9 +44,13 @@ struct message {
enum http_parser_type type;
enum http_method method;
int status_code;
char request_path[MAX_ELEMENT_SIZE];
char request_url[MAX_ELEMENT_SIZE];
char fragment[MAX_ELEMENT_SIZE];
char query_string[MAX_ELEMENT_SIZE];
char body[MAX_ELEMENT_SIZE];
size_t body_size;
uint16_t port;
int num_headers;
enum { NONE=0, FIELD, VALUE } last_header_element;
char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE];
@@ -83,6 +87,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 3
,.headers=
@@ -111,6 +118,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/favicon.ico"
,.request_url= "/favicon.ico"
,.num_headers= 8
,.headers=
@@ -137,6 +147,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/dumbfuck"
,.request_url= "/dumbfuck"
,.num_headers= 1
,.headers=
@@ -155,6 +168,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "page=1"
,.fragment= "posts-17408"
,.request_path= "/forums/1/topics/2375"
/* XXX request url does include fragment? */
,.request_url= "/forums/1/topics/2375?page=1#posts-17408"
,.num_headers= 0
@@ -171,6 +187,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_no_headers_no_body/world"
,.request_url= "/get_no_headers_no_body/world"
,.num_headers= 0
,.body= ""
@@ -187,6 +206,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_one_header_no_body"
,.request_url= "/get_one_header_no_body"
,.num_headers= 1
,.headers=
@@ -207,6 +229,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_funky_content_length_body_hello"
,.request_url= "/get_funky_content_length_body_hello"
,.num_headers= 1
,.headers=
@@ -229,6 +254,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= "q=search"
,.fragment= "hey"
,.request_path= "/post_identity_body_world"
,.request_url= "/post_identity_body_world?q=search#hey"
,.num_headers= 3
,.headers=
@@ -253,6 +281,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/post_chunked_all_your_base"
,.request_url= "/post_chunked_all_your_base"
,.num_headers= 1
,.headers=
@@ -276,6 +307,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/two_chunks_mult_zero_end"
,.request_url= "/two_chunks_mult_zero_end"
,.num_headers= 1
,.headers=
@@ -301,6 +335,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/chunked_w_trailing_headers"
,.request_url= "/chunked_w_trailing_headers"
,.num_headers= 3
,.headers=
@@ -326,6 +363,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/chunked_w_bullshit_after_length"
,.request_url= "/chunked_w_bullshit_after_length"
,.num_headers= 1
,.headers=
@@ -343,6 +383,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "foo=\"bar\""
,.fragment= ""
,.request_path= "/with_\"stupid\"_quotes"
,.request_url= "/with_\"stupid\"_quotes?foo=\"bar\""
,.num_headers= 0
,.headers= { }
@@ -366,6 +409,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 3
,.headers= { { "Host", "0.0.0.0:5000" }
@@ -386,6 +432,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "foo=bar?baz"
,.fragment= ""
,.request_path= "/test.cgi"
,.request_url= "/test.cgi?foo=bar?baz"
,.num_headers= 0
,.headers= {}
@@ -404,6 +453,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 0
,.headers= { }
@@ -428,6 +480,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/demo"
,.request_url= "/demo"
,.num_headers= 7
,.upgrade="Hot diggity dogg"
@@ -456,6 +511,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "0-home0.netscape.com:443"
,.num_headers= 2
,.upgrade="some data\r\nand yet even more data"
@@ -475,6 +533,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_REPORT
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 0
,.headers= {}
@@ -491,6 +552,9 @@ const struct message requests[]
,.http_major= 0
,.http_minor= 9
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/"
,.request_url= "/"
,.num_headers= 0
,.headers= {}
@@ -510,6 +574,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_MSEARCH
,.query_string= ""
,.fragment= ""
,.request_path= "*"
,.request_url= "*"
,.num_headers= 3
,.headers= { { "HOST", "239.255.255.250:1900" }
@@ -536,6 +603,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/"
,.request_url= "/"
,.num_headers= 2
,.headers= { { "Line1", "abcdefghijklmno qrs" }
@@ -555,6 +625,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "hail=all"
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org?hail=all"
,.num_headers= 0
,.headers= { }
@@ -571,7 +644,11 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "hail=all"
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org:1234?hail=all"
,.port= 1234
,.num_headers= 0
,.headers= { }
,.body= ""
@@ -587,7 +664,11 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org:1234"
,.port= 1234
,.num_headers= 0
,.headers= { }
,.body= ""
@@ -608,6 +689,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_PATCH
,.query_string= ""
,.fragment= ""
,.request_path= "/file.txt"
,.request_url= "/file.txt"
,.num_headers= 4
,.headers= { { "Host", "www.example.com" }
@@ -630,6 +714,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "HOME0.NETSCAPE.COM:443"
,.num_headers= 2
,.upgrade=""
@@ -651,6 +738,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "q=1"
,.fragment= "narf"
,.request_path= "/δ¶/δt/pope"
,.request_url= "/δ¶/δt/pope?q=1#narf"
,.num_headers= 1
,.headers= { {"Host", "github.com" }
@@ -670,6 +760,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "home_0.netscape.com:443"
,.num_headers= 2
,.upgrade=""
@@ -1306,6 +1399,20 @@ check_num_eq (const struct message *m,
#define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \
if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0
#define MESSAGE_CHECK_URL_EQ(u, expected, found, prop, fn) \
do { \
char ubuf[256]; \
\
if ((u)->field_set & (1 << (fn))) { \
memcpy(ubuf, (found)->request_url + (u)->field_data[(fn)].off, \
(u)->field_data[(fn)].len); \
ubuf[(u)->field_data[(fn)].len] = '\0'; \
} else { \
ubuf[0] = '\0'; \
} \
\
check_str_eq(expected, #prop, expected->prop, ubuf); \
} while(0)
int
message_eq (int index, const struct message *expected)
@@ -1331,6 +1438,28 @@ message_eq (int index, const struct message *expected)
MESSAGE_CHECK_STR_EQ(expected, m, request_url);
/* Check URL components; we can't do this w/ CONNECT since it doesn't
* send us a well-formed URL.
*/
if (*m->request_url && m->method != HTTP_CONNECT) {
struct http_parser_url u;
if (http_parser_parse_url(m->request_url, strlen(m->request_url), 0, &u)) {
fprintf(stderr, "\n\n*** failed to parse URL %s ***\n\n",
m->request_url);
exit(1);
}
m->port = (u.field_set & (1 << UF_PORT)) ?
u.port : 0;
MESSAGE_CHECK_URL_EQ(&u, expected, m, query_string, UF_QUERY);
MESSAGE_CHECK_URL_EQ(&u, expected, m, fragment, UF_FRAGMENT);
MESSAGE_CHECK_URL_EQ(&u, expected, m, request_path, UF_PATH);
MESSAGE_CHECK_NUM_EQ(expected, m, port);
}
if (expected->body_size) {
MESSAGE_CHECK_NUM_EQ(expected, m, body_size);
} else {
