implement parsing of v6 addresses and rejection of 0-length host and ports.

the v6 parsing works by adding extra states for working with the
[] notation for v6 addresses. hosts and ports cannot be 0-length
because we url parsing from ending when we expect those fields to
begin.

http_parser_parse_url gets a free check for the correctness of
CONNECT urls (they can only be host:port).

this addresses the following issues:

i was bored and had my head in this space.
v0.10
David Gwynne 13 years ago committed by Ben Noordhuis
parent 7bc668c5f4
commit 8da60bc423

@ -265,7 +265,10 @@ enum state
, s_req_schema
, s_req_schema_slash
, s_req_schema_slash_slash
, s_req_schema_slash_slash_end
, s_req_host_start
, s_req_host_v6_start
, s_req_host_v6
, s_req_host_v6_end
, s_req_host
, s_req_port_start
, s_req_port
@ -354,6 +357,7 @@ enum header_states
#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#if HTTP_PARSER_STRICT
#define TOKEN(c) (tokens[(unsigned char)c])
@ -410,22 +414,22 @@ int http_message_needs_eof(http_parser *parser);
* URL and non-URL states by looking for these.
*/
static enum state
parse_url_char(enum state s, const char ch, int is_connect)
parse_url_char(enum state s, const char ch)
{
assert(!isspace(ch));
switch (s) {
case s_req_spaces_before_url:
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* All methods except CONNECT are followed by '/' or '*'.
*/
if (ch == '/' || ch == '*') {
return s_req_path;
}
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* CONNECT is followed by a hostname, which begins with alphanum.
* All other methods are followed by '/' or '*' (handled above).
*/
if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
return (is_connect) ? s_req_host : s_req_schema;
if (IS_ALPHA(ch)) {
return s_req_schema;
}
break;
@ -450,17 +454,29 @@ parse_url_char(enum state s, const char ch, int is_connect)
case s_req_schema_slash_slash:
if (ch == '/') {
return s_req_schema_slash_slash_end;
return s_req_host_start;
}
break;
case s_req_host_start:
if (ch == '[') {
return s_req_host_v6_start;
}
if (IS_HOST_CHAR(ch)) {
return s_req_host;
}
break;
case s_req_schema_slash_slash_end:
case s_req_host:
if (IS_HOST_CHAR(ch)) {
return s_req_host;
}
/* FALLTHROUGH */
case s_req_host_v6_end:
switch (ch) {
case ':':
return s_req_port_start;
@ -474,12 +490,19 @@ parse_url_char(enum state s, const char ch, int is_connect)
break;
case s_req_port_start:
case s_req_port:
if (IS_NUM(ch)) {
return s_req_port;
case s_req_host_v6:
if (ch == ']') {
return s_req_host_v6_end;
}
/* FALLTHROUGH */
case s_req_host_v6_start:
if (IS_HEX(ch) || ch == ':') {
return s_req_host_v6;
}
break;
case s_req_port:
switch (ch) {
case '/':
return s_req_path;
@ -488,6 +511,12 @@ parse_url_char(enum state s, const char ch, int is_connect)
return s_req_query_string_start;
}
/* FALLTHROUGH */
case s_req_port_start:
if (IS_NUM(ch)) {
return s_req_port;
}
break;
case s_req_path:
@ -622,12 +651,15 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema:
case s_req_schema_slash:
case s_req_schema_slash_slash:
case s_req_schema_slash_slash_end:
case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start:
case s_req_port:
case s_req_query_string_start:
case s_req_query_string:
case s_req_host:
case s_req_fragment_start:
case s_req_fragment:
url_mark = data;
@ -975,9 +1007,11 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ' ') break;
MARK(url);
if (parser->method == HTTP_CONNECT) {
parser->state = s_req_host_start;
}
parser->state = parse_url_char(
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
parser->state = parse_url_char((enum state)parser->state, ch);
if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL);
goto error;
@ -989,6 +1023,10 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema:
case s_req_schema_slash:
case s_req_schema_slash_slash:
case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_port_start:
{
switch (ch) {
/* No whitespace allowed here */
@ -998,8 +1036,7 @@ size_t http_parser_execute (http_parser *parser,
SET_ERRNO(HPE_INVALID_URL);
goto error;
default:
parser->state = parse_url_char(
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
parser->state = parse_url_char((enum state)parser->state, ch);
if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL);
goto error;
@ -1009,9 +1046,8 @@ size_t http_parser_execute (http_parser *parser,
break;
}
case s_req_schema_slash_slash_end:
case s_req_host:
case s_req_port_start:
case s_req_host_v6_end:
case s_req_port:
case s_req_path:
case s_req_query_string_start:
@ -1019,11 +1055,6 @@ size_t http_parser_execute (http_parser *parser,
case s_req_fragment_start:
case s_req_fragment:
{
/* XXX: There is a bug here where if we're on the first character
* of s_req_host (e.g. our URL is 'http://' and we see a whitespace
* character, we'll consider this a valid URL. This seems incorrect,
* but at least it's bug-compatible with what we had before.
*/
switch (ch) {
case ' ':
parser->state = s_req_http_start;
@ -1039,8 +1070,7 @@ size_t http_parser_execute (http_parser *parser,
CALLBACK_DATA(url);
break;
default:
parser->state = parse_url_char(
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
parser->state = parse_url_char((enum state)parser->state, ch);
if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL);
goto error;
@ -1926,11 +1956,11 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
enum http_parser_url_fields uf, old_uf;
u->port = u->field_set = 0;
s = s_req_spaces_before_url;
s = is_connect ? s_req_host_start : s_req_spaces_before_url;
uf = old_uf = UF_MAX;
for (p = buf; p < buf + buflen; p++) {
s = parse_url_char(s, *p, is_connect);
s = parse_url_char(s, *p);
/* Figure out the next field that we're operating on */
switch (s) {
@ -1940,7 +1970,9 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
/* Skip delimeters */
case s_req_schema_slash:
case s_req_schema_slash_slash:
case s_req_schema_slash_slash_end:
case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6_end:
case s_req_port_start:
case s_req_query_string_start:
case s_req_fragment_start:
@ -1951,6 +1983,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
break;
case s_req_host:
case s_req_host_v6:
uf = UF_HOST;
break;
@ -1988,6 +2021,23 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
old_uf = uf;
}
/* CONNECT requests can only contain "hostname:port" */
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
return 1;
}
/* Make sure we don't end somewhere unexpected */
switch (s) {
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start:
return 1;
default:
break;
}
if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);

@ -1948,6 +1948,72 @@ const struct url_test url_tests[] =
}
,.rv=0
}
, {.name="proxy ipv6 request"
,.url="http://[1:2::3:4]/"
,.is_connect=0
,.u=
{.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 8, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 17, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
}
}
,.rv=0
}
, {.name="CONNECT ipv6 address"
,.url="[1:2::3:4]:443"
,.is_connect=1
,.u=
{.field_set=(1 << UF_HOST) | (1 << UF_PORT)
,.port=443
,.field_data=
{{ 0, 0 } /* UF_SCHEMA */
,{ 1, 8 } /* UF_HOST */
,{ 11, 3 } /* UF_PORT */
,{ 0, 0 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
}
}
,.rv=0
}
, {.name="proxy empty host"
,.url="http://:443/"
,.is_connect=1
,.rv=1
}
, {.name="proxy empty port"
,.url="http://hostname:/"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT empty host"
,.url=":443"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT empty port"
,.url="hostname:"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT with extra bits"
,.url="hostname:443/"
,.is_connect=1
,.rv=1
}
};
void
@ -1993,8 +2059,8 @@ test_parse_url (void)
if (test->rv == 0) {
if (rv != 0) {
printf("\n*** http_parser_parse_url() \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->name, rv);
printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->url, test->name, rv);
exit(1);
}
@ -2012,8 +2078,8 @@ test_parse_url (void)
} else {
/* test->rv != 0 */
if (rv == 0) {
printf("\n*** http_parser_parse_url() \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->name, rv);
printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->url, test->name, rv);
exit(1);
}
}

Loading…
Cancel
Save