implement parsing of v6 addresses and rejection of 0-length hosts and ports.

the v6 parsing works by adding extra states for working with the
[] notation for v6 addresses. hosts and ports cannot be 0-length
because we prevent url parsing from ending when we expect those
fields to begin.

http_parser_parse_url gets a free check for the correctness of
CONNECT urls (they can only be host:port).

this addresses the following issues:

i was bored and had my head in this space.
v0.10
David Gwynne 13 years ago committed by Ben Noordhuis
parent 7bc668c5f4
commit 8da60bc423

@ -265,7 +265,10 @@ enum state
, s_req_schema , s_req_schema
, s_req_schema_slash , s_req_schema_slash
, s_req_schema_slash_slash , s_req_schema_slash_slash
, s_req_schema_slash_slash_end , s_req_host_start
, s_req_host_v6_start
, s_req_host_v6
, s_req_host_v6_end
, s_req_host , s_req_host
, s_req_port_start , s_req_port_start
, s_req_port , s_req_port
@ -354,6 +357,7 @@ enum header_states
#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c) ((c) >= '0' && (c) <= '9') #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
#if HTTP_PARSER_STRICT #if HTTP_PARSER_STRICT
#define TOKEN(c) (tokens[(unsigned char)c]) #define TOKEN(c) (tokens[(unsigned char)c])
@ -410,22 +414,22 @@ int http_message_needs_eof(http_parser *parser);
* URL and non-URL states by looking for these. * URL and non-URL states by looking for these.
*/ */
static enum state static enum state
parse_url_char(enum state s, const char ch, int is_connect) parse_url_char(enum state s, const char ch)
{ {
assert(!isspace(ch)); assert(!isspace(ch));
switch (s) { switch (s) {
case s_req_spaces_before_url: case s_req_spaces_before_url:
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* All methods except CONNECT are followed by '/' or '*'.
*/
if (ch == '/' || ch == '*') { if (ch == '/' || ch == '*') {
return s_req_path; return s_req_path;
} }
/* Proxied requests are followed by scheme of an absolute URI (alpha). if (IS_ALPHA(ch)) {
* CONNECT is followed by a hostname, which begins with alphanum. return s_req_schema;
* All other methods are followed by '/' or '*' (handled above).
*/
if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
return (is_connect) ? s_req_host : s_req_schema;
} }
break; break;
@ -450,17 +454,29 @@ parse_url_char(enum state s, const char ch, int is_connect)
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
if (ch == '/') { if (ch == '/') {
return s_req_schema_slash_slash_end; return s_req_host_start;
}
break;
case s_req_host_start:
if (ch == '[') {
return s_req_host_v6_start;
}
if (IS_HOST_CHAR(ch)) {
return s_req_host;
} }
break; break;
case s_req_schema_slash_slash_end:
case s_req_host: case s_req_host:
if (IS_HOST_CHAR(ch)) { if (IS_HOST_CHAR(ch)) {
return s_req_host; return s_req_host;
} }
/* FALLTHROUGH */
case s_req_host_v6_end:
switch (ch) { switch (ch) {
case ':': case ':':
return s_req_port_start; return s_req_port_start;
@ -474,12 +490,19 @@ parse_url_char(enum state s, const char ch, int is_connect)
break; break;
case s_req_port_start: case s_req_host_v6:
case s_req_port: if (ch == ']') {
if (IS_NUM(ch)) { return s_req_host_v6_end;
return s_req_port;
} }
/* FALLTHROUGH */
case s_req_host_v6_start:
if (IS_HEX(ch) || ch == ':') {
return s_req_host_v6;
}
break;
case s_req_port:
switch (ch) { switch (ch) {
case '/': case '/':
return s_req_path; return s_req_path;
@ -488,6 +511,12 @@ parse_url_char(enum state s, const char ch, int is_connect)
return s_req_query_string_start; return s_req_query_string_start;
} }
/* FALLTHROUGH */
case s_req_port_start:
if (IS_NUM(ch)) {
return s_req_port;
}
break; break;
case s_req_path: case s_req_path:
@ -622,12 +651,15 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema: case s_req_schema:
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_schema_slash_slash_end: case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start: case s_req_port_start:
case s_req_port: case s_req_port:
case s_req_query_string_start: case s_req_query_string_start:
case s_req_query_string: case s_req_query_string:
case s_req_host:
case s_req_fragment_start: case s_req_fragment_start:
case s_req_fragment: case s_req_fragment:
url_mark = data; url_mark = data;
@ -975,9 +1007,11 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ' ') break; if (ch == ' ') break;
MARK(url); MARK(url);
if (parser->method == HTTP_CONNECT) {
parser->state = s_req_host_start;
}
parser->state = parse_url_char( parser->state = parse_url_char((enum state)parser->state, ch);
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
if (parser->state == s_dead) { if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL); SET_ERRNO(HPE_INVALID_URL);
goto error; goto error;
@ -989,6 +1023,10 @@ size_t http_parser_execute (http_parser *parser,
case s_req_schema: case s_req_schema:
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_port_start:
{ {
switch (ch) { switch (ch) {
/* No whitespace allowed here */ /* No whitespace allowed here */
@ -998,8 +1036,7 @@ size_t http_parser_execute (http_parser *parser,
SET_ERRNO(HPE_INVALID_URL); SET_ERRNO(HPE_INVALID_URL);
goto error; goto error;
default: default:
parser->state = parse_url_char( parser->state = parse_url_char((enum state)parser->state, ch);
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
if (parser->state == s_dead) { if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL); SET_ERRNO(HPE_INVALID_URL);
goto error; goto error;
@ -1009,9 +1046,8 @@ size_t http_parser_execute (http_parser *parser,
break; break;
} }
case s_req_schema_slash_slash_end:
case s_req_host: case s_req_host:
case s_req_port_start: case s_req_host_v6_end:
case s_req_port: case s_req_port:
case s_req_path: case s_req_path:
case s_req_query_string_start: case s_req_query_string_start:
@ -1019,11 +1055,6 @@ size_t http_parser_execute (http_parser *parser,
case s_req_fragment_start: case s_req_fragment_start:
case s_req_fragment: case s_req_fragment:
{ {
/* XXX: There is a bug here where if we're on the first character
* of s_req_host (e.g. our URL is 'http://' and we see a whitespace
* character, we'll consider this a valid URL. This seems incorrect,
* but at least it's bug-compatible with what we had before.
*/
switch (ch) { switch (ch) {
case ' ': case ' ':
parser->state = s_req_http_start; parser->state = s_req_http_start;
@ -1039,8 +1070,7 @@ size_t http_parser_execute (http_parser *parser,
CALLBACK_DATA(url); CALLBACK_DATA(url);
break; break;
default: default:
parser->state = parse_url_char( parser->state = parse_url_char((enum state)parser->state, ch);
(enum state)parser->state, ch, parser->method == HTTP_CONNECT);
if (parser->state == s_dead) { if (parser->state == s_dead) {
SET_ERRNO(HPE_INVALID_URL); SET_ERRNO(HPE_INVALID_URL);
goto error; goto error;
@ -1926,11 +1956,11 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
enum http_parser_url_fields uf, old_uf; enum http_parser_url_fields uf, old_uf;
u->port = u->field_set = 0; u->port = u->field_set = 0;
s = s_req_spaces_before_url; s = is_connect ? s_req_host_start : s_req_spaces_before_url;
uf = old_uf = UF_MAX; uf = old_uf = UF_MAX;
for (p = buf; p < buf + buflen; p++) { for (p = buf; p < buf + buflen; p++) {
s = parse_url_char(s, *p, is_connect); s = parse_url_char(s, *p);
/* Figure out the next field that we're operating on */ /* Figure out the next field that we're operating on */
switch (s) { switch (s) {
@ -1940,7 +1970,9 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
/* Skip delimeters */ /* Skip delimeters */
case s_req_schema_slash: case s_req_schema_slash:
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
case s_req_schema_slash_slash_end: case s_req_host_start:
case s_req_host_v6_start:
case s_req_host_v6_end:
case s_req_port_start: case s_req_port_start:
case s_req_query_string_start: case s_req_query_string_start:
case s_req_fragment_start: case s_req_fragment_start:
@ -1951,6 +1983,7 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
break; break;
case s_req_host: case s_req_host:
case s_req_host_v6:
uf = UF_HOST; uf = UF_HOST;
break; break;
@ -1988,6 +2021,23 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
old_uf = uf; old_uf = uf;
} }
/* CONNECT requests can only contain "hostname:port" */
if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
return 1;
}
/* Make sure we don't end somewhere unexpected */
switch (s) {
case s_req_host_v6_start:
case s_req_host_v6:
case s_req_host_v6_end:
case s_req_host:
case s_req_port_start:
return 1;
default:
break;
}
if (u->field_set & (1 << UF_PORT)) { if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */ /* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);

@ -1948,6 +1948,72 @@ const struct url_test url_tests[] =
} }
,.rv=0 ,.rv=0
} }
, {.name="proxy ipv6 request"
,.url="http://[1:2::3:4]/"
,.is_connect=0
,.u=
{.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH)
,.port=0
,.field_data=
{{ 0, 4 } /* UF_SCHEMA */
,{ 8, 8 } /* UF_HOST */
,{ 0, 0 } /* UF_PORT */
,{ 17, 1 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
}
}
,.rv=0
}
, {.name="CONNECT ipv6 address"
,.url="[1:2::3:4]:443"
,.is_connect=1
,.u=
{.field_set=(1 << UF_HOST) | (1 << UF_PORT)
,.port=443
,.field_data=
{{ 0, 0 } /* UF_SCHEMA */
,{ 1, 8 } /* UF_HOST */
,{ 11, 3 } /* UF_PORT */
,{ 0, 0 } /* UF_PATH */
,{ 0, 0 } /* UF_QUERY */
,{ 0, 0 } /* UF_FRAGMENT */
}
}
,.rv=0
}
, {.name="proxy empty host"
,.url="http://:443/"
,.is_connect=1
,.rv=1
}
, {.name="proxy empty port"
,.url="http://hostname:/"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT empty host"
,.url=":443"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT empty port"
,.url="hostname:"
,.is_connect=1
,.rv=1
}
, {.name="CONNECT with extra bits"
,.url="hostname:443/"
,.is_connect=1
,.rv=1
}
}; };
void void
@ -1993,8 +2059,8 @@ test_parse_url (void)
if (test->rv == 0) { if (test->rv == 0) {
if (rv != 0) { if (rv != 0) {
printf("\n*** http_parser_parse_url() \"%s\" test failed, " printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->name, rv); "unexpected rv %d ***\n\n", test->url, test->name, rv);
exit(1); exit(1);
} }
@ -2012,8 +2078,8 @@ test_parse_url (void)
} else { } else {
/* test->rv != 0 */ /* test->rv != 0 */
if (rv == 0) { if (rv == 0) {
printf("\n*** http_parser_parse_url() \"%s\" test failed, " printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, "
"unexpected rv %d ***\n\n", test->name, rv); "unexpected rv %d ***\n\n", test->url, test->name, rv);
exit(1); exit(1);
} }
} }

Loading…
Cancel
Save