Facility to report detailed parsing errors.

- Add http_errno enum w/ values for many parsing error conditions. The
  errno is stashed in http_parser.state, with the 0x80 bit set to mark it.
- Report line numbers on error generation if the (new) HTTP_PARSER_DEBUG
  cpp symbol is set. Increases http_parser struct size by 8 bytes in
  this case.
- Add http_errno_*() methods to help turn errno values into
  human-readable messages.
v0.6
Peter Griess 14 years ago
parent ddbbc07c10
commit 9114e58a77

@ -1,6 +1,6 @@
CPPFLAGS?=-Wall -Wextra -Werror -I.
OPT_DEBUG=$(CPPFLAGS) -O0 -g -DHTTP_PARSER_STRICT=1
OPT_FAST=$(CPPFLAGS) -O3 -DHTTP_PARSER_STRICT=0
OPT_DEBUG=$(CPPFLAGS) -O0 -g -DHTTP_PARSER_STRICT=1 -DHTTP_PARSER_DEBUG=1
OPT_FAST=$(CPPFLAGS) -O3 -DHTTP_PARSER_STRICT=0 -DHTTP_PARSER_DEBUG=0
CC?=gcc
AR?=ar

@ -31,10 +31,24 @@
#endif
/* SET_ERRNO(e): record error code `e` on the parser.
 *
 * The parser's state field is overwritten with `e` OR'ed with the 0x80
 * marker bit. The macro expects a variable named `parser` to be in scope
 * at the expansion site.
 */
#if HTTP_PARSER_DEBUG
/* Debug variant: additionally stores __LINE__ of the expansion site in
 * parser->error_lineno.
 */
#define SET_ERRNO(e) \
do { \
parser->state = 0x80 | (e); \
parser->error_lineno = __LINE__; \
} while (0)
#else
#define SET_ERRNO(e) do { parser->state = 0x80 | (e); } while(0)
#endif
#define CALLBACK2(FOR) \
do { \
if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser)) return (p - data); \
if (0 != settings->on_##FOR(parser)) { \
SET_ERRNO(HPE_CB_##FOR); \
return (p - data); \
} \
} \
} while (0)
@ -52,6 +66,7 @@ do { \
FOR##_mark, \
p - FOR##_mark)) \
{ \
SET_ERRNO(HPE_CB_##FOR); \
return (p - data); \
} \
} \
@ -319,7 +334,13 @@ enum header_states
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) if (cond) goto error
# define STRICT_CHECK(cond) \
do { \
if (cond) { \
SET_ERRNO(HPE_STRICT); \
goto error; \
} \
} while (0)
# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
#else
# define STRICT_CHECK(cond)
@ -327,6 +348,17 @@ enum header_states
#endif
/* Map errno values to strings for human-readable output.
 *
 * HTTP_ERRNO_MAP expands HTTP_STRERROR_GEN once per error, yielding one
 * { "HPE_<name>", description } entry each, in the order the map lists them.
 * The table is read-only lookup data, so it is declared const.
 */
#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
static const struct {
  const char *name;
  const char *description;
} http_strerror_tab[] = {
  HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
};
#undef HTTP_STRERROR_GEN
size_t http_parser_execute (http_parser *parser,
const http_parser_settings *settings,
const char *data,
@ -336,12 +368,21 @@ size_t http_parser_execute (http_parser *parser,
int8_t unhex_val;
const char *p = data, *pe;
int64_t to_read;
enum state state = (enum state) parser->state;
enum header_states header_state = (enum header_states) parser->header_state;
enum state state;
enum header_states header_state;
uint64_t index = parser->index;
uint64_t nread = parser->nread;
/* We're in an error state. Don't attempt to do anything lest we overwrite
* the error information that landed us here.
*/
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
return 0;
}
state = (enum state) parser->state;
header_state = (enum header_states) parser->header_state;
if (len == 0) {
switch (state) {
case s_body_identity_eof:
@ -355,7 +396,8 @@ size_t http_parser_execute (http_parser *parser,
return 0;
default:
return 1; // error
SET_ERRNO(HPE_INVALID_EOF_STATE);
return 1;
}
}
@ -392,7 +434,10 @@ size_t http_parser_execute (http_parser *parser,
if (PARSING_HEADER(state)) {
++nread;
/* Buffer overflow attack */
if (nread > HTTP_MAX_HEADER_SIZE) goto error;
if (nread > HTTP_MAX_HEADER_SIZE) {
SET_ERRNO(HPE_HEADER_OVERFLOW);
goto error;
}
}
switch (state) {
@ -401,6 +446,7 @@ size_t http_parser_execute (http_parser *parser,
/* this state is used after a 'Connection: close' message
* the parser will error out if it reads another message
*/
SET_ERRNO(HPE_CLOSED_CONNECTION);
goto error;
case s_start_req_or_res:
@ -426,7 +472,11 @@ size_t http_parser_execute (http_parser *parser,
parser->type = HTTP_RESPONSE;
state = s_res_HT;
} else {
if (ch != 'E') goto error;
if (ch != 'E') {
SET_ERRNO(HPE_INVALID_CONSTANT);
goto error;
}
parser->type = HTTP_REQUEST;
parser->method = HTTP_HEAD;
index = 2;
@ -451,6 +501,7 @@ size_t http_parser_execute (http_parser *parser,
break;
default:
SET_ERRNO(HPE_INVALID_CONSTANT);
goto error;
}
break;
@ -477,7 +528,11 @@ size_t http_parser_execute (http_parser *parser,
break;
case s_res_first_http_major:
if (ch < '1' || ch > '9') goto error;
if (ch < '1' || ch > '9') {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_major = ch - '0';
state = s_res_http_major;
break;
@ -490,18 +545,29 @@ size_t http_parser_execute (http_parser *parser,
break;
}
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_major *= 10;
parser->http_major += ch - '0';
if (parser->http_major > 999) goto error;
if (parser->http_major > 999) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
break;
}
/* first digit of minor HTTP version */
case s_res_first_http_minor:
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_minor = ch - '0';
state = s_res_http_minor;
break;
@ -514,12 +580,19 @@ size_t http_parser_execute (http_parser *parser,
break;
}
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_minor *= 10;
parser->http_minor += ch - '0';
if (parser->http_minor > 999) goto error;
if (parser->http_minor > 999) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
break;
}
@ -529,6 +602,8 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ' ') {
break;
}
SET_ERRNO(HPE_INVALID_STATUS);
goto error;
}
parser->status_code = ch - '0';
@ -550,6 +625,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_header_field_start;
break;
default:
SET_ERRNO(HPE_INVALID_STATUS);
goto error;
}
break;
@ -558,7 +634,11 @@ size_t http_parser_execute (http_parser *parser,
parser->status_code *= 10;
parser->status_code += ch - '0';
if (parser->status_code > 999) goto error;
if (parser->status_code > 999) {
SET_ERRNO(HPE_INVALID_STATUS);
goto error;
}
break;
}
@ -590,7 +670,10 @@ size_t http_parser_execute (http_parser *parser,
CALLBACK2(message_begin);
if (!IS_ALPHA(LOWER(ch))) goto error;
if (!IS_ALPHA(LOWER(ch))) {
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
start_req_method_assign:
parser->method = (enum http_method) 0;
@ -611,7 +694,9 @@ size_t http_parser_execute (http_parser *parser,
case 'S': parser->method = HTTP_SUBSCRIBE; break;
case 'T': parser->method = HTTP_TRACE; break;
case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
default: goto error;
default:
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
state = s_req_method;
break;
@ -619,8 +704,10 @@ size_t http_parser_execute (http_parser *parser,
case s_req_method:
{
if (ch == '\0')
if (ch == '\0') {
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
const char *matcher = method_strings[parser->method];
if (ch == ' ' && matcher[index] == '\0') {
@ -658,6 +745,7 @@ size_t http_parser_execute (http_parser *parser,
} else if (index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
parser->method = HTTP_PROPPATCH;
} else {
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
@ -687,6 +775,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
SET_ERRNO(HPE_INVALID_URL);
goto error;
}
@ -701,6 +790,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
SET_ERRNO(HPE_INVALID_URL);
goto error;
}
@ -737,6 +827,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_req_query_string_start;
break;
default:
SET_ERRNO(HPE_INVALID_HOST);
goto error;
}
break;
@ -762,6 +853,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_req_query_string_start;
break;
default:
SET_ERRNO(HPE_INVALID_PORT);
goto error;
}
break;
@ -800,6 +892,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_req_fragment_start;
break;
default:
SET_ERRNO(HPE_INVALID_PATH);
goto error;
}
break;
@ -836,6 +929,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_req_fragment_start;
break;
default:
SET_ERRNO(HPE_INVALID_QUERY_STRING);
goto error;
}
break;
@ -873,6 +967,7 @@ size_t http_parser_execute (http_parser *parser,
state = s_req_fragment_start;
break;
default:
SET_ERRNO(HPE_INVALID_QUERY_STRING);
goto error;
}
break;
@ -910,6 +1005,7 @@ size_t http_parser_execute (http_parser *parser,
case '#':
break;
default:
SET_ERRNO(HPE_INVALID_FRAGMENT);
goto error;
}
break;
@ -943,6 +1039,7 @@ size_t http_parser_execute (http_parser *parser,
case '#':
break;
default:
SET_ERRNO(HPE_INVALID_FRAGMENT);
goto error;
}
break;
@ -956,6 +1053,7 @@ size_t http_parser_execute (http_parser *parser,
case ' ':
break;
default:
SET_ERRNO(HPE_INVALID_CONSTANT);
goto error;
}
break;
@ -982,7 +1080,11 @@ size_t http_parser_execute (http_parser *parser,
/* first digit of major HTTP version */
case s_req_first_http_major:
if (ch < '1' || ch > '9') goto error;
if (ch < '1' || ch > '9') {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_major = ch - '0';
state = s_req_http_major;
break;
@ -995,18 +1097,29 @@ size_t http_parser_execute (http_parser *parser,
break;
}
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_major *= 10;
parser->http_major += ch - '0';
if (parser->http_major > 999) goto error;
if (parser->http_major > 999) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
break;
}
/* first digit of minor HTTP version */
case s_req_first_http_minor:
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_minor = ch - '0';
state = s_req_http_minor;
break;
@ -1026,19 +1139,30 @@ size_t http_parser_execute (http_parser *parser,
/* XXX allow spaces after digit? */
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
parser->http_minor *= 10;
parser->http_minor += ch - '0';
if (parser->http_minor > 999) goto error;
if (parser->http_minor > 999) {
SET_ERRNO(HPE_INVALID_VERSION);
goto error;
}
break;
}
/* end of request line */
case s_req_line_almost_done:
{
if (ch != LF) goto error;
if (ch != LF) {
SET_ERRNO(HPE_LF_EXPECTED);
goto error;
}
state = s_header_field_start;
break;
}
@ -1060,7 +1184,10 @@ size_t http_parser_execute (http_parser *parser,
c = TOKEN(ch);
if (!c) goto error;
if (!c) {
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
goto error;
}
MARK(header_field);
@ -1217,6 +1344,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
goto error;
}
@ -1260,7 +1388,11 @@ size_t http_parser_execute (http_parser *parser,
break;
case h_content_length:
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
goto error;
}
parser->content_length = ch - '0';
break;
@ -1310,7 +1442,11 @@ size_t http_parser_execute (http_parser *parser,
case h_content_length:
if (ch == ' ') break;
if (!IS_NUM(ch)) goto error;
if (!IS_NUM(ch)) {
SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
goto error;
}
parser->content_length *= 10;
parser->content_length += ch - '0';
break;
@ -1431,6 +1567,7 @@ size_t http_parser_execute (http_parser *parser,
default:
parser->state = state;
SET_ERRNO(HPE_CB_headers_complete);
return p - data; /* Error */
}
}
@ -1498,7 +1635,11 @@ size_t http_parser_execute (http_parser *parser,
assert(parser->flags & F_CHUNKED);
unhex_val = unhex[(unsigned char)ch];
if (unhex_val == -1) goto error;
if (unhex_val == -1) {
SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
goto error;
}
parser->content_length = unhex_val;
state = s_chunk_size;
break;
@ -1520,6 +1661,8 @@ size_t http_parser_execute (http_parser *parser,
state = s_chunk_parameters;
break;
}
SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
goto error;
}
@ -1588,6 +1731,7 @@ size_t http_parser_execute (http_parser *parser,
default:
assert(0 && "unhandled state");
SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
goto error;
}
}
@ -1607,7 +1751,10 @@ size_t http_parser_execute (http_parser *parser,
return len;
error:
parser->state = s_dead;
if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
SET_ERRNO(HPE_UNKNOWN);
}
return (p - data);
}
@ -1649,3 +1796,15 @@ http_parser_init (http_parser *parser, enum http_parser_type t)
parser->flags = 0;
parser->method = 0;
}
/* Return the name string stored in http_strerror_tab for `err`.
 * `err` must index inside the table; this is checked only by assert.
 */
const char *
http_errno_name(enum http_errno err) {
  const char *name;

  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
  name = http_strerror_tab[err].name;
  return name;
}
/* Return the description string stored in http_strerror_tab for `err`.
 * `err` must index inside the table; this is checked only by assert.
 */
const char *
http_errno_description(enum http_errno err) {
  const char *description;

  assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
  description = http_strerror_tab[err].description;
  return description;
}

@ -51,6 +51,13 @@ typedef int ssize_t;
# define HTTP_PARSER_STRICT 1
#endif
/* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to
 * the error reporting facility.
 *
 * Defaults to 0 (disabled) when the build does not define it.
 */
#ifndef HTTP_PARSER_DEBUG
# define HTTP_PARSER_DEBUG 0
#endif
/* Maximum header size allowed */
#define HTTP_MAX_HEADER_SIZE (80*1024)
@ -58,6 +65,7 @@ typedef int ssize_t;
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
typedef struct http_parser_result http_parser_result;
/* Callbacks should return non-zero to indicate an error. The parser will
@ -125,6 +133,74 @@ enum flags
};
/* Map for errno-related constants
 *
 * The provided argument should be a macro that takes 2 arguments. It is
 * invoked once per error as XX(name, description), where `name` is a bare
 * identifier suffix and `description` is a string literal.
 */
#define HTTP_ERRNO_MAP(XX) \
/* No error */ \
XX(OK, "success") \
\
/* Callback-related errors */ \
XX(CB_message_begin, "the on_message_begin callback failed") \
XX(CB_path, "the on_path callback failed") \
XX(CB_query_string, "the on_query_string callback failed") \
XX(CB_url, "the on_url callback failed") \
XX(CB_fragment, "the on_fragment callback failed") \
XX(CB_header_field, "the on_header_field callback failed") \
XX(CB_header_value, "the on_header_value callback failed") \
XX(CB_headers_complete, "the on_headers_complete callback failed") \
XX(CB_body, "the on_body callback failed") \
XX(CB_message_complete, "the on_message_complete callback failed") \
\
/* Parsing-related errors */ \
XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
XX(HEADER_OVERFLOW, \
"too many header bytes seen; overflow detected") \
XX(CLOSED_CONNECTION, \
"data received after completed connection: close message") \
XX(INVALID_VERSION, "invalid HTTP version") \
XX(INVALID_STATUS, "invalid HTTP status code") \
XX(INVALID_METHOD, "invalid HTTP method") \
XX(INVALID_URL, "invalid URL") \
XX(INVALID_HOST, "invalid host") \
XX(INVALID_PORT, "invalid port") \
XX(INVALID_PATH, "invalid path") \
XX(INVALID_QUERY_STRING, "invalid query string") \
XX(INVALID_FRAGMENT, "invalid fragment") \
XX(LF_EXPECTED, "LF character expected") \
XX(INVALID_HEADER_TOKEN, "invalid character in header") \
XX(INVALID_CONTENT_LENGTH, \
"invalid character in content-length header") \
XX(INVALID_CHUNK_SIZE, \
"invalid character in chunk size header") \
XX(INVALID_CONSTANT, "invalid constant string") \
XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\
XX(STRICT, "strict mode assertion failed") \
XX(UNKNOWN, "an unknown error occurred")
/* Define HPE_* values for each errno value above.
 *
 * Enumerators are generated in map order with no explicit initializers, so
 * the first map entry (OK) becomes HPE_OK == 0 and the rest count up from it.
 */
#define HTTP_ERRNO_GEN(n, s) HPE_##n,
enum http_errno {
HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN
/* Get an http_errno value from an http_parser.
 *
 * When the 0x80 bit of p->state is set, the result is p->state with that bit
 * cleared; otherwise the result is 0. `p` appears more than once in the
 * expansion, so avoid passing an expression with side effects.
 */
#define HTTP_PARSER_ERRNO(p) \
((enum http_errno) (((p)->state & 0x80) ? (p)->state & ~0x80 : 0))
/* Get the line number that generated the current error.
 *
 * With HTTP_PARSER_DEBUG enabled this reads p->error_lineno; otherwise it
 * expands to the constant 0 and `p` is not evaluated.
 */
#if HTTP_PARSER_DEBUG
#define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno)
#else
#define HTTP_PARSER_ERRNO_LINE(p) 0
#endif
struct http_parser {
/** PRIVATE **/
unsigned char type : 2;
@ -149,6 +225,10 @@ struct http_parser {
*/
char upgrade;
#if HTTP_PARSER_DEBUG
uint32_t error_lineno;
#endif
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
@ -186,7 +266,13 @@ size_t http_parser_execute(http_parser *parser,
int http_should_keep_alive(http_parser *parser);
/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method);
const char *http_method_str(enum http_method m);
/* Return a string name of the given error */
const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
#ifdef __cplusplus
}

@ -1490,7 +1490,9 @@ upgrade_message_fix(char *body, const size_t nread, const size_t nmsgs, ...) {
static void
print_error (const char *raw, size_t error_location)
{
fprintf(stderr, "\n*** parse error ***\n\n");
fprintf(stderr, "\n*** %s:%d -- %s ***\n\n",
"http_parser.c", HTTP_PARSER_ERRNO_LINE(parser),
http_errno_description(HTTP_PARSER_ERRNO(parser)));
int this_line = 0, char_len = 0;
size_t i, j, len = strlen(raw), error_location_line = 0;
@ -1626,21 +1628,32 @@ test_message_count_body (const struct message *message)
}
void
test_simple (const char *buf, int should_pass)
test_simple (const char *buf, enum http_errno err_expected)
{
parser_init(HTTP_REQUEST);
size_t parsed;
int pass;
enum http_errno err;
parsed = parse(buf, strlen(buf));
pass = (parsed == strlen(buf));
err = HTTP_PARSER_ERRNO(parser);
parsed = parse(NULL, 0);
pass &= (parsed == 0);
parser_free();
if (pass != should_pass) {
fprintf(stderr, "\n*** test_simple expected %s ***\n\n%s", should_pass ? "success" : "error", buf);
/* In strict mode, allow us to pass with an unexpected HPE_STRICT as
* long as the caller isn't expecting success.
*/
#if HTTP_PARSER_STRICT
if (err_expected != err && err_expected != HPE_OK && err != HPE_STRICT) {
#else
if (err_expected != err) {
#endif
fprintf(stderr, "\n*** test_simple expected %s, but saw %s ***\n\n%s\n",
http_errno_name(err_expected), http_errno_name(err), buf);
exit(1);
}
}
@ -1657,10 +1670,14 @@ test_header_overflow_error (int req)
assert(parsed == strlen(buf));
buf = "header-key: header-value\r\n";
size_t buflen = strlen(buf);
int i;
for (i = 0; i < 10000; i++) {
if (http_parser_execute(&parser, &settings_null, buf, strlen(buf)) != strlen(buf)) {
parsed = http_parser_execute(&parser, &settings_null, buf, buflen);
if (parsed != buflen) {
//fprintf(stderr, "error found on iter %d\n", i);
assert(HTTP_PARSER_ERRNO(&parser) == HPE_HEADER_OVERFLOW);
return;
}
}
@ -1996,13 +2013,13 @@ main (void)
/// REQUESTS
test_simple("hello world", 0);
test_simple("GET / HTP/1.1\r\n\r\n", 0);
test_simple("hello world", HPE_INVALID_METHOD);
test_simple("GET / HTP/1.1\r\n\r\n", HPE_INVALID_VERSION);
test_simple("ASDF / HTTP/1.1\r\n\r\n", 0);
test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", 0);
test_simple("GETA / HTTP/1.1\r\n\r\n", 0);
test_simple("ASDF / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD);
test_simple("PROPPATCHA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD);
test_simple("GETA / HTTP/1.1\r\n\r\n", HPE_INVALID_METHOD);
// Well-formed but incomplete
test_simple("GET / HTTP/1.1\r\n"
@ -2010,7 +2027,7 @@ main (void)
"Content-Length: 6\r\n"
"\r\n"
"fooba",
0);
HPE_OK);
static const char *all_methods[] = {
"DELETE",
@ -2033,7 +2050,7 @@ main (void)
for (this_method = all_methods; *this_method; this_method++) {
char buf[200];
sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method);
test_simple(buf, 1);
test_simple(buf, HPE_OK);
}
static const char *bad_methods[] = {
@ -2043,7 +2060,7 @@ main (void)
for (this_method = bad_methods; *this_method; this_method++) {
char buf[200];
sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method);
test_simple(buf, 0);
test_simple(buf, HPE_UNKNOWN);
}
const char *dumbfuck2 =
@ -2081,7 +2098,7 @@ main (void)
"\tRA==\r\n"
"\t-----END CERTIFICATE-----\r\n"
"\r\n";
test_simple(dumbfuck2, 1);
test_simple(dumbfuck2, HPE_OK);
#if 0
// NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body

Loading…
Cancel
Save