strict check

event_stream
Ryan Dahl 15 years ago
parent 3ac0ebdee5
commit fb6dc67b05

@ -1,9 +1,15 @@
OPT=-O0 -g -Wall -Wextra -Werror #OPT=-O0 -g -Wall -Wextra -Werror
#OPT=-O2 OPT=-O3 -DHTTP_PARSER_STRICT=0
test: http_parser.o test.c test: http_parser.o test.c
gcc $(OPT) http_parser.o test.c -o $@ gcc $(OPT) http_parser.o test.c -o $@
test-run: test
./test
test-run-timed: test
time ./test > /dev/null
http_parser.o: http_parser.c http_parser.h Makefile http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT) -c http_parser.c gcc $(OPT) -c http_parser.c
@ -21,4 +27,4 @@ package: http_parser.c
tar -cf http_parser.tar http_parser/ tar -cf http_parser.tar http_parser/
@echo /tmp/http_parser.tar @echo /tmp/http_parser.tar
.PHONY: clean package .PHONY: clean package test-run test-run-timed

@ -32,14 +32,14 @@ using `http_parser_init()` and set the callbacks. That might look something
like this: like this:
http_parser *parser = malloc(sizeof(http_parser)); http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST); http_parser_init(parser);
parser->on_path = my_path_callback; parser->on_path = my_path_callback;
parser->on_header_field = my_header_field_callback; parser->on_header_field = my_header_field_callback;
parser->data = my_socket; parser->data = my_socket;
When data is received on the socket execute the parser and check for errors. When data is received on the socket execute the parser and check for errors.
size_t len = 80*1024; size_t len = 80*1024, nparsed;
char buf[len]; char buf[len];
ssize_t recved; ssize_t recved;
@ -50,19 +50,19 @@ When data is received on the socket execute the parser and check for errors.
} }
/* Start up / continue the parser. /* Start up / continue the parser.
* Note we pass the recved==0 to http_parser_execute to signal * Note we pass the recved==0 to http_parse_requests to signal
* that EOF has been received. * that EOF has been received.
*/ */
http_parser_execute(parser, buf, recved); nparsed = http_parse_requests(parser, buf, recved);
if (http_parser_has_error(parser)) { if (nparsed != recved) {
/* Handle error. Usually just close the connection. */ /* Handle error. Usually just close the connection. */
} }
HTTP needs to know where the end of the stream is. For example, sometimes HTTP needs to know where the end of the stream is. For example, sometimes
servers send responses without Content-Length and expect the client to servers send responses without Content-Length and expect the client to
consume input (for the body) until EOF. To tell http_parser about EOF, give consume input (for the body) until EOF. To tell http_parser about EOF, give
`0` as the third parameter to `http_parser_execute()`. Callbacks and errors `0` as the third parameter to `http_parse_requests()`. Callbacks and errors
can still be encountered during an EOF, so one must still be prepared can still be encountered during an EOF, so one must still be prepared
to receive them. to receive them.
@ -84,7 +84,7 @@ parser, for example, would not want such a feature.
Callbacks Callbacks
--------- ---------
During the `http_parser_execute()` call, the callbacks set in `http_parser` During the `http_parse_requests()` call, the callbacks set in `http_parser`
will be executed. The parser maintains state and never looks behind, so will be executed. The parser maintains state and never looks behind, so
buffering the data is not necessary. If you need to save certain data for buffering the data is not necessary. If you need to save certain data for
later usage, you can do that from the callbacks. later usage, you can do that from the callbacks.

@ -58,26 +58,6 @@ do { \
if (0 != FOR##_callback(parser)) return (p - data); \ if (0 != FOR##_callback(parser)) return (p - data); \
} while (0) } while (0)
#if 0
do { \
if (parser->FOR##_mark) { \
parser->FOR##_size += p - parser->FOR##_mark; \
if (parser->FOR##_size > MAX_FIELD_SIZE) { \
return ERROR; \
} \
if (parser->on_##FOR) { \
if (0 != parser->on_##FOR(parser, \
parser->FOR##_mark, \
p - parser->FOR##_mark)) \
{ \
return ERROR; \
} \
} \
} \
} while(0)
#endif
#define DEFINE_CALLBACK(FOR) \ #define DEFINE_CALLBACK(FOR) \
static inline int FOR##_callback (http_parser *parser, const char *p) \ static inline int FOR##_callback (http_parser *parser, const char *p) \
{ \ { \
@ -135,7 +115,7 @@ static const unsigned char lowcase[] =
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
static int unhex[] = static const int unhex[] =
{-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
@ -166,7 +146,7 @@ static const uint32_t usual[] = {
}; };
enum state enum state
{ s_start_res = 1 { s_start_res = 1 /* important that this is > 0 */
, s_res_H , s_res_H
, s_res_HT , s_res_HT
, s_res_HTT , s_res_HTT
@ -211,10 +191,10 @@ enum state
, s_req_http_HT , s_req_http_HT
, s_req_http_HTT , s_req_http_HTT
, s_req_http_HTTP , s_req_http_HTTP
, s_req_first_major_digit , s_req_first_http_major
, s_req_major_digit , s_req_http_major
, s_req_first_minor_digit , s_req_first_http_minor
, s_req_minor_digit , s_req_http_minor
, s_req_line_almost_done , s_req_line_almost_done
, s_header_field_start , s_header_field_start
@ -269,11 +249,17 @@ enum flags
, F_TRAILING = 0x0010 , F_TRAILING = 0x0010
}; };
#define ERROR (p - data); #define ERROR (p - data)
#define CR '\r' #define CR '\r'
#define LF '\n' #define LF '\n'
#define LOWER(c) (unsigned char)(c | 0x20) #define LOWER(c) (unsigned char)(c | 0x20)
/* STRICT_CHECK(cond): in strict builds (HTTP_PARSER_STRICT non-zero) make the
 * enclosing function return ERROR when `cond` is true. In non-strict builds it
 * expands to a no-op and `cond` is NOT evaluated, so `cond` must be free of
 * side effects.
 *
 * Both forms expand to exactly one statement (terminated by the caller's
 * `;`), so the macro can be used as the body of an unbraced if/else without
 * capturing a following `else` or producing an empty-body warning.
 */
#if HTTP_PARSER_STRICT
# define STRICT_CHECK(cond) do { if (cond) return ERROR; } while (0)
#else
# define STRICT_CHECK(cond) ((void)0)
#endif
static inline static inline
size_t parse (http_parser *parser, const char *data, size_t len, int start_state) size_t parse (http_parser *parser, const char *data, size_t len, int start_state)
{ {
@ -325,22 +311,22 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
case s_res_H: case s_res_H:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
state = s_res_HT; state = s_res_HT;
break; break;
case s_res_HT: case s_res_HT:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
state = s_res_HTT; state = s_res_HTT;
break; break;
case s_res_HTT: case s_res_HTT:
if (ch != 'P') return ERROR; STRICT_CHECK(ch != 'P');
state = s_res_HTTP; state = s_res_HTTP;
break; break;
case s_res_HTTP: case s_res_HTTP:
if (ch != '/') return ERROR; STRICT_CHECK(ch != '/');
state = s_res_first_http_major; state = s_res_first_http_major;
break; break;
@ -425,6 +411,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
parser->status_code *= 10; parser->status_code *= 10;
parser->status_code += ch - '0'; parser->status_code += ch - '0';
if (parser->status_code > 999) return ERROR; if (parser->status_code > 999) return ERROR;
break; break;
} }
@ -444,7 +431,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
case s_res_line_almost_done: case s_res_line_almost_done:
if (ch != LF) return ERROR; STRICT_CHECK(ch != LF);
state = s_header_field_start; state = s_header_field_start;
break; break;
@ -489,12 +476,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* GET */ /* GET */
case s_req_method_G: case s_req_method_G:
if (ch != 'E') return ERROR; STRICT_CHECK(ch != 'E');
state = s_req_method_GE; state = s_req_method_GE;
break; break;
case s_req_method_GE: case s_req_method_GE:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
parser->method = HTTP_GET; parser->method = HTTP_GET;
state = s_req_spaces_before_url; state = s_req_spaces_before_url;
break; break;
@ -502,17 +489,17 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* HEAD */ /* HEAD */
case s_req_method_H: case s_req_method_H:
if (ch != 'E') return ERROR; STRICT_CHECK(ch != 'E');
state = s_req_method_HE; state = s_req_method_HE;
break; break;
case s_req_method_HE: case s_req_method_HE:
if (ch != 'A') return ERROR; STRICT_CHECK(ch != 'A');
state = s_req_method_HEA; state = s_req_method_HEA;
break; break;
case s_req_method_HEA: case s_req_method_HEA:
if (ch != 'D') return ERROR; STRICT_CHECK(ch != 'D');
parser->method = HTTP_HEAD; parser->method = HTTP_HEAD;
state = s_req_spaces_before_url; state = s_req_spaces_before_url;
break; break;
@ -537,7 +524,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* PUT */ /* PUT */
case s_req_method_PU: case s_req_method_PU:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
parser->method = HTTP_PUT; parser->method = HTTP_PUT;
state = s_req_spaces_before_url; state = s_req_spaces_before_url;
break; break;
@ -545,12 +532,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* POST */ /* POST */
case s_req_method_PO: case s_req_method_PO:
if (ch != 'S') return ERROR; STRICT_CHECK(ch != 'S');
state = s_req_method_POS; state = s_req_method_POS;
break; break;
case s_req_method_POS: case s_req_method_POS:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
parser->method = HTTP_POST; parser->method = HTTP_POST;
state = s_req_spaces_before_url; state = s_req_spaces_before_url;
break; break;
@ -558,27 +545,27 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* DELETE */ /* DELETE */
case s_req_method_D: case s_req_method_D:
if (ch != 'E') return ERROR; STRICT_CHECK(ch != 'E');
state = s_req_method_DE; state = s_req_method_DE;
break; break;
case s_req_method_DE: case s_req_method_DE:
if (ch != 'L') return ERROR; STRICT_CHECK(ch != 'L');
state = s_req_method_DEL; state = s_req_method_DEL;
break; break;
case s_req_method_DEL: case s_req_method_DEL:
if (ch != 'E') return ERROR; STRICT_CHECK(ch != 'E');
state = s_req_method_DELE; state = s_req_method_DELE;
break; break;
case s_req_method_DELE: case s_req_method_DELE:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
state = s_req_method_DELET; state = s_req_method_DELET;
break; break;
case s_req_method_DELET: case s_req_method_DELET:
if (ch != 'E') return ERROR; STRICT_CHECK(ch != 'E');
parser->method = HTTP_DELETE; parser->method = HTTP_DELETE;
state = s_req_spaces_before_url; state = s_req_spaces_before_url;
break; break;
@ -621,12 +608,12 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
case s_req_schema_slash: case s_req_schema_slash:
if (ch != '/') return ERROR; STRICT_CHECK(ch != '/');
state = s_req_schema_slash_slash; state = s_req_schema_slash_slash;
break; break;
case s_req_schema_slash_slash: case s_req_schema_slash_slash:
if (ch != '/') return ERROR; STRICT_CHECK(ch != '/');
state = s_req_host; state = s_req_host;
break; break;
@ -860,37 +847,37 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
case s_req_http_H: case s_req_http_H:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
state = s_req_http_HT; state = s_req_http_HT;
break; break;
case s_req_http_HT: case s_req_http_HT:
if (ch != 'T') return ERROR; STRICT_CHECK(ch != 'T');
state = s_req_http_HTT; state = s_req_http_HTT;
break; break;
case s_req_http_HTT: case s_req_http_HTT:
if (ch != 'P') return ERROR; STRICT_CHECK(ch != 'P');
state = s_req_http_HTTP; state = s_req_http_HTTP;
break; break;
case s_req_http_HTTP: case s_req_http_HTTP:
if (ch != '/') return ERROR; STRICT_CHECK(ch != '/');
state = s_req_first_major_digit; state = s_req_first_http_major;
break; break;
/* first digit of major HTTP version */ /* first digit of major HTTP version */
case s_req_first_major_digit: case s_req_first_http_major:
if (ch < '1' || ch > '9') return ERROR; if (ch < '1' || ch > '9') return ERROR;
parser->http_major = ch - '0'; parser->http_major = ch - '0';
state = s_req_major_digit; state = s_req_http_major;
break; break;
/* major HTTP version or dot */ /* major HTTP version or dot */
case s_req_major_digit: case s_req_http_major:
{ {
if (ch == '.') { if (ch == '.') {
state = s_req_first_minor_digit; state = s_req_first_http_minor;
break; break;
} }
@ -904,14 +891,14 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
/* first digit of minor HTTP version */ /* first digit of minor HTTP version */
case s_req_first_minor_digit: case s_req_first_http_minor:
if (ch < '0' || ch > '9') return ERROR; if (ch < '0' || ch > '9') return ERROR;
parser->http_minor = ch - '0'; parser->http_minor = ch - '0';
state = s_req_minor_digit; state = s_req_http_minor;
break; break;
/* minor HTTP version or end of request line */ /* minor HTTP version or end of request line */
case s_req_minor_digit: case s_req_http_minor:
{ {
if (ch == CR) { if (ch == CR) {
state = s_req_line_almost_done; state = s_req_line_almost_done;
@ -1083,6 +1070,14 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
{ {
if (ch == ' ') break; if (ch == ' ') break;
MARK(header_value);
state = s_header_value;
header_index = 0;
c = lowcase[(int)ch];
if (!c) {
if (ch == CR) { if (ch == CR) {
header_state = h_general; header_state = h_general;
state = s_header_almost_done; state = s_header_almost_done;
@ -1094,16 +1089,10 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
} }
MARK(header_value);
state = s_header_value;
header_index = 0;
c = lowcase[(int)ch];
if (!c) {
header_state = h_general; header_state = h_general;
} else { break;
}
switch (header_state) { switch (header_state) {
case h_transfer_encoding: case h_transfer_encoding:
/* looking for 'Transfer-Encoding: chunked' */ /* looking for 'Transfer-Encoding: chunked' */
@ -1135,7 +1124,6 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
header_state = h_general; header_state = h_general;
break; break;
} }
}
break; break;
} }
@ -1219,7 +1207,8 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
case s_header_almost_done: case s_header_almost_done:
if (ch != LF) return ERROR; {
STRICT_CHECK(ch != LF);
state = s_header_field_start; state = s_header_field_start;
@ -1237,9 +1226,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
} }
break; break;
}
case s_headers_almost_done: case s_headers_almost_done:
if (ch != LF) return ERROR; {
STRICT_CHECK(ch != LF);
if (parser->flags & F_TRAILING) { if (parser->flags & F_TRAILING) {
/* End of a chunked request */ /* End of a chunked request */
@ -1270,6 +1261,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
} }
break; break;
}
case s_body_identity: case s_body_identity:
to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read)); to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read));
@ -1302,6 +1294,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
case s_chunk_size: case s_chunk_size:
{
if (ch == CR) { if (ch == CR) {
state = s_chunk_size_almost_done; state = s_chunk_size_almost_done;
break; break;
@ -1320,10 +1313,10 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
parser->chunk_size *= 16; parser->chunk_size *= 16;
parser->chunk_size += c; parser->chunk_size += c;
break; break;
}
case s_chunk_parameters: case s_chunk_parameters:
/* just ignore this shit */ /* just ignore this shit. TODO check for overflow */
/* TODO check for overflow */
if (ch == CR) { if (ch == CR) {
state = s_chunk_size_almost_done; state = s_chunk_size_almost_done;
break; break;
@ -1331,7 +1324,8 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break; break;
case s_chunk_size_almost_done: case s_chunk_size_almost_done:
if (ch != LF) return ERROR; {
STRICT_CHECK(ch != LF);
if (parser->chunk_size == 0) { if (parser->chunk_size == 0) {
parser->flags |= F_TRAILING; parser->flags |= F_TRAILING;
state = s_header_field_start; state = s_header_field_start;
@ -1339,8 +1333,10 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
state = s_chunk_data; state = s_chunk_data;
} }
break; break;
}
case s_chunk_data: case s_chunk_data:
{
to_read = MIN(pe - p, (ssize_t)(parser->chunk_size)); to_read = MIN(pe - p, (ssize_t)(parser->chunk_size));
if (to_read > 0) { if (to_read > 0) {
@ -1354,14 +1350,15 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
parser->chunk_size -= to_read; parser->chunk_size -= to_read;
break; break;
}
case s_chunk_data_almost_done: case s_chunk_data_almost_done:
if (ch != CR) return ERROR; STRICT_CHECK(ch != CR);
state = s_chunk_data_done; state = s_chunk_data_done;
break; break;
case s_chunk_data_done: case s_chunk_data_done:
if (ch != LF) return ERROR; STRICT_CHECK(ch != LF);
state = s_chunk_size_start; state = s_chunk_size_start;
break; break;
@ -1386,14 +1383,16 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
} }
size_t http_parse_requests (http_parser *parser, const char *data, size_t len) size_t
http_parse_requests (http_parser *parser, const char *data, size_t len)
{ {
if (!parser->state) parser->state = s_start_req; if (!parser->state) parser->state = s_start_req;
return parse(parser, data, len, s_start_req); return parse(parser, data, len, s_start_req);
} }
size_t http_parse_responses (http_parser *parser, const char *data, size_t len) size_t
http_parse_responses (http_parser *parser, const char *data, size_t len)
{ {
if (!parser->state) parser->state = s_start_res; if (!parser->state) parser->state = s_start_res;
return parse(parser, data, len, s_start_res); return parse(parser, data, len, s_start_res);

@ -29,6 +29,15 @@ extern "C" {
#endif #endif
#include <sys/types.h> #include <sys/types.h>
/* Strictness switch. Defaults to 1 (strict) when the build does not define
 * it. Compile with -DHTTP_PARSER_STRICT=0 to perform fewer checks but run
 * faster.
 *
 * A value supplied on the command line is kept as-is: only the undefined
 * case gets a default, so the macro is never redefined here.
 */
#ifndef HTTP_PARSER_STRICT
# define HTTP_PARSER_STRICT 1
#endif
typedef struct http_parser http_parser; typedef struct http_parser http_parser;
/* Callbacks should return non-zero to indicate an error. The parse will /* Callbacks should return non-zero to indicate an error. The parse will
@ -52,14 +61,15 @@ enum http_method
struct http_parser { struct http_parser {
/** PRIVATE **/ /** PRIVATE **/
int state; unsigned short state;
int header_state; unsigned short header_state;
size_t header_index; size_t header_index;
size_t chunk_size;
char flags; char flags;
size_t chunk_size;
ssize_t body_read; ssize_t body_read;
ssize_t content_length;
const char *header_field_mark; const char *header_field_mark;
size_t header_field_size; size_t header_field_size;
@ -77,11 +87,8 @@ struct http_parser {
/** READ-ONLY **/ /** READ-ONLY **/
unsigned short status_code; /* responses only */ unsigned short status_code; /* responses only */
enum http_method method; /* requests only */ enum http_method method; /* requests only */
unsigned short http_major;
int http_major; unsigned short http_minor;
int http_minor;
ssize_t content_length;
/** PUBLIC **/ /** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */ void *data; /* A pointer to get hook to the "connection" or "socket" object */

Loading…
Cancel
Save