new version

Trashing the old Ragel parser (which was based on Mongrel) because it's
proving difficult to get the control I need in end-of-message cases.
Replacing this with a hand-written parser using a couple of tricks borrowed
from NGINX. The new parser will be much more work to write, but should prove
faster and allow for better hacking.
event_stream
Ryan Dahl 15 years ago
parent 6bfd5bf76d
commit 433202d825

1
.gitignore vendored

@ -1,4 +1,3 @@
tags tags
*.o *.o
test test
http_parser.c

@ -1,5 +1,5 @@
#OPT=-O0 -g -Wall -Wextra -Werror OPT=-O0 -g -Wall -Wextra -Werror
OPT=-O2 #OPT=-O2
test: http_parser.o test.c test: http_parser.o test.c
gcc $(OPT) http_parser.o test.c -o $@ gcc $(OPT) http_parser.o test.c -o $@
@ -7,10 +7,7 @@ test: http_parser.o test.c
http_parser.o: http_parser.c http_parser.h Makefile http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT) -c http_parser.c gcc $(OPT) -c http_parser.c
http_parser.c: http_parser.rl Makefile tags: http_parser.c http_parser.h test.c
ragel -s -G2 http_parser.rl -o $@
tags: http_parser.rl http_parser.h test.c
ctags $^ ctags $^
clean: clean:
@ -18,7 +15,7 @@ clean:
package: http_parser.c package: http_parser.c
@rm -rf /tmp/http_parser && mkdir /tmp/http_parser && \ @rm -rf /tmp/http_parser && mkdir /tmp/http_parser && \
cp LICENSE README.md Makefile http_parser.c http_parser.rl \ cp LICENSE README.md Makefile http_parser.c \
http_parser.h test.c /tmp/http_parser && \ http_parser.h test.c /tmp/http_parser && \
cd /tmp && \ cd /tmp && \
tar -cf http_parser.tar http_parser/ tar -cf http_parser.tar http_parser/

@ -0,0 +1,891 @@
#include <http_parser.h>
#include <stdint.h>
#include <assert.h>
#ifndef NULL
# define NULL ((void*)0)
#endif
/* Upper bound, in bytes, on the accumulated size of any single field
 * (URI, path, query string, fragment, header name, header value).  The
 * FOR##_callback helpers return -1 once a field grows past this. */
#define MAX_FIELD_SIZE (80*1024)

/* Remember that field FOR starts at the current byte and reset its running
 * size.  Expects `parser` and `p` to be in scope at the point of use. */
#define MARK(FOR)                                                    \
do {                                                                 \
  parser->FOR##_mark = p;                                            \
  parser->FOR##_size = 0;                                            \
} while (0)

/* Hand the bytes [FOR##_mark, p) to FOR##_callback().  On a non-zero result
 * the ENCLOSING FUNCTION returns the number of bytes consumed so far, so
 * this may only be used where `parser`, `p` and `data` are in scope and the
 * function returns a byte count. */
#define CALLBACK(FOR)                                                \
do {                                                                 \
  if (0 != FOR##_callback(parser, p)) return (p - data);             \
} while (0)
/* Report the URI bytes [parser->uri_mark, p) to the user's on_uri callback.
 *
 * The span length is added to parser->uri_size first.  Returns -1 when the
 * accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing to report
 * or no callback is installed, otherwise whatever on_uri returns. */
static inline int uri_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->uri_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->uri_size += p - mark;
  if (parser->uri_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_uri == NULL) return 0;
  return parser->on_uri(parser, mark, p - mark);
}
/* Report the path bytes [parser->path_mark, p) to the user's on_path
 * callback.
 *
 * The span length is added to parser->path_size first.  Returns -1 when the
 * accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing to report
 * or no callback is installed, otherwise whatever on_path returns. */
static inline int path_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->path_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->path_size += p - mark;
  if (parser->path_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_path == NULL) return 0;
  return parser->on_path(parser, mark, p - mark);
}
/* Report the query-string bytes [parser->query_string_mark, p) to the user's
 * on_query_string callback.
 *
 * The span length is added to parser->query_string_size first.  Returns -1
 * when the accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing
 * to report or no callback is installed, otherwise whatever on_query_string
 * returns. */
static inline int query_string_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->query_string_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->query_string_size += p - mark;
  if (parser->query_string_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_query_string == NULL) return 0;
  return parser->on_query_string(parser, mark, p - mark);
}
/* Report the fragment bytes [parser->fragment_mark, p) to the user's
 * on_fragment callback.
 *
 * The span length is added to parser->fragment_size first.  Returns -1 when
 * the accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing to
 * report or no callback is installed, otherwise whatever on_fragment
 * returns. */
static inline int fragment_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->fragment_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->fragment_size += p - mark;
  if (parser->fragment_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_fragment == NULL) return 0;
  return parser->on_fragment(parser, mark, p - mark);
}
/* Report the header-name bytes [parser->header_field_mark, p) to the user's
 * on_header_field callback.
 *
 * The span length is added to parser->header_field_size first.  Returns -1
 * when the accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing
 * to report or no callback is installed, otherwise whatever on_header_field
 * returns. */
static inline int header_field_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->header_field_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->header_field_size += p - mark;
  if (parser->header_field_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_header_field == NULL) return 0;
  return parser->on_header_field(parser, mark, p - mark);
}
/* Report the header-value bytes [parser->header_value_mark, p) to the user's
 * on_header_value callback.
 *
 * The span length is added to parser->header_value_size first.  Returns -1
 * when the accumulated size exceeds MAX_FIELD_SIZE, 0 when there is nothing
 * to report or no callback is installed, otherwise whatever on_header_value
 * returns. */
static inline int header_value_callback (http_parser *parser, const char *p)
{
  const char *mark = parser->header_value_mark;

  /* http_parser_execute() flushes every field when it runs out of input,
   * including fields that are not being collected; that is not an error. */
  if (mark == NULL) return 0;

  parser->header_value_size += p - mark;
  if (parser->header_value_size > MAX_FIELD_SIZE) return -1;
  if (parser->on_header_value == NULL) return 0;
  return parser->on_header_value(parser, mark, p - mark);
}
/* Lower-case names of the headers the parser recognizes specially.  The
 * header-name state machine compares the lower-cased input against these
 * one character at a time, indexed by the position within the name. */
#define CONNECTION "connection"
#define CONTENT_LENGTH "content-length"
#define TRANSFER_ENCODING "transfer-encoding"
/* Byte -> lower-cased header-name character.
 *
 * ASCII letters map to their lower-case form, digits and '-' map to
 * themselves, and every other byte maps to 0, which the parser treats as
 * "not allowed here".  The table has one entry per possible byte value. */
static const unsigned char lowcase[256] = {
  ['-'] = '-',

  ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4',
  ['5'] = '5', ['6'] = '6', ['7'] = '7', ['8'] = '8', ['9'] = '9',

  ['A'] = 'a', ['B'] = 'b', ['C'] = 'c', ['D'] = 'd', ['E'] = 'e',
  ['F'] = 'f', ['G'] = 'g', ['H'] = 'h', ['I'] = 'i', ['J'] = 'j',
  ['K'] = 'k', ['L'] = 'l', ['M'] = 'm', ['N'] = 'n', ['O'] = 'o',
  ['P'] = 'p', ['Q'] = 'q', ['R'] = 'r', ['S'] = 's', ['T'] = 't',
  ['U'] = 'u', ['V'] = 'v', ['W'] = 'w', ['X'] = 'x', ['Y'] = 'y',
  ['Z'] = 'z',

  ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e',
  ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', ['i'] = 'i', ['j'] = 'j',
  ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o',
  ['p'] = 'p', ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't',
  ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', ['y'] = 'y',
  ['z'] = 'z'
};
/* 256-bit set of "usual" URL bytes, 32 byte values per word: byte b is in
 * the set when bit (b & 0x1f) of usual[b >> 5] is 1.  Bytes outside the set
 * need individual handling by the URL states. */
static const uint32_t usual[] = {
  /* 0x00-0x1f: everything except NUL (bit 0), LF (bit 10), CR (bit 13) */
  UINT32_C(0xffffdbfe), /* 1111 1111 1111 1111 1101 1011 1111 1110 */

  /* 0x20-0x3f: everything except ' ' '#' '%' '+' '.' '/' '?' */
                        /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"!  */
  UINT32_C(0x7fff37d6), /* 0111 1111 1111 1111 0011 0111 1101 0110 */

  /* 0x40-0x5f: all usual */
                        /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
  UINT32_C(0xffffffff), /* 1111 1111 1111 1111 1111 1111 1111 1111 */

  /* 0x60-0x7f: all usual */
                        /*  ~}| {zyx wvut srqp onml kjih gfed cba` */
  UINT32_C(0xffffffff), /* 1111 1111 1111 1111 1111 1111 1111 1111 */

  /* 0x80-0xff: all usual */
  UINT32_C(0xffffffff), /* 1111 1111 1111 1111 1111 1111 1111 1111 */
  UINT32_C(0xffffffff), /* 1111 1111 1111 1111 1111 1111 1111 1111 */
  UINT32_C(0xffffffff), /* 1111 1111 1111 1111 1111 1111 1111 1111 */
  UINT32_C(0xffffffff)  /* 1111 1111 1111 1111 1111 1111 1111 1111 */
};
/* States of the request parser, one per position in the grammar.  The
 * numeric values are stored in http_parser.state between calls, so the
 * order must not change. */
enum state {
  s_start = 0,

  /* request method, one state per letter already matched */
  s_method_G,
  s_method_GE,
  s_method_P,
  s_method_PU,
  s_method_PO,
  s_method_POS,
  s_method_H,
  s_method_HE,
  s_method_HEA,
  s_method_D,
  s_method_DE,
  s_method_DEL,
  s_method_DELE,
  s_method_DELET,

  /* request target */
  s_spaces_before_uri,
  s_schema,
  s_schema_slash,
  s_schema_slash_slash,
  s_host,
  s_port,
  s_path,
  s_query_string,
  s_fragment,

  /* "HTTP/<major>.<minor>" */
  s_http_start,
  s_http_H,
  s_http_HT,
  s_http_HTT,
  s_http_HTTP,
  s_first_major_digit,
  s_major_digit,
  s_first_minor_digit,
  s_minor_digit,
  s_req_line_almost_done,

  /* header section */
  s_header_field_start,
  s_header_field,
  s_header_value_start,
  s_header_value,
  s_header_almost_done,
  s_headers_almost_done,
  s_headers_done
};
/* Sub-state used while reading one header line, to spot the headers the
 * parser cares about without buffering the name.  The numeric values are
 * stored in http_parser.header_state between calls. */
enum header_states {
  h_general = 0,                 /* not (or no longer) a header of interest */

  /* shared prefix of "connection" and "content-length" */
  h_C,
  h_CO,
  h_CON,

  /* still comparing against a full header name */
  h_matching_connection,
  h_matching_content_length,
  h_matching_transfer_encoding,

  /* header name fully matched */
  h_connection,
  h_content_length,
  h_transfer_encoding,

  /* first character of an interesting header value seen */
  h_encoding_C,
  h_connection_K,
  h_connection_C
};
/* Value returned by http_parser_execute() on a parse error: the offset of
 * the offending byte.  Expects `p` and `data` to be in scope.  (No trailing
 * semicolon, so it can also be used inside an expression.) */
#define ERROR (p - data)
#define CR '\r'
#define LF '\n'
/* ASCII lower-casing by setting bit 0x20; only meaningful for letters, so
 * callers range-check the result against 'a'..'z'. */
#define LOWER(c) ((unsigned char)((c) | 0x20))
size_t http_parser_execute (http_parser *parser, const char *data, size_t len)
{
char c, ch;
const char *p, *pe;
enum state state = parser->state;
enum header_states header_state = parser->header_state;
size_t header_index = parser->header_index;
if (parser->header_field_mark) parser->header_field_mark = data;
if (parser->header_value_mark) parser->header_value_mark = data;
if (parser->fragment_mark) parser->fragment_mark = data;
if (parser->query_string_mark) parser->query_string_mark = data;
if (parser->path_mark) parser->path_mark = data;
if (parser->uri_mark) parser->uri_mark = data;
for (p=data, pe=data+len; p != pe; p++) {
ch = *p;
switch (state) {
case s_start:
{
switch (ch) {
/* GET */
case 'G':
state = s_method_G;
break;
/* POST, PUT */
case 'P':
state = s_method_P;
break;
/* HEAD */
case 'H':
state = s_method_H;
break;
/* DELETE */
case 'D':
state = s_method_D;
break;
case CR:
case LF:
break;
default:
return ERROR;
}
break;
}
/* GET */
case s_method_G:
if (ch != 'E') return ERROR;
state = s_method_GE;
break;
case s_method_GE:
if (ch != 'T') return ERROR;
parser->method = HTTP_GET;
state = s_spaces_before_uri;
break;
/* HEAD */
case s_method_H:
if (ch != 'E') return ERROR;
state = s_method_HE;
break;
case s_method_HE:
if (ch != 'A') return ERROR;
state = s_method_HEA;
break;
case s_method_HEA:
if (ch != 'D') return ERROR;
parser->method = HTTP_HEAD;
state = s_spaces_before_uri;
break;
/* POST, PUT */
case s_method_P:
switch (ch) {
case 'O':
state = s_method_PO;
break;
case 'U':
state = s_method_PU;
break;
default:
return ERROR;
}
break;
/* PUT */
case s_method_PU:
if (ch != 'T') return ERROR;
parser->method = HTTP_PUT;
state = s_spaces_before_uri;
break;
/* POST */
case s_method_PO:
if (ch != 'S') return ERROR;
state = s_method_POS;
break;
case s_method_POS:
if (ch != 'T') return ERROR;
parser->method = HTTP_POST;
state = s_spaces_before_uri;
break;
/* DELETE */
case s_method_D:
if (ch != 'E') return ERROR;
state = s_method_DE;
break;
case s_method_DE:
if (ch != 'L') return ERROR;
state = s_method_DEL;
break;
case s_method_DEL:
if (ch != 'E') return ERROR;
state = s_method_DELE;
break;
case s_method_DELE:
if (ch != 'T') return ERROR;
state = s_method_DELET;
break;
case s_method_DELET:
if (ch != 'E') return ERROR;
parser->method = HTTP_DELETE;
state = s_spaces_before_uri;
break;
case s_spaces_before_uri:
{
if (ch == ' ') break;
if (ch == '/') {
MARK(uri);
MARK(path);
state = s_path;
break;
}
c = LOWER(ch);
if (c >= 'a' && c <= 'z') {
MARK(uri);
state = s_schema;
break;
}
return ERROR;
}
case s_schema:
{
c = LOWER(ch);
if (c >= 'a' && c <= 'z') break;
if (ch == ':') {
state = s_schema_slash;
break;
}
return ERROR;
}
case s_schema_slash:
if (ch != '/') return ERROR;
state = s_schema_slash_slash;
break;
case s_schema_slash_slash:
if (ch != '/') return ERROR;
state = s_host;
break;
case s_host:
{
c = LOWER(ch);
if (c >= 'a' && c <= 'z') break;
if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
switch (ch) {
case ':':
state = s_port;
break;
case '/':
MARK(path);
state = s_path;
break;
case ' ':
/* The request line looks like:
* "GET http://foo.bar.com HTTP/1.1"
* That is, there is no path.
*/
CALLBACK(uri);
state = s_http_start;
break;
default:
return ERROR;
}
break;
}
case s_port:
{
if (ch >= '0' && ch <= '9') break;
switch (ch) {
case '/':
MARK(path);
state = s_path;
break;
case ' ':
/* The request line looks like:
* "GET http://foo.bar.com:1234 HTTP/1.1"
* That is, there is no path.
*/
CALLBACK(uri);
state = s_http_start;
break;
default:
return ERROR;
}
break;
}
case s_path:
{
if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
switch (ch) {
case ' ':
CALLBACK(uri);
CALLBACK(path);
state = s_http_start;
break;
case CR:
CALLBACK(uri);
CALLBACK(path);
parser->http_minor = 9;
state = s_req_line_almost_done;
break;
case LF:
CALLBACK(uri);
CALLBACK(path);
parser->http_minor = 9;
state = s_header_field_start;
break;
case '?':
CALLBACK(path);
MARK(query_string);
state = s_query_string;
break;
case '#':
CALLBACK(path);
MARK(fragment);
state = s_fragment;
break;
default:
return ERROR;
}
break;
}
case s_query_string:
{
if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
switch (ch) {
case ' ':
CALLBACK(uri);
CALLBACK(query_string);
state = s_http_start;
break;
case CR:
CALLBACK(uri);
CALLBACK(query_string);
parser->http_minor = 9;
state = s_req_line_almost_done;
break;
case LF:
CALLBACK(uri);
CALLBACK(query_string);
parser->http_minor = 9;
state = s_header_field_start;
break;
case '#':
CALLBACK(query_string);
MARK(fragment);
state = s_fragment;
break;
default:
return ERROR;
}
break;
}
case s_fragment:
{
if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
switch (ch) {
case ' ':
CALLBACK(uri);
CALLBACK(fragment);
state = s_http_start;
break;
case CR:
CALLBACK(uri);
CALLBACK(fragment);
parser->http_minor = 9;
state = s_req_line_almost_done;
break;
case LF:
CALLBACK(uri);
CALLBACK(fragment);
parser->http_minor = 9;
state = s_header_field_start;
break;
case '?':
case '#':
break;
default:
return ERROR;
}
break;
}
case s_http_start:
switch (ch) {
case 'H':
state = s_http_H;
break;
case ' ':
break;
default:
return ERROR;
}
break;
case s_http_H:
if (ch != 'T') return ERROR;
state = s_http_HT;
break;
case s_http_HT:
if (ch != 'T') return ERROR;
state = s_http_HTT;
break;
case s_http_HTT:
if (ch != 'P') return ERROR;
state = s_http_HTTP;
break;
case s_http_HTTP:
if (ch != '/') return ERROR;
state = s_first_major_digit;
break;
/* first digit of major HTTP version */
case s_first_major_digit:
if (ch < '1' || ch > '9') return ERROR;
parser->http_major = ch - '0';
state = s_major_digit;
break;
/* major HTTP version or dot */
case s_major_digit:
{
if (ch == '.') {
state = s_first_minor_digit;
break;
}
if (ch < '0' || ch > '9') return ERROR;
parser->http_major *= 10;
parser->http_major += ch - '0';
if (parser->http_major > 999) return ERROR;
break;
}
/* first digit of minor HTTP version */
case s_first_minor_digit:
if (ch < '0' || ch > '9') return ERROR;
parser->http_minor = ch - '0';
state = s_minor_digit;
break;
/* minor HTTP version or end of request line */
case s_minor_digit:
{
if (ch == CR) {
state = s_req_line_almost_done;
break;
}
if (ch == LF) {
state = s_header_field_start;
break;
}
/* XXX allow spaces after digit? */
if (ch < '0' || ch > '9') return ERROR;
parser->http_minor *= 10;
parser->http_minor += ch - '0';
if (parser->http_minor > 999) return ERROR;
break;
}
/* end of request line */
case s_req_line_almost_done:
{
if (ch != LF) return ERROR;
state = s_header_field_start;
break;
}
case s_header_field_start:
{
if (ch == CR) {
state = s_headers_almost_done;
break;
}
if (ch == LF) {
state = s_headers_done;
break;
}
c = LOWER(ch);
if (c < 'a' || 'z' < c) return ERROR;
MARK(header_field);
header_index = 0;
state = s_header_field;
switch (c) {
case 'c':
header_state = h_C;
break;
case 't':
header_state = h_matching_transfer_encoding;
break;
default:
header_state = h_general;
break;
}
break;
}
case s_header_field:
{
header_index++;
c = lowcase[(int)ch];
if (c) {
switch (header_state) {
case h_general:
break;
case h_C:
header_state = (c == 'o' ? h_CO : h_general);
break;
case h_CO:
header_state = (c == 'n' ? h_CON : h_general);
break;
case h_CON:
switch (c) {
case 'n':
header_state = h_matching_connection;
break;
case 't':
header_state = h_matching_content_length;
break;
default:
header_state = h_general;
break;
}
break;
/* connection */
case h_matching_connection:
if (header_index > sizeof(CONNECTION)-1
|| c != CONNECTION[header_index]) {
header_state = h_general;
} else if (header_index == sizeof(CONNECTION)-1) {
header_state = h_connection;
}
break;
/* content-length */
case h_matching_content_length:
if (header_index > sizeof(CONTENT_LENGTH)-1
|| c != CONTENT_LENGTH[header_index]) {
header_state = h_general;
} else if (header_index == sizeof(CONTENT_LENGTH)-1) {
header_state = h_content_length;
}
break;
/* transfer-encoding */
case h_matching_transfer_encoding:
if (header_index > sizeof(TRANSFER_ENCODING)-1
|| c != TRANSFER_ENCODING[header_index]) {
header_state = h_general;
} else if (header_index == sizeof(TRANSFER_ENCODING)-1) {
header_state = h_transfer_encoding;
}
break;
default:
assert(0 && "Unknown header_state");
break;
}
}
if (ch == ':') {
CALLBACK(header_field);
state = s_header_value_start;
break;
}
if (ch == CR) {
state = s_header_almost_done;
CALLBACK(header_field);
break;
}
if (ch == LF) {
CALLBACK(header_field);
state = s_header_field_start;
break;
}
return ERROR;
}
case s_header_value_start:
{
if (ch == ' ') break;
MARK(header_value);
if (ch == CR) {
header_state = h_general;
state = s_header_almost_done;
}
if (ch == LF) {
header_state = h_general;
state = s_headers_done;
}
c = lowcase[(int)ch];
if (!c) return ERROR;
switch (header_state) {
case h_transfer_encoding:
/* looking for 'Transfer-Encoding: chunked' */
if ('c' == c) {
header_state = h_encoding_C;
} else {
header_state = h_general;
}
break;
case h_content_length:
if (ch < '0' || ch > '9') return ERROR;
parser->content_length = ch - '0';
break;
case h_connection:
/* looking for 'Connection: keep-alive' */
if (c == 'k') {
header_state = h_connection_K;
/* looking for 'Connection: close' */
} else if (c == 'c') {
header_state = h_connection_C;
} else {
header_state = h_general;
}
break;
default:
state = s_header_value;
header_state = h_general;
break;
}
break;
}
case s_header_value:
{
break;
}
case s_header_almost_done:
if (ch != LF) return ERROR;
state = s_header_field_start;
break;
default:
assert(0 && "unhandled state");
return ERROR;
}
}
CALLBACK(header_field);
CALLBACK(header_value);
CALLBACK(fragment);
CALLBACK(query_string);
CALLBACK(path);
CALLBACK(uri);
parser->state = state;
parser->header_state = header_state;
parser->header_index = header_index;
return len;
}
/* Put `parser` into its initial state and clear all user callbacks.
 *
 * Must be called before the first http_parser_execute(); callers install
 * their callbacks afterwards.  Only HTTP_REQUEST is supported so far; any
 * other type trips an assertion.
 *
 * Every field that http_parser_execute() reads before writing is
 * initialized here, in particular the field marks: a stale non-NULL mark
 * would be taken for a field continued from a previous buffer. */
void
http_parser_init (http_parser *parser, enum http_parser_type type)
{
  assert(type == HTTP_REQUEST && "responses not supported yet");

  parser->type = type;
  parser->state = s_start;
  parser->header_state = h_general;
  parser->header_index = 0;

  parser->header_field_mark = NULL;
  parser->header_field_size = 0;
  parser->header_value_mark = NULL;
  parser->header_value_size = 0;
  parser->query_string_mark = NULL;
  parser->query_string_size = 0;
  parser->path_mark = NULL;
  parser->path_size = 0;
  parser->uri_mark = NULL;
  parser->uri_size = 0;
  parser->fragment_mark = NULL;
  parser->fragment_size = 0;

  parser->http_major = 0;
  parser->http_minor = 0;
  /* -1 means "not specified by the message" for both fields */
  parser->keep_alive = -1;
  parser->content_length = -1;

  parser->on_message_begin = NULL;
  parser->on_path = NULL;
  parser->on_query_string = NULL;
  parser->on_uri = NULL;
  parser->on_fragment = NULL;
  parser->on_header_field = NULL;
  parser->on_header_value = NULL;
  parser->on_headers_complete = NULL;
  parser->on_body = NULL;
  parser->on_message_complete = NULL;
}

@ -1,63 +1,3 @@
/*
Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
<zedshaw at zedshaw dot com> and contributors.
This source file is based on Mongrel's parser. Changes by Ryan Dahl
<ry@tinyclouds.org> in 2008 and 2009.
You can redistribute it and/or modify it under either the terms of the GPL2
or the conditions below:
1. You may make and give away verbatim copies of the source form of the
software without restriction, provided that you duplicate all of the
original copyright notices and associated disclaimers.
2. You may modify your copy of the software in any way, provided that
you do at least ONE of the following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or an
equivalent medium, or by allowing the author to include your
modifications in the software.
b) use the modified software only within your corporation or
organization.
c) rename any non-standard executables so the names do not conflict with
standard executables, which must also be provided.
d) make other distribution arrangements with the author.
3. You may distribute the software in object code or executable
form, provided that you do at least ONE of the following:
a) distribute the executables and library files of the software,
together with instructions (in the manual page or equivalent) on where
to get the original distribution.
b) accompany the distribution with the machine-readable source of the
software.
c) give non-standard executables non-standard names, with
instructions on where to get the original software distribution.
d) make other distribution arrangements with the author.
4. You may modify and include the part of the software into any other
software (possibly commercial). But some files in the distribution
are not written by the author, so that they are not under this terms.
5. The scripts and library files supplied as input to or produced as
output from the software do not automatically fall under the
copyright of the software, but belong to whomever generated them,
and may be sold commercially, and may be aggregated with this
software.
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
*/
#ifndef http_parser_h #ifndef http_parser_h
#define http_parser_h #define http_parser_h
#ifdef __cplusplus #ifdef __cplusplus
@ -83,34 +23,20 @@ typedef int (*http_cb) (http_parser*);
/* Request Methods */ /* Request Methods */
enum http_method enum http_method
{ HTTP_COPY = 0x0001 { HTTP_DELETE = 0x0002
, HTTP_DELETE = 0x0002
, HTTP_GET = 0x0004 , HTTP_GET = 0x0004
, HTTP_HEAD = 0x0008 , HTTP_HEAD = 0x0008
, HTTP_LOCK = 0x0010
, HTTP_MKCOL = 0x0020
, HTTP_MOVE = 0x0040
, HTTP_OPTIONS = 0x0080
, HTTP_POST = 0x0100 , HTTP_POST = 0x0100
, HTTP_PROPFIND = 0x0200
, HTTP_PROPPATCH = 0x0400
, HTTP_PUT = 0x0800 , HTTP_PUT = 0x0800
, HTTP_TRACE = 0x1000
, HTTP_UNLOCK = 0x2000
}; };
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE }; enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
enum http_version
{ HTTP_VERSION_OTHER = 0x00
, HTTP_VERSION_11 = 0x01
, HTTP_VERSION_10 = 0x02
, HTTP_VERSION_09 = 0x04
};
struct http_parser { struct http_parser {
/** PRIVATE **/ /** PRIVATE **/
int cs; int state;
int header_state;
size_t header_index;
enum http_parser_type type; enum http_parser_type type;
size_t chunk_size; size_t chunk_size;
@ -134,7 +60,10 @@ struct http_parser {
/** READ-ONLY **/ /** READ-ONLY **/
unsigned short status_code; /* responses only */ unsigned short status_code; /* responses only */
enum http_method method; /* requests only */ enum http_method method; /* requests only */
enum http_version version;
int http_major;
int http_minor;
short keep_alive; short keep_alive;
ssize_t content_length; ssize_t content_length;
@ -163,14 +92,16 @@ struct http_parser {
*/ */
void http_parser_init (http_parser *parser, enum http_parser_type); void http_parser_init (http_parser *parser, enum http_parser_type);
void http_parser_execute (http_parser *parser, const char *data, size_t len); size_t http_parser_execute (http_parser *parser, const char *data, size_t len);
/*
int http_parser_has_error (http_parser *parser); int http_parser_has_error (http_parser *parser);
*/
static inline int static inline int
http_parser_should_keep_alive (http_parser *parser) http_parser_should_keep_alive (http_parser *parser)
{ {
if (parser->keep_alive == -1) return (parser->version == HTTP_VERSION_11); if (parser->keep_alive == -1) return (parser->http_major == 1 && parser->http_minor == 1);
return parser->keep_alive; return parser->keep_alive;
} }

@ -1,536 +0,0 @@
/*
Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
<zedshaw at zedshaw dot com> and contributors.
This source file is based on Mongrel's parser. Changes by Ryan Dahl
<ry@tinyclouds.org> in 2008 and 2009.
You can redistribute it and/or modify it under either the terms of the GPL2
or the conditions below:
1. You may make and give away verbatim copies of the source form of the
software without restriction, provided that you duplicate all of the
original copyright notices and associated disclaimers.
2. You may modify your copy of the software in any way, provided that
you do at least ONE of the following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or an
equivalent medium, or by allowing the author to include your
modifications in the software.
b) use the modified software only within your corporation or
organization.
c) rename any non-standard executables so the names do not conflict with
standard executables, which must also be provided.
d) make other distribution arrangements with the author.
3. You may distribute the software in object code or executable
form, provided that you do at least ONE of the following:
a) distribute the executables and library files of the software,
together with instructions (in the manual page or equivalent) on where
to get the original distribution.
b) accompany the distribution with the machine-readable source of the
software.
c) give non-standard executables non-standard names, with
instructions on where to get the original software distribution.
d) make other distribution arrangements with the author.
4. You may modify and include the part of the software into any other
software (possibly commercial). But some files in the distribution
are not written by the author, so that they are not under this terms.
5. The scripts and library files supplied as input to or produced as
output from the software do not automatically fall under the
copyright of the software, but belong to whomever generated them,
and may be sold commercially, and may be aggregated with this
software.
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
*/
#include "http_parser.h"
#include <limits.h>
#include <assert.h>
/* Bits stored in parser->flags:
 *   EATING      - a body/chunk is partly consumed; more of it is expected
 *                 in the next buffer (set and cleared by SKIP_BODY)
 *   ERROR       - a callback returned non-zero or a field grew too large
 *   CHUNKED     - the message uses "Transfer-Encoding: chunked"
 *   EAT_FOREVER - no Content-Length and not keep-alive: everything up to
 *                 EOF is body
 */
#define EATING 0x01
#define ERROR 0x02
#define CHUNKED 0x04
#define EAT_FOREVER 0x10
/* ASCII code -> value of that hexadecimal digit, or -1 if the character is
 * not a hex digit.  Used to accumulate chunk sizes.
 * NOTE(review): the table has 128 entries (8 rows of 16), so it must only be
 * indexed with 7-bit character codes. */
static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
/* Smaller of two values; evaluates its arguments more than once. */
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* Local definition of NULL, replacing any earlier one. */
#undef NULL
#define NULL ((void*)(0))
/* Upper bound, in bytes, on the accumulated size of a single field. */
#define MAX_FIELD_SIZE (80*1024)
/* Bytes left in the current buffer; expects `p` and `pe` to be in scope. */
#define REMAINING (unsigned long)(pe - p)
/* If field FOR is currently marked, add the bytes [FOR_mark, p) to its
 * running size and pass them to the user's on_FOR callback (when one is
 * installed).  When the size exceeds MAX_FIELD_SIZE, or when
 * callback_return_value is non-zero afterwards, sets the ERROR flag and
 * returns from the ENCLOSING (void) function.
 * Expects `parser`, `p` and `callback_return_value` to be in scope. */
#define CALLBACK(FOR) \
do { \
if (parser->FOR##_mark) { \
parser->FOR##_size += p - parser->FOR##_mark; \
if (parser->FOR##_size > MAX_FIELD_SIZE) { \
parser->flags |= ERROR; \
return; \
} \
if (parser->on_##FOR) { \
callback_return_value = parser->on_##FOR(parser, \
parser->FOR##_mark, \
p - parser->FOR##_mark); \
} \
if (callback_return_value != 0) { \
parser->flags |= ERROR; \
return; \
} \
} \
} while(0)
/* Reset the per-message fields of `parser` so it can parse the next
 * message: flags, field marks, status/method/version, and the body
 * bookkeeping.  keep_alive and content_length are set to -1; callbacks and
 * the state-machine state are left alone.
 * NOTE: expands to several statements without a do/while wrapper, so it must
 * not be used as the unbraced body of an if/else. */
#define RESET_PARSER(parser) \
parser->chunk_size = 0; \
parser->flags = 0; \
parser->header_field_mark = NULL; \
parser->header_value_mark = NULL; \
parser->query_string_mark = NULL; \
parser->path_mark = NULL; \
parser->uri_mark = NULL; \
parser->fragment_mark = NULL; \
parser->status_code = 0; \
parser->method = 0; \
parser->version = HTTP_VERSION_OTHER; \
parser->keep_alive = -1; \
parser->content_length = -1; \
parser->body_read = 0
/* Finish the current message: call on_message_complete (when installed),
 * storing its result in callback_return_value, then reset the per-message
 * parser fields.  The caller is responsible for checking
 * callback_return_value afterwards. */
#define END_REQUEST \
do { \
if (parser->on_message_complete) { \
callback_return_value = \
parser->on_message_complete(parser); \
} \
RESET_PARSER(parser); \
} while (0)
/* Consume `nskip` bytes of body starting at `p`: pass them to on_body (when
 * installed and nskip > 0) and, if callback_return_value is still 0, advance
 * `p` and update body_read / chunk_size.  When the chunk is exhausted the
 * EATING flag is cleared and, for non-chunked messages, the request is
 * ended; otherwise EATING is set so the next buffer continues the body.
 * Expects `parser`, `p`, `tmp` and `callback_return_value` to be in scope;
 * the caller checks callback_return_value afterwards. */
#define SKIP_BODY(nskip) \
do { \
tmp = (nskip); \
if (parser->on_body && tmp > 0) { \
callback_return_value = parser->on_body(parser, p, tmp); \
} \
if (callback_return_value == 0) { \
p += tmp; \
parser->body_read += tmp; \
parser->chunk_size -= tmp; \
if (0 == parser->chunk_size) { \
parser->flags &= ~EATING; \
if (!(parser->flags & CHUNKED)) { \
END_REQUEST; \
} \
} else { \
parser->flags |= EATING; \
} \
} \
} while (0)
/* Ragel state-machine specification for HTTP requests and responses.  The
 * actions are C fragments that run inside http_parser_execute() and rely on
 * its locals (`p`, `pe`, `tmp`, `callback_return_value`) and on `parser`. */
%%{
machine http_parser;
# --- actions: remember where a field starts ---
action mark_header_field {
parser->header_field_mark = p;
parser->header_field_size = 0;
}
action mark_header_value {
parser->header_value_mark = p;
parser->header_value_size = 0;
}
action mark_fragment {
parser->fragment_mark = p;
parser->fragment_size = 0;
}
action mark_query_string {
parser->query_string_mark = p;
parser->query_string_size = 0;
}
action mark_request_path {
parser->path_mark = p;
parser->path_size = 0;
}
action mark_request_uri {
parser->uri_mark = p;
parser->uri_size = 0;
}
# --- actions: a field ended; report it and clear its mark ---
action header_field {
CALLBACK(header_field);
parser->header_field_mark = NULL;
parser->header_field_size = 0;
}
action header_value {
CALLBACK(header_value);
parser->header_value_mark = NULL;
parser->header_value_size = 0;
}
action request_uri {
CALLBACK(uri);
parser->uri_mark = NULL;
parser->uri_size = 0;
}
action fragment {
CALLBACK(fragment);
parser->fragment_mark = NULL;
parser->fragment_size = 0;
}
action query_string {
CALLBACK(query_string);
parser->query_string_mark = NULL;
parser->query_string_size = 0;
}
action request_path {
CALLBACK(path);
parser->path_mark = NULL;
parser->path_size = 0;
}
# --- actions: message-level notifications ---
action headers_complete {
if(parser->on_headers_complete) {
callback_return_value = parser->on_headers_complete(parser);
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
}
}
action begin_message {
if(parser->on_message_begin) {
callback_return_value = parser->on_message_begin(parser);
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
}
}
# --- actions: numeric header values, one digit per invocation ---
action content_length {
/* -1 means "no Content-Length seen yet" */
if (parser->content_length == -1) parser->content_length = 0;
if (parser->content_length > INT_MAX) {
parser->flags |= ERROR;
return;
}
parser->content_length *= 10;
parser->content_length += *p - '0';
}
action status_code {
parser->status_code *= 10;
parser->status_code += *p - '0';
}
action use_chunked_encoding { parser->flags |= CHUNKED; }
action set_keep_alive { parser->keep_alive = 1; }
action set_not_keep_alive { parser->keep_alive = 0; }
action version_11 { parser->version = HTTP_VERSION_11; }
action version_10 { parser->version = HTTP_VERSION_10; }
action version_09 { parser->version = HTTP_VERSION_09; }
# --- actions: body handling ---
action add_to_chunk_size {
parser->chunk_size *= 16;
parser->chunk_size += unhex[(int)*p];
}
action skip_chunk_data {
/* consume as much of the chunk as this buffer holds */
SKIP_BODY(MIN(parser->chunk_size, REMAINING));
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
fhold;
if (parser->chunk_size > REMAINING) {
fbreak;
} else {
fgoto chunk_end;
}
}
action end_chunked_body {
END_REQUEST;
if (parser->type == HTTP_REQUEST) {
fnext Requests;
} else {
fnext Responses;
}
}
action body_logic {
if (parser->flags & CHUNKED) {
fnext ChunkedBody;
} else {
/* this is pretty stupid. i'd prefer to combine this with
* skip_chunk_data */
if (parser->content_length < 0) {
/* If we didn't get a content length; if not keep-alive
* just read body until EOF */
if (!http_parser_should_keep_alive(parser)) {
parser->flags |= EAT_FOREVER;
parser->chunk_size = REMAINING;
} else {
/* Otherwise, if keep-alive, then assume the message
* has no body. */
parser->chunk_size = parser->content_length = 0;
}
} else {
parser->chunk_size = parser->content_length;
}
p += 1;
SKIP_BODY(MIN(REMAINING, parser->chunk_size));
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
fhold;
if(parser->chunk_size > REMAINING) {
fbreak;
}
}
}
CRLF = "\r\n";
# character types
CTL = (cntrl | 127);
safe = ("$" | "-" | "_" | ".");
extra = ("!" | "*" | "'" | "(" | ")" | ",");
reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
national = any -- (alpha | digit | reserved | extra | safe | unsafe);
unreserved = (alpha | digit | safe | extra | national);
escape = ("%" xdigit xdigit);
uchar = (unreserved | escape | "\"");
pchar = (uchar | ":" | "@" | "&" | "=" | "+");
tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
| "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# elements
token = (ascii -- (CTL | tspecials));
quote = "\"";
# qdtext = token -- "\"";
# quoted_pair = "\" ascii;
# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
# headers
Method = ( "COPY" %{ parser->method = HTTP_COPY; }
| "DELETE" %{ parser->method = HTTP_DELETE; }
| "GET" %{ parser->method = HTTP_GET; }
| "HEAD" %{ parser->method = HTTP_HEAD; }
| "LOCK" %{ parser->method = HTTP_LOCK; }
| "MKCOL" %{ parser->method = HTTP_MKCOL; }
| "MOVE" %{ parser->method = HTTP_MOVE; }
| "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
| "POST" %{ parser->method = HTTP_POST; }
| "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
| "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
| "PUT" %{ parser->method = HTTP_PUT; }
| "TRACE" %{ parser->method = HTTP_TRACE; }
| "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
); # Not allowing extension methods
HTTP_Version = "HTTP/" ( "1.1" %version_11
| "1.0" %version_10
| "0.9" %version_09
| (digit "." digit)
);
# request target
scheme = ( alpha | digit | "+" | "-" | "." )* ;
absolute_uri = (scheme ":" (uchar | reserved )*);
path = ( pchar+ ( "/" pchar* )* ) ;
query = ( uchar | reserved )* >mark_query_string %query_string ;
param = ( pchar | "/" )* ;
params = ( param ( ";" param )* ) ;
rel_path = ( path? (";" params)? ) ;
absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
Fragment = ( uchar | reserved )* >mark_fragment %fragment;
# header lines; the capitalized variants carry the mark/report actions
field_name = ( token -- ":" )+;
Field_Name = field_name >mark_header_field %header_field;
field_value = ((any - " ") any*)?;
Field_Value = field_value >mark_header_value %header_value;
hsep = ":" " "*;
header = (field_name hsep field_value) :> CRLF;
Header = ( ("Content-Length"i hsep digit+ $content_length)
| ("Connection"i hsep
( "Keep-Alive"i %set_keep_alive
| "close"i %set_not_keep_alive
)
)
| ("Transfer-Encoding"i hsep "chunked"i %use_chunked_encoding)
| (Field_Name hsep Field_Value)
) :> CRLF;
Headers = (Header)* :> CRLF @headers_complete;
Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
StatusCode = (digit digit digit) $status_code;
ReasonPhrase = ascii* -- ("\r" | "\n");
StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF;
# chunked message
trailing_headers = header*;
#chunk_ext_val = token | quoted_string;
chunk_ext_val = token*;
chunk_ext_name = token*;
chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
last_chunk = "0"+ ( chunk_extension | " "+) CRLF;
chunk_size = (xdigit* [1-9a-fA-F] xdigit* ) $add_to_chunk_size;
chunk_end = CRLF;
chunk_body = any >skip_chunk_data;
chunk_begin = chunk_size ( chunk_extension | " "+ ) CRLF;
chunk = chunk_begin chunk_body chunk_end;
ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
# whole messages
Request = (Request_Line Headers) >begin_message @body_logic;
Response = (StatusLine Headers) >begin_message @body_logic;
Requests := Request*;
Responses := Response*;
# entry point: dispatch on the parser type without consuming the first byte
main := any >{
fhold;
if (parser->type == HTTP_REQUEST) {
fgoto Requests;
} else {
fgoto Responses;
}
};
}%%
%% write data;
/* Puts `parser` into its initial state for parsing messages of kind `type`.
 *
 * Every callback pointer is set to NULL here, so a caller that wants
 * callbacks has to assign them after this function returns; assigning
 * them beforehand has no effect.
 */
void
http_parser_init (http_parser *parser, enum http_parser_type type)
{
int cs = 0;
/* Ragel directive: replaced at generation time by code that assigns the
 * machine's start state to the local `cs`. */
%% write init;
parser->cs = cs;
parser->type = type;
/* No callbacks are installed by default. */
parser->on_message_begin = NULL;
parser->on_path = NULL;
parser->on_query_string = NULL;
parser->on_uri = NULL;
parser->on_fragment = NULL;
parser->on_header_field = NULL;
parser->on_header_value = NULL;
parser->on_headers_complete = NULL;
parser->on_body = NULL;
parser->on_message_complete = NULL;
/* RESET_PARSER is defined outside this section; presumably it clears the
 * per-message fields (marks, sizes, flags) -- TODO confirm. */
RESET_PARSER(parser);
}
/** exec **/
/* Feeds `len` bytes starting at `buffer` to the parser.
 *
 * A call with len == 0 is treated as end of input: `eof` is set to `pe`
 * only in that case. Nothing is returned; callback failures seen in this
 * function are recorded by setting ERROR in parser->flags, which
 * http_parser_has_error() reports.
 */
void
http_parser_execute (http_parser *parser, const char *buffer, size_t len)
{
/* NOTE(review): `tmp` is not referenced directly in this function;
 * presumably one of the macros used below relies on it -- confirm. */
size_t tmp; // REMOVE ME this is extremely hacky
int callback_return_value = 0;
const char *p, *pe, *eof;
/* Resume the state machine from the state saved by the previous call. */
int cs = parser->cs;
p = buffer;
pe = buffer+len;
eof = len ? NULL : pe;
/* EAT_FOREVER set: the state machine is bypassed entirely. Each non-empty
 * buffer is handed whole to on_body; an empty buffer triggers
 * on_message_complete. */
if (parser->flags & EAT_FOREVER) {
if (len == 0) {
if (parser->on_message_complete) {
callback_return_value = parser->on_message_complete(parser);
if (callback_return_value != 0) parser->flags |= ERROR;
}
} else {
if (parser->on_body) {
callback_return_value = parser->on_body(parser, p, len);
if (callback_return_value != 0) parser->flags |= ERROR;
}
}
return;
}
/* Body bytes are still outstanding (EATING set and chunk_size > 0):
 * consume at most chunk_size bytes of this buffer before running the
 * state machine. SKIP_BODY is defined outside this section; it is
 * expected to update callback_return_value -- TODO confirm. */
if (0 < parser->chunk_size && (parser->flags & EATING)) {
/* eat body */
SKIP_BODY(MIN(len, parser->chunk_size));
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
}
/* Any mark that is currently non-NULL is re-pointed at the start of this
 * buffer (presumably because the marked element began in an earlier
 * buffer that can no longer be referenced -- confirm). */
if (parser->header_field_mark) parser->header_field_mark = buffer;
if (parser->header_value_mark) parser->header_value_mark = buffer;
if (parser->fragment_mark) parser->fragment_mark = buffer;
if (parser->query_string_mark) parser->query_string_mark = buffer;
if (parser->path_mark) parser->path_mark = buffer;
if (parser->uri_mark) parser->uri_mark = buffer;
/* Ragel directive: replaced at generation time by the state machine's
 * execution code, which works on the locals cs, p, pe and eof. */
%% write exec;
/* Save the machine state so the next call can continue from it. */
parser->cs = cs;
/* CALLBACK is defined outside this section; presumably each invocation
 * reports the data accumulated so far for an element whose mark is still
 * set -- TODO confirm against the macro definition. */
CALLBACK(header_field);
CALLBACK(header_value);
CALLBACK(fragment);
CALLBACK(query_string);
CALLBACK(path);
CALLBACK(uri);
assert(p <= pe && "buffer overflow after parsing execute");
}
/* Returns 1 when the parser is in a failed state and 0 otherwise.
 *
 * Failure means either that the ERROR bit is set in parser->flags or that
 * the saved machine state equals http_parser_error. The flag is tested
 * first, and the state comparison is skipped when the flag is set.
 */
int
http_parser_has_error (http_parser *parser)
{
  return (parser->flags & ERROR) || (parser->cs == http_parser_error);
}

109
test.c

@ -653,10 +653,8 @@ parse_messages (int message_count, const struct message *input_messages[])
parser_init(HTTP_REQUEST); parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, length); http_parser_execute(&parser, total, length);
assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0); http_parser_execute(&parser, NULL, 0);
assert(!http_parser_has_error(&parser));
assert(num_messages == message_count); assert(num_messages == message_count);
@ -665,32 +663,86 @@ parse_messages (int message_count, const struct message *input_messages[])
} }
} }
/* Prints a human-readable report of a parse failure on stdout.
 *
 * message->raw is echoed with every '\r' shown as the two characters "\r"
 * and every '\n' shown as "\n" followed by a real line break. Output stops
 * after the line that contains byte offset `error_location`; a '^' is then
 * drawn under that byte, followed by the numeric offset.
 *
 * message         test message whose `name` and `raw` fields are printed.
 * error_location  byte offset into message->raw where parsing stopped.
 *
 * The caret is always printed, including when the offending line is the
 * last one and has no trailing '\n', and when error_location is at or past
 * the end of the text (the caret then sits just after the final character).
 */
static void
print_error (const struct message *message, size_t error_location)
{
  const char *raw = message->raw;
  size_t len = strlen(raw);
  size_t caret_column = 0;  /* printed width of the error line before the error */
  int on_error_line = 0;    /* set once the scan reaches error_location */
  int line_open = 0;        /* characters printed since the last line break */
  size_t i, j;

  printf("\n*** parse error on '%s' ***\n\n", message->name);

  for (i = 0; i < len; i++) {
    if (i == error_location) on_error_line = 1;

    if (raw[i] == '\n') {
      printf("\\n\n");
      line_open = 0;
      if (on_error_line) break;  /* the line holding the error is complete */
      caret_column = 0;
      continue;
    }

    if (raw[i] == '\r') {
      printf("\\r");
      /* the escape occupies two columns on screen */
      if (!on_error_line) caret_column += 2;
    } else {
      putchar(raw[i]);
      if (!on_error_line) caret_column += 1;
    }
    line_open = 1;
  }

  /* The text ended without a line break: finish the line so the caret
   * lands on a row of its own. */
  if (line_open) putchar('\n');

  for (j = 0; j < caret_column; j++) {
    putchar(' ');
  }
  /* %zu is the conversion that matches size_t. */
  printf("^\n\nerror location: %zu\n", error_location);
}
void void
test_message (const struct message *message) test_message (const struct message *message)
{ {
parser_init(message->type); parser_init(message->type);
http_parser_execute(&parser, message->raw, strlen(message->raw)); size_t read;
assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0); read = http_parser_execute(&parser, message->raw, strlen(message->raw));
assert(!http_parser_has_error(&parser)); if (read != strlen(message->raw)) {
print_error(message, read);
exit(1);
}
read = http_parser_execute(&parser, NULL, 0);
if (read != 0) {
print_error(message, read);
exit(1);
}
assert(num_messages == 1); assert(num_messages == 1);
message_eq(0, message); message_eq(0, message);
} }
void int
test_error (const char *buf) test_error (const char *buf)
{ {
parser_init(HTTP_REQUEST); parser_init(HTTP_REQUEST);
http_parser_execute(&parser, buf, strlen(buf)); size_t parsed;
http_parser_execute(&parser, NULL, 0);
parsed = http_parser_execute(&parser, buf, strlen(buf));
if (parsed != strlen(buf)) return 1;
parsed = http_parser_execute(&parser, NULL, 0);
if (parsed != 0) return 1;
printf("No error found in the following: %s\n", buf);
exit(1);
assert(http_parser_has_error(&parser)); return 0;
} }
void void
@ -710,10 +762,8 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct
parser_init(HTTP_REQUEST); parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, strlen(total)); http_parser_execute(&parser, total, strlen(total));
assert(!http_parser_has_error(&parser) );
http_parser_execute(&parser, NULL, 0); http_parser_execute(&parser, NULL, 0);
assert(!http_parser_has_error(&parser) );
assert(num_messages == 3); assert(num_messages == 3);
message_eq(0, r1); message_eq(0, r1);
@ -773,16 +823,12 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
*/ */
http_parser_execute(&parser, buf1, buf1_len); http_parser_execute(&parser, buf1, buf1_len);
assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf2, buf2_len); http_parser_execute(&parser, buf2, buf2_len);
assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf3, buf3_len); http_parser_execute(&parser, buf3, buf3_len);
assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0); http_parser_execute(&parser, NULL, 0);
assert(!http_parser_has_error(&parser));
assert(3 == num_messages); assert(3 == num_messages);
@ -797,8 +843,6 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int int
main (void) main (void)
{ {
int i, j, k;
printf("sizeof(http_parser) = %d\n", sizeof(http_parser)); printf("sizeof(http_parser) = %d\n", sizeof(http_parser));
int request_count; int request_count;
@ -808,18 +852,6 @@ main (void)
for (response_count = 0; responses[response_count].name; response_count++); for (response_count = 0; responses[response_count].name; response_count++);
//// RESPONSES
for (i = 0; i < response_count; i++) {
test_message(&responses[i]);
}
puts("responses okay");
/// REQUESTS /// REQUESTS
@ -871,15 +903,19 @@ main (void)
"HELLO"; "HELLO";
test_error(bad_get_no_headers_no_body); test_error(bad_get_no_headers_no_body);
/* TODO sending junk and large headers gets rejected */ /* TODO sending junk and large headers gets rejected */
/* check to make sure our predefined requests are okay */ /* check to make sure our predefined requests are okay */
int i;
for (i = 0; requests[i].name; i++) { for (i = 0; requests[i].name; i++) {
test_message(&requests[i]); test_message(&requests[i]);
} }
#if 0
int j, k;
for (i = 0; i < request_count; i++) { for (i = 0; i < request_count; i++) {
for (j = 0; j < request_count; j++) { for (j = 0; j < request_count; j++) {
for (k = 0; k < request_count; k++) { for (k = 0; k < request_count; k++) {
@ -910,5 +946,16 @@ main (void)
puts("requests okay"); puts("requests okay");
//// RESPONSES
for (i = 0; i < response_count; i++) {
test_message(&responses[i]);
}
puts("responses okay");
#endif
return 0; return 0;
} }

Loading…
Cancel
Save