From 433202d825fa34de5e42f810bb984ece05a36d20 Mon Sep 17 00:00:00 2001 From: Ryan Dahl Date: Tue, 17 Nov 2009 18:42:15 +0100 Subject: [PATCH] new version Trashing the old Ragel parser (which was based on Mongrel) because it's proving difficult to get the control I need in end-of-message cases. Replacing this with a hand written parser using a couple tricks borrowed from NGINX. The new parser will be much more work to write, but should prove faster and allow for better hacking. --- .gitignore | 1 - Makefile | 11 +- http_parser.c | 891 +++++++++++++++++++++++++++++++++++++++++++++++++ http_parser.h | 93 +----- http_parser.rl | 536 ----------------------------- test.c | 109 ++++-- 6 files changed, 985 insertions(+), 656 deletions(-) create mode 100644 http_parser.c delete mode 100644 http_parser.rl diff --git a/.gitignore b/.gitignore index 7df586c..be21bae 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ tags *.o test -http_parser.c diff --git a/Makefile b/Makefile index 899de24..fc0dbde 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -#OPT=-O0 -g -Wall -Wextra -Werror -OPT=-O2 +OPT=-O0 -g -Wall -Wextra -Werror +#OPT=-O2 test: http_parser.o test.c gcc $(OPT) http_parser.o test.c -o $@ @@ -7,10 +7,7 @@ test: http_parser.o test.c http_parser.o: http_parser.c http_parser.h Makefile gcc $(OPT) -c http_parser.c -http_parser.c: http_parser.rl Makefile - ragel -s -G2 http_parser.rl -o $@ - -tags: http_parser.rl http_parser.h test.c +tags: http_parser.c http_parser.h test.c ctags $^ clean: @@ -18,7 +15,7 @@ clean: package: http_parser.c @rm -rf /tmp/http_parser && mkdir /tmp/http_parser && \ - cp LICENSE README.md Makefile http_parser.c http_parser.rl \ + cp LICENSE README.md Makefile http_parser.c \ http_parser.h test.c /tmp/http_parser && \ cd /tmp && \ tar -cf http_parser.tar http_parser/ diff --git a/http_parser.c b/http_parser.c new file mode 100644 index 0000000..b8524ef --- /dev/null +++ b/http_parser.c @@ -0,0 +1,891 @@ +#include <http_parser.h> +#include <stdint.h> +#include <assert.h> + 
+#ifndef NULL +# define NULL ((void*)0) +#endif + +#define MAX_FIELD_SIZE (80*1024) + +#define MARK(FOR) \ +do { \ + parser->FOR##_mark = p; \ + parser->FOR##_size = 0; \ +} while (0) + +#define CALLBACK(FOR) \ +do { \ + if (0 != FOR##_callback(parser, p)) return (p - data); \ +} while (0) + + +#if 0 +do { \ + if (parser->FOR##_mark) { \ + parser->FOR##_size += p - parser->FOR##_mark; \ + if (parser->FOR##_size > MAX_FIELD_SIZE) { \ + return ERROR; \ + } \ + if (parser->on_##FOR) { \ + if (0 != parser->on_##FOR(parser, \ + parser->FOR##_mark, \ + p - parser->FOR##_mark)) \ + { \ + return ERROR; \ + } \ + } \ + } \ +} while(0) +#endif + +static inline int uri_callback (http_parser *parser, const char *p) +{ + assert(parser->uri_mark); + const char *mark = parser->uri_mark; + parser->uri_size += p - mark; + if (parser->uri_size > MAX_FIELD_SIZE) return -1; + if (parser->on_uri == NULL) return 0; + return parser->on_uri(parser, mark, p - mark); +} + +static inline int path_callback (http_parser *parser, const char *p) +{ + assert(parser->path_mark); + const char *mark = parser->path_mark; + parser->path_size += p - mark; + if (parser->path_size > MAX_FIELD_SIZE) return -1; + if (parser->on_path == NULL) return 0; + return parser->on_path(parser, mark, p - mark); +} + +static inline int query_string_callback (http_parser *parser, const char *p) +{ + assert(parser->query_string_mark); + const char *mark = parser->query_string_mark; + parser->query_string_size += p - mark; + if (parser->query_string_size > MAX_FIELD_SIZE) return -1; + if (parser->on_query_string == NULL) return 0; + return parser->on_query_string(parser, mark, p - mark); +} + +static inline int fragment_callback (http_parser *parser, const char *p) +{ + assert(parser->fragment_mark); + const char *mark = parser->fragment_mark; + parser->fragment_size += p - mark; + if (parser->fragment_size > MAX_FIELD_SIZE) return -1; + if (parser->on_fragment == NULL) return 0; + return parser->on_fragment(parser, 
mark, p - mark); +} + +static inline int header_field_callback (http_parser *parser, const char *p) +{ + assert(parser->header_field_mark); + const char *mark = parser->header_field_mark; + parser->header_field_size += p - mark; + if (parser->header_field_size > MAX_FIELD_SIZE) return -1; + if (parser->on_header_field == NULL) return 0; + return parser->on_header_field(parser, mark, p - mark); +} + +static inline int header_value_callback (http_parser *parser, const char *p) +{ + assert(parser->header_value_mark); + const char *mark = parser->header_value_mark; + parser->header_value_size += p - mark; + if (parser->header_value_size > MAX_FIELD_SIZE) return -1; + if (parser->on_header_value == NULL) return 0; + return parser->on_header_value(parser, mark, p - mark); +} + +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" + + +static const unsigned char lowcase[] = + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0" + "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0" + "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + + +static const uint32_t usual[] = { + 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */ + + /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! 
 */ + 0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */ + + /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */ + 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ + + /* ~}| {zyx wvut srqp onml kjih gfed cba` */ + 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ + + 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ + 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ + 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */ + 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */ +}; + +enum state + { s_start = 0 + + , s_method_G + , s_method_GE + + , s_method_P + , s_method_PU + , s_method_PO + , s_method_POS + + , s_method_H + , s_method_HE + , s_method_HEA + + , s_method_D + , s_method_DE + , s_method_DEL + , s_method_DELE + , s_method_DELET + + , s_spaces_before_uri + + , s_schema + , s_schema_slash + , s_schema_slash_slash + , s_host + , s_port + + , s_path + , s_query_string + , s_fragment + + , s_http_start + , s_http_H + , s_http_HT + , s_http_HTT + , s_http_HTTP + + , s_first_major_digit + , s_major_digit + , s_first_minor_digit + , s_minor_digit + + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + + , s_header_almost_done + + , s_headers_almost_done + , s_headers_done + }; + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + , h_matching_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_connection + , h_content_length + , h_transfer_encoding + , h_encoding_C + , h_connection_K + , h_connection_C + }; + +#define ERROR (p - data); +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) + +size_t http_parser_execute (http_parser *parser, const char *data, size_t len) +{ + char c, ch; + const char *p, *pe; + + enum state state = parser->state; + enum header_states header_state = parser->header_state; + size_t header_index = parser->header_index; + + if (parser->header_field_mark) 
parser->header_field_mark = data; + if (parser->header_value_mark) parser->header_value_mark = data; + if (parser->fragment_mark) parser->fragment_mark = data; + if (parser->query_string_mark) parser->query_string_mark = data; + if (parser->path_mark) parser->path_mark = data; + if (parser->uri_mark) parser->uri_mark = data; + + for (p=data, pe=data+len; p != pe; p++) { + ch = *p; + switch (state) { + case s_start: + { + switch (ch) { + /* GET */ + case 'G': + state = s_method_G; + break; + + /* POST, PUT */ + case 'P': + state = s_method_P; + break; + + /* HEAD */ + case 'H': + state = s_method_H; + break; + + /* DELETE */ + case 'D': + state = s_method_D; + break; + + case CR: + case LF: + break; + + default: + return ERROR; + } + break; + } + + /* GET */ + + case s_method_G: + if (ch != 'E') return ERROR; + state = s_method_GE; + break; + + case s_method_GE: + if (ch != 'T') return ERROR; + parser->method = HTTP_GET; + state = s_spaces_before_uri; + break; + + /* HEAD */ + + case s_method_H: + if (ch != 'E') return ERROR; + state = s_method_HE; + break; + + case s_method_HE: + if (ch != 'A') return ERROR; + state = s_method_HEA; + break; + + case s_method_HEA: + if (ch != 'D') return ERROR; + parser->method = HTTP_HEAD; + state = s_spaces_before_uri; + break; + + /* POST, PUT */ + + case s_method_P: + switch (ch) { + case 'O': + state = s_method_PO; + break; + + case 'U': + state = s_method_PU; + break; + + default: + return ERROR; + } + break; + + /* PUT */ + + case s_method_PU: + if (ch != 'T') return ERROR; + parser->method = HTTP_PUT; + state = s_spaces_before_uri; + break; + + /* POST */ + + case s_method_PO: + if (ch != 'S') return ERROR; + state = s_method_POS; + break; + + case s_method_POS: + if (ch != 'T') return ERROR; + parser->method = HTTP_POST; + state = s_spaces_before_uri; + break; + + /* DELETE */ + + case s_method_D: + if (ch != 'E') return ERROR; + state = s_method_DE; + break; + + case s_method_DE: + if (ch != 'L') return ERROR; + state = 
s_method_DEL; + break; + + case s_method_DEL: + if (ch != 'E') return ERROR; + state = s_method_DELE; + break; + + case s_method_DELE: + if (ch != 'T') return ERROR; + state = s_method_DELET; + break; + + case s_method_DELET: + if (ch != 'E') return ERROR; + parser->method = HTTP_DELETE; + state = s_spaces_before_uri; + break; + + + case s_spaces_before_uri: + { + if (ch == ' ') break; + + if (ch == '/') { + MARK(uri); + MARK(path); + state = s_path; + break; + } + + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') { + MARK(uri); + state = s_schema; + break; + } + + return ERROR; + } + + case s_schema: + { + c = LOWER(ch); + + if (c >= 'a' && c <= 'z') break; + + if (ch == ':') { + state = s_schema_slash; + break; + } + + return ERROR; + } + + case s_schema_slash: + if (ch != '/') return ERROR; + state = s_schema_slash_slash; + break; + + case s_schema_slash_slash: + if (ch != '/') return ERROR; + state = s_host; + break; + + case s_host: + { + c = LOWER(ch); + if (c >= 'a' && c <= 'z') break; + if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break; + switch (ch) { + case ':': + state = s_port; + break; + case '/': + MARK(path); + state = s_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com HTTP/1.1" + * That is, there is no path. + */ + CALLBACK(uri); + state = s_http_start; + break; + default: + return ERROR; + } + break; + } + + case s_port: + { + if (ch >= '0' && ch <= '9') break; + switch (ch) { + case '/': + MARK(path); + state = s_path; + break; + case ' ': + /* The request line looks like: + * "GET http://foo.bar.com:1234 HTTP/1.1" + * That is, there is no path. 
+ */ + CALLBACK(uri); + state = s_http_start; + break; + default: + return ERROR; + } + break; + } + + case s_path: + { + if (usual[ch >> 5] & (1 << (ch & 0x1f))) break; + + switch (ch) { + case ' ': + CALLBACK(uri); + CALLBACK(path); + state = s_http_start; + break; + case CR: + CALLBACK(uri); + CALLBACK(path); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(uri); + CALLBACK(path); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + CALLBACK(path); + MARK(query_string); + state = s_query_string; + break; + case '#': + CALLBACK(path); + MARK(fragment); + state = s_fragment; + break; + default: + return ERROR; + } + break; + } + + case s_query_string: + { + if (usual[ch >> 5] & (1 << (ch & 0x1f))) break; + + switch (ch) { + case ' ': + CALLBACK(uri); + CALLBACK(query_string); + state = s_http_start; + break; + case CR: + CALLBACK(uri); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(uri); + CALLBACK(query_string); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '#': + CALLBACK(query_string); + MARK(fragment); + state = s_fragment; + break; + default: + return ERROR; + } + break; + } + + case s_fragment: + { + if (usual[ch >> 5] & (1 << (ch & 0x1f))) break; + + switch (ch) { + case ' ': + CALLBACK(uri); + CALLBACK(fragment); + state = s_http_start; + break; + case CR: + CALLBACK(uri); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_req_line_almost_done; + break; + case LF: + CALLBACK(uri); + CALLBACK(fragment); + parser->http_minor = 9; + state = s_header_field_start; + break; + case '?': + case '#': + break; + default: + return ERROR; + } + break; + } + + case s_http_start: + switch (ch) { + case 'H': + state = s_http_H; + break; + case ' ': + break; + default: + return ERROR; + } + break; + + case s_http_H: + if (ch != 'T') return ERROR; + state = s_http_HT; + break; + + case s_http_HT: + 
if (ch != 'T') return ERROR; + state = s_http_HTT; + break; + + case s_http_HTT: + if (ch != 'P') return ERROR; + state = s_http_HTTP; + break; + + case s_http_HTTP: + if (ch != '/') return ERROR; + state = s_first_major_digit; + break; + + /* first digit of major HTTP version */ + case s_first_major_digit: + if (ch < '1' || ch > '9') return ERROR; + parser->http_major = ch - '0'; + state = s_major_digit; + break; + + /* major HTTP version or dot */ + case s_major_digit: + { + if (ch == '.') { + state = s_first_minor_digit; + break; + } + + if (ch < '0' || ch > '9') return ERROR; + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) return ERROR; + break; + } + + /* first digit of minor HTTP version */ + case s_first_minor_digit: + if (ch < '0' || ch > '9') return ERROR; + parser->http_minor = ch - '0'; + state = s_minor_digit; + break; + + /* minor HTTP version or end of request line */ + case s_minor_digit: + { + if (ch == CR) { + state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? 
*/ + + if (ch < '0' || ch > '9') return ERROR; + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) return ERROR; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) return ERROR; + state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + state = s_headers_almost_done; + break; + } + + if (ch == LF) { + state = s_headers_done; + break; + } + + c = LOWER(ch); + + if (c < 'a' || 'z' < c) return ERROR; + + MARK(header_field); + + header_index = 0; + state = s_header_field; + + switch (c) { + case 'c': + header_state = h_C; + break; + + case 't': + header_state = h_matching_transfer_encoding; + break; + + default: + header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + header_index++; + + c = lowcase[(int)ch]; + + if (c) { + switch (header_state) { + case h_general: + break; + + case h_C: + header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + switch (c) { + case 'n': + header_state = h_matching_connection; + break; + case 't': + header_state = h_matching_content_length; + break; + default: + header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + if (header_index > sizeof(CONNECTION)-1 + || c != CONNECTION[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(CONNECTION)-1) { + header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + if (header_index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(CONTENT_LENGTH)-1) { + header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + if (header_index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(TRANSFER_ENCODING)-1) { + header_state = h_transfer_encoding; + } + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + } + + if (ch == ':') { + CALLBACK(header_field); + state = s_header_value_start; + break; + } + + if (ch == CR) { + state = s_header_almost_done; + CALLBACK(header_field); + break; + } + + if (ch == LF) { + CALLBACK(header_field); + state = s_header_field_start; + break; + } + + return ERROR; + } + + case s_header_value_start: + { + if (ch == ' ') break; + + MARK(header_value); + + if (ch == CR) { + header_state = h_general; + state = s_header_almost_done; + } + + if (ch == LF) { + header_state = h_general; + state = s_headers_done; + } + + c = lowcase[(int)ch]; + + if (!c) return ERROR; + + switch (header_state) { + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + header_state = h_encoding_C; + } else { + header_state = h_general; + } + break; + + case h_content_length: + if (ch < '0' || 
ch > '9') return ERROR; + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + header_state = h_connection_K; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + header_state = h_connection_C; + } else { + header_state = h_general; + } + break; + + default: + state = s_header_value; + header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + break; + } + + case s_header_almost_done: + if (ch != LF) return ERROR; + state = s_header_field_start; + break; + + default: + assert(0 && "unhandled state"); + return ERROR; + } + } + + CALLBACK(header_field); + CALLBACK(header_value); + CALLBACK(fragment); + CALLBACK(query_string); + CALLBACK(path); + CALLBACK(uri); + + parser->state = state; + parser->header_state = header_state; + parser->header_index = header_index; + + return len; +} + +void +http_parser_init (http_parser *parser, enum http_parser_type type) +{ + if (type == HTTP_REQUEST) { + parser->state = s_start; + } else { + assert(0 && "responses not supported yet"); + } + + parser->on_message_begin = NULL; + parser->on_path = NULL; + parser->on_query_string = NULL; + parser->on_uri = NULL; + parser->on_fragment = NULL; + parser->on_header_field = NULL; + parser->on_header_value = NULL; + parser->on_headers_complete = NULL; + parser->on_body = NULL; + parser->on_message_complete = NULL; +} + diff --git a/http_parser.h b/http_parser.h index 8effd99..a6c11eb 100644 --- a/http_parser.h +++ b/http_parser.h @@ -1,63 +1,3 @@ -/* -Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw - and contributors. - -This source file is based on Mongrel's parser. Changes by Ryan Dahl - in 2008 and 2009. - -You can redistribute it and/or modify it under either the terms of the GPL2 -or the conditions below: - -1. 
You may make and give away verbatim copies of the source form of the - software without restriction, provided that you duplicate all of the - original copyright notices and associated disclaimers. - -2. You may modify your copy of the software in any way, provided that - you do at least ONE of the following: - - a) place your modifications in the Public Domain or otherwise make them - Freely Available, such as by posting said modifications to Usenet or an - equivalent medium, or by allowing the author to include your - modifications in the software. - - b) use the modified software only within your corporation or - organization. - - c) rename any non-standard executables so the names do not conflict with - standard executables, which must also be provided. - - d) make other distribution arrangements with the author. - -3. You may distribute the software in object code or executable - form, provided that you do at least ONE of the following: - - a) distribute the executables and library files of the software, - together with instructions (in the manual page or equivalent) on where - to get the original distribution. - - b) accompany the distribution with the machine-readable source of the - software. - - c) give non-standard executables non-standard names, with - instructions on where to get the original software distribution. - - d) make other distribution arrangements with the author. - -4. You may modify and include the part of the software into any other - software (possibly commercial). But some files in the distribution - are not written by the author, so that they are not under this terms. - -5. The scripts and library files supplied as input to or produced as - output from the software do not automatically fall under the - copyright of the software, but belong to whomever generated them, - and may be sold commercially, and may be aggregated with this - software. - -6. 
THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE. -*/ #ifndef http_parser_h #define http_parser_h #ifdef __cplusplus @@ -83,34 +23,20 @@ typedef int (*http_cb) (http_parser*); /* Request Methods */ enum http_method - { HTTP_COPY = 0x0001 - , HTTP_DELETE = 0x0002 + { HTTP_DELETE = 0x0002 , HTTP_GET = 0x0004 , HTTP_HEAD = 0x0008 - , HTTP_LOCK = 0x0010 - , HTTP_MKCOL = 0x0020 - , HTTP_MOVE = 0x0040 - , HTTP_OPTIONS = 0x0080 , HTTP_POST = 0x0100 - , HTTP_PROPFIND = 0x0200 - , HTTP_PROPPATCH = 0x0400 , HTTP_PUT = 0x0800 - , HTTP_TRACE = 0x1000 - , HTTP_UNLOCK = 0x2000 }; enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE }; -enum http_version - { HTTP_VERSION_OTHER = 0x00 - , HTTP_VERSION_11 = 0x01 - , HTTP_VERSION_10 = 0x02 - , HTTP_VERSION_09 = 0x04 - }; - struct http_parser { /** PRIVATE **/ - int cs; + int state; + int header_state; + size_t header_index; enum http_parser_type type; size_t chunk_size; @@ -134,7 +60,10 @@ struct http_parser { /** READ-ONLY **/ unsigned short status_code; /* responses only */ enum http_method method; /* requests only */ - enum http_version version; + + int http_major; + int http_minor; + short keep_alive; ssize_t content_length; @@ -163,14 +92,16 @@ struct http_parser { */ void http_parser_init (http_parser *parser, enum http_parser_type); -void http_parser_execute (http_parser *parser, const char *data, size_t len); +size_t http_parser_execute (http_parser *parser, const char *data, size_t len); +/* int http_parser_has_error (http_parser *parser); +*/ static inline int http_parser_should_keep_alive (http_parser *parser) { - if (parser->keep_alive == -1) return (parser->version == HTTP_VERSION_11); + if (parser->keep_alive == -1) return (parser->http_major == 1 && parser->http_minor == 1); return parser->keep_alive; } diff --git a/http_parser.rl b/http_parser.rl deleted file mode 100644 
index fcce0f4..0000000 --- a/http_parser.rl +++ /dev/null @@ -1,536 +0,0 @@ -/* -Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw - and contributors. - -This source file is based on Mongrel's parser. Changes by Ryan Dahl - in 2008 and 2009. - -You can redistribute it and/or modify it under either the terms of the GPL2 -or the conditions below: - -1. You may make and give away verbatim copies of the source form of the - software without restriction, provided that you duplicate all of the - original copyright notices and associated disclaimers. - -2. You may modify your copy of the software in any way, provided that - you do at least ONE of the following: - - a) place your modifications in the Public Domain or otherwise make them - Freely Available, such as by posting said modifications to Usenet or an - equivalent medium, or by allowing the author to include your - modifications in the software. - - b) use the modified software only within your corporation or - organization. - - c) rename any non-standard executables so the names do not conflict with - standard executables, which must also be provided. - - d) make other distribution arrangements with the author. - -3. You may distribute the software in object code or executable - form, provided that you do at least ONE of the following: - - a) distribute the executables and library files of the software, - together with instructions (in the manual page or equivalent) on where - to get the original distribution. - - b) accompany the distribution with the machine-readable source of the - software. - - c) give non-standard executables non-standard names, with - instructions on where to get the original software distribution. - - d) make other distribution arrangements with the author. - -4. You may modify and include the part of the software into any other - software (possibly commercial). But some files in the distribution - are not written by the author, so that they are not under this terms. 
- -5. The scripts and library files supplied as input to or produced as - output from the software do not automatically fall under the - copyright of the software, but belong to whomever generated them, - and may be sold commercially, and may be aggregated with this - software. - -6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR - IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE. -*/ -#include "http_parser.h" -#include -#include - -/* parser->flags */ -#define EATING 0x01 -#define ERROR 0x02 -#define CHUNKED 0x04 -#define EAT_FOREVER 0x10 - -static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 - ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 - ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 - , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 - ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 - ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 - ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 - ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 - }; - -#undef MIN -#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) - -#undef NULL -#define NULL ((void*)(0)) - -#define MAX_FIELD_SIZE (80*1024) - -#define REMAINING (unsigned long)(pe - p) -#define CALLBACK(FOR) \ -do { \ - if (parser->FOR##_mark) { \ - parser->FOR##_size += p - parser->FOR##_mark; \ - if (parser->FOR##_size > MAX_FIELD_SIZE) { \ - parser->flags |= ERROR; \ - return; \ - } \ - if (parser->on_##FOR) { \ - callback_return_value = parser->on_##FOR(parser, \ - parser->FOR##_mark, \ - p - parser->FOR##_mark); \ - } \ - if (callback_return_value != 0) { \ - parser->flags |= ERROR; \ - return; \ - } \ - } \ -} while(0) - -#define RESET_PARSER(parser) \ - parser->chunk_size = 0; \ - parser->flags = 0; \ - parser->header_field_mark = NULL; \ - parser->header_value_mark = NULL; \ - parser->query_string_mark = NULL; \ - parser->path_mark = NULL; \ - parser->uri_mark = NULL; \ - parser->fragment_mark = NULL; \ - parser->status_code = 0; \ - parser->method = 0; \ - parser->version = HTTP_VERSION_OTHER; \ - parser->keep_alive = -1; \ - parser->content_length = -1; \ - parser->body_read = 0 - -#define END_REQUEST \ -do { \ - if (parser->on_message_complete) { \ - callback_return_value = \ - parser->on_message_complete(parser); \ - } \ - RESET_PARSER(parser); \ -} while (0) - -#define SKIP_BODY(nskip) \ -do { \ - tmp = (nskip); \ - if (parser->on_body && tmp > 0) { \ - callback_return_value = parser->on_body(parser, p, tmp); \ - } \ - if (callback_return_value == 0) { \ - p += tmp; \ - parser->body_read += tmp; \ - parser->chunk_size -= tmp; \ - if (0 == parser->chunk_size) { \ - parser->flags &= ~EATING; \ - if (!(parser->flags & CHUNKED)) { \ - END_REQUEST; \ - } \ - } else { \ - parser->flags |= EATING; \ - } \ - } \ -} while (0) - -%%{ - machine http_parser; - - action mark_header_field { - parser->header_field_mark = p; - parser->header_field_size = 0; - } - - action mark_header_value { - parser->header_value_mark = p; - parser->header_value_size = 0; - } - - action mark_fragment { - parser->fragment_mark = p; - 
parser->fragment_size = 0; - } - - action mark_query_string { - parser->query_string_mark = p; - parser->query_string_size = 0; - } - - action mark_request_path { - parser->path_mark = p; - parser->path_size = 0; - } - - action mark_request_uri { - parser->uri_mark = p; - parser->uri_size = 0; - } - - action header_field { - CALLBACK(header_field); - parser->header_field_mark = NULL; - parser->header_field_size = 0; - } - - action header_value { - CALLBACK(header_value); - parser->header_value_mark = NULL; - parser->header_value_size = 0; - } - - action request_uri { - CALLBACK(uri); - parser->uri_mark = NULL; - parser->uri_size = 0; - } - - action fragment { - CALLBACK(fragment); - parser->fragment_mark = NULL; - parser->fragment_size = 0; - } - - action query_string { - CALLBACK(query_string); - parser->query_string_mark = NULL; - parser->query_string_size = 0; - } - - action request_path { - CALLBACK(path); - parser->path_mark = NULL; - parser->path_size = 0; - } - - action headers_complete { - if(parser->on_headers_complete) { - callback_return_value = parser->on_headers_complete(parser); - if (callback_return_value != 0) { - parser->flags |= ERROR; - return; - } - } - } - - action begin_message { - if(parser->on_message_begin) { - callback_return_value = parser->on_message_begin(parser); - if (callback_return_value != 0) { - parser->flags |= ERROR; - return; - } - } - } - - action content_length { - if (parser->content_length == -1) parser->content_length = 0; - if (parser->content_length > INT_MAX) { - parser->flags |= ERROR; - return; - } - parser->content_length *= 10; - parser->content_length += *p - '0'; - } - - action status_code { - parser->status_code *= 10; - parser->status_code += *p - '0'; - } - - action use_chunked_encoding { parser->flags |= CHUNKED; } - - action set_keep_alive { parser->keep_alive = 1; } - action set_not_keep_alive { parser->keep_alive = 0; } - - action version_11 { parser->version = HTTP_VERSION_11; } - action version_10 { 
parser->version = HTTP_VERSION_10; } - action version_09 { parser->version = HTTP_VERSION_09; } - - action add_to_chunk_size { - parser->chunk_size *= 16; - parser->chunk_size += unhex[(int)*p]; - } - - action skip_chunk_data { - SKIP_BODY(MIN(parser->chunk_size, REMAINING)); - if (callback_return_value != 0) { - parser->flags |= ERROR; - return; - } - - fhold; - if (parser->chunk_size > REMAINING) { - fbreak; - } else { - fgoto chunk_end; - } - } - - action end_chunked_body { - END_REQUEST; - if (parser->type == HTTP_REQUEST) { - fnext Requests; - } else { - fnext Responses; - } - } - - action body_logic { - if (parser->flags & CHUNKED) { - fnext ChunkedBody; - } else { - /* this is pretty stupid. i'd prefer to combine this with - * skip_chunk_data */ - if (parser->content_length < 0) { - /* If we didn't get a content length; if not keep-alive - * just read body until EOF */ - if (!http_parser_should_keep_alive(parser)) { - parser->flags |= EAT_FOREVER; - parser->chunk_size = REMAINING; - } else { - /* Otherwise, if keep-alive, then assume the message - * has no body. */ - parser->chunk_size = parser->content_length = 0; - } - } else { - parser->chunk_size = parser->content_length; - } - p += 1; - - SKIP_BODY(MIN(REMAINING, parser->chunk_size)); - - if (callback_return_value != 0) { - parser->flags |= ERROR; - return; - } - - fhold; - if(parser->chunk_size > REMAINING) { - fbreak; - } - } - } - - CRLF = "\r\n"; - -# character types - CTL = (cntrl | 127); - safe = ("$" | "-" | "_" | "."); - extra = ("!" | "*" | "'" | "(" | ")" | ","); - reserved = (";" | "/" | "?" 
| ":" | "@" | "&" | "=" | "+"); - unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">"); - national = any -- (alpha | digit | reserved | extra | safe | unsafe); - unreserved = (alpha | digit | safe | extra | national); - escape = ("%" xdigit xdigit); - uchar = (unreserved | escape | "\""); - pchar = (uchar | ":" | "@" | "&" | "=" | "+"); - tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" - | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t"); - -# elements - token = (ascii -- (CTL | tspecials)); - quote = "\""; -# qdtext = token -- "\""; -# quoted_pair = "\" ascii; -# quoted_string = "\"" (qdtext | quoted_pair )* "\""; - -# headers - - Method = ( "COPY" %{ parser->method = HTTP_COPY; } - | "DELETE" %{ parser->method = HTTP_DELETE; } - | "GET" %{ parser->method = HTTP_GET; } - | "HEAD" %{ parser->method = HTTP_HEAD; } - | "LOCK" %{ parser->method = HTTP_LOCK; } - | "MKCOL" %{ parser->method = HTTP_MKCOL; } - | "MOVE" %{ parser->method = HTTP_MOVE; } - | "OPTIONS" %{ parser->method = HTTP_OPTIONS; } - | "POST" %{ parser->method = HTTP_POST; } - | "PROPFIND" %{ parser->method = HTTP_PROPFIND; } - | "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; } - | "PUT" %{ parser->method = HTTP_PUT; } - | "TRACE" %{ parser->method = HTTP_TRACE; } - | "UNLOCK" %{ parser->method = HTTP_UNLOCK; } - ); # Not allowing extension methods - - HTTP_Version = "HTTP/" ( "1.1" %version_11 - | "1.0" %version_10 - | "0.9" %version_09 - | (digit "." digit) - ); - - scheme = ( alpha | digit | "+" | "-" | "." )* ; - absolute_uri = (scheme ":" (uchar | reserved )*); - path = ( pchar+ ( "/" pchar* )* ) ; - query = ( uchar | reserved )* >mark_query_string %query_string ; - param = ( pchar | "/" )* ; - params = ( param ( ";" param )* ) ; - rel_path = ( path? (";" params)? ) ; - absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" 
query)?; - Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri; - Fragment = ( uchar | reserved )* >mark_fragment %fragment; - - field_name = ( token -- ":" )+; - Field_Name = field_name >mark_header_field %header_field; - - field_value = ((any - " ") any*)?; - Field_Value = field_value >mark_header_value %header_value; - - hsep = ":" " "*; - header = (field_name hsep field_value) :> CRLF; - Header = ( ("Content-Length"i hsep digit+ $content_length) - | ("Connection"i hsep - ( "Keep-Alive"i %set_keep_alive - | "close"i %set_not_keep_alive - ) - ) - | ("Transfer-Encoding"i hsep "chunked"i %use_chunked_encoding) - | (Field_Name hsep Field_Value) - ) :> CRLF; - - Headers = (Header)* :> CRLF @headers_complete; - - Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ; - - StatusCode = (digit digit digit) $status_code; - ReasonPhrase = ascii* -- ("\r" | "\n"); - StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF; - -# chunked message - trailing_headers = header*; - #chunk_ext_val = token | quoted_string; - chunk_ext_val = token*; - chunk_ext_name = token*; - chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? 
)*; - last_chunk = "0"+ ( chunk_extension | " "+) CRLF; - chunk_size = (xdigit* [1-9a-fA-F] xdigit* ) $add_to_chunk_size; - chunk_end = CRLF; - chunk_body = any >skip_chunk_data; - chunk_begin = chunk_size ( chunk_extension | " "+ ) CRLF; - chunk = chunk_begin chunk_body chunk_end; - ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body; - - Request = (Request_Line Headers) >begin_message @body_logic; - Response = (StatusLine Headers) >begin_message @body_logic; - - Requests := Request*; - Responses := Response*; - - main := any >{ - fhold; - if (parser->type == HTTP_REQUEST) { - fgoto Requests; - } else { - fgoto Responses; - } - }; - -}%% - -%% write data; - -void -http_parser_init (http_parser *parser, enum http_parser_type type) -{ - int cs = 0; - %% write init; - parser->cs = cs; - parser->type = type; - - parser->on_message_begin = NULL; - parser->on_path = NULL; - parser->on_query_string = NULL; - parser->on_uri = NULL; - parser->on_fragment = NULL; - parser->on_header_field = NULL; - parser->on_header_value = NULL; - parser->on_headers_complete = NULL; - parser->on_body = NULL; - parser->on_message_complete = NULL; - - RESET_PARSER(parser); -} - -/** exec **/ -void -http_parser_execute (http_parser *parser, const char *buffer, size_t len) -{ - size_t tmp; // REMOVE ME this is extremely hacky - int callback_return_value = 0; - const char *p, *pe, *eof; - int cs = parser->cs; - - p = buffer; - pe = buffer+len; - eof = len ? 
NULL : pe; - - if (parser->flags & EAT_FOREVER) { - if (len == 0) { - if (parser->on_message_complete) { - callback_return_value = parser->on_message_complete(parser); - if (callback_return_value != 0) parser->flags |= ERROR; - } - } else { - if (parser->on_body) { - callback_return_value = parser->on_body(parser, p, len); - if (callback_return_value != 0) parser->flags |= ERROR; - } - } - return; - } - - if (0 < parser->chunk_size && (parser->flags & EATING)) { - /* eat body */ - SKIP_BODY(MIN(len, parser->chunk_size)); - if (callback_return_value != 0) { - parser->flags |= ERROR; - return; - } - } - - if (parser->header_field_mark) parser->header_field_mark = buffer; - if (parser->header_value_mark) parser->header_value_mark = buffer; - if (parser->fragment_mark) parser->fragment_mark = buffer; - if (parser->query_string_mark) parser->query_string_mark = buffer; - if (parser->path_mark) parser->path_mark = buffer; - if (parser->uri_mark) parser->uri_mark = buffer; - - %% write exec; - - parser->cs = cs; - - CALLBACK(header_field); - CALLBACK(header_value); - CALLBACK(fragment); - CALLBACK(query_string); - CALLBACK(path); - CALLBACK(uri); - - assert(p <= pe && "buffer overflow after parsing execute"); -} - -int -http_parser_has_error (http_parser *parser) -{ - if (parser->flags & ERROR) return 1; - return parser->cs == http_parser_error; -} diff --git a/test.c b/test.c index 803ea7f..7bb26cb 100644 --- a/test.c +++ b/test.c @@ -653,10 +653,8 @@ parse_messages (int message_count, const struct message *input_messages[]) parser_init(HTTP_REQUEST); http_parser_execute(&parser, total, length); - assert(!http_parser_has_error(&parser)); http_parser_execute(&parser, NULL, 0); - assert(!http_parser_has_error(&parser)); assert(num_messages == message_count); @@ -665,32 +663,86 @@ parse_messages (int message_count, const struct message *input_messages[]) } } +static void +print_error (const struct message *message, size_t error_location) +{ + printf("\n*** parse error on 
'%s' ***\n\n", message->name); + + int this_line = 0, char_len = 0; + size_t i, j, len = strlen(message->raw), error_location_line = 0; + for (i = 0; i < len; i++) { + if (i == error_location) this_line = 1; + switch (message->raw[i]) { + case '\r': + char_len = 2; + printf("\\r"); + break; + + case '\n': + char_len = 2; + printf("\\n\n"); + + if (this_line) { + for (j = 0; j < error_location_line; j++) { + putchar(' '); + } + printf("^\n\nerror location: %d\n", error_location); + return; + } + + error_location_line = 0; + continue; + + default: + char_len = 1; + putchar(message->raw[i]); + break; + } + if (!this_line) error_location_line += char_len; + } +} + void test_message (const struct message *message) { parser_init(message->type); - http_parser_execute(&parser, message->raw, strlen(message->raw)); - assert(!http_parser_has_error(&parser)); + size_t read; - http_parser_execute(&parser, NULL, 0); - assert(!http_parser_has_error(&parser)); + read = http_parser_execute(&parser, message->raw, strlen(message->raw)); + if (read != strlen(message->raw)) { + print_error(message, read); + exit(1); + } + + read = http_parser_execute(&parser, NULL, 0); + if (read != 0) { + print_error(message, read); + exit(1); + } assert(num_messages == 1); message_eq(0, message); } -void +int test_error (const char *buf) { parser_init(HTTP_REQUEST); - http_parser_execute(&parser, buf, strlen(buf)); - http_parser_execute(&parser, NULL, 0); + size_t parsed; + + parsed = http_parser_execute(&parser, buf, strlen(buf)); + if (parsed != strlen(buf)) return 1; + parsed = http_parser_execute(&parser, NULL, 0); + if (parsed != 0) return 1; + + printf("No error found in the following: %s\n", buf); + exit(1); - assert(http_parser_has_error(&parser)); + return 0; } void @@ -710,10 +762,8 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct parser_init(HTTP_REQUEST); http_parser_execute(&parser, total, strlen(total)); - assert(!http_parser_has_error(&parser) ); 
http_parser_execute(&parser, NULL, 0); - assert(!http_parser_has_error(&parser) ); assert(num_messages == 3); message_eq(0, r1); @@ -773,16 +823,12 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess */ http_parser_execute(&parser, buf1, buf1_len); - assert(!http_parser_has_error(&parser)); http_parser_execute(&parser, buf2, buf2_len); - assert(!http_parser_has_error(&parser)); http_parser_execute(&parser, buf3, buf3_len); - assert(!http_parser_has_error(&parser)); http_parser_execute(&parser, NULL, 0); - assert(!http_parser_has_error(&parser)); assert(3 == num_messages); @@ -797,8 +843,6 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess int main (void) { - int i, j, k; - printf("sizeof(http_parser) = %d\n", sizeof(http_parser)); int request_count; @@ -808,18 +852,6 @@ main (void) for (response_count = 0; responses[response_count].name; response_count++); - //// RESPONSES - - for (i = 0; i < response_count; i++) { - test_message(&responses[i]); - } - - - - puts("responses okay"); - - - /// REQUESTS @@ -871,15 +903,19 @@ main (void) "HELLO"; test_error(bad_get_no_headers_no_body); - /* TODO sending junk and large headers gets rejected */ /* check to make sure our predefined requests are okay */ + int i; for (i = 0; requests[i].name; i++) { test_message(&requests[i]); } +#if 0 + int j, k; + + for (i = 0; i < request_count; i++) { for (j = 0; j < request_count; j++) { for (k = 0; k < request_count; k++) { @@ -910,5 +946,16 @@ main (void) puts("requests okay"); + //// RESPONSES + + for (i = 0; i < response_count; i++) { + test_message(&responses[i]); + } + + + + puts("responses okay"); + +#endif return 0; }