commit 97d4572c6828a8d34259cae6e67e490b3877088c
Author: Ryan Dahl <ry@tinyclouds.org>
Date:   Sat Apr 25 13:33:16 2009 +0200

    Remove the "ebb_message" object from libebb's parser. And much more:

      * Begin the framework for parsing HTTP responses
      * Modify the test code for the new layout
      * Remove the EBB_ prefix and use just HTTP_
      * Start a simple new Makefile

    Currently the test passes, but it only checks HTTP requests. I get the
    feeling that it is much slower than before, although I have no solid
    evidence for this.

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7df586c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+tags
+*.o
+test
+http_parser.c
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..762c2da
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+test: http_parser.o test.c
+	gcc -g -O2 $^ -o $@
+
+http_parser.o: http_parser.c http_parser.h Makefile
+	gcc -g -c -O2 $<
+
+http_parser.c: http_parser.rl Makefile
+	ragel -s -G2 $< -o $@
+
+clean:
+	rm -f *.o http_parser.c test
+
+.PHONY: clean
+
diff --git a/http_parser.h b/http_parser.h
new file mode 100644
index 0000000..4cc4638
--- /dev/null
+++ b/http_parser.h
@@ -0,0 +1,128 @@
+/* Copyright (c) 2008 Ryan Dahl (ry@tinyclouds.org)
+ * All rights reserved.
+ *
+ * This parser is based on code from Zed Shaw's Mongrel.
+ * Copyright (c) 2005 Zed A. Shaw
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef http_parser_h
+#define http_parser_h
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+typedef struct http_parser http_parser;
+
+/* Callbacks should return non-zero to indicate an error. The parser will
+ * then halt execution.
+ *
+ * http_data_cb does not return data chunks. It will be called an arbitrary
+ * number of times for each string. E.g. you might get 10 callbacks for
+ * "on_path", each providing just a few more characters of data.
+ */
+typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
+typedef int (*http_cb) (http_parser*);
+
+/* Request Methods */
+#define HTTP_COPY       0x0001
+#define HTTP_DELETE     0x0002
+#define HTTP_GET        0x0004
+#define HTTP_HEAD       0x0008
+#define HTTP_LOCK       0x0010
+#define HTTP_MKCOL      0x0020
+#define HTTP_MOVE       0x0040
+#define HTTP_OPTIONS    0x0080
+#define HTTP_POST       0x0100
+#define HTTP_PROPFIND   0x0200
+#define HTTP_PROPPATCH  0x0400
+#define HTTP_PUT        0x0800
+#define HTTP_TRACE      0x1000
+#define HTTP_UNLOCK     0x2000
+/* Transfer Encodings */
+#define HTTP_IDENTITY   0x01
+#define HTTP_CHUNKED    0x02
+
+struct http_parser {
+  /** PRIVATE **/
+  int cs;
+  int is_request_stream;
+
+  size_t chunk_size;
+  unsigned eating:1;
+  size_t body_read;
+
+  const char *header_field_mark;
+  const char *header_value_mark;
+  const char *query_string_mark;
+  const char *path_mark;
+  const char *uri_mark;
+  const char *fragment_mark;
+
+  /** READ-ONLY **/
+  unsigned int status_code;    /* responses only */
+  unsigned int method;         /* requests only */
+  int transfer_encoding;
+  unsigned int version_major;
+  unsigned int version_minor;
+  unsigned int number_of_headers;
+  int keep_alive;
+  size_t content_length;
+
+  /** PUBLIC **/
+  void *data; /* A pointer for hooking the parser to the "connection" or "socket" object */
+
+  /* an ordered list of callbacks */
+
+  http_cb      on_message_begin;
+
+  /* requests only */
+  http_data_cb on_path;
+  http_data_cb on_query_string;
+  http_data_cb on_uri;
+  http_data_cb on_fragment;
+
+  http_data_cb on_header_field;
+  http_data_cb on_header_value;
+  http_cb      on_headers_complete;
+  http_data_cb on_body;
+  http_cb      on_message_complete;
+};
+
+/* Initializes an http_parser structure. The second argument specifies
+ * whether it will be parsing requests or responses: pass 1 for requests,
+ * 0 for responses.
+ */
+void http_parser_init (http_parser *parser, int is_request_stream);
+
+size_t http_parser_execute (http_parser *parser, const char *data, size_t len);
+
+int http_parser_has_error (http_parser *parser);
+
+int http_message_should_keep_alive (http_parser *message);
+#define http_message_has_body(parser) \
+  (parser->transfer_encoding == HTTP_CHUNKED || parser->content_length > 0)
+
+#ifdef __cplusplus
+}
+#endif
+#endif
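The header above is the entire public interface: initialize a parser, assign the callbacks you care about, then feed it whatever bytes arrive. The sketch below is not part of the commit; the names `on_path_cb` and `path_buf` are made up for illustration. It shows one way a caller might collect the path, keeping in mind that, as the header comment warns, `on_path` may fire several times with partial data.

    #include <stdio.h>
    #include <string.h>
    #include "http_parser.h"

    static char path_buf[1024];   /* illustration only: fixed-size accumulator */

    /* on_path may be called many times per request; append each piece. */
    static int on_path_cb (http_parser *parser, const char *at, size_t length)
    {
      strncat(path_buf, at, length);
      return 0;   /* returning non-zero would signal an error and halt the parser */
    }

    int main (void)
    {
      const char *buf = "GET /hello?a=b HTTP/1.1\r\nHost: example.com\r\n\r\n";

      http_parser parser;
      http_parser_init(&parser, 1);   /* 1 = parse requests */
      parser.on_path = on_path_cb;

      size_t nparsed = http_parser_execute(&parser, buf, strlen(buf));

      if (http_parser_has_error(&parser))
        printf("parse error after %zu bytes\n", nparsed);
      else
        printf("method=0x%x path=%s HTTP/%u.%u\n",
               parser.method, path_buf, parser.version_major, parser.version_minor);
      return 0;
    }
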
diff --git a/http_parser.rl b/http_parser.rl
new file mode 100644
index 0000000..e928902
--- /dev/null
+++ b/http_parser.rl
@@ -0,0 +1,381 @@
+/* Copyright (c) 2008, 2009 Ryan Dahl (ry@tinyclouds.org)
+ * All rights reserved.
+ *
+ * This parser is based on code from Zed Shaw's Mongrel.
+ * Copyright (c) 2005 Zed A. Shaw
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "http_parser.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
+                     ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
+                     };
+#define TRUE 1
+#define FALSE 0
+#define MIN(a,b) (a < b ? a : b)
+
+#define REMAINING (pe - p)
+#define CALLBACK(FOR)                                \
+  if(parser->FOR##_mark && parser->on_##FOR) {       \
+    parser->on_##FOR( parser                         \
+                    , parser->FOR##_mark             \
+                    , p - parser->FOR##_mark         \
+                    );                               \
+  }
+#define RESET_PARSER(parser)                         \
+  parser->chunk_size = 0;                            \
+  parser->eating = 0;                                \
+  parser->header_field_mark = NULL;                  \
+  parser->header_value_mark = NULL;                  \
+  parser->query_string_mark = NULL;                  \
+  parser->path_mark = NULL;                          \
+  parser->uri_mark = NULL;                           \
+  parser->fragment_mark = NULL;                      \
+  parser->status_code = 0;                           \
+  parser->method = 0;                                \
+  parser->transfer_encoding = HTTP_IDENTITY;         \
+  parser->version_major = 0;                         \
+  parser->version_minor = 0;                         \
+  parser->number_of_headers = 0;                     \
+  parser->keep_alive = 0;                            \
+  parser->content_length = 0;                        \
+  parser->body_read = 0;
+
+#define END_REQUEST                                  \
+  if(parser->on_message_complete) {                  \
+    parser->on_message_complete(parser);             \
+  }                                                  \
+  RESET_PARSER(parser);
+
+
+%%{
+  machine http_parser;
+
+  action mark_header_field { parser->header_field_mark = p; }
+  action mark_header_value { parser->header_value_mark = p; }
+  action mark_fragment     { parser->fragment_mark = p; }
+  action mark_query_string { parser->query_string_mark = p; }
+  action mark_request_path { parser->path_mark = p; }
+  action mark_request_uri  { parser->uri_mark = p; }
+
+  action write_field {
+    CALLBACK(header_field);
+    parser->header_field_mark = NULL;
+  }
+
+  action write_value {
+    CALLBACK(header_value);
+    parser->header_value_mark = NULL;
+  }
+
+  action request_uri {
+    CALLBACK(uri);
+    parser->uri_mark = NULL;
+  }
+
+  action fragment {
+    CALLBACK(fragment);
+    parser->fragment_mark = NULL;
+  }
+
+  action query_string {
+    CALLBACK(query_string);
+    parser->query_string_mark = NULL;
+  }
+
+  action request_path {
+    CALLBACK(path);
+    parser->path_mark = NULL;
+  }
+
+  action content_length {
+    parser->content_length *= 10;
+    parser->content_length += *p - '0';
+  }
+
+  action status_code {
+    parser->status_code *= 10;
+    parser->status_code += *p - '0';
+  }
+
+  action use_identity_encoding { parser->transfer_encoding = HTTP_IDENTITY; }
+  action use_chunked_encoding  { parser->transfer_encoding = HTTP_CHUNKED; }
+
+  action set_keep_alive     { parser->keep_alive = TRUE; }
+  action set_not_keep_alive { parser->keep_alive = FALSE; }
+
+  action trailer {
+    /* not implemented yet. (do requests even have trailing headers?) */
+  }
+
+  action version_major {
+    parser->version_major *= 10;
+    parser->version_major += *p - '0';
+  }
+
+  action version_minor {
+    parser->version_minor *= 10;
+    parser->version_minor += *p - '0';
+  }
+
+  action headers_complete {
+    if(parser->on_headers_complete)
+      parser->on_headers_complete(parser);
+  }
+
+  action add_to_chunk_size {
+    parser->chunk_size *= 16;
+    parser->chunk_size += unhex[(int)*p];
+  }
+
+  action skip_chunk_data {
+    skip_body(&p, parser, MIN(parser->chunk_size, REMAINING));
+    fhold;
+    if(parser->chunk_size > REMAINING) {
+      fbreak;
+    } else {
+      fgoto chunk_end;
+    }
+  }
+
+  action end_chunked_body {
+    END_REQUEST
+    fnext main;
+  }
+
+  action body_logic {
+    if(parser->transfer_encoding == HTTP_CHUNKED) {
+      fnext ChunkedBody;
+    } else {
+      /* this is pretty stupid. i'd prefer to combine this with skip_chunk_data */
+      parser->chunk_size = parser->content_length;
+      p += 1;
+      skip_body(&p, parser, MIN(REMAINING, parser->content_length));
+      fhold;
+      if(parser->chunk_size > REMAINING) {
+        fbreak;
+      }
+    }
+  }
+
+
+  CRLF = "\r\n";
+
+# character types
+  CTL = (cntrl | 127);
+  safe = ("$" | "-" | "_" | ".");
+  extra = ("!" | "*" | "'" | "(" | ")" | ",");
+  reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
+  unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
+  national = any -- (alpha | digit | reserved | extra | safe | unsafe);
+  unreserved = (alpha | digit | safe | extra | national);
+  escape = ("%" xdigit xdigit);
+  uchar = (unreserved | escape);
+  pchar = (uchar | ":" | "@" | "&" | "=" | "+");
+  tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
+              | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
+
+# elements
+  token = (ascii -- (CTL | tspecials));
+  quote = "\"";
+# qdtext = token -- "\""
+# quoted_pair = "\" ascii;
+# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
+
+# headers
+
+  Method = ( "COPY"      %{ parser->method = HTTP_COPY;      }
+           | "DELETE"    %{ parser->method = HTTP_DELETE;    }
+           | "GET"       %{ parser->method = HTTP_GET;       }
+           | "HEAD"      %{ parser->method = HTTP_HEAD;      }
+           | "LOCK"      %{ parser->method = HTTP_LOCK;      }
+           | "MKCOL"     %{ parser->method = HTTP_MKCOL;     }
+           | "MOVE"      %{ parser->method = HTTP_MOVE;      }
+           | "OPTIONS"   %{ parser->method = HTTP_OPTIONS;   }
+           | "POST"      %{ parser->method = HTTP_POST;      }
+           | "PROPFIND"  %{ parser->method = HTTP_PROPFIND;  }
+           | "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
+           | "PUT"       %{ parser->method = HTTP_PUT;       }
+           | "TRACE"     %{ parser->method = HTTP_TRACE;     }
+           | "UNLOCK"    %{ parser->method = HTTP_UNLOCK;    }
+           ); # Not allowing extension methods
+
+  HTTP_Version = "HTTP/" digit+ $version_major "." digit+ $version_minor;
+
+  scheme = ( alpha | digit | "+" | "-" | "." )* ;
+  absolute_uri = (scheme ":" (uchar | reserved )*);
+  path = ( pchar+ ( "/" pchar* )* ) ;
+  query = ( uchar | reserved )* >mark_query_string %query_string ;
+  param = ( pchar | "/" )* ;
+  params = ( param ( ";" param )* ) ;
+  rel_path = ( path? (";" params)? ) ;
+  absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
+  Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
+  Fragment = ( uchar | reserved )* >mark_fragment %fragment;
+
+  field_name = ( token -- ":" )+;
+  Field_Name = field_name >mark_header_field %write_field;
+
+  field_value = ((any - " ") any*)?;
+  Field_Value = field_value >mark_header_value %write_value;
+
+  hsep = ":" " "*;
+  header = (field_name hsep field_value) :> CRLF;
+  Header = ( ("Content-Length"i hsep digit+ $content_length)
+           | ("Connection"i hsep
+               ( "Keep-Alive"i %set_keep_alive
+               | "close"i %set_not_keep_alive
+               )
+             )
+           | ("Transfer-Encoding"i %use_chunked_encoding hsep "identity" %use_identity_encoding)
+           | (Field_Name hsep Field_Value)
+           ) :> CRLF;
+
+  Headers = (Header)* :> CRLF @headers_complete;
+
+  Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
+
+  StatusCode = digit digit digit $status_code;
+  ReasonPhrase = ascii -- ("\r" | "\n");
+  StatusLine = HTTP_Version " " StatusCode " " ReasonPhrase CRLF;
+
+# chunked message
+  trailing_headers = header*;
+  #chunk_ext_val = token | quoted_string;
+  chunk_ext_val = token*;
+  chunk_ext_name = token*;
+  chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
+  last_chunk = "0"+ chunk_extension CRLF;
+  chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size;
+  chunk_end = CRLF;
+  chunk_body = any >skip_chunk_data;
+  chunk_begin = chunk_size chunk_extension CRLF;
+  chunk = chunk_begin chunk_body chunk_end;
+  ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
+
+  Request = (Request_Line Headers) @body_logic;
+  Response = (StatusLine Headers) @body_logic;
+
+  Requests = Request*;
+  Responses = Response*;
+
+  main := Requests | Responses ;
+}%%
+
+%% write data;
+
+static void
+skip_body(const char **p, http_parser *parser, size_t nskip) {
+  if(parser->on_body && nskip > 0) {
+    parser->on_body(parser, *p, nskip);
+  }
+  parser->body_read += nskip;
+  parser->chunk_size -= nskip;
+  *p += nskip;
+  if(0 == parser->chunk_size) {
+    parser->eating = FALSE;
+    if(parser->transfer_encoding == HTTP_IDENTITY) {
+      END_REQUEST
+    }
+  } else {
+    parser->eating = TRUE;
+  }
+}
+
+void http_parser_init(http_parser *parser, int is_request_stream)
+{
+  memset(parser, 0, sizeof(struct http_parser));
+
+  int cs = 0;
+  %% write init;
+  parser->cs = cs;
+  parser->is_request_stream = is_request_stream;
+
+  RESET_PARSER(parser);
+}
+
+/** exec **/
+size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len)
+{
+  const char *p, *pe;
+  int cs = parser->cs;
+
+  p = buffer;
+  pe = buffer+len;
+
+  if(0 < parser->chunk_size && parser->eating) {
+    /* eat body */
+    size_t eat = MIN(len, parser->chunk_size);
+    skip_body(&p, parser, eat);
+  }
+
+  if(parser->header_field_mark) parser->header_field_mark = buffer;
+  if(parser->header_value_mark) parser->header_value_mark = buffer;
+  if(parser->fragment_mark)     parser->fragment_mark     = buffer;
+  if(parser->query_string_mark) parser->query_string_mark = buffer;
+  if(parser->path_mark)         parser->path_mark         = buffer;
+  if(parser->uri_mark)          parser->uri_mark          = buffer;
+
+  %% write exec;
+
+  parser->cs = cs;
+
+  CALLBACK(header_field);
+  CALLBACK(header_value);
+  CALLBACK(fragment);
+  CALLBACK(query_string);
+  CALLBACK(path);
+  CALLBACK(uri);
+
+  assert(p <= pe && "buffer overflow after parsing execute");
+
+  return(p - buffer);
+}
+
+int http_parser_has_error(http_parser *parser)
+{
+  return parser->cs == http_parser_error;
+}
+
+#if 0
+int http_should_keep_alive(http *request)
+{
+  if(request->keep_alive == -1)
+    if(request->version_major == 1)
+      return (request->version_minor != 0);
+    else if(request->version_major == 0)
+      return FALSE;
+    else
+      return TRUE;
+  else
+    return request->keep_alive;
+}
+#endif
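Because http_parser_execute() saves the Ragel state in parser->cs and re-points the *_mark pointers at the start of each new buffer, a message can be fed to the parser in as many pieces as the socket happens to deliver; this is exactly what test_scan2/test_scan3 below exercise. A small sketch of that usage (not part of the commit; the split point and the names part1/part2/on_value are arbitrary illustrations): data callbacks such as on_header_value may simply fire more than once for the same header.

    #include <stdio.h>
    #include <string.h>
    #include "http_parser.h"

    /* May run several times per header value when the value straddles two reads. */
    static int on_value (http_parser *parser, const char *at, size_t length)
    {
      printf("value piece: '%.*s'\n", (int)length, at);
      return 0;
    }

    int main (void)
    {
      /* One request, arbitrarily split in the middle of the Host header value. */
      const char *part1 = "GET / HTTP/1.1\r\nHost: exam";
      const char *part2 = "ple.com\r\n\r\n";

      http_parser parser;
      http_parser_init(&parser, 1);
      parser.on_header_value = on_value;

      http_parser_execute(&parser, part1, strlen(part1));   /* prints "exam" */
      http_parser_execute(&parser, part2, strlen(part2));   /* prints "ple.com" */

      return http_parser_has_error(&parser) ? 1 : 0;
    }
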
diff --git a/test.c b/test.c
new file mode 100644
index 0000000..792b2df
--- /dev/null
+++ b/test.c
@@ -0,0 +1,763 @@
+#include "http_parser.h"
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#define TRUE 1
+#define FALSE 0
+
+#define MAX_HEADERS 10
+#define MAX_ELEMENT_SIZE 200
+
+static http_parser parser;
+struct message {
+  const char *raw;
+  int method;
+  char request_path[MAX_ELEMENT_SIZE];
+  char request_uri[MAX_ELEMENT_SIZE];
+  char fragment[MAX_ELEMENT_SIZE];
+  char query_string[MAX_ELEMENT_SIZE];
+  char body[MAX_ELEMENT_SIZE];
+  int num_headers;
+  enum { NONE=0, FIELD, VALUE } last_header_element;
+  char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE];
+  int should_keep_alive;
+};
+static struct message messages[5];
+static int num_messages;
+
+const struct message curl_get =
+  { raw: "GET /test HTTP/1.1\r\n"
+         "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n"
+         "Host: 0.0.0.0:5000\r\n"
+         "Accept: */*\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/test"
+  , request_uri: "/test"
+  , num_headers: 3
+  , headers:
+    { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" }
+    , { "Host", "0.0.0.0:5000" }
+    , { "Accept", "*/*" }
+    }
+  , body: ""
+  };
+
+const struct message firefox_get =
+  { raw: "GET /favicon.ico HTTP/1.1\r\n"
+         "Host: 0.0.0.0:5000\r\n"
+         "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0\r\n"
+         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
+         "Accept-Language: en-us,en;q=0.5\r\n"
+         "Accept-Encoding: gzip,deflate\r\n"
+         "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n"
+         "Keep-Alive: 300\r\n"
+         "Connection: keep-alive\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/favicon.ico"
+  , request_uri: "/favicon.ico"
+  , num_headers: 8
+  , headers:
+    { { "Host", "0.0.0.0:5000" }
+    , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" }
+    , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" }
+    , { "Accept-Language", "en-us,en;q=0.5" }
+    , { "Accept-Encoding", "gzip,deflate" }
+    , { "Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7" }
+    , { "Keep-Alive", "300" }
+    , { "Connection", "keep-alive" }
+    }
+  , body: ""
+  };
+
+const struct message dumbfuck =
+  { raw: "GET /dumbfuck HTTP/1.1\r\n"
+         "aaaaaaaaaaaaa:++++++++++\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/dumbfuck"
+  , request_uri: "/dumbfuck"
+  , num_headers: 1
+  , headers:
+    { { "aaaaaaaaaaaaa", "++++++++++" }
+    }
+  , body: ""
+  };
+
+const struct message fragment_in_uri =
+  { raw: "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: "page=1"
+  , fragment: "posts-17408"
+  , request_path: "/forums/1/topics/2375"
+  /* XXX request uri does not include fragment? */
+  , request_uri: "/forums/1/topics/2375?page=1"
+  , num_headers: 0
+  , body: ""
+  };
+
+// get - no headers - no body
+const struct message get_no_headers_no_body =
+  { raw: "GET /get_no_headers_no_body/world HTTP/1.1\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/get_no_headers_no_body/world"
+  , request_uri: "/get_no_headers_no_body/world"
+  , num_headers: 0
+  , body: ""
+  };
+
+// get - one header - no body
+const struct message get_one_header_no_body =
+  { raw: "GET /get_one_header_no_body HTTP/1.1\r\n"
+         "Accept: */*\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/get_one_header_no_body"
+  , request_uri: "/get_one_header_no_body"
+  , num_headers: 1
+  , headers:
+    { { "Accept" , "*/*" }
+    }
+  , body: ""
+  };
+
+// get - one header - body "HELLO"
+const struct message get_funky_content_length_body_hello =
+  { raw: "GET /get_funky_content_length_body_hello HTTP/1.0\r\n"
+         "conTENT-Length: 5\r\n"
+         "\r\n"
+         "HELLO"
+  , should_keep_alive: FALSE
+  , method: HTTP_GET
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/get_funky_content_length_body_hello"
+  , request_uri: "/get_funky_content_length_body_hello"
+  , num_headers: 1
+  , headers:
+    { { "conTENT-Length" , "5" }
+    }
+  , body: "HELLO"
+  };
+
+// post - three headers - body "World"
+const struct message post_identity_body_world =
+  { raw: "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n"
+         "Accept: */*\r\n"
+         "Transfer-Encoding: identity\r\n"
+         "Content-Length: 5\r\n"
+         "\r\n"
+         "World"
+  , should_keep_alive: TRUE
+  , method: HTTP_POST
+  , query_string: "q=search"
+  , fragment: "hey"
+  , request_path: "/post_identity_body_world"
+  , request_uri: "/post_identity_body_world?q=search"
+  , num_headers: 3
+  , headers:
+    { { "Accept", "*/*" }
+    , { "Transfer-Encoding", "identity" }
+    , { "Content-Length", "5" }
+    }
+  , body: "World"
+  };
+
+// post - one header - chunked body "all your base are belong to us"
+const struct message post_chunked_all_your_base =
+  { raw: "POST /post_chunked_all_your_base HTTP/1.1\r\n"
+         "Transfer-Encoding: chunked\r\n"
+         "\r\n"
+         "1e\r\nall your base are belong to us\r\n"
+         "0\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_POST
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/post_chunked_all_your_base"
+  , request_uri: "/post_chunked_all_your_base"
+  , num_headers: 1
+  , headers:
+    { { "Transfer-Encoding" , "chunked" }
+    }
+  , body: "all your base are belong to us"
+  };
+
+// two chunks ; triple zero ending
+const struct message two_chunks_mult_zero_end =
+  { raw: "POST /two_chunks_mult_zero_end HTTP/1.1\r\n"
+         "Transfer-Encoding: chunked\r\n"
+         "\r\n"
+         "5\r\nhello\r\n"
+         "6\r\n world\r\n"
+         "000\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_POST
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/two_chunks_mult_zero_end"
+  , request_uri: "/two_chunks_mult_zero_end"
+  , num_headers: 1
+  , headers:
+    { { "Transfer-Encoding", "chunked" }
+    }
+  , body: "hello world"
+  };
+
+// chunked with trailing headers. blech.
+const struct message chunked_w_trailing_headers =
+  { raw: "POST /chunked_w_trailing_headers HTTP/1.1\r\n"
+         "Transfer-Encoding: chunked\r\n"
+         "\r\n"
+         "5\r\nhello\r\n"
+         "6\r\n world\r\n"
+         "0\r\n"
+         "Vary: *\r\n"
+         "Content-Type: text/plain\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_POST
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/chunked_w_trailing_headers"
+  , request_uri: "/chunked_w_trailing_headers"
+  , num_headers: 1
+  , headers:
+    { { "Transfer-Encoding", "chunked" }
+    }
+  , body: "hello world"
+  };
+
+// with bullshit after the length
+const struct message chunked_w_bullshit_after_length =
+  { raw: "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n"
+         "Transfer-Encoding: chunked\r\n"
+         "\r\n"
+         "5; ihatew3;whatthefuck=aretheseparametersfor\r\nhello\r\n"
+         "6; blahblah; blah\r\n world\r\n"
+         "0\r\n"
+         "\r\n"
+  , should_keep_alive: TRUE
+  , method: HTTP_POST
+  , query_string: ""
+  , fragment: ""
+  , request_path: "/chunked_w_bullshit_after_length"
+  , request_uri: "/chunked_w_bullshit_after_length"
+  , num_headers: 1
+  , headers:
+    { { "Transfer-Encoding", "chunked" }
+    }
+  , body: "hello world"
+  };
+
+const struct message *requests[] =
+  { &curl_get
+  , &firefox_get
+  , &dumbfuck
+  , &fragment_in_uri
+  , &get_no_headers_no_body
+  , &get_one_header_no_body
+  , &get_funky_content_length_body_hello
+  , &post_identity_body_world
+  , &post_chunked_all_your_base
+  , &two_chunks_mult_zero_end
+  , &chunked_w_trailing_headers
+  , &chunked_w_bullshit_after_length
+  , NULL
+  };
+
+
+int
+message_eq (struct message *r1, const struct message *r2)
+{
+  /*
+  if(http_should_keep_alive(&r1->request) != r2->should_keep_alive) {
+    printf("requests disagree on keep-alive");
+    assert(0);
+    return FALSE;
+  }
+  */
+
+  if(0 != strcmp(r1->body, r2->body)) {
+    printf("body '%s' != '%s'\n", r1->body, r2->body);
+    assert(0);
+    return FALSE;
+  }
+  if(0 != strcmp(r1->fragment, r2->fragment)) {
+    printf("fragment '%s' != '%s'\n", r1->fragment, r2->fragment);
+    assert(0);
+    return FALSE;
+  }
+  if(0 != strcmp(r1->query_string, r2->query_string)) {
+    printf("query_string '%s' != '%s'\n", r1->query_string, r2->query_string);
+    assert(0);
+    return FALSE;
+  }
+  if(r1->method != r2->method) {
+    printf("method '%d' != '%d'\n", r1->method, r2->method);
+    assert(0);
+    return FALSE;
+  }
+  if(0 != strcmp(r1->request_path, r2->request_path)) {
+    printf("request_path '%s' != '%s'\n", r1->request_path, r2->request_path);
+    assert(0);
+    return FALSE;
+  }
+  if(0 != strcmp(r1->request_uri, r2->request_uri)) {
+    printf("request_uri '%s' != '%s'\n", r1->request_uri, r2->request_uri);
+    assert(0);
+    return FALSE;
+  }
+  if(r1->num_headers != r2->num_headers) {
+    printf("num_headers '%d' != '%d'\n", r1->num_headers, r2->num_headers);
+    assert(0);
+    return FALSE;
+  }
+  int i;
+  for(i = 0; i < r1->num_headers; i++) {
+    if(0 != strcmp(r1->headers[i][0], r2->headers[i][0])) {
+      printf( "header field '%s' != '%s'\n"
+            , r1->headers[i][0]
+            , r2->headers[i][0]
+            );
+      assert(0);
+      return FALSE;
+    }
+    if(0 != strcmp(r1->headers[i][1], r2->headers[i][1])) {
+      printf( "header value '%s' != '%s'\n"
+            , r1->headers[i][1]
+            , r2->headers[i][1]
+            );
+      assert(0);
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+int
+request_eq (int index, const struct message *expected)
+{
+  return message_eq(&messages[index], expected);
+}
+
+int request_path_cb(http_parser *_, const char *p, size_t len)
+{
+  strncat(messages[num_messages].request_path, p, len);
+  return 0;
+}
+
+int request_uri_cb(http_parser *_, const char *p, size_t len)
+{
+  strncat(messages[num_messages].request_uri, p, len);
+  return 0;
+}
+
+int query_string_cb(http_parser *_, const char *p, size_t len)
+{
+  strncat(messages[num_messages].query_string, p, len);
+  return 0;
+}
+
+int fragment_cb(http_parser *_, const char *p, size_t len)
+{
+  strncat(messages[num_messages].fragment, p, len);
+  return 0;
+}
+
+int header_field_cb(http_parser *_, const char *p, size_t len)
+{
+  struct message *m = &messages[num_messages];
+
+  if (m->last_header_element != FIELD)
+    m->num_headers++;
+
+  strncat(m->headers[m->num_headers-1][0], p, len);
+
+  m->last_header_element = FIELD;
+
+  return 0;
+}
+
+int header_value_cb (http_parser *_, const char *p, size_t len)
+{
+  struct message *m = &messages[num_messages];
+
+  strncat(m->headers[m->num_headers-1][1], p, len);
+
+  m->last_header_element = VALUE;
+
+  return 0;
+}
+
+int body_handler (http_parser *_, const char *p, size_t len)
+{
+  strncat(messages[num_messages].body, p, len);
+ // printf("body_handler: '%s'\n", requests[num_messages].body);
+  return 0;
+}
+
+int message_complete(http_parser *parser)
+{
+  messages[num_messages].method = parser->method;
+
+  num_messages++;
+  return 0;
+}
+
+int begin_message (http_parser *_)
+{
+  return 0;
+}
+
+void
+parser_init (void)
+{
+  num_messages = 0;
+
+  http_parser_init(&parser, 1);
+
+  memset(&messages, 0, sizeof messages);
+
+  parser.on_message_begin = begin_message;
+  parser.on_header_field = header_field_cb;
+  parser.on_header_value = header_value_cb;
+  parser.on_path = request_path_cb;
+  parser.on_uri = request_uri_cb;
+  parser.on_fragment = fragment_cb;
+  parser.on_query_string = query_string_cb;
+  parser.on_body = body_handler;
+  parser.on_headers_complete = NULL;
+  parser.on_message_complete = message_complete;
+}
+
+int test_request
+  ( const struct message *message
+  )
+{
+  size_t traversed = 0;
+  parser_init();
+
+  traversed = http_parser_execute( &parser
+                                 , message->raw
+                                 , strlen(message->raw)
+                                 );
+  if( http_parser_has_error(&parser) )
+    return FALSE;
+  if(num_messages != 1)
+    return FALSE;
+
+  return request_eq(0, message);
+}
+
+int test_error
+  ( const char *buf
+  )
+{
+  size_t traversed = 0;
+  parser_init();
+
+  traversed = http_parser_execute(&parser, buf, strlen(buf));
+
+  return http_parser_has_error(&parser);
+}
+
+
+int test_multiple3
+  ( const struct message *r1
+  , const struct message *r2
+  , const struct message *r3
+  )
+{
+  char total[ strlen(r1->raw)
+            + strlen(r2->raw)
+            + strlen(r3->raw)
+            + 1
+            ];
+  total[0] = '\0';
+
+  strcat(total, r1->raw);
+  strcat(total, r2->raw);
+  strcat(total, r3->raw);
+
+  size_t traversed = 0;
+  parser_init();
+
+  traversed = http_parser_execute(&parser, total, strlen(total));
+
+  if( http_parser_has_error(&parser) ) {
+    puts("parser error");
+    return FALSE;
+  }
+
+  if(num_messages != 3) {
+    printf("num_messages expected 3 got %d\n", num_messages);
+    return FALSE;
+  }
+
+  if(!request_eq(0, r1)) {
+    puts("request 1 error.");
+    return FALSE;
+  }
+  if(!request_eq(1, r2)) {
+    puts("request 2 error.");
+    return FALSE;
+  }
+  if(!request_eq(2, r3)) {
+    puts("request 3 error.");
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/**
+ * Scan through every possible split of the input to make sure the
+ * parser can handle getting the content in any chunks that
+ * might come from the socket.
+ */
+int test_scan2
+  ( const struct message *r1
+  , const struct message *r2
+  , const struct message *r3
+  )
+{
+  char total[80*1024] = "\0";
+  char buf1[80*1024] = "\0";
+  char buf2[80*1024] = "\0";
+
+  strcat(total, r1->raw);
+  strcat(total, r2->raw);
+  strcat(total, r3->raw);
+
+  int total_len = strlen(total);
+
+  //printf("total_len = %d\n", total_len);
+  int i;
+  for(i = 1; i < total_len - 1; i ++ ) {
+
+    parser_init();
+
+    int buf1_len = i;
+    strncpy(buf1, total, buf1_len);
+    buf1[buf1_len] = 0;
+
+    int buf2_len = total_len - i;
+    strncpy(buf2, total+i, buf2_len);
+    buf2[buf2_len] = 0;
+
+    http_parser_execute(&parser, buf1, buf1_len);
+
+    if( http_parser_has_error(&parser) ) {
+      return FALSE;
+    }
+    /*
+    if(http_parser_is_finished(&parser))
+      return FALSE;
+    */
+
+    http_parser_execute(&parser, buf2, buf2_len);
+
+    if( http_parser_has_error(&parser))
+      return FALSE;
+
+    if(3 != num_messages) {
+      printf("scan error: got %d requests in iteration %d\n", num_messages, i);
+      return FALSE;
+    }
+
+    if(!request_eq(0, r1)) {
+      printf("not matching r1\n");
+      return FALSE;
+    }
+    if(!request_eq(1, r2)) {
+      printf("not matching r2\n");
+      return FALSE;
+    }
+    if(!request_eq(2, r3)) {
+      printf("not matching r3\n");
+      return FALSE;
+    }
+  }
+  return TRUE;
+}
+
+int test_scan3
+  ( const struct message *r1
+  , const struct message *r2
+  , const struct message *r3
+  )
+{
+  char total[80*1024] = "\0";
+  char buf1[80*1024] = "\0";
+  char buf2[80*1024] = "\0";
+  char buf3[80*1024] = "\0";
+
+  strcat(total, r1->raw);
+  strcat(total, r2->raw);
+  strcat(total, r3->raw);
+
+  int total_len = strlen(total);
+
+  //printf("total_len = %d\n", total_len);
+  int i,j;
+  for(j = 2; j < total_len - 1; j ++ ) {
+    for(i = 1; i < j; i ++ ) {
+
+      parser_init();
+
+
+
+
+      int buf1_len = i;
+      strncpy(buf1, total, buf1_len);
+      buf1[buf1_len] = 0;
+
+      int buf2_len = j - i;
+      strncpy(buf2, total+i, buf2_len);
+      buf2[buf2_len] = 0;
+
+      int buf3_len = total_len - j;
+      strncpy(buf3, total+j, buf3_len);
+      buf3[buf3_len] = 0;
+
+      /*
+      printf("buf1: %s - %d\n", buf1, buf1_len);
+      printf("buf2: %s - %d \n", buf2, buf2_len );
+      printf("buf3: %s - %d\n\n", buf3, buf3_len);
+      */
+
+      http_parser_execute(&parser, buf1, buf1_len);
+
+      if( http_parser_has_error(&parser) ) {
+        return FALSE;
+      }
+
+      http_parser_execute(&parser, buf2, buf2_len);
+
+      if( http_parser_has_error(&parser) ) {
+        return FALSE;
+      }
+
+      http_parser_execute(&parser, buf3, buf3_len);
+
+      if( http_parser_has_error(&parser))
+        return FALSE;
+
+      if(3 != num_messages) {
+        printf("scan error: only got %d requests in iteration %d\n", num_messages, i);
+        return FALSE;
+      }
+
+      if(!request_eq(0, r1)) {
+        printf("not matching r1\n");
+        return FALSE;
+      }
+      if(!request_eq(1, r2)) {
+        printf("not matching r2\n");
+        return FALSE;
+      }
+      if(!request_eq(2, r3)) {
+        printf("not matching r3\n");
+        return FALSE;
+      }
+    }
+  }
+  return TRUE;
+}
+
+int main()
+{
+
+  assert(test_error("hello world"));
+  assert(test_error("GET / HTP/1.1\r\n\r\n"));
+
+  assert(test_request(&curl_get));
+  assert(test_request(&firefox_get));
+
+  // Zed's header tests
+
+  assert(test_request(&dumbfuck));
+
+  const char *dumbfuck2 = "GET / HTTP/1.1\r\nX-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n";
+  assert(test_error(dumbfuck2));
+
+  assert(test_request(&fragment_in_uri));
+
+  /* TODO sending junk and large headers gets rejected */
+
+
+  /* check to make sure our predefined requests are okay */
+
+  assert(test_request(&get_no_headers_no_body));
+  assert(test_request(&get_one_header_no_body));
+  assert(test_request(&get_no_headers_no_body));
+
+  // no content-length
+  const char *bad_get_no_headers_no_body = "GET /bad_get_no_headers_no_body/world HTTP/1.1\r\nAccept: */*\r\nHELLO\r\n";
+  assert(test_error(bad_get_no_headers_no_body)); // error if there is a body without content length
+
+  assert(test_request(&get_funky_content_length_body_hello));
+  assert(test_request(&post_identity_body_world));
+  assert(test_request(&post_chunked_all_your_base));
+  assert(test_request(&two_chunks_mult_zero_end));
+  assert(test_request(&chunked_w_trailing_headers));
+
+  assert(test_request(&chunked_w_bullshit_after_length));
+  /*
+  assert(1 == messages[0].version_major);
+  assert(1 == messages[0].version_minor);
+  */
+
+  // three requests - no bodies
+  assert( test_multiple3( &get_no_headers_no_body
+                        , &get_one_header_no_body
+                        , &get_no_headers_no_body
+                        ));
+
+  // three requests - one body
+  assert( test_multiple3(&get_no_headers_no_body, &get_funky_content_length_body_hello, &get_no_headers_no_body));
+
+  // three requests with bodies -- last is chunked
+  assert( test_multiple3(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
+
+  // three chunked requests
+  assert( test_multiple3(&two_chunks_mult_zero_end, &post_chunked_all_your_base, &chunked_w_trailing_headers));
+
+
+  assert(test_scan2(&get_no_headers_no_body, &get_one_header_no_body, &get_no_headers_no_body));
+  assert(test_scan2(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
+  assert(test_scan2(&two_chunks_mult_zero_end, &chunked_w_trailing_headers, &chunked_w_bullshit_after_length));
+
+  assert(test_scan3(&get_no_headers_no_body, &get_one_header_no_body, &get_no_headers_no_body));
+  assert(test_scan3(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
+  assert(test_scan3(&two_chunks_mult_zero_end, &chunked_w_trailing_headers, &chunked_w_bullshit_after_length));
+
+
+  printf("okay\n");
+  return 0;
+}
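New test cases follow a simple recipe: describe the expected parse in a `struct message`, optionally append it to the NULL-terminated `requests` table, and assert `test_request()` on it in `main()`. A hypothetical example, not part of the commit (the name `get_with_query` and its request line are invented for illustration, mirroring the `fragment_in_uri` case minus the fragment):

    // hypothetical: a GET with only a query string, following the existing pattern
    const struct message get_with_query =
      { raw: "GET /search?q=ragel HTTP/1.1\r\n"
             "\r\n"
      , should_keep_alive: TRUE
      , method: HTTP_GET
      , query_string: "q=ragel"
      , fragment: ""
      , request_path: "/search"
      , request_uri: "/search?q=ragel"
      , num_headers: 0
      , body: ""
      };

    // in main():
    //   assert(test_request(&get_with_query));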