Remove "ebb_message" object from libebb's parser.

And much more:
 * Begin the framework for parsing HTTP responses
 * Modify the test code with the new layout
 * Remove EBB_ prefix and use just HTTP_
 * Start simple new Makefile

Currently the test passes but it is only checking HTTP requests. I get the
feeling that it is much slower than before, although I have no solid
evidence for this.
version0.2
Ryan 16 years ago
commit 97d4572c68

4
.gitignore vendored

@ -0,0 +1,4 @@
tags
*.o
test
http_parser.c

@ -0,0 +1,14 @@
# Build the test binary. test.c includes http_parser.h, so the header is a
# prerequisite; the inputs are named explicitly because $^ would otherwise pass
# the header to the compiler as an extra input file.
test: http_parser.o test.c http_parser.h
	gcc -g -O2 http_parser.o test.c -o $@

# Compile the generated parser.
http_parser.o: http_parser.c http_parser.h Makefile
	gcc -g -c -O2 $<

# Generate the C parser from the Ragel grammar.
http_parser.c: http_parser.rl Makefile
	ragel -s -G2 $< -o $@

# Remove everything this Makefile produces.
clean:
	rm -f *.o http_parser.c test

.PHONY: clean

@ -0,0 +1,128 @@
/* Copyright (c) 2008 Ryan Dahl (ry@tinyclouds.org)
* All rights reserved.
*
* This parser is based on code from Zed Shaw's Mongrel.
* Copyright (c) 2005 Zed A. Shaw
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
typedef struct http_parser http_parser;
/* Callbacks should return non-zero to indicate an error. The parse will
* then halt execution.
* NOTE(review): confirm that the parser implementation actually inspects the
* callback return value before relying on this.
*
* http_data_cb does not return whole elements. It may be called arbitrarily
* many times for each string, e.g. you might get 10 callbacks for "on_path",
* each providing just a few more characters of data.
*/
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
/* Request Methods.
* Every method has its own bit (the values are distinct powers of two). */
#define HTTP_COPY 0x0001
#define HTTP_DELETE 0x0002
#define HTTP_GET 0x0004
#define HTTP_HEAD 0x0008
#define HTTP_LOCK 0x0010
#define HTTP_MKCOL 0x0020
#define HTTP_MOVE 0x0040
#define HTTP_OPTIONS 0x0080
#define HTTP_POST 0x0100
#define HTTP_PROPFIND 0x0200
#define HTTP_PROPPATCH 0x0400
#define HTTP_PUT 0x0800
#define HTTP_TRACE 0x1000
#define HTTP_UNLOCK 0x2000
/* Transfer Encodings (values of http_parser.transfer_encoding) */
#define HTTP_IDENTITY 0x01
#define HTTP_CHUNKED 0x02
/* Parser state plus the user-supplied callbacks for one HTTP stream.
* http_parser_init() zeroes the whole structure, so `data` and the callbacks
* have to be assigned after calling it. */
struct http_parser {
/** PRIVATE **/
int cs; /* state-machine state carried between http_parser_execute() calls */
int is_request_stream; /* the flag that was passed to http_parser_init() */
size_t chunk_size; /* body bytes still expected for the current chunk or identity body */
unsigned eating:1; /* set while body bytes are still outstanding when a buffer ends */
size_t body_read; /* body bytes consumed so far for the current message */
/* Each mark points at the start of an element that is still being scanned in
* the current buffer, or is NULL when that element is not open. */
const char *header_field_mark;
const char *header_value_mark;
const char *query_string_mark;
const char *path_mark;
const char *uri_mark;
const char *fragment_mark;
/** READ-ONLY **/
unsigned int status_code; /* responses only */
unsigned int method; /* requests only; one of the HTTP_* method constants */
int transfer_encoding; /* HTTP_IDENTITY or HTTP_CHUNKED */
unsigned int version_major;
unsigned int version_minor;
unsigned int number_of_headers;
int keep_alive;
size_t content_length;
/** PUBLIC **/
void *data; /* User pointer, e.g. a hook to the "connection" or "socket" object */
/* an ordered list of callbacks */
http_cb on_message_begin;
/* requests only */
http_data_cb on_path;
http_data_cb on_query_string;
http_data_cb on_uri;
http_data_cb on_fragment;
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
};
/* Initializes an http_parser structure. The second argument specifies if
* it will be parsing requests or responses. Set the second argument to 1
* for requests; 0 for responses.
*/
void http_parser_init (http_parser *parser, int is_request_stream);
/* Feeds `len` bytes starting at `data` to the parser and returns the number
* of bytes that were consumed. May be called repeatedly with successive
* pieces of the same stream. */
size_t http_parser_execute (http_parser *parser, const char *data, size_t len);
/* Returns non-zero once the parser has entered its error state. */
int http_parser_has_error (http_parser *parser);
/* NOTE(review): no definition of this function is visible next to the parser
* source (only a disabled http_should_keep_alive) - confirm before calling. */
int http_message_should_keep_alive (http_parser *message);
/* Evaluates to non-zero when the message carries a body: it either uses
* chunked transfer encoding or declared a positive Content-Length.
* The argument is parenthesized so that any pointer expression (for example
* `base + 1`) can be passed safely. */
#define http_message_has_body(parser) \
((parser)->transfer_encoding == HTTP_CHUNKED || (parser)->content_length > 0)
#ifdef __cplusplus
}
#endif
#endif

@ -0,0 +1,381 @@
/* Copyright (c) 2008, 2009 Ryan Dahl (ry@tinyclouds.org)
* All rights reserved.
*
* This parser is based on code from Zed Shaw's Mongrel.
* Copyright (c) 2005 Zed A. Shaw
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "http_parser.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
/* Maps an ASCII code (0-127) to the value of the hexadecimal digit it
* represents, or -1 when the character is not a hex digit. The table is
* read-only, hence const. It has exactly 128 entries, so callers must only
* index it with 7-bit character codes. */
static const int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
};
#define TRUE 1
#define FALSE 0
/* Smaller of two values. Arguments and result are parenthesized so that
* operands containing lower-precedence operators expand correctly; each
* argument may still be evaluated twice, so avoid side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* Bytes left in the buffer being parsed; relies on local `p` and `pe`. */
#define REMAINING (pe - p)
/* Hands the text between parser-><FOR>_mark and the current position `p` to
* the on_<FOR> callback, provided both the mark and the callback are set.
* Relies on local `parser` and `p`. The callback's return value is discarded.
* Wrapped in do { } while(0) so that `CALLBACK(x);` is a single statement and
* is safe as the body of an unbraced if/else. */
#define CALLBACK(FOR) \
do { \
  if(parser->FOR##_mark && parser->on_##FOR) { \
    parser->on_##FOR( parser \
                    , parser->FOR##_mark \
                    , p - parser->FOR##_mark \
                    ); \
  } \
} while(0)
/* Returns all per-message fields to their defaults (identity encoding, every
* counter zero, no element open). The state-machine state, the stream type,
* `data` and the callbacks are left untouched.
* Wrapped in do { } while(0) so the reset is one statement even under an
* unbraced if; the argument is parenthesized for the same reason. */
#define RESET_PARSER(parser) \
do { \
  (parser)->chunk_size = 0; \
  (parser)->eating = 0; \
  (parser)->header_field_mark = NULL; \
  (parser)->header_value_mark = NULL; \
  (parser)->query_string_mark = NULL; \
  (parser)->path_mark = NULL; \
  (parser)->uri_mark = NULL; \
  (parser)->fragment_mark = NULL; \
  (parser)->status_code = 0; \
  (parser)->method = 0; \
  (parser)->transfer_encoding = HTTP_IDENTITY; \
  (parser)->version_major = 0; \
  (parser)->version_minor = 0; \
  (parser)->number_of_headers = 0; \
  (parser)->keep_alive = 0; \
  (parser)->content_length = 0; \
  (parser)->body_read = 0; \
} while(0)
/* Fires on_message_complete (when set) and then resets the per-message parser
* state. Relies on a local `parser`. The expansion consists of complete
* statements, so call sites write END_REQUEST without a trailing semicolon
* and must not use it as the unbraced body of an if/else. */
#define END_REQUEST \
if(parser->on_message_complete) { \
parser->on_message_complete(parser); \
} \
RESET_PARSER(parser);
%%{
machine http_parser;
# Mark actions: remember where an element starts in the current buffer so
# that its text can later be handed to the matching callback.
action mark_header_field { parser->header_field_mark = p; }
action mark_header_value { parser->header_value_mark = p; }
action mark_fragment { parser->fragment_mark = p; }
action mark_query_string { parser->query_string_mark = p; }
action mark_request_path { parser->path_mark = p; }
action mark_request_uri { parser->uri_mark = p; }
# Flush actions: pass the text collected since the mark to the user callback
# (see CALLBACK) and clear the mark so the element counts as finished.
action write_field {
CALLBACK(header_field);
parser->header_field_mark = NULL;
}
action write_value {
CALLBACK(header_value);
parser->header_value_mark = NULL;
}
action request_uri {
CALLBACK(uri);
parser->uri_mark = NULL;
}
action fragment {
CALLBACK(fragment);
parser->fragment_mark = NULL;
}
action query_string {
CALLBACK(query_string);
parser->query_string_mark = NULL;
}
action request_path {
CALLBACK(path);
parser->path_mark = NULL;
}
# Decimal accumulators: each runs once per digit and appends that digit to the
# number collected so far. None of them checks for overflow.
action content_length {
parser->content_length *= 10;
parser->content_length += *p - '0';
}
action status_code {
parser->status_code *= 10;
parser->status_code += *p - '0';
}
# Flags derived from the Transfer-Encoding and Connection headers.
action use_identity_encoding { parser->transfer_encoding = HTTP_IDENTITY; }
action use_chunked_encoding { parser->transfer_encoding = HTTP_CHUNKED; }
action set_keep_alive { parser->keep_alive = TRUE; }
action set_not_keep_alive { parser->keep_alive = FALSE; }
action trailer {
/* not implemented yet. (do requests even have trailing headers?) */
}
action version_major {
parser->version_major *= 10;
parser->version_major += *p - '0';
}
action version_minor {
parser->version_minor *= 10;
parser->version_minor += *p - '0';
}
# Tells the user that the header section has ended (callback is optional).
action headers_complete {
if(parser->on_headers_complete)
parser->on_headers_complete(parser);
}
# Hexadecimal accumulator for the chunk-size line; runs once per hex digit.
action add_to_chunk_size {
parser->chunk_size *= 16;
parser->chunk_size += unhex[(int)*p];
}
# Entered on the first byte of chunk data. Hands as much of the chunk as the
# current buffer holds to skip_body (which advances p), then either leaves the
# execute loop because more chunk data must arrive in a later buffer, or jumps
# to chunk_end.
action skip_chunk_data {
skip_body(&p, parser, MIN(parser->chunk_size, REMAINING));
/* step back one byte; presumably compensates for the advance that fbreak /
* fgoto perform - confirm against the Ragel manual */
fhold;
if(parser->chunk_size > REMAINING) {
fbreak;
} else {
fgoto chunk_end;
}
}
# Runs on the CRLF that terminates a chunked body: report the message and
# continue with the main machine for the next message.
action end_chunked_body {
END_REQUEST
fnext main;
}
# Runs on the last byte of the header section and decides how the body is read.
action body_logic {
if(parser->transfer_encoding == HTTP_CHUNKED) {
fnext ChunkedBody;
} else {
/* TODO: this duplicates skip_chunk_data; ideally the two would be combined */
parser->chunk_size = parser->content_length;
/* move from the last header byte to the first body byte */
p += 1;
skip_body(&p, parser, MIN(REMAINING, parser->content_length));
fhold;
if(parser->chunk_size > REMAINING) {
fbreak;
}
}
}
CRLF = "\r\n";
# character types (building blocks for the URI and header grammar below)
CTL = (cntrl | 127);
safe = ("$" | "-" | "_" | ".");
extra = ("!" | "*" | "'" | "(" | ")" | ",");
reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
national = any -- (alpha | digit | reserved | extra | safe | unsafe);
unreserved = (alpha | digit | safe | extra | national);
escape = ("%" xdigit xdigit);
uchar = (unreserved | escape);
pchar = (uchar | ":" | "@" | "&" | "=" | "+");
tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
| "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# elements
token = (ascii -- (CTL | tspecials));
quote = "\"";
# qdtext = token -- "\"";
# quoted_pair = "\" ascii;
# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
# headers
# Request method: each alternative stores its HTTP_* constant in
# parser->method when the keyword has been matched completely.
Method = ( "COPY" %{ parser->method = HTTP_COPY; }
| "DELETE" %{ parser->method = HTTP_DELETE; }
| "GET" %{ parser->method = HTTP_GET; }
| "HEAD" %{ parser->method = HTTP_HEAD; }
| "LOCK" %{ parser->method = HTTP_LOCK; }
| "MKCOL" %{ parser->method = HTTP_MKCOL; }
| "MOVE" %{ parser->method = HTTP_MOVE; }
| "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
| "POST" %{ parser->method = HTTP_POST; }
| "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
| "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
| "PUT" %{ parser->method = HTTP_PUT; }
| "TRACE" %{ parser->method = HTTP_TRACE; }
| "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
); # Not allowing extension methods
# "HTTP/<major>.<minor>"; every digit is folded into the version fields.
HTTP_Version = "HTTP/" digit+ $version_major "." digit+ $version_minor;
# Request-URI pieces. The path, query string, whole URI and fragment are each
# bracketed by a mark action on entry and a flush action on exit.
scheme = ( alpha | digit | "+" | "-" | "." )* ;
absolute_uri = (scheme ":" (uchar | reserved )*);
path = ( pchar+ ( "/" pchar* )* ) ;
query = ( uchar | reserved )* >mark_query_string %query_string ;
param = ( pchar | "/" )* ;
params = ( param ( ";" param )* ) ;
rel_path = ( path? (";" params)? ) ;
absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
Fragment = ( uchar | reserved )* >mark_fragment %fragment;
# Generic header: the capitalized variants report name and value through the
# header callbacks, the lower-case variants match without reporting.
field_name = ( token -- ":" )+;
Field_Name = field_name >mark_header_field %write_field;
field_value = ((any - " ") any*)?;
Field_Value = field_value >mark_header_value %write_value;
hsep = ":" " "*;
header = (field_name hsep field_value) :> CRLF;
# Headers the parser itself interprets (Content-Length, Connection,
# Transfer-Encoding) are listed as alternatives next to the generic header.
# Any Transfer-Encoding header selects chunked encoding as soon as its name
# is matched; the value "identity" switches back.
Header = ( ("Content-Length"i hsep digit+ $content_length)
| ("Connection"i hsep
( "Keep-Alive"i %set_keep_alive
| "close"i %set_not_keep_alive
)
)
| ("Transfer-Encoding"i %use_chunked_encoding hsep "identity" %use_identity_encoding)
| (Field_Name hsep Field_Value)
) :> CRLF;
# The header section ends with an empty line; headers_complete runs on it.
Headers = (Header)* :> CRLF @headers_complete;
Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
# Status line of a response, e.g. "HTTP/1.1 200 OK".
# Action operators bind tighter than concatenation, so the three digits are
# grouped; otherwise status_code would run on the last digit only.
StatusCode = (digit digit digit) $status_code;
# The reason phrase is any run of ASCII text (possibly empty) up to the end
# of the line, not just a single character.
ReasonPhrase = (ascii -- ("\r" | "\n"))*;
StatusLine = HTTP_Version " " StatusCode " " ReasonPhrase CRLF;
# chunked message
# Trailing headers are matched but not reported to the user.
trailing_headers = header*;
#chunk_ext_val = token | quoted_string;
chunk_ext_val = token*;
chunk_ext_name = token*;
chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
# A size made only of zeros ends the body.
last_chunk = "0"+ chunk_extension CRLF;
# A size with at least one non-zero hex digit introduces a data chunk.
chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size;
chunk_end = CRLF;
# The chunk data itself is consumed by skip_chunk_data, not by the machine.
chunk_body = any >skip_chunk_data;
chunk_begin = chunk_size chunk_extension CRLF;
chunk = chunk_begin chunk_body chunk_end;
# Separate machine entered from body_logic for chunked messages.
ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
Request = (Request_Line Headers) @body_logic;
Response = (StatusLine Headers) @body_logic;
Requests = Request*;
Responses = Response*;
# The main machine accepts either kind of stream.
main := Requests | Responses ;
}%%
%% write data;
/* Consumes `nskip` body bytes starting at *p: reports them through on_body
* (when set and nskip is non-zero), advances *p past them and updates the
* body counters. When nothing is left of the current chunk/body the `eating`
* flag is cleared and, for identity encoding, the message is finished;
* otherwise `eating` is set so that the next buffer continues the body. */
static void
skip_body(const char **p, http_parser *parser, size_t nskip) {
  const char *body = *p;

  if(nskip > 0 && parser->on_body) {
    parser->on_body(parser, body, nskip);
  }
  *p = body + nskip;
  parser->body_read += nskip;
  parser->chunk_size -= nskip;

  if(parser->chunk_size != 0) {
    /* the rest of this chunk/body arrives in a later buffer */
    parser->eating = TRUE;
    return;
  }
  parser->eating = FALSE;
  if(parser->transfer_encoding == HTTP_IDENTITY) {
    END_REQUEST
  }
}
/* Prepares `parser` for a new stream. The whole structure is zeroed first,
* which also clears `data` and every callback, so those must be assigned
* after this call. `is_request_stream` is stored as given. */
void http_parser_init(http_parser *parser, int is_request_stream)
{
  int cs = 0;

  memset(parser, 0, sizeof *parser);
  %% write init;
  RESET_PARSER(parser);
  parser->is_request_stream = is_request_stream;
  parser->cs = cs;
}
/** exec **/
/* Parses `len` bytes starting at `buffer`, resuming from the state saved by
* the previous call, and returns the number of bytes consumed. The stream may
* be split across calls at any position. */
size_t http_parser_execute(http_parser *parser, const char *buffer, size_t len)
{
const char *p, *pe;
int cs = parser->cs;
p = buffer;
pe = buffer+len;
/* The previous call stopped in the middle of a body: hand the continuation
* straight to skip_body before resuming the state machine. */
if(0 < parser->chunk_size && parser->eating) {
/* eat body */
size_t eat = MIN(len, parser->chunk_size);
skip_body(&p, parser, eat);
}
/* An element that was still open when the previous buffer ended continues at
* the start of this one; its old mark pointed into the previous buffer. */
if(parser->header_field_mark) parser->header_field_mark = buffer;
if(parser->header_value_mark) parser->header_value_mark = buffer;
if(parser->fragment_mark) parser->fragment_mark = buffer;
if(parser->query_string_mark) parser->query_string_mark = buffer;
if(parser->path_mark) parser->path_mark = buffer;
if(parser->uri_mark) parser->uri_mark = buffer;
%% write exec;
parser->cs = cs;
/* Report the part of every still-open element that lies in this buffer. The
* marks stay set, so the next call re-bases them as above. */
CALLBACK(header_field);
CALLBACK(header_value);
CALLBACK(fragment);
CALLBACK(query_string);
CALLBACK(path);
CALLBACK(uri);
assert(p <= pe && "buffer overflow after parsing execute");
return(p - buffer);
}
/* Reports whether the state machine has reached its error state.
* Returns 1 in that case and 0 otherwise. */
int http_parser_has_error(http_parser *parser)
{
  const int in_error_state = (http_parser_error == parser->cs);

  return in_error_state;
}
#if 0
/* Disabled. This code takes an `http *` type that is not declared in this
* file and treats keep_alive == -1 as "not specified", whereas RESET_PARSER
* initializes keep_alive to 0. It needs to be reworked before it can be
* re-enabled. */
int http_should_keep_alive(http *request)
{
if(request->keep_alive == -1)
if(request->version_major == 1)
return (request->version_minor != 0);
else if(request->version_major == 0)
return FALSE;
else
return TRUE;
else
return request->keep_alive;
}
#endif

763
test.c

@ -0,0 +1,763 @@
#include "http_parser.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#define TRUE 1
#define FALSE 0
#define MAX_HEADERS 10 /* header slots per recorded message */
#define MAX_ELEMENT_SIZE 200 /* bytes per recorded string buffer */
/* The single parser instance shared by all tests; see parser_init(). */
static http_parser parser;
/* One HTTP message. Used both for the expected fixtures (where `raw` holds
* the wire text) and for what the callbacks record while parsing. */
struct message {
const char *raw;
int method;
char request_path[MAX_ELEMENT_SIZE];
char request_uri[MAX_ELEMENT_SIZE];
char fragment[MAX_ELEMENT_SIZE];
char query_string[MAX_ELEMENT_SIZE];
char body[MAX_ELEMENT_SIZE];
int num_headers;
/* which header callback fired last; lets header_field_cb detect the start
* of a new header */
enum { NONE=0, FIELD, VALUE } last_header_element;
/* headers[i][0] is the field name, headers[i][1] the value */
char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE];
int should_keep_alive;
};
/* Messages recorded during the current test; num_messages counts the ones
* completed so far and indexes the slot being filled. */
static struct message messages[5];
static int num_messages;
/* A plain GET as sent by curl: three headers, no body.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message curl_get =
{ .raw = "GET /test HTTP/1.1\r\n"
         "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n"
         "Host: 0.0.0.0:5000\r\n"
         "Accept: */*\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/test"
, .request_uri = "/test"
, .num_headers = 3
, .headers =
  { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" }
  , { "Host", "0.0.0.0:5000" }
  , { "Accept", "*/*" }
  }
, .body = ""
};
/* A GET as sent by Firefox 3: eight headers including an explicit
* "Connection: keep-alive", no body.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message firefox_get =
{ .raw = "GET /favicon.ico HTTP/1.1\r\n"
         "Host: 0.0.0.0:5000\r\n"
         "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0\r\n"
         "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n"
         "Accept-Language: en-us,en;q=0.5\r\n"
         "Accept-Encoding: gzip,deflate\r\n"
         "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n"
         "Keep-Alive: 300\r\n"
         "Connection: keep-alive\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/favicon.ico"
, .request_uri = "/favicon.ico"
, .num_headers = 8
, .headers =
  { { "Host", "0.0.0.0:5000" }
  , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" }
  , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" }
  , { "Accept-Language", "en-us,en;q=0.5" }
  , { "Accept-Encoding", "gzip,deflate" }
  , { "Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.7" }
  , { "Keep-Alive", "300" }
  , { "Connection", "keep-alive" }
  }
, .body = ""
};
/* A GET with one header whose value follows the colon without a space and
* consists only of '+' characters.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message dumbfuck =
{ .raw = "GET /dumbfuck HTTP/1.1\r\n"
         "aaaaaaaaaaaaa:++++++++++\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/dumbfuck"
, .request_uri = "/dumbfuck"
, .num_headers = 1
, .headers =
  { { "aaaaaaaaaaaaa", "++++++++++" }
  }
, .body = ""
};
/* A GET whose request target carries both a query string and a fragment.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message fragment_in_uri =
{ .raw = "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = "page=1"
, .fragment = "posts-17408"
, .request_path = "/forums/1/topics/2375"
/* XXX request uri does not include fragment? */
, .request_uri = "/forums/1/topics/2375?page=1"
, .num_headers = 0
, .body = ""
};
/* GET - no headers - no body.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message get_no_headers_no_body =
{ .raw = "GET /get_no_headers_no_body/world HTTP/1.1\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/get_no_headers_no_body/world"
, .request_uri = "/get_no_headers_no_body/world"
, .num_headers = 0
, .body = ""
};
/* GET - one header - no body.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message get_one_header_no_body =
{ .raw = "GET /get_one_header_no_body HTTP/1.1\r\n"
         "Accept: */*\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/get_one_header_no_body"
, .request_uri = "/get_one_header_no_body"
, .num_headers = 1
, .headers =
  { { "Accept" , "*/*" }
  }
, .body = ""
};
/* HTTP/1.0 GET - a single, oddly capitalized Content-Length header -
* body "HELLO".
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message get_funky_content_length_body_hello =
{ .raw = "GET /get_funky_content_length_body_hello HTTP/1.0\r\n"
         "conTENT-Length: 5\r\n"
         "\r\n"
         "HELLO"
, .should_keep_alive = FALSE
, .method = HTTP_GET
, .query_string = ""
, .fragment = ""
, .request_path = "/get_funky_content_length_body_hello"
, .request_uri = "/get_funky_content_length_body_hello"
, .num_headers = 1
, .headers =
  { { "conTENT-Length" , "5" }
  }
, .body = "HELLO"
};
/* POST - three headers (explicit identity transfer encoding) - body "World".
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message post_identity_body_world =
{ .raw = "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n"
         "Accept: */*\r\n"
         "Transfer-Encoding: identity\r\n"
         "Content-Length: 5\r\n"
         "\r\n"
         "World"
, .should_keep_alive = TRUE
, .method = HTTP_POST
, .query_string = "q=search"
, .fragment = "hey"
, .request_path = "/post_identity_body_world"
, .request_uri = "/post_identity_body_world?q=search"
, .num_headers = 3
, .headers =
  { { "Accept", "*/*" }
  , { "Transfer-Encoding", "identity" }
  , { "Content-Length", "5" }
  }
, .body = "World"
};
/* POST - one Transfer-Encoding header - chunked body
* "all your base are belong to us" sent as a single chunk.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message post_chunked_all_your_base =
{ .raw = "POST /post_chunked_all_your_base HTTP/1.1\r\n"
         "Transfer-Encoding: chunked\r\n"
         "\r\n"
         "1e\r\nall your base are belong to us\r\n"
         "0\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_POST
, .query_string = ""
, .fragment = ""
, .request_path = "/post_chunked_all_your_base"
, .request_uri = "/post_chunked_all_your_base"
, .num_headers = 1
, .headers =
  { { "Transfer-Encoding" , "chunked" }
  }
, .body = "all your base are belong to us"
};
/* Two chunks; the terminating chunk size is written as "000".
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message two_chunks_mult_zero_end =
{ .raw = "POST /two_chunks_mult_zero_end HTTP/1.1\r\n"
         "Transfer-Encoding: chunked\r\n"
         "\r\n"
         "5\r\nhello\r\n"
         "6\r\n world\r\n"
         "000\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_POST
, .query_string = ""
, .fragment = ""
, .request_path = "/two_chunks_mult_zero_end"
, .request_uri = "/two_chunks_mult_zero_end"
, .num_headers = 1
, .headers =
  { { "Transfer-Encoding", "chunked" }
  }
, .body = "hello world"
};
/* Chunked body followed by trailing headers. The trailers are not expected
* to show up among the recorded headers.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message chunked_w_trailing_headers =
{ .raw = "POST /chunked_w_trailing_headers HTTP/1.1\r\n"
         "Transfer-Encoding: chunked\r\n"
         "\r\n"
         "5\r\nhello\r\n"
         "6\r\n world\r\n"
         "0\r\n"
         "Vary: *\r\n"
         "Content-Type: text/plain\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_POST
, .query_string = ""
, .fragment = ""
, .request_path = "/chunked_w_trailing_headers"
, .request_uri = "/chunked_w_trailing_headers"
, .num_headers = 1
, .headers =
  { { "Transfer-Encoding", "chunked" }
  }
, .body = "hello world"
};
/* Chunked body whose chunk-size lines carry chunk extensions, which the
* parser must skip.
* Uses standard C99 designated initializers instead of the obsolete GNU
* `field:` form. */
const struct message chunked_w_bullshit_after_length =
{ .raw = "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n"
         "Transfer-Encoding: chunked\r\n"
         "\r\n"
         "5; ihatew3;whatthefuck=aretheseparametersfor\r\nhello\r\n"
         "6; blahblah; blah\r\n world\r\n"
         "0\r\n"
         "\r\n"
, .should_keep_alive = TRUE
, .method = HTTP_POST
, .query_string = ""
, .fragment = ""
, .request_path = "/chunked_w_bullshit_after_length"
, .request_uri = "/chunked_w_bullshit_after_length"
, .num_headers = 1
, .headers =
  { { "Transfer-Encoding", "chunked" }
  }
, .body = "hello world"
};
/* NULL-terminated list of every request fixture defined above.
* NOTE(review): nothing in the visible test code iterates over this list -
* confirm whether it is still needed. */
const struct message *requests[] =
{ &curl_get
, &firefox_get
, &dumbfuck
, &fragment_in_uri
, &get_no_headers_no_body
, &get_one_header_no_body
, &get_funky_content_length_body_hello
, &post_identity_body_world
, &post_chunked_all_your_base
, &two_chunks_mult_zero_end
, &chunked_w_trailing_headers
, &chunked_w_bullshit_after_length
, NULL
};
/* Compares a recorded message (r1) with an expected fixture (r2) field by
* field. On the first difference it prints what differed and asserts; when
* assertions are compiled out it returns FALSE instead. Returns TRUE when
* body, fragment, query string, method, path, URI and all headers match. */
int
message_eq (struct message *r1, const struct message *r2)
{
  /* TODO: also compare keep-alive once the parser offers a way to query it. */
  if(0 != strcmp(r1->body, r2->body)) {
    printf("body '%s' != '%s'\n", r1->body, r2->body);
    assert(0);
    return FALSE;
  }
  if(0 != strcmp(r1->fragment, r2->fragment)) {
    printf("fragment '%s' != '%s'\n", r1->fragment, r2->fragment);
    assert(0);
    return FALSE;
  }
  if(0 != strcmp(r1->query_string, r2->query_string)) {
    printf("query_string '%s' != '%s'\n", r1->query_string, r2->query_string);
    assert(0);
    return FALSE;
  }
  if(r1->method != r2->method) {
    printf("method '%d' != '%d'\n", r1->method, r2->method);
    assert(0);
    return FALSE;
  }
  if(0 != strcmp(r1->request_path, r2->request_path)) {
    printf("request_path '%s' != '%s'\n", r1->request_path, r2->request_path);
    assert(0);
    return FALSE;
  }
  if(0 != strcmp(r1->request_uri, r2->request_uri)) {
    printf("request_uri '%s' != '%s'\n", r1->request_uri, r2->request_uri);
    assert(0);
    return FALSE;
  }
  if(r1->num_headers != r2->num_headers) {
    printf("num_headers '%d' != '%d'\n", r1->num_headers, r2->num_headers);
    assert(0);
    return FALSE;
  }
  int i;
  for(i = 0; i < r1->num_headers; i++) {
    if(0 != strcmp(r1->headers[i][0], r2->headers[i][0])) {
      printf("header field '%s' != '%s'\n"
            , r1->headers[i][0]
            , r2->headers[i][0]
            );
      assert(0);
      return FALSE;
    }
    if(0 != strcmp(r1->headers[i][1], r2->headers[i][1])) {
      /* index 1 is the value, so report it as such */
      printf("header value '%s' != '%s'\n"
            , r1->headers[i][1]
            , r2->headers[i][1]
            );
      assert(0);
      return FALSE;
    }
  }
  return TRUE;
}
/* Checks the message recorded in slot `index` against the `expected`
* fixture; the result is whatever message_eq() reports. */
int
request_eq (int index, const struct message *expected)
{
  struct message *recorded = messages + index;

  return message_eq(recorded, expected);
}
int request_path_cb(http_parser *_, const char *p, size_t len)
{
strncat(messages[num_messages].request_path, p, len);
return 0;
}
int request_uri_cb(http_parser *_, const char *p, size_t len)
{
strncat(messages[num_messages].request_uri, p, len);
return 0;
}
int query_string_cb(http_parser *_, const char *p, size_t len)
{
strncat(messages[num_messages].query_string, p, len);
return 0;
}
int fragment_cb(http_parser *_, const char *p, size_t len)
{
strncat(messages[num_messages].fragment, p, len);
return 0;
}
int header_field_cb(http_parser *_, const char *p, size_t len)
{
struct message *m = &messages[num_messages];
if (m->last_header_element != FIELD)
m->num_headers++;
strncat(m->headers[m->num_headers-1][0], p, len);
m->last_header_element = FIELD;
return 0;
}
int header_value_cb (http_parser *_, const char *p, size_t len)
{
struct message *m = &messages[num_messages];
strncat(m->headers[m->num_headers-1][1], p, len);
m->last_header_element = VALUE;
return 0;
}
int body_handler (http_parser *_, const char *p, size_t len)
{
strncat(messages[num_messages].body, p, len);
// printf("body_handler: '%s'\n", requests[num_messages].body);
return 0;
}
int message_complete(http_parser *parser)
{
messages[num_messages].method = parser->method;
num_messages++;
return 0;
}
/* on_message_begin hook: nothing to record, always reports success. */
int begin_message (http_parser *_)
{
return 0;
}
/* Resets the shared parser and the recorded messages for a fresh test run and
* installs the recording callbacks. http_parser_init() clears the whole
* parser structure, so the hooks are assigned after it. */
void
parser_init (void)
{
  http_parser_init(&parser, 1);

  memset(&messages, 0, sizeof messages);
  num_messages = 0;

  /* message-level hooks */
  parser.on_message_begin = begin_message;
  parser.on_headers_complete = NULL;
  parser.on_message_complete = message_complete;

  /* data hooks */
  parser.on_path = request_path_cb;
  parser.on_query_string = query_string_cb;
  parser.on_uri = request_uri_cb;
  parser.on_fragment = fragment_cb;
  parser.on_header_field = header_field_cb;
  parser.on_header_value = header_value_cb;
  parser.on_body = body_handler;
}
/* Parses the raw text of `message` in a single call and returns TRUE when
* exactly one message was produced without a parse error and it equals the
* fixture. The number of bytes consumed is not checked. */
int test_request
( const struct message *message
)
{
  parser_init();
  (void) http_parser_execute( &parser
                            , message->raw
                            , strlen(message->raw)
                            );
  if( http_parser_has_error(&parser) )
    return FALSE;
  if(num_messages != 1)
    return FALSE;
  return request_eq(0, message);
}
/* Parses `buf` in a single call and returns non-zero when the parser ended
* up in its error state, i.e. when the input was rejected. */
int test_error
( const char *buf
)
{
  parser_init();
  (void) http_parser_execute(&parser, buf, strlen(buf));
  return http_parser_has_error(&parser);
}
/* Concatenates three fixtures, parses them in a single call and returns TRUE
* when exactly three messages come out and each equals its fixture. */
int test_multiple3
( const struct message *r1
, const struct message *r2
, const struct message *r3
)
{
  /* measure each fixture once; the lengths size the buffer and drive the copy */
  const size_t len1 = strlen(r1->raw);
  const size_t len2 = strlen(r2->raw);
  const size_t len3 = strlen(r3->raw);
  const size_t total_len = len1 + len2 + len3;
  char total[total_len + 1];

  memcpy(total, r1->raw, len1);
  memcpy(total + len1, r2->raw, len2);
  memcpy(total + len1 + len2, r3->raw, len3);
  total[total_len] = '\0';

  parser_init();
  (void) http_parser_execute(&parser, total, total_len);
  if( http_parser_has_error(&parser) ) {
    puts("parser error");
    return FALSE;
  }
  if(num_messages != 3) {
    printf("num_messages expected 3 got %d\n", num_messages);
    return FALSE;
  }
  if(!request_eq(0, r1)) {
    puts("request 1 error.");
    return FALSE;
  }
  if(!request_eq(1, r2)) {
    puts("request 2 error.");
    return FALSE;
  }
  if(!request_eq(2, r3)) {
    puts("request 3 error.");
    return FALSE;
  }
  return TRUE;
}
/**
* SCAN through the possible two-way splits of three concatenated requests to
* make sure the parser can handle getting the content in any pieces that
* might come from the socket. Each piece is copied into its own buffer so
* that consecutive parser calls see unrelated memory.
* Returns TRUE when every split yields the three expected messages.
*/
int test_scan2
( const struct message *r1
, const struct message *r2
, const struct message *r3
)
{
  char total[80*1024] = "\0";
  char buf1[80*1024] = "\0";
  char buf2[80*1024] = "\0";

  /* refuse fixtures that would not fit instead of overflowing `total` */
  if(strlen(r1->raw) + strlen(r2->raw) + strlen(r3->raw) >= sizeof total) {
    printf("scan error: requests do not fit into %lu bytes\n"
          , (unsigned long) sizeof total
          );
    return FALSE;
  }
  strcat(total, r1->raw);
  strcat(total, r2->raw);
  strcat(total, r3->raw);

  int total_len = strlen(total);
  int i;
  for(i = 1; i < total_len - 1; i ++ ) {
    parser_init();

    int buf1_len = i;
    memcpy(buf1, total, buf1_len);
    buf1[buf1_len] = 0;

    int buf2_len = total_len - i;
    memcpy(buf2, total+i, buf2_len);
    buf2[buf2_len] = 0;

    http_parser_execute(&parser, buf1, buf1_len);
    if( http_parser_has_error(&parser) ) {
      return FALSE;
    }
    http_parser_execute(&parser, buf2, buf2_len);
    if( http_parser_has_error(&parser))
      return FALSE;

    if(3 != num_messages) {
      printf("scan error: got %d requests in iteration %d\n", num_messages, i);
      return FALSE;
    }
    if(!request_eq(0, r1)) {
      printf("not maching r1\n");
      return FALSE;
    }
    if(!request_eq(1, r2)) {
      printf("not maching r2\n");
      return FALSE;
    }
    if(!request_eq(2, r3)) {
      printf("not maching r3\n");
      return FALSE;
    }
  }
  return TRUE;
}
/* Like test_scan2, but cuts the concatenated requests into three pieces at
* positions i < j and feeds them to the parser one after another, each from
* its own buffer. Returns TRUE when every split yields the three expected
* messages. */
int test_scan3
( const struct message *r1
, const struct message *r2
, const struct message *r3
)
{
  char total[80*1024] = "\0";
  char buf1[80*1024] = "\0";
  char buf2[80*1024] = "\0";
  char buf3[80*1024] = "\0";

  /* refuse fixtures that would not fit instead of overflowing `total` */
  if(strlen(r1->raw) + strlen(r2->raw) + strlen(r3->raw) >= sizeof total) {
    printf("scan error: requests do not fit into %lu bytes\n"
          , (unsigned long) sizeof total
          );
    return FALSE;
  }
  strcat(total, r1->raw);
  strcat(total, r2->raw);
  strcat(total, r3->raw);

  int total_len = strlen(total);
  int i,j;
  for(j = 2; j < total_len - 1; j ++ ) {
    for(i = 1; i < j; i ++ ) {
      parser_init();

      int buf1_len = i;
      memcpy(buf1, total, buf1_len);
      buf1[buf1_len] = 0;

      int buf2_len = j - i;
      memcpy(buf2, total+i, buf2_len);
      buf2[buf2_len] = 0;

      int buf3_len = total_len - j;
      memcpy(buf3, total+j, buf3_len);
      buf3[buf3_len] = 0;

      http_parser_execute(&parser, buf1, buf1_len);
      if( http_parser_has_error(&parser) ) {
        return FALSE;
      }
      http_parser_execute(&parser, buf2, buf2_len);
      if( http_parser_has_error(&parser) ) {
        return FALSE;
      }
      http_parser_execute(&parser, buf3, buf3_len);
      if( http_parser_has_error(&parser))
        return FALSE;

      if(3 != num_messages) {
        /* both cut positions are needed to reproduce a failure */
        printf("scan error: only got %d requests in iteration i=%d j=%d\n"
              , num_messages, i, j
              );
        return FALSE;
      }
      if(!request_eq(0, r1)) {
        printf("not maching r1\n");
        return FALSE;
      }
      if(!request_eq(1, r2)) {
        printf("not maching r2\n");
        return FALSE;
      }
      if(!request_eq(2, r3)) {
        printf("not maching r3\n");
        return FALSE;
      }
    }
  }
  return TRUE;
}
int main()
{
assert(test_error("hello world"));
assert(test_error("GET / HTP/1.1\r\n\r\n"));
assert(test_request(&curl_get));
assert(test_request(&firefox_get));
// Zed's header tests
assert(test_request(&dumbfuck));
const char *dumbfuck2 = "GET / HTTP/1.1\r\nX-SSL-Bullshit: -----BEGIN CERTIFICATE-----\r\n\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n\twTC6o2xq5y0qZ03Jo
nF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n\tRA==\r\n\t-----END CERTIFICATE-----\r\n\r\n";
assert(test_error(dumbfuck2));
assert(test_request(&fragment_in_uri));
/* TODO sending junk and large headers gets rejected */
/* check to make sure our predefined requests are okay */
assert(test_request(&get_no_headers_no_body));
assert(test_request(&get_one_header_no_body));
assert(test_request(&get_no_headers_no_body));
// no content-length
const char *bad_get_no_headers_no_body = "GET /bad_get_no_headers_no_body/world HTTP/1.1\r\nAccept: */*\r\nHELLO\r\n";
assert(test_error(bad_get_no_headers_no_body)); // error if there is a body without content length
assert(test_request(&get_funky_content_length_body_hello));
assert(test_request(&post_identity_body_world));
assert(test_request(&post_chunked_all_your_base));
assert(test_request(&two_chunks_mult_zero_end));
assert(test_request(&chunked_w_trailing_headers));
assert(test_request(&chunked_w_bullshit_after_length));
/*
assert(1 == messages[0].version_major);
assert(1 == messages[0].version_minor);
*/
// three requests - no bodies
assert( test_multiple3( &get_no_headers_no_body
, &get_one_header_no_body
, &get_no_headers_no_body
));
// three requests - one body
assert( test_multiple3(&get_no_headers_no_body, &get_funky_content_length_body_hello, &get_no_headers_no_body));
// three requests with bodies -- last is chunked
assert( test_multiple3(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
// three chunked requests
assert( test_multiple3(&two_chunks_mult_zero_end, &post_chunked_all_your_base, &chunked_w_trailing_headers));
assert(test_scan2(&get_no_headers_no_body, &get_one_header_no_body, &get_no_headers_no_body));
assert(test_scan2(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
assert(test_scan2(&two_chunks_mult_zero_end, &chunked_w_trailing_headers, &chunked_w_bullshit_after_length));
assert(test_scan3(&get_no_headers_no_body, &get_one_header_no_body, &get_no_headers_no_body));
assert(test_scan3(&get_funky_content_length_body_hello, &post_identity_body_world, &post_chunked_all_your_base));
assert(test_scan3(&two_chunks_mult_zero_end, &chunked_w_trailing_headers, &chunked_w_bullshit_after_length));
printf("okay\n");
return 0;
}
Loading…
Cancel
Save