diff --git a/LICENSE b/LICENSE index 7fb7fcd..561e5c5 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Copyright 2009, Ryan Lienhart Dahl. All rights reserved. + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the @@ -15,16 +16,13 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -IN THE SOFTWARE. - - - +IN THE SOFTWARE. http_parser is based on Zed Shaw's Mongrel. Mongrel's license is as follows. --- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT -- +---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw - and contributors. You can redistribute it + and contributors. You can redistribute it and/or modify it under either the terms of the GPL2 or the conditions below: 1. You may make and give away verbatim copies of the source form of the @@ -66,9 +64,9 @@ and/or modify it under either the terms of the GPL2 or the conditions below: software (possibly commercial). But some files in the distribution are not written by the author, so that they are not under this terms. -5. The scripts and library files supplied as input to or produced as +5. The scripts and library files supplied as input to or produced as output from the software do not automatically fall under the - copyright of the software, but belong to whomever generated them, + copyright of the software, but belong to whomever generated them, and may be sold commercially, and may be aggregated with this software. @@ -76,4 +74,4 @@ and/or modify it under either the terms of the GPL2 or the conditions below: IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. --- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT ---- CUT -- +---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- 8< ---- diff --git a/Makefile b/Makefile index afe7229..899de24 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ #OPT=-O0 -g -Wall -Wextra -Werror OPT=-O2 -test: http_parser.o test.c - gcc $(OPT) http_parser.o test.c -o $@ +test: http_parser.o test.c + gcc $(OPT) http_parser.o test.c -o $@ http_parser.o: http_parser.c http_parser.h Makefile gcc $(OPT) -c http_parser.c diff --git a/README.md b/README.md index d60473e..d59a12d 100644 --- a/README.md +++ b/README.md @@ -5,11 +5,11 @@ This is a parser for HTTP messages written in C. It parses both requests and responses. The parser is designed to be used in performance HTTP applications. It does not make any allocations, it does not buffer data, and it can be interrupted at anytime. It only requires about 128 bytes of data -per message stream (in a web server that is per connection). +per message stream (in a web server that is per connection). Features: - * No dependencies + * No dependencies * Parses both requests and responses. * Handles keep-alive streams. * Decodes chunked encoding. @@ -57,7 +57,7 @@ HTTP version are stored in the parser structure. This data is only temporarlly stored in `http_parser` and gets reset on each new message. If this information is needed later, copy it out of the structure during the `headers_complete` callback. - + The parser decodes the transfer-encoding for both requests and responses transparently. That is, a chunked encoding is decoded before being sent to the on_body callback. diff --git a/http_parser.h b/http_parser.h index b0250fe..ead1e5e 100644 --- a/http_parser.h +++ b/http_parser.h @@ -2,7 +2,7 @@ * Based on Zed Shaw's Mongrel, copyright (c) Zed A. Shaw * * All rights reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,34 +10,34 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef http_parser_h #define http_parser_h #ifdef __cplusplus extern "C" { -#endif +#endif #ifdef _MSC_VER - #include +# include #endif -#include +#include typedef struct http_parser http_parser; /* Callbacks should return non-zero to indicate an error. The parse will - * then halt execution. - * + * then halt execution. + * * http_data_cb does not return data chunks. It will be call arbitrarally * many times for each string. E.G. you might get 10 callbacks for "on_path" * each providing just a few characters more data. @@ -94,18 +94,18 @@ struct http_parser { size_t body_read; - const char *header_field_mark; - size_t header_field_size; - const char *header_value_mark; - size_t header_value_size; - const char *query_string_mark; - size_t query_string_size; - const char *path_mark; - size_t path_size; - const char *uri_mark; - size_t uri_size; - const char *fragment_mark; - size_t fragment_size; + const char *header_field_mark; + size_t header_field_size; + const char *header_value_mark; + size_t header_value_size; + const char *query_string_mark; + size_t query_string_size; + const char *path_mark; + size_t path_size; + const char *uri_mark; + size_t uri_size; + const char *fragment_mark; + size_t fragment_size; /** READ-ONLY **/ unsigned short status_code; /* responses only */ @@ -137,7 +137,7 @@ struct http_parser { }; /* Initializes an http_parser structure. The second argument specifies if - * it will be parsing requests or responses. + * it will be parsing requests or responses. */ void http_parser_init (http_parser *parser, enum http_parser_type); @@ -149,5 +149,5 @@ int http_parser_should_keep_alive (http_parser *parser); #ifdef __cplusplus } -#endif +#endif #endif diff --git a/http_parser.rl b/http_parser.rl index 5d27074..7368dea 100644 --- a/http_parser.rl +++ b/http_parser.rl @@ -2,7 +2,7 @@ * Based on Zed Shaw's Mongrel, copyright (c) Zed A. Shaw * * All rights reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,17 +10,17 @@ * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "http_parser.h" #include @@ -35,12 +35,14 @@ static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 }; -#define TRUE 1 -#define FALSE 0 -#define MIN(a,b) (a < b ? a : b) -#define NULL (void*)(0) -#define MAX_FIELD_SIZE 80*1024 +#undef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +#undef NULL +#define NULL ((void*)(0)) + +#define MAX_FIELD_SIZE (80*1024) #define REMAINING (unsigned long)(pe - p) #define CALLBACK(FOR) \ @@ -48,7 +50,7 @@ do { \ if (parser->FOR##_mark) { \ parser->FOR##_size += p - parser->FOR##_mark; \ if (parser->FOR##_size > MAX_FIELD_SIZE) { \ - parser->error = TRUE; \ + parser->error = 1; \ return 0; \ } \ if (parser->on_##FOR) { \ @@ -60,22 +62,22 @@ do { \ } while(0) #define RESET_PARSER(parser) \ - parser->chunk_size = 0; \ - parser->eating = 0; \ - parser->header_field_mark = NULL; \ - parser->header_value_mark = NULL; \ - parser->query_string_mark = NULL; \ - parser->path_mark = NULL; \ - parser->uri_mark = NULL; \ - parser->fragment_mark = NULL; \ - parser->status_code = 0; \ - parser->method = 0; \ - parser->transfer_encoding = HTTP_IDENTITY; \ - parser->version_major = 0; \ - parser->version_minor = 0; \ - parser->keep_alive = -1; \ - parser->content_length = 0; \ - parser->body_read = 0; + parser->chunk_size = 0; \ + parser->eating = 0; \ + parser->header_field_mark = NULL; \ + parser->header_value_mark = NULL; \ + parser->query_string_mark = NULL; \ + parser->path_mark = NULL; \ + parser->uri_mark = NULL; \ + parser->fragment_mark = NULL; \ + parser->status_code = 0; \ + parser->method = 0; \ + parser->transfer_encoding = HTTP_IDENTITY; \ + parser->version_major = 0; \ + parser->version_minor = 0; \ + parser->keep_alive = -1; \ + parser->content_length = 0; \ + parser->body_read = 0 #define END_REQUEST \ do { \ @@ -97,12 +99,12 @@ do { \ parser->body_read += tmp; \ parser->chunk_size -= tmp; \ if (0 == parser->chunk_size) { \ - parser->eating = FALSE; \ + parser->eating = 0; \ if (parser->transfer_encoding == HTTP_IDENTITY) { \ END_REQUEST; \ } \ } else { \ - parser->eating = TRUE; \ + parser->eating = 1; \ } \ } \ } while (0) @@ -143,7 +145,7 @@ do { \ action header_field { CALLBACK(header_field); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->header_field_mark = NULL; @@ -153,37 +155,37 @@ do { \ action header_value { CALLBACK(header_value); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->header_value_mark = NULL; parser->header_value_size = 0; } - action request_uri { + action request_uri { CALLBACK(uri); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->uri_mark = NULL; parser->uri_size = 0; } - action fragment { + action fragment { CALLBACK(fragment); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->fragment_mark = NULL; parser->fragment_size = 0; } - action query_string { + action query_string { CALLBACK(query_string); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->query_string_mark = NULL; @@ -193,7 +195,7 @@ do { \ action request_path { CALLBACK(path); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->path_mark = NULL; @@ -204,7 +206,7 @@ do { \ if(parser->on_headers_complete) { callback_return_value = parser->on_headers_complete(parser); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } } @@ -214,7 +216,7 @@ do { \ if(parser->on_message_begin) { callback_return_value = parser->on_message_begin(parser); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } } @@ -222,7 +224,7 @@ do { \ action content_length { if (parser->content_length > INT_MAX) { - parser->error = TRUE; + parser->error = 1; return 0; } parser->content_length *= 10; @@ -237,8 +239,8 @@ do { \ action use_identity_encoding { parser->transfer_encoding = HTTP_IDENTITY; } action use_chunked_encoding { parser->transfer_encoding = HTTP_CHUNKED; } - action set_keep_alive { parser->keep_alive = TRUE; } - action set_not_keep_alive { parser->keep_alive = FALSE; } + action set_keep_alive { parser->keep_alive = 1; } + action set_not_keep_alive { parser->keep_alive = 0; } action version_major { parser->version_major *= 10; @@ -258,15 +260,15 @@ do { \ action skip_chunk_data { SKIP_BODY(MIN(parser->chunk_size, REMAINING)); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } - fhold; + fhold; if (parser->chunk_size > REMAINING) { fbreak; } else { - fgoto chunk_end; + fgoto chunk_end; } } @@ -285,12 +287,12 @@ do { \ } else { /* this is pretty stupid. i'd prefer to combine this with skip_chunk_data */ parser->chunk_size = parser->content_length; - p += 1; + p += 1; SKIP_BODY(MIN(REMAINING, parser->content_length)); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } @@ -314,13 +316,13 @@ do { \ escape = ("%" xdigit xdigit); uchar = (unreserved | escape); pchar = (uchar | ":" | "@" | "&" | "=" | "+"); - tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" + tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t"); # elements token = (ascii -- (CTL | tspecials)); quote = "\""; -# qdtext = token -- "\""; +# qdtext = token -- "\""; # quoted_pair = "\" ascii; # quoted_string = "\"" (qdtext | quoted_pair )* "\""; @@ -364,7 +366,7 @@ do { \ hsep = ":" " "*; header = (field_name hsep field_value) :> CRLF; Header = ( ("Content-Length"i hsep digit+ $content_length) - | ("Connection"i hsep + | ("Connection"i hsep ( "Keep-Alive"i %set_keep_alive | "close"i %set_not_keep_alive ) @@ -415,7 +417,7 @@ do { \ %% write data; void -http_parser_init (http_parser *parser, enum http_parser_type type) +http_parser_init (http_parser *parser, enum http_parser_type type) { int cs = 0; %% write init; @@ -453,7 +455,7 @@ http_parser_execute (http_parser *parser, const char *buffer, size_t len) /* eat body */ SKIP_BODY(MIN(len, parser->chunk_size)); if (callback_return_value != 0) { - parser->error = TRUE; + parser->error = 1; return 0; } } @@ -481,9 +483,9 @@ http_parser_execute (http_parser *parser, const char *buffer, size_t len) } int -http_parser_has_error (http_parser *parser) +http_parser_has_error (http_parser *parser) { - if (parser->error) return TRUE; + if (parser->error) return 1; return parser->cs == http_parser_error; } @@ -494,9 +496,9 @@ http_parser_should_keep_alive (http_parser *parser) if (parser->version_major == 1) return (parser->version_minor != 0); else if (parser->version_major == 0) - return FALSE; + return 0; else - return TRUE; + return 1; else return parser->keep_alive; } diff --git a/test.c b/test.c index 0adf7e5..7c2fb83 100644 --- a/test.c +++ b/test.c @@ -38,7 +38,7 @@ struct message { static struct message messages[5]; static int num_messages; -/* * R E Q U E S T S * */ +/* * R E Q U E S T S * */ const struct message requests[] = #define CURL_GET 0 { {.name= "curl get" @@ -55,7 +55,7 @@ const struct message requests[] = ,.request_path= "/test" ,.request_uri= "/test" ,.num_headers= 3 - ,.headers= + ,.headers= { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" } , { "Host", "0.0.0.0=5000" } , { "Accept", "*/*" } @@ -83,7 +83,7 @@ const struct message requests[] = ,.request_path= "/favicon.ico" ,.request_uri= "/favicon.ico" ,.num_headers= 8 - ,.headers= + ,.headers= { { "Host", "0.0.0.0=5000" } , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" } , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" } @@ -109,7 +109,7 @@ const struct message requests[] = ,.request_path= "/dumbfuck" ,.request_uri= "/dumbfuck" ,.num_headers= 1 - ,.headers= + ,.headers= { { "aaaaaaaaaaaaa", "++++++++++" } } ,.body= "" @@ -126,7 +126,7 @@ const struct message requests[] = ,.fragment= "posts-17408" ,.request_path= "/forums/1/topics/2375" /* XXX request uri does not include fragment? */ - ,.request_uri= "/forums/1/topics/2375?page=1" + ,.request_uri= "/forums/1/topics/2375?page=1" ,.num_headers= 0 ,.body= "" } @@ -159,7 +159,7 @@ const struct message requests[] = ,.request_path= "/get_one_header_no_body" ,.request_uri= "/get_one_header_no_body" ,.num_headers= 1 - ,.headers= + ,.headers= { { "Accept" , "*/*" } } ,.body= "" @@ -179,7 +179,7 @@ const struct message requests[] = ,.request_path= "/get_funky_content_length_body_hello" ,.request_uri= "/get_funky_content_length_body_hello" ,.num_headers= 1 - ,.headers= + ,.headers= { { "conTENT-Length" , "5" } } ,.body= "HELLO" @@ -201,10 +201,10 @@ const struct message requests[] = ,.request_path= "/post_identity_body_world" ,.request_uri= "/post_identity_body_world?q=search" ,.num_headers= 3 - ,.headers= + ,.headers= { { "Accept", "*/*" } , { "Transfer-Encoding", "identity" } - , { "Content-Length", "5" } + , { "Content-Length", "5" } } ,.body= "World" } @@ -225,7 +225,7 @@ const struct message requests[] = ,.request_path= "/post_chunked_all_your_base" ,.request_uri= "/post_chunked_all_your_base" ,.num_headers= 1 - ,.headers= + ,.headers= { { "Transfer-Encoding" , "chunked" } } ,.body= "all your base are belong to us" @@ -248,13 +248,13 @@ const struct message requests[] = ,.request_path= "/two_chunks_mult_zero_end" ,.request_uri= "/two_chunks_mult_zero_end" ,.num_headers= 1 - ,.headers= + ,.headers= { { "Transfer-Encoding", "chunked" } } ,.body= "hello world" } -#define CHUNKED_W_TRAILING_HEADERS 10 +#define CHUNKED_W_TRAILING_HEADERS 10 , {.name= "chunked with trailing headers. blech." ,.type= HTTP_REQUEST ,.raw= "POST /chunked_w_trailing_headers HTTP/1.1\r\n" @@ -273,13 +273,13 @@ const struct message requests[] = ,.request_path= "/chunked_w_trailing_headers" ,.request_uri= "/chunked_w_trailing_headers" ,.num_headers= 1 - ,.headers= + ,.headers= { { "Transfer-Encoding", "chunked" } } ,.body= "hello world" } -#define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 +#define CHUNKED_W_BULLSHIT_AFTER_LENGTH 11 , {.name= "with bullshit after the length" ,.type= HTTP_REQUEST ,.raw= "POST /chunked_w_bullshit_after_length HTTP/1.1\r\n" @@ -296,7 +296,7 @@ const struct message requests[] = ,.request_path= "/chunked_w_bullshit_after_length" ,.request_uri= "/chunked_w_bullshit_after_length" ,.num_headers= 1 - ,.headers= + ,.headers= { { "Transfer-Encoding", "chunked" } } ,.body= "hello world" @@ -305,8 +305,8 @@ const struct message requests[] = , {.name= NULL } /* sentinel */ }; -/* * R E S P O N S E S * */ -const struct message responses[] = +/* * R E S P O N S E S * */ +const struct message responses[] = { {.name= "google 301" ,.type= HTTP_RESPONSE ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" @@ -327,7 +327,7 @@ const struct message responses[] = ,.should_keep_alive= TRUE ,.status_code= 301 ,.num_headers= 7 - ,.headers= + ,.headers= { { "Location", "http://www.google.com/" } , { "Content-Type", "text/html; charset=UTF-8" } , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" } @@ -596,16 +596,16 @@ test_error (const char *buf) void test_multiple3 (const struct message *r1, const struct message *r2, const struct message *r3) { - char total[ strlen(r1->raw) - + strlen(r2->raw) - + strlen(r3->raw) + char total[ strlen(r1->raw) + + strlen(r2->raw) + + strlen(r3->raw) + 1 ]; total[0] = '\0'; - strcat(total, r1->raw); - strcat(total, r2->raw); - strcat(total, r3->raw); + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); size_t traversed = 0; parser_init(HTTP_REQUEST); @@ -619,7 +619,7 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct message_eq(2, r3); } -/* SCAN through every possible breaking to make sure the +/* SCAN through every possible breaking to make sure the * parser can handle getting the content in any chunks that * might come from the socket */ @@ -631,13 +631,13 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess char buf2[80*1024] = "\0"; char buf3[80*1024] = "\0"; - strcat(total, r1->raw); - strcat(total, r2->raw); - strcat(total, r3->raw); + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); int total_len = strlen(total); - int total_ops = (total_len - 1) * (total_len - 2) / 2; + int total_ops = (total_len - 1) * (total_len - 2) / 2; int ops = 0 ; int i,j; @@ -699,14 +699,14 @@ main (void) printf("sizeof(http_parser) = %d\n", sizeof(http_parser)); - int request_count; + int request_count; for (request_count = 0; requests[request_count].name; request_count++); - int response_count; + int response_count; for (response_count = 0; responses[response_count].name; response_count++); - //// RESPONSES + //// RESPONSES for (i = 0; i < response_count; i++) { test_message(&responses[i]); @@ -767,7 +767,7 @@ main (void) "Accept: */*\r\n" "\r\n" "HELLO"; - test_error(bad_get_no_headers_no_body); + test_error(bad_get_no_headers_no_body); /* TODO sending junk and large headers gets rejected */