You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
http-parser/http_parser.rl

537 lines
18 KiB

/*
Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
<zedshaw at zedshaw dot com> and contributors.
This source file is based on Mongrel's parser. Changes by Ryan Dahl
<ry@tinyclouds.org> in 2008 and 2009.
You can redistribute it and/or modify it under either the terms of the GPL2
or the conditions below:
1. You may make and give away verbatim copies of the source form of the
software without restriction, provided that you duplicate all of the
original copyright notices and associated disclaimers.
2. You may modify your copy of the software in any way, provided that
you do at least ONE of the following:
a) place your modifications in the Public Domain or otherwise make them
Freely Available, such as by posting said modifications to Usenet or an
equivalent medium, or by allowing the author to include your
modifications in the software.
b) use the modified software only within your corporation or
organization.
c) rename any non-standard executables so the names do not conflict with
standard executables, which must also be provided.
d) make other distribution arrangements with the author.
3. You may distribute the software in object code or executable
form, provided that you do at least ONE of the following:
a) distribute the executables and library files of the software,
together with instructions (in the manual page or equivalent) on where
to get the original distribution.
b) accompany the distribution with the machine-readable source of the
software.
c) give non-standard executables non-standard names, with
instructions on where to get the original software distribution.
d) make other distribution arrangements with the author.
4. You may modify and include the part of the software into any other
software (possibly commercial). But some files in the distribution
are not written by the author, so that they are not under this terms.
5. The scripts and library files supplied as input to or produced as
output from the software do not automatically fall under the
copyright of the software, but belong to whomever generated them,
and may be sold commercially, and may be aggregated with this
software.
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.
*/
#include "http_parser.h"
#include <limits.h>
#include <assert.h>
/* Bit values stored in parser->flags. */
/* EATING: set by SKIP_BODY while parser->chunk_size is still non-zero after a
 * skip and cleared once it reaches zero; http_parser_execute tests it to
 * resume consuming body bytes at the start of the next buffer. */
#define EATING 0x01
/* ERROR: set when a callback returns non-zero or a size limit is exceeded;
 * reported by http_parser_has_error. */
#define ERROR 0x02
/* CHUNKED: set by the use_chunked_encoding action; selects the ChunkedBody
 * machine in body_logic and suppresses END_REQUEST inside SKIP_BODY. */
#define CHUNKED 0x04
/* EAT_FOREVER: set by body_logic when there is no content length and the
 * connection is not keep-alive; http_parser_execute then hands every buffer
 * straight to on_body until it is called with len == 0. */
#define EAT_FOREVER 0x10
/*
 * Maps a 7-bit ASCII code to the value of the hexadecimal digit it denotes,
 * or -1 when the character is not a hex digit. Both 'a'-'f' and 'A'-'F' map
 * to 10-15.
 *
 * The table has exactly 128 entries, so it must only be indexed with values
 * in the range 0..127. It is read-only and therefore declared const.
 */
static const int unhex[] = {
  /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x30 */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
  /* 0x40 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x60 */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* Smaller of two values. Each argument may be evaluated twice, so only pass
 * expressions without side effects. */
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
/* Local null-pointer constant used for the marks and callback slots. */
#undef NULL
#define NULL ((void*)(0))
/* Largest accumulated size, in bytes, that CALLBACK accepts for one element
 * (header field, header value, URI, ...) before flagging an error. */
#define MAX_FIELD_SIZE (80*1024)
/* Number of unread bytes between the cursor `p` and the buffer end `pe`.
 * Relies on locals named `p` and `pe` at the point of use. The expansion is
 * fully parenthesized so it behaves as a single operand in any expression. */
#define REMAINING ((unsigned long)(pe - p))
/*
 * CALLBACK(FOR): hand the currently marked span of element FOR to the user.
 *
 * Does nothing unless parser->FOR_mark is set. Otherwise it:
 *   1. adds the span length (p - parser->FOR_mark) to parser->FOR_size and
 *      fails if the running total exceeds MAX_FIELD_SIZE;
 *   2. calls parser->on_FOR(parser, mark, length) when that callback is
 *      installed, storing the result in callback_return_value;
 *   3. fails if callback_return_value is non-zero. This test runs even when
 *      no callback is installed, so a non-zero value left behind by earlier
 *      code also takes the failure path.
 *
 * "Fails" means: set the ERROR flag and `return;` from the enclosing
 * function, so the macro is only usable inside a void function that has
 * locals named `parser`, `p` and `callback_return_value` in scope.
 *
 * The macro does not clear the mark or the size; callers do that themselves.
 */
#define CALLBACK(FOR) \
do { \
if (parser->FOR##_mark) { \
parser->FOR##_size += p - parser->FOR##_mark; \
if (parser->FOR##_size > MAX_FIELD_SIZE) { \
parser->flags |= ERROR; \
return; \
} \
if (parser->on_##FOR) { \
callback_return_value = parser->on_##FOR(parser, \
parser->FOR##_mark, \
p - parser->FOR##_mark); \
} \
if (callback_return_value != 0) { \
parser->flags |= ERROR; \
return; \
} \
} \
} while(0)
/*
 * RESET_PARSER(parser): return the per-message state of `parser` to its
 * initial values: no flags, no open marks, zero chunk size and body count,
 * and "unknown" (-1) keep-alive and content length.
 *
 * The user callbacks, parser->type and the state-machine state (parser->cs)
 * are left untouched, as are the *_size counters (the mark_* actions zero
 * those when a new span starts).
 *
 * Wrapped in do { } while (0) with the argument parenthesized so that the
 * macro expands to exactly one statement and accepts any pointer expression.
 */
#define RESET_PARSER(parser) \
  do { \
    (parser)->chunk_size = 0; \
    (parser)->flags = 0; \
    (parser)->header_field_mark = NULL; \
    (parser)->header_value_mark = NULL; \
    (parser)->query_string_mark = NULL; \
    (parser)->path_mark = NULL; \
    (parser)->uri_mark = NULL; \
    (parser)->fragment_mark = NULL; \
    (parser)->status_code = 0; \
    (parser)->method = 0; \
    (parser)->version = HTTP_VERSION_OTHER; \
    (parser)->keep_alive = -1; \
    (parser)->content_length = -1; \
    (parser)->body_read = 0; \
  } while (0)
/*
 * END_REQUEST: finish the current message.
 *
 * Calls parser->on_message_complete(parser) when installed, storing the
 * result in callback_return_value, and then unconditionally resets the
 * per-message state with RESET_PARSER. The callback result is NOT inspected
 * here; it is up to the code that expands this macro to test
 * callback_return_value afterwards.
 *
 * Needs locals named `parser` and `callback_return_value` in scope.
 */
#define END_REQUEST \
do { \
if (parser->on_message_complete) { \
callback_return_value = \
parser->on_message_complete(parser); \
} \
RESET_PARSER(parser); \
} while (0)
/*
 * SKIP_BODY(nskip): consume `nskip` bytes at `p` as message-body data.
 *
 * The count is first copied into the local `tmp`. If on_body is installed
 * and tmp > 0, the bytes are handed to parser->on_body(parser, p, tmp) and
 * the result is stored in callback_return_value.
 *
 * Only when callback_return_value is 0 is the state advanced: `p` moves
 * forward by tmp, body_read grows by tmp and chunk_size shrinks by tmp.
 * If chunk_size reaches 0 the EATING flag is cleared and, for non-chunked
 * messages, END_REQUEST runs (which may itself overwrite
 * callback_return_value). Otherwise EATING is set so that the next call to
 * http_parser_execute keeps consuming body bytes.
 *
 * When callback_return_value is non-zero nothing is advanced and no flag is
 * set here; the expanding code must check callback_return_value itself.
 *
 * Needs locals named `parser`, `p`, `tmp` and `callback_return_value`.
 */
#define SKIP_BODY(nskip) \
do { \
tmp = (nskip); \
if (parser->on_body && tmp > 0) { \
callback_return_value = parser->on_body(parser, p, tmp); \
} \
if (callback_return_value == 0) { \
p += tmp; \
parser->body_read += tmp; \
parser->chunk_size -= tmp; \
if (0 == parser->chunk_size) { \
parser->flags &= ~EATING; \
if (!(parser->flags & CHUNKED)) { \
END_REQUEST; \
} \
} else { \
parser->flags |= EATING; \
} \
} \
} while (0)
%%{
machine http_parser;
# Span-start actions. Each one zeroes the accumulated size of its element and
# records the current input position `p` as the start of a new span.
action mark_header_field {
  parser->header_field_size = 0;
  parser->header_field_mark = p;
}

action mark_header_value {
  parser->header_value_size = 0;
  parser->header_value_mark = p;
}

action mark_fragment {
  parser->fragment_size = 0;
  parser->fragment_mark = p;
}

action mark_query_string {
  parser->query_string_size = 0;
  parser->query_string_mark = p;
}

action mark_request_path {
  parser->path_size = 0;
  parser->path_mark = p;
}

action mark_request_uri {
  parser->uri_size = 0;
  parser->uri_mark = p;
}
# Span-end actions. Each one reports the marked span through CALLBACK (which
# returns from the enclosing function on failure) and then closes the span by
# zeroing its size and clearing its mark.
action header_field {
  CALLBACK(header_field);
  parser->header_field_size = 0;
  parser->header_field_mark = NULL;
}

action header_value {
  CALLBACK(header_value);
  parser->header_value_size = 0;
  parser->header_value_mark = NULL;
}

action request_uri {
  CALLBACK(uri);
  parser->uri_size = 0;
  parser->uri_mark = NULL;
}

action fragment {
  CALLBACK(fragment);
  parser->fragment_size = 0;
  parser->fragment_mark = NULL;
}

action query_string {
  CALLBACK(query_string);
  parser->query_string_size = 0;
  parser->query_string_mark = NULL;
}

action request_path {
  CALLBACK(path);
  parser->path_size = 0;
  parser->path_mark = NULL;
}
action headers_complete {
  /* Notify the user that the header section is finished. A non-zero result
   * from the callback flags an error and aborts the current execute call.
   * With no callback installed this action does nothing. */
  if (parser->on_headers_complete
      && (callback_return_value = parser->on_headers_complete(parser)) != 0) {
    parser->flags |= ERROR;
    return;
  }
}
action begin_message {
  /* Notify the user that a new message is starting. A non-zero result from
   * the callback flags an error and aborts the current execute call.
   * With no callback installed this action does nothing. */
  if (parser->on_message_begin
      && (callback_return_value = parser->on_message_begin(parser)) != 0) {
    parser->flags |= ERROR;
    return;
  }
}
action content_length {
  /* Fold one decimal digit of the Content-Length value into
   * parser->content_length. The field starts at -1 ("not seen"), so the
   * first digit switches it to 0 before accumulating.
   *
   * The range check is performed BEFORE the multiply-and-add: the new value
   * would be content_length * 10 + digit, so it stays within INT_MAX exactly
   * when content_length <= (INT_MAX - digit) / 10. Checking afterwards (or
   * comparing the old value against INT_MAX) lets the arithmetic overflow a
   * signed integer, which is undefined behavior. Values above INT_MAX are
   * rejected by flagging an error and aborting the current execute call. */
  if (parser->content_length == -1) parser->content_length = 0;
  if (parser->content_length > (INT_MAX - (*p - '0')) / 10) {
    parser->flags |= ERROR;
    return;
  }
  parser->content_length = parser->content_length * 10 + (*p - '0');
}
action status_code {
  /* Append the decimal digit under the cursor to the response status code. */
  parser->status_code = parser->status_code * 10 + (*p - '0');
}
# Mark the message body as using chunked transfer encoding.
action use_chunked_encoding { parser->flags |= CHUNKED; }
# Record an explicit keep-alive / close request from the Connection header
# (parser->keep_alive is -1 until one of these fires).
action set_keep_alive { parser->keep_alive = 1; }
action set_not_keep_alive { parser->keep_alive = 0; }
# Record the recognised HTTP version; any other version leaves
# parser->version at the value set by RESET_PARSER.
action version_11 { parser->version = HTTP_VERSION_11; }
action version_10 { parser->version = HTTP_VERSION_10; }
action version_09 { parser->version = HTTP_VERSION_09; }
action add_to_chunk_size {
  /* Fold one hexadecimal digit of the chunk-size line into
   * parser->chunk_size. The grammar only runs this action on xdigit
   * characters, so the unhex lookup yields 0..15.
   *
   * The digits come straight from the peer, so reject a size that would no
   * longer fit once shifted left by one hex digit instead of letting it
   * wrap around to a small value.
   * NOTE(review): the bound assumes chunk_size is an unsigned field as wide
   * as size_t; confirm against the declaration in http_parser.h. */
  if (parser->chunk_size > (((size_t)-1) >> 4)) {
    parser->flags |= ERROR;
    return;
  }
  parser->chunk_size = parser->chunk_size * 16 + unhex[(int)*p];
}
action skip_chunk_data {
/* Runs on entering chunk_body, i.e. on the first byte of a chunk's data.
 * Consumes as much of the chunk as the current buffer holds (the smaller of
 * the outstanding chunk size and the bytes left), handing it to on_body via
 * SKIP_BODY. */
SKIP_BODY(MIN(parser->chunk_size, REMAINING));
/* SKIP_BODY does not flag errors itself; do it here. */
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
/* SKIP_BODY already moved p past the consumed data; step back one byte so
 * the advance performed by the generated code lands on the right position. */
fhold;
if (parser->chunk_size > REMAINING) {
/* Chunk data continues beyond this buffer: leave the exec loop. */
fbreak;
} else {
/* Otherwise continue with the chunk terminator. */
fgoto chunk_end;
}
}
action end_chunked_body {
  /* The chunked body (including trailers) is complete: report the end of the
   * message, reset per-message state, and switch back to the top-level
   * machine matching the parser type so that the next message can follow. */
  END_REQUEST;
  /* END_REQUEST only records the on_message_complete result. Treat a
   * non-zero value like every other callback failure instead of silently
   * carrying it along. The flag is set after END_REQUEST because
   * RESET_PARSER has just cleared parser->flags. */
  if (callback_return_value != 0) {
    parser->flags |= ERROR;
    return;
  }
  if (parser->type == HTTP_REQUEST) {
    fnext Requests;
  } else {
    fnext Responses;
  }
}
action body_logic {
/* Runs on the final character of the header section (attached with @ to
 * Request and Response) and decides how the message body is consumed. */
if (parser->flags & CHUNKED) {
/* Chunked transfer encoding: hand over to the ChunkedBody machine. */
fnext ChunkedBody;
} else {
/* this is pretty stupid. i'd prefer to combine this with
* skip_chunk_data */
if (parser->content_length < 0) {
/* If we didn't get a content length; if not keep-alive
* just read body until EOF */
if (!http_parser_should_keep_alive(parser)) {
parser->flags |= EAT_FOREVER;
parser->chunk_size = REMAINING;
} else {
/* Otherwise, if keep-alive, then assume the message
* has no body. */
parser->chunk_size = parser->content_length = 0;
}
} else {
/* Known length: treat the whole body as a single chunk. */
parser->chunk_size = parser->content_length;
}
/* Step past the character that triggered this action so that p points at
 * the first body byte, then consume whatever part of the body is already
 * in this buffer. */
p += 1;
SKIP_BODY(MIN(REMAINING, parser->chunk_size));
/* SKIP_BODY does not flag errors itself; do it here. */
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
/* Undo one byte of advance to compensate for the generated code's own
 * increment of p after the action. */
fhold;
if(parser->chunk_size > REMAINING) {
/* More body is outstanding than this buffer holds: leave the exec loop. */
fbreak;
}
}
}
# Line terminator used throughout the grammar.
CRLF = "\r\n";
# character types
# Control characters, plus DEL (127).
CTL = (cntrl | 127);
safe = ("$" | "-" | "_" | ".");
extra = ("!" | "*" | "'" | "(" | ")" | ",");
reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
# Everything that is not covered by one of the classes above.
national = any -- (alpha | digit | reserved | extra | safe | unsafe);
unreserved = (alpha | digit | safe | extra | national);
# Percent-encoded octet: "%" followed by two hex digits.
escape = ("%" xdigit xdigit);
# URI character; note that the double quote is accepted here although it is
# also listed in `unsafe`.
uchar = (unreserved | escape | "\"");
# Path character: uchar plus a subset of the reserved characters.
pchar = (uchar | ":" | "@" | "&" | "=" | "+");
# Separator characters that may not appear in a token.
tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
| "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# elements
# A single token character: ASCII that is neither a control character nor a
# tspecial.
token = (ascii -- (CTL | tspecials));
quote = "\"";
# Quoted-string support is currently disabled:
# qdtext = token -- "\"";
# quoted_pair = "\" ascii;
# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
# headers
# Request method. Only the methods listed here are accepted; each alternative
# stores the matching HTTP_* constant in parser->method once it has been
# matched in full.
Method = ( "COPY" %{ parser->method = HTTP_COPY; }
| "DELETE" %{ parser->method = HTTP_DELETE; }
| "GET" %{ parser->method = HTTP_GET; }
| "HEAD" %{ parser->method = HTTP_HEAD; }
| "LOCK" %{ parser->method = HTTP_LOCK; }
| "MKCOL" %{ parser->method = HTTP_MKCOL; }
| "MOVE" %{ parser->method = HTTP_MOVE; }
| "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
| "POST" %{ parser->method = HTTP_POST; }
| "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
| "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
| "PUT" %{ parser->method = HTTP_PUT; }
| "TRACE" %{ parser->method = HTTP_TRACE; }
| "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
); # Not allowing extension methods
# Protocol version. 1.1, 1.0 and 0.9 are recorded in parser->version; any
# other "digit.digit" version is accepted without running a version action.
HTTP_Version = "HTTP/" ( "1.1" %version_11
| "1.0" %version_10
| "0.9" %version_09
| (digit "." digit)
);
# Request-target grammar. The mark_* entering actions record where a
# component starts and the matching leaving actions report it through the
# corresponding callback.
scheme = ( alpha | digit | "+" | "-" | "." )* ;
absolute_uri = (scheme ":" (uchar | reserved )*);
path = ( pchar+ ( "/" pchar* )* ) ;
# Query string (the part after "?"); reported via on_query_string.
query = ( uchar | reserved )* >mark_query_string %query_string ;
param = ( pchar | "/" )* ;
params = ( param ( ";" param )* ) ;
rel_path = ( path? (";" params)? ) ;
# Path part of the target (one or more "/" then rel_path); reported via
# on_path. An optional "?" query may follow and is not part of the path span.
absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
# Whole request target: "*", an absolute URI or an absolute path; reported
# via on_uri.
Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
# Fragment (the part after "#"); reported via on_fragment.
Fragment = ( uchar | reserved )* >mark_fragment %fragment;
# Header field name: one or more token characters other than ":".
field_name = ( token -- ":" )+;
# Same, with the span reported via on_header_field.
Field_Name = field_name >mark_header_field %header_field;
# Header field value: empty, or any text that does not start with a space.
field_value = ((any - " ") any*)?;
# Same, with the span reported via on_header_value.
Field_Value = field_value >mark_header_value %header_value;
# Separator between name and value: a colon followed by optional spaces.
hsep = ":" " "*;
# Plain header line without any actions (used for chunked trailers).
header = (field_name hsep field_value) :> CRLF;
# Header line with actions. Content-Length, Connection and Transfer-Encoding
# are matched case-insensitively and update parser state; the last
# alternative matches any header and drives the field/value callbacks.
Header = ( ("Content-Length"i hsep digit+ $content_length)
| ("Connection"i hsep
( "Keep-Alive"i %set_keep_alive
| "close"i %set_not_keep_alive
)
)
| ("Transfer-Encoding"i hsep "chunked"i %use_chunked_encoding)
| (Field_Name hsep Field_Value)
) :> CRLF;
# Zero or more header lines terminated by an empty line; headers_complete
# runs on the final character of that terminating CRLF.
Headers = (Header)* :> CRLF @headers_complete;
# Request start line: method, target, optional "#fragment", version.
Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
# Three-digit status code, accumulated digit by digit into parser->status_code.
StatusCode = (digit digit digit) $status_code;
ReasonPhrase = ascii* -- ("\r" | "\n");
# Response start line: version, status code, optional reason phrase.
StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF;
# chunked message
# Trailer section: header lines matched without firing any callbacks.
trailing_headers = header*;
#chunk_ext_val = token | quoted_string;
chunk_ext_val = token*;
chunk_ext_name = token*;
# Zero or more ";name[=value]" extensions after the chunk size.
chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
# Terminating chunk: a size made only of zeros.
last_chunk = "0"+ ( chunk_extension | " "+) CRLF;
# Non-zero hexadecimal chunk size (at least one non-zero digit); every digit
# is folded into parser->chunk_size by add_to_chunk_size.
chunk_size = (xdigit* [1-9a-fA-F] xdigit* ) $add_to_chunk_size;
chunk_end = CRLF;
# The chunk data itself is not matched by the grammar: skip_chunk_data runs
# on its first byte and consumes the data by moving p directly.
chunk_body = any >skip_chunk_data;
chunk_begin = chunk_size ( chunk_extension | " "+ ) CRLF;
chunk = chunk_begin chunk_body chunk_end;
# Complete chunked body: data chunks, the last chunk, optional trailers and a
# final CRLF, on whose last character end_chunked_body runs.
ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
# One full request / response head. begin_message runs on the first character
# and body_logic on the last character of the header section.
Request = (Request_Line Headers) >begin_message @body_logic;
Response = (StatusLine Headers) >begin_message @body_logic;
# Entry points for a stream of consecutive messages of one kind.
Requests := Request*;
Responses := Response*;
# Start machine: on the very first byte, put that byte back (fhold) and jump
# to Requests or Responses depending on parser->type.
main := any >{
fhold;
if (parser->type == HTTP_REQUEST) {
fgoto Requests;
} else {
fgoto Responses;
}
};
}%%
%% write data;
/*
 * Prepare `parser` for use as a parser of the given `type`.
 *
 * Stores the state machine's initial state (produced by the generated init
 * code) in parser->cs, records the type, clears every user callback and
 * resets the per-message state. Callbacks must therefore be installed AFTER
 * calling this function.
 */
void
http_parser_init (http_parser *parser, enum http_parser_type type)
{
  int cs = 0;
  %% write init;

  parser->type = type;
  parser->cs = cs;

  /* Message-level callbacks. */
  parser->on_message_begin = NULL;
  parser->on_headers_complete = NULL;
  parser->on_message_complete = NULL;

  /* Data callbacks. */
  parser->on_uri = NULL;
  parser->on_path = NULL;
  parser->on_query_string = NULL;
  parser->on_fragment = NULL;
  parser->on_header_field = NULL;
  parser->on_header_value = NULL;
  parser->on_body = NULL;

  RESET_PARSER(parser);
}
/*
 * Feed `len` bytes starting at `buffer` to the parser.
 *
 * The function may be called repeatedly with consecutive pieces of the
 * stream; state is carried in `parser` between calls. A call with len == 0
 * signals end of input. Results are delivered through the callbacks stored
 * in `parser`; failures are recorded in parser->flags / parser->cs and can
 * be queried with http_parser_has_error.
 */
void
http_parser_execute (http_parser *parser, const char *buffer, size_t len)
{
size_t tmp; // REMOVE ME this is extremely hacky
int callback_return_value = 0;
const char *p, *pe, *eof;
int cs = parser->cs;
p = buffer;
pe = buffer+len;
/* Only an empty buffer marks end of input for the generated code. */
eof = len ? NULL : pe;
/* Read-until-EOF body: bypass the state machine entirely. Data goes straight
 * to on_body; the zero-length call reports message completion. */
if (parser->flags & EAT_FOREVER) {
if (len == 0) {
if (parser->on_message_complete) {
callback_return_value = parser->on_message_complete(parser);
if (callback_return_value != 0) parser->flags |= ERROR;
}
} else {
if (parser->on_body) {
callback_return_value = parser->on_body(parser, p, len);
if (callback_return_value != 0) parser->flags |= ERROR;
}
}
return;
}
/* A previous call stopped in the middle of a body or chunk: consume the
 * continuation before handing the rest to the state machine. */
if (0 < parser->chunk_size && (parser->flags & EATING)) {
/* eat body */
SKIP_BODY(MIN(len, parser->chunk_size));
if (callback_return_value != 0) {
parser->flags |= ERROR;
return;
}
}
/* Any span still open from the previous call pointed into the previous
 * buffer; re-anchor it at the start of this one so it continues here. */
if (parser->header_field_mark) parser->header_field_mark = buffer;
if (parser->header_value_mark) parser->header_value_mark = buffer;
if (parser->fragment_mark) parser->fragment_mark = buffer;
if (parser->query_string_mark) parser->query_string_mark = buffer;
if (parser->path_mark) parser->path_mark = buffer;
if (parser->uri_mark) parser->uri_mark = buffer;
/* Generated state-machine loop. */
%% write exec;
/* Persist the machine state for the next call. Note that actions which
 * bail out with `return` skip this and everything below. */
parser->cs = cs;
/* Report the part of any span that is still open at the end of the buffer.
 * The marks stay set, so the span is continued on the next call. */
CALLBACK(header_field);
CALLBACK(header_value);
CALLBACK(fragment);
CALLBACK(query_string);
CALLBACK(path);
CALLBACK(uri);
assert(p <= pe && "buffer overflow after parsing execute");
}
/*
 * Report whether the parser is in a failed state.
 *
 * Returns 1 when the ERROR flag has been set (callback failure or limit
 * exceeded) or when the state machine has reached its error state,
 * otherwise 0.
 */
int
http_parser_has_error (http_parser *parser)
{
  return (parser->flags & ERROR) || parser->cs == http_parser_error;
}