Reimplement support for extension methods

This change costs
- a little space (10 bytes),
- a few extra calculations, and
- a new dependency on strncmp(),
but in exchange it dramatically simplifies the method-parsing code and
supports almost arbitrary extension methods.

In the future I will do as NGINX does and replace strncmp() with bit-level
blob comparisons.
event_stream
Ryan Dahl 16 years ago
parent 12808fe1e6
commit 9c059ec60d

@ -4,7 +4,7 @@ HTTP Parser
This is a parser for HTTP messages written in C. It parses both requests
and responses. The parser is designed to be used in high-performance HTTP
applications. It does not make any allocations, it does not buffer data, and
it can be interrupted at any time. It only requires about 128 bytes of data
it can be interrupted at any time. It only requires about 136 bytes of data
per message stream (in a web server that is per connection).
Features:

@ -24,6 +24,7 @@
#include <http_parser.h>
#include <stdint.h>
#include <assert.h>
#include <string.h> /* strncmp */
#ifndef NULL
# define NULL ((void*)0)
@ -166,72 +167,7 @@ enum state
, s_start_req
/* COPY */
, s_req_method_C
, s_req_method_CO
, s_req_method_COP
/* DELETE */
, s_req_method_D
, s_req_method_DE
, s_req_method_DEL
, s_req_method_DELE
, s_req_method_DELET
/* GET */
, s_req_method_G
, s_req_method_GE
/* HEAD */
, s_req_method_H
, s_req_method_HE
, s_req_method_HEA
/* LOCK */
, s_req_method_L
, s_req_method_LO
, s_req_method_LOC
/* MKCOL */
, s_req_method_M
, s_req_method_MK
, s_req_method_MKC
, s_req_method_MKCO
/* MOVE */
, s_req_method_MO
, s_req_method_MOV
/* OPTIONS */
, s_req_method_O
, s_req_method_OP
, s_req_method_OPT
, s_req_method_OPTI
, s_req_method_OPTIO
, s_req_method_OPTION
/* PUT */
, s_req_method_P
, s_req_method_PU
/* POST */
, s_req_method_PO
, s_req_method_POS
/* PROPFIND */
, s_req_method_PR
, s_req_method_PRO
, s_req_method_PROP
, s_req_method_PROPF
, s_req_method_PROPFI
, s_req_method_PROPFIN
/* PROPPATCH */
, s_req_method_PROPP
, s_req_method_PROPPA
, s_req_method_PROPPAT
, s_req_method_PROPPATC
/* TRACE */
, s_req_method_T
, s_req_method_TR
, s_req_method_TRA
, s_req_method_TRAC
/* UNLOCK */
, s_req_method_U
, s_req_method_UN
, s_req_method_UNL
, s_req_method_UNLO
, s_req_method_UNLOC
, s_req_method
, s_req_spaces_before_url
, s_req_schema
, s_req_schema_slash
@ -327,7 +263,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
enum state state = parser->state;
enum header_states header_state = parser->header_state;
size_t header_index = parser->header_index;
size_t index = parser->index;
if (len == 0) {
if (state == s_body_identity_eof) {
@ -507,406 +443,128 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
CALLBACK2(message_begin);
switch (ch) {
/* COPY */
case 'C':
state = s_req_method_C;
break;
/* DELETE */
case 'D':
state = s_req_method_D;
break;
/* GET */
case 'G':
state = s_req_method_G;
break;
/* HEAD */
case 'H':
state = s_req_method_H;
break;
/* LOCK */
case 'L':
state = s_req_method_L;
break;
if (ch < 'A' || 'Z' < ch) goto error;
/* MKCOL, MOVE */
case 'M':
state = s_req_method_M;
break;
/* OPTIONS */
case 'O':
state = s_req_method_O;
break;
/* POST, PUT, PROPFIND, PROPPATCH */
case 'P':
state = s_req_method_P;
break;
/* TRACE */
case 'T':
state = s_req_method_T;
break;
/* UNLOCK */
case 'U':
state = s_req_method_U;
break;
case CR:
case LF:
break;
default:
goto error;
}
parser->method = 0;
index = 0;
parser->buffer[0] = ch;
state = s_req_method;
break;
}
/* COPY */
case s_req_method_C:
STRICT_CHECK(ch != 'O');
state = s_req_method_CO;
break;
case s_req_method_CO:
STRICT_CHECK(ch != 'P');
state = s_req_method_COP;
break;
case s_req_method_COP:
STRICT_CHECK(ch != 'Y');
parser->method = HTTP_COPY;
state = s_req_spaces_before_url;
break;
/* DELETE */
case s_req_method_D:
STRICT_CHECK(ch != 'E');
state = s_req_method_DE;
break;
case s_req_method_DE:
STRICT_CHECK(ch != 'L');
state = s_req_method_DEL;
break;
case s_req_method_DEL:
STRICT_CHECK(ch != 'E');
state = s_req_method_DELE;
break;
case s_req_method_DELE:
STRICT_CHECK(ch != 'T');
state = s_req_method_DELET;
break;
case s_req_method_DELET:
STRICT_CHECK(ch != 'E');
parser->method = HTTP_DELETE;
state = s_req_spaces_before_url;
break;
/* GET */
case s_req_method_G:
STRICT_CHECK(ch != 'E');
state = s_req_method_GE;
break;
case s_req_method_GE:
STRICT_CHECK(ch != 'T');
parser->method = HTTP_GET;
state = s_req_spaces_before_url;
break;
/* HEAD */
case s_req_method_H:
STRICT_CHECK(ch != 'E');
state = s_req_method_HE;
break;
case s_req_method_HE:
STRICT_CHECK(ch != 'A');
state = s_req_method_HEA;
break;
case s_req_method_HEA:
STRICT_CHECK(ch != 'D');
parser->method = HTTP_HEAD;
state = s_req_spaces_before_url;
break;
/* LOCK */
case s_req_method_L:
STRICT_CHECK(ch != 'O');
state = s_req_method_LO;
break;
case s_req_method_LO:
STRICT_CHECK(ch != 'C');
state = s_req_method_LOC;
break;
case s_req_method_LOC:
STRICT_CHECK(ch != 'K');
parser->method = HTTP_LOCK;
state = s_req_spaces_before_url;
break;
/* MKCOL, MOVE */
case s_req_method:
if (ch == ' ') {
assert(index+1 < HTTP_PARSER_MAX_METHOD_LEN);
parser->buffer[index+1] = '\0';
case s_req_method_M:
switch (ch) {
case 'K':
state = s_req_method_MK;
break;
/* TODO Instead of using strncmp() use NGINX's ngx_str3Ocmp() */
case 'O':
state = s_req_method_MO;
break;
switch (index+1) {
case 3:
if (strncmp(parser->buffer, "GET", 3) == 0) {
parser->method = HTTP_GET;
break;
}
default:
goto error;
}
break;
if (strncmp(parser->buffer, "PUT", 3) == 0) {
parser->method = HTTP_PUT;
break;
}
/* MKCOL */
break;
case s_req_method_MK:
STRICT_CHECK(ch != 'C');
state = s_req_method_MKC;
break;
case 4:
if (strncmp(parser->buffer, "POST", 4) == 0) {
parser->method = HTTP_POST;
break;
}
case s_req_method_MKC:
STRICT_CHECK(ch != 'O');
state = s_req_method_MKCO;
break;
if (strncmp(parser->buffer, "HEAD", 4) == 0) {
parser->method = HTTP_HEAD;
break;
}
case s_req_method_MKCO:
STRICT_CHECK(ch != 'L');
parser->method = HTTP_MKCOL;
state = s_req_spaces_before_url;
break;
if (strncmp(parser->buffer, "COPY", 4) == 0) {
parser->method = HTTP_COPY;
break;
}
/* MOVE */
if (strncmp(parser->buffer, "MOVE", 4) == 0) {
parser->method = HTTP_MOVE;
break;
}
case s_req_method_MO:
STRICT_CHECK(ch != 'V');
state = s_req_method_MOV;
break;
break;
case s_req_method_MOV:
STRICT_CHECK(ch != 'E');
parser->method = HTTP_MOVE;
state = s_req_spaces_before_url;
break;
case 5:
if (strncmp(parser->buffer, "MKCOL", 5) == 0) {
parser->method = HTTP_MKCOL;
break;
}
/* OPTIONS */
if (strncmp(parser->buffer, "TRACE", 5) == 0) {
parser->method = HTTP_TRACE;
break;
}
case s_req_method_O:
STRICT_CHECK(ch != 'P');
state = s_req_method_OP;
break;
break;
case s_req_method_OP:
STRICT_CHECK(ch != 'T');
state = s_req_method_OPT;
break;
case 6:
if (strncmp(parser->buffer, "DELETE", 6) == 0) {
parser->method = HTTP_DELETE;
break;
}
case s_req_method_OPT:
STRICT_CHECK(ch != 'I');
state = s_req_method_OPTI;
break;
if (strncmp(parser->buffer, "UNLOCK", 6) == 0) {
parser->method = HTTP_UNLOCK;
break;
}
case s_req_method_OPTI:
STRICT_CHECK(ch != 'O');
state = s_req_method_OPTIO;
break;
break;
case s_req_method_OPTIO:
STRICT_CHECK(ch != 'N');
state = s_req_method_OPTION;
break;
case 7:
if (strncmp(parser->buffer, "OPTIONS", 7) == 0) {
parser->method = HTTP_OPTIONS;
break;
}
case s_req_method_OPTION:
STRICT_CHECK(ch != 'S');
parser->method = HTTP_OPTIONS;
state = s_req_spaces_before_url;
break;
if (strncmp(parser->buffer, "CONNECT", 7) == 0) {
parser->method = HTTP_CONNECT;
break;
}
/* POST, PUT, PROPFIND, PROPPATCH */
break;
case s_req_method_P:
switch (ch) {
case 'O':
state = s_req_method_PO;
break;
case 8:
if (strncmp(parser->buffer, "PROPFIND", 8) == 0) {
parser->method = HTTP_OPTIONS;
break;
}
case 'R':
state = s_req_method_PR;
break;
break;
case 'U':
state = s_req_method_PU;
break;
case 9:
if (strncmp(parser->buffer, "PROPPATCH", 9) == 0) {
parser->method = HTTP_OPTIONS;
break;
}
default:
goto error;
break;
}
state = s_req_spaces_before_url;
break;
}
break;
/* PUT */
case s_req_method_PU:
STRICT_CHECK(ch != 'T');
parser->method = HTTP_PUT;
state = s_req_spaces_before_url;
break;
/* POST */
case s_req_method_PO:
STRICT_CHECK(ch != 'S');
state = s_req_method_POS;
break;
case s_req_method_POS:
STRICT_CHECK(ch != 'T');
parser->method = HTTP_POST;
state = s_req_spaces_before_url;
break;
/* PROPFIND, PROPPATCH */
case s_req_method_PR:
STRICT_CHECK(ch != 'O');
state = s_req_method_PRO;
break;
case s_req_method_PRO:
STRICT_CHECK(ch != 'P');
state = s_req_method_PROP;
break;
if (ch < 'A' || 'Z' < ch) goto error;
case s_req_method_PROP:
switch (ch) {
case 'F':
state = s_req_method_PROPF;
break;
case 'P':
state = s_req_method_PROPP;
break;
default:
goto error;
if (++index >= HTTP_PARSER_MAX_METHOD_LEN - 1) {
goto error;
}
break;
/* PROPFIND */
case s_req_method_PROPF:
STRICT_CHECK(ch != 'I');
state = s_req_method_PROPFI;
break;
case s_req_method_PROPFI:
STRICT_CHECK(ch != 'N');
state = s_req_method_PROPFIN;
break;
case s_req_method_PROPFIN:
STRICT_CHECK(ch != 'D');
parser->method = HTTP_PROPFIND;
state = s_req_spaces_before_url;
break;
/* PROPPATCH */
case s_req_method_PROPP:
STRICT_CHECK(ch != 'A');
state = s_req_method_PROPPA;
break;
case s_req_method_PROPPA:
STRICT_CHECK(ch != 'T');
state = s_req_method_PROPPAT;
break;
case s_req_method_PROPPAT:
STRICT_CHECK(ch != 'C');
state = s_req_method_PROPPATC;
break;
case s_req_method_PROPPATC:
STRICT_CHECK(ch != 'H');
parser->method = HTTP_PROPPATCH;
state = s_req_spaces_before_url;
break;
/* TRACE */
case s_req_method_T:
STRICT_CHECK(ch != 'R');
state = s_req_method_TR;
break;
case s_req_method_TR:
STRICT_CHECK(ch != 'A');
state = s_req_method_TRA;
break;
case s_req_method_TRA:
STRICT_CHECK(ch != 'C');
state = s_req_method_TRAC;
break;
case s_req_method_TRAC:
STRICT_CHECK(ch != 'E');
parser->method = HTTP_TRACE;
state = s_req_spaces_before_url;
break;
/* UNLOCK */
case s_req_method_U:
STRICT_CHECK(ch != 'N');
state = s_req_method_UN;
break;
case s_req_method_UN:
STRICT_CHECK(ch != 'L');
state = s_req_method_UNL;
break;
parser->buffer[index] = ch;
case s_req_method_UNL:
STRICT_CHECK(ch != 'O');
state = s_req_method_UNLO;
break;
case s_req_method_UNLO:
STRICT_CHECK(ch != 'C');
state = s_req_method_UNLOC;
break;
case s_req_method_UNLOC:
STRICT_CHECK(ch != 'K');
parser->method = HTTP_UNLOCK;
state = s_req_spaces_before_url;
break;
/* whew! that was annoying! */
case s_req_spaces_before_url:
{
if (ch == ' ') break;
@ -1283,7 +941,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
MARK(header_field);
header_index = 0;
index = 0;
state = s_header_field;
switch (c) {
@ -1312,17 +970,17 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
break;
case h_C:
header_index++;
index++;
header_state = (c == 'o' ? h_CO : h_general);
break;
case h_CO:
header_index++;
index++;
header_state = (c == 'n' ? h_CON : h_general);
break;
case h_CON:
header_index++;
index++;
switch (c) {
case 'n':
header_state = h_matching_connection;
@ -1339,11 +997,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* connection */
case h_matching_connection:
header_index++;
if (header_index > sizeof(CONNECTION)-1
|| c != CONNECTION[header_index]) {
index++;
if (index > sizeof(CONNECTION)-1
|| c != CONNECTION[index]) {
header_state = h_general;
} else if (header_index == sizeof(CONNECTION)-2) {
} else if (index == sizeof(CONNECTION)-2) {
header_state = h_connection;
}
break;
@ -1351,11 +1009,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* content-length */
case h_matching_content_length:
header_index++;
if (header_index > sizeof(CONTENT_LENGTH)-1
|| c != CONTENT_LENGTH[header_index]) {
index++;
if (index > sizeof(CONTENT_LENGTH)-1
|| c != CONTENT_LENGTH[index]) {
header_state = h_general;
} else if (header_index == sizeof(CONTENT_LENGTH)-2) {
} else if (index == sizeof(CONTENT_LENGTH)-2) {
header_state = h_content_length;
}
break;
@ -1363,11 +1021,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* transfer-encoding */
case h_matching_transfer_encoding:
header_index++;
if (header_index > sizeof(TRANSFER_ENCODING)-1
|| c != TRANSFER_ENCODING[header_index]) {
index++;
if (index > sizeof(TRANSFER_ENCODING)-1
|| c != TRANSFER_ENCODING[index]) {
header_state = h_general;
} else if (header_index == sizeof(TRANSFER_ENCODING)-2) {
} else if (index == sizeof(TRANSFER_ENCODING)-2) {
header_state = h_transfer_encoding;
}
break;
@ -1413,7 +1071,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
MARK(header_value);
state = s_header_value;
header_index = 0;
index = 0;
c = lowcase[(int)ch];
@ -1503,32 +1161,32 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
/* Transfer-Encoding: chunked */
case h_matching_transfer_encoding_chunked:
header_index++;
if (header_index > sizeof(CHUNKED)-1
|| c != CHUNKED[header_index]) {
index++;
if (index > sizeof(CHUNKED)-1
|| c != CHUNKED[index]) {
header_state = h_general;
} else if (header_index == sizeof(CHUNKED)-2) {
} else if (index == sizeof(CHUNKED)-2) {
header_state = h_transfer_encoding_chunked;
}
break;
/* looking for 'Connection: keep-alive' */
case h_matching_connection_keep_alive:
header_index++;
if (header_index > sizeof(KEEP_ALIVE)-1
|| c != KEEP_ALIVE[header_index]) {
index++;
if (index > sizeof(KEEP_ALIVE)-1
|| c != KEEP_ALIVE[index]) {
header_state = h_general;
} else if (header_index == sizeof(KEEP_ALIVE)-2) {
} else if (index == sizeof(KEEP_ALIVE)-2) {
header_state = h_connection_keep_alive;
}
break;
/* looking for 'Connection: close' */
case h_matching_connection_close:
header_index++;
if (header_index > sizeof(CLOSE)-1 || c != CLOSE[header_index]) {
index++;
if (index > sizeof(CLOSE)-1 || c != CLOSE[index]) {
header_state = h_general;
} else if (header_index == sizeof(CLOSE)-2) {
} else if (index == sizeof(CLOSE)-2) {
header_state = h_connection_close;
}
break;
@ -1739,7 +1397,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state
parser->state = state;
parser->header_state = header_state;
parser->header_index = header_index;
parser->index = index;
return len;

@ -50,6 +50,9 @@ typedef struct http_parser http_parser;
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
/* Size in bytes of the buffer in which the parser accumulates the request
 * method. Should be at least one longer than the longest request method, so
 * that there is room for the terminating '\0' (the longest method matched,
 * "PROPPATCH", is 9 characters). */
#define HTTP_PARSER_MAX_METHOD_LEN 10
/* Request Methods */
enum http_method
{ HTTP_DELETE = 0x0002
@ -67,13 +70,14 @@ enum http_method
, HTTP_PROPPATCH = 0x1000
, HTTP_TRACE = 0x2000
, HTTP_UNLOCK = 0x4000
, HTTP_CONNECT = 0x8000
};
struct http_parser {
/** PRIVATE **/
unsigned short state;
unsigned short header_state;
size_t header_index;
size_t index;
char flags;
@ -98,6 +102,7 @@ struct http_parser {
enum http_method method; /* requests only */
unsigned short http_major;
unsigned short http_minor;
char buffer[HTTP_PARSER_MAX_METHOD_LEN];
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */

Loading…
Cancel
Save