Speed up the http_parser_execute loop

Changes:
* Cache parser->nread in a local variable (same optimization that was
  already in place for parser->state).
* Move the cost of the conditional branch for spaces in tokens out of
  the fast-tokenizer implementation and into the strict-tokenizer
  implementation.

Together, these changes yield a ~15% increase in MB/s and req/s in
the included bench program on x86_64.

PR-URL: https://github.com/nodejs/http-parser/pull/422
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Branch: make-http-max-header-size-gyp-configurable
Author: Brian Pane (committed by Ben Noordhuis, 7 years ago)
parent: 31232735c6
commit: cf69c8eda9

@@ -49,6 +49,7 @@
 #define SET_ERRNO(e)                                                 \
 do {                                                                 \
+  parser->nread = nread;                                             \
   parser->http_errno = (e);                                          \
 } while(0)
@@ -56,6 +57,7 @@ do { \
 #define UPDATE_STATE(V) p_state = (enum state) (V);
 #define RETURN(V)                                                    \
 do {                                                                 \
+  parser->nread = nread;                                             \
   parser->state = CURRENT_STATE();                                   \
   return (V);                                                        \
 } while (0);
@@ -149,8 +151,8 @@ do { \
  */
 #define COUNT_HEADER_SIZE(V)                                         \
 do {                                                                 \
-  parser->nread += (V);                                              \
-  if (UNLIKELY(parser->nread > (HTTP_MAX_HEADER_SIZE))) {            \
+  nread += (V);                                                      \
+  if (UNLIKELY(nread > (HTTP_MAX_HEADER_SIZE))) {                    \
     SET_ERRNO(HPE_HEADER_OVERFLOW);                                  \
     goto error;                                                      \
   }                                                                  \
@@ -192,7 +194,7 @@ static const char tokens[256] = {
 /*  24 can  25 em   26 sub  27 esc  28 fs   29 gs   30 rs   31 us  */
         0,      0,      0,      0,      0,      0,      0,      0,
 /*  32 sp   33  !   34  "   35  #   36  $   37  %   38  &   39  '  */
-        0,     '!',     0,     '#',    '$',    '%',    '&',   '\'',
+       ' ',    '!',     0,     '#',    '$',    '%',    '&',   '\'',
 /*  40  (   41  )   42  *   43  +   44  ,   45  -   46  .   47  /  */
         0,      0,     '*',    '+',     0,     '-',    '.',     0,
 /*  48  0   49  1   50  2   51  3   52  4   53  5   54  6   55  7  */
@@ -419,14 +421,14 @@ enum http_host_state
   (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
   (c) == '$' || (c) == ',')

-#define STRICT_TOKEN(c) (tokens[(unsigned char)c])
+#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])

 #if HTTP_PARSER_STRICT
-#define TOKEN(c)            (tokens[(unsigned char)c])
+#define TOKEN(c)            STRICT_TOKEN(c)
 #define IS_URL_CHAR(c)      (BIT_AT(normal_url_char, (unsigned char)c))
 #define IS_HOST_CHAR(c)     (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
 #else
-#define TOKEN(c)            ((c == ' ') ? ' ' : tokens[(unsigned char)c])
+#define TOKEN(c)            tokens[(unsigned char)c]
 #define IS_URL_CHAR(c)                                                         \
   (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
 #define IS_HOST_CHAR(c)                                                        \
@@ -644,6 +646,7 @@ size_t http_parser_execute (http_parser *parser,
   const char *status_mark = 0;
   enum state p_state = (enum state) parser->state;
   const unsigned int lenient = parser->lenient_http_headers;
+  uint32_t nread = parser->nread;

   /* We're in an error state. Don't bother doing anything. */
   if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@@ -1238,8 +1241,14 @@ reexecute:
           break;

         switch (parser->header_state) {
-          case h_general:
+          case h_general: {
+            size_t limit = data + len - p;
+            limit = MIN(limit, HTTP_MAX_HEADER_SIZE);
+            while (p+1 < data + limit && TOKEN(p[1])) {
+              p++;
+            }
             break;
+          }

           case h_C:
             parser->index++;
@ -1487,7 +1496,6 @@ reexecute:
p = data + len; p = data + len;
} }
--p; --p;
break; break;
} }
@@ -1795,6 +1803,7 @@ reexecute:
         STRICT_CHECK(ch != LF);

         parser->nread = 0;
+        nread = 0;

         hasBody = parser->flags & F_CHUNKED ||
           (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
@@ -1889,7 +1898,7 @@ reexecute:
       case s_chunk_size_start:
       {
-        assert(parser->nread == 1);
+        assert(nread == 1);
         assert(parser->flags & F_CHUNKED);

         unhex_val = unhex[(unsigned char)ch];
@@ -1957,6 +1966,7 @@ reexecute:
         STRICT_CHECK(ch != LF);

         parser->nread = 0;
+        nread = 0;

         if (parser->content_length == 0) {
           parser->flags |= F_TRAILING;
@@ -2003,6 +2013,7 @@ reexecute:
         assert(parser->flags & F_CHUNKED);
         STRICT_CHECK(ch != LF);
         parser->nread = 0;
+        nread = 0;
         UPDATE_STATE(s_chunk_size_start);
         CALLBACK_NOTIFY(chunk_complete);
         break;
@@ -2436,6 +2447,7 @@ http_parser_pause(http_parser *parser, int paused) {
    */
   if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
       HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
+    uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
     SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
   } else {
     assert(0 && "Attempting to pause parser in error state");

Loading…
Cancel
Save