diff --git a/Makefile b/Makefile index 3c62766..d41bc2b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -#OPT=-O0 -g -Wall -Wextra -Werror -OPT=-O3 -DHTTP_PARSER_STRICT=0 +OPT=-O0 -g -Wall -Wextra -Werror +#OPT=-O3 -DHTTP_PARSER_STRICT=0 test: http_parser.o test.c gcc $(OPT) http_parser.o test.c -o $@ diff --git a/http_parser.c b/http_parser.c index 1f67f6b..65c12db 100644 --- a/http_parser.c +++ b/http_parser.c @@ -265,10 +265,11 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state { char c, ch; const char *p, *pe; + ssize_t to_read; enum state state = parser->state; enum header_states header_state = parser->header_state; - size_t to_read, header_index = parser->header_index; + size_t header_index = parser->header_index; if (len == 0) { if (state == s_body_identity_eof) { @@ -1188,7 +1189,7 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state if (header_index > sizeof(CLOSE)-1 || c != CLOSE[header_index]) { header_state = h_general; } else if (header_index == sizeof(CLOSE)-2) { - header_state = h_connection_keep_alive; + header_state = h_connection_close; } break; @@ -1244,19 +1245,38 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state parser->body_read = 0; if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore content-length header */ state = s_chunk_size_start; } else { if (parser->content_length == 0) { + /* content-length header given, but zero: Content-Length: 0\r\n */ CALLBACK2(message_complete); state = start_state; } else if (parser->content_length > 0) { + /* content-length header given, and positive */ state = s_body_identity; } else { - if (parser->method & (HTTP_GET | HTTP_HEAD)) { - CALLBACK2(message_complete); - state = start_state; + /* No content-length header, not chunked */ + if (parser->http_major > 0) { + /* HTTP/1.0 or HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + /* Read body until EOF */ + state = s_body_identity_eof; + } 
else { + /* Message is done - read the next */ + CALLBACK2(message_complete); + state = start_state; + } } else { - state = s_body_identity_eof; + /* HTTP/0.9 or earlier */ + if (parser->flags & F_CONNECTION_KEEP_ALIVE) { + /* Message is done - read the next */ + CALLBACK2(message_complete); + state = start_state; + } else { + /* Read body until EOF */ + state = s_body_identity_eof; + } } } } @@ -1287,14 +1307,20 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state break; case s_chunk_size_start: + { + assert(parser->flags & F_CHUNKED); + c = unhex[(int)ch]; if (c == -1) return ERROR; - parser->chunk_size = c; + parser->content_length = c; state = s_chunk_size; break; + } case s_chunk_size: { + assert(parser->flags & F_CHUNKED); + if (ch == CR) { state = s_chunk_size_almost_done; break; @@ -1310,23 +1336,28 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state return ERROR; } - parser->chunk_size *= 16; - parser->chunk_size += c; + parser->content_length *= 16; + parser->content_length += c; break; } case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); /* just ignore this shit. 
TODO check for overflow */ if (ch == CR) { state = s_chunk_size_almost_done; break; } break; + } case s_chunk_size_almost_done: { + assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != LF); - if (parser->chunk_size == 0) { + + if (parser->content_length == 0) { parser->flags |= F_TRAILING; state = s_header_field_start; } else { @@ -1337,27 +1368,31 @@ size_t parse (http_parser *parser, const char *data, size_t len, int start_state case s_chunk_data: { - to_read = MIN(pe - p, (ssize_t)(parser->chunk_size)); + assert(parser->flags & F_CHUNKED); + + to_read = MIN(pe - p, (ssize_t)(parser->content_length)); if (to_read > 0) { if (parser->on_body) parser->on_body(parser, p, to_read); p += to_read - 1; } - if (to_read == parser->chunk_size) { + if (to_read == parser->content_length) { state = s_chunk_data_almost_done; } - parser->chunk_size -= to_read; + parser->content_length -= to_read; break; } case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != CR); state = s_chunk_data_done; break; case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); STRICT_CHECK(ch != LF); state = s_chunk_size_start; break; diff --git a/http_parser.h b/http_parser.h index a245765..eaca330 100644 --- a/http_parser.h +++ b/http_parser.h @@ -67,7 +67,6 @@ struct http_parser { char flags; - size_t chunk_size; ssize_t body_read; ssize_t content_length; diff --git a/test.c b/test.c index c68087e..73035d5 100644 --- a/test.c +++ b/test.c @@ -373,6 +373,7 @@ const struct message requests[] = /* * R E S P O N S E S * */ const struct message responses[] = +#define GOOGLE_301 0 { {.name= "google 301" ,.type= RESPONSE ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" @@ -410,6 +411,7 @@ const struct message responses[] = "\r\n" } +#define NO_CONTENT_LENGTH_RESPONSE 1 , {.name= "no content-length response" ,.type= RESPONSE ,.raw= "HTTP/1.1 200 OK\r\n" @@ -449,6 +451,7 @@ const struct message responses[] = "" } +#define NO_HEADERS_NO_BODY_404 2 , {.name= "404 no headers no 
body" ,.type= RESPONSE ,.raw= "HTTP/1.1 404 Not Found\r\n\r\n" @@ -459,6 +462,7 @@ const struct message responses[] = ,.body= "" } +#define NO_REASON_PHRASE 3 , {.name= "301 no response phrase" ,.type= RESPONSE ,.raw= "HTTP/1.1 301\r\n\r\n" @@ -469,6 +473,7 @@ const struct message responses[] = ,.body= "" } +#define TRAILING_SPACE_ON_CHUNKED_BODY 4 , {.name="200 trailing space on chunked body" ,.type= RESPONSE ,.raw= "HTTP/1.1 200 OK\r\n" @@ -880,7 +885,10 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess parse(r1->type, NULL, 0); - assert(3 == num_messages); + if (3 != num_messages) { + fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages); + goto error; + } if (!message_eq(0, r1)) { fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); @@ -928,6 +936,12 @@ main (void) test_message(&responses[i]); } + printf("response scan 1/3 "); + test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] + , &responses[NO_HEADERS_NO_BODY_404] + , &responses[NO_REASON_PHRASE] + ); + puts("responses okay");