diff --git a/http_parser.c b/http_parser.c index 264c3b3..6b0a94d 100644 --- a/http_parser.c +++ b/http_parser.c @@ -92,6 +92,10 @@ static inline int message_complete_callback (http_parser *parser) #define CONTENT_LENGTH "content-length" #define TRANSFER_ENCODING "transfer-encoding" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + static const unsigned char lowcase[] = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" @@ -191,13 +195,13 @@ enum state , s_headers_almost_done , s_headers_done - , s_chunked_len_start - , s_chunked_len - , s_chunked_len_almost_done - , s_chunked_data - , s_chunked_data_almost_done - , s_chunked_data_done - , s_chunked_almost_done + , s_chunk_size_start + , s_chunk_size + , s_chunk_size_almost_done + , s_chunk_parameters + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done , s_body_identity , s_body_identity_eof @@ -208,26 +212,29 @@ enum header_states , h_C , h_CO , h_CON + , h_matching_connection , h_matching_content_length , h_matching_transfer_encoding + , h_connection , h_content_length , h_transfer_encoding - , h_encoding_C - , h_encoding_CH - , h_encoding_CHU - , h_encoding_CHUN - , h_encoding_CHUNK - , h_encoding_CHUNKE - , h_encoding_CHUNKED - , h_connection_K - , h_connection_C + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close }; enum flags - { F_CHUNKED = 0x0001 - , F_TRAILING= 0x0002 + { F_CHUNKED = 0x0001 + , F_CONNECTION_KEEP_ALIVE = 0x0002 + , F_CONNECTION_CLOSE = 0x0004 + , F_TRAILING = 0x0010 }; #define ERROR (p - data); @@ -836,10 +843,6 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) } break; - case h_connection: - if (ch != ' ') header_state = h_general; - break; - /* content-length */ case h_matching_content_length: @@ -851,10 +854,6 @@ size_t http_parser_execute 
(http_parser *parser, const char *data, size_t len) } break; - case h_content_length: - if (ch != ' ') header_state = h_general; - break; - /* transfer-encoding */ case h_matching_transfer_encoding: @@ -866,6 +865,8 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) } break; + case h_connection: + case h_content_length: case h_transfer_encoding: if (ch != ' ') header_state = h_general; break; @@ -916,6 +917,7 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) MARK(header_value); state = s_header_value; + header_index = 0; c = lowcase[(int)ch]; @@ -926,7 +928,7 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) case h_transfer_encoding: /* looking for 'Transfer-Encoding: chunked' */ if ('c' == c) { - header_state = h_encoding_C; + header_state = h_matching_transfer_encoding_chunked; } else { header_state = h_general; } @@ -940,10 +942,10 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) case h_connection: /* looking for 'Connection: keep-alive' */ if (c == 'k') { - header_state = h_connection_K; + header_state = h_matching_connection_keep_alive; /* looking for 'Connection: close' */ } else if (c == 'c') { - header_state = h_connection_C; + header_state = h_matching_connection_close; } else { header_state = h_general; } @@ -976,15 +978,17 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) break; } + header_index++; + switch (header_state) { + case h_general: + break; + case h_connection: case h_transfer_encoding: assert(0 && "Shouldn't get here."); break; - case h_general: - break; - case h_content_length: if (ch < '0' || ch > '9') return ERROR; parser->content_length *= 10; @@ -992,37 +996,38 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) break; /* Transfer-Encoding: chunked */ - - case h_encoding_C: - header_state = (c == 'h' ? 
h_encoding_CH : h_general); - break; - case h_encoding_CH: - header_state = (c == 'u' ? h_encoding_CHU : h_general); - break; - case h_encoding_CHU: - header_state = (c == 'n' ? h_encoding_CHUN : h_general); - break; - case h_encoding_CHUN: - header_state = (c == 'k' ? h_encoding_CHUNK : h_general); - break; - case h_encoding_CHUNK: - header_state = (c == 'e' ? h_encoding_CHUNKE : h_general); - break; - case h_encoding_CHUNKE: - if (c == 'd') { - parser->flags |= F_CHUNKED; - header_state = h_encoding_CHUNKED; + case h_matching_transfer_encoding_chunked: + if (header_index > sizeof(CHUNKED)-1 + || c != CHUNKED[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(CHUNKED)-2) { + header_state = h_transfer_encoding_chunked; } break; - case h_encoding_CHUNKED: - if (ch != ' ') return ERROR; - break; /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + if (header_index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(KEEP_ALIVE)-2) { + header_state = h_connection_keep_alive; + } + break; + /* looking for 'Connection: close' */ - case h_connection_K: - case h_connection_C: - header_state = h_general; + case h_matching_connection_close: + if (header_index > sizeof(CLOSE)-1 || c != CLOSE[header_index]) { + header_state = h_general; + } else if (header_index == sizeof(CLOSE)-2) { + header_state = h_connection_close; /* whole "close" token matched */ + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') header_state = h_general; break; default: @@ -1035,7 +1040,22 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) case s_header_almost_done: if (ch != LF) return ERROR; + state = s_header_field_start; + + switch (header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= 
F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } break; case s_headers_almost_done: @@ -1053,23 +1073,24 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) parser->body_read = 0; if (parser->flags & F_CHUNKED) { - state = s_chunked_len_start; + state = s_chunk_size_start; } else { if (parser->content_length == 0) { CALLBACK2(message_complete); state = s_start; - } else if (parser->content_length < 0) { - state = s_body_identity_eof; - } else { + } else if (parser->content_length > 0) { state = s_body_identity; + } else { + if (parser->method & (HTTP_GET | HTTP_HEAD)) { + CALLBACK2(message_complete); + state = s_start; + } else { + state = s_body_identity_eof; + } } } break; - /* read until EOF */ - case s_body_identity_eof: - break; - case s_body_identity: to_read = MIN(pe - p, (ssize_t)(parser->content_length - parser->body_read)); if (to_read > 0) { @@ -1083,38 +1104,63 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) } break; - case s_chunked_len_start: + /* read until EOF: pass every remaining byte of this buffer to on_body */ + case s_body_identity_eof: + to_read = pe - p; + if (to_read > 0) { + if (parser->on_body) parser->on_body(parser, p, to_read); + p += to_read - 1; + parser->body_read += to_read; + } + break; + + case s_chunk_size_start: c = unhex[(int)ch]; if (c == -1) return ERROR; parser->chunk_size = c; - state = s_chunked_len; + state = s_chunk_size; break; - case s_chunked_len: + case s_chunk_size: if (ch == CR) { - state = s_chunked_len_almost_done; + state = s_chunk_size_almost_done; break; } c = unhex[(int)ch]; - if (c == -1) return ERROR; + if (c == -1) { + if (ch == ';' || ch == ' ') { + state = s_chunk_parameters; + break; + } + return ERROR; + } parser->chunk_size *= 16; parser->chunk_size += c; break; - case s_chunked_len_almost_done: + case s_chunk_parameters: + /* skip whatever follows the chunk size up to CR; parameters are not interpreted */ + /* TODO check for overflow */ + if (ch == CR) { + state = 
s_chunk_size_almost_done; + break; + } + break; + + case s_chunk_size_almost_done: if (ch != LF) return ERROR; if (parser->chunk_size == 0) { parser->flags |= F_TRAILING; state = s_header_field_start; } else { - state = s_chunked_data; + state = s_chunk_data; } break; - case s_chunked_data: + case s_chunk_data: to_read = MIN(pe - p, (ssize_t)(parser->chunk_size)); if (to_read > 0) { @@ -1123,25 +1169,22 @@ size_t http_parser_execute (http_parser *parser, const char *data, size_t len) } if (to_read == parser->chunk_size) { - state = s_chunked_data_almost_done; + state = s_chunk_data_almost_done; } parser->chunk_size -= to_read; break; - case s_chunked_data_almost_done: + case s_chunk_data_almost_done: if (ch != CR) return ERROR; - state = s_chunked_data_done; + state = s_chunk_data_done; break; - case s_chunked_data_done: + case s_chunk_data_done: if (ch != LF) return ERROR; - state = s_chunked_len_start; + state = s_chunk_size_start; break; - case s_chunked_almost_done: - if (ch != LF) return ERROR; - default: assert(0 && "unhandled state"); return ERROR; diff --git a/test.c b/test.c index c4a6381..f481030 100644 --- a/test.c +++ b/test.c @@ -610,7 +610,7 @@ parser_init (enum http_parser_type type) parser.on_message_complete = message_complete_cb; } -static inline void +static inline int check_str_eq (const struct message *m, const char *prop, const char *expected, @@ -619,11 +619,12 @@ check_str_eq (const struct message *m, printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); printf("expected '%s'\n", expected); printf(" found '%s'\n", found); - exit(1); + return 0; } + return 1; } -static inline void +static inline int check_num_eq (const struct message *m, const char *prop, int expected, @@ -632,18 +633,19 @@ check_num_eq (const struct message *m, printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); printf("expected %d\n", expected); printf(" found %d\n", found); - exit(1); + return 0; } + return 1; } #define MESSAGE_CHECK_STR_EQ(expected, found, prop) 
\ - check_str_eq(expected, #prop, expected->prop, found->prop) + do { /* leave the caller with 0 on mismatch */ if (!check_str_eq(expected, #prop, expected->prop, found->prop)) return 0; } while (0) #define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \ - check_num_eq(expected, #prop, expected->prop, found->prop) + do { /* leave the caller with 0 on mismatch */ if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0; } while (0) -void +int message_eq (int index, const struct message *expected) { int i; @@ -664,60 +666,35 @@ message_eq (int index, const struct message *expected) MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); + int r; for (i = 0; i < m->num_headers; i++) { - check_str_eq(expected, "header field", expected->headers[i][0], m->headers[i][0]); - check_str_eq(expected, "header value", expected->headers[i][1], m->headers[i][1]); - } -} - -void -parse_messages (int message_count, const struct message *input_messages[]) -{ - // Concat the input messages - size_t length = 0; - int i; - for (i = 0; i < message_count; i++) { - length += strlen(input_messages[i]->raw); - } - char total[length + 1]; - total[0] = '\0'; - - for (i = 0; i < message_count; i++) { - strcat(total, input_messages[i]->raw); + r = check_str_eq(expected, "header field", expected->headers[i][0], m->headers[i][0]); + if (!r) return 0; + r = check_str_eq(expected, "header value", expected->headers[i][1], m->headers[i][1]); + if (!r) return 0; } - // Parse the stream - parser_init(HTTP_REQUEST); - - http_parser_execute(&parser, total, length); - - http_parser_execute(&parser, NULL, 0); - - assert(num_messages == message_count); - - for (i = 0; i < message_count; i++) { - message_eq(i, input_messages[i]); - } + return 1; } static void -print_error (const struct message *message, size_t error_location) +print_error (const char *raw, size_t error_location) { - printf("\n*** parse error on '%s' ***\n\n", message->name); + fprintf(stderr, "\n*** parse error ***\n\n"); int this_line = 0, char_len = 0; - size_t i, j, len = strlen(message->raw), error_location_line = 0; + size_t i, j, len = 
strlen(raw), error_location_line = 0; for (i = 0; i < len; i++) { if (i == error_location) this_line = 1; - switch (message->raw[i]) { + switch (raw[i]) { case '\r': char_len = 2; - printf("\\r"); + fprintf(stderr, "\\r"); break; case '\n': char_len = 2; - printf("\\n\n"); + fprintf(stderr, "\\n\n"); if (this_line) goto print; @@ -726,19 +703,19 @@ print_error (const struct message *message, size_t error_location) default: char_len = 1; - putchar(message->raw[i]); + fputc(raw[i], stderr); break; } if (!this_line) error_location_line += char_len; } - printf("[eof]\n"); + fprintf(stderr, "[eof]\n"); print: for (j = 0; j < error_location_line; j++) { - putchar(' '); + fputc(' ', stderr); } - printf("^\n\nerror location: %d\n", error_location); + fprintf(stderr, "^\n\nerror location: %zu\n", error_location); /* error_location is a size_t */ } @@ -751,13 +728,13 @@ test_message (const struct message *message) read = http_parser_execute(&parser, message->raw, strlen(message->raw)); if (read != strlen(message->raw)) { - print_error(message, read); + print_error(message->raw, read); exit(1); } read = http_parser_execute(&parser, NULL, 0); if (read != 0) { - print_error(message, read); + print_error(message->raw, read); exit(1); } @@ -766,7 +743,7 @@ test_message (const struct message *message) exit(1); } - message_eq(0, message); + if(!message_eq(0, message)) exit(1); } int @@ -781,7 +758,7 @@ test_error (const char *buf) parsed = http_parser_execute(&parser, NULL, 0); if (parsed != 0) return 1; - printf("\n*** Error expected but none found ***\n\n%s", buf); + fprintf(stderr, "\n*** Error expected but none found ***\n\n%s", buf); exit(1); return 0; @@ -803,14 +780,24 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct parser_init(HTTP_REQUEST); - http_parser_execute(&parser, total, strlen(total)); + size_t read; - http_parser_execute(&parser, NULL, 0); + read = http_parser_execute(&parser, total, strlen(total)); + if (read != strlen(total)) { + print_error(total, read); + 
exit(1); + } + + read = http_parser_execute(&parser, NULL, 0); + if (read != 0) { + print_error(total, read); + exit(1); + } assert(num_messages == 3); - message_eq(0, r1); - message_eq(1, r2); - message_eq(2, r3); + if (!message_eq(0, r1)) exit(1); + if (!message_eq(1, r2)) exit(1); + if (!message_eq(2, r3)) exit(1); } /* SCAN through every possible breaking to make sure the @@ -829,11 +816,15 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess strcat(total, r2->raw); strcat(total, r3->raw); + size_t read; + int total_len = strlen(total); int total_ops = (total_len - 1) * (total_len - 2) / 2; int ops = 0 ; + size_t buf1_len, buf2_len, buf3_len; + int i,j; for (j = 2; j < total_len; j ++ ) { for (i = 1; i < j; i ++ ) { @@ -846,40 +837,65 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess parser_init(HTTP_REQUEST); - int buf1_len = i; + buf1_len = i; strncpy(buf1, total, buf1_len); buf1[buf1_len] = 0; - int buf2_len = j - i; + buf2_len = j - i; strncpy(buf2, total+i, buf2_len); buf2[buf2_len] = 0; - int buf3_len = total_len - j; + buf3_len = total_len - j; strncpy(buf3, total+j, buf3_len); buf3[buf3_len] = 0; - /* - printf("buf1: %s - %d\n", buf1, buf1_len); - printf("buf2: %s - %d \n", buf2, buf2_len ); - printf("buf3: %s - %d\n\n", buf3, buf3_len); - */ - - http_parser_execute(&parser, buf1, buf1_len); + read = http_parser_execute(&parser, buf1, buf1_len); + if (read != buf1_len) { + print_error(buf1, read); + goto error; + } - http_parser_execute(&parser, buf2, buf2_len); + read = http_parser_execute(&parser, buf2, buf2_len); + if (read != buf2_len) { + print_error(buf2, read); + goto error; + } - http_parser_execute(&parser, buf3, buf3_len); + read = http_parser_execute(&parser, buf3, buf3_len); + if (read != buf3_len) { + print_error(buf3, read); + goto error; + } http_parser_execute(&parser, NULL, 0); assert(3 == num_messages); - message_eq(0, r1); - message_eq(1, r2); - message_eq(2, r3); + if 
(!message_eq(0, r1)) { + fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); + goto error; + } + + if (!message_eq(1, r2)) { + fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); + goto error; + } + + if (!message_eq(2, r3)) { + fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); + goto error; + } } } puts("\b\b\b\b100%"); + return; + +error: + fprintf(stderr, "i=%d j=%d\n", i, j); + fprintf(stderr, "buf1 (%zu) %s\n\n", buf1_len, buf1); /* buffer lengths are size_t */ + fprintf(stderr, "buf2 (%zu) %s\n\n", buf2_len , buf2); + fprintf(stderr, "buf3 (%zu) %s\n", buf3_len, buf3); + exit(1); } int @@ -958,14 +974,12 @@ main (void) test_message(&requests[i]); } -#if 0 int j, k; for (i = 0; i < request_count; i++) { for (j = 0; j < request_count; j++) { for (k = 0; k < request_count; k++) { - //printf("%d %d %d\n", i, j, k); test_multiple3(&requests[i], &requests[j], &requests[k]); } } @@ -976,6 +990,7 @@ main (void) , &requests[GET_ONE_HEADER_NO_BODY] , &requests[GET_NO_HEADERS_NO_BODY] ); +#if 0 printf("request scan 2/3 "); test_scan( &requests[GET_FUNKY_CONTENT_LENGTH]