Merge pull request #58 from pgriess/parse_url

Add http_parser_parse_url().
v0.10
Peter Griess 13 years ago
commit c48351fbde

README.md
@@ -164,6 +164,13 @@ and apply following logic:
------------------------ ------------ --------------------------------------------
Parsing URLs
------------
A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
Users of this library may wish to use it to parse URLs constructed from
consecutive `on_url` callbacks.
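As an illustration only (this snippet is not part of the commit; the URL literal and function name are made up), a minimal sketch of that workflow might look like:

#include <stdio.h>
#include <string.h>
#include <http_parser.h>

/* Hypothetical buffer accumulated from consecutive on_url callbacks. */
static const char url[] = "http://example.org:8080/docs?x=1#top";

static int print_url_parts(void)
{
  struct http_parser_url u;

  /* is_connect = 0 because this is not a CONNECT request target */
  if (http_parser_parse_url(url, strlen(url), 0, &u) != 0) {
    fprintf(stderr, "invalid URL\n");
    return -1;
  }

  if (u.field_set & (1 << UF_PATH)) {
    /* field_data holds offsets/lengths into the original buffer (zero-copy) */
    printf("path: %.*s\n", (int) u.field_data[UF_PATH].len,
           url + u.field_data[UF_PATH].off);
  }
  if (u.field_set & (1 << UF_PORT)) {
    printf("port: %u\n", (unsigned) u.port);
  }
  return 0;
}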
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C

http_parser.c
@@ -24,6 +24,8 @@
#include <http_parser.h>
#include <assert.h>
#include <stddef.h>
#include <ctype.h>
#include <stdlib.h>
#ifndef MIN
@@ -261,7 +263,7 @@ enum state
, s_chunk_size
, s_chunk_parameters
, s_chunk_size_almost_done
, s_headers_almost_done
/* Important: 's_headers_almost_done' must be the last 'header' state. All
 * states beyond this must be 'body' states. It is used for overflow
@@ -356,6 +358,178 @@ static struct {
};
#undef HTTP_STRERROR_GEN
/* Our URL parser.
*
* This is designed to be shared by http_parser_execute() for URL validation,
* hence it has a state transition + byte-for-byte interface. In addition, it
* is meant to be embedded in http_parser_parse_url(), which does the dirty
* work of turning state transitions into URL components for its API.
*
* This function should only be invoked with non-space characters. It is
* assumed that the caller cares about (and can detect) the transition between
* URL and non-URL states by looking for these.
*/
static inline enum state
parse_url_char(enum state s, const char ch, int is_connect)
{
assert(!isspace(ch));
switch (s) {
case s_req_spaces_before_url:
if (ch == '/' || ch == '*') {
return s_req_path;
}
/* Proxied requests are followed by scheme of an absolute URI (alpha).
* CONNECT is followed by a hostname, which begins with alphanum.
* All other methods are followed by '/' or '*' (handled above).
*/
if (IS_ALPHA(ch) || (is_connect && IS_NUM(ch))) {
return (is_connect) ? s_req_host : s_req_schema;
}
break;
case s_req_schema:
if (IS_ALPHA(ch)) {
return s;
}
if (ch == ':') {
return s_req_schema_slash;
}
break;
case s_req_schema_slash:
if (ch == '/') {
return s_req_schema_slash_slash;
}
break;
case s_req_schema_slash_slash:
if (ch == '/') {
return s_req_host;
}
break;
case s_req_host:
if (IS_HOST_CHAR(ch)) {
return s;
}
switch (ch) {
case ':':
return s_req_port;
case '/':
return s_req_path;
case '?':
return s_req_query_string_start;
}
break;
case s_req_port:
if (IS_NUM(ch)) {
return s;
}
switch (ch) {
case '/':
return s_req_path;
case '?':
return s_req_query_string_start;
}
break;
case s_req_path:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
return s_req_query_string_start;
case '#':
return s_req_fragment_start;
}
break;
case s_req_query_string_start:
if (IS_URL_CHAR(ch)) {
return s_req_query_string;
}
switch (ch) {
case '?':
/* XXX ignore extra '?' ... is this right? */
return s;
case '#':
return s_req_fragment_start;
}
break;
case s_req_query_string:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
/* allow extra '?' in query string */
return s;
case '#':
return s_req_fragment_start;
}
break;
case s_req_fragment_start:
if (IS_URL_CHAR(ch)) {
return s_req_fragment;
}
switch (ch) {
case '?':
return s_req_fragment;
case '#':
return s;
}
break;
case s_req_fragment:
if (IS_URL_CHAR(ch)) {
return s;
}
switch (ch) {
case '?':
case '#':
return s;
}
break;
default:
break;
}
/* We should never fall out of the switch above unless there's an error */
return s_dead;
}
size_t http_parser_execute (http_parser *parser,
                            const http_parser_settings *settings,
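As a sketch of the byte-for-byte interface described in the comment on parse_url_char() above (illustrative only; parse_url_char() is static, so a helper like this would have to live in http_parser.c, and the name validate_url is made up):

/* Feed a buffer through parse_url_char() one byte at a time, the same way
 * http_parser_parse_url() does. Returns nonzero on the first character that
 * drives the machine into s_dead. Callers must not pass whitespace;
 * parse_url_char() asserts !isspace(ch).
 */
static int
validate_url(const char *buf, size_t buflen, int is_connect)
{
  enum state s = s_req_spaces_before_url;
  size_t i;

  for (i = 0; i < buflen; i++) {
    s = parse_url_char(s, buf[i], is_connect);
    if (s == s_dead) {
      return 1;
    }
  }

  return 0;
}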
@@ -749,269 +923,72 @@ size_t http_parser_execute (http_parser *parser,
      {
        if (ch == ' ') break;

        MARK(url);

        state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
        if (state == s_dead) {
          SET_ERRNO(HPE_INVALID_URL);
          goto error;
        }

        break;
      }

      case s_req_schema:
      case s_req_schema_slash:
      case s_req_schema_slash_slash:
      {
        switch (ch) {
          /* No whitespace allowed here */
          case ' ':
          case CR:
          case LF:
            SET_ERRNO(HPE_INVALID_URL);
            goto error;
          default:
            state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
            if (state == s_dead) {
              SET_ERRNO(HPE_INVALID_URL);
              goto error;
            }
        }

        break;
      }

      case s_req_host:
      case s_req_port:
      case s_req_path:
      case s_req_query_string_start:
      case s_req_query_string:
      case s_req_fragment_start:
      case s_req_fragment:
      {
        /* XXX: There is a bug here where if we're on the first character
         * of s_req_host (e.g. our URL is 'http://' and we see a whitespace
         * character, we'll consider this a valid URL. This seems incorrect,
         * but at least it's bug-compatible with what we had before.
         */
        switch (ch) {
          case ' ':
            CALLBACK(url);
            state = s_req_http_start;
            break;

          case CR:
          case LF:
            CALLBACK(url);
            parser->http_major = 0;
            parser->http_minor = 9;
            state = (ch == CR) ?
              s_req_line_almost_done :
              s_header_field_start;
            break;

          default:
            state = parse_url_char(state, ch, parser->method == HTTP_CONNECT);
            if (state == s_dead) {
              SET_ERRNO(HPE_INVALID_URL);
              goto error;
            }
        }
        break;
      }
@@ -1788,3 +1765,98 @@ http_errno_description(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
return http_strerror_tab[err].description;
}
int
http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
struct http_parser_url *u)
{
enum state s;
const char *p;
enum http_parser_url_fields uf, old_uf;
u->port = u->field_set = 0;
s = s_req_spaces_before_url;
uf = old_uf = UF_MAX;
for (p = buf; p < buf + buflen; p++) {
if ((s = parse_url_char(s, *p, is_connect)) == s_dead) {
return 1;
}
/* Figure out the next field that we're operating on */
switch (s) {
case s_req_schema:
case s_req_schema_slash:
case s_req_schema_slash_slash:
uf = UF_SCHEMA;
break;
case s_req_host:
uf = UF_HOST;
break;
case s_req_port:
uf = UF_PORT;
break;
case s_req_path:
uf = UF_PATH;
break;
case s_req_query_string_start:
case s_req_query_string:
uf = UF_QUERY;
break;
case s_req_fragment_start:
case s_req_fragment:
uf = UF_FRAGMENT;
break;
default:
assert(!"Unexpected state");
return 1;
}
/* Nothing's changed; soldier on */
if (uf == old_uf) {
u->field_data[uf].len++;
continue;
}
/* We ignore the first character in some fields; without this, we end up
* with the query being "?foo=bar" rather than "foo=bar". Callers probably
* don't want this.
*/
switch (uf) {
case UF_QUERY:
case UF_FRAGMENT:
case UF_PORT:
u->field_data[uf].off = p - buf + 1;
u->field_data[uf].len = 0;
break;
default:
u->field_data[uf].off = p - buf;
u->field_data[uf].len = 1;
break;
}
u->field_set |= (1 << uf);
old_uf = uf;
}
if (u->field_set & (1 << UF_PORT)) {
/* Don't bother with endp; we've already validated the string */
unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
/* Ports have a max value of 2^16 */
if (v > 0xffff) {
return 1;
}
u->port = (uint16_t) v;
}
return 0;
}
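To make the offset/length convention above concrete, here is an illustrative expectation (not a test from this commit; the buffer is made up) showing that the leading '?' and '#' are excluded from the query and fragment runs:

#include <assert.h>
#include <string.h>

static void example_offsets(void)
{
  static const char buf[] = "/a?b=1#c";
  struct http_parser_url u;

  assert(http_parser_parse_url(buf, strlen(buf), 0, &u) == 0);

  /* "/a" */
  assert(u.field_data[UF_PATH].off == 0 && u.field_data[UF_PATH].len == 2);
  /* "b=1", not "?b=1" */
  assert(u.field_data[UF_QUERY].off == 3 && u.field_data[UF_QUERY].len == 3);
  /* "c", not "#c" */
  assert(u.field_data[UF_FRAGMENT].off == 7 && u.field_data[UF_FRAGMENT].len == 1);
}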

http_parser.h
@@ -244,6 +244,35 @@
};
enum http_parser_url_fields
{ UF_SCHEMA = 0
, UF_HOST = 1
, UF_PORT = 2
, UF_PATH = 3
, UF_QUERY = 4
, UF_FRAGMENT = 5
, UF_MAX = 6
};
/* Result structure for http_parser_parse_url().
*
* Callers should index into field_data[] with UF_* values iff field_set
* has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
* because we probably have padding left over), we convert any port to
* a uint16_t.
*/
struct http_parser_url {
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
uint16_t port; /* Converted UF_PORT string */
struct {
uint16_t off; /* Offset into buffer in which field starts */
uint16_t len; /* Length of run in buffer */
} field_data[UF_MAX];
};
void http_parser_init(http_parser *parser, enum http_parser_type type);
@@ -270,6 +299,11 @@ const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
/* Parse a URL; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect,
struct http_parser_url *u);
#ifdef __cplusplus
}
#endif
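A sketch of the field_set / field_data contract declared above, mirroring how test.c's MESSAGE_CHECK_URL_EQ macro copies components out of the original buffer (the helper name is made up):

#include <string.h>
#include <http_parser.h>

/* Copy one URL component into out[], or leave the empty string if the
 * component was not present in the parsed URL (or does not fit).
 */
static void
copy_url_field(const char *buf, const struct http_parser_url *u,
               enum http_parser_url_fields f, char *out, size_t outlen)
{
  out[0] = '\0';

  if ((u->field_set & (1 << f)) && u->field_data[f].len < outlen) {
    memcpy(out, buf + u->field_data[f].off, u->field_data[f].len);
    out[u->field_data[f].len] = '\0';
  }
}

For example, after a successful http_parser_parse_url() call, copy_url_field(buf, &u, UF_HOST, host, sizeof(host)) yields the bare hostname.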

test.c

@@ -44,9 +44,13 @@ struct message {
enum http_parser_type type;
enum http_method method;
int status_code;
char request_path[MAX_ELEMENT_SIZE];
char request_url[MAX_ELEMENT_SIZE];
char fragment[MAX_ELEMENT_SIZE];
char query_string[MAX_ELEMENT_SIZE];
char body[MAX_ELEMENT_SIZE];
size_t body_size;
uint16_t port;
int num_headers;
enum { NONE=0, FIELD, VALUE } last_header_element;
char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE];
@@ -83,6 +87,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 3
,.headers=
@@ -111,6 +118,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/favicon.ico"
,.request_url= "/favicon.ico"
,.num_headers= 8
,.headers=
@@ -137,6 +147,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/dumbfuck"
,.request_url= "/dumbfuck"
,.num_headers= 1
,.headers=
@@ -155,6 +168,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "page=1"
,.fragment= "posts-17408"
,.request_path= "/forums/1/topics/2375"
/* XXX request url does include fragment? */
,.request_url= "/forums/1/topics/2375?page=1#posts-17408"
,.num_headers= 0
@@ -171,6 +187,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_no_headers_no_body/world"
,.request_url= "/get_no_headers_no_body/world"
,.num_headers= 0
,.body= ""
@@ -187,6 +206,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_one_header_no_body"
,.request_url= "/get_one_header_no_body"
,.num_headers= 1
,.headers=
@@ -207,6 +229,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/get_funky_content_length_body_hello"
,.request_url= "/get_funky_content_length_body_hello"
,.num_headers= 1
,.headers=
@@ -229,6 +254,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= "q=search"
,.fragment= "hey"
,.request_path= "/post_identity_body_world"
,.request_url= "/post_identity_body_world?q=search#hey"
,.num_headers= 3
,.headers=
@@ -253,6 +281,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/post_chunked_all_your_base"
,.request_url= "/post_chunked_all_your_base"
,.num_headers= 1
,.headers=
@@ -276,6 +307,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/two_chunks_mult_zero_end"
,.request_url= "/two_chunks_mult_zero_end"
,.num_headers= 1
,.headers=
@@ -301,6 +335,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/chunked_w_trailing_headers"
,.request_url= "/chunked_w_trailing_headers"
,.num_headers= 3
,.headers=
@@ -326,6 +363,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_POST
,.query_string= ""
,.fragment= ""
,.request_path= "/chunked_w_bullshit_after_length"
,.request_url= "/chunked_w_bullshit_after_length"
,.num_headers= 1
,.headers=
@@ -343,6 +383,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "foo=\"bar\""
,.fragment= ""
,.request_path= "/with_\"stupid\"_quotes"
,.request_url= "/with_\"stupid\"_quotes?foo=\"bar\""
,.num_headers= 0
,.headers= { }
@@ -366,6 +409,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 3
,.headers= { { "Host", "0.0.0.0:5000" }
@@ -386,6 +432,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "foo=bar?baz"
,.fragment= ""
,.request_path= "/test.cgi"
,.request_url= "/test.cgi?foo=bar?baz"
,.num_headers= 0
,.headers= {}
@@ -404,6 +453,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 0
,.headers= { }
@@ -428,6 +480,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/demo"
,.request_url= "/demo"
,.num_headers= 7
,.upgrade="Hot diggity dogg"
@@ -456,6 +511,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "0-home0.netscape.com:443"
,.num_headers= 2
,.upgrade="some data\r\nand yet even more data"
@@ -475,6 +533,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_REPORT
,.query_string= ""
,.fragment= ""
,.request_path= "/test"
,.request_url= "/test"
,.num_headers= 0
,.headers= {}
@@ -491,6 +552,9 @@ const struct message requests[]
,.http_major= 0
,.http_minor= 9
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/"
,.request_url= "/"
,.num_headers= 0
,.headers= {}
@@ -510,6 +574,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_MSEARCH
,.query_string= ""
,.fragment= ""
,.request_path= "*"
,.request_url= "*"
,.num_headers= 3
,.headers= { { "HOST", "239.255.255.250:1900" }
@@ -536,6 +603,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= "/"
,.request_url= "/"
,.num_headers= 2
,.headers= { { "Line1", "abcdefghijklmno qrs" }
@@ -555,6 +625,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "hail=all"
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org?hail=all"
,.num_headers= 0
,.headers= { }
@@ -571,7 +644,11 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "hail=all"
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org:1234?hail=all"
,.port= 1234
,.num_headers= 0
,.headers= { }
,.body= ""
@@ -587,7 +664,11 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "http://hypnotoad.org:1234"
,.port= 1234
,.num_headers= 0
,.headers= { }
,.body= ""
@@ -608,6 +689,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_PATCH
,.query_string= ""
,.fragment= ""
,.request_path= "/file.txt"
,.request_url= "/file.txt"
,.num_headers= 4
,.headers= { { "Host", "www.example.com" }
@@ -630,6 +714,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "HOME0.NETSCAPE.COM:443"
,.num_headers= 2
,.upgrade=""
@@ -651,6 +738,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "q=1"
,.fragment= "narf"
,.request_path= "/δ¶/δt/pope"
,.request_url= "/δ¶/δt/pope?q=1#narf"
,.num_headers= 1
,.headers= { {"Host", "github.com" }
@@ -670,6 +760,9 @@ const struct message requests[]
,.http_major= 1
,.http_minor= 0
,.method= HTTP_CONNECT
,.query_string= ""
,.fragment= ""
,.request_path= ""
,.request_url= "home_0.netscape.com:443"
,.num_headers= 2
,.upgrade=""
@@ -1306,6 +1399,20 @@ check_num_eq (const struct message *m,
#define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \
if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0
#define MESSAGE_CHECK_URL_EQ(u, expected, found, prop, fn) \
do { \
char ubuf[256]; \
\
if ((u)->field_set & (1 << (fn))) { \
memcpy(ubuf, (found)->request_url + (u)->field_data[(fn)].off, \
(u)->field_data[(fn)].len); \
ubuf[(u)->field_data[(fn)].len] = '\0'; \
} else { \
ubuf[0] = '\0'; \
} \
\
check_str_eq(expected, #prop, expected->prop, ubuf); \
} while(0)
int
message_eq (int index, const struct message *expected)
@@ -1331,6 +1438,28 @@ message_eq (int index, const struct message *expected)
MESSAGE_CHECK_STR_EQ(expected, m, request_url);
/* Check URL components; we can't do this w/ CONNECT since it doesn't
* send us a well-formed URL.
*/
if (*m->request_url && m->method != HTTP_CONNECT) {
struct http_parser_url u;
if (http_parser_parse_url(m->request_url, strlen(m->request_url), 0, &u)) {
fprintf(stderr, "\n\n*** failed to parse URL %s ***\n\n",
m->request_url);
exit(1);
}
m->port = (u.field_set & (1 << UF_PORT)) ?
u.port : 0;
MESSAGE_CHECK_URL_EQ(&u, expected, m, query_string, UF_QUERY);
MESSAGE_CHECK_URL_EQ(&u, expected, m, fragment, UF_FRAGMENT);
MESSAGE_CHECK_URL_EQ(&u, expected, m, request_path, UF_PATH);
MESSAGE_CHECK_NUM_EQ(expected, m, port);
}
if (expected->body_size) {
MESSAGE_CHECK_NUM_EQ(expected, m, body_size);
} else {
