Allow octets > 127 in path components.

- This is non-spec behavior, but it appears that most HTTP servers
  implicitly support non-ASCII characters when parsing path components.
  Extend http-parser to allow this.
- Fill out slots [128, 256) in normal_url_char[] with 1 so that these
  high octets are accepted in path components.
- Add unit test for paths that include such non-ASCII characters.

Fixes #37.
v0.6
Peter Griess 14 years ago committed by Ryan Dahl
parent 63daf22f2c
commit 50b9bec552

@ -186,7 +186,28 @@ static const uint8_t normal_url_char[256] = {
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
1, 1, 1, 1, 1, 1, 1, 0 }; 1, 1, 1, 1, 1, 1, 1, 0,
/* The remaining entries (octets 128-255, i.e. the non-ASCII range) are accepted
as-is to support implicitly UTF-8-encoded paths. This is out of spec, but clients
generate such paths and most other HTTP servers accept them, so we do too. */
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1 };
enum state enum state

@ -557,7 +557,7 @@ const struct message requests[] =
,.body= "" ,.body= ""
} }
#define MSEARCH_REQ 19 #define MSEARCH_REQ 20
, {.name= "m-search request" , {.name= "m-search request"
,.type= HTTP_REQUEST ,.type= HTTP_REQUEST
,.raw= "M-SEARCH * HTTP/1.1\r\n" ,.raw= "M-SEARCH * HTTP/1.1\r\n"
@ -582,6 +582,27 @@ const struct message requests[] =
,.body= "" ,.body= ""
} }
#define UTF8_PATH_REQ 21
, {.name= "utf-8 path request"
,.type= HTTP_REQUEST
,.raw= "GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\n"
"Host: github.com\r\n"
"\r\n"
,.should_keep_alive= TRUE
,.message_complete_on_eof= FALSE
,.http_major= 1
,.http_minor= 1
,.method= HTTP_GET
,.query_string= "q=1"
,.fragment= "narf"
,.request_path= "/δ¶/δt/pope"
,.request_url= "/δ¶/δt/pope?q=1#narf"
,.num_headers= 1
,.headers= { {"Host", "github.com" }
}
,.body= ""
}
, {.name= NULL } /* sentinel */ , {.name= NULL } /* sentinel */
}; };

Loading…
Cancel
Save