httplex.go 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package httpguts
  5. import (
  6. "net"
  7. "strings"
  8. "unicode/utf8"
  9. "golang.org/x/net/idna"
  10. )
  11. var isTokenTable = [127]bool{
  12. '!': true,
  13. '#': true,
  14. '$': true,
  15. '%': true,
  16. '&': true,
  17. '\'': true,
  18. '*': true,
  19. '+': true,
  20. '-': true,
  21. '.': true,
  22. '0': true,
  23. '1': true,
  24. '2': true,
  25. '3': true,
  26. '4': true,
  27. '5': true,
  28. '6': true,
  29. '7': true,
  30. '8': true,
  31. '9': true,
  32. 'A': true,
  33. 'B': true,
  34. 'C': true,
  35. 'D': true,
  36. 'E': true,
  37. 'F': true,
  38. 'G': true,
  39. 'H': true,
  40. 'I': true,
  41. 'J': true,
  42. 'K': true,
  43. 'L': true,
  44. 'M': true,
  45. 'N': true,
  46. 'O': true,
  47. 'P': true,
  48. 'Q': true,
  49. 'R': true,
  50. 'S': true,
  51. 'T': true,
  52. 'U': true,
  53. 'W': true,
  54. 'V': true,
  55. 'X': true,
  56. 'Y': true,
  57. 'Z': true,
  58. '^': true,
  59. '_': true,
  60. '`': true,
  61. 'a': true,
  62. 'b': true,
  63. 'c': true,
  64. 'd': true,
  65. 'e': true,
  66. 'f': true,
  67. 'g': true,
  68. 'h': true,
  69. 'i': true,
  70. 'j': true,
  71. 'k': true,
  72. 'l': true,
  73. 'm': true,
  74. 'n': true,
  75. 'o': true,
  76. 'p': true,
  77. 'q': true,
  78. 'r': true,
  79. 's': true,
  80. 't': true,
  81. 'u': true,
  82. 'v': true,
  83. 'w': true,
  84. 'x': true,
  85. 'y': true,
  86. 'z': true,
  87. '|': true,
  88. '~': true,
  89. }
  90. func IsTokenRune(r rune) bool {
  91. i := int(r)
  92. return i < len(isTokenTable) && isTokenTable[i]
  93. }
  94. func isNotToken(r rune) bool {
  95. return !IsTokenRune(r)
  96. }
  97. // HeaderValuesContainsToken reports whether any string in values
  98. // contains the provided token, ASCII case-insensitively.
  99. func HeaderValuesContainsToken(values []string, token string) bool {
  100. for _, v := range values {
  101. if headerValueContainsToken(v, token) {
  102. return true
  103. }
  104. }
  105. return false
  106. }
  107. // isOWS reports whether b is an optional whitespace byte, as defined
  108. // by RFC 7230 section 3.2.3.
  109. func isOWS(b byte) bool { return b == ' ' || b == '\t' }
  110. // trimOWS returns x with all optional whitespace removes from the
  111. // beginning and end.
  112. func trimOWS(x string) string {
  113. // TODO: consider using strings.Trim(x, " \t") instead,
  114. // if and when it's fast enough. See issue 10292.
  115. // But this ASCII-only code will probably always beat UTF-8
  116. // aware code.
  117. for len(x) > 0 && isOWS(x[0]) {
  118. x = x[1:]
  119. }
  120. for len(x) > 0 && isOWS(x[len(x)-1]) {
  121. x = x[:len(x)-1]
  122. }
  123. return x
  124. }
  125. // headerValueContainsToken reports whether v (assumed to be a
  126. // 0#element, in the ABNF extension described in RFC 7230 section 7)
  127. // contains token amongst its comma-separated tokens, ASCII
  128. // case-insensitively.
  129. func headerValueContainsToken(v string, token string) bool {
  130. for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
  131. if tokenEqual(trimOWS(v[:comma]), token) {
  132. return true
  133. }
  134. v = v[comma+1:]
  135. }
  136. return tokenEqual(trimOWS(v), token)
  137. }
  138. // lowerASCII returns the ASCII lowercase version of b.
  139. func lowerASCII(b byte) byte {
  140. if 'A' <= b && b <= 'Z' {
  141. return b + ('a' - 'A')
  142. }
  143. return b
  144. }
  145. // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
  146. func tokenEqual(t1, t2 string) bool {
  147. if len(t1) != len(t2) {
  148. return false
  149. }
  150. for i, b := range t1 {
  151. if b >= utf8.RuneSelf {
  152. // No UTF-8 or non-ASCII allowed in tokens.
  153. return false
  154. }
  155. if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
  156. return false
  157. }
  158. }
  159. return true
  160. }
  161. // isLWS reports whether b is linear white space, according
  162. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  163. //
  164. // LWS = [CRLF] 1*( SP | HT )
  165. func isLWS(b byte) bool { return b == ' ' || b == '\t' }
  166. // isCTL reports whether b is a control byte, according
  167. // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
  168. //
  169. // CTL = <any US-ASCII control character
  170. // (octets 0 - 31) and DEL (127)>
  171. func isCTL(b byte) bool {
  172. const del = 0x7f // a CTL
  173. return b < ' ' || b == del
  174. }
  175. // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
  176. // HTTP/2 imposes the additional restriction that uppercase ASCII
  177. // letters are not allowed.
  178. //
  179. // RFC 7230 says:
  180. //
  181. // header-field = field-name ":" OWS field-value OWS
  182. // field-name = token
  183. // token = 1*tchar
  184. // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
  185. // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
  186. func ValidHeaderFieldName(v string) bool {
  187. if len(v) == 0 {
  188. return false
  189. }
  190. for _, r := range v {
  191. if !IsTokenRune(r) {
  192. return false
  193. }
  194. }
  195. return true
  196. }
  197. // ValidHostHeader reports whether h is a valid host header.
  198. func ValidHostHeader(h string) bool {
  199. // The latest spec is actually this:
  200. //
  201. // http://tools.ietf.org/html/rfc7230#section-5.4
  202. // Host = uri-host [ ":" port ]
  203. //
  204. // Where uri-host is:
  205. // http://tools.ietf.org/html/rfc3986#section-3.2.2
  206. //
  207. // But we're going to be much more lenient for now and just
  208. // search for any byte that's not a valid byte in any of those
  209. // expressions.
  210. for i := 0; i < len(h); i++ {
  211. if !validHostByte[h[i]] {
  212. return false
  213. }
  214. }
  215. return true
  216. }
  217. // See the validHostHeader comment.
  218. var validHostByte = [256]bool{
  219. '0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
  220. '8': true, '9': true,
  221. 'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
  222. 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
  223. 'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
  224. 'y': true, 'z': true,
  225. 'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
  226. 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
  227. 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
  228. 'Y': true, 'Z': true,
  229. '!': true, // sub-delims
  230. '$': true, // sub-delims
  231. '%': true, // pct-encoded (and used in IPv6 zones)
  232. '&': true, // sub-delims
  233. '(': true, // sub-delims
  234. ')': true, // sub-delims
  235. '*': true, // sub-delims
  236. '+': true, // sub-delims
  237. ',': true, // sub-delims
  238. '-': true, // unreserved
  239. '.': true, // unreserved
  240. ':': true, // IPv6address + Host expression's optional port
  241. ';': true, // sub-delims
  242. '=': true, // sub-delims
  243. '[': true,
  244. '\'': true, // sub-delims
  245. ']': true,
  246. '_': true, // unreserved
  247. '~': true, // unreserved
  248. }
  249. // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
  250. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
  251. //
  252. // message-header = field-name ":" [ field-value ]
  253. // field-value = *( field-content | LWS )
  254. // field-content = <the OCTETs making up the field-value
  255. // and consisting of either *TEXT or combinations
  256. // of token, separators, and quoted-string>
  257. //
  258. // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
  259. //
  260. // TEXT = <any OCTET except CTLs,
  261. // but including LWS>
  262. // LWS = [CRLF] 1*( SP | HT )
  263. // CTL = <any US-ASCII control character
  264. // (octets 0 - 31) and DEL (127)>
  265. //
  266. // RFC 7230 says:
  267. //
  268. // field-value = *( field-content / obs-fold )
  269. // obj-fold = N/A to http2, and deprecated
  270. // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
  271. // field-vchar = VCHAR / obs-text
  272. // obs-text = %x80-FF
  273. // VCHAR = "any visible [USASCII] character"
  274. //
  275. // http2 further says: "Similarly, HTTP/2 allows header field values
  276. // that are not valid. While most of the values that can be encoded
  277. // will not alter header field parsing, carriage return (CR, ASCII
  278. // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
  279. // 0x0) might be exploited by an attacker if they are translated
  280. // verbatim. Any request or response that contains a character not
  281. // permitted in a header field value MUST be treated as malformed
  282. // (Section 8.1.2.6). Valid characters are defined by the
  283. // field-content ABNF rule in Section 3.2 of [RFC7230]."
  284. //
  285. // This function does not (yet?) properly handle the rejection of
  286. // strings that begin or end with SP or HTAB.
  287. func ValidHeaderFieldValue(v string) bool {
  288. for i := 0; i < len(v); i++ {
  289. b := v[i]
  290. if isCTL(b) && !isLWS(b) {
  291. return false
  292. }
  293. }
  294. return true
  295. }
  296. func isASCII(s string) bool {
  297. for i := 0; i < len(s); i++ {
  298. if s[i] >= utf8.RuneSelf {
  299. return false
  300. }
  301. }
  302. return true
  303. }
  304. // PunycodeHostPort returns the IDNA Punycode version
  305. // of the provided "host" or "host:port" string.
  306. func PunycodeHostPort(v string) (string, error) {
  307. if isASCII(v) {
  308. return v, nil
  309. }
  310. host, port, err := net.SplitHostPort(v)
  311. if err != nil {
  312. // The input 'v' argument was just a "host" argument,
  313. // without a port. This error should not be returned
  314. // to the caller.
  315. host = v
  316. port = ""
  317. }
  318. host, err = idna.ToASCII(host)
  319. if err != nil {
  320. // Non-UTF-8? Not representable in Punycode, in any
  321. // case.
  322. return "", err
  323. }
  324. if port == "" {
  325. return host, nil
  326. }
  327. return net.JoinHostPort(host, port), nil
  328. }