Holger Hildebrandt | fa07499 | 2020-03-27 15:42:06 +0000 | [diff] [blame^] | 1 | // Copyright 2016 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package httpguts |
| 6 | |
| 7 | import ( |
| 8 | "net" |
| 9 | "strings" |
| 10 | "unicode/utf8" |
| 11 | |
| 12 | "golang.org/x/net/idna" |
| 13 | ) |
| 14 | |
// isTokenTable is indexed by ASCII code point; an entry is true when that
// character is a "tchar" in the RFC 7230 token grammar. The array has 127
// entries, so DEL (0x7f) and every value above it fall outside the table.
var isTokenTable = [127]bool{
	// Punctuation tchars.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true,
	'-': true, '.': true, '^': true, '_': true, '`': true, '|': true, '~': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	// ALPHA, upper case.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// ALPHA, lower case.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// IsTokenRune reports whether r is a valid token character, i.e. one of
// the characters marked in isTokenTable. Any rune that does not index into
// the table (negative values, DEL, and all non-ASCII runes) is reported as
// not being a token character.
func IsTokenRune(r rune) bool {
	// Comparing in unsigned space folds the "r < 0" and "r >= len(table)"
	// checks into one test, so an out-of-range rune can never be used as
	// an index and trigger a run-time panic.
	i := uint32(r)
	return i < uint32(len(isTokenTable)) && isTokenTable[i]
}
| 99 | |
| 100 | func isNotToken(r rune) bool { |
| 101 | return !IsTokenRune(r) |
| 102 | } |
| 103 | |
| 104 | // HeaderValuesContainsToken reports whether any string in values |
| 105 | // contains the provided token, ASCII case-insensitively. |
| 106 | func HeaderValuesContainsToken(values []string, token string) bool { |
| 107 | for _, v := range values { |
| 108 | if headerValueContainsToken(v, token) { |
| 109 | return true |
| 110 | } |
| 111 | } |
| 112 | return false |
| 113 | } |
| 114 | |
// isOWS reports whether b is an optional whitespace byte (space or
// horizontal tab), as defined by RFC 7230 section 3.2.3.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
| 118 | |
| 119 | // trimOWS returns x with all optional whitespace removes from the |
| 120 | // beginning and end. |
| 121 | func trimOWS(x string) string { |
| 122 | // TODO: consider using strings.Trim(x, " \t") instead, |
| 123 | // if and when it's fast enough. See issue 10292. |
| 124 | // But this ASCII-only code will probably always beat UTF-8 |
| 125 | // aware code. |
| 126 | for len(x) > 0 && isOWS(x[0]) { |
| 127 | x = x[1:] |
| 128 | } |
| 129 | for len(x) > 0 && isOWS(x[len(x)-1]) { |
| 130 | x = x[:len(x)-1] |
| 131 | } |
| 132 | return x |
| 133 | } |
| 134 | |
| 135 | // headerValueContainsToken reports whether v (assumed to be a |
| 136 | // 0#element, in the ABNF extension described in RFC 7230 section 7) |
| 137 | // contains token amongst its comma-separated tokens, ASCII |
| 138 | // case-insensitively. |
| 139 | func headerValueContainsToken(v string, token string) bool { |
| 140 | v = trimOWS(v) |
| 141 | if comma := strings.IndexByte(v, ','); comma != -1 { |
| 142 | return tokenEqual(trimOWS(v[:comma]), token) || headerValueContainsToken(v[comma+1:], token) |
| 143 | } |
| 144 | return tokenEqual(v, token) |
| 145 | } |
| 146 | |
// lowerASCII maps the ASCII upper-case letters 'A'..'Z' to their
// lower-case counterparts and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	const caseOffset = 'a' - 'A'
	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseOffset
}
| 154 | |
| 155 | // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively. |
| 156 | func tokenEqual(t1, t2 string) bool { |
| 157 | if len(t1) != len(t2) { |
| 158 | return false |
| 159 | } |
| 160 | for i, b := range t1 { |
| 161 | if b >= utf8.RuneSelf { |
| 162 | // No UTF-8 or non-ASCII allowed in tokens. |
| 163 | return false |
| 164 | } |
| 165 | if lowerASCII(byte(b)) != lowerASCII(t2[i]) { |
| 166 | return false |
| 167 | } |
| 168 | } |
| 169 | return true |
| 170 | } |
| 171 | |
// isLWS reports whether b is linear white space (a space or a horizontal
// tab), according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
func isLWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}
| 176 | |
// isCTL reports whether b is a control byte, according to
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	const (
		firstPrintable = ' '  // octets below this are controls
		del            = 0x7f // DEL is a CTL as well
	)
	switch {
	case b < firstPrintable:
		return true
	case b == del:
		return true
	}
	return false
}
| 185 | |
| 186 | // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name. |
| 187 | // HTTP/2 imposes the additional restriction that uppercase ASCII |
| 188 | // letters are not allowed. |
| 189 | // |
| 190 | // RFC 7230 says: |
| 191 | // header-field = field-name ":" OWS field-value OWS |
| 192 | // field-name = token |
| 193 | // token = 1*tchar |
| 194 | // tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
| 195 | // "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
| 196 | func ValidHeaderFieldName(v string) bool { |
| 197 | if len(v) == 0 { |
| 198 | return false |
| 199 | } |
| 200 | for _, r := range v { |
| 201 | if !IsTokenRune(r) { |
| 202 | return false |
| 203 | } |
| 204 | } |
| 205 | return true |
| 206 | } |
| 207 | |
| 208 | // ValidHostHeader reports whether h is a valid host header. |
| 209 | func ValidHostHeader(h string) bool { |
| 210 | // The latest spec is actually this: |
| 211 | // |
| 212 | // http://tools.ietf.org/html/rfc7230#section-5.4 |
| 213 | // Host = uri-host [ ":" port ] |
| 214 | // |
| 215 | // Where uri-host is: |
| 216 | // http://tools.ietf.org/html/rfc3986#section-3.2.2 |
| 217 | // |
| 218 | // But we're going to be much more lenient for now and just |
| 219 | // search for any byte that's not a valid byte in any of those |
| 220 | // expressions. |
| 221 | for i := 0; i < len(h); i++ { |
| 222 | if !validHostByte[h[i]] { |
| 223 | return false |
| 224 | } |
| 225 | } |
| 226 | return true |
| 227 | } |
| 228 | |
// validHostByte is indexed by byte value; an entry is true when that byte
// is accepted in a Host header. See the ValidHostHeader comment.
var validHostByte = [256]bool{
	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, upper case
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// ALPHA, lower case
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// unreserved punctuation
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
	'*': true, '+': true, ',': true, ';': true, '=': true,

	// pct-encoded (and used in IPv6 zones)
	'%': true,

	// IPv6address + Host expression's optional port
	':': true,

	// Square brackets; presumably for bracketed IP literals such as
	// "[::1]" (the original table left these two entries unannotated).
	'[': true, ']': true,
}
| 264 | |
| 265 | // ValidHeaderFieldValue reports whether v is a valid "field-value" according to |
| 266 | // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 : |
| 267 | // |
| 268 | // message-header = field-name ":" [ field-value ] |
| 269 | // field-value = *( field-content | LWS ) |
| 270 | // field-content = <the OCTETs making up the field-value |
| 271 | // and consisting of either *TEXT or combinations |
| 272 | // of token, separators, and quoted-string> |
| 273 | // |
| 274 | // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 : |
| 275 | // |
| 276 | // TEXT = <any OCTET except CTLs, |
| 277 | // but including LWS> |
| 278 | // LWS = [CRLF] 1*( SP | HT ) |
| 279 | // CTL = <any US-ASCII control character |
| 280 | // (octets 0 - 31) and DEL (127)> |
| 281 | // |
| 282 | // RFC 7230 says: |
| 283 | // field-value = *( field-content / obs-fold ) |
| 284 | // obj-fold = N/A to http2, and deprecated |
| 285 | // field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] |
| 286 | // field-vchar = VCHAR / obs-text |
| 287 | // obs-text = %x80-FF |
| 288 | // VCHAR = "any visible [USASCII] character" |
| 289 | // |
| 290 | // http2 further says: "Similarly, HTTP/2 allows header field values |
| 291 | // that are not valid. While most of the values that can be encoded |
| 292 | // will not alter header field parsing, carriage return (CR, ASCII |
| 293 | // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII |
| 294 | // 0x0) might be exploited by an attacker if they are translated |
| 295 | // verbatim. Any request or response that contains a character not |
| 296 | // permitted in a header field value MUST be treated as malformed |
| 297 | // (Section 8.1.2.6). Valid characters are defined by the |
| 298 | // field-content ABNF rule in Section 3.2 of [RFC7230]." |
| 299 | // |
| 300 | // This function does not (yet?) properly handle the rejection of |
| 301 | // strings that begin or end with SP or HTAB. |
| 302 | func ValidHeaderFieldValue(v string) bool { |
| 303 | for i := 0; i < len(v); i++ { |
| 304 | b := v[i] |
| 305 | if isCTL(b) && !isLWS(b) { |
| 306 | return false |
| 307 | } |
| 308 | } |
| 309 | return true |
| 310 | } |
| 311 | |
// isASCII reports whether every byte of s is below utf8.RuneSelf, i.e.
// whether s consists solely of ASCII. The empty string counts as ASCII.
func isASCII(s string) bool {
	for _, b := range []byte(s) {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
| 320 | |
| 321 | // PunycodeHostPort returns the IDNA Punycode version |
| 322 | // of the provided "host" or "host:port" string. |
| 323 | func PunycodeHostPort(v string) (string, error) { |
| 324 | if isASCII(v) { |
| 325 | return v, nil |
| 326 | } |
| 327 | |
| 328 | host, port, err := net.SplitHostPort(v) |
| 329 | if err != nil { |
| 330 | // The input 'v' argument was just a "host" argument, |
| 331 | // without a port. This error should not be returned |
| 332 | // to the caller. |
| 333 | host = v |
| 334 | port = "" |
| 335 | } |
| 336 | host, err = idna.ToASCII(host) |
| 337 | if err != nil { |
| 338 | // Non-UTF-8? Not representable in Punycode, in any |
| 339 | // case. |
| 340 | return "", err |
| 341 | } |
| 342 | if port == "" { |
| 343 | return host, nil |
| 344 | } |
| 345 | return net.JoinHostPort(host, port), nil |
| 346 | } |