blob: c79aa73f28bb9a522de0d653b01637083c631e85 [file] [log] [blame]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package httpguts
6
7import (
8 "net"
9 "strings"
10 "unicode/utf8"
11
12 "golang.org/x/net/idna"
13)
14
// isTokenTable is indexed by ASCII code point; an entry is true when that
// character is a token character ("tchar" in RFC 7230). The array covers
// code points 0 through 126 only; IsTokenRune rejects everything outside
// that range before indexing.
var isTokenTable = [127]bool{
	// Punctuation.
	'!': true, '#': true, '$': true, '%': true, '&': true, '\'': true, '*': true, '+': true,
	'-': true, '.': true, '^': true, '_': true, '`': true, '|': true, '~': true,

	// DIGIT.
	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
	'8': true, '9': true,

	// ALPHA, upper case.
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
	'Y': true, 'Z': true,

	// ALPHA, lower case.
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
	'y': true, 'z': true,
}

// IsTokenRune reports whether r is a token character, i.e. whether it has
// a true entry in isTokenTable. Runes outside the table's range, including
// negative values, are never token characters.
func IsTokenRune(r rune) bool {
	// rune is a signed type, so the lower bound must be checked as well:
	// indexing the table with a negative value would panic.
	return 0 <= r && int(r) < len(isTokenTable) && isTokenTable[r]
}
99
100func isNotToken(r rune) bool {
101 return !IsTokenRune(r)
102}
103
104// HeaderValuesContainsToken reports whether any string in values
105// contains the provided token, ASCII case-insensitively.
106func HeaderValuesContainsToken(values []string, token string) bool {
107 for _, v := range values {
108 if headerValueContainsToken(v, token) {
109 return true
110 }
111 }
112 return false
113}
114
// isOWS reports whether b is an optional whitespace (OWS) byte, as
// defined by RFC 7230 section 3.2.3: a space or a horizontal tab.
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
118
119// trimOWS returns x with all optional whitespace removes from the
120// beginning and end.
121func trimOWS(x string) string {
122 // TODO: consider using strings.Trim(x, " \t") instead,
123 // if and when it's fast enough. See issue 10292.
124 // But this ASCII-only code will probably always beat UTF-8
125 // aware code.
126 for len(x) > 0 && isOWS(x[0]) {
127 x = x[1:]
128 }
129 for len(x) > 0 && isOWS(x[len(x)-1]) {
130 x = x[:len(x)-1]
131 }
132 return x
133}
134
135// headerValueContainsToken reports whether v (assumed to be a
136// 0#element, in the ABNF extension described in RFC 7230 section 7)
137// contains token amongst its comma-separated tokens, ASCII
138// case-insensitively.
139func headerValueContainsToken(v string, token string) bool {
khenaidood948f772021-08-11 17:49:24 -0400140 for comma := strings.IndexByte(v, ','); comma != -1; comma = strings.IndexByte(v, ',') {
141 if tokenEqual(trimOWS(v[:comma]), token) {
142 return true
143 }
144 v = v[comma+1:]
khenaidooac637102019-01-14 15:44:34 -0500145 }
khenaidood948f772021-08-11 17:49:24 -0400146 return tokenEqual(trimOWS(v), token)
khenaidooac637102019-01-14 15:44:34 -0500147}
148
// lowerASCII maps the ASCII upper-case letters 'A' through 'Z' to their
// lower-case counterparts and returns every other byte unchanged.
func lowerASCII(b byte) byte {
	// Distance between an upper-case ASCII letter and its lower-case form.
	const caseDelta = 'a' - 'A'
	if b < 'A' || b > 'Z' {
		return b
	}
	return b + caseDelta
}
156
157// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
158func tokenEqual(t1, t2 string) bool {
159 if len(t1) != len(t2) {
160 return false
161 }
162 for i, b := range t1 {
163 if b >= utf8.RuneSelf {
164 // No UTF-8 or non-ASCII allowed in tokens.
165 return false
166 }
167 if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
168 return false
169 }
170 }
171 return true
172}
173
// isLWS reports whether b is a linear white space byte (space or
// horizontal tab), following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	LWS = [CRLF] 1*( SP | HT )
//
// Only the single byte is examined; the optional CRLF prefix of the
// grammar is not handled here.
func isLWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	}
	return false
}
178
// isCTL reports whether b is a control byte, following
// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
//
//	CTL = <any US-ASCII control character
//	      (octets 0 - 31) and DEL (127)>
func isCTL(b byte) bool {
	switch {
	case b < 0x20: // octets 0 - 31
		return true
	case b == 0x7f: // DEL
		return true
	}
	return false
}
187
188// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
189// HTTP/2 imposes the additional restriction that uppercase ASCII
190// letters are not allowed.
191//
192// RFC 7230 says:
193// header-field = field-name ":" OWS field-value OWS
194// field-name = token
195// token = 1*tchar
196// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
197// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
198func ValidHeaderFieldName(v string) bool {
199 if len(v) == 0 {
200 return false
201 }
202 for _, r := range v {
203 if !IsTokenRune(r) {
204 return false
205 }
206 }
207 return true
208}
209
210// ValidHostHeader reports whether h is a valid host header.
211func ValidHostHeader(h string) bool {
212 // The latest spec is actually this:
213 //
214 // http://tools.ietf.org/html/rfc7230#section-5.4
215 // Host = uri-host [ ":" port ]
216 //
217 // Where uri-host is:
218 // http://tools.ietf.org/html/rfc3986#section-3.2.2
219 //
220 // But we're going to be much more lenient for now and just
221 // search for any byte that's not a valid byte in any of those
222 // expressions.
223 for i := 0; i < len(h); i++ {
224 if !validHostByte[h[i]] {
225 return false
226 }
227 }
228 return true
229}
230
// validHostByte is indexed by byte value; an entry is true when that byte
// is accepted in a Host header. See the ValidHostHeader comment for why
// the accepted set is this lenient.
var validHostByte = [256]bool{
	// DIGIT
	'0': true, '1': true, '2': true, '3': true, '4': true,
	'5': true, '6': true, '7': true, '8': true, '9': true,

	// ALPHA, lower case
	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true,
	'h': true, 'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true,
	'o': true, 'p': true, 'q': true, 'r': true, 's': true, 't': true, 'u': true,
	'v': true, 'w': true, 'x': true, 'y': true, 'z': true,

	// ALPHA, upper case
	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true,
	'H': true, 'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true,
	'O': true, 'P': true, 'Q': true, 'R': true, 'S': true, 'T': true, 'U': true,
	'V': true, 'W': true, 'X': true, 'Y': true, 'Z': true,

	// unreserved
	'-': true, '.': true, '_': true, '~': true,

	// sub-delims
	'!': true, '$': true, '&': true, '\'': true, '(': true, ')': true,
	'*': true, '+': true, ',': true, ';': true, '=': true,

	'%': true, // pct-encoded (and used in IPv6 zones)
	':': true, // IPv6address + Host expression's optional port
	'[': true,
	']': true,
}
266
267// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
268// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
269//
270// message-header = field-name ":" [ field-value ]
271// field-value = *( field-content | LWS )
272// field-content = <the OCTETs making up the field-value
273// and consisting of either *TEXT or combinations
274// of token, separators, and quoted-string>
275//
276// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
277//
278// TEXT = <any OCTET except CTLs,
279// but including LWS>
280// LWS = [CRLF] 1*( SP | HT )
281// CTL = <any US-ASCII control character
282// (octets 0 - 31) and DEL (127)>
283//
284// RFC 7230 says:
285// field-value = *( field-content / obs-fold )
286// obj-fold = N/A to http2, and deprecated
287// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
288// field-vchar = VCHAR / obs-text
289// obs-text = %x80-FF
290// VCHAR = "any visible [USASCII] character"
291//
292// http2 further says: "Similarly, HTTP/2 allows header field values
293// that are not valid. While most of the values that can be encoded
294// will not alter header field parsing, carriage return (CR, ASCII
295// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
296// 0x0) might be exploited by an attacker if they are translated
297// verbatim. Any request or response that contains a character not
298// permitted in a header field value MUST be treated as malformed
299// (Section 8.1.2.6). Valid characters are defined by the
300// field-content ABNF rule in Section 3.2 of [RFC7230]."
301//
302// This function does not (yet?) properly handle the rejection of
303// strings that begin or end with SP or HTAB.
304func ValidHeaderFieldValue(v string) bool {
305 for i := 0; i < len(v); i++ {
306 b := v[i]
307 if isCTL(b) && !isLWS(b) {
308 return false
309 }
310 }
311 return true
312}
313
// isASCII reports whether every byte of s is below utf8.RuneSelf, i.e.
// whether s consists of ASCII bytes only. The empty string is ASCII.
func isASCII(s string) bool {
	for _, c := range []byte(s) {
		if c >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
322
323// PunycodeHostPort returns the IDNA Punycode version
324// of the provided "host" or "host:port" string.
325func PunycodeHostPort(v string) (string, error) {
326 if isASCII(v) {
327 return v, nil
328 }
329
330 host, port, err := net.SplitHostPort(v)
331 if err != nil {
332 // The input 'v' argument was just a "host" argument,
333 // without a port. This error should not be returned
334 // to the caller.
335 host = v
336 port = ""
337 }
338 host, err = idna.ToASCII(host)
339 if err != nil {
340 // Non-UTF-8? Not representable in Punycode, in any
341 // case.
342 return "", err
343 }
344 if port == "" {
345 return host, nil
346 }
347 return net.JoinHostPort(host, port), nil
348}