Blame - vendor/golang.org/x/net/http/httpguts/httplex.go - ofagent-go

blob: e7de24ee64efc6b10326616b2b5b2588b2b70439 [file] [log] [blame]

Don Newton	98fd881	2019-09-23 15:15:02 -0400	[diff] [blame^]	1	// Copyright 2016 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
				5	package httpguts
				6
				7	import (
				8	"net"
				9	"strings"
				10	"unicode/utf8"
				11
				12	"golang.org/x/net/idna"
				13	)
				14
				15	var isTokenTable = [127]bool{
				16	'!': true,
				17	'#': true,
				18	'$': true,
				19	'%': true,
				20	'&': true,
				21	'\'': true,
				22	'*': true,
				23	'+': true,
				24	'-': true,
				25	'.': true,
				26	'0': true,
				27	'1': true,
				28	'2': true,
				29	'3': true,
				30	'4': true,
				31	'5': true,
				32	'6': true,
				33	'7': true,
				34	'8': true,
				35	'9': true,
				36	'A': true,
				37	'B': true,
				38	'C': true,
				39	'D': true,
				40	'E': true,
				41	'F': true,
				42	'G': true,
				43	'H': true,
				44	'I': true,
				45	'J': true,
				46	'K': true,
				47	'L': true,
				48	'M': true,
				49	'N': true,
				50	'O': true,
				51	'P': true,
				52	'Q': true,
				53	'R': true,
				54	'S': true,
				55	'T': true,
				56	'U': true,
				57	'W': true,
				58	'V': true,
				59	'X': true,
				60	'Y': true,
				61	'Z': true,
				62	'^': true,
				63	'_': true,
				64	'`': true,
				65	'a': true,
				66	'b': true,
				67	'c': true,
				68	'd': true,
				69	'e': true,
				70	'f': true,
				71	'g': true,
				72	'h': true,
				73	'i': true,
				74	'j': true,
				75	'k': true,
				76	'l': true,
				77	'm': true,
				78	'n': true,
				79	'o': true,
				80	'p': true,
				81	'q': true,
				82	'r': true,
				83	's': true,
				84	't': true,
				85	'u': true,
				86	'v': true,
				87	'w': true,
				88	'x': true,
				89	'y': true,
				90	'z': true,
				91	'\|': true,
				92	'~': true,
				93	}
				94
				95	func IsTokenRune(r rune) bool {
				96	i := int(r)
				97	return i < len(isTokenTable) && isTokenTable[i]
				98	}
				99
				100	func isNotToken(r rune) bool {
				101	return !IsTokenRune(r)
				102	}
				103
				104	// HeaderValuesContainsToken reports whether any string in values
				105	// contains the provided token, ASCII case-insensitively.
				106	func HeaderValuesContainsToken(values []string, token string) bool {
				107	for _, v := range values {
				108	if headerValueContainsToken(v, token) {
				109	return true
				110	}
				111	}
				112	return false
				113	}
				114
				115	// isOWS reports whether b is an optional whitespace byte, as defined
				116	// by RFC 7230 section 3.2.3.
				117	func isOWS(b byte) bool { return b == ' ' \|\| b == '\t' }
				118
				119	// trimOWS returns x with all optional whitespace removes from the
				120	// beginning and end.
				121	func trimOWS(x string) string {
				122	// TODO: consider using strings.Trim(x, " \t") instead,
				123	// if and when it's fast enough. See issue 10292.
				124	// But this ASCII-only code will probably always beat UTF-8
				125	// aware code.
				126	for len(x) > 0 && isOWS(x[0]) {
				127	x = x[1:]
				128	}
				129	for len(x) > 0 && isOWS(x[len(x)-1]) {
				130	x = x[:len(x)-1]
				131	}
				132	return x
				133	}
				134
				135	// headerValueContainsToken reports whether v (assumed to be a
				136	// 0#element, in the ABNF extension described in RFC 7230 section 7)
				137	// contains token amongst its comma-separated tokens, ASCII
				138	// case-insensitively.
				139	func headerValueContainsToken(v string, token string) bool {
				140	v = trimOWS(v)
				141	if comma := strings.IndexByte(v, ','); comma != -1 {
				142	return tokenEqual(trimOWS(v[:comma]), token) \|\| headerValueContainsToken(v[comma+1:], token)
				143	}
				144	return tokenEqual(v, token)
				145	}
				146
				147	// lowerASCII returns the ASCII lowercase version of b.
				148	func lowerASCII(b byte) byte {
				149	if 'A' <= b && b <= 'Z' {
				150	return b + ('a' - 'A')
				151	}
				152	return b
				153	}
				154
				155	// tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
				156	func tokenEqual(t1, t2 string) bool {
				157	if len(t1) != len(t2) {
				158	return false
				159	}
				160	for i, b := range t1 {
				161	if b >= utf8.RuneSelf {
				162	// No UTF-8 or non-ASCII allowed in tokens.
				163	return false
				164	}
				165	if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
				166	return false
				167	}
				168	}
				169	return true
				170	}
				171
				172	// isLWS reports whether b is linear white space, according
				173	// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
				174	// LWS = [CRLF] 1*( SP \| HT )
				175	func isLWS(b byte) bool { return b == ' ' \|\| b == '\t' }
				176
				177	// isCTL reports whether b is a control byte, according
				178	// to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
				179	// CTL = <any US-ASCII control character
				180	// (octets 0 - 31) and DEL (127)>
				181	func isCTL(b byte) bool {
				182	const del = 0x7f // a CTL
				183	return b < ' ' \|\| b == del
				184	}
				185
				186	// ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
				187	// HTTP/2 imposes the additional restriction that uppercase ASCII
				188	// letters are not allowed.
				189	//
				190	// RFC 7230 says:
				191	// header-field = field-name ":" OWS field-value OWS
				192	// field-name = token
				193	// token = 1*tchar
				194	// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
				195	// "^" / "_" / "`" / "\|" / "~" / DIGIT / ALPHA
				196	func ValidHeaderFieldName(v string) bool {
				197	if len(v) == 0 {
				198	return false
				199	}
				200	for _, r := range v {
				201	if !IsTokenRune(r) {
				202	return false
				203	}
				204	}
				205	return true
				206	}
				207
				208	// ValidHostHeader reports whether h is a valid host header.
				209	func ValidHostHeader(h string) bool {
				210	// The latest spec is actually this:
				211	//
				212	// http://tools.ietf.org/html/rfc7230#section-5.4
				213	// Host = uri-host [ ":" port ]
				214	//
				215	// Where uri-host is:
				216	// http://tools.ietf.org/html/rfc3986#section-3.2.2
				217	//
				218	// But we're going to be much more lenient for now and just
				219	// search for any byte that's not a valid byte in any of those
				220	// expressions.
				221	for i := 0; i < len(h); i++ {
				222	if !validHostByte[h[i]] {
				223	return false
				224	}
				225	}
				226	return true
				227	}
				228
				229	// See the validHostHeader comment.
				230	var validHostByte = [256]bool{
				231	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
				232	'8': true, '9': true,
				233
				234	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
				235	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
				236	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
				237	'y': true, 'z': true,
				238
				239	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
				240	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
				241	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
				242	'Y': true, 'Z': true,
				243
				244	'!': true, // sub-delims
				245	'$': true, // sub-delims
				246	'%': true, // pct-encoded (and used in IPv6 zones)
				247	'&': true, // sub-delims
				248	'(': true, // sub-delims
				249	')': true, // sub-delims
				250	'*': true, // sub-delims
				251	'+': true, // sub-delims
				252	',': true, // sub-delims
				253	'-': true, // unreserved
				254	'.': true, // unreserved
				255	':': true, // IPv6address + Host expression's optional port
				256	';': true, // sub-delims
				257	'=': true, // sub-delims
				258	'[': true,
				259	'\'': true, // sub-delims
				260	']': true,
				261	'_': true, // unreserved
				262	'~': true, // unreserved
				263	}
				264
				265	// ValidHeaderFieldValue reports whether v is a valid "field-value" according to
				266	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
				267	//
				268	// message-header = field-name ":" [ field-value ]
				269	// field-value = *( field-content \| LWS )
				270	// field-content = <the OCTETs making up the field-value
				271	// and consisting of either *TEXT or combinations
				272	// of token, separators, and quoted-string>
				273	//
				274	// http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
				275	//
				276	// TEXT = <any OCTET except CTLs,
				277	// but including LWS>
				278	// LWS = [CRLF] 1*( SP \| HT )
				279	// CTL = <any US-ASCII control character
				280	// (octets 0 - 31) and DEL (127)>
				281	//
				282	// RFC 7230 says:
				283	// field-value = *( field-content / obs-fold )
				284	// obj-fold = N/A to http2, and deprecated
				285	// field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ]
				286	// field-vchar = VCHAR / obs-text
				287	// obs-text = %x80-FF
				288	// VCHAR = "any visible [USASCII] character"
				289	//
				290	// http2 further says: "Similarly, HTTP/2 allows header field values
				291	// that are not valid. While most of the values that can be encoded
				292	// will not alter header field parsing, carriage return (CR, ASCII
				293	// 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
				294	// 0x0) might be exploited by an attacker if they are translated
				295	// verbatim. Any request or response that contains a character not
				296	// permitted in a header field value MUST be treated as malformed
				297	// (Section 8.1.2.6). Valid characters are defined by the
				298	// field-content ABNF rule in Section 3.2 of [RFC7230]."
				299	//
				300	// This function does not (yet?) properly handle the rejection of
				301	// strings that begin or end with SP or HTAB.
				302	func ValidHeaderFieldValue(v string) bool {
				303	for i := 0; i < len(v); i++ {
				304	b := v[i]
				305	if isCTL(b) && !isLWS(b) {
				306	return false
				307	}
				308	}
				309	return true
				310	}
				311
				312	func isASCII(s string) bool {
				313	for i := 0; i < len(s); i++ {
				314	if s[i] >= utf8.RuneSelf {
				315	return false
				316	}
				317	}
				318	return true
				319	}
				320
				321	// PunycodeHostPort returns the IDNA Punycode version
				322	// of the provided "host" or "host:port" string.
				323	func PunycodeHostPort(v string) (string, error) {
				324	if isASCII(v) {
				325	return v, nil
				326	}
				327
				328	host, port, err := net.SplitHostPort(v)
				329	if err != nil {
				330	// The input 'v' argument was just a "host" argument,
				331	// without a port. This error should not be returned
				332	// to the caller.
				333	host = v
				334	port = ""
				335	}
				336	host, err = idna.ToASCII(host)
				337	if err != nil {
				338	// Non-UTF-8? Not representable in Punycode, in any
				339	// case.
				340	return "", err
				341	}
				342	if port == "" {
				343	return host, nil
				344	}
				345	return net.JoinHostPort(host, port), nil
				346	}