Holger Hildebrandt | fa07499 | 2020-03-27 15:42:06 +0000 | [diff] [blame^] | 1 | // Copyright 2014 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // Package hpack implements HPACK, a compression format for |
| 6 | // efficiently representing HTTP header fields in the context of HTTP/2. |
| 7 | // |
| 8 | // See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09 |
| 9 | package hpack |
| 10 | |
| 11 | import ( |
| 12 | "bytes" |
| 13 | "errors" |
| 14 | "fmt" |
| 15 | ) |
| 16 | |
| 17 | // A DecodingError is something the spec defines as a decoding error. |
| 18 | type DecodingError struct { |
| 19 | Err error |
| 20 | } |
| 21 | |
| 22 | func (de DecodingError) Error() string { |
| 23 | return fmt.Sprintf("decoding error: %v", de.Err) |
| 24 | } |
| 25 | |
| 26 | // An InvalidIndexError is returned when an encoder references a table |
| 27 | // entry before the static table or after the end of the dynamic table. |
| 28 | type InvalidIndexError int |
| 29 | |
| 30 | func (e InvalidIndexError) Error() string { |
| 31 | return fmt.Sprintf("invalid indexed representation index %d", int(e)) |
| 32 | } |
| 33 | |
| 34 | // A HeaderField is a name-value pair. Both the name and value are |
| 35 | // treated as opaque sequences of octets. |
| 36 | type HeaderField struct { |
| 37 | Name, Value string |
| 38 | |
| 39 | // Sensitive means that this header field should never be |
| 40 | // indexed. |
| 41 | Sensitive bool |
| 42 | } |
| 43 | |
| 44 | // IsPseudo reports whether the header field is an http2 pseudo header. |
| 45 | // That is, it reports whether it starts with a colon. |
| 46 | // It is not otherwise guaranteed to be a valid pseudo header field, |
| 47 | // though. |
| 48 | func (hf HeaderField) IsPseudo() bool { |
| 49 | return len(hf.Name) != 0 && hf.Name[0] == ':' |
| 50 | } |
| 51 | |
| 52 | func (hf HeaderField) String() string { |
| 53 | var suffix string |
| 54 | if hf.Sensitive { |
| 55 | suffix = " (sensitive)" |
| 56 | } |
| 57 | return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix) |
| 58 | } |
| 59 | |
| 60 | // Size returns the size of an entry per RFC 7541 section 4.1. |
| 61 | func (hf HeaderField) Size() uint32 { |
| 62 | // http://http2.github.io/http2-spec/compression.html#rfc.section.4.1 |
| 63 | // "The size of the dynamic table is the sum of the size of |
| 64 | // its entries. The size of an entry is the sum of its name's |
| 65 | // length in octets (as defined in Section 5.2), its value's |
| 66 | // length in octets (see Section 5.2), plus 32. The size of |
| 67 | // an entry is calculated using the length of the name and |
| 68 | // value without any Huffman encoding applied." |
| 69 | |
| 70 | // This can overflow if somebody makes a large HeaderField |
| 71 | // Name and/or Value by hand, but we don't care, because that |
| 72 | // won't happen on the wire because the encoding doesn't allow |
| 73 | // it. |
| 74 | return uint32(len(hf.Name) + len(hf.Value) + 32) |
| 75 | } |
| 76 | |
| 77 | // A Decoder is the decoding context for incremental processing of |
| 78 | // header blocks. |
| 79 | type Decoder struct { |
| 80 | dynTab dynamicTable |
| 81 | emit func(f HeaderField) |
| 82 | |
| 83 | emitEnabled bool // whether calls to emit are enabled |
| 84 | maxStrLen int // 0 means unlimited |
| 85 | |
| 86 | // buf is the unparsed buffer. It's only written to |
| 87 | // saveBuf if it was truncated in the middle of a header |
| 88 | // block. Because it's usually not owned, we can only |
| 89 | // process it under Write. |
| 90 | buf []byte // not owned; only valid during Write |
| 91 | |
| 92 | // saveBuf is previous data passed to Write which we weren't able |
| 93 | // to fully parse before. Unlike buf, we own this data. |
| 94 | saveBuf bytes.Buffer |
| 95 | |
| 96 | firstField bool // processing the first field of the header block |
| 97 | } |
| 98 | |
| 99 | // NewDecoder returns a new decoder with the provided maximum dynamic |
| 100 | // table size. The emitFunc will be called for each valid field |
| 101 | // parsed, in the same goroutine as calls to Write, before Write returns. |
| 102 | func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder { |
| 103 | d := &Decoder{ |
| 104 | emit: emitFunc, |
| 105 | emitEnabled: true, |
| 106 | firstField: true, |
| 107 | } |
| 108 | d.dynTab.table.init() |
| 109 | d.dynTab.allowedMaxSize = maxDynamicTableSize |
| 110 | d.dynTab.setMaxSize(maxDynamicTableSize) |
| 111 | return d |
| 112 | } |
| 113 | |
| 114 | // ErrStringLength is returned by Decoder.Write when the max string length |
| 115 | // (as configured by Decoder.SetMaxStringLength) would be violated. |
| 116 | var ErrStringLength = errors.New("hpack: string too long") |
| 117 | |
| 118 | // SetMaxStringLength sets the maximum size of a HeaderField name or |
| 119 | // value string. If a string exceeds this length (even after any |
| 120 | // decompression), Write will return ErrStringLength. |
| 121 | // A value of 0 means unlimited and is the default from NewDecoder. |
| 122 | func (d *Decoder) SetMaxStringLength(n int) { |
| 123 | d.maxStrLen = n |
| 124 | } |
| 125 | |
| 126 | // SetEmitFunc changes the callback used when new header fields |
| 127 | // are decoded. |
| 128 | // It must be non-nil. It does not affect EmitEnabled. |
| 129 | func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) { |
| 130 | d.emit = emitFunc |
| 131 | } |
| 132 | |
| 133 | // SetEmitEnabled controls whether the emitFunc provided to NewDecoder |
| 134 | // should be called. The default is true. |
| 135 | // |
| 136 | // This facility exists to let servers enforce MAX_HEADER_LIST_SIZE |
| 137 | // while still decoding and keeping in-sync with decoder state, but |
| 138 | // without doing unnecessary decompression or generating unnecessary |
| 139 | // garbage for header fields past the limit. |
| 140 | func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v } |
| 141 | |
| 142 | // EmitEnabled reports whether calls to the emitFunc provided to NewDecoder |
| 143 | // are currently enabled. The default is true. |
| 144 | func (d *Decoder) EmitEnabled() bool { return d.emitEnabled } |
| 145 | |
| 146 | // TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their |
| 147 | // underlying buffers for garbage reasons. |
| 148 | |
| 149 | func (d *Decoder) SetMaxDynamicTableSize(v uint32) { |
| 150 | d.dynTab.setMaxSize(v) |
| 151 | } |
| 152 | |
| 153 | // SetAllowedMaxDynamicTableSize sets the upper bound that the encoded |
| 154 | // stream (via dynamic table size updates) may set the maximum size |
| 155 | // to. |
| 156 | func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) { |
| 157 | d.dynTab.allowedMaxSize = v |
| 158 | } |
| 159 | |
| 160 | type dynamicTable struct { |
| 161 | // http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2 |
| 162 | table headerFieldTable |
| 163 | size uint32 // in bytes |
| 164 | maxSize uint32 // current maxSize |
| 165 | allowedMaxSize uint32 // maxSize may go up to this, inclusive |
| 166 | } |
| 167 | |
| 168 | func (dt *dynamicTable) setMaxSize(v uint32) { |
| 169 | dt.maxSize = v |
| 170 | dt.evict() |
| 171 | } |
| 172 | |
| 173 | func (dt *dynamicTable) add(f HeaderField) { |
| 174 | dt.table.addEntry(f) |
| 175 | dt.size += f.Size() |
| 176 | dt.evict() |
| 177 | } |
| 178 | |
| 179 | // If we're too big, evict old stuff. |
| 180 | func (dt *dynamicTable) evict() { |
| 181 | var n int |
| 182 | for dt.size > dt.maxSize && n < dt.table.len() { |
| 183 | dt.size -= dt.table.ents[n].Size() |
| 184 | n++ |
| 185 | } |
| 186 | dt.table.evictOldest(n) |
| 187 | } |
| 188 | |
| 189 | func (d *Decoder) maxTableIndex() int { |
| 190 | // This should never overflow. RFC 7540 Section 6.5.2 limits the size of |
| 191 | // the dynamic table to 2^32 bytes, where each entry will occupy more than |
| 192 | // one byte. Further, the staticTable has a fixed, small length. |
| 193 | return d.dynTab.table.len() + staticTable.len() |
| 194 | } |
| 195 | |
| 196 | func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) { |
| 197 | // See Section 2.3.3. |
| 198 | if i == 0 { |
| 199 | return |
| 200 | } |
| 201 | if i <= uint64(staticTable.len()) { |
| 202 | return staticTable.ents[i-1], true |
| 203 | } |
| 204 | if i > uint64(d.maxTableIndex()) { |
| 205 | return |
| 206 | } |
| 207 | // In the dynamic table, newer entries have lower indices. |
| 208 | // However, dt.ents[0] is the oldest entry. Hence, dt.ents is |
| 209 | // the reversed dynamic table. |
| 210 | dt := d.dynTab.table |
| 211 | return dt.ents[dt.len()-(int(i)-staticTable.len())], true |
| 212 | } |
| 213 | |
| 214 | // Decode decodes an entire block. |
| 215 | // |
| 216 | // TODO: remove this method and make it incremental later? This is |
| 217 | // easier for debugging now. |
| 218 | func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) { |
| 219 | var hf []HeaderField |
| 220 | saveFunc := d.emit |
| 221 | defer func() { d.emit = saveFunc }() |
| 222 | d.emit = func(f HeaderField) { hf = append(hf, f) } |
| 223 | if _, err := d.Write(p); err != nil { |
| 224 | return nil, err |
| 225 | } |
| 226 | if err := d.Close(); err != nil { |
| 227 | return nil, err |
| 228 | } |
| 229 | return hf, nil |
| 230 | } |
| 231 | |
| 232 | // Close declares that the decoding is complete and resets the Decoder |
| 233 | // to be reused again for a new header block. If there is any remaining |
| 234 | // data in the decoder's buffer, Close returns an error. |
| 235 | func (d *Decoder) Close() error { |
| 236 | if d.saveBuf.Len() > 0 { |
| 237 | d.saveBuf.Reset() |
| 238 | return DecodingError{errors.New("truncated headers")} |
| 239 | } |
| 240 | d.firstField = true |
| 241 | return nil |
| 242 | } |
| 243 | |
| 244 | func (d *Decoder) Write(p []byte) (n int, err error) { |
| 245 | if len(p) == 0 { |
| 246 | // Prevent state machine CPU attacks (making us redo |
| 247 | // work up to the point of finding out we don't have |
| 248 | // enough data) |
| 249 | return |
| 250 | } |
| 251 | // Only copy the data if we have to. Optimistically assume |
| 252 | // that p will contain a complete header block. |
| 253 | if d.saveBuf.Len() == 0 { |
| 254 | d.buf = p |
| 255 | } else { |
| 256 | d.saveBuf.Write(p) |
| 257 | d.buf = d.saveBuf.Bytes() |
| 258 | d.saveBuf.Reset() |
| 259 | } |
| 260 | |
| 261 | for len(d.buf) > 0 { |
| 262 | err = d.parseHeaderFieldRepr() |
| 263 | if err == errNeedMore { |
| 264 | // Extra paranoia, making sure saveBuf won't |
| 265 | // get too large. All the varint and string |
| 266 | // reading code earlier should already catch |
| 267 | // overlong things and return ErrStringLength, |
| 268 | // but keep this as a last resort. |
| 269 | const varIntOverhead = 8 // conservative |
| 270 | if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) { |
| 271 | return 0, ErrStringLength |
| 272 | } |
| 273 | d.saveBuf.Write(d.buf) |
| 274 | return len(p), nil |
| 275 | } |
| 276 | d.firstField = false |
| 277 | if err != nil { |
| 278 | break |
| 279 | } |
| 280 | } |
| 281 | return len(p), err |
| 282 | } |
| 283 | |
| 284 | // errNeedMore is an internal sentinel error value that means the |
| 285 | // buffer is truncated and we need to read more data before we can |
| 286 | // continue parsing. |
| 287 | var errNeedMore = errors.New("need more data") |
| 288 | |
| 289 | type indexType int |
| 290 | |
| 291 | const ( |
| 292 | indexedTrue indexType = iota |
| 293 | indexedFalse |
| 294 | indexedNever |
| 295 | ) |
| 296 | |
| 297 | func (v indexType) indexed() bool { return v == indexedTrue } |
| 298 | func (v indexType) sensitive() bool { return v == indexedNever } |
| 299 | |
| 300 | // returns errNeedMore if there isn't enough data available. |
| 301 | // any other error is fatal. |
| 302 | // consumes d.buf iff it returns nil. |
| 303 | // precondition: must be called with len(d.buf) > 0 |
| 304 | func (d *Decoder) parseHeaderFieldRepr() error { |
| 305 | b := d.buf[0] |
| 306 | switch { |
| 307 | case b&128 != 0: |
| 308 | // Indexed representation. |
| 309 | // High bit set? |
| 310 | // http://http2.github.io/http2-spec/compression.html#rfc.section.6.1 |
| 311 | return d.parseFieldIndexed() |
| 312 | case b&192 == 64: |
| 313 | // 6.2.1 Literal Header Field with Incremental Indexing |
| 314 | // 0b10xxxxxx: top two bits are 10 |
| 315 | // http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1 |
| 316 | return d.parseFieldLiteral(6, indexedTrue) |
| 317 | case b&240 == 0: |
| 318 | // 6.2.2 Literal Header Field without Indexing |
| 319 | // 0b0000xxxx: top four bits are 0000 |
| 320 | // http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2 |
| 321 | return d.parseFieldLiteral(4, indexedFalse) |
| 322 | case b&240 == 16: |
| 323 | // 6.2.3 Literal Header Field never Indexed |
| 324 | // 0b0001xxxx: top four bits are 0001 |
| 325 | // http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3 |
| 326 | return d.parseFieldLiteral(4, indexedNever) |
| 327 | case b&224 == 32: |
| 328 | // 6.3 Dynamic Table Size Update |
| 329 | // Top three bits are '001'. |
| 330 | // http://http2.github.io/http2-spec/compression.html#rfc.section.6.3 |
| 331 | return d.parseDynamicTableSizeUpdate() |
| 332 | } |
| 333 | |
| 334 | return DecodingError{errors.New("invalid encoding")} |
| 335 | } |
| 336 | |
| 337 | // (same invariants and behavior as parseHeaderFieldRepr) |
| 338 | func (d *Decoder) parseFieldIndexed() error { |
| 339 | buf := d.buf |
| 340 | idx, buf, err := readVarInt(7, buf) |
| 341 | if err != nil { |
| 342 | return err |
| 343 | } |
| 344 | hf, ok := d.at(idx) |
| 345 | if !ok { |
| 346 | return DecodingError{InvalidIndexError(idx)} |
| 347 | } |
| 348 | d.buf = buf |
| 349 | return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value}) |
| 350 | } |
| 351 | |
| 352 | // (same invariants and behavior as parseHeaderFieldRepr) |
| 353 | func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error { |
| 354 | buf := d.buf |
| 355 | nameIdx, buf, err := readVarInt(n, buf) |
| 356 | if err != nil { |
| 357 | return err |
| 358 | } |
| 359 | |
| 360 | var hf HeaderField |
| 361 | wantStr := d.emitEnabled || it.indexed() |
| 362 | if nameIdx > 0 { |
| 363 | ihf, ok := d.at(nameIdx) |
| 364 | if !ok { |
| 365 | return DecodingError{InvalidIndexError(nameIdx)} |
| 366 | } |
| 367 | hf.Name = ihf.Name |
| 368 | } else { |
| 369 | hf.Name, buf, err = d.readString(buf, wantStr) |
| 370 | if err != nil { |
| 371 | return err |
| 372 | } |
| 373 | } |
| 374 | hf.Value, buf, err = d.readString(buf, wantStr) |
| 375 | if err != nil { |
| 376 | return err |
| 377 | } |
| 378 | d.buf = buf |
| 379 | if it.indexed() { |
| 380 | d.dynTab.add(hf) |
| 381 | } |
| 382 | hf.Sensitive = it.sensitive() |
| 383 | return d.callEmit(hf) |
| 384 | } |
| 385 | |
| 386 | func (d *Decoder) callEmit(hf HeaderField) error { |
| 387 | if d.maxStrLen != 0 { |
| 388 | if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen { |
| 389 | return ErrStringLength |
| 390 | } |
| 391 | } |
| 392 | if d.emitEnabled { |
| 393 | d.emit(hf) |
| 394 | } |
| 395 | return nil |
| 396 | } |
| 397 | |
| 398 | // (same invariants and behavior as parseHeaderFieldRepr) |
| 399 | func (d *Decoder) parseDynamicTableSizeUpdate() error { |
| 400 | // RFC 7541, sec 4.2: This dynamic table size update MUST occur at the |
| 401 | // beginning of the first header block following the change to the dynamic table size. |
| 402 | if !d.firstField && d.dynTab.size > 0 { |
| 403 | return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")} |
| 404 | } |
| 405 | |
| 406 | buf := d.buf |
| 407 | size, buf, err := readVarInt(5, buf) |
| 408 | if err != nil { |
| 409 | return err |
| 410 | } |
| 411 | if size > uint64(d.dynTab.allowedMaxSize) { |
| 412 | return DecodingError{errors.New("dynamic table size update too large")} |
| 413 | } |
| 414 | d.dynTab.setMaxSize(uint32(size)) |
| 415 | d.buf = buf |
| 416 | return nil |
| 417 | } |
| 418 | |
| 419 | var errVarintOverflow = DecodingError{errors.New("varint integer overflow")} |
| 420 | |
| 421 | // readVarInt reads an unsigned variable length integer off the |
| 422 | // beginning of p. n is the parameter as described in |
| 423 | // http://http2.github.io/http2-spec/compression.html#rfc.section.5.1. |
| 424 | // |
| 425 | // n must always be between 1 and 8. |
| 426 | // |
| 427 | // The returned remain buffer is either a smaller suffix of p, or err != nil. |
| 428 | // The error is errNeedMore if p doesn't contain a complete integer. |
| 429 | func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) { |
| 430 | if n < 1 || n > 8 { |
| 431 | panic("bad n") |
| 432 | } |
| 433 | if len(p) == 0 { |
| 434 | return 0, p, errNeedMore |
| 435 | } |
| 436 | i = uint64(p[0]) |
| 437 | if n < 8 { |
| 438 | i &= (1 << uint64(n)) - 1 |
| 439 | } |
| 440 | if i < (1<<uint64(n))-1 { |
| 441 | return i, p[1:], nil |
| 442 | } |
| 443 | |
| 444 | origP := p |
| 445 | p = p[1:] |
| 446 | var m uint64 |
| 447 | for len(p) > 0 { |
| 448 | b := p[0] |
| 449 | p = p[1:] |
| 450 | i += uint64(b&127) << m |
| 451 | if b&128 == 0 { |
| 452 | return i, p, nil |
| 453 | } |
| 454 | m += 7 |
| 455 | if m >= 63 { // TODO: proper overflow check. making this up. |
| 456 | return 0, origP, errVarintOverflow |
| 457 | } |
| 458 | } |
| 459 | return 0, origP, errNeedMore |
| 460 | } |
| 461 | |
| 462 | // readString decodes an hpack string from p. |
| 463 | // |
| 464 | // wantStr is whether s will be used. If false, decompression and |
| 465 | // []byte->string garbage are skipped if s will be ignored |
| 466 | // anyway. This does mean that huffman decoding errors for non-indexed |
| 467 | // strings past the MAX_HEADER_LIST_SIZE are ignored, but the server |
| 468 | // is returning an error anyway, and because they're not indexed, the error |
| 469 | // won't affect the decoding state. |
| 470 | func (d *Decoder) readString(p []byte, wantStr bool) (s string, remain []byte, err error) { |
| 471 | if len(p) == 0 { |
| 472 | return "", p, errNeedMore |
| 473 | } |
| 474 | isHuff := p[0]&128 != 0 |
| 475 | strLen, p, err := readVarInt(7, p) |
| 476 | if err != nil { |
| 477 | return "", p, err |
| 478 | } |
| 479 | if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) { |
| 480 | return "", nil, ErrStringLength |
| 481 | } |
| 482 | if uint64(len(p)) < strLen { |
| 483 | return "", p, errNeedMore |
| 484 | } |
| 485 | if !isHuff { |
| 486 | if wantStr { |
| 487 | s = string(p[:strLen]) |
| 488 | } |
| 489 | return s, p[strLen:], nil |
| 490 | } |
| 491 | |
| 492 | if wantStr { |
| 493 | buf := bufPool.Get().(*bytes.Buffer) |
| 494 | buf.Reset() // don't trust others |
| 495 | defer bufPool.Put(buf) |
| 496 | if err := huffmanDecode(buf, d.maxStrLen, p[:strLen]); err != nil { |
| 497 | buf.Reset() |
| 498 | return "", nil, err |
| 499 | } |
| 500 | s = buf.String() |
| 501 | buf.Reset() // be nice to GC |
| 502 | } |
| 503 | return s, p[strLen:], nil |
| 504 | } |