blob: 1ba308c8bf77619b0793e3e3c0079f3dd94e8b6b [file] [log] [blame]
// Package zstd provides decompression of zstandard files.
//
// For advanced usage and examples, go to the README: https://github.com/klauspost/compress/tree/master/zstd#zstd
4package zstd
5
6import (
David K. Bainbridgebd6b2882021-08-26 13:31:02 +00007 "bytes"
8 "encoding/binary"
Scott Bakered4efab2020-01-13 19:12:25 -08009 "errors"
10 "log"
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000011 "math"
Scott Bakered4efab2020-01-13 19:12:25 -080012 "math/bits"
13)
14
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000015// enable debug printing
Scott Bakered4efab2020-01-13 19:12:25 -080016const debug = false
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000017
18// Enable extra assertions.
19const debugAsserts = debug || false
20
21// print sequence details
Scott Bakered4efab2020-01-13 19:12:25 -080022const debugSequences = false
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000023
24// print detailed matching information
Scott Bakered4efab2020-01-13 19:12:25 -080025const debugMatches = false
26
27// force encoder to use predefined tables.
28const forcePreDef = false
29
30// zstdMinMatch is the minimum zstd match length.
31const zstdMinMatch = 3
32
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000033// Reset the buffer offset when reaching this.
34const bufferReset = math.MaxInt32 - MaxWindowSize
35
// Sentinel errors returned by the package.
var (
	// ErrReservedBlockType is returned when a reserved block type is found.
	// Typically this indicates wrong or corrupted input.
	ErrReservedBlockType = errors.New("invalid input: reserved block type encountered")

	// ErrCompressedSizeTooBig is returned when a block is bigger than allowed.
	// Typically this indicates wrong or corrupted input.
	ErrCompressedSizeTooBig = errors.New("invalid input: compressed size too big")

	// ErrBlockTooSmall is returned when a block is too small to be decoded.
	// Typically returned on invalid input.
	ErrBlockTooSmall = errors.New("block too small")

	// ErrMagicMismatch is returned when a "magic" number isn't what is expected.
	// Typically this indicates wrong or corrupted input.
	ErrMagicMismatch = errors.New("invalid input: magic number mismatch")

	// ErrWindowSizeExceeded is returned when a reference exceeds the valid window size.
	// Typically this indicates wrong or corrupted input.
	ErrWindowSizeExceeded = errors.New("window size exceeded")

	// ErrWindowSizeTooSmall is returned when no window size is specified.
	// Typically this indicates wrong or corrupted input.
	ErrWindowSizeTooSmall = errors.New("invalid input: window size was too small")

	// ErrDecoderSizeExceeded is returned if decompressed size exceeds the configured limit.
	ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")

	// ErrUnknownDictionary is returned if the dictionary ID is unknown.
	// For the time being dictionaries are not supported.
	ErrUnknownDictionary = errors.New("unknown dictionary")

	// ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
	// This is only returned if SingleSegment is specified on the frame.
	ErrFrameSizeExceeded = errors.New("frame size exceeded")

	// ErrCRCMismatch is returned if CRC mismatches.
	ErrCRCMismatch = errors.New("CRC check failed")

	// ErrDecoderClosed will be returned if the Decoder was used after
	// Close has been called.
	ErrDecoderClosed = errors.New("decoder used after Close")

	// ErrDecoderNilInput is returned when a nil Reader was provided
	// and an operation other than Reset/DecodeAll/Close was attempted.
	ErrDecoderNilInput = errors.New("nil input provided as reader")
)
83
84func println(a ...interface{}) {
85 if debug {
86 log.Println(a...)
87 }
88}
89
90func printf(format string, a ...interface{}) {
91 if debug {
92 log.Printf(format, a...)
93 }
94}
95
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000096// matchLenFast does matching, but will not match the last up to 7 bytes.
97func matchLenFast(a, b []byte) int {
98 endI := len(a) & (math.MaxInt32 - 7)
99 for i := 0; i < endI; i += 8 {
100 if diff := load64(a, i) ^ load64(b, i); diff != 0 {
101 return i + bits.TrailingZeros64(diff)>>3
102 }
103 }
104 return endI
105}
106
Scott Bakered4efab2020-01-13 19:12:25 -0800107// matchLen returns the maximum length.
108// a must be the shortest of the two.
109// The function also returns whether all bytes matched.
110func matchLen(a, b []byte) int {
111 b = b[:len(a)]
112 for i := 0; i < len(a)-7; i += 8 {
113 if diff := load64(a, i) ^ load64(b, i); diff != 0 {
114 return i + (bits.TrailingZeros64(diff) >> 3)
115 }
116 }
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000117
Scott Bakered4efab2020-01-13 19:12:25 -0800118 checked := (len(a) >> 3) << 3
119 a = a[checked:]
120 b = b[checked:]
Scott Bakered4efab2020-01-13 19:12:25 -0800121 for i := range a {
122 if a[i] != b[i] {
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000123 return i + checked
Scott Bakered4efab2020-01-13 19:12:25 -0800124 }
125 }
126 return len(a) + checked
127}
128
// load3232 reads a little-endian uint32 from b starting at offset i.
// It panics if fewer than 4 bytes are available from i.
func load3232(b []byte, i int32) uint32 {
	return binary.LittleEndian.Uint32(b[i:])
}
132
// load6432 reads a little-endian uint64 from b starting at offset i.
// It panics if fewer than 8 bytes are available from i.
func load6432(b []byte, i int32) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}
136
// load64 reads a little-endian uint64 from b starting at offset i.
// It panics if fewer than 8 bytes are available from i.
func load64(b []byte, i int) uint64 {
	return binary.LittleEndian.Uint64(b[i:])
}
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000140
// byter is implemented by values that expose their contents as a byte slice
// together with its length.
type byter interface {
	Bytes() []byte
	Len() int
}

// Compile-time check that *bytes.Buffer satisfies byter.
var _ byter = (*bytes.Buffer)(nil)