blob: 7598c1018bded97034973beec588649ea6588116 [file] [log] [blame]
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +05301//go:build amd64 && !appengine && !noasm && gc
2// +build amd64,!appengine,!noasm,gc
3
4package zstd
5
6import (
7 "fmt"
8
9 "github.com/klauspost/compress/internal/cpuinfo"
10)
11
12type decodeSyncAsmContext struct {
13 llTable []decSymbol
14 mlTable []decSymbol
15 ofTable []decSymbol
16 llState uint64
17 mlState uint64
18 ofState uint64
19 iteration int
20 litRemain int
21 out []byte
22 outPosition int
23 literals []byte
24 litPosition int
25 history []byte
26 windowSize int
27 ll int // set on error (not for all errors, please refer to _generate/gen.go)
28 ml int // set on error (not for all errors, please refer to _generate/gen.go)
29 mo int // set on error (not for all errors, please refer to _generate/gen.go)
30}
31
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// The result is one of the error codes declared in this file (noError on
// success); on some errors ctx.ll, ctx.ml and ctx.mo describe the offending
// sequence.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
37
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//
// The result is one of the error codes declared in this file (noError on success).
//
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
41
// sequenceDecs_decodeSync_safe_amd64 does the same as sequenceDecs_decodeSync_amd64,
// but does not write more than output buffer.
//
// The result is one of the error codes declared in this file (noError on success).
//
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
45
// sequenceDecs_decodeSync_safe_bmi2 does the same as sequenceDecs_decodeSync_bmi2,
// but does not write more than output buffer.
//
// The result is one of the error codes declared in this file (noError on success).
//
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
49
50// decode sequences from the stream with the provided history but without a dictionary.
51func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
52 if len(s.dict) > 0 {
53 return false, nil
54 }
55 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
56 return false, nil
57 }
58
59 // FIXME: Using unsafe memory copies leads to rare, random crashes
60 // with fuzz testing. It is therefore disabled for now.
61 const useSafe = true
62 /*
63 useSafe := false
64 if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
65 useSafe = true
66 }
67 if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
68 useSafe = true
69 }
70 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
71 useSafe = true
72 }
73 */
74
75 br := s.br
76
77 maxBlockSize := maxCompressedBlockSize
78 if s.windowSize < maxBlockSize {
79 maxBlockSize = s.windowSize
80 }
81
82 ctx := decodeSyncAsmContext{
83 llTable: s.litLengths.fse.dt[:maxTablesize],
84 mlTable: s.matchLengths.fse.dt[:maxTablesize],
85 ofTable: s.offsets.fse.dt[:maxTablesize],
86 llState: uint64(s.litLengths.state.state),
87 mlState: uint64(s.matchLengths.state.state),
88 ofState: uint64(s.offsets.state.state),
89 iteration: s.nSeqs - 1,
90 litRemain: len(s.literals),
91 out: s.out,
92 outPosition: len(s.out),
93 literals: s.literals,
94 windowSize: s.windowSize,
95 history: hist,
96 }
97
98 s.seqSize = 0
99 startSize := len(s.out)
100
101 var errCode int
102 if cpuinfo.HasBMI2() {
103 if useSafe {
104 errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
105 } else {
106 errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
107 }
108 } else {
109 if useSafe {
110 errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
111 } else {
112 errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
113 }
114 }
115 switch errCode {
116 case noError:
117 break
118
119 case errorMatchLenOfsMismatch:
120 return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
121
122 case errorMatchLenTooBig:
123 return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
124
125 case errorMatchOffTooBig:
126 return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
127 ctx.mo, ctx.outPosition+len(hist)-startSize)
128
129 case errorNotEnoughLiterals:
130 return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
131 ctx.ll, ctx.litRemain+ctx.ll)
132
133 case errorNotEnoughSpace:
134 size := ctx.outPosition + ctx.ll + ctx.ml
135 if debugDecoder {
136 println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
137 }
138 return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
139
140 default:
141 return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
142 }
143
144 s.seqSize += ctx.litRemain
145 if s.seqSize > maxBlockSize {
146 return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
147 }
148 err := br.close()
149 if err != nil {
150 printf("Closing sequences: %v, %+v\n", err, *br)
151 return true, err
152 }
153
154 s.literals = s.literals[ctx.litPosition:]
155 t := ctx.outPosition
156 s.out = s.out[:t]
157
158 // Add final literals
159 s.out = append(s.out, s.literals...)
160 if debugDecoder {
161 t += len(s.literals)
162 if t != len(s.out) {
163 panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
164 }
165 }
166
167 return true, nil
168}
169
170// --------------------------------------------------------------------------------
171
172type decodeAsmContext struct {
173 llTable []decSymbol
174 mlTable []decSymbol
175 ofTable []decSymbol
176 llState uint64
177 mlState uint64
178 ofState uint64
179 iteration int
180 seqs []seqVals
181 litRemain int
182}
183
// Result codes returned by the assembly sequence decoders.
const (
	// noError is returned when decoding succeeded.
	noError = 0

	// errorMatchLenOfsMismatch is reported when mo == 0 && ml > 0.
	errorMatchLenOfsMismatch = 1

	// errorMatchLenTooBig is reported when ml > maxMatchLen.
	errorMatchLenTooBig = 2

	// errorMatchOffTooBig is reported when mo > available history or mo > s.windowSize.
	errorMatchOffTooBig = 3

	// errorNotEnoughLiterals is reported when the sum of literal lengths
	// exceeds the literal buffer size.
	errorNotEnoughLiterals = 4

	// errorNotEnoughSpace is reported when capacity of `out` is too small.
	errorNotEnoughSpace = 5
)
200
// sequenceDecs_decode_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// The result is noError on success or one of the error codes declared in
// this file.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
206
// sequenceDecs_decode_56_amd64 implements the main loop of sequenceDecs in x86 asm.
//
// decode selects this variant when s.maxBits plus the three actual table
// logs is at most 56.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
212
// sequenceDecs_decode_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// The result is noError on success or one of the error codes declared in
// this file.
//
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
216
// sequenceDecs_decode_56_bmi2 implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//
// decode selects this variant when s.maxBits plus the three actual table
// logs is at most 56.
//
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
220
221// decode sequences from the stream without the provided history.
222func (s *sequenceDecs) decode(seqs []seqVals) error {
223 br := s.br
224
225 maxBlockSize := maxCompressedBlockSize
226 if s.windowSize < maxBlockSize {
227 maxBlockSize = s.windowSize
228 }
229
230 ctx := decodeAsmContext{
231 llTable: s.litLengths.fse.dt[:maxTablesize],
232 mlTable: s.matchLengths.fse.dt[:maxTablesize],
233 ofTable: s.offsets.fse.dt[:maxTablesize],
234 llState: uint64(s.litLengths.state.state),
235 mlState: uint64(s.matchLengths.state.state),
236 ofState: uint64(s.offsets.state.state),
237 seqs: seqs,
238 iteration: len(seqs) - 1,
239 litRemain: len(s.literals),
240 }
241
242 s.seqSize = 0
243 lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
244 var errCode int
245 if cpuinfo.HasBMI2() {
246 if lte56bits {
247 errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
248 } else {
249 errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
250 }
251 } else {
252 if lte56bits {
253 errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
254 } else {
255 errCode = sequenceDecs_decode_amd64(s, br, &ctx)
256 }
257 }
258 if errCode != 0 {
259 i := len(seqs) - ctx.iteration - 1
260 switch errCode {
261 case errorMatchLenOfsMismatch:
262 ml := ctx.seqs[i].ml
263 return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
264
265 case errorMatchLenTooBig:
266 ml := ctx.seqs[i].ml
267 return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
268
269 case errorNotEnoughLiterals:
270 ll := ctx.seqs[i].ll
271 return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
272 }
273
274 return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
275 }
276
277 if ctx.litRemain < 0 {
278 return fmt.Errorf("literal count is too big: total available %d, total requested %d",
279 len(s.literals), len(s.literals)-ctx.litRemain)
280 }
281
282 s.seqSize += ctx.litRemain
283 if s.seqSize > maxBlockSize {
284 return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
285 }
286 err := br.close()
287 if err != nil {
288 printf("Closing sequences: %v, %+v\n", err, *br)
289 }
290 return err
291}
292
293// --------------------------------------------------------------------------------
294
295type executeAsmContext struct {
296 seqs []seqVals
297 seqIndex int
298 out []byte
299 history []byte
300 literals []byte
301 outPosition int
302 litPosition int
303 windowSize int
304}
305
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
//
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
//
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
313
// sequenceDecs_executeSimple_safe_amd64 is the same as
// sequenceDecs_executeSimple_amd64, but with safe memcopies.
//
// Returns false if a match offset is too big.
//
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
317
318// executeSimple handles cases when dictionary is not used.
319func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
320 // Ensure we have enough output size...
321 if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
322 addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
323 s.out = append(s.out, make([]byte, addBytes)...)
324 s.out = s.out[:len(s.out)-addBytes]
325 }
326
327 if debugDecoder {
328 printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
329 }
330
331 var t = len(s.out)
332 out := s.out[:t+s.seqSize]
333
334 ctx := executeAsmContext{
335 seqs: seqs,
336 seqIndex: 0,
337 out: out,
338 history: hist,
339 outPosition: t,
340 litPosition: 0,
341 literals: s.literals,
342 windowSize: s.windowSize,
343 }
344 var ok bool
345 if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
346 ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
347 } else {
348 ok = sequenceDecs_executeSimple_amd64(&ctx)
349 }
350 if !ok {
351 return fmt.Errorf("match offset (%d) bigger than current history (%d)",
352 seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
353 }
354 s.literals = s.literals[ctx.litPosition:]
355 t = ctx.outPosition
356
357 // Add final literals
358 copy(out[t:], s.literals)
359 if debugDecoder {
360 t += len(s.literals)
361 if t != len(out) {
362 panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
363 }
364 }
365 s.out = out
366
367 return nil
368}