blob: 426b9cac7869e2ee448fa1659dabea50f2cffc04 [file] [log] [blame]
khenaidoo106c61a2021-08-11 18:05:46 -04001// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
2// at http://cyan4973.github.io/xxHash/.
3// THIS IS VENDORED: Go to github.com/cespare/xxhash for original package.
4
5package xxhash
6
7import (
8 "encoding/binary"
9 "errors"
10 "math/bits"
11)
12
13const (
14 prime1 uint64 = 11400714785074694791
15 prime2 uint64 = 14029467366897019727
16 prime3 uint64 = 1609587929392839161
17 prime4 uint64 = 9650029242287828579
18 prime5 uint64 = 2870177450012600261
19)
20
21// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
22// possible in the Go code is worth a small (but measurable) performance boost
23// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
24// convenience in the Go code in a few places where we need to intentionally
25// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
26// result overflows a uint64).
27var (
28 prime1v = prime1
29 prime2v = prime2
30 prime3v = prime3
31 prime4v = prime4
32 prime5v = prime5
33)
34
35// Digest implements hash.Hash64.
36type Digest struct {
37 v1 uint64
38 v2 uint64
39 v3 uint64
40 v4 uint64
41 total uint64
42 mem [32]byte
43 n int // how much of mem is used
44}
45
46// New creates a new Digest that computes the 64-bit xxHash algorithm.
47func New() *Digest {
48 var d Digest
49 d.Reset()
50 return &d
51}
52
53// Reset clears the Digest's state so that it can be reused.
54func (d *Digest) Reset() {
55 d.v1 = prime1v + prime2
56 d.v2 = prime2
57 d.v3 = 0
58 d.v4 = -prime1v
59 d.total = 0
60 d.n = 0
61}
62
63// Size always returns 8 bytes.
64func (d *Digest) Size() int { return 8 }
65
66// BlockSize always returns 32 bytes.
67func (d *Digest) BlockSize() int { return 32 }
68
69// Write adds more data to d. It always returns len(b), nil.
70func (d *Digest) Write(b []byte) (n int, err error) {
71 n = len(b)
72 d.total += uint64(n)
73
74 if d.n+n < 32 {
75 // This new data doesn't even fill the current block.
76 copy(d.mem[d.n:], b)
77 d.n += n
78 return
79 }
80
81 if d.n > 0 {
82 // Finish off the partial block.
83 copy(d.mem[d.n:], b)
84 d.v1 = round(d.v1, u64(d.mem[0:8]))
85 d.v2 = round(d.v2, u64(d.mem[8:16]))
86 d.v3 = round(d.v3, u64(d.mem[16:24]))
87 d.v4 = round(d.v4, u64(d.mem[24:32]))
88 b = b[32-d.n:]
89 d.n = 0
90 }
91
92 if len(b) >= 32 {
93 // One or more full blocks left.
94 nw := writeBlocks(d, b)
95 b = b[nw:]
96 }
97
98 // Store any remaining partial block.
99 copy(d.mem[:], b)
100 d.n = len(b)
101
102 return
103}
104
105// Sum appends the current hash to b and returns the resulting slice.
106func (d *Digest) Sum(b []byte) []byte {
107 s := d.Sum64()
108 return append(
109 b,
110 byte(s>>56),
111 byte(s>>48),
112 byte(s>>40),
113 byte(s>>32),
114 byte(s>>24),
115 byte(s>>16),
116 byte(s>>8),
117 byte(s),
118 )
119}
120
121// Sum64 returns the current hash.
122func (d *Digest) Sum64() uint64 {
123 var h uint64
124
125 if d.total >= 32 {
126 v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
127 h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
128 h = mergeRound(h, v1)
129 h = mergeRound(h, v2)
130 h = mergeRound(h, v3)
131 h = mergeRound(h, v4)
132 } else {
133 h = d.v3 + prime5
134 }
135
136 h += d.total
137
138 i, end := 0, d.n
139 for ; i+8 <= end; i += 8 {
140 k1 := round(0, u64(d.mem[i:i+8]))
141 h ^= k1
142 h = rol27(h)*prime1 + prime4
143 }
144 if i+4 <= end {
145 h ^= uint64(u32(d.mem[i:i+4])) * prime1
146 h = rol23(h)*prime2 + prime3
147 i += 4
148 }
149 for i < end {
150 h ^= uint64(d.mem[i]) * prime5
151 h = rol11(h) * prime1
152 i++
153 }
154
155 h ^= h >> 33
156 h *= prime2
157 h ^= h >> 29
158 h *= prime3
159 h ^= h >> 32
160
161 return h
162}
163
164const (
165 magic = "xxh\x06"
166 marshaledSize = len(magic) + 8*5 + 32
167)
168
169// MarshalBinary implements the encoding.BinaryMarshaler interface.
170func (d *Digest) MarshalBinary() ([]byte, error) {
171 b := make([]byte, 0, marshaledSize)
172 b = append(b, magic...)
173 b = appendUint64(b, d.v1)
174 b = appendUint64(b, d.v2)
175 b = appendUint64(b, d.v3)
176 b = appendUint64(b, d.v4)
177 b = appendUint64(b, d.total)
178 b = append(b, d.mem[:d.n]...)
179 b = b[:len(b)+len(d.mem)-d.n]
180 return b, nil
181}
182
183// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
184func (d *Digest) UnmarshalBinary(b []byte) error {
185 if len(b) < len(magic) || string(b[:len(magic)]) != magic {
186 return errors.New("xxhash: invalid hash state identifier")
187 }
188 if len(b) != marshaledSize {
189 return errors.New("xxhash: invalid hash state size")
190 }
191 b = b[len(magic):]
192 b, d.v1 = consumeUint64(b)
193 b, d.v2 = consumeUint64(b)
194 b, d.v3 = consumeUint64(b)
195 b, d.v4 = consumeUint64(b)
196 b, d.total = consumeUint64(b)
197 copy(d.mem[:], b)
198 b = b[len(d.mem):]
199 d.n = int(d.total % uint64(len(d.mem)))
200 return nil
201}
202
203func appendUint64(b []byte, x uint64) []byte {
204 var a [8]byte
205 binary.LittleEndian.PutUint64(a[:], x)
206 return append(b, a[:]...)
207}
208
209func consumeUint64(b []byte) ([]byte, uint64) {
210 x := u64(b)
211 return b[8:], x
212}
213
214func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
215func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
216
217func round(acc, input uint64) uint64 {
218 acc += input * prime2
219 acc = rol31(acc)
220 acc *= prime1
221 return acc
222}
223
224func mergeRound(acc, val uint64) uint64 {
225 val = round(0, val)
226 acc ^= val
227 acc = acc*prime1 + prime4
228 return acc
229}
230
231func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
232func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
233func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
234func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
235func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
236func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
237func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
238func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }