blob: db0b35fbe39f00202dcb9ed6b0e890c05fe3992c [file] [log] [blame]
Joey Armstronge8c091f2023-01-17 16:56:26 -05001// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
2// at http://cyan4973.github.io/xxHash/.
3package xxhash
4
5import (
6 "encoding/binary"
7 "errors"
8 "math/bits"
9)
10
11const (
12 prime1 uint64 = 11400714785074694791
13 prime2 uint64 = 14029467366897019727
14 prime3 uint64 = 1609587929392839161
15 prime4 uint64 = 9650029242287828579
16 prime5 uint64 = 2870177450012600261
17)
18
19// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
20// possible in the Go code is worth a small (but measurable) performance boost
21// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
22// convenience in the Go code in a few places where we need to intentionally
23// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
24// result overflows a uint64).
25var (
26 prime1v = prime1
27 prime2v = prime2
28 prime3v = prime3
29 prime4v = prime4
30 prime5v = prime5
31)
32
33// Digest implements hash.Hash64.
34type Digest struct {
35 v1 uint64
36 v2 uint64
37 v3 uint64
38 v4 uint64
39 total uint64
40 mem [32]byte
41 n int // how much of mem is used
42}
43
44// New creates a new Digest that computes the 64-bit xxHash algorithm.
45func New() *Digest {
46 var d Digest
47 d.Reset()
48 return &d
49}
50
51// Reset clears the Digest's state so that it can be reused.
52func (d *Digest) Reset() {
53 d.v1 = prime1v + prime2
54 d.v2 = prime2
55 d.v3 = 0
56 d.v4 = -prime1v
57 d.total = 0
58 d.n = 0
59}
60
61// Size always returns 8 bytes.
62func (d *Digest) Size() int { return 8 }
63
64// BlockSize always returns 32 bytes.
65func (d *Digest) BlockSize() int { return 32 }
66
67// Write adds more data to d. It always returns len(b), nil.
68func (d *Digest) Write(b []byte) (n int, err error) {
69 n = len(b)
70 d.total += uint64(n)
71
72 if d.n+n < 32 {
73 // This new data doesn't even fill the current block.
74 copy(d.mem[d.n:], b)
75 d.n += n
76 return
77 }
78
79 if d.n > 0 {
80 // Finish off the partial block.
81 copy(d.mem[d.n:], b)
82 d.v1 = round(d.v1, u64(d.mem[0:8]))
83 d.v2 = round(d.v2, u64(d.mem[8:16]))
84 d.v3 = round(d.v3, u64(d.mem[16:24]))
85 d.v4 = round(d.v4, u64(d.mem[24:32]))
86 b = b[32-d.n:]
87 d.n = 0
88 }
89
90 if len(b) >= 32 {
91 // One or more full blocks left.
92 nw := writeBlocks(d, b)
93 b = b[nw:]
94 }
95
96 // Store any remaining partial block.
97 copy(d.mem[:], b)
98 d.n = len(b)
99
100 return
101}
102
103// Sum appends the current hash to b and returns the resulting slice.
104func (d *Digest) Sum(b []byte) []byte {
105 s := d.Sum64()
106 return append(
107 b,
108 byte(s>>56),
109 byte(s>>48),
110 byte(s>>40),
111 byte(s>>32),
112 byte(s>>24),
113 byte(s>>16),
114 byte(s>>8),
115 byte(s),
116 )
117}
118
119// Sum64 returns the current hash.
120func (d *Digest) Sum64() uint64 {
121 var h uint64
122
123 if d.total >= 32 {
124 v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
125 h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
126 h = mergeRound(h, v1)
127 h = mergeRound(h, v2)
128 h = mergeRound(h, v3)
129 h = mergeRound(h, v4)
130 } else {
131 h = d.v3 + prime5
132 }
133
134 h += d.total
135
136 i, end := 0, d.n
137 for ; i+8 <= end; i += 8 {
138 k1 := round(0, u64(d.mem[i:i+8]))
139 h ^= k1
140 h = rol27(h)*prime1 + prime4
141 }
142 if i+4 <= end {
143 h ^= uint64(u32(d.mem[i:i+4])) * prime1
144 h = rol23(h)*prime2 + prime3
145 i += 4
146 }
147 for i < end {
148 h ^= uint64(d.mem[i]) * prime5
149 h = rol11(h) * prime1
150 i++
151 }
152
153 h ^= h >> 33
154 h *= prime2
155 h ^= h >> 29
156 h *= prime3
157 h ^= h >> 32
158
159 return h
160}
161
162const (
163 magic = "xxh\x06"
164 marshaledSize = len(magic) + 8*5 + 32
165)
166
167// MarshalBinary implements the encoding.BinaryMarshaler interface.
168func (d *Digest) MarshalBinary() ([]byte, error) {
169 b := make([]byte, 0, marshaledSize)
170 b = append(b, magic...)
171 b = appendUint64(b, d.v1)
172 b = appendUint64(b, d.v2)
173 b = appendUint64(b, d.v3)
174 b = appendUint64(b, d.v4)
175 b = appendUint64(b, d.total)
176 b = append(b, d.mem[:d.n]...)
177 b = b[:len(b)+len(d.mem)-d.n]
178 return b, nil
179}
180
181// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
182func (d *Digest) UnmarshalBinary(b []byte) error {
183 if len(b) < len(magic) || string(b[:len(magic)]) != magic {
184 return errors.New("xxhash: invalid hash state identifier")
185 }
186 if len(b) != marshaledSize {
187 return errors.New("xxhash: invalid hash state size")
188 }
189 b = b[len(magic):]
190 b, d.v1 = consumeUint64(b)
191 b, d.v2 = consumeUint64(b)
192 b, d.v3 = consumeUint64(b)
193 b, d.v4 = consumeUint64(b)
194 b, d.total = consumeUint64(b)
195 copy(d.mem[:], b)
196 b = b[len(d.mem):]
197 d.n = int(d.total % uint64(len(d.mem)))
198 return nil
199}
200
201func appendUint64(b []byte, x uint64) []byte {
202 var a [8]byte
203 binary.LittleEndian.PutUint64(a[:], x)
204 return append(b, a[:]...)
205}
206
207func consumeUint64(b []byte) ([]byte, uint64) {
208 x := u64(b)
209 return b[8:], x
210}
211
212func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
213func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
214
215func round(acc, input uint64) uint64 {
216 acc += input * prime2
217 acc = rol31(acc)
218 acc *= prime1
219 return acc
220}
221
222func mergeRound(acc, val uint64) uint64 {
223 val = round(0, val)
224 acc ^= val
225 acc = acc*prime1 + prime4
226 return acc
227}
228
229func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
230func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
231func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
232func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
233func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
234func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
235func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
236func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }