Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 1 | // Package xxhash implements the 64-bit variant of xxHash (XXH64) as described |
| 2 | // at http://cyan4973.github.io/xxHash/. |
| 3 | package xxhash |
| 4 | |
| 5 | import ( |
| 6 | "encoding/binary" |
| 7 | "errors" |
| 8 | "math/bits" |
| 9 | ) |
| 10 | |
// The five 64-bit constants used throughout the hash computation, written in
// hexadecimal.
const (
	prime1 uint64 = 0x9E3779B185EBCA87
	prime2 uint64 = 0xC2B2AE3D27D4EB4F
	prime3 uint64 = 0x165667B19E3779F9
	prime4 uint64 = 0x85EBCA77C2B2AE63
	prime5 uint64 = 0x27D4EB2F165667C5
)
| 18 | |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 19 | // Store the primes in an array as well. |
| 20 | // |
| 21 | // The consts are used when possible in Go code to avoid MOVs but we need a |
| 22 | // contiguous array of the assembly code. |
| 23 | var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5} |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 24 | |
// Digest implements hash.Hash64.
//
// The zero value does not hold the initial accumulator state that Reset
// installs; obtain a Digest from New or call Reset before writing to it.
type Digest struct {
	// Running accumulators, each fed from one 8-byte slice of a 32-byte block.
	v1, v2, v3, v4 uint64

	// Total number of bytes written so far.
	total uint64

	// Holding area for input that does not yet form a full 32-byte block.
	mem [32]byte

	// How much of mem is used.
	n int
}
| 35 | |
| 36 | // New creates a new Digest that computes the 64-bit xxHash algorithm. |
| 37 | func New() *Digest { |
| 38 | var d Digest |
| 39 | d.Reset() |
| 40 | return &d |
| 41 | } |
| 42 | |
| 43 | // Reset clears the Digest's state so that it can be reused. |
| 44 | func (d *Digest) Reset() { |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 45 | d.v1 = primes[0] + prime2 |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 46 | d.v2 = prime2 |
| 47 | d.v3 = 0 |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 48 | d.v4 = -primes[0] |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 49 | d.total = 0 |
| 50 | d.n = 0 |
| 51 | } |
| 52 | |
| 53 | // Size always returns 8 bytes. |
| 54 | func (d *Digest) Size() int { return 8 } |
| 55 | |
| 56 | // BlockSize always returns 32 bytes. |
| 57 | func (d *Digest) BlockSize() int { return 32 } |
| 58 | |
| 59 | // Write adds more data to d. It always returns len(b), nil. |
| 60 | func (d *Digest) Write(b []byte) (n int, err error) { |
| 61 | n = len(b) |
| 62 | d.total += uint64(n) |
| 63 | |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 64 | memleft := d.mem[d.n&(len(d.mem)-1):] |
| 65 | |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 66 | if d.n+n < 32 { |
| 67 | // This new data doesn't even fill the current block. |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 68 | copy(memleft, b) |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 69 | d.n += n |
| 70 | return |
| 71 | } |
| 72 | |
| 73 | if d.n > 0 { |
| 74 | // Finish off the partial block. |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 75 | c := copy(memleft, b) |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 76 | d.v1 = round(d.v1, u64(d.mem[0:8])) |
| 77 | d.v2 = round(d.v2, u64(d.mem[8:16])) |
| 78 | d.v3 = round(d.v3, u64(d.mem[16:24])) |
| 79 | d.v4 = round(d.v4, u64(d.mem[24:32])) |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 80 | b = b[c:] |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 81 | d.n = 0 |
| 82 | } |
| 83 | |
| 84 | if len(b) >= 32 { |
| 85 | // One or more full blocks left. |
| 86 | nw := writeBlocks(d, b) |
| 87 | b = b[nw:] |
| 88 | } |
| 89 | |
| 90 | // Store any remaining partial block. |
| 91 | copy(d.mem[:], b) |
| 92 | d.n = len(b) |
| 93 | |
| 94 | return |
| 95 | } |
| 96 | |
| 97 | // Sum appends the current hash to b and returns the resulting slice. |
| 98 | func (d *Digest) Sum(b []byte) []byte { |
| 99 | s := d.Sum64() |
| 100 | return append( |
| 101 | b, |
| 102 | byte(s>>56), |
| 103 | byte(s>>48), |
| 104 | byte(s>>40), |
| 105 | byte(s>>32), |
| 106 | byte(s>>24), |
| 107 | byte(s>>16), |
| 108 | byte(s>>8), |
| 109 | byte(s), |
| 110 | ) |
| 111 | } |
| 112 | |
| 113 | // Sum64 returns the current hash. |
| 114 | func (d *Digest) Sum64() uint64 { |
| 115 | var h uint64 |
| 116 | |
| 117 | if d.total >= 32 { |
| 118 | v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 |
| 119 | h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) |
| 120 | h = mergeRound(h, v1) |
| 121 | h = mergeRound(h, v2) |
| 122 | h = mergeRound(h, v3) |
| 123 | h = mergeRound(h, v4) |
| 124 | } else { |
| 125 | h = d.v3 + prime5 |
| 126 | } |
| 127 | |
| 128 | h += d.total |
| 129 | |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 130 | b := d.mem[:d.n&(len(d.mem)-1)] |
| 131 | for ; len(b) >= 8; b = b[8:] { |
| 132 | k1 := round(0, u64(b[:8])) |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 133 | h ^= k1 |
| 134 | h = rol27(h)*prime1 + prime4 |
| 135 | } |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 136 | if len(b) >= 4 { |
| 137 | h ^= uint64(u32(b[:4])) * prime1 |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 138 | h = rol23(h)*prime2 + prime3 |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 139 | b = b[4:] |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 140 | } |
Akash Reddy Kankanala | c6b6ca1 | 2025-06-12 14:26:57 +0530 | [diff] [blame^] | 141 | for ; len(b) > 0; b = b[1:] { |
| 142 | h ^= uint64(b[0]) * prime5 |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 143 | h = rol11(h) * prime1 |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 144 | } |
| 145 | |
| 146 | h ^= h >> 33 |
| 147 | h *= prime2 |
| 148 | h ^= h >> 29 |
| 149 | h *= prime3 |
| 150 | h ^= h >> 32 |
| 151 | |
| 152 | return h |
| 153 | } |
| 154 | |
const (
	// magic is the identifier that prefixes every marshaled Digest.
	magic = "xxh\x06"

	// marshaledSize is the exact length of a marshaled Digest: the
	// identifier, five 8-byte integers and the 32-byte buffer.
	marshaledSize = len(magic) + 5*8 + 32
)
| 159 | |
| 160 | // MarshalBinary implements the encoding.BinaryMarshaler interface. |
| 161 | func (d *Digest) MarshalBinary() ([]byte, error) { |
| 162 | b := make([]byte, 0, marshaledSize) |
| 163 | b = append(b, magic...) |
| 164 | b = appendUint64(b, d.v1) |
| 165 | b = appendUint64(b, d.v2) |
| 166 | b = appendUint64(b, d.v3) |
| 167 | b = appendUint64(b, d.v4) |
| 168 | b = appendUint64(b, d.total) |
| 169 | b = append(b, d.mem[:d.n]...) |
| 170 | b = b[:len(b)+len(d.mem)-d.n] |
| 171 | return b, nil |
| 172 | } |
| 173 | |
| 174 | // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. |
| 175 | func (d *Digest) UnmarshalBinary(b []byte) error { |
| 176 | if len(b) < len(magic) || string(b[:len(magic)]) != magic { |
| 177 | return errors.New("xxhash: invalid hash state identifier") |
| 178 | } |
| 179 | if len(b) != marshaledSize { |
| 180 | return errors.New("xxhash: invalid hash state size") |
| 181 | } |
| 182 | b = b[len(magic):] |
| 183 | b, d.v1 = consumeUint64(b) |
| 184 | b, d.v2 = consumeUint64(b) |
| 185 | b, d.v3 = consumeUint64(b) |
| 186 | b, d.v4 = consumeUint64(b) |
| 187 | b, d.total = consumeUint64(b) |
| 188 | copy(d.mem[:], b) |
Joey Armstrong | a6af152 | 2023-01-17 16:06:16 -0500 | [diff] [blame] | 189 | d.n = int(d.total % uint64(len(d.mem))) |
| 190 | return nil |
| 191 | } |
| 192 | |
// appendUint64 appends x to b as 8 little-endian bytes and returns the
// extended slice.
func appendUint64(b []byte, x uint64) []byte {
	start := len(b)
	b = append(b, 0, 0, 0, 0, 0, 0, 0, 0)
	binary.LittleEndian.PutUint64(b[start:], x)
	return b
}
| 198 | |
| 199 | func consumeUint64(b []byte) ([]byte, uint64) { |
| 200 | x := u64(b) |
| 201 | return b[8:], x |
| 202 | } |
| 203 | |
// u64 decodes the first 8 bytes of b as a little-endian uint64.
func u64(b []byte) uint64 {
	return binary.LittleEndian.Uint64(b)
}
// u32 decodes the first 4 bytes of b as a little-endian uint32.
func u32(b []byte) uint32 {
	return binary.LittleEndian.Uint32(b)
}
| 206 | |
| 207 | func round(acc, input uint64) uint64 { |
| 208 | acc += input * prime2 |
| 209 | acc = rol31(acc) |
| 210 | acc *= prime1 |
| 211 | return acc |
| 212 | } |
| 213 | |
| 214 | func mergeRound(acc, val uint64) uint64 { |
| 215 | val = round(0, val) |
| 216 | acc ^= val |
| 217 | acc = acc*prime1 + prime4 |
| 218 | return acc |
| 219 | } |
| 220 | |
// rol1 rotates x left by 1 bit.
func rol1(x uint64) uint64 {
	return bits.RotateLeft64(x, 1)
}
// rol7 rotates x left by 7 bits.
func rol7(x uint64) uint64 {
	return bits.RotateLeft64(x, 7)
}
// rol11 rotates x left by 11 bits.
func rol11(x uint64) uint64 {
	return bits.RotateLeft64(x, 11)
}
// rol12 rotates x left by 12 bits.
func rol12(x uint64) uint64 {
	return bits.RotateLeft64(x, 12)
}
// rol18 rotates x left by 18 bits.
func rol18(x uint64) uint64 {
	return bits.RotateLeft64(x, 18)
}
// rol23 rotates x left by 23 bits.
func rol23(x uint64) uint64 {
	return bits.RotateLeft64(x, 23)
}
// rol27 rotates x left by 27 bits.
func rol27(x uint64) uint64 {
	return bits.RotateLeft64(x, 27)
}
// rol31 rotates x left by 31 bits.
func rol31(x uint64) uint64 {
	return bits.RotateLeft64(x, 31)
}