[VOL-5291] - pon & nni stats changes, onu stats from OLT, onu stats from onu
Change-Id: I4f23cb1b1276d27ca6f2c183875b8b227f772edd
Signed-off-by: Akash Reddy Kankanala <akash.kankanala@radisys.com>
diff --git a/vendor/github.com/cespare/xxhash/v2/.travis.yml b/vendor/github.com/cespare/xxhash/v2/.travis.yml
deleted file mode 100644
index c516ea8..0000000
--- a/vendor/github.com/cespare/xxhash/v2/.travis.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-language: go
-go:
- - "1.x"
- - master
-env:
- - TAGS=""
- - TAGS="-tags purego"
-script: go test $TAGS -v ./...
diff --git a/vendor/github.com/cespare/xxhash/v2/README.md b/vendor/github.com/cespare/xxhash/v2/README.md
index 2fd8693..8bf0e5b 100644
--- a/vendor/github.com/cespare/xxhash/v2/README.md
+++ b/vendor/github.com/cespare/xxhash/v2/README.md
@@ -1,10 +1,9 @@
# xxhash
-[](https://godoc.org/github.com/cespare/xxhash)
-[](https://travis-ci.org/cespare/xxhash)
+[](https://pkg.go.dev/github.com/cespare/xxhash/v2)
+[](https://github.com/cespare/xxhash/actions/workflows/test.yml)
-xxhash is a Go implementation of the 64-bit
-[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
high-quality hashing algorithm that is much faster than anything in the Go
standard library.
@@ -25,8 +24,11 @@
func (*Digest) Sum64() uint64
```
-This implementation provides a fast pure-Go implementation and an even faster
-assembly implementation for amd64.
+The package is written with optimized pure Go and also contains even faster
+assembly implementations for amd64 and arm64. If desired, the `purego` build tag
+opts into using the Go code even on those architectures.
+
+[xxHash]: http://cyan4973.github.io/xxHash/
## Compatibility
@@ -45,23 +47,26 @@
Here are some quick benchmarks comparing the pure-Go and assembly
implementations of Sum64.
-| input size | purego | asm |
-| --- | --- | --- |
-| 5 B | 979.66 MB/s | 1291.17 MB/s |
-| 100 B | 7475.26 MB/s | 7973.40 MB/s |
-| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
-| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+| input size | purego | asm |
+| ---------- | --------- | --------- |
+| 4 B | 1.3 GB/s | 1.2 GB/s |
+| 16 B | 2.9 GB/s | 3.5 GB/s |
+| 100 B | 6.9 GB/s | 8.1 GB/s |
+| 4 KB | 11.7 GB/s | 16.7 GB/s |
+| 10 MB | 12.0 GB/s | 17.3 GB/s |
-These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
-the following commands under Go 1.11.2:
+These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
+CPU using the following commands under Go 1.19.2:
```
-$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
-$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
+benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
```
## Projects using this package
- [InfluxDB](https://github.com/influxdata/influxdb)
- [Prometheus](https://github.com/prometheus/prometheus)
+- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
- [FreeCache](https://github.com/coocood/freecache)
+- [FastCache](https://github.com/VictoriaMetrics/fastcache)
diff --git a/vendor/github.com/cespare/xxhash/v2/go.mod b/vendor/github.com/cespare/xxhash/v2/go.mod
deleted file mode 100644
index 49f6760..0000000
--- a/vendor/github.com/cespare/xxhash/v2/go.mod
+++ /dev/null
@@ -1,3 +0,0 @@
-module github.com/cespare/xxhash/v2
-
-go 1.11
diff --git a/vendor/github.com/cespare/xxhash/v2/go.sum b/vendor/github.com/cespare/xxhash/v2/go.sum
deleted file mode 100644
index e69de29..0000000
--- a/vendor/github.com/cespare/xxhash/v2/go.sum
+++ /dev/null
diff --git a/vendor/github.com/cespare/xxhash/v2/testall.sh b/vendor/github.com/cespare/xxhash/v2/testall.sh
new file mode 100644
index 0000000..94b9c44
--- /dev/null
+++ b/vendor/github.com/cespare/xxhash/v2/testall.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+set -eu -o pipefail
+
+# Small convenience script for running the tests with various combinations of
+# arch/tags. This assumes we're running on amd64 and have qemu available.
+
+go test ./...
+go test -tags purego ./...
+GOARCH=arm64 go test
+GOARCH=arm64 go test -tags purego
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash.go b/vendor/github.com/cespare/xxhash/v2/xxhash.go
index db0b35f..a9e0d45 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash.go
@@ -16,19 +16,11 @@
prime5 uint64 = 2870177450012600261
)
-// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
-// possible in the Go code is worth a small (but measurable) performance boost
-// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
-// convenience in the Go code in a few places where we need to intentionally
-// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
-// result overflows a uint64).
-var (
- prime1v = prime1
- prime2v = prime2
- prime3v = prime3
- prime4v = prime4
- prime5v = prime5
-)
+// Store the primes in an array as well.
+//
+// The consts are used when possible in Go code to avoid MOVs but we need a
+// contiguous array of the assembly code.
+var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
// Digest implements hash.Hash64.
type Digest struct {
@@ -50,10 +42,10 @@
// Reset clears the Digest's state so that it can be reused.
func (d *Digest) Reset() {
- d.v1 = prime1v + prime2
+ d.v1 = primes[0] + prime2
d.v2 = prime2
d.v3 = 0
- d.v4 = -prime1v
+ d.v4 = -primes[0]
d.total = 0
d.n = 0
}
@@ -69,21 +61,23 @@
n = len(b)
d.total += uint64(n)
+ memleft := d.mem[d.n&(len(d.mem)-1):]
+
if d.n+n < 32 {
// This new data doesn't even fill the current block.
- copy(d.mem[d.n:], b)
+ copy(memleft, b)
d.n += n
return
}
if d.n > 0 {
// Finish off the partial block.
- copy(d.mem[d.n:], b)
+ c := copy(memleft, b)
d.v1 = round(d.v1, u64(d.mem[0:8]))
d.v2 = round(d.v2, u64(d.mem[8:16]))
d.v3 = round(d.v3, u64(d.mem[16:24]))
d.v4 = round(d.v4, u64(d.mem[24:32]))
- b = b[32-d.n:]
+ b = b[c:]
d.n = 0
}
@@ -133,21 +127,20 @@
h += d.total
- i, end := 0, d.n
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(d.mem[i:i+8]))
+ b := d.mem[:d.n&(len(d.mem)-1)]
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(d.mem[i:i+4])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for i < end {
- h ^= uint64(d.mem[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
- i++
}
h ^= h >> 33
@@ -193,7 +186,6 @@
b, d.v4 = consumeUint64(b)
b, d.total = consumeUint64(b)
copy(d.mem[:], b)
- b = b[len(d.mem):]
d.n = int(d.total % uint64(len(d.mem)))
return nil
}
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
index d580e32..3e8b132 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.s
@@ -1,215 +1,209 @@
+//go:build !appengine && gc && !purego
// +build !appengine
// +build gc
// +build !purego
#include "textflag.h"
-// Register allocation:
-// AX h
-// CX pointer to advance through b
-// DX n
-// BX loop end
-// R8 v1, k1
-// R9 v2
-// R10 v3
-// R11 v4
-// R12 tmp
-// R13 prime1v
-// R14 prime2v
-// R15 prime4v
+// Registers:
+#define h AX
+#define d AX
+#define p SI // pointer to advance through b
+#define n DX
+#define end BX // loop end
+#define v1 R8
+#define v2 R9
+#define v3 R10
+#define v4 R11
+#define x R12
+#define prime1 R13
+#define prime2 R14
+#define prime4 DI
-// round reads from and advances the buffer pointer in CX.
-// It assumes that R13 has prime1v and R14 has prime2v.
-#define round(r) \
- MOVQ (CX), R12 \
- ADDQ $8, CX \
- IMULQ R14, R12 \
- ADDQ R12, r \
- ROLQ $31, r \
- IMULQ R13, r
+#define round(acc, x) \
+ IMULQ prime2, x \
+ ADDQ x, acc \
+ ROLQ $31, acc \
+ IMULQ prime1, acc
-// mergeRound applies a merge round on the two registers acc and val.
-// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
-#define mergeRound(acc, val) \
- IMULQ R14, val \
- ROLQ $31, val \
- IMULQ R13, val \
- XORQ val, acc \
- IMULQ R13, acc \
- ADDQ R15, acc
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+ IMULQ prime2, x \
+ ROLQ $31, x \
+ IMULQ prime1, x
+
+// mergeRound applies a merge round on the two registers acc and x.
+// It assumes that prime1, prime2, and prime4 have been loaded.
+#define mergeRound(acc, x) \
+ round0(x) \
+ XORQ x, acc \
+ IMULQ prime1, acc \
+ ADDQ prime4, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that there is at least one block
+// to process.
+#define blockLoop() \
+loop: \
+ MOVQ +0(p), x \
+ round(v1, x) \
+ MOVQ +8(p), x \
+ round(v2, x) \
+ MOVQ +16(p), x \
+ round(v3, x) \
+ MOVQ +24(p), x \
+ round(v4, x) \
+ ADDQ $32, p \
+ CMPQ p, end \
+ JLE loop
// func Sum64(b []byte) uint64
-TEXT ·Sum64(SB), NOSPLIT, $0-32
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
// Load fixed primes.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
- MOVQ ·prime4v(SB), R15
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
+ MOVQ ·primes+24(SB), prime4
// Load slice.
- MOVQ b_base+0(FP), CX
- MOVQ b_len+8(FP), DX
- LEAQ (CX)(DX*1), BX
+ MOVQ b_base+0(FP), p
+ MOVQ b_len+8(FP), n
+ LEAQ (p)(n*1), end
// The first loop limit will be len(b)-32.
- SUBQ $32, BX
+ SUBQ $32, end
// Check whether we have at least one block.
- CMPQ DX, $32
+ CMPQ n, $32
JLT noBlocks
// Set up initial state (v1, v2, v3, v4).
- MOVQ R13, R8
- ADDQ R14, R8
- MOVQ R14, R9
- XORQ R10, R10
- XORQ R11, R11
- SUBQ R13, R11
+ MOVQ prime1, v1
+ ADDQ prime2, v1
+ MOVQ prime2, v2
+ XORQ v3, v3
+ XORQ v4, v4
+ SUBQ prime1, v4
- // Loop until CX > BX.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
+ blockLoop()
- CMPQ CX, BX
- JLE blockLoop
+ MOVQ v1, h
+ ROLQ $1, h
+ MOVQ v2, x
+ ROLQ $7, x
+ ADDQ x, h
+ MOVQ v3, x
+ ROLQ $12, x
+ ADDQ x, h
+ MOVQ v4, x
+ ROLQ $18, x
+ ADDQ x, h
- MOVQ R8, AX
- ROLQ $1, AX
- MOVQ R9, R12
- ROLQ $7, R12
- ADDQ R12, AX
- MOVQ R10, R12
- ROLQ $12, R12
- ADDQ R12, AX
- MOVQ R11, R12
- ROLQ $18, R12
- ADDQ R12, AX
-
- mergeRound(AX, R8)
- mergeRound(AX, R9)
- mergeRound(AX, R10)
- mergeRound(AX, R11)
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
JMP afterBlocks
noBlocks:
- MOVQ ·prime5v(SB), AX
+ MOVQ ·primes+32(SB), h
afterBlocks:
- ADDQ DX, AX
+ ADDQ n, h
- // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
- ADDQ $24, BX
+ ADDQ $24, end
+ CMPQ p, end
+ JG try4
- CMPQ CX, BX
- JG fourByte
+loop8:
+ MOVQ (p), x
+ ADDQ $8, p
+ round0(x)
+ XORQ x, h
+ ROLQ $27, h
+ IMULQ prime1, h
+ ADDQ prime4, h
-wordLoop:
- // Calculate k1.
- MOVQ (CX), R8
- ADDQ $8, CX
- IMULQ R14, R8
- ROLQ $31, R8
- IMULQ R13, R8
+ CMPQ p, end
+ JLE loop8
- XORQ R8, AX
- ROLQ $27, AX
- IMULQ R13, AX
- ADDQ R15, AX
+try4:
+ ADDQ $4, end
+ CMPQ p, end
+ JG try1
- CMPQ CX, BX
- JLE wordLoop
+ MOVL (p), x
+ ADDQ $4, p
+ IMULQ prime1, x
+ XORQ x, h
-fourByte:
- ADDQ $4, BX
- CMPQ CX, BX
- JG singles
+ ROLQ $23, h
+ IMULQ prime2, h
+ ADDQ ·primes+16(SB), h
- MOVL (CX), R8
- ADDQ $4, CX
- IMULQ R13, R8
- XORQ R8, AX
-
- ROLQ $23, AX
- IMULQ R14, AX
- ADDQ ·prime3v(SB), AX
-
-singles:
- ADDQ $4, BX
- CMPQ CX, BX
+try1:
+ ADDQ $4, end
+ CMPQ p, end
JGE finalize
-singlesLoop:
- MOVBQZX (CX), R12
- ADDQ $1, CX
- IMULQ ·prime5v(SB), R12
- XORQ R12, AX
+loop1:
+ MOVBQZX (p), x
+ ADDQ $1, p
+ IMULQ ·primes+32(SB), x
+ XORQ x, h
+ ROLQ $11, h
+ IMULQ prime1, h
- ROLQ $11, AX
- IMULQ R13, AX
-
- CMPQ CX, BX
- JL singlesLoop
+ CMPQ p, end
+ JL loop1
finalize:
- MOVQ AX, R12
- SHRQ $33, R12
- XORQ R12, AX
- IMULQ R14, AX
- MOVQ AX, R12
- SHRQ $29, R12
- XORQ R12, AX
- IMULQ ·prime3v(SB), AX
- MOVQ AX, R12
- SHRQ $32, R12
- XORQ R12, AX
+ MOVQ h, x
+ SHRQ $33, x
+ XORQ x, h
+ IMULQ prime2, h
+ MOVQ h, x
+ SHRQ $29, x
+ XORQ x, h
+ IMULQ ·primes+16(SB), h
+ MOVQ h, x
+ SHRQ $32, x
+ XORQ x, h
- MOVQ AX, ret+24(FP)
+ MOVQ h, ret+24(FP)
RET
-// writeBlocks uses the same registers as above except that it uses AX to store
-// the d pointer.
-
// func writeBlocks(d *Digest, b []byte) int
-TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
// Load fixed primes needed for round.
- MOVQ ·prime1v(SB), R13
- MOVQ ·prime2v(SB), R14
+ MOVQ ·primes+0(SB), prime1
+ MOVQ ·primes+8(SB), prime2
// Load slice.
- MOVQ b_base+8(FP), CX
- MOVQ b_len+16(FP), DX
- LEAQ (CX)(DX*1), BX
- SUBQ $32, BX
+ MOVQ b_base+8(FP), p
+ MOVQ b_len+16(FP), n
+ LEAQ (p)(n*1), end
+ SUBQ $32, end
// Load vN from d.
- MOVQ d+0(FP), AX
- MOVQ 0(AX), R8 // v1
- MOVQ 8(AX), R9 // v2
- MOVQ 16(AX), R10 // v3
- MOVQ 24(AX), R11 // v4
+ MOVQ s+0(FP), d
+ MOVQ 0(d), v1
+ MOVQ 8(d), v2
+ MOVQ 16(d), v3
+ MOVQ 24(d), v4
// We don't need to check the loop condition here; this function is
// always called with at least one block of data to process.
-blockLoop:
- round(R8)
- round(R9)
- round(R10)
- round(R11)
-
- CMPQ CX, BX
- JLE blockLoop
+ blockLoop()
// Copy vN back to d.
- MOVQ R8, 0(AX)
- MOVQ R9, 8(AX)
- MOVQ R10, 16(AX)
- MOVQ R11, 24(AX)
+ MOVQ v1, 0(d)
+ MOVQ v2, 8(d)
+ MOVQ v3, 16(d)
+ MOVQ v4, 24(d)
- // The number of bytes written is CX minus the old base pointer.
- SUBQ b_base+8(FP), CX
- MOVQ CX, ret+32(FP)
+ // The number of bytes written is p minus the old base pointer.
+ SUBQ b_base+8(FP), p
+ MOVQ p, ret+32(FP)
RET
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
new file mode 100644
index 0000000..7e3145a
--- /dev/null
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_arm64.s
@@ -0,0 +1,183 @@
+//go:build !appengine && gc && !purego
+// +build !appengine
+// +build gc
+// +build !purego
+
+#include "textflag.h"
+
+// Registers:
+#define digest R1
+#define h R2 // return value
+#define p R3 // input pointer
+#define n R4 // input length
+#define nblocks R5 // n / 32
+#define prime1 R7
+#define prime2 R8
+#define prime3 R9
+#define prime4 R10
+#define prime5 R11
+#define v1 R12
+#define v2 R13
+#define v3 R14
+#define v4 R15
+#define x1 R20
+#define x2 R21
+#define x3 R22
+#define x4 R23
+
+#define round(acc, x) \
+ MADD prime2, acc, x, acc \
+ ROR $64-31, acc \
+ MUL prime1, acc
+
+// round0 performs the operation x = round(0, x).
+#define round0(x) \
+ MUL prime2, x \
+ ROR $64-31, x \
+ MUL prime1, x
+
+#define mergeRound(acc, x) \
+ round0(x) \
+ EOR x, acc \
+ MADD acc, prime4, prime1, acc
+
+// blockLoop processes as many 32-byte blocks as possible,
+// updating v1, v2, v3, and v4. It assumes that n >= 32.
+#define blockLoop() \
+ LSR $5, n, nblocks \
+ PCALIGN $16 \
+ loop: \
+ LDP.P 16(p), (x1, x2) \
+ LDP.P 16(p), (x3, x4) \
+ round(v1, x1) \
+ round(v2, x2) \
+ round(v3, x3) \
+ round(v4, x4) \
+ SUB $1, nblocks \
+ CBNZ nblocks, loop
+
+// func Sum64(b []byte) uint64
+TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
+ LDP b_base+0(FP), (p, n)
+
+ LDP ·primes+0(SB), (prime1, prime2)
+ LDP ·primes+16(SB), (prime3, prime4)
+ MOVD ·primes+32(SB), prime5
+
+ CMP $32, n
+ CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
+ BLT afterLoop
+
+ ADD prime1, prime2, v1
+ MOVD prime2, v2
+ MOVD $0, v3
+ NEG prime1, v4
+
+ blockLoop()
+
+ ROR $64-1, v1, x1
+ ROR $64-7, v2, x2
+ ADD x1, x2
+ ROR $64-12, v3, x3
+ ROR $64-18, v4, x4
+ ADD x3, x4
+ ADD x2, x4, h
+
+ mergeRound(h, v1)
+ mergeRound(h, v2)
+ mergeRound(h, v3)
+ mergeRound(h, v4)
+
+afterLoop:
+ ADD n, h
+
+ TBZ $4, n, try8
+ LDP.P 16(p), (x1, x2)
+
+ round0(x1)
+
+ // NOTE: here and below, sequencing the EOR after the ROR (using a
+ // rotated register) is worth a small but measurable speedup for small
+ // inputs.
+ ROR $64-27, h
+ EOR x1 @> 64-27, h, h
+ MADD h, prime4, prime1, h
+
+ round0(x2)
+ ROR $64-27, h
+ EOR x2 @> 64-27, h, h
+ MADD h, prime4, prime1, h
+
+try8:
+ TBZ $3, n, try4
+ MOVD.P 8(p), x1
+
+ round0(x1)
+ ROR $64-27, h
+ EOR x1 @> 64-27, h, h
+ MADD h, prime4, prime1, h
+
+try4:
+ TBZ $2, n, try2
+ MOVWU.P 4(p), x2
+
+ MUL prime1, x2
+ ROR $64-23, h
+ EOR x2 @> 64-23, h, h
+ MADD h, prime3, prime2, h
+
+try2:
+ TBZ $1, n, try1
+ MOVHU.P 2(p), x3
+ AND $255, x3, x1
+ LSR $8, x3, x2
+
+ MUL prime5, x1
+ ROR $64-11, h
+ EOR x1 @> 64-11, h, h
+ MUL prime1, h
+
+ MUL prime5, x2
+ ROR $64-11, h
+ EOR x2 @> 64-11, h, h
+ MUL prime1, h
+
+try1:
+ TBZ $0, n, finalize
+ MOVBU (p), x4
+
+ MUL prime5, x4
+ ROR $64-11, h
+ EOR x4 @> 64-11, h, h
+ MUL prime1, h
+
+finalize:
+ EOR h >> 33, h
+ MUL prime2, h
+ EOR h >> 29, h
+ MUL prime3, h
+ EOR h >> 32, h
+
+ MOVD h, ret+24(FP)
+ RET
+
+// func writeBlocks(d *Digest, b []byte) int
+TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
+ LDP ·primes+0(SB), (prime1, prime2)
+
+ // Load state. Assume v[1-4] are stored contiguously.
+ MOVD d+0(FP), digest
+ LDP 0(digest), (v1, v2)
+ LDP 16(digest), (v3, v4)
+
+ LDP b_base+8(FP), (p, n)
+
+ blockLoop()
+
+ // Store updated state.
+ STP (v1, v2), 0(digest)
+ STP (v3, v4), 16(digest)
+
+ BIC $31, n
+ MOVD n, ret+32(FP)
+ RET
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
similarity index 72%
rename from vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go
rename to vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
index ad14b80..9216e0a 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_amd64.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_asm.go
@@ -1,3 +1,5 @@
+//go:build (amd64 || arm64) && !appengine && gc && !purego
+// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
index 4a5a821..26df13b 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_other.go
@@ -1,4 +1,5 @@
-// +build !amd64 appengine !gc purego
+//go:build (!amd64 && !arm64) || appengine || !gc || purego
+// +build !amd64,!arm64 appengine !gc purego
package xxhash
@@ -14,10 +15,10 @@
var h uint64
if n >= 32 {
- v1 := prime1v + prime2
+ v1 := primes[0] + prime2
v2 := prime2
v3 := uint64(0)
- v4 := -prime1v
+ v4 := -primes[0]
for len(b) >= 32 {
v1 = round(v1, u64(b[0:8:len(b)]))
v2 = round(v2, u64(b[8:16:len(b)]))
@@ -36,19 +37,18 @@
h += uint64(n)
- i, end := 0, len(b)
- for ; i+8 <= end; i += 8 {
- k1 := round(0, u64(b[i:i+8:len(b)]))
+ for ; len(b) >= 8; b = b[8:] {
+ k1 := round(0, u64(b[:8]))
h ^= k1
h = rol27(h)*prime1 + prime4
}
- if i+4 <= end {
- h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+ if len(b) >= 4 {
+ h ^= uint64(u32(b[:4])) * prime1
h = rol23(h)*prime2 + prime3
- i += 4
+ b = b[4:]
}
- for ; i < end; i++ {
- h ^= uint64(b[i]) * prime5
+ for ; len(b) > 0; b = b[1:] {
+ h ^= uint64(b[0]) * prime5
h = rol11(h) * prime1
}
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
index fc9bea7..e86f1b5 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_safe.go
@@ -1,3 +1,4 @@
+//go:build appengine
// +build appengine
// This file contains the safe implementations of otherwise unsafe-using code.
diff --git a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
index 53bf76e..1c1638f 100644
--- a/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
+++ b/vendor/github.com/cespare/xxhash/v2/xxhash_unsafe.go
@@ -1,3 +1,4 @@
+//go:build !appengine
// +build !appengine
// This file encapsulates usage of unsafe.
@@ -6,41 +7,52 @@
package xxhash
import (
- "reflect"
"unsafe"
)
-// Notes:
-//
-// See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ
-// for some discussion about these unsafe conversions.
-//
// In the future it's possible that compiler optimizations will make these
-// unsafe operations unnecessary: https://golang.org/issue/2205.
+// XxxString functions unnecessary by realizing that calls such as
+// Sum64([]byte(s)) don't need to copy s. See https://go.dev/issue/2205.
+// If that happens, even if we keep these functions they can be replaced with
+// the trivial safe code.
+
+// NOTE: The usual way of doing an unsafe string-to-[]byte conversion is:
//
-// Both of these wrapper functions still incur function call overhead since they
-// will not be inlined. We could write Go/asm copies of Sum64 and Digest.Write
-// for strings to squeeze out a bit more speed. Mid-stack inlining should
-// eventually fix this.
+// var b []byte
+// bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
+// bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
+// bh.Len = len(s)
+// bh.Cap = len(s)
+//
+// Unfortunately, as of Go 1.15.3 the inliner's cost model assigns a high enough
+// weight to this sequence of expressions that any function that uses it will
+// not be inlined. Instead, the functions below use a different unsafe
+// conversion designed to minimize the inliner weight and allow both to be
+// inlined. There is also a test (TestInlining) which verifies that these are
+// inlined.
+//
+// See https://github.com/golang/go/issues/42739 for discussion.
// Sum64String computes the 64-bit xxHash digest of s.
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
func Sum64String(s string) uint64 {
- var b []byte
- bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
- bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
- bh.Len = len(s)
- bh.Cap = len(s)
+ b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))
return Sum64(b)
}
// WriteString adds more data to d. It always returns len(s), nil.
// It may be faster than Write([]byte(s)) by avoiding a copy.
func (d *Digest) WriteString(s string) (n int, err error) {
- var b []byte
- bh := (*reflect.SliceHeader)(unsafe.Pointer(&b))
- bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data
- bh.Len = len(s)
- bh.Cap = len(s)
- return d.Write(b)
+ d.Write(*(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)})))
+ // d.Write always returns len(s), nil.
+ // Ignoring the return output and returning these fixed values buys a
+ // savings of 6 in the inliner's cost model.
+ return len(s), nil
+}
+
+// sliceHeader is similar to reflect.SliceHeader, but it assumes that the layout
+// of the first two words is the same as the layout of a string.
+type sliceHeader struct {
+ s string
+ cap int
}