[VOL-2941] Upgrading to latest protos and lib
Change-Id: I2ce126c0fd78735ecd53a4c3b1e34f2de42cbdf3
diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s
new file mode 100644
index 0000000..757f201
--- /dev/null
+++ b/vendor/github.com/cespare/xxhash/xxhash_amd64.s
@@ -0,0 +1,233 @@
+// +build !appengine
+// +build gc
+// +build !purego
+
+#include "textflag.h"
+
+// Register allocation:
+// AX h
+// CX pointer to advance through b
+// DX n
+// BX loop end
+// R8 v1, k1
+// R9 v2
+// R10 v3
+// R11 v4
+// R12 tmp
+// R13 prime1v
+// R14 prime2v
+// R15 prime4v
+
+// round(r) loads the 8 bytes at (CX), advances CX by 8, and updates r in place as r = rol64(r + input*R14, 31) * R13; R12 is clobbered as scratch.
+// It assumes that R13 has prime1v and R14 has prime2v.
+#define round(r) \
+ MOVQ (CX), R12 \
+ ADDQ $8, CX \
+ IMULQ R14, R12 \
+ ADDQ R12, r \
+ ROLQ $31, r \
+ IMULQ R13, r
+
+// mergeRound(acc, val) first rewrites val in place as val = rol64(val*R14, 31) * R13 (so val is destroyed), then sets acc = (acc ^ val)*R13 + R15.
+// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
+#define mergeRound(acc, val) \
+ IMULQ R14, val \
+ ROLQ $31, val \
+ IMULQ R13, val \
+ XORQ val, acc \
+ IMULQ R13, acc \
+ ADDQ R15, acc
+
+// func Sum64(b []byte) uint64
+TEXT ·Sum64(SB), NOSPLIT, $0-32 // no local frame; 32 bytes of args+result (slice header at 0..23, result at 24)
+ // Load fixed primes into the registers that round/mergeRound expect.
+ MOVQ ·prime1v(SB), R13
+ MOVQ ·prime2v(SB), R14
+ MOVQ ·prime4v(SB), R15
+
+ // Load slice: CX = base pointer, DX = length, BX = base+length (one past the last byte).
+ MOVQ b_base+0(FP), CX
+ MOVQ b_len+8(FP), DX
+ LEAQ (CX)(DX*1), BX
+
+ // The first loop limit will be the address of b[len(b)-32]: the last position where a full 32-byte block can start.
+ SUBQ $32, BX
+
+ // Check whether we have at least one block (len(b) >= 32); otherwise skip the block phase entirely.
+ CMPQ DX, $32
+ JLT noBlocks
+
+ // Set up initial state (v1, v2, v3, v4).
+ MOVQ R13, R8 // v1 = prime1v ...
+ ADDQ R14, R8 // ... + prime2v
+ MOVQ R14, R9 // v2 = prime2v
+ XORQ R10, R10 // v3 = 0
+ XORQ R11, R11 // v4 = 0 ...
+ SUBQ R13, R11 // ... - prime1v
+
+ // Each iteration consumes 32 bytes (8 per round). Loop until CX > BX, i.e. fewer than 32 bytes remain.
+blockLoop:
+ round(R8)
+ round(R9)
+ round(R10)
+ round(R11)
+
+ CMPQ CX, BX
+ JLE blockLoop
+
+ MOVQ R8, AX // h = rol(v1, 1) + rol(v2, 7) + rol(v3, 12) + rol(v4, 18); R12 is the scratch for each rotated copy
+ ROLQ $1, AX
+ MOVQ R9, R12
+ ROLQ $7, R12
+ ADDQ R12, AX
+ MOVQ R10, R12
+ ROLQ $12, R12
+ ADDQ R12, AX
+ MOVQ R11, R12
+ ROLQ $18, R12
+ ADDQ R12, AX
+
+ mergeRound(AX, R8) // fold each accumulator into h; the vN registers are destroyed, which is fine since they are dead afterwards
+ mergeRound(AX, R9)
+ mergeRound(AX, R10)
+ mergeRound(AX, R11)
+
+ JMP afterBlocks
+
+noBlocks:
+ MOVQ ·prime5v(SB), AX // input shorter than 32 bytes: h starts from prime5v instead of the merged accumulators
+
+afterBlocks:
+ ADDQ DX, AX // h += len(b)
+
+ // Right now BX points at b[len(b)-32], and we want to loop until CX points past b[len(b)-8].
+ ADDQ $24, BX
+
+ CMPQ CX, BX // fewer than 8 bytes left: skip the 8-byte word loop
+ JG fourByte
+
+wordLoop:
+ // Calculate k1 = rol(word*prime2v, 31) * prime1v from the next 8 input bytes.
+ MOVQ (CX), R8
+ ADDQ $8, CX
+ IMULQ R14, R8
+ ROLQ $31, R8
+ IMULQ R13, R8
+
+ XORQ R8, AX // h = rol(h ^ k1, 27)*prime1v + prime4v
+ ROLQ $27, AX
+ IMULQ R13, AX
+ ADDQ R15, AX
+
+ CMPQ CX, BX
+ JLE wordLoop
+
+fourByte:
+ ADDQ $4, BX // BX now points at b[len(b)-4]
+ CMPQ CX, BX // fewer than 4 bytes left: go straight to the single-byte tail
+ JG singles
+
+ MOVL (CX), R8 // 32-bit load; the upper half of R8 is zeroed
+ ADDQ $4, CX
+ IMULQ R13, R8
+ XORQ R8, AX // h ^= uint64(word32) * prime1v
+
+ ROLQ $23, AX // h = rol(h, 23)*prime2v + prime3v
+ IMULQ R14, AX
+ ADDQ ·prime3v(SB), AX
+
+singles:
+ ADDQ $4, BX // BX now points one past the last byte of b
+ CMPQ CX, BX // nothing left: skip the byte loop
+ JGE finalize
+
+singlesLoop:
+ MOVBQZX (CX), R12 // zero-extended single byte
+ ADDQ $1, CX
+ IMULQ ·prime5v(SB), R12
+ XORQ R12, AX // h ^= uint64(byte) * prime5v
+
+ ROLQ $11, AX // h = rol(h, 11) * prime1v
+ IMULQ R13, AX
+
+ CMPQ CX, BX
+ JL singlesLoop
+
+finalize:
+ MOVQ AX, R12 // final mixing: h ^= h>>33; h *= prime2v; h ^= h>>29; h *= prime3v; h ^= h>>32
+ SHRQ $33, R12
+ XORQ R12, AX
+ IMULQ R14, AX
+ MOVQ AX, R12
+ SHRQ $29, R12
+ XORQ R12, AX
+ IMULQ ·prime3v(SB), AX
+ MOVQ AX, R12
+ SHRQ $32, R12
+ XORQ R12, AX
+
+ MOVQ AX, ret+24(FP) // store the uint64 result
+ RET
+
+// writeBlocks runs round over every full 32-byte block of b, updating the four 8-byte accumulators stored at offsets 0..24 of *x, and returns the unconsumed tail of b.
+// It uses the same registers as Sum64 above except that it uses AX to store the x pointer.
+
+// func writeBlocks(x *xxh, b []byte) []byte
+TEXT ·writeBlocks(SB), NOSPLIT, $0-56 // no local frame; 56 bytes of args+result (x at 0, b at 8..31, result slice at 32..55)
+ // Load fixed primes needed for round.
+ MOVQ ·prime1v(SB), R13
+ MOVQ ·prime2v(SB), R14
+
+ // Load slice: CX = base pointer, DX = length, BX = address of b[len(b)-32] (last position where a full block can start).
+ MOVQ b_base+8(FP), CX
+ MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below
+ MOVQ b_len+16(FP), DX
+ LEAQ (CX)(DX*1), BX
+ SUBQ $32, BX
+
+ // Load vN from x.
+ MOVQ x+0(FP), AX
+ MOVQ 0(AX), R8 // v1
+ MOVQ 8(AX), R9 // v2
+ MOVQ 16(AX), R10 // v3
+ MOVQ 24(AX), R11 // v4
+
+ // We don't need to check the loop condition here; this function is
+ // always called with at least one block of data to process.
+blockLoop:
+ round(R8)
+ round(R9)
+ round(R10)
+ round(R11)
+
+ CMPQ CX, BX // keep going while a full 32-byte block still starts at CX
+ JLE blockLoop
+
+ // Copy vN back to x.
+ MOVQ R8, 0(AX)
+ MOVQ R9, 8(AX)
+ MOVQ R10, 16(AX)
+ MOVQ R11, 24(AX)
+
+ // Construct return slice.
+ // NOTE: It's important that we don't construct a slice that has a base
+ // pointer off the end of the original slice, as in Go 1.7+ this will
+ // cause runtime crashes. (See discussion in, for example,
+ // https://github.com/golang/go/issues/16772.)
+ // Therefore, we calculate the length/cap first, and if they're zero, we
+ // keep the old base. This is what the compiler does as well if you
+ // write code like
+ // b = b[len(b):]
+
+ // New length is 32 - (CX - BX) -> BX+32 - CX.
+ ADDQ $32, BX // BX = one past the last byte of b
+ SUBQ CX, BX // BX = number of unconsumed bytes; also sets ZF for the JZ below
+ JZ afterSetBase
+
+ MOVQ CX, ret_base+32(FP) // non-empty remainder: the returned slice starts at the first unconsumed byte
+
+afterSetBase:
+ MOVQ BX, ret_len+40(FP)
+ MOVQ BX, ret_cap+48(FP) // set cap == len
+
+ RET