[VOL-4291] Rw-core updates for gRPC migration
Change-Id: I8d5a554409115b29318089671ca4e1ab3fa98810
diff --git a/vendor/github.com/golang/snappy/AUTHORS b/vendor/github.com/golang/snappy/AUTHORS
index bcfa195..203e84e 100644
--- a/vendor/github.com/golang/snappy/AUTHORS
+++ b/vendor/github.com/golang/snappy/AUTHORS
@@ -8,8 +8,10 @@
# Please keep the list sorted.
+Amazon.com, Inc
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
+Klaus Post <klauspost@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>
diff --git a/vendor/github.com/golang/snappy/CONTRIBUTORS b/vendor/github.com/golang/snappy/CONTRIBUTORS
index 931ae31..d991473 100644
--- a/vendor/github.com/golang/snappy/CONTRIBUTORS
+++ b/vendor/github.com/golang/snappy/CONTRIBUTORS
@@ -28,7 +28,9 @@
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
+Jonathan Swinney <jswinney@amazon.com>
Kai Backman <kaib@golang.org>
+Klaus Post <klauspost@gmail.com>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
diff --git a/vendor/github.com/golang/snappy/decode.go b/vendor/github.com/golang/snappy/decode.go
index 72efb03..f1e04b1 100644
--- a/vendor/github.com/golang/snappy/decode.go
+++ b/vendor/github.com/golang/snappy/decode.go
@@ -52,6 +52,8 @@
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Decode handles the Snappy block format, not the Snappy stream format.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
@@ -83,6 +85,8 @@
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
+//
+// Reader handles the Snappy stream format, not the Snappy block format.
type Reader struct {
r io.Reader
err error
diff --git a/vendor/github.com/golang/snappy/decode_arm64.s b/vendor/github.com/golang/snappy/decode_arm64.s
new file mode 100644
index 0000000..7a3ead1
--- /dev/null
+++ b/vendor/github.com/golang/snappy/decode_arm64.s
@@ -0,0 +1,494 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in decode_other.go, except
+// where marked with a "!!!".
+
+// func decode(dst, src []byte) int
+//
+// All local variables fit into registers. The non-zero stack size is only to
+// spill registers and push args when issuing a CALL. The register allocation:
+// - R2 scratch
+// - R3 scratch
+// - R4 length or x
+// - R5 offset
+// - R6 &src[s]
+// - R7 &dst[d]
+// + R8 dst_base
+// + R9 dst_len
+// + R10 dst_base + dst_len
+// + R11 src_base
+// + R12 src_len
+// + R13 src_base + src_len
+// - R14 used by doCopy
+// - R15 used by doCopy
+//
+// The registers R8-R13 (marked with a "+") are set at the start of the
+// function, and after a CALL returns, and are not otherwise modified.
+//
+// The d variable is implicitly R7 - R8, and len(dst)-d is R10 - R7.
+// The s variable is implicitly R6 - R11, and len(src)-s is R13 - R6.
+TEXT ·decode(SB), NOSPLIT, $56-56
+ // Initialize R6, R7 and R8-R13.
+ MOVD dst_base+0(FP), R8
+ MOVD dst_len+8(FP), R9
+ MOVD R8, R7
+ MOVD R8, R10
+ ADD R9, R10, R10
+ MOVD src_base+24(FP), R11
+ MOVD src_len+32(FP), R12
+ MOVD R11, R6
+ MOVD R11, R13
+ ADD R12, R13, R13
+
+loop:
+ // for s < len(src)
+ CMP R13, R6
+ BEQ end
+
+ // R4 = uint32(src[s])
+ //
+ // switch src[s] & 0x03
+ MOVBU (R6), R4
+ MOVW R4, R3
+ ANDW $3, R3
+ MOVW $1, R1
+ CMPW R1, R3
+ BGE tagCopy
+
+ // ----------------------------------------
+ // The code below handles literal tags.
+
+ // case tagLiteral:
+ // x := uint32(src[s] >> 2)
+ // switch
+ MOVW $60, R1
+ LSRW $2, R4, R4
+ CMPW R4, R1
+ BLS tagLit60Plus
+
+ // case x < 60:
+ // s++
+ ADD $1, R6, R6
+
+doLit:
+ // This is the end of the inner "switch", when we have a literal tag.
+ //
+ // We assume that R4 == x and x fits in a uint32, where x is the variable
+ // used in the pure Go decode_other.go code.
+
+ // length = int(x) + 1
+ //
+ // Unlike the pure Go code, we don't need to check if length <= 0 because
+ // R4 can hold 64 bits, so the increment cannot overflow.
+ ADD $1, R4, R4
+
+ // Prepare to check if copying length bytes will run past the end of dst or
+ // src.
+ //
+ // R2 = len(dst) - d
+ // R3 = len(src) - s
+ MOVD R10, R2
+ SUB R7, R2, R2
+ MOVD R13, R3
+ SUB R6, R3, R3
+
+ // !!! Try a faster technique for short (16 or fewer bytes) copies.
+ //
+ // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
+ // goto callMemmove // Fall back on calling runtime·memmove.
+ // }
+ //
+ // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
+ // against 21 instead of 16, because it cannot assume that all of its input
+ // is contiguous in memory and so it needs to leave enough source bytes to
+ // read the next tag without refilling buffers, but Go's Decode assumes
+ // contiguousness (the src argument is a []byte).
+ CMP $16, R4
+ BGT callMemmove
+ CMP $16, R2
+ BLT callMemmove
+ CMP $16, R3
+ BLT callMemmove
+
+ // !!! Implement the copy from src to dst as a 16-byte load and store.
+ // (Decode's documentation says that dst and src must not overlap.)
+ //
+ // This always copies 16 bytes, instead of only length bytes, but that's
+ // OK. If the input is a valid Snappy encoding then subsequent iterations
+ // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
+ // non-nil error), so the overrun will be ignored.
+ //
+ // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+ // 16-byte loads and stores. This technique probably wouldn't be as
+ // effective on architectures that are fussier about alignment.
+ LDP 0(R6), (R14, R15)
+ STP (R14, R15), 0(R7)
+
+ // d += length
+ // s += length
+ ADD R4, R7, R7
+ ADD R4, R6, R6
+ B loop
+
+callMemmove:
+ // if length > len(dst)-d || length > len(src)-s { etc }
+ CMP R2, R4
+ BGT errCorrupt
+ CMP R3, R4
+ BGT errCorrupt
+
+ // copy(dst[d:], src[s:s+length])
+ //
+ // This means calling runtime·memmove(&dst[d], &src[s], length), so we push
+ // R7, R6 and R4 as arguments. Coincidentally, we also need to spill those
+ // three registers to the stack, to save local variables across the CALL.
+ MOVD R7, 8(RSP)
+ MOVD R6, 16(RSP)
+ MOVD R4, 24(RSP)
+ MOVD R7, 32(RSP)
+ MOVD R6, 40(RSP)
+ MOVD R4, 48(RSP)
+ CALL runtime·memmove(SB)
+
+ // Restore local variables: unspill registers from the stack and
+ // re-calculate R8-R13.
+ MOVD 32(RSP), R7
+ MOVD 40(RSP), R6
+ MOVD 48(RSP), R4
+ MOVD dst_base+0(FP), R8
+ MOVD dst_len+8(FP), R9
+ MOVD R8, R10
+ ADD R9, R10, R10
+ MOVD src_base+24(FP), R11
+ MOVD src_len+32(FP), R12
+ MOVD R11, R13
+ ADD R12, R13, R13
+
+ // d += length
+ // s += length
+ ADD R4, R7, R7
+ ADD R4, R6, R6
+ B loop
+
+tagLit60Plus:
+ // !!! This fragment does the
+ //
+ // s += x - 58; if uint(s) > uint(len(src)) { etc }
+ //
+ // checks. In the asm version, we code it once instead of once per switch case.
+ ADD R4, R6, R6
+ SUB $58, R6, R6
+ MOVD R6, R3
+ SUB R11, R3, R3
+ CMP R12, R3
+ BGT errCorrupt
+
+ // case x == 60:
+ MOVW $61, R1
+ CMPW R1, R4
+ BEQ tagLit61
+ BGT tagLit62Plus
+
+ // x = uint32(src[s-1])
+ MOVBU -1(R6), R4
+ B doLit
+
+tagLit61:
+ // case x == 61:
+ // x = uint32(src[s-2]) | uint32(src[s-1])<<8
+ MOVHU -2(R6), R4
+ B doLit
+
+tagLit62Plus:
+ CMPW $62, R4
+ BHI tagLit63
+
+ // case x == 62:
+ // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+ MOVHU -3(R6), R4
+ MOVBU -1(R6), R3
+ ORR R3<<16, R4
+ B doLit
+
+tagLit63:
+ // case x == 63:
+ // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+ MOVWU -4(R6), R4
+ B doLit
+
+ // The code above handles literal tags.
+ // ----------------------------------------
+ // The code below handles copy tags.
+
+tagCopy4:
+ // case tagCopy4:
+ // s += 5
+ ADD $5, R6, R6
+
+ // if uint(s) > uint(len(src)) { etc }
+ MOVD R6, R3
+ SUB R11, R3, R3
+ CMP R12, R3
+ BGT errCorrupt
+
+ // length = 1 + int(src[s-5])>>2
+ MOVD $1, R1
+ ADD R4>>2, R1, R4
+
+ // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
+ MOVWU -4(R6), R5
+ B doCopy
+
+tagCopy2:
+ // case tagCopy2:
+ // s += 3
+ ADD $3, R6, R6
+
+ // if uint(s) > uint(len(src)) { etc }
+ MOVD R6, R3
+ SUB R11, R3, R3
+ CMP R12, R3
+ BGT errCorrupt
+
+ // length = 1 + int(src[s-3])>>2
+ MOVD $1, R1
+ ADD R4>>2, R1, R4
+
+ // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
+ MOVHU -2(R6), R5
+ B doCopy
+
+tagCopy:
+ // We have a copy tag. We assume that:
+ // - R3 == src[s] & 0x03
+ // - R4 == src[s]
+ CMP $2, R3
+ BEQ tagCopy2
+ BGT tagCopy4
+
+ // case tagCopy1:
+ // s += 2
+ ADD $2, R6, R6
+
+ // if uint(s) > uint(len(src)) { etc }
+ MOVD R6, R3
+ SUB R11, R3, R3
+ CMP R12, R3
+ BGT errCorrupt
+
+ // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
+ MOVD R4, R5
+ AND $0xe0, R5
+ MOVBU -1(R6), R3
+ ORR R5<<3, R3, R5
+
+ // length = 4 + int(src[s-2])>>2&0x7
+ MOVD $7, R1
+ AND R4>>2, R1, R4
+ ADD $4, R4, R4
+
+doCopy:
+ // This is the end of the outer "switch", when we have a copy tag.
+ //
+ // We assume that:
+ // - R4 == length && R4 > 0
+ // - R5 == offset
+
+ // if offset <= 0 { etc }
+ MOVD $0, R1
+ CMP R1, R5
+ BLE errCorrupt
+
+ // if d < offset { etc }
+ MOVD R7, R3
+ SUB R8, R3, R3
+ CMP R5, R3
+ BLT errCorrupt
+
+ // if length > len(dst)-d { etc }
+ MOVD R10, R3
+ SUB R7, R3, R3
+ CMP R3, R4
+ BGT errCorrupt
+
+ // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
+ //
+ // Set:
+ // - R14 = len(dst)-d
+ // - R15 = &dst[d-offset]
+ MOVD R10, R14
+ SUB R7, R14, R14
+ MOVD R7, R15
+ SUB R5, R15, R15
+
+ // !!! Try a faster technique for short (16 or fewer bytes) forward copies.
+ //
+ // First, try using two 8-byte load/stores, similar to the doLit technique
+ // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
+ // still OK if offset >= 8. Note that this has to be two 8-byte load/stores
+ // and not one 16-byte load/store, and the first store has to be before the
+ // second load, due to the overlap if offset is in the range [8, 16).
+ //
+ // if length > 16 || offset < 8 || len(dst)-d < 16 {
+ // goto slowForwardCopy
+ // }
+ // copy 16 bytes
+ // d += length
+ CMP $16, R4
+ BGT slowForwardCopy
+ CMP $8, R5
+ BLT slowForwardCopy
+ CMP $16, R14
+ BLT slowForwardCopy
+ MOVD 0(R15), R2
+ MOVD R2, 0(R7)
+ MOVD 8(R15), R3
+ MOVD R3, 8(R7)
+ ADD R4, R7, R7
+ B loop
+
+slowForwardCopy:
+ // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
+ // can still try 8-byte load stores, provided we can overrun up to 10 extra
+ // bytes. As above, the overrun will be fixed up by subsequent iterations
+ // of the outermost loop.
+ //
+ // The C++ snappy code calls this technique IncrementalCopyFastPath. Its
+ // commentary says:
+ //
+ // ----
+ //
+ // The main part of this loop is a simple copy of eight bytes at a time
+ // until we've copied (at least) the requested amount of bytes. However,
+ // if d and d-offset are less than eight bytes apart (indicating a
+ // repeating pattern of length < 8), we first need to expand the pattern in
+ // order to get the correct results. For instance, if the buffer looks like
+ // this, with the eight-byte <d-offset> and <d> patterns marked as
+ // intervals:
+ //
+ // abxxxxxxxxxxxx
+ // [------] d-offset
+ // [------] d
+ //
+ // a single eight-byte copy from <d-offset> to <d> will repeat the pattern
+ // once, after which we can move <d> two bytes without moving <d-offset>:
+ //
+ // ababxxxxxxxxxx
+ // [------] d-offset
+ // [------] d
+ //
+ // and repeat the exercise until the two no longer overlap.
+ //
+ // This allows us to do very well in the special case of one single byte
+ // repeated many times, without taking a big hit for more general cases.
+ //
+ // The worst case of extra writing past the end of the match occurs when
+ // offset == 1 and length == 1; the last copy will read from byte positions
+ // [0..7] and write to [4..11], whereas it was only supposed to write to
+ // position 1. Thus, ten excess bytes.
+ //
+ // ----
+ //
+ // That "10 byte overrun" worst case is confirmed by Go's
+ // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
+ // and finishSlowForwardCopy algorithm.
+ //
+ // if length > len(dst)-d-10 {
+ // goto verySlowForwardCopy
+ // }
+ SUB $10, R14, R14
+ CMP R14, R4
+ BGT verySlowForwardCopy
+
+makeOffsetAtLeast8:
+ // !!! As above, expand the pattern so that offset >= 8 and we can use
+ // 8-byte load/stores.
+ //
+ // for offset < 8 {
+ // copy 8 bytes from dst[d-offset:] to dst[d:]
+ // length -= offset
+ // d += offset
+ // offset += offset
+ // // The two previous lines together means that d-offset, and therefore
+ // // R15, is unchanged.
+ // }
+ CMP $8, R5
+ BGE fixUpSlowForwardCopy
+ MOVD (R15), R3
+ MOVD R3, (R7)
+ SUB R5, R4, R4
+ ADD R5, R7, R7
+ ADD R5, R5, R5
+ B makeOffsetAtLeast8
+
+fixUpSlowForwardCopy:
+ // !!! Add length (which might be negative now) to d (implied by R7 being
+ // &dst[d]) so that d ends up at the right place when we jump back to the
+ // top of the loop. Before we do that, though, we save R7 to R2 so that, if
+ // length is positive, copying the remaining length bytes will write to the
+ // right place.
+ MOVD R7, R2
+ ADD R4, R7, R7
+
+finishSlowForwardCopy:
+ // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
+ // length means that we overrun, but as above, that will be fixed up by
+ // subsequent iterations of the outermost loop.
+ MOVD $0, R1
+ CMP R1, R4
+ BLE loop
+ MOVD (R15), R3
+ MOVD R3, (R2)
+ ADD $8, R15, R15
+ ADD $8, R2, R2
+ SUB $8, R4, R4
+ B finishSlowForwardCopy
+
+verySlowForwardCopy:
+ // verySlowForwardCopy is a simple implementation of forward copy. In C
+ // parlance, this is a do/while loop instead of a while loop, since we know
+ // that length > 0. In Go syntax:
+ //
+ // for {
+ // dst[d] = dst[d - offset]
+ // d++
+ // length--
+ // if length == 0 {
+ // break
+ // }
+ // }
+ MOVB (R15), R3
+ MOVB R3, (R7)
+ ADD $1, R15, R15
+ ADD $1, R7, R7
+ SUB $1, R4, R4
+ CBNZ R4, verySlowForwardCopy
+ B loop
+
+ // The code above handles copy tags.
+ // ----------------------------------------
+
+end:
+ // This is the end of the "for s < len(src)".
+ //
+ // if d != len(dst) { etc }
+ CMP R10, R7
+ BNE errCorrupt
+
+ // return 0
+ MOVD $0, ret+48(FP)
+ RET
+
+errCorrupt:
+ // return decodeErrCodeCorrupt
+ MOVD $1, R2
+ MOVD R2, ret+48(FP)
+ RET
diff --git a/vendor/github.com/golang/snappy/decode_amd64.go b/vendor/github.com/golang/snappy/decode_asm.go
similarity index 93%
rename from vendor/github.com/golang/snappy/decode_amd64.go
rename to vendor/github.com/golang/snappy/decode_asm.go
index fcd192b..7082b34 100644
--- a/vendor/github.com/golang/snappy/decode_amd64.go
+++ b/vendor/github.com/golang/snappy/decode_asm.go
@@ -5,6 +5,7 @@
// +build !appengine
// +build gc
// +build !noasm
+// +build amd64 arm64
package snappy
diff --git a/vendor/github.com/golang/snappy/decode_other.go b/vendor/github.com/golang/snappy/decode_other.go
index 8c9f204..2f672be 100644
--- a/vendor/github.com/golang/snappy/decode_other.go
+++ b/vendor/github.com/golang/snappy/decode_other.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !amd64 appengine !gc noasm
+// +build !amd64,!arm64 appengine !gc noasm
package snappy
@@ -85,14 +85,28 @@
if offset <= 0 || d < offset || length > len(dst)-d {
return decodeErrCodeCorrupt
}
- // Copy from an earlier sub-slice of dst to a later sub-slice. Unlike
- // the built-in copy function, this byte-by-byte copy always runs
+ // Copy from an earlier sub-slice of dst to a later sub-slice.
+ // If no overlap, use the built-in copy:
+ if offset >= length {
+ copy(dst[d:d+length], dst[d-offset:])
+ d += length
+ continue
+ }
+
+ // Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
- for end := d + length; d != end; d++ {
- dst[d] = dst[d-offset]
+ //
+ // We align the slices into a and b and show the compiler they are the same size.
+ // This allows the loop to run without bounds checks.
+ a := dst[d : d+length]
+ b := dst[d-offset:]
+ b = b[:len(a)]
+ for i := range a {
+ a[i] = b[i]
}
+ d += length
}
if d != len(dst) {
return decodeErrCodeCorrupt
diff --git a/vendor/github.com/golang/snappy/encode.go b/vendor/github.com/golang/snappy/encode.go
index 8d393e9..7f23657 100644
--- a/vendor/github.com/golang/snappy/encode.go
+++ b/vendor/github.com/golang/snappy/encode.go
@@ -15,6 +15,8 @@
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
+//
+// Encode handles the Snappy block format, not the Snappy stream format.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
@@ -139,6 +141,8 @@
}
// Writer is an io.Writer that can write Snappy-compressed bytes.
+//
+// Writer handles the Snappy stream format, not the Snappy block format.
type Writer struct {
w io.Writer
err error
diff --git a/vendor/github.com/golang/snappy/encode_arm64.s b/vendor/github.com/golang/snappy/encode_arm64.s
new file mode 100644
index 0000000..bf83667
--- /dev/null
+++ b/vendor/github.com/golang/snappy/encode_arm64.s
@@ -0,0 +1,722 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// The asm code generally follows the pure Go code in encode_other.go, except
+// where marked with a "!!!".
+
+// ----------------------------------------------------------------------------
+
+// func emitLiteral(dst, lit []byte) int
+//
+// All local variables fit into registers. The register allocation:
+// - R3 len(lit)
+// - R4 n
+// - R6 return value
+// - R8 &dst[i]
+// - R10 &lit[0]
+//
+// The 32 bytes of stack space is to call runtime·memmove.
+//
+// The unusual register allocation of local variables, such as R10 for the
+// source pointer, matches the allocation used at the call site in encodeBlock,
+// which makes it easier to manually inline this function.
+TEXT ·emitLiteral(SB), NOSPLIT, $32-56
+ MOVD dst_base+0(FP), R8
+ MOVD lit_base+24(FP), R10
+ MOVD lit_len+32(FP), R3
+ MOVD R3, R6
+ MOVW R3, R4
+ SUBW $1, R4, R4
+
+ CMPW $60, R4
+ BLT oneByte
+ CMPW $256, R4
+ BLT twoBytes
+
+threeBytes:
+ MOVD $0xf4, R2
+ MOVB R2, 0(R8)
+ MOVW R4, 1(R8)
+ ADD $3, R8, R8
+ ADD $3, R6, R6
+ B memmove
+
+twoBytes:
+ MOVD $0xf0, R2
+ MOVB R2, 0(R8)
+ MOVB R4, 1(R8)
+ ADD $2, R8, R8
+ ADD $2, R6, R6
+ B memmove
+
+oneByte:
+ LSLW $2, R4, R4
+ MOVB R4, 0(R8)
+ ADD $1, R8, R8
+ ADD $1, R6, R6
+
+memmove:
+ MOVD R6, ret+48(FP)
+
+ // copy(dst[i:], lit)
+ //
+ // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+ // R8, R10 and R3 as arguments.
+ MOVD R8, 8(RSP)
+ MOVD R10, 16(RSP)
+ MOVD R3, 24(RSP)
+ CALL runtime·memmove(SB)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func emitCopy(dst []byte, offset, length int) int
+//
+// All local variables fit into registers. The register allocation:
+// - R3 length
+// - R7 &dst[0]
+// - R8 &dst[i]
+// - R11 offset
+//
+// The unusual register allocation of local variables, such as R11 for the
+// offset, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·emitCopy(SB), NOSPLIT, $0-48
+ MOVD dst_base+0(FP), R8
+ MOVD R8, R7
+ MOVD offset+24(FP), R11
+ MOVD length+32(FP), R3
+
+loop0:
+ // for length >= 68 { etc }
+ CMPW $68, R3
+ BLT step1
+
+ // Emit a length 64 copy, encoded as 3 bytes.
+ MOVD $0xfe, R2
+ MOVB R2, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+ SUB $64, R3, R3
+ B loop0
+
+step1:
+ // if length > 64 { etc }
+ CMP $64, R3
+ BLE step2
+
+ // Emit a length 60 copy, encoded as 3 bytes.
+ MOVD $0xee, R2
+ MOVB R2, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+ SUB $60, R3, R3
+
+step2:
+ // if length >= 12 || offset >= 2048 { goto step3 }
+ CMP $12, R3
+ BGE step3
+ CMPW $2048, R11
+ BGE step3
+
+ // Emit the remaining copy, encoded as 2 bytes.
+ MOVB R11, 1(R8)
+ LSRW $3, R11, R11
+ AND $0xe0, R11, R11
+ SUB $4, R3, R3
+ LSLW $2, R3
+ AND $0xff, R3, R3
+ ORRW R3, R11, R11
+ ORRW $1, R11, R11
+ MOVB R11, 0(R8)
+ ADD $2, R8, R8
+
+ // Return the number of bytes written.
+ SUB R7, R8, R8
+ MOVD R8, ret+40(FP)
+ RET
+
+step3:
+ // Emit the remaining copy, encoded as 3 bytes.
+ SUB $1, R3, R3
+ AND $0xff, R3, R3
+ LSLW $2, R3, R3
+ ORRW $2, R3, R3
+ MOVB R3, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+
+ // Return the number of bytes written.
+ SUB R7, R8, R8
+ MOVD R8, ret+40(FP)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func extendMatch(src []byte, i, j int) int
+//
+// All local variables fit into registers. The register allocation:
+// - R6 &src[0]
+// - R7 &src[j]
+// - R13 &src[len(src) - 8]
+// - R14 &src[len(src)]
+// - R15 &src[i]
+//
+// The unusual register allocation of local variables, such as R15 for a source
+// pointer, matches the allocation used at the call site in encodeBlock, which
+// makes it easier to manually inline this function.
+TEXT ·extendMatch(SB), NOSPLIT, $0-48
+ MOVD src_base+0(FP), R6
+ MOVD src_len+8(FP), R14
+ MOVD i+24(FP), R15
+ MOVD j+32(FP), R7
+ ADD R6, R14, R14
+ ADD R6, R15, R15
+ ADD R6, R7, R7
+ MOVD R14, R13
+ SUB $8, R13, R13
+
+cmp8:
+ // As long as we are 8 or more bytes before the end of src, we can load and
+ // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+ CMP R13, R7
+ BHI cmp1
+ MOVD (R15), R3
+ MOVD (R7), R4
+ CMP R4, R3
+ BNE bsf
+ ADD $8, R15, R15
+ ADD $8, R7, R7
+ B cmp8
+
+bsf:
+ // If those 8 bytes were not equal, XOR the two 8 byte values, and return
+ // the index of the first byte that differs.
+ // RBIT reverses the bit order, then CLZ counts the leading zeros, the
+ // combination of which finds the least significant bit which is set.
+ // The arm64 architecture is little-endian, and the shift by 3 converts
+ // a bit index to a byte index.
+ EOR R3, R4, R4
+ RBIT R4, R4
+ CLZ R4, R4
+ ADD R4>>3, R7, R7
+
+ // Convert from &src[ret] to ret.
+ SUB R6, R7, R7
+ MOVD R7, ret+40(FP)
+ RET
+
+cmp1:
+ // In src's tail, compare 1 byte at a time.
+ CMP R7, R14
+ BLS extendMatchEnd
+ MOVB (R15), R3
+ MOVB (R7), R4
+ CMP R4, R3
+ BNE extendMatchEnd
+ ADD $1, R15, R15
+ ADD $1, R7, R7
+ B cmp1
+
+extendMatchEnd:
+ // Convert from &src[ret] to ret.
+ SUB R6, R7, R7
+ MOVD R7, ret+40(FP)
+ RET
+
+// ----------------------------------------------------------------------------
+
+// func encodeBlock(dst, src []byte) (d int)
+//
+// All local variables fit into registers, other than "var table". The register
+// allocation:
+// - R3 . .
+// - R4 . .
+// - R5 64 shift
+// - R6 72 &src[0], tableSize
+// - R7 80 &src[s]
+// - R8 88 &dst[d]
+// - R9 96 sLimit
+// - R10 . &src[nextEmit]
+// - R11 104 prevHash, currHash, nextHash, offset
+// - R12 112 &src[base], skip
+// - R13 . &src[nextS], &src[len(src) - 8]
+// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
+// - R15 120 candidate
+// - R16 . hash constant, 0x1e35a7bd
+// - R17 . &table
+// - . 128 table
+//
+// The second column (64, 72, etc) is the stack offset to spill the registers
+// when calling other functions. We could pack this slightly tighter, but it's
+// simpler to have a dedicated spill map independent of the function called.
+//
+// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
+// extra 64 bytes, to call other functions, and an extra 64 bytes, to spill
+// local variables (registers) during calls gives 32768 + 64 + 64 = 32896.
+TEXT ·encodeBlock(SB), 0, $32896-56
+ MOVD dst_base+0(FP), R8
+ MOVD src_base+24(FP), R7
+ MOVD src_len+32(FP), R14
+
+ // shift, tableSize := uint32(32-8), 1<<8
+ MOVD $24, R5
+ MOVD $256, R6
+ MOVW $0xa7bd, R16
+ MOVKW $(0x1e35<<16), R16
+
+calcShift:
+ // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
+ // shift--
+ // }
+ MOVD $16384, R2
+ CMP R2, R6
+ BGE varTable
+ CMP R14, R6
+ BGE varTable
+ SUB $1, R5, R5
+ LSL $1, R6, R6
+ B calcShift
+
+varTable:
+ // var table [maxTableSize]uint16
+ //
+ // In the asm code, unlike the Go code, we can zero-initialize only the
+ // first tableSize elements. Each uint16 element is 2 bytes and each
+ // iterations writes 64 bytes, so we can do only tableSize/32 writes
+ // instead of the 2048 writes that would zero-initialize all of table's
+ // 32768 bytes. This clear could overrun the first tableSize elements, but
+ // it won't overrun the allocated stack size.
+ ADD $128, RSP, R17
+ MOVD R17, R4
+
+ // !!! R6 = &src[tableSize]
+ ADD R6<<1, R17, R6
+
+memclr:
+ STP.P (ZR, ZR), 64(R4)
+ STP (ZR, ZR), -48(R4)
+ STP (ZR, ZR), -32(R4)
+ STP (ZR, ZR), -16(R4)
+ CMP R4, R6
+ BHI memclr
+
+ // !!! R6 = &src[0]
+ MOVD R7, R6
+
+ // sLimit := len(src) - inputMargin
+ MOVD R14, R9
+ SUB $15, R9, R9
+
+ // !!! Pre-emptively spill R5, R6 and R9 to the stack. Their values don't
+ // change for the rest of the function.
+ MOVD R5, 64(RSP)
+ MOVD R6, 72(RSP)
+ MOVD R9, 96(RSP)
+
+ // nextEmit := 0
+ MOVD R6, R10
+
+ // s := 1
+ ADD $1, R7, R7
+
+ // nextHash := hash(load32(src, s), shift)
+ MOVW 0(R7), R11
+ MULW R16, R11, R11
+ LSRW R5, R11, R11
+
+outer:
+ // for { etc }
+
+ // skip := 32
+ MOVD $32, R12
+
+ // nextS := s
+ MOVD R7, R13
+
+ // candidate := 0
+ MOVD $0, R15
+
+inner0:
+ // for { etc }
+
+ // s := nextS
+ MOVD R13, R7
+
+ // bytesBetweenHashLookups := skip >> 5
+ MOVD R12, R14
+ LSR $5, R14, R14
+
+ // nextS = s + bytesBetweenHashLookups
+ ADD R14, R13, R13
+
+ // skip += bytesBetweenHashLookups
+ ADD R14, R12, R12
+
+ // if nextS > sLimit { goto emitRemainder }
+ MOVD R13, R3
+ SUB R6, R3, R3
+ CMP R9, R3
+ BHI emitRemainder
+
+ // candidate = int(table[nextHash])
+ MOVHU 0(R17)(R11<<1), R15
+
+ // table[nextHash] = uint16(s)
+ MOVD R7, R3
+ SUB R6, R3, R3
+
+ MOVH R3, 0(R17)(R11<<1)
+
+ // nextHash = hash(load32(src, nextS), shift)
+ MOVW 0(R13), R11
+ MULW R16, R11
+ LSRW R5, R11, R11
+
+ // if load32(src, s) != load32(src, candidate) { continue } break
+ MOVW 0(R7), R3
+ MOVW (R6)(R15*1), R4
+ CMPW R4, R3
+ BNE inner0
+
+fourByteMatch:
+ // As per the encode_other.go code:
+ //
+ // A 4-byte match has been found. We'll later see etc.
+
+ // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
+ // on inputMargin in encode.go.
+ MOVD R7, R3
+ SUB R10, R3, R3
+ CMP $16, R3
+ BLE emitLiteralFastPath
+
+ // ----------------------------------------
+ // Begin inline of the emitLiteral call.
+ //
+ // d += emitLiteral(dst[d:], src[nextEmit:s])
+
+ MOVW R3, R4
+ SUBW $1, R4, R4
+
+ MOVW $60, R2
+ CMPW R2, R4
+ BLT inlineEmitLiteralOneByte
+ MOVW $256, R2
+ CMPW R2, R4
+ BLT inlineEmitLiteralTwoBytes
+
+inlineEmitLiteralThreeBytes:
+ MOVD $0xf4, R1
+ MOVB R1, 0(R8)
+ MOVW R4, 1(R8)
+ ADD $3, R8, R8
+ B inlineEmitLiteralMemmove
+
+inlineEmitLiteralTwoBytes:
+ MOVD $0xf0, R1
+ MOVB R1, 0(R8)
+ MOVB R4, 1(R8)
+ ADD $2, R8, R8
+ B inlineEmitLiteralMemmove
+
+inlineEmitLiteralOneByte:
+ LSLW $2, R4, R4
+ MOVB R4, 0(R8)
+ ADD $1, R8, R8
+
+inlineEmitLiteralMemmove:
+ // Spill local variables (registers) onto the stack; call; unspill.
+ //
+ // copy(dst[i:], lit)
+ //
+ // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
+ // R8, R10 and R3 as arguments.
+ MOVD R8, 8(RSP)
+ MOVD R10, 16(RSP)
+ MOVD R3, 24(RSP)
+
+ // Finish the "d +=" part of "d += emitLiteral(etc)".
+ ADD R3, R8, R8
+ MOVD R7, 80(RSP)
+ MOVD R8, 88(RSP)
+ MOVD R15, 120(RSP)
+ CALL runtime·memmove(SB)
+ MOVD 64(RSP), R5
+ MOVD 72(RSP), R6
+ MOVD 80(RSP), R7
+ MOVD 88(RSP), R8
+ MOVD 96(RSP), R9
+ MOVD 120(RSP), R15
+ ADD $128, RSP, R17
+ MOVW $0xa7bd, R16
+ MOVKW $(0x1e35<<16), R16
+ B inner1
+
+inlineEmitLiteralEnd:
+ // End inline of the emitLiteral call.
+ // ----------------------------------------
+
+emitLiteralFastPath:
+ // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
+ MOVB R3, R4
+ SUBW $1, R4, R4
+ AND $0xff, R4, R4
+ LSLW $2, R4, R4
+ MOVB R4, (R8)
+ ADD $1, R8, R8
+
+ // !!! Implement the copy from lit to dst as a 16-byte load and store.
+ // (Encode's documentation says that dst and src must not overlap.)
+ //
+ // This always copies 16 bytes, instead of only len(lit) bytes, but that's
+ // OK. Subsequent iterations will fix up the overrun.
+ //
+ // Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
+ // 16-byte loads and stores. This technique probably wouldn't be as
+ // effective on architectures that are fussier about alignment.
+ LDP 0(R10), (R0, R1)
+ STP (R0, R1), 0(R8)
+ ADD R3, R8, R8
+
+inner1:
+ // for { etc }
+
+ // base := s
+ MOVD R7, R12
+
+ // !!! offset := base - candidate
+ MOVD R12, R11
+ SUB R15, R11, R11
+ SUB R6, R11, R11
+
+ // ----------------------------------------
+ // Begin inline of the extendMatch call.
+ //
+ // s = extendMatch(src, candidate+4, s+4)
+
+ // !!! R14 = &src[len(src)]
+ MOVD src_len+32(FP), R14
+ ADD R6, R14, R14
+
+ // !!! R13 = &src[len(src) - 8]
+ MOVD R14, R13
+ SUB $8, R13, R13
+
+ // !!! R15 = &src[candidate + 4]
+ ADD $4, R15, R15
+ ADD R6, R15, R15
+
+ // !!! s += 4
+ ADD $4, R7, R7
+
+inlineExtendMatchCmp8:
+ // As long as we are 8 or more bytes before the end of src, we can load and
+ // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
+ CMP R13, R7
+ BHI inlineExtendMatchCmp1
+ MOVD (R15), R3
+ MOVD (R7), R4
+ CMP R4, R3
+ BNE inlineExtendMatchBSF
+ ADD $8, R15, R15
+ ADD $8, R7, R7
+ B inlineExtendMatchCmp8
+
+inlineExtendMatchBSF:
+ // If those 8 bytes were not equal, XOR the two 8 byte values, and return
+ // the index of the first byte that differs.
+ // RBIT reverses the bit order, then CLZ counts the leading zeros, the
+ // combination of which finds the least significant bit which is set.
+ // The arm64 architecture is little-endian, and the shift by 3 converts
+ // a bit index to a byte index.
+ EOR R3, R4, R4
+ RBIT R4, R4
+ CLZ R4, R4
+ ADD R4>>3, R7, R7
+ B inlineExtendMatchEnd
+
+inlineExtendMatchCmp1:
+ // In src's tail, compare 1 byte at a time.
+ CMP R7, R14
+ BLS inlineExtendMatchEnd
+ MOVB (R15), R3
+ MOVB (R7), R4
+ CMP R4, R3
+ BNE inlineExtendMatchEnd
+ ADD $1, R15, R15
+ ADD $1, R7, R7
+ B inlineExtendMatchCmp1
+
+inlineExtendMatchEnd:
+ // End inline of the extendMatch call.
+ // ----------------------------------------
+
+ // ----------------------------------------
+ // Begin inline of the emitCopy call.
+ //
+ // d += emitCopy(dst[d:], base-candidate, s-base)
+
+ // !!! length := s - base
+ MOVD R7, R3
+ SUB R12, R3, R3
+
+inlineEmitCopyLoop0:
+ // for length >= 68 { etc }
+ MOVW $68, R2
+ CMPW R2, R3
+ BLT inlineEmitCopyStep1
+
+ // Emit a length 64 copy, encoded as 3 bytes.
+ MOVD $0xfe, R1
+ MOVB R1, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+ SUBW $64, R3, R3
+ B inlineEmitCopyLoop0
+
+inlineEmitCopyStep1:
+ // if length > 64 { etc }
+ MOVW $64, R2
+ CMPW R2, R3
+ BLE inlineEmitCopyStep2
+
+ // Emit a length 60 copy, encoded as 3 bytes.
+ MOVD $0xee, R1
+ MOVB R1, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+ SUBW $60, R3, R3
+
+inlineEmitCopyStep2:
+ // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
+ MOVW $12, R2
+ CMPW R2, R3
+ BGE inlineEmitCopyStep3
+ MOVW $2048, R2
+ CMPW R2, R11
+ BGE inlineEmitCopyStep3
+
+ // Emit the remaining copy, encoded as 2 bytes.
+ MOVB R11, 1(R8)
+ LSRW $8, R11, R11
+ LSLW $5, R11, R11
+ SUBW $4, R3, R3
+ AND $0xff, R3, R3
+ LSLW $2, R3, R3
+ ORRW R3, R11, R11
+ ORRW $1, R11, R11
+ MOVB R11, 0(R8)
+ ADD $2, R8, R8
+ B inlineEmitCopyEnd
+
+inlineEmitCopyStep3:
+ // Emit the remaining copy, encoded as 3 bytes.
+ SUBW $1, R3, R3
+ LSLW $2, R3, R3
+ ORRW $2, R3, R3
+ MOVB R3, 0(R8)
+ MOVW R11, 1(R8)
+ ADD $3, R8, R8
+
+inlineEmitCopyEnd:
+ // End inline of the emitCopy call.
+ // ----------------------------------------
+
+ // nextEmit = s
+ MOVD R7, R10
+
+ // if s >= sLimit { goto emitRemainder }
+ MOVD R7, R3
+ SUB R6, R3, R3
+ CMP R3, R9
+ BLS emitRemainder
+
+ // As per the encode_other.go code:
+ //
+ // We could immediately etc.
+
+ // x := load64(src, s-1)
+ MOVD -1(R7), R14
+
+ // prevHash := hash(uint32(x>>0), shift)
+ MOVW R14, R11
+ MULW R16, R11, R11
+ LSRW R5, R11, R11
+
+ // table[prevHash] = uint16(s-1)
+ MOVD R7, R3
+ SUB R6, R3, R3
+ SUB $1, R3, R3
+
+ MOVHU R3, 0(R17)(R11<<1)
+
+ // currHash := hash(uint32(x>>8), shift)
+ LSR $8, R14, R14
+ MOVW R14, R11
+ MULW R16, R11, R11
+ LSRW R5, R11, R11
+
+ // candidate = int(table[currHash])
+ MOVHU 0(R17)(R11<<1), R15
+
+ // table[currHash] = uint16(s)
+ ADD $1, R3, R3
+ MOVHU R3, 0(R17)(R11<<1)
+
+ // if uint32(x>>8) == load32(src, candidate) { continue }
+ MOVW (R6)(R15*1), R4
+ CMPW R4, R14
+ BEQ inner1
+
+ // nextHash = hash(uint32(x>>16), shift)
+ LSR $8, R14, R14
+ MOVW R14, R11
+ MULW R16, R11, R11
+ LSRW R5, R11, R11
+
+ // s++
+ ADD $1, R7, R7
+
+ // break out of the inner1 for loop, i.e. continue the outer loop.
+ B outer
+
+emitRemainder:
+ // if nextEmit < len(src) { etc }
+ MOVD src_len+32(FP), R3
+ ADD R6, R3, R3
+ CMP R3, R10
+ BEQ encodeBlockEnd
+
+ // d += emitLiteral(dst[d:], src[nextEmit:])
+ //
+ // Push args.
+ MOVD R8, 8(RSP)
+ MOVD $0, 16(RSP) // Unnecessary, as the callee ignores it, but conservative.
+ MOVD $0, 24(RSP) // Unnecessary, as the callee ignores it, but conservative.
+ MOVD R10, 32(RSP)
+ SUB R10, R3, R3
+ MOVD R3, 40(RSP)
+ MOVD R3, 48(RSP) // Unnecessary, as the callee ignores it, but conservative.
+
+ // Spill local variables (registers) onto the stack; call; unspill.
+ MOVD R8, 88(RSP)
+ CALL ·emitLiteral(SB)
+ MOVD 88(RSP), R8
+
+ // Finish the "d +=" part of "d += emitLiteral(etc)".
+ MOVD 56(RSP), R1
+ ADD R1, R8, R8
+
+encodeBlockEnd:
+ MOVD dst_base+0(FP), R3
+ SUB R3, R8, R8
+ MOVD R8, d+48(FP)
+ RET
diff --git a/vendor/github.com/golang/snappy/encode_amd64.go b/vendor/github.com/golang/snappy/encode_asm.go
similarity index 96%
rename from vendor/github.com/golang/snappy/encode_amd64.go
rename to vendor/github.com/golang/snappy/encode_asm.go
index 150d91b..107c1e7 100644
--- a/vendor/github.com/golang/snappy/encode_amd64.go
+++ b/vendor/github.com/golang/snappy/encode_asm.go
@@ -5,6 +5,7 @@
// +build !appengine
// +build gc
// +build !noasm
+// +build amd64 arm64
package snappy
diff --git a/vendor/github.com/golang/snappy/encode_other.go b/vendor/github.com/golang/snappy/encode_other.go
index dbcae90..296d7f0 100644
--- a/vendor/github.com/golang/snappy/encode_other.go
+++ b/vendor/github.com/golang/snappy/encode_other.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !amd64 appengine !gc noasm
+// +build !amd64,!arm64 appengine !gc noasm
package snappy