[VOL-2831] - Multiple adapter support

This commit introduces the necessary APIs needed to support
multiple adapters.  It uses the number of replicas of a given
adapter and consistent hashing to determine the target of a
given request.

The endpoint_manager.go provides two APIs that will be needed
by components communicating over kafka:
 - GetEndPoint() : to be called before sending a request to kafka
 - IsDeviceOwnedByService(): used during device reconciliation

A change is made to the adapter_proxy.go to use this new mechanism
when sending a request to an adapter from another adapter.

The mocks directory was refactored to get around circular package
dependencies.  This implies any component using these mocks will
need to adjust to the new set of directories when using this
library version.

Change-Id: I470cd62fcfd04edc1fd4508400c9619cadaab25a
diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s
new file mode 100644
index 0000000..757f201
--- /dev/null
+++ b/vendor/github.com/cespare/xxhash/xxhash_amd64.s
@@ -0,0 +1,233 @@
+// +build !appengine
+// +build gc
+// +build !purego
+
+#include "textflag.h"
+
+// Register allocation:
+// AX	h
+// CX	pointer to advance through b
+// DX	n
+// BX	loop end
+// R8	v1, k1
+// R9	v2
+// R10	v3
+// R11	v4
+// R12	tmp
+// R13	prime1v
+// R14	prime2v
+// R15	prime4v
+
+// round reads from and advances the buffer pointer in CX.
+// It assumes that R13 has prime1v and R14 has prime2v.
+#define round(r) \
+	MOVQ  (CX), R12 \
+	ADDQ  $8, CX    \
+	IMULQ R14, R12  \
+	ADDQ  R12, r    \
+	ROLQ  $31, r    \
+	IMULQ R13, r
+
+// mergeRound applies a merge round on the two registers acc and val.
+// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
+#define mergeRound(acc, val) \
+	IMULQ R14, val \
+	ROLQ  $31, val \
+	IMULQ R13, val \
+	XORQ  val, acc \
+	IMULQ R13, acc \
+	ADDQ  R15, acc
+
+// func Sum64(b []byte) uint64
+TEXT ·Sum64(SB), NOSPLIT, $0-32
+	// Load fixed primes.
+	MOVQ ·prime1v(SB), R13
+	MOVQ ·prime2v(SB), R14
+	MOVQ ·prime4v(SB), R15
+
+	// Load slice.
+	MOVQ b_base+0(FP), CX
+	MOVQ b_len+8(FP), DX
+	LEAQ (CX)(DX*1), BX
+
+	// The first loop limit will be len(b)-32.
+	SUBQ $32, BX
+
+	// Check whether we have at least one block.
+	CMPQ DX, $32
+	JLT  noBlocks
+
+	// Set up initial state (v1, v2, v3, v4).
+	MOVQ R13, R8
+	ADDQ R14, R8
+	MOVQ R14, R9
+	XORQ R10, R10
+	XORQ R11, R11
+	SUBQ R13, R11
+
+	// Loop until CX > BX.
+blockLoop:
+	round(R8)
+	round(R9)
+	round(R10)
+	round(R11)
+
+	CMPQ CX, BX
+	JLE  blockLoop
+
+	MOVQ R8, AX
+	ROLQ $1, AX
+	MOVQ R9, R12
+	ROLQ $7, R12
+	ADDQ R12, AX
+	MOVQ R10, R12
+	ROLQ $12, R12
+	ADDQ R12, AX
+	MOVQ R11, R12
+	ROLQ $18, R12
+	ADDQ R12, AX
+
+	mergeRound(AX, R8)
+	mergeRound(AX, R9)
+	mergeRound(AX, R10)
+	mergeRound(AX, R11)
+
+	JMP afterBlocks
+
+noBlocks:
+	MOVQ ·prime5v(SB), AX
+
+afterBlocks:
+	ADDQ DX, AX
+
+	// Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
+	ADDQ $24, BX
+
+	CMPQ CX, BX
+	JG   fourByte
+
+wordLoop:
+	// Calculate k1.
+	MOVQ  (CX), R8
+	ADDQ  $8, CX
+	IMULQ R14, R8
+	ROLQ  $31, R8
+	IMULQ R13, R8
+
+	XORQ  R8, AX
+	ROLQ  $27, AX
+	IMULQ R13, AX
+	ADDQ  R15, AX
+
+	CMPQ CX, BX
+	JLE  wordLoop
+
+fourByte:
+	ADDQ $4, BX
+	CMPQ CX, BX
+	JG   singles
+
+	MOVL  (CX), R8
+	ADDQ  $4, CX
+	IMULQ R13, R8
+	XORQ  R8, AX
+
+	ROLQ  $23, AX
+	IMULQ R14, AX
+	ADDQ  ·prime3v(SB), AX
+
+singles:
+	ADDQ $4, BX
+	CMPQ CX, BX
+	JGE  finalize
+
+singlesLoop:
+	MOVBQZX (CX), R12
+	ADDQ    $1, CX
+	IMULQ   ·prime5v(SB), R12
+	XORQ    R12, AX
+
+	ROLQ  $11, AX
+	IMULQ R13, AX
+
+	CMPQ CX, BX
+	JL   singlesLoop
+
+finalize:
+	MOVQ  AX, R12
+	SHRQ  $33, R12
+	XORQ  R12, AX
+	IMULQ R14, AX
+	MOVQ  AX, R12
+	SHRQ  $29, R12
+	XORQ  R12, AX
+	IMULQ ·prime3v(SB), AX
+	MOVQ  AX, R12
+	SHRQ  $32, R12
+	XORQ  R12, AX
+
+	MOVQ AX, ret+24(FP)
+	RET
+
+// writeBlocks uses the same registers as above except that it uses AX to store
+// the x pointer.
+
+// func writeBlocks(x *xxh, b []byte) []byte
+TEXT ·writeBlocks(SB), NOSPLIT, $0-56
+	// Load fixed primes needed for round.
+	MOVQ ·prime1v(SB), R13
+	MOVQ ·prime2v(SB), R14
+
+	// Load slice.
+	MOVQ b_base+8(FP), CX
+	MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below
+	MOVQ b_len+16(FP), DX
+	LEAQ (CX)(DX*1), BX
+	SUBQ $32, BX
+
+	// Load vN from x.
+	MOVQ x+0(FP), AX
+	MOVQ 0(AX), R8   // v1
+	MOVQ 8(AX), R9   // v2
+	MOVQ 16(AX), R10 // v3
+	MOVQ 24(AX), R11 // v4
+
+	// We don't need to check the loop condition here; this function is
+	// always called with at least one block of data to process.
+blockLoop:
+	round(R8)
+	round(R9)
+	round(R10)
+	round(R11)
+
+	CMPQ CX, BX
+	JLE  blockLoop
+
+	// Copy vN back to x.
+	MOVQ R8, 0(AX)
+	MOVQ R9, 8(AX)
+	MOVQ R10, 16(AX)
+	MOVQ R11, 24(AX)
+
+	// Construct return slice.
+	// NOTE: It's important that we don't construct a slice that has a base
+	// pointer off the end of the original slice, as in Go 1.7+ this will
+	// cause runtime crashes. (See discussion in, for example,
+	// https://github.com/golang/go/issues/16772.)
+	// Therefore, we calculate the length/cap first, and if they're zero, we
+	// keep the old base. This is what the compiler does as well if you
+	// write code like
+	//   b = b[len(b):]
+
+	// New length is 32 - (CX - BX) -> BX+32 - CX.
+	ADDQ $32, BX
+	SUBQ CX, BX
+	JZ   afterSetBase
+
+	MOVQ CX, ret_base+32(FP)
+
+afterSetBase:
+	MOVQ BX, ret_len+40(FP)
+	MOVQ BX, ret_cap+48(FP) // set cap == len
+
+	RET