VOL-1868 move simulated onu from voltha-go to voltha-simonu-adapter

Sourced from voltha-go commit 251a11c0ffe60512318a644cd6ce0dc4e12f4018

Change-Id: Iab179bc2f3dd772ed7f488d1c03d1a84ba75e874
diff --git a/vendor/github.com/pierrec/lz4/.gitignore b/vendor/github.com/pierrec/lz4/.gitignore
new file mode 100644
index 0000000..e48bab3
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.gitignore
@@ -0,0 +1,33 @@
+# Created by https://www.gitignore.io/api/macos
+
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# End of https://www.gitignore.io/api/macos
+
+lz4c/lz4c
diff --git a/vendor/github.com/pierrec/lz4/.travis.yml b/vendor/github.com/pierrec/lz4/.travis.yml
new file mode 100644
index 0000000..658910d
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.travis.yml
@@ -0,0 +1,24 @@
+language: go
+
+env:
+  - GO111MODULE=off
+  - GO111MODULE=on
+
+go:
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
+  - master
+
+matrix:
+ fast_finish: true
+ allow_failures:
+   - go: master
+
+sudo: false
+
+script: 
+ - go test -v -cpu=2
+ - go test -v -cpu=2 -race
+ - go test -v -cpu=2 -tags noasm
+ - go test -v -cpu=2 -race -tags noasm
diff --git a/vendor/github.com/pierrec/lz4/LICENSE b/vendor/github.com/pierrec/lz4/LICENSE
new file mode 100644
index 0000000..bd899d8
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Pierre Curto
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of xxHash nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/vendor/github.com/pierrec/lz4/README.md b/vendor/github.com/pierrec/lz4/README.md
new file mode 100644
index 0000000..50a10ee
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/README.md
@@ -0,0 +1,24 @@
+[![godoc](https://godoc.org/github.com/pierrec/lz4?status.png)](https://godoc.org/github.com/pierrec/lz4)
+
+# lz4
+LZ4 compression and decompression in pure Go.
+
+## Usage
+
+```go
+import "github.com/pierrec/lz4"
+```
+
+## Description
+Package lz4 implements reading and writing lz4 compressed data (a frame),
+as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
+
+This package is **compatible with the LZ4 frame format**. Although the block level compression
+and decompression functions are exposed and are fully compatible with the lz4 block format
+definition, they are low level and should not be used directly.
+
+For a complete description of an lz4 compressed block, see:
+http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+
+See https://github.com/Cyan4973/lz4 for the reference C implementation.
+
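A minimal round-trip sketch of the frame-level API described in this README, assuming the package is imported as shown above; the payload and buffer names are illustrative only:

```go
package main

import (
	"bytes"
	"io/ioutil"
	"log"

	"github.com/pierrec/lz4"
)

func main() {
	src := []byte("hello, LZ4 frame format")

	// Compress src into an in-memory frame.
	var frame bytes.Buffer
	zw := lz4.NewWriter(&frame)
	if _, err := zw.Write(src); err != nil {
		log.Fatal(err)
	}
	// Close writes the last block, the end-of-frame marker and the frame checksum.
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	frameLen := frame.Len()

	// Decompress it back.
	zr := lz4.NewReader(&frame)
	got, err := ioutil.ReadAll(zr)
	if err != nil {
		log.Fatal(err)
	}
	if !bytes.Equal(got, src) {
		log.Fatal("round trip mismatch")
	}
	log.Printf("%d bytes -> %d byte frame", len(src), frameLen)
}
```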
diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go
new file mode 100644
index 0000000..d96e0e7
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/block.go
@@ -0,0 +1,339 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"errors"
+)
+
+var (
+	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
+	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
+	ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
+	// ErrInvalid is returned when reading an invalid LZ4 archive.
+	ErrInvalid = errors.New("lz4: bad magic number")
+)
+
+// blockHash hashes 4 bytes into a value < winSize.
+func blockHash(x uint32) uint32 {
+	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
+	return x * hasher >> hashShift
+}
+
+// CompressBlockBound returns the maximum compressed size of a buffer of size n, when the data is not compressible.
+func CompressBlockBound(n int) int {
+	return n + n/255 + 16
+}
+
+// UncompressBlock uncompresses the source buffer into the destination one,
+// and returns the uncompressed size.
+//
+// The destination buffer must be sized appropriately.
+//
+// An error is returned if the source data is invalid or the destination buffer is too small.
+func UncompressBlock(src, dst []byte) (di int, err error) {
+	sn := len(src)
+	if sn == 0 {
+		return 0, nil
+	}
+
+	di = decodeBlock(dst, src)
+	if di < 0 {
+		return 0, ErrInvalidSourceShortBuffer
+	}
+	return di, nil
+}
+
+// CompressBlock compresses the source buffer into the destination one.
+// This is the fast version of LZ4 compression and also the default one.
+// The size of hashTable must be at least 64Kb.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
+	defer func() {
+		if recover() != nil {
+			err = ErrInvalidSourceShortBuffer
+		}
+	}()
+
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 {
+		return 0, nil
+	}
+	var si int
+
+	// Fast scan strategy: the hash table only stores the positions of the last 4-byte sequences.
+	// const accInit = 1 << skipStrength
+
+	anchor := si // Position of the current literals.
+	// acc := accInit // Variable step: improves performance on non-compressible data.
+
+	for si < sn {
+		// Hash the next 4 bytes (sequence)...
+		match := binary.LittleEndian.Uint32(src[si:])
+		h := blockHash(match)
+
+		ref := hashTable[h]
+		hashTable[h] = si
+		if ref >= sn { // Invalid reference (dirty hashtable).
+			si++
+			continue
+		}
+		offset := si - ref
+		if offset <= 0 || offset >= winSize || // Out of window.
+			match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
+			// si += acc >> skipStrength
+			// acc++
+			si++
+			continue
+		}
+
+		// Match found.
+		// acc = accInit
+		lLen := si - anchor // Literal length.
+
+		// Encode match length part 1.
+		si += minMatch
+		mLen := si // Match length has minMatch already.
+		// Find the longest match, first looking by batches of 8 bytes.
+		for si < sn && binary.LittleEndian.Uint64(src[si:]) == binary.LittleEndian.Uint64(src[si-offset:]) {
+			si += 8
+		}
+		// Then byte by byte.
+		for si < sn && src[si] == src[si-offset] {
+			si++
+		}
+
+		mLen = si - mLen
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// Encode literals length.
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			di++
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(l)
+		}
+		di++
+
+		// Literals.
+		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
+		di += lLen + 2
+		anchor = si
+
+		// Encode offset.
+		_ = dst[di] // Bound check elimination.
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// Encode match length part 2.
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(mLen)
+			di++
+		}
+	}
+
+	if anchor == 0 {
+		// Incompressible.
+		return 0, nil
+	}
+
+	// Last literals.
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		di++
+		for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			di++
+		}
+		dst[di] = byte(lLen)
+	}
+	di++
+
+	// Write the last literals.
+	if di >= anchor {
+		// Incompressible.
+		return 0, nil
+	}
+	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
+	return di, nil
+}
+
+// CompressBlockHC compresses the source buffer src into the destination dst
+// with max search depth (use 0 or negative value for no max).
+//
+// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
+//
+// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
+	defer func() {
+		if recover() != nil {
+			err = ErrInvalidSourceShortBuffer
+		}
+	}()
+
+	sn, dn := len(src)-mfLimit, len(dst)
+	if sn <= 0 || dn == 0 {
+		return 0, nil
+	}
+	var si int
+
+	// hashTable: stores the last position found for a given hash
+	// chainTable: stores previous positions for a given hash
+	var hashTable, chainTable [winSize]int
+
+	if depth <= 0 {
+		depth = winSize
+	}
+
+	anchor := si
+	for si < sn {
+		// Hash the next 4 bytes (sequence).
+		match := binary.LittleEndian.Uint32(src[si:])
+		h := blockHash(match)
+
+		// Follow the chain until out of window and give the longest match.
+		mLen := 0
+		offset := 0
+		for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] {
+			// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
+			// must match to improve on the match length.
+			if src[next+mLen] != src[si+mLen] {
+				continue
+			}
+			ml := 0
+			// Compare the current position with a previous with the same hash.
+			for ml < sn-si && binary.LittleEndian.Uint64(src[next+ml:]) == binary.LittleEndian.Uint64(src[si+ml:]) {
+				ml += 8
+			}
+			for ml < sn-si && src[next+ml] == src[si+ml] {
+				ml++
+			}
+			if ml < minMatch || ml <= mLen {
+			// Match too small (<minMatch) or smaller than the current match.
+				continue
+			}
+			// Found a longer match, keep its position and length.
+			mLen = ml
+			offset = si - next
+			// Try another previous position with the same hash.
+			try--
+		}
+		chainTable[si&winMask] = hashTable[h]
+		hashTable[h] = si
+
+		// No match found.
+		if mLen == 0 {
+			si++
+			continue
+		}
+
+		// Match found.
+		// Update hash/chain tables with overlapping bytes:
+		// si already hashed, add everything from si+1 up to the match length.
+		winStart := si + 1
+		if ws := si + mLen - winSize; ws > winStart {
+			winStart = ws
+		}
+		for si, ml := winStart, si+mLen; si < ml; {
+			match >>= 8
+			match |= uint32(src[si+3]) << 24
+			h := blockHash(match)
+			chainTable[si&winMask] = hashTable[h]
+			hashTable[h] = si
+			si++
+		}
+
+		lLen := si - anchor
+		si += mLen
+		mLen -= minMatch // Match length does not include minMatch.
+
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// Encode literals length.
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			di++
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(l)
+		}
+		di++
+
+		// Literals.
+		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
+		di += lLen
+		anchor = si
+
+		// Encode offset.
+		di += 2
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// Encode match length part 2.
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(mLen)
+			di++
+		}
+	}
+
+	if anchor == 0 {
+		// Incompressible.
+		return 0, nil
+	}
+
+	// Last literals.
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		di++
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			di++
+		}
+		dst[di] = byte(lLen)
+	}
+	di++
+
+	// Write the last literals.
+	if di >= anchor {
+		// Incompressible.
+		return 0, nil
+	}
+	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
+	return di, nil
+}
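A brief sketch of the low-level block API declared above (CompressBlockBound, CompressBlock, UncompressBlock). As the package documentation notes, the frame-level Reader/Writer is normally preferred, so this is illustrative only; the 64K-entry hash table follows the "at least 64Kb" requirement in the CompressBlock comment:

```go
package main

import (
	"fmt"
	"log"

	"github.com/pierrec/lz4"
)

func main() {
	data := []byte("repeat repeat repeat repeat repeat repeat repeat repeat")

	// Worst-case destination size and a 64K-entry hash table for CompressBlock.
	buf := make([]byte, lz4.CompressBlockBound(len(data)))
	ht := make([]int, 64<<10)

	n, err := lz4.CompressBlock(data, buf, ht)
	if err != nil {
		log.Fatal(err)
	}
	if n == 0 {
		log.Fatal("data was not compressible")
	}

	// The destination of UncompressBlock must be sized for the uncompressed data.
	out := make([]byte, len(data))
	m, err := lz4.UncompressBlock(buf[:n], out)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d -> %d -> %d bytes\n", len(data), n, m)
}
```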
diff --git a/vendor/github.com/pierrec/lz4/debug.go b/vendor/github.com/pierrec/lz4/debug.go
new file mode 100644
index 0000000..bc5e78d
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/debug.go
@@ -0,0 +1,23 @@
+// +build lz4debug
+
+package lz4
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+)
+
+const debugFlag = true
+
+func debug(args ...interface{}) {
+	_, file, line, _ := runtime.Caller(1)
+	file = filepath.Base(file)
+
+	f := fmt.Sprintf("LZ4: %s:%d %s", file, line, args[0])
+	if f[len(f)-1] != '\n' {
+		f += "\n"
+	}
+	fmt.Fprintf(os.Stderr, f, args[1:]...)
+}
diff --git a/vendor/github.com/pierrec/lz4/debug_stub.go b/vendor/github.com/pierrec/lz4/debug_stub.go
new file mode 100644
index 0000000..44211ad
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/debug_stub.go
@@ -0,0 +1,7 @@
+// +build !lz4debug
+
+package lz4
+
+const debugFlag = false
+
+func debug(args ...interface{}) {}
diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.go b/vendor/github.com/pierrec/lz4/decode_amd64.go
new file mode 100644
index 0000000..43cc14f
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_amd64.go
@@ -0,0 +1,8 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package lz4
+
+//go:noescape
+func decodeBlock(dst, src []byte) int
diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.s b/vendor/github.com/pierrec/lz4/decode_amd64.s
new file mode 100644
index 0000000..20fef39
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_amd64.s
@@ -0,0 +1,375 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// AX scratch
+// BX scratch
+// CX scratch
+// DX token
+//
+// DI &dst
+// SI &src
+// R8 &dst + len(dst)
+// R9 &src + len(src)
+// R11 &dst
+// R12 short output end
+// R13 short input end
+// func decodeBlock(dst, src []byte) int
+// using 50 bytes of stack currently
+TEXT ·decodeBlock(SB), NOSPLIT, $64-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ DI, R11
+	MOVQ dst_len+8(FP), R8
+	ADDQ DI, R8
+
+	MOVQ src_base+24(FP), SI
+	MOVQ src_len+32(FP), R9
+	ADDQ SI, R9
+
+	// shortcut ends
+	// short output end
+	MOVQ R8, R12
+	SUBQ $32, R12
+	// short input end
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+loop:
+	// for si < len(src)
+	CMPQ SI, R9
+	JGE end
+
+	// token := uint32(src[si])
+	MOVBQZX (SI), DX
+	INCQ SI
+
+	// lit_len = token >> 4
+	// if lit_len > 0
+	// CX = lit_len
+	MOVQ DX, CX
+	SHRQ $4, CX
+
+	// if lit_len != 0xF
+	CMPQ CX, $0xF
+	JEQ lit_len_loop_pre
+	CMPQ DI, R12
+	JGE lit_len_loop_pre
+	CMPQ SI, R13
+	JGE lit_len_loop_pre
+
+	// copy shortcut
+
+	// A two-stage shortcut for the most common case:
+	// 1) If the literal length is 0..14, and there is enough space,
+	// enter the shortcut and copy 16 bytes on behalf of the literals
+	// (in the fast mode, only 8 bytes can be safely copied this way).
+	// 2) Further if the match length is 4..18, copy 18 bytes in a similar
+	// manner; but we ensure that there's enough space in the output for
+	// those 18 bytes earlier, upon entering the shortcut (in other words,
+	// there is a combined check for both stages).
+
+	// copy literal
+	MOVOU (SI), X0
+	MOVOU X0, (DI)
+	ADDQ CX, DI
+	ADDQ CX, SI
+
+	MOVQ DX, CX
+	ANDQ $0xF, CX
+
+	// The second stage: prepare for match copying, decode full info.
+	// If it doesn't work out, the info won't be wasted.
+	// offset := uint16(data[:2])
+	MOVWQZX (SI), DX
+	ADDQ $2, SI
+
+	MOVQ DI, AX
+	SUBQ DX, AX
+	CMPQ AX, DI
+	JGT err_short_buf
+
+	// if we can't do the second stage then jump straight to read the
+	// match length, we already have the offset.
+	CMPQ CX, $0xF
+	JEQ match_len_loop_pre
+	CMPQ DX, $8
+	JLT match_len_loop_pre
+	CMPQ AX, R11
+	JLT err_short_buf
+
+	// memcpy(op + 0, match + 0, 8);
+	MOVQ (AX), BX
+	MOVQ BX, (DI)
+	// memcpy(op + 8, match + 8, 8);
+	MOVQ 8(AX), BX
+	MOVQ BX, 8(DI)
+	// memcpy(op +16, match +16, 2);
+	MOVW 16(AX), BX
+	MOVW BX, 16(DI)
+
+	ADDQ $4, DI // minmatch
+	ADDQ CX, DI
+
+	// shortcut complete, load next token
+	JMP loop
+
+lit_len_loop_pre:
+	// if lit_len > 0
+	CMPQ CX, $0
+	JEQ offset
+	CMPQ CX, $0xF
+	JNE copy_literal
+
+lit_len_loop:
+	// for src[si] == 0xFF
+	CMPB (SI), $0xFF
+	JNE lit_len_finalise
+
+	// bounds check src[si+1]
+	MOVQ SI, AX
+	ADDQ $1, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	// lit_len += 0xFF
+	ADDQ $0xFF, CX
+	INCQ SI
+	JMP lit_len_loop
+
+lit_len_finalise:
+	// lit_len += int(src[si])
+	// si++
+	MOVBQZX (SI), AX
+	ADDQ AX, CX
+	INCQ SI
+
+copy_literal:
+	// bounds check src and dst
+	MOVQ SI, AX
+	ADDQ CX, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	MOVQ DI, AX
+	ADDQ CX, AX
+	CMPQ AX, R8
+	JGT err_short_buf
+
+	// what's a good cutoff to call memmove?
+	CMPQ CX, $16
+	JGT memmove_lit
+
+	// if len(dst[di:]) < 16
+	MOVQ R8, AX
+	SUBQ DI, AX
+	CMPQ AX, $16
+	JLT memmove_lit
+
+	// if len(src[si:]) < 16
+	MOVQ R9, AX
+	SUBQ SI, AX
+	CMPQ AX, $16
+	JLT memmove_lit
+
+	MOVOU (SI), X0
+	MOVOU X0, (DI)
+
+	JMP finish_lit_copy
+
+memmove_lit:
+	// memmove(to, from, len)
+	MOVQ DI, 0(SP)
+	MOVQ SI, 8(SP)
+	MOVQ CX, 16(SP)
+	// spill
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP) // need len to inc SI, DI after
+	MOVB DX, 48(SP)
+	CALL runtime·memmove(SB)
+
+	// restore registers
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+	MOVB 48(SP), DX
+
+	// recalc initial values
+	MOVQ dst_base+0(FP), R8
+	MOVQ R8, R11
+	ADDQ dst_len+8(FP), R8
+	MOVQ src_base+24(FP), R9
+	ADDQ src_len+32(FP), R9
+	MOVQ R8, R12
+	SUBQ $32, R12
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+finish_lit_copy:
+	ADDQ CX, SI
+	ADDQ CX, DI
+
+	CMPQ SI, R9
+	JGE end
+
+offset:
+	// CX := mLen
+	// free up DX to use for offset
+	MOVQ DX, CX
+
+	MOVQ SI, AX
+	ADDQ $2, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	// offset
+	// DX := int(src[si]) | int(src[si+1])<<8
+	MOVWQZX (SI), DX
+	ADDQ $2, SI
+
+	// 0 offset is invalid
+	CMPQ DX, $0
+	JEQ err_corrupt
+
+	ANDB $0xF, CX
+
+match_len_loop_pre:
+	// if mlen != 0xF
+	CMPB CX, $0xF
+	JNE copy_match
+
+match_len_loop:
+	// for src[si] == 0xFF
+	// lit_len += 0xFF
+	CMPB (SI), $0xFF
+	JNE match_len_finalise
+
+	// bounds check src[si+1]
+	MOVQ SI, AX
+	ADDQ $1, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	ADDQ $0xFF, CX
+	INCQ SI
+	JMP match_len_loop
+
+match_len_finalise:
+	// lit_len += int(src[si])
+	// si++
+	MOVBQZX (SI), AX
+	ADDQ AX, CX
+	INCQ SI
+
+copy_match:
+	// mLen += minMatch
+	ADDQ $4, CX
+
+	// check we have match_len bytes left in dst
+	// di+match_len < len(dst)
+	MOVQ DI, AX
+	ADDQ CX, AX
+	CMPQ AX, R8
+	JGT err_short_buf
+
+	// DX = offset
+	// CX = match_len
+	// BX = &dst + (di - offset)
+	MOVQ DI, BX
+	SUBQ DX, BX
+
+	// check BX is within dst
+	// if BX < &dst
+	CMPQ BX, R11
+	JLT err_short_buf
+
+	// if offset + match_len < di
+	MOVQ BX, AX
+	ADDQ CX, AX
+	CMPQ DI, AX
+	JGT copy_interior_match
+
+	// AX := len(dst[:di])
+	// MOVQ DI, AX
+	// SUBQ R11, AX
+
+	// copy 16 bytes at a time
+	// if di-offset < 16 copy 16-(di-offset) bytes to di
+	// then do the remaining
+
+copy_match_loop:
+	// for match_len >= 0
+	// dst[di] = dst[i]
+	// di++
+	// i++
+	MOVB (BX), AX
+	MOVB AX, (DI)
+	INCQ DI
+	INCQ BX
+	DECQ CX
+
+	CMPQ CX, $0
+	JGT copy_match_loop
+
+	JMP loop
+
+copy_interior_match:
+	CMPQ CX, $16
+	JGT memmove_match
+
+	// if len(dst[di:]) < 16
+	MOVQ R8, AX
+	SUBQ DI, AX
+	CMPQ AX, $16
+	JLT memmove_match
+
+	MOVOU (BX), X0
+	MOVOU X0, (DI)
+
+	ADDQ CX, DI
+	JMP loop
+
+memmove_match:
+	// memmove(to, from, len)
+	MOVQ DI, 0(SP)
+	MOVQ BX, 8(SP)
+	MOVQ CX, 16(SP)
+	// spill
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP) // need len to inc SI, DI after
+	CALL runtime·memmove(SB)
+
+	// restore registers
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+
+	// recalc initial values
+	MOVQ dst_base+0(FP), R8
+	MOVQ R8, R11 // TODO: make these sensible numbers
+	ADDQ dst_len+8(FP), R8
+	MOVQ src_base+24(FP), R9
+	ADDQ src_len+32(FP), R9
+	MOVQ R8, R12
+	SUBQ $32, R12
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+	ADDQ CX, DI
+	JMP loop
+
+err_corrupt:
+	MOVQ $-1, ret+48(FP)
+	RET
+
+err_short_buf:
+	MOVQ $-2, ret+48(FP)
+	RET
+
+end:
+	SUBQ R11, DI
+	MOVQ DI, ret+48(FP)
+	RET
diff --git a/vendor/github.com/pierrec/lz4/decode_other.go b/vendor/github.com/pierrec/lz4/decode_other.go
new file mode 100644
index 0000000..b83a19a
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_other.go
@@ -0,0 +1,95 @@
+// +build !amd64 appengine !gc noasm
+
+package lz4
+
+func decodeBlock(dst, src []byte) (ret int) {
+	defer func() {
+		// It is now faster to let the runtime panic and recover on out of bound slice access
+		// than checking indices as we go along.
+		if recover() != nil {
+			ret = -2
+		}
+	}()
+
+	var si, di int
+	for {
+		// Literals and match lengths (token).
+		b := int(src[si])
+		si++
+
+		// Literals.
+		if lLen := b >> 4; lLen > 0 {
+			switch {
+			case lLen < 0xF && di+18 < len(dst) && si+16 < len(src):
+				// Shortcut 1
+				// if we have enough room in src and dst, and the literals length
+				// is small enough (0..14) then copy all 16 bytes, even if not all
+				// are part of the literals.
+				copy(dst[di:], src[si:si+16])
+				si += lLen
+				di += lLen
+				if mLen := b & 0xF; mLen < 0xF {
+					// Shortcut 2
+					// if the match length (4..18) fits within the literals, then copy
+					// all 18 bytes, even if not all are part of the literals.
+					mLen += 4
+					if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset {
+						i := di - offset
+						copy(dst[di:], dst[i:i+18])
+						si += 2
+						di += mLen
+						continue
+					}
+				}
+			case lLen == 0xF:
+				for src[si] == 0xFF {
+					lLen += 0xFF
+					si++
+				}
+				lLen += int(src[si])
+				si++
+				fallthrough
+			default:
+				copy(dst[di:di+lLen], src[si:si+lLen])
+				si += lLen
+				di += lLen
+			}
+		}
+		if si >= len(src) {
+			return di
+		}
+
+		offset := int(src[si]) | int(src[si+1])<<8
+		if offset == 0 {
+			return -2
+		}
+		si += 2
+
+		// Match.
+		mLen := b & 0xF
+		if mLen == 0xF {
+			for src[si] == 0xFF {
+				mLen += 0xFF
+				si++
+			}
+			mLen += int(src[si])
+			si++
+		}
+		mLen += minMatch
+
+		// Copy the match.
+		expanded := dst[di-offset:]
+		if mLen > offset {
+			// Efficiently copy the match dst[di-offset:di] into the dst slice.
+			bytesToCopy := offset * (mLen / offset)
+			for n := offset; n <= bytesToCopy+offset; n *= 2 {
+				copy(expanded[n:], expanded[:n])
+			}
+			di += bytesToCopy
+			mLen -= bytesToCopy
+		}
+		di += copy(dst[di:di+mLen], expanded[:mLen])
+	}
+
+	return di
+}
diff --git a/vendor/github.com/pierrec/lz4/go.mod b/vendor/github.com/pierrec/lz4/go.mod
new file mode 100644
index 0000000..f9f570a
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/go.mod
@@ -0,0 +1,3 @@
+module github.com/pierrec/lz4
+
+require github.com/pkg/profile v1.2.1
diff --git a/vendor/github.com/pierrec/lz4/go.sum b/vendor/github.com/pierrec/lz4/go.sum
new file mode 100644
index 0000000..6ca7598
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/go.sum
@@ -0,0 +1,2 @@
+github.com/pkg/profile v1.2.1 h1:F++O52m40owAmADcojzM+9gyjmMOY/T4oYJkgFDH8RE=
+github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA=
diff --git a/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go
new file mode 100644
index 0000000..850a6fd
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go
@@ -0,0 +1,222 @@
+// Package xxh32 implements the very fast XXH hashing algorithm (32-bit version).
+// (https://github.com/Cyan4973/XXH/)
+package xxh32
+
+import (
+	"encoding/binary"
+)
+
+const (
+	prime32_1 uint32 = 2654435761
+	prime32_2 uint32 = 2246822519
+	prime32_3 uint32 = 3266489917
+	prime32_4 uint32 = 668265263
+	prime32_5 uint32 = 374761393
+
+	prime32_1plus2 uint32 = 606290984
+	prime32_minus1 uint32 = 1640531535
+)
+
+// XXHZero represents an xxhash32 object with seed 0.
+type XXHZero struct {
+	v1       uint32
+	v2       uint32
+	v3       uint32
+	v4       uint32
+	totalLen uint64
+	buf      [16]byte
+	bufused  int
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// It does not change the underlying hash state.
+func (xxh XXHZero) Sum(b []byte) []byte {
+	h32 := xxh.Sum32()
+	return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
+}
+
+// Reset resets the Hash to its initial state.
+func (xxh *XXHZero) Reset() {
+	xxh.v1 = prime32_1plus2
+	xxh.v2 = prime32_2
+	xxh.v3 = 0
+	xxh.v4 = prime32_minus1
+	xxh.totalLen = 0
+	xxh.bufused = 0
+}
+
+// Size returns the number of bytes returned by Sum().
+func (xxh *XXHZero) Size() int {
+	return 4
+}
+
+// BlockSize gives the minimum number of bytes accepted by Write().
+func (xxh *XXHZero) BlockSize() int {
+	return 1
+}
+
+// Write adds input bytes to the Hash.
+// It never returns an error.
+func (xxh *XXHZero) Write(input []byte) (int, error) {
+	if xxh.totalLen == 0 {
+		xxh.Reset()
+	}
+	n := len(input)
+	m := xxh.bufused
+
+	xxh.totalLen += uint64(n)
+
+	r := len(xxh.buf) - m
+	if n < r {
+		copy(xxh.buf[m:], input)
+		xxh.bufused += len(input)
+		return n, nil
+	}
+
+	p := 0
+	// Causes compiler to work directly from registers instead of stack:
+	v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
+	if m > 0 {
+		// some data left from previous update
+		copy(xxh.buf[xxh.bufused:], input[:r])
+		xxh.bufused += len(input) - r
+
+		// fast rotl(13)
+		buf := xxh.buf[:16] // BCE hint.
+		v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime32_2) * prime32_1
+		v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime32_2) * prime32_1
+		v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime32_2) * prime32_1
+		v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime32_2) * prime32_1
+		p = r
+		xxh.bufused = 0
+	}
+
+	for n := n - 16; p <= n; p += 16 {
+		sub := input[p:][:16] //BCE hint for compiler
+		v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime32_2) * prime32_1
+		v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime32_2) * prime32_1
+		v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime32_2) * prime32_1
+		v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime32_2) * prime32_1
+	}
+	xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
+
+	copy(xxh.buf[xxh.bufused:], input[p:])
+	xxh.bufused += len(input) - p
+
+	return n, nil
+}
+
+// Sum32 returns the 32-bit hash value.
+func (xxh *XXHZero) Sum32() uint32 {
+	h32 := uint32(xxh.totalLen)
+	if h32 >= 16 {
+		h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4)
+	} else {
+		h32 += prime32_5
+	}
+
+	p := 0
+	n := xxh.bufused
+	buf := xxh.buf
+	for n := n - 4; p <= n; p += 4 {
+		h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime32_3
+		h32 = rol17(h32) * prime32_4
+	}
+	for ; p < n; p++ {
+		h32 += uint32(buf[p]) * prime32_5
+		h32 = rol11(h32) * prime32_1
+	}
+
+	h32 ^= h32 >> 15
+	h32 *= prime32_2
+	h32 ^= h32 >> 13
+	h32 *= prime32_3
+	h32 ^= h32 >> 16
+
+	return h32
+}
+
+// ChecksumZero returns the 32-bit hash value.
+func ChecksumZero(input []byte) uint32 {
+	n := len(input)
+	h32 := uint32(n)
+
+	if n < 16 {
+		h32 += prime32_5
+	} else {
+		v1 := prime32_1plus2
+		v2 := prime32_2
+		v3 := uint32(0)
+		v4 := prime32_minus1
+		p := 0
+		for n := n - 16; p <= n; p += 16 {
+			sub := input[p:][:16] //BCE hint for compiler
+			v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime32_2) * prime32_1
+			v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime32_2) * prime32_1
+			v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime32_2) * prime32_1
+			v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime32_2) * prime32_1
+		}
+		input = input[p:]
+		n -= p
+		h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
+	}
+
+	p := 0
+	for n := n - 4; p <= n; p += 4 {
+		h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime32_3
+		h32 = rol17(h32) * prime32_4
+	}
+	for p < n {
+		h32 += uint32(input[p]) * prime32_5
+		h32 = rol11(h32) * prime32_1
+		p++
+	}
+
+	h32 ^= h32 >> 15
+	h32 *= prime32_2
+	h32 ^= h32 >> 13
+	h32 *= prime32_3
+	h32 ^= h32 >> 16
+
+	return h32
+}
+
+// Uint32Zero hashes x with seed 0.
+func Uint32Zero(x uint32) uint32 {
+	h := prime32_5 + 4 + x*prime32_3
+	h = rol17(h) * prime32_4
+	h ^= h >> 15
+	h *= prime32_2
+	h ^= h >> 13
+	h *= prime32_3
+	h ^= h >> 16
+	return h
+}
+
+func rol1(u uint32) uint32 {
+	return u<<1 | u>>31
+}
+
+func rol7(u uint32) uint32 {
+	return u<<7 | u>>25
+}
+
+func rol11(u uint32) uint32 {
+	return u<<11 | u>>21
+}
+
+func rol12(u uint32) uint32 {
+	return u<<12 | u>>20
+}
+
+func rol13(u uint32) uint32 {
+	return u<<13 | u>>19
+}
+
+func rol17(u uint32) uint32 {
+	return u<<17 | u>>15
+}
+
+func rol18(u uint32) uint32 {
+	return u<<18 | u>>14
+}
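A short sketch of the zero-seed helpers above. Because xxh32 lives under internal/, only code rooted at github.com/pierrec/lz4 can import it, so this would have to live inside the vendored module (the test file and function name below are hypothetical); it simply checks that the streaming XXHZero and the one-shot ChecksumZero agree:

```go
// hypothetical xxh32_sketch_test.go inside the vendored lz4 package
package lz4

import (
	"testing"

	"github.com/pierrec/lz4/internal/xxh32"
)

func TestXXH32ZeroSketch(t *testing.T) {
	data := []byte("hello world")

	// One-shot, zero-seed checksum.
	sum := xxh32.ChecksumZero(data)

	// Streaming equivalent: a zero-value XXHZero is ready for use.
	var h xxh32.XXHZero
	h.Write(data[:5])
	h.Write(data[5:])

	if got := h.Sum32(); got != sum {
		t.Fatalf("streaming %08x != one-shot %08x", got, sum)
	}
}
```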
diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go
new file mode 100644
index 0000000..3580275
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4.go
@@ -0,0 +1,68 @@
+// Package lz4 implements reading and writing lz4 compressed data (a frame),
+// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
+//
+// Although the block level compression and decompression functions are exposed and are fully compatible
+// with the lz4 block format definition, they are low level and should not be used directly.
+// For a complete description of an lz4 compressed block, see:
+// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+//
+// See https://github.com/Cyan4973/lz4 for the reference C implementation.
+//
+package lz4
+
+const (
+	// Extension is the LZ4 frame file name extension
+	Extension = ".lz4"
+	// Version is the LZ4 frame format version
+	Version = 1
+
+	frameMagic     uint32 = 0x184D2204
+	frameSkipMagic uint32 = 0x184D2A50
+
+	// The following constants are used to setup the compression algorithm.
+	minMatch            = 4  // the minimum size of the match sequence size (4 bytes)
+	winSizeLog          = 16 // LZ4 64Kb window size limit
+	winSize             = 1 << winSizeLog
+	winMask             = winSize - 1 // 64Kb window of previous data for dependent blocks
+	compressedBlockFlag = 1 << 31
+	compressedBlockMask = compressedBlockFlag - 1
+
+	// hashLog determines the size of the hash table used to quickly find a previous match position.
+	// Its value influences the compression speed and memory usage, the lower the faster,
+	// but at the expense of the compression ratio.
+	// 16 seems to be the best compromise.
+	hashLog       = 16
+	hashTableSize = 1 << hashLog
+	hashShift     = uint((minMatch * 8) - hashLog)
+
+	mfLimit      = 8 + minMatch // The last match cannot start within the last 12 bytes.
+	skipStrength = 6            // variable step for fast scan
+)
+
+// Map the block max size ID to its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
+var (
+	bsMapID    = map[byte]int{4: 64 << 10, 5: 256 << 10, 6: 1 << 20, 7: 4 << 20}
+	bsMapValue = make(map[int]byte, len(bsMapID))
+)
+
+// Reversed.
+func init() {
+	for i, v := range bsMapID {
+		bsMapValue[v] = i
+	}
+}
+
+// Header describes the various flags that can be set on a Writer or obtained from a Reader.
+// The default values match those of the LZ4 frame format definition
+// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
+//
+// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
+// It is the caller's responsibility to check them if necessary.
+type Header struct {
+	BlockChecksum    bool   // Compressed blocks checksum flag.
+	NoChecksum       bool   // Frame checksum flag.
+	BlockMaxSize     int    // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
+	Size             uint64 // Frame total size. It is _not_ computed by the Writer.
+	CompressionLevel int    // Compression level (higher is better, use 0 for fastest compression).
+	done             bool   // Header processed flag (Read or Write and checked).
+}
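A hedged sketch of configuring the Header fields documented above on a Writer; the option values chosen here are arbitrary examples, and they must be set before the first Write since that call emits the frame header:

```go
package main

import (
	"bytes"
	"log"

	"github.com/pierrec/lz4"
)

func main() {
	var out bytes.Buffer
	zw := lz4.NewWriter(&out)

	// Options take effect only if set before the first Write,
	// which writes out the frame header.
	zw.Header = lz4.Header{
		BlockChecksum:    true,      // checksum every compressed block
		BlockMaxSize:     256 << 10, // one of 64KB, 256KB, 1MB, 4MB (the default)
		CompressionLevel: 4,         // 0 selects the fast compressor
	}

	if _, err := zw.Write([]byte("some payload")); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	log.Printf("header used: %s, frame size: %d bytes", zw.Header, out.Len())
}
```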
diff --git a/vendor/github.com/pierrec/lz4/lz4_go1.10.go b/vendor/github.com/pierrec/lz4/lz4_go1.10.go
new file mode 100644
index 0000000..9a0fb00
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4_go1.10.go
@@ -0,0 +1,29 @@
+//+build go1.10
+
+package lz4
+
+import (
+	"fmt"
+	"strings"
+)
+
+func (h Header) String() string {
+	var s strings.Builder
+
+	s.WriteString(fmt.Sprintf("%T{", h))
+	if h.BlockChecksum {
+		s.WriteString("BlockChecksum: true ")
+	}
+	if h.NoChecksum {
+		s.WriteString("NoChecksum: true ")
+	}
+	if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
+		s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
+	}
+	if l := h.CompressionLevel; l != 0 {
+		s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
+	}
+	s.WriteByte('}')
+
+	return s.String()
+}
diff --git a/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go
new file mode 100644
index 0000000..12c761a
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go
@@ -0,0 +1,29 @@
+//+build !go1.10
+
+package lz4
+
+import (
+	"bytes"
+	"fmt"
+)
+
+func (h Header) String() string {
+	var s bytes.Buffer
+
+	s.WriteString(fmt.Sprintf("%T{", h))
+	if h.BlockChecksum {
+		s.WriteString("BlockChecksum: true ")
+	}
+	if h.NoChecksum {
+		s.WriteString("NoChecksum: true ")
+	}
+	if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
+		s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
+	}
+	if l := h.CompressionLevel; l != 0 {
+		s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
+	}
+	s.WriteByte('}')
+
+	return s.String()
+}
diff --git a/vendor/github.com/pierrec/lz4/reader.go b/vendor/github.com/pierrec/lz4/reader.go
new file mode 100644
index 0000000..f08db47
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/reader.go
@@ -0,0 +1,295 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"io/ioutil"
+
+	"github.com/pierrec/lz4/internal/xxh32"
+)
+
+// Reader implements the LZ4 frame decoder.
+// The Header is set after the first call to Read().
+// The Header may change between Read() calls in case of concatenated frames.
+type Reader struct {
+	Header
+
+	buf      [8]byte       // Scrap buffer.
+	pos      int64         // Current position in src.
+	src      io.Reader     // Source.
+	zdata    []byte        // Compressed data.
+	data     []byte        // Uncompressed data.
+	idx      int           // Index of unread bytes into data.
+	checksum xxh32.XXHZero // Frame hash.
+}
+
+// NewReader returns a new LZ4 frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReader(src io.Reader) *Reader {
+	r := &Reader{src: src}
+	return r
+}
+
+// readHeader checks the frame magic number and parses the frame descriptor.
+// Skippable frames are supported even as a first frame, although the LZ4
+// specification recommends that skippable frames not be used as first frames.
+func (z *Reader) readHeader(first bool) error {
+	defer z.checksum.Reset()
+
+	buf := z.buf[:]
+	for {
+		magic, err := z.readUint32()
+		if err != nil {
+			z.pos += 4
+			if !first && err == io.ErrUnexpectedEOF {
+				return io.EOF
+			}
+			return err
+		}
+		if magic == frameMagic {
+			break
+		}
+		if magic>>8 != frameSkipMagic>>8 {
+			return ErrInvalid
+		}
+		skipSize, err := z.readUint32()
+		if err != nil {
+			return err
+		}
+		z.pos += 4
+		m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
+		if err != nil {
+			return err
+		}
+		z.pos += m
+	}
+
+	// Header.
+	if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
+		return err
+	}
+	z.pos += 8
+
+	b := buf[0]
+	if v := b >> 6; v != Version {
+		return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
+	}
+	if b>>5&1 == 0 {
+		return fmt.Errorf("lz4: block dependency not supported")
+	}
+	z.BlockChecksum = b>>4&1 > 0
+	frameSize := b>>3&1 > 0
+	z.NoChecksum = b>>2&1 == 0
+
+	bmsID := buf[1] >> 4 & 0x7
+	bSize, ok := bsMapID[bmsID]
+	if !ok {
+		return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
+	}
+	z.BlockMaxSize = bSize
+
+	// Allocate the compressed/uncompressed buffers.
+	// The compressed buffer cannot exceed the uncompressed one.
+	if n := 2 * bSize; cap(z.zdata) < n {
+		z.zdata = make([]byte, n, n)
+	}
+	if debugFlag {
+		debug("header block max size id=%d size=%d", bmsID, bSize)
+	}
+	z.zdata = z.zdata[:bSize]
+	z.data = z.zdata[:cap(z.zdata)][bSize:]
+	z.idx = len(z.data)
+
+	z.checksum.Write(buf[0:2])
+
+	if frameSize {
+		buf := buf[:8]
+		if _, err := io.ReadFull(z.src, buf); err != nil {
+			return err
+		}
+		z.Size = binary.LittleEndian.Uint64(buf)
+		z.pos += 8
+		z.checksum.Write(buf)
+	}
+
+	// Header checksum.
+	if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
+		return err
+	}
+	z.pos++
+	if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
+		return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
+	}
+
+	z.Header.done = true
+	if debugFlag {
+		debug("header read: %v", z.Header)
+	}
+
+	return nil
+}
+
+// Read decompresses data from the underlying source into the supplied buffer.
+//
+// Since there can be multiple streams concatenated, Header values may
+// change between calls to Read(). If that is the case, no data is actually read from
+// the underlying io.Reader, to allow for potential input buffer resizing.
+func (z *Reader) Read(buf []byte) (int, error) {
+	if debugFlag {
+		debug("Read buf len=%d", len(buf))
+	}
+	if !z.Header.done {
+		if err := z.readHeader(true); err != nil {
+			return 0, err
+		}
+		if debugFlag {
+			debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
+				len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
+		}
+	}
+
+	if len(buf) == 0 {
+		return 0, nil
+	}
+
+	if z.idx == len(z.data) {
+		// No data ready for reading, process the next block.
+		if debugFlag {
+			debug("reading block from writer")
+		}
+		// Block length: 0 = end of frame, highest bit set: uncompressed.
+		bLen, err := z.readUint32()
+		if err != nil {
+			return 0, err
+		}
+		z.pos += 4
+
+		if bLen == 0 {
+			// End of frame reached.
+			if !z.NoChecksum {
+				// Validate the frame checksum.
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				if debugFlag {
+					debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
+				}
+				z.pos += 4
+				if h := z.checksum.Sum32(); checksum != h {
+					return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+			// Get ready for the next concatenated frame and keep the position.
+			pos := z.pos
+			z.Reset(z.src)
+			z.pos = pos
+
+			// Since multiple frames can be concatenated, check for more.
+			return 0, z.readHeader(false)
+		}
+
+		if debugFlag {
+			debug("raw block size %d", bLen)
+		}
+		if bLen&compressedBlockFlag > 0 {
+			// Uncompressed block.
+			bLen &= compressedBlockMask
+			if debugFlag {
+				debug("uncompressed block size %d", bLen)
+			}
+			if int(bLen) > cap(z.data) {
+				return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
+			}
+			z.data = z.data[:bLen]
+			if _, err := io.ReadFull(z.src, z.data); err != nil {
+				return 0, err
+			}
+			z.pos += int64(bLen)
+
+			if z.BlockChecksum {
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				z.pos += 4
+
+				if h := xxh32.ChecksumZero(z.data); h != checksum {
+					return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+		} else {
+			// Compressed block.
+			if debugFlag {
+				debug("compressed block size %d", bLen)
+			}
+			if int(bLen) > cap(z.data) {
+				return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
+			}
+			zdata := z.zdata[:bLen]
+			if _, err := io.ReadFull(z.src, zdata); err != nil {
+				return 0, err
+			}
+			z.pos += int64(bLen)
+
+			if z.BlockChecksum {
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				z.pos += 4
+
+				if h := xxh32.ChecksumZero(zdata); h != checksum {
+					return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+			n, err := UncompressBlock(zdata, z.data)
+			if err != nil {
+				return 0, err
+			}
+			z.data = z.data[:n]
+		}
+
+		if !z.NoChecksum {
+			z.checksum.Write(z.data)
+			if debugFlag {
+				debug("current frame checksum %x", z.checksum.Sum32())
+			}
+		}
+		z.idx = 0
+	}
+
+	n := copy(buf, z.data[z.idx:])
+	z.idx += n
+	if debugFlag {
+		debug("copied %d bytes to input", n)
+	}
+
+	return n, nil
+}
+
+// Reset discards the Reader's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) {
+	z.Header = Header{}
+	z.pos = 0
+	z.src = r
+	z.zdata = z.zdata[:0]
+	z.data = z.data[:0]
+	z.idx = 0
+	z.checksum.Reset()
+}
+
+// readUint32 reads an uint32 into the supplied buffer.
+// The idea is to make use of the already allocated buffers avoiding additional allocations.
+func (z *Reader) readUint32() (uint32, error) {
+	buf := z.buf[:4]
+	_, err := io.ReadFull(z.src, buf)
+	x := binary.LittleEndian.Uint32(buf)
+	return x, err
+}
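A small sketch of reusing one Reader across sources via Reset, as described above; the frame helper and payloads are illustrative assumptions:

```go
package main

import (
	"bytes"
	"io"
	"log"

	"github.com/pierrec/lz4"
)

// frame compresses p into a standalone LZ4 frame.
func frame(p []byte) *bytes.Buffer {
	var b bytes.Buffer
	zw := lz4.NewWriter(&b)
	if _, err := zw.Write(p); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
	return &b
}

func main() {
	f1 := frame([]byte("first frame"))
	f2 := frame([]byte("second frame"))

	var out bytes.Buffer
	zr := lz4.NewReader(f1)
	if _, err := io.Copy(&out, zr); err != nil {
		log.Fatal(err)
	}

	// Reset drops all decoder state (including the previous Header) so the
	// same Reader can decode the next source without a new allocation.
	zr.Reset(f2)
	if _, err := io.Copy(&out, zr); err != nil {
		log.Fatal(err)
	}

	log.Printf("decoded: %q", out.String())
}
```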
diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go
new file mode 100644
index 0000000..0120438
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/writer.go
@@ -0,0 +1,267 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+
+	"github.com/pierrec/lz4/internal/xxh32"
+)
+
+// Writer implements the LZ4 frame encoder.
+type Writer struct {
+	Header
+
+	buf       [19]byte      // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
+	dst       io.Writer     // Destination.
+	checksum  xxh32.XXHZero // Frame checksum.
+	zdata     []byte        // Compressed data.
+	data      []byte        // Data to be compressed.
+	idx       int           // Index into data.
+	hashtable [winSize]int  // Hash table used in CompressBlock().
+}
+
+// NewWriter returns a new LZ4 frame encoder.
+// No access to the underlying io.Writer is performed.
+// The supplied Header is checked at the first Write.
+// It is OK to change it before the first Write, but not afterwards until a Reset() is performed.
+func NewWriter(dst io.Writer) *Writer {
+	return &Writer{dst: dst}
+}
+
+// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
+func (z *Writer) writeHeader() error {
+	// Default to 4Mb if BlockMaxSize is not set.
+	if z.Header.BlockMaxSize == 0 {
+		z.Header.BlockMaxSize = bsMapID[7]
+	}
+	// The only option that needs to be validated.
+	bSize := z.Header.BlockMaxSize
+	bSizeID, ok := bsMapValue[bSize]
+	if !ok {
+		return fmt.Errorf("lz4: invalid block max size: %d", bSize)
+	}
+	// Allocate the compressed/uncompressed buffers.
+	// The compressed buffer cannot exceed the uncompressed one.
+	if n := 2 * bSize; cap(z.zdata) < n {
+		z.zdata = make([]byte, n, n)
+	}
+	z.zdata = z.zdata[:bSize]
+	z.data = z.zdata[:cap(z.zdata)][bSize:]
+	z.idx = 0
+
+	// Size is optional.
+	buf := z.buf[:]
+
+	// Set the fixed size data: magic number, block max size and flags.
+	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
+	flg := byte(Version << 6)
+	flg |= 1 << 5 // No block dependency.
+	if z.Header.BlockChecksum {
+		flg |= 1 << 4
+	}
+	if z.Header.Size > 0 {
+		flg |= 1 << 3
+	}
+	if !z.Header.NoChecksum {
+		flg |= 1 << 2
+	}
+	buf[4] = flg
+	buf[5] = bSizeID << 4
+
+	// Current buffer size: magic(4) + flags(1) + block max size (1).
+	n := 6
+	// Optional items.
+	if z.Header.Size > 0 {
+		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
+		n += 8
+	}
+
+	// The header checksum includes the flags, block max size and optional Size.
+	buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
+	z.checksum.Reset()
+
+	// Header ready, write it out.
+	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
+		return err
+	}
+	z.Header.done = true
+	if debugFlag {
+		debug("wrote header %v", z.Header)
+	}
+
+	return nil
+}
+
+// Write compresses data from the supplied buffer into the underlying io.Writer.
+// Write does not return until the data has been written.
+func (z *Writer) Write(buf []byte) (int, error) {
+	if !z.Header.done {
+		if err := z.writeHeader(); err != nil {
+			return 0, err
+		}
+	}
+	if debugFlag {
+		debug("input buffer len=%d index=%d", len(buf), z.idx)
+	}
+
+	zn := len(z.data)
+	var n int
+	for len(buf) > 0 {
+		if z.idx == 0 && len(buf) >= zn {
+			// Avoid a copy as there is enough data for a block.
+			if err := z.compressBlock(buf[:zn]); err != nil {
+				return n, err
+			}
+			n += zn
+			buf = buf[zn:]
+			continue
+		}
+		// Accumulate the data to be compressed.
+		m := copy(z.data[z.idx:], buf)
+		n += m
+		z.idx += m
+		buf = buf[m:]
+		if debugFlag {
+			debug("%d bytes copied to buf, current index %d", n, z.idx)
+		}
+
+		if z.idx < len(z.data) {
+			// Buffer not filled.
+			if debugFlag {
+				debug("need more data for compression")
+			}
+			return n, nil
+		}
+
+		// Buffer full.
+		if err := z.compressBlock(z.data); err != nil {
+			return n, err
+		}
+		z.idx = 0
+	}
+
+	return n, nil
+}
+
+// compressBlock compresses a block.
+func (z *Writer) compressBlock(data []byte) error {
+	if !z.NoChecksum {
+		z.checksum.Write(data)
+	}
+
+	// The compressed block size cannot exceed the input's.
+	var zn int
+	var err error
+
+	if level := z.Header.CompressionLevel; level != 0 {
+		zn, err = CompressBlockHC(data, z.zdata, level)
+	} else {
+		zn, err = CompressBlock(data, z.zdata, z.hashtable[:])
+	}
+
+	var zdata []byte
+	var bLen uint32
+	if debugFlag {
+		debug("block compression %d => %d", len(data), zn)
+	}
+	if err == nil && zn > 0 && zn < len(data) {
+		// Compressible and compressed size smaller than uncompressed: ok!
+		bLen = uint32(zn)
+		zdata = z.zdata[:zn]
+	} else {
+		// Uncompressed block.
+		bLen = uint32(len(data)) | compressedBlockFlag
+		zdata = data
+	}
+	if debugFlag {
+		debug("block compression to be written len=%d data len=%d", bLen, len(zdata))
+	}
+
+	// Write the block.
+	if err := z.writeUint32(bLen); err != nil {
+		return err
+	}
+	if _, err := z.dst.Write(zdata); err != nil {
+		return err
+	}
+
+	if z.BlockChecksum {
+		checksum := xxh32.ChecksumZero(zdata)
+		if debugFlag {
+			debug("block checksum %x", checksum)
+		}
+		if err := z.writeUint32(checksum); err != nil {
+			return err
+		}
+	}
+	if debugFlag {
+		debug("current frame checksum %x", z.checksum.Sum32())
+	}
+
+	return nil
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+// Flush does not return until the data has been written.
+// If the underlying writer returns an error, Flush returns that error.
+func (z *Writer) Flush() error {
+	if debugFlag {
+		debug("flush with index %d", z.idx)
+	}
+	if z.idx == 0 {
+		return nil
+	}
+
+	return z.compressBlock(z.data[:z.idx])
+}
+
+// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
+func (z *Writer) Close() error {
+	if !z.Header.done {
+		if err := z.writeHeader(); err != nil {
+			return err
+		}
+	}
+
+	if err := z.Flush(); err != nil {
+		return err
+	}
+
+	if debugFlag {
+		debug("writing last empty block")
+	}
+	if err := z.writeUint32(0); err != nil {
+		return err
+	}
+	if !z.NoChecksum {
+		checksum := z.checksum.Sum32()
+		if debugFlag {
+			debug("stream checksum %x", checksum)
+		}
+		if err := z.writeUint32(checksum); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// Reset clears the state of the Writer z such that it is equivalent to its
+// initial state from NewWriter, but instead writing to w.
+// No access to the underlying io.Writer is performed.
+func (z *Writer) Reset(w io.Writer) {
+	z.Header = Header{}
+	z.dst = w
+	z.checksum.Reset()
+	z.zdata = z.zdata[:0]
+	z.data = z.data[:0]
+	z.idx = 0
+}
+
+// writeUint32 writes a uint32 to the underlying writer.
+func (z *Writer) writeUint32(x uint32) error {
+	buf := z.buf[:4]
+	binary.LittleEndian.PutUint32(buf, x)
+	_, err := z.dst.Write(buf)
+	return err
+}
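Similarly, a short sketch of reusing a single Writer via Reset; note that Reset clears the Header, so any options have to be reapplied before the next Write (the helper name and payloads below are illustrative):

```go
package main

import (
	"bytes"
	"log"

	"github.com/pierrec/lz4"
)

// compressTo reuses zw to write one complete frame into dst.
func compressTo(zw *lz4.Writer, dst *bytes.Buffer, p []byte) {
	zw.Reset(dst)
	zw.Header.BlockMaxSize = 64 << 10 // Reset cleared the previous options.
	if _, err := zw.Write(p); err != nil {
		log.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		log.Fatal(err)
	}
}

func main() {
	zw := lz4.NewWriter(nil) // destination is supplied via Reset below

	var a, b bytes.Buffer
	compressTo(zw, &a, []byte("first payload"))
	compressTo(zw, &b, []byte("second payload"))

	log.Printf("frames: %d and %d bytes", a.Len(), b.Len())
}
```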