Add NETCONF notification for ONU activation and Kafka client to receive events, update dependencies

Change-Id: I5f768fa8077ef7c64e00a534744ca47492344935
diff --git a/vendor/github.com/pierrec/lz4/.gitignore b/vendor/github.com/pierrec/lz4/.gitignore
new file mode 100644
index 0000000..5e98735
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.gitignore
@@ -0,0 +1,34 @@
+# Created by https://www.gitignore.io/api/macos
+
+### macOS ###
+*.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# End of https://www.gitignore.io/api/macos
+
+cmd/*/*exe
+.idea
\ No newline at end of file
diff --git a/vendor/github.com/pierrec/lz4/.travis.yml b/vendor/github.com/pierrec/lz4/.travis.yml
new file mode 100644
index 0000000..fd6c6db
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/.travis.yml
@@ -0,0 +1,24 @@
+language: go
+
+env:
+  - GO111MODULE=off
+
+go:
+  - 1.9.x
+  - 1.10.x
+  - 1.11.x
+  - 1.12.x
+  - master
+
+matrix:
+ fast_finish: true
+ allow_failures:
+   - go: master
+
+sudo: false
+
+script: 
+ - go test -v -cpu=2
+ - go test -v -cpu=2 -race
+ - go test -v -cpu=2 -tags noasm
+ - go test -v -cpu=2 -race -tags noasm
diff --git a/vendor/github.com/pierrec/lz4/LICENSE b/vendor/github.com/pierrec/lz4/LICENSE
new file mode 100644
index 0000000..bd899d8
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Pierre Curto
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of xxHash nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/vendor/github.com/pierrec/lz4/README.md b/vendor/github.com/pierrec/lz4/README.md
new file mode 100644
index 0000000..4ee388e
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/README.md
@@ -0,0 +1,90 @@
+# lz4 : LZ4 compression in pure Go
+
+[![GoDoc](https://godoc.org/github.com/pierrec/lz4?status.svg)](https://godoc.org/github.com/pierrec/lz4)
+[![Build Status](https://travis-ci.org/pierrec/lz4.svg?branch=master)](https://travis-ci.org/pierrec/lz4)
+[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4)
+[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags)
+
+## Overview
+
+This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks.
+The implementation is based on the reference C [one](https://github.com/lz4/lz4).
+
+## Install
+
+Assuming you have the go toolchain installed:
+
+```
+go get github.com/pierrec/lz4
+```
+
+There is a command line interface tool to compress and decompress LZ4 files.
+
+```
+go install github.com/pierrec/lz4/cmd/lz4c
+```
+
+Usage
+
+```
+Usage of lz4c:
+  -version
+        print the program version
+
+Subcommands:
+Compress the given files or from stdin to stdout.
+compress [arguments] [<file name> ...]
+  -bc
+        enable block checksum
+  -l int
+        compression level (0=fastest)
+  -sc
+        disable stream checksum
+  -size string
+        block max size [64K,256K,1M,4M] (default "4M")
+
+Uncompress the given files or from stdin to stdout.
+uncompress [arguments] [<file name> ...]
+
+```
+
+
+## Example
+
+```
+// Compress and uncompress an input string.
+s := "hello world"
+r := strings.NewReader(s)
+
+// The pipe will uncompress the data from the writer.
+pr, pw := io.Pipe()
+zw := lz4.NewWriter(pw)
+zr := lz4.NewReader(pr)
+
+go func() {
+	// Compress the input string.
+	_, _ = io.Copy(zw, r)
+	_ = zw.Close() // Make sure the writer is closed
+	_ = pw.Close() // Terminate the pipe
+}()
+
+_, _ = io.Copy(os.Stdout, zr)
+
+// Output:
+// hello world
+```
+
+## Contributing
+
+Contributions are very welcome for bug fixing, performance improvements...!
+
+- Open an issue with a proper description
+- Send a pull request with appropriate test case(s)
+
+## Contributors
+
+Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors)  so far!
+
+Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder.
+
+Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code.
diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go
new file mode 100644
index 0000000..664d9be
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/block.go
@@ -0,0 +1,413 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"math/bits"
+	"sync"
+)
+
+// blockHash hashes the lower 6 bytes into a value < htSize.
+func blockHash(x uint64) uint32 {
+	const prime6bytes = 227718039650203
+	return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
+}
+
+// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
+func CompressBlockBound(n int) int {
+	return n + n/255 + 16
+}
+
+// UncompressBlock uncompresses the source buffer into the destination one,
+// and returns the uncompressed size.
+//
+// The destination buffer must be sized appropriately.
+//
+// An error is returned if the source data is invalid or the destination buffer is too small.
+func UncompressBlock(src, dst []byte) (int, error) {
+	if len(src) == 0 {
+		return 0, nil
+	}
+	if di := decodeBlock(dst, src); di >= 0 {
+		return di, nil
+	}
+	return 0, ErrInvalidSourceShortBuffer
+}
+
+// CompressBlock compresses the source buffer into the destination one.
+// This is the fast version of LZ4 compression and also the default one.
+//
+// The argument hashTable is scratch space for a hash table used by the
+// compressor. If provided, it should have length at least 1<<16. If it is
+// shorter (or nil), CompressBlock allocates its own hash table.
+//
+// The size of the compressed data is returned.
+//
+// If the destination buffer size is lower than CompressBlockBound and
+// the compressed size is 0 and no error, then the data is incompressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlock(src, dst []byte, hashTable []int) (_ int, err error) {
+	defer recoverBlock(&err)
+
+	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
+	isNotCompressible := len(dst) < CompressBlockBound(len(src))
+
+	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
+	// This significantly speeds up incompressible data and usually has very small impact on compression.
+	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
+	const adaptSkipLog = 7
+	if len(hashTable) < htSize {
+		htIface := htPool.Get()
+		defer htPool.Put(htIface)
+		hashTable = (*(htIface).(*[htSize]int))[:]
+	}
+	// Prove to the compiler the table has at least htSize elements.
+	// The compiler can see that "uint32() >> hashShift" cannot be out of bounds.
+	hashTable = hashTable[:htSize]
+
+	// si: Current position of the search.
+	// anchor: Position of the current literals.
+	var si, di, anchor int
+	sn := len(src) - mfLimit
+	if sn <= 0 {
+		goto lastLiterals
+	}
+
+	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
+	for si < sn {
+		// Hash the next 6 bytes (sequence)...
+		match := binary.LittleEndian.Uint64(src[si:])
+		h := blockHash(match)
+		h2 := blockHash(match >> 8)
+
+		// We check a match at s, s+1 and s+2 and pick the first one we get.
+		// Checking 3 only requires us to load the source one.
+		ref := hashTable[h]
+		ref2 := hashTable[h2]
+		hashTable[h] = si
+		hashTable[h2] = si + 1
+		offset := si - ref
+
+		// If offset <= 0 we got an old entry in the hash table.
+		if offset <= 0 || offset >= winSize || // Out of window.
+			uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
+			// No match. Start calculating another hash.
+			// The processor can usually do this out-of-order.
+			h = blockHash(match >> 16)
+			ref = hashTable[h]
+
+			// Check the second match at si+1
+			si += 1
+			offset = si - ref2
+
+			if offset <= 0 || offset >= winSize ||
+				uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
+				// No match. Check the third match at si+2
+				si += 1
+				offset = si - ref
+				hashTable[h] = si
+
+				if offset <= 0 || offset >= winSize ||
+					uint32(match>>16) != binary.LittleEndian.Uint32(src[ref:]) {
+					// Skip one extra byte (at si+3) before we check 3 matches again.
+					si += 2 + (si-anchor)>>adaptSkipLog
+					continue
+				}
+			}
+		}
+
+		// Match found.
+		lLen := si - anchor // Literal length.
+		// We already matched 4 bytes.
+		mLen := 4
+
+		// Extend backwards if we can, reducing literals.
+		tOff := si - offset - 1
+		for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
+			si--
+			tOff--
+			lLen--
+			mLen++
+		}
+
+		// Add the match length, so we continue search at the end.
+		// Use mLen to store the offset base.
+		si, mLen = si+mLen, si+minMatch
+
+		// Find the longest match by looking by batches of 8 bytes.
+		for si+8 < sn {
+			x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
+			if x == 0 {
+				si += 8
+			} else {
+				// Stop is first non-zero byte.
+				si += bits.TrailingZeros64(x) >> 3
+				break
+			}
+		}
+
+		mLen = si - mLen
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// Encode literals length.
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			di++
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(l)
+		}
+		di++
+
+		// Literals.
+		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
+		di += lLen + 2
+		anchor = si
+
+		// Encode offset.
+		_ = dst[di] // Bound check elimination.
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// Encode match length part 2.
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(mLen)
+			di++
+		}
+		// Check if we can load next values.
+		if si >= sn {
+			break
+		}
+		// Hash match end-2
+		h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
+		hashTable[h] = si - 2
+	}
+
+lastLiterals:
+	if isNotCompressible && anchor == 0 {
+		// Incompressible.
+		return 0, nil
+	}
+
+	// Last literals.
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		di++
+		for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			di++
+		}
+		dst[di] = byte(lLen)
+	}
+	di++
+
+	// Write the last literals.
+	if isNotCompressible && di >= anchor {
+		// Incompressible.
+		return 0, nil
+	}
+	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
+	return di, nil
+}
+
+// Pool of hash tables for CompressBlock.
+var htPool = sync.Pool{
+	New: func() interface{} {
+		return new([htSize]int)
+	},
+}
+
+// blockHash hashes 4 bytes into a value < winSize.
+func blockHashHC(x uint32) uint32 {
+	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
+	return x * hasher >> (32 - winSizeLog)
+}
+
+// CompressBlockHC compresses the source buffer src into the destination dst
+// with max search depth (use 0 or negative value for no max).
+//
+// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
+//
+// The size of the compressed data is returned.
+//
+// If the destination buffer size is lower than CompressBlockBound and
+// the compressed size is 0 and no error, then the data is incompressible.
+//
+// An error is returned if the destination buffer is too small.
+func CompressBlockHC(src, dst []byte, depth int) (_ int, err error) {
+	defer recoverBlock(&err)
+
+	// Return 0, nil only if the destination buffer size is < CompressBlockBound.
+	isNotCompressible := len(dst) < CompressBlockBound(len(src))
+
+	// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
+	// This significantly speeds up incompressible data and usually has very small impact on compression.
+	// bytes to skip =  1 + (bytes since last match >> adaptSkipLog)
+	const adaptSkipLog = 7
+
+	var si, di, anchor int
+
+	// hashTable: stores the last position found for a given hash
+	// chainTable: stores previous positions for a given hash
+	var hashTable, chainTable [winSize]int
+
+	if depth <= 0 {
+		depth = winSize
+	}
+
+	sn := len(src) - mfLimit
+	if sn <= 0 {
+		goto lastLiterals
+	}
+
+	for si < sn {
+		// Hash the next 4 bytes (sequence).
+		match := binary.LittleEndian.Uint32(src[si:])
+		h := blockHashHC(match)
+
+		// Follow the chain until out of window and give the longest match.
+		mLen := 0
+		offset := 0
+		for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] {
+			// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
+			// must match to improve on the match length.
+			if src[next+mLen] != src[si+mLen] {
+				continue
+			}
+			ml := 0
+			// Compare the current position with a previous with the same hash.
+			for ml < sn-si {
+				x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
+				if x == 0 {
+					ml += 8
+				} else {
+					// Stop is first non-zero byte.
+					ml += bits.TrailingZeros64(x) >> 3
+					break
+				}
+			}
+			if ml < minMatch || ml <= mLen {
+				// Match too small (<minMath) or smaller than the current match.
+				continue
+			}
+			// Found a longer match, keep its position and length.
+			mLen = ml
+			offset = si - next
+			// Try another previous position with the same hash.
+			try--
+		}
+		chainTable[si&winMask] = hashTable[h]
+		hashTable[h] = si
+
+		// No match found.
+		if mLen == 0 {
+			si += 1 + (si-anchor)>>adaptSkipLog
+			continue
+		}
+
+		// Match found.
+		// Update hash/chain tables with overlapping bytes:
+		// si already hashed, add everything from si+1 up to the match length.
+		winStart := si + 1
+		if ws := si + mLen - winSize; ws > winStart {
+			winStart = ws
+		}
+		for si, ml := winStart, si+mLen; si < ml; {
+			match >>= 8
+			match |= uint32(src[si+3]) << 24
+			h := blockHashHC(match)
+			chainTable[si&winMask] = hashTable[h]
+			hashTable[h] = si
+			si++
+		}
+
+		lLen := si - anchor
+		si += mLen
+		mLen -= minMatch // Match length does not include minMatch.
+
+		if mLen < 0xF {
+			dst[di] = byte(mLen)
+		} else {
+			dst[di] = 0xF
+		}
+
+		// Encode literals length.
+		if lLen < 0xF {
+			dst[di] |= byte(lLen << 4)
+		} else {
+			dst[di] |= 0xF0
+			di++
+			l := lLen - 0xF
+			for ; l >= 0xFF; l -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(l)
+		}
+		di++
+
+		// Literals.
+		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
+		di += lLen
+		anchor = si
+
+		// Encode offset.
+		di += 2
+		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
+
+		// Encode match length part 2.
+		if mLen >= 0xF {
+			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
+				dst[di] = 0xFF
+				di++
+			}
+			dst[di] = byte(mLen)
+			di++
+		}
+	}
+
+	if isNotCompressible && anchor == 0 {
+		// Incompressible.
+		return 0, nil
+	}
+
+	// Last literals.
+lastLiterals:
+	lLen := len(src) - anchor
+	if lLen < 0xF {
+		dst[di] = byte(lLen << 4)
+	} else {
+		dst[di] = 0xF0
+		di++
+		lLen -= 0xF
+		for ; lLen >= 0xFF; lLen -= 0xFF {
+			dst[di] = 0xFF
+			di++
+		}
+		dst[di] = byte(lLen)
+	}
+	di++
+
+	// Write the last literals.
+	if isNotCompressible && di >= anchor {
+		// Incompressible.
+		return 0, nil
+	}
+	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
+	return di, nil
+}
diff --git a/vendor/github.com/pierrec/lz4/debug.go b/vendor/github.com/pierrec/lz4/debug.go
new file mode 100644
index 0000000..bc5e78d
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/debug.go
@@ -0,0 +1,23 @@
+// +build lz4debug
+
+package lz4
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"runtime"
+)
+
+const debugFlag = true
+
+func debug(args ...interface{}) {
+	_, file, line, _ := runtime.Caller(1)
+	file = filepath.Base(file)
+
+	f := fmt.Sprintf("LZ4: %s:%d %s", file, line, args[0])
+	if f[len(f)-1] != '\n' {
+		f += "\n"
+	}
+	fmt.Fprintf(os.Stderr, f, args[1:]...)
+}
diff --git a/vendor/github.com/pierrec/lz4/debug_stub.go b/vendor/github.com/pierrec/lz4/debug_stub.go
new file mode 100644
index 0000000..44211ad
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/debug_stub.go
@@ -0,0 +1,7 @@
+// +build !lz4debug
+
+package lz4
+
+const debugFlag = false
+
+func debug(args ...interface{}) {}
diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.go b/vendor/github.com/pierrec/lz4/decode_amd64.go
new file mode 100644
index 0000000..43cc14f
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_amd64.go
@@ -0,0 +1,8 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+package lz4
+
+//go:noescape
+func decodeBlock(dst, src []byte) int
diff --git a/vendor/github.com/pierrec/lz4/decode_amd64.s b/vendor/github.com/pierrec/lz4/decode_amd64.s
new file mode 100644
index 0000000..20fef39
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_amd64.s
@@ -0,0 +1,375 @@
+// +build !appengine
+// +build gc
+// +build !noasm
+
+#include "textflag.h"
+
+// AX scratch
+// BX scratch
+// CX scratch
+// DX token
+//
+// DI &dst
+// SI &src
+// R8 &dst + len(dst)
+// R9 &src + len(src)
+// R11 &dst
+// R12 short output end
+// R13 short input end
+// func decodeBlock(dst, src []byte) int
+// using 50 bytes of stack currently
+TEXT ·decodeBlock(SB), NOSPLIT, $64-56
+	MOVQ dst_base+0(FP), DI
+	MOVQ DI, R11
+	MOVQ dst_len+8(FP), R8
+	ADDQ DI, R8
+
+	MOVQ src_base+24(FP), SI
+	MOVQ src_len+32(FP), R9
+	ADDQ SI, R9
+
+	// shortcut ends
+	// short output end
+	MOVQ R8, R12
+	SUBQ $32, R12
+	// short input end
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+loop:
+	// for si < len(src)
+	CMPQ SI, R9
+	JGE end
+
+	// token := uint32(src[si])
+	MOVBQZX (SI), DX
+	INCQ SI
+
+	// lit_len = token >> 4
+	// if lit_len > 0
+	// CX = lit_len
+	MOVQ DX, CX
+	SHRQ $4, CX
+
+	// if lit_len != 0xF
+	CMPQ CX, $0xF
+	JEQ lit_len_loop_pre
+	CMPQ DI, R12
+	JGE lit_len_loop_pre
+	CMPQ SI, R13
+	JGE lit_len_loop_pre
+
+	// copy shortcut
+
+	// A two-stage shortcut for the most common case:
+	// 1) If the literal length is 0..14, and there is enough space,
+	// enter the shortcut and copy 16 bytes on behalf of the literals
+	// (in the fast mode, only 8 bytes can be safely copied this way).
+	// 2) Further if the match length is 4..18, copy 18 bytes in a similar
+	// manner; but we ensure that there's enough space in the output for
+	// those 18 bytes earlier, upon entering the shortcut (in other words,
+	// there is a combined check for both stages).
+
+	// copy literal
+	MOVOU (SI), X0
+	MOVOU X0, (DI)
+	ADDQ CX, DI
+	ADDQ CX, SI
+
+	MOVQ DX, CX
+	ANDQ $0xF, CX
+
+	// The second stage: prepare for match copying, decode full info.
+	// If it doesn't work out, the info won't be wasted.
+	// offset := uint16(data[:2])
+	MOVWQZX (SI), DX
+	ADDQ $2, SI
+
+	MOVQ DI, AX
+	SUBQ DX, AX
+	CMPQ AX, DI
+	JGT err_short_buf
+
+	// if we can't do the second stage then jump straight to read the
+	// match length, we already have the offset.
+	CMPQ CX, $0xF
+	JEQ match_len_loop_pre
+	CMPQ DX, $8
+	JLT match_len_loop_pre
+	CMPQ AX, R11
+	JLT err_short_buf
+
+	// memcpy(op + 0, match + 0, 8);
+	MOVQ (AX), BX
+	MOVQ BX, (DI)
+	// memcpy(op + 8, match + 8, 8);
+	MOVQ 8(AX), BX
+	MOVQ BX, 8(DI)
+	// memcpy(op +16, match +16, 2);
+	MOVW 16(AX), BX
+	MOVW BX, 16(DI)
+
+	ADDQ $4, DI // minmatch
+	ADDQ CX, DI
+
+	// shortcut complete, load next token
+	JMP loop
+
+lit_len_loop_pre:
+	// if lit_len > 0
+	CMPQ CX, $0
+	JEQ offset
+	CMPQ CX, $0xF
+	JNE copy_literal
+
+lit_len_loop:
+	// for src[si] == 0xFF
+	CMPB (SI), $0xFF
+	JNE lit_len_finalise
+
+	// bounds check src[si+1]
+	MOVQ SI, AX
+	ADDQ $1, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	// lit_len += 0xFF
+	ADDQ $0xFF, CX
+	INCQ SI
+	JMP lit_len_loop
+
+lit_len_finalise:
+	// lit_len += int(src[si])
+	// si++
+	MOVBQZX (SI), AX
+	ADDQ AX, CX
+	INCQ SI
+
+copy_literal:
+	// bounds check src and dst
+	MOVQ SI, AX
+	ADDQ CX, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	MOVQ DI, AX
+	ADDQ CX, AX
+	CMPQ AX, R8
+	JGT err_short_buf
+
+	// whats a good cut off to call memmove?
+	CMPQ CX, $16
+	JGT memmove_lit
+
+	// if len(dst[di:]) < 16
+	MOVQ R8, AX
+	SUBQ DI, AX
+	CMPQ AX, $16
+	JLT memmove_lit
+
+	// if len(src[si:]) < 16
+	MOVQ R9, AX
+	SUBQ SI, AX
+	CMPQ AX, $16
+	JLT memmove_lit
+
+	MOVOU (SI), X0
+	MOVOU X0, (DI)
+
+	JMP finish_lit_copy
+
+memmove_lit:
+	// memmove(to, from, len)
+	MOVQ DI, 0(SP)
+	MOVQ SI, 8(SP)
+	MOVQ CX, 16(SP)
+	// spill
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP) // need len to inc SI, DI after
+	MOVB DX, 48(SP)
+	CALL runtime·memmove(SB)
+
+	// restore registers
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+	MOVB 48(SP), DX
+
+	// recalc initial values
+	MOVQ dst_base+0(FP), R8
+	MOVQ R8, R11
+	ADDQ dst_len+8(FP), R8
+	MOVQ src_base+24(FP), R9
+	ADDQ src_len+32(FP), R9
+	MOVQ R8, R12
+	SUBQ $32, R12
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+finish_lit_copy:
+	ADDQ CX, SI
+	ADDQ CX, DI
+
+	CMPQ SI, R9
+	JGE end
+
+offset:
+	// CX := mLen
+	// free up DX to use for offset
+	MOVQ DX, CX
+
+	MOVQ SI, AX
+	ADDQ $2, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	// offset
+	// DX := int(src[si]) | int(src[si+1])<<8
+	MOVWQZX (SI), DX
+	ADDQ $2, SI
+
+	// 0 offset is invalid
+	CMPQ DX, $0
+	JEQ err_corrupt
+
+	ANDB $0xF, CX
+
+match_len_loop_pre:
+	// if mlen != 0xF
+	CMPB CX, $0xF
+	JNE copy_match
+
+match_len_loop:
+	// for src[si] == 0xFF
+	// lit_len += 0xFF
+	CMPB (SI), $0xFF
+	JNE match_len_finalise
+
+	// bounds check src[si+1]
+	MOVQ SI, AX
+	ADDQ $1, AX
+	CMPQ AX, R9
+	JGT err_short_buf
+
+	ADDQ $0xFF, CX
+	INCQ SI
+	JMP match_len_loop
+
+match_len_finalise:
+	// lit_len += int(src[si])
+	// si++
+	MOVBQZX (SI), AX
+	ADDQ AX, CX
+	INCQ SI
+
+copy_match:
+	// mLen += minMatch
+	ADDQ $4, CX
+
+	// check we have match_len bytes left in dst
+	// di+match_len < len(dst)
+	MOVQ DI, AX
+	ADDQ CX, AX
+	CMPQ AX, R8
+	JGT err_short_buf
+
+	// DX = offset
+	// CX = match_len
+	// BX = &dst + (di - offset)
+	MOVQ DI, BX
+	SUBQ DX, BX
+
+	// check BX is within dst
+	// if BX < &dst
+	CMPQ BX, R11
+	JLT err_short_buf
+
+	// if offset + match_len < di
+	MOVQ BX, AX
+	ADDQ CX, AX
+	CMPQ DI, AX
+	JGT copy_interior_match
+
+	// AX := len(dst[:di])
+	// MOVQ DI, AX
+	// SUBQ R11, AX
+
+	// copy 16 bytes at a time
+	// if di-offset < 16 copy 16-(di-offset) bytes to di
+	// then do the remaining
+
+copy_match_loop:
+	// for match_len >= 0
+	// dst[di] = dst[i]
+	// di++
+	// i++
+	MOVB (BX), AX
+	MOVB AX, (DI)
+	INCQ DI
+	INCQ BX
+	DECQ CX
+
+	CMPQ CX, $0
+	JGT copy_match_loop
+
+	JMP loop
+
+copy_interior_match:
+	CMPQ CX, $16
+	JGT memmove_match
+
+	// if len(dst[di:]) < 16
+	MOVQ R8, AX
+	SUBQ DI, AX
+	CMPQ AX, $16
+	JLT memmove_match
+
+	MOVOU (BX), X0
+	MOVOU X0, (DI)
+
+	ADDQ CX, DI
+	JMP loop
+
+memmove_match:
+	// memmove(to, from, len)
+	MOVQ DI, 0(SP)
+	MOVQ BX, 8(SP)
+	MOVQ CX, 16(SP)
+	// spill
+	MOVQ DI, 24(SP)
+	MOVQ SI, 32(SP)
+	MOVQ CX, 40(SP) // need len to inc SI, DI after
+	CALL runtime·memmove(SB)
+
+	// restore registers
+	MOVQ 24(SP), DI
+	MOVQ 32(SP), SI
+	MOVQ 40(SP), CX
+
+	// recalc initial values
+	MOVQ dst_base+0(FP), R8
+	MOVQ R8, R11 // TODO: make these sensible numbers
+	ADDQ dst_len+8(FP), R8
+	MOVQ src_base+24(FP), R9
+	ADDQ src_len+32(FP), R9
+	MOVQ R8, R12
+	SUBQ $32, R12
+	MOVQ R9, R13
+	SUBQ $16, R13
+
+	ADDQ CX, DI
+	JMP loop
+
+err_corrupt:
+	MOVQ $-1, ret+48(FP)
+	RET
+
+err_short_buf:
+	MOVQ $-2, ret+48(FP)
+	RET
+
+end:
+	SUBQ R11, DI
+	MOVQ DI, ret+48(FP)
+	RET
diff --git a/vendor/github.com/pierrec/lz4/decode_other.go b/vendor/github.com/pierrec/lz4/decode_other.go
new file mode 100644
index 0000000..919888e
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/decode_other.go
@@ -0,0 +1,98 @@
+// +build !amd64 appengine !gc noasm
+
+package lz4
+
+func decodeBlock(dst, src []byte) (ret int) {
+	const hasError = -2
+	defer func() {
+		if recover() != nil {
+			ret = hasError
+		}
+	}()
+
+	var si, di int
+	for {
+		// Literals and match lengths (token).
+		b := int(src[si])
+		si++
+
+		// Literals.
+		if lLen := b >> 4; lLen > 0 {
+			switch {
+			case lLen < 0xF && si+16 < len(src):
+				// Shortcut 1
+				// if we have enough room in src and dst, and the literals length
+				// is small enough (0..14) then copy all 16 bytes, even if not all
+				// are part of the literals.
+				copy(dst[di:], src[si:si+16])
+				si += lLen
+				di += lLen
+				if mLen := b & 0xF; mLen < 0xF {
+					// Shortcut 2
+					// if the match length (4..18) fits within the literals, then copy
+					// all 18 bytes, even if not all are part of the literals.
+					mLen += 4
+					if offset := int(src[si]) | int(src[si+1])<<8; mLen <= offset {
+						i := di - offset
+						end := i + 18
+						if end > len(dst) {
+							// The remaining buffer may not hold 18 bytes.
+							// See https://github.com/pierrec/lz4/issues/51.
+							end = len(dst)
+						}
+						copy(dst[di:], dst[i:end])
+						si += 2
+						di += mLen
+						continue
+					}
+				}
+			case lLen == 0xF:
+				for src[si] == 0xFF {
+					lLen += 0xFF
+					si++
+				}
+				lLen += int(src[si])
+				si++
+				fallthrough
+			default:
+				copy(dst[di:di+lLen], src[si:si+lLen])
+				si += lLen
+				di += lLen
+			}
+		}
+		if si >= len(src) {
+			return di
+		}
+
+		offset := int(src[si]) | int(src[si+1])<<8
+		if offset == 0 {
+			return hasError
+		}
+		si += 2
+
+		// Match.
+		mLen := b & 0xF
+		if mLen == 0xF {
+			for src[si] == 0xFF {
+				mLen += 0xFF
+				si++
+			}
+			mLen += int(src[si])
+			si++
+		}
+		mLen += minMatch
+
+		// Copy the match.
+		expanded := dst[di-offset:]
+		if mLen > offset {
+			// Efficiently copy the match dst[di-offset:di] into the dst slice.
+			bytesToCopy := offset * (mLen / offset)
+			for n := offset; n <= bytesToCopy+offset; n *= 2 {
+				copy(expanded[n:], expanded[:n])
+			}
+			di += bytesToCopy
+			mLen -= bytesToCopy
+		}
+		di += copy(dst[di:di+mLen], expanded[:mLen])
+	}
+}
diff --git a/vendor/github.com/pierrec/lz4/errors.go b/vendor/github.com/pierrec/lz4/errors.go
new file mode 100644
index 0000000..1c45d18
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/errors.go
@@ -0,0 +1,30 @@
+package lz4
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	rdebug "runtime/debug"
+)
+
+var (
+	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed
+	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
+	ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
+	// ErrInvalid is returned when reading an invalid LZ4 archive.
+	ErrInvalid = errors.New("lz4: bad magic number")
+	// ErrBlockDependency is returned when attempting to decompress an archive created with block dependency.
+	ErrBlockDependency = errors.New("lz4: block dependency not supported")
+	// ErrUnsupportedSeek is returned when attempting to Seek any way but forward from the current position.
+	ErrUnsupportedSeek = errors.New("lz4: can only seek forward from io.SeekCurrent")
+)
+
+func recoverBlock(e *error) {
+	if r := recover(); r != nil && *e == nil {
+		if debugFlag {
+			fmt.Fprintln(os.Stderr, r)
+			rdebug.PrintStack()
+		}
+		*e = ErrInvalidSourceShortBuffer
+	}
+}
diff --git a/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go
new file mode 100644
index 0000000..7a76a6b
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go
@@ -0,0 +1,223 @@
+// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version).
+// (https://github.com/Cyan4973/XXH/)
+package xxh32
+
+import (
+	"encoding/binary"
+)
+
+const (
+	prime1 uint32 = 2654435761
+	prime2 uint32 = 2246822519
+	prime3 uint32 = 3266489917
+	prime4 uint32 = 668265263
+	prime5 uint32 = 374761393
+
+	primeMask   = 0xFFFFFFFF
+	prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984
+	prime1minus = uint32((-int64(prime1)) & primeMask)                  // 1640531535
+)
+
+// XXHZero represents an xxhash32 object with seed 0.
+type XXHZero struct {
+	v1       uint32
+	v2       uint32
+	v3       uint32
+	v4       uint32
+	totalLen uint64
+	buf      [16]byte
+	bufused  int
+}
+
+// Sum appends the current hash to b and returns the resulting slice.
+// It does not change the underlying hash state.
+func (xxh XXHZero) Sum(b []byte) []byte {
+	h32 := xxh.Sum32()
+	return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24))
+}
+
+// Reset resets the Hash to its initial state.
+func (xxh *XXHZero) Reset() {
+	xxh.v1 = prime1plus2
+	xxh.v2 = prime2
+	xxh.v3 = 0
+	xxh.v4 = prime1minus
+	xxh.totalLen = 0
+	xxh.bufused = 0
+}
+
+// Size returns the number of bytes returned by Sum().
+func (xxh *XXHZero) Size() int {
+	return 4
+}
+
+// BlockSize gives the minimum number of bytes accepted by Write().
+func (xxh *XXHZero) BlockSize() int {
+	return 1
+}
+
+// Write adds input bytes to the Hash.
+// It never returns an error.
+func (xxh *XXHZero) Write(input []byte) (int, error) {
+	if xxh.totalLen == 0 {
+		xxh.Reset()
+	}
+	n := len(input)
+	m := xxh.bufused
+
+	xxh.totalLen += uint64(n)
+
+	r := len(xxh.buf) - m
+	if n < r {
+		copy(xxh.buf[m:], input)
+		xxh.bufused += len(input)
+		return n, nil
+	}
+
+	p := 0
+	// Causes compiler to work directly from registers instead of stack:
+	v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4
+	if m > 0 {
+		// some data left from previous update
+		copy(xxh.buf[xxh.bufused:], input[:r])
+		xxh.bufused += len(input) - r
+
+		// fast rotl(13)
+		buf := xxh.buf[:16] // BCE hint.
+		v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1
+		v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1
+		v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1
+		v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1
+		p = r
+		xxh.bufused = 0
+	}
+
+	for n := n - 16; p <= n; p += 16 {
+		sub := input[p:][:16] //BCE hint for compiler
+		v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
+		v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
+		v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
+		v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
+	}
+	xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4
+
+	copy(xxh.buf[xxh.bufused:], input[p:])
+	xxh.bufused += len(input) - p
+
+	return n, nil
+}
+
+// Sum32 returns the 32 bits Hash value.
+func (xxh *XXHZero) Sum32() uint32 {
+	h32 := uint32(xxh.totalLen)
+	if h32 >= 16 {
+		h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4)
+	} else {
+		h32 += prime5
+	}
+
+	p := 0
+	n := xxh.bufused
+	buf := xxh.buf
+	for n := n - 4; p <= n; p += 4 {
+		h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3
+		h32 = rol17(h32) * prime4
+	}
+	for ; p < n; p++ {
+		h32 += uint32(buf[p]) * prime5
+		h32 = rol11(h32) * prime1
+	}
+
+	h32 ^= h32 >> 15
+	h32 *= prime2
+	h32 ^= h32 >> 13
+	h32 *= prime3
+	h32 ^= h32 >> 16
+
+	return h32
+}
+
+// ChecksumZero returns the 32bits Hash value.
+func ChecksumZero(input []byte) uint32 {
+	n := len(input)
+	h32 := uint32(n)
+
+	if n < 16 {
+		h32 += prime5
+	} else {
+		v1 := prime1plus2
+		v2 := prime2
+		v3 := uint32(0)
+		v4 := prime1minus
+		p := 0
+		for n := n - 16; p <= n; p += 16 {
+			sub := input[p:][:16] //BCE hint for compiler
+			v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1
+			v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1
+			v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1
+			v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1
+		}
+		input = input[p:]
+		n -= p
+		h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
+	}
+
+	p := 0
+	for n := n - 4; p <= n; p += 4 {
+		h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3
+		h32 = rol17(h32) * prime4
+	}
+	for p < n {
+		h32 += uint32(input[p]) * prime5
+		h32 = rol11(h32) * prime1
+		p++
+	}
+
+	h32 ^= h32 >> 15
+	h32 *= prime2
+	h32 ^= h32 >> 13
+	h32 *= prime3
+	h32 ^= h32 >> 16
+
+	return h32
+}
+
+// Uint32Zero hashes x with seed 0.
+func Uint32Zero(x uint32) uint32 {
+	h := prime5 + 4 + x*prime3
+	h = rol17(h) * prime4
+	h ^= h >> 15
+	h *= prime2
+	h ^= h >> 13
+	h *= prime3
+	h ^= h >> 16
+	return h
+}
+
+func rol1(u uint32) uint32 {
+	return u<<1 | u>>31
+}
+
+func rol7(u uint32) uint32 {
+	return u<<7 | u>>25
+}
+
+func rol11(u uint32) uint32 {
+	return u<<11 | u>>21
+}
+
+func rol12(u uint32) uint32 {
+	return u<<12 | u>>20
+}
+
+func rol13(u uint32) uint32 {
+	return u<<13 | u>>19
+}
+
+func rol17(u uint32) uint32 {
+	return u<<17 | u>>15
+}
+
+func rol18(u uint32) uint32 {
+	return u<<18 | u>>14
+}
diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go
new file mode 100644
index 0000000..a3284bd
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4.go
@@ -0,0 +1,116 @@
+// Package lz4 implements reading and writing lz4 compressed data (a frame),
+// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
+//
+// Although the block level compression and decompression functions are exposed and are fully compatible
+// with the lz4 block format definition, they are low level and should not be used directly.
+// For a complete description of an lz4 compressed block, see:
+// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+//
+// See https://github.com/Cyan4973/lz4 for the reference C implementation.
+//
+package lz4
+
+import (
+	"math/bits"
+	"sync"
+)
+
+const (
+	// Extension is the LZ4 frame file name extension
+	Extension = ".lz4"
+	// Version is the LZ4 frame format version
+	Version = 1
+
+	frameMagic       uint32 = 0x184D2204
+	frameSkipMagic   uint32 = 0x184D2A50
+	frameMagicLegacy uint32 = 0x184C2102
+
+	// The following constants are used to setup the compression algorithm.
+	minMatch            = 4  // the minimum size of the match sequence size (4 bytes)
+	winSizeLog          = 16 // LZ4 64Kb window size limit
+	winSize             = 1 << winSizeLog
+	winMask             = winSize - 1 // 64Kb window of previous data for dependent blocks
+	compressedBlockFlag = 1 << 31
+	compressedBlockMask = compressedBlockFlag - 1
+
+	// hashLog determines the size of the hash table used to quickly find a previous match position.
+	// Its value influences the compression speed and memory usage, the lower the faster,
+	// but at the expense of the compression ratio.
+	// 16 seems to be the best compromise for fast compression.
+	hashLog = 16
+	htSize  = 1 << hashLog
+
+	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
+)
+
+// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb.
+const (
+	blockSize64K = 1 << (16 + 2*iota)
+	blockSize256K
+	blockSize1M
+	blockSize4M
+)
+
+var (
+	// Keep a pool of buffers for each valid block sizes.
+	bsMapValue = [...]*sync.Pool{
+		newBufferPool(2 * blockSize64K),
+		newBufferPool(2 * blockSize256K),
+		newBufferPool(2 * blockSize1M),
+		newBufferPool(2 * blockSize4M),
+	}
+)
+
+// newBufferPool returns a pool for buffers of the given size.
+func newBufferPool(size int) *sync.Pool {
+	return &sync.Pool{
+		New: func() interface{} {
+			return make([]byte, size)
+		},
+	}
+}
+
+// getBuffer returns a buffer to its pool.
+func getBuffer(size int) []byte {
+	idx := blockSizeValueToIndex(size) - 4
+	return bsMapValue[idx].Get().([]byte)
+}
+
+// putBuffer returns a buffer to its pool.
+func putBuffer(size int, buf []byte) {
+	if cap(buf) > 0 {
+		idx := blockSizeValueToIndex(size) - 4
+		bsMapValue[idx].Put(buf[:cap(buf)])
+	}
+}
+func blockSizeIndexToValue(i byte) int {
+	return 1 << (16 + 2*uint(i))
+}
+func isValidBlockSize(size int) bool {
+	const blockSizeMask = blockSize64K | blockSize256K | blockSize1M | blockSize4M
+
+	return size&blockSizeMask > 0 && bits.OnesCount(uint(size)) == 1
+}
+func blockSizeValueToIndex(size int) byte {
+	return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
+}
+
+// Header describes the various flags that can be set on a Writer or obtained from a Reader.
+// The default values match those of the LZ4 frame format definition
+// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
+//
+// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
+// It is the caller's responsibility to check them if necessary.
+type Header struct {
+	BlockChecksum    bool   // Compressed blocks checksum flag.
+	NoChecksum       bool   // Frame checksum flag.
+	BlockMaxSize     int    // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
+	Size             uint64 // Frame total size. It is _not_ computed by the Writer.
+	CompressionLevel int    // Compression level (higher is better, use 0 for fastest compression).
+	done             bool   // Header processed flag (Read or Write and checked).
+}
+
+// Reset reset internal status
+func (h *Header) Reset() {
+	h.done = false
+}
diff --git a/vendor/github.com/pierrec/lz4/lz4_go1.10.go b/vendor/github.com/pierrec/lz4/lz4_go1.10.go
new file mode 100644
index 0000000..9a0fb00
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4_go1.10.go
@@ -0,0 +1,29 @@
+//+build go1.10
+
+package lz4
+
+import (
+	"fmt"
+	"strings"
+)
+
+func (h Header) String() string {
+	var s strings.Builder
+
+	s.WriteString(fmt.Sprintf("%T{", h))
+	if h.BlockChecksum {
+		s.WriteString("BlockChecksum: true ")
+	}
+	if h.NoChecksum {
+		s.WriteString("NoChecksum: true ")
+	}
+	if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
+		s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
+	}
+	if l := h.CompressionLevel; l != 0 {
+		s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
+	}
+	s.WriteByte('}')
+
+	return s.String()
+}
diff --git a/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go
new file mode 100644
index 0000000..12c761a
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go
@@ -0,0 +1,29 @@
+//+build !go1.10
+
+package lz4
+
+import (
+	"bytes"
+	"fmt"
+)
+
+func (h Header) String() string {
+	var s bytes.Buffer
+
+	s.WriteString(fmt.Sprintf("%T{", h))
+	if h.BlockChecksum {
+		s.WriteString("BlockChecksum: true ")
+	}
+	if h.NoChecksum {
+		s.WriteString("NoChecksum: true ")
+	}
+	if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 {
+		s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs))
+	}
+	if l := h.CompressionLevel; l != 0 {
+		s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l))
+	}
+	s.WriteByte('}')
+
+	return s.String()
+}
diff --git a/vendor/github.com/pierrec/lz4/reader.go b/vendor/github.com/pierrec/lz4/reader.go
new file mode 100644
index 0000000..87dd72b
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/reader.go
@@ -0,0 +1,335 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"io/ioutil"
+
+	"github.com/pierrec/lz4/internal/xxh32"
+)
+
+// Reader implements the LZ4 frame decoder.
+// The Header is set after the first call to Read().
+// The Header may change between Read() calls in case of concatenated frames.
+type Reader struct {
+	Header
+	// Handler called when a block has been successfully read.
+	// It provides the number of bytes read.
+	OnBlockDone func(size int)
+
+	buf      [8]byte       // Scrap buffer.
+	pos      int64         // Current position in src.
+	src      io.Reader     // Source.
+	zdata    []byte        // Compressed data.
+	data     []byte        // Uncompressed data.
+	idx      int           // Index of unread bytes into data.
+	checksum xxh32.XXHZero // Frame hash.
+	skip     int64         // Bytes to skip before next read.
+	dpos     int64         // Position in dest
+}
+
+// NewReader returns a new LZ4 frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReader(src io.Reader) *Reader {
+	r := &Reader{src: src}
+	return r
+}
+
+// readHeader checks the frame magic number and parses the frame descriptoz.
+// Skippable frames are supported even as a first frame although the LZ4
+// specifications recommends skippable frames not to be used as first frames.
+func (z *Reader) readHeader(first bool) error {
+	defer z.checksum.Reset()
+
+	buf := z.buf[:]
+	for {
+		magic, err := z.readUint32()
+		if err != nil {
+			z.pos += 4
+			if !first && err == io.ErrUnexpectedEOF {
+				return io.EOF
+			}
+			return err
+		}
+		if magic == frameMagic {
+			break
+		}
+		if magic>>8 != frameSkipMagic>>8 {
+			return ErrInvalid
+		}
+		skipSize, err := z.readUint32()
+		if err != nil {
+			return err
+		}
+		z.pos += 4
+		m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
+		if err != nil {
+			return err
+		}
+		z.pos += m
+	}
+
+	// Header.
+	if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
+		return err
+	}
+	z.pos += 8
+
+	b := buf[0]
+	if v := b >> 6; v != Version {
+		return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
+	}
+	if b>>5&1 == 0 {
+		return ErrBlockDependency
+	}
+	z.BlockChecksum = b>>4&1 > 0
+	frameSize := b>>3&1 > 0
+	z.NoChecksum = b>>2&1 == 0
+
+	bmsID := buf[1] >> 4 & 0x7
+	if bmsID < 4 || bmsID > 7 {
+		return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
+	}
+	bSize := blockSizeIndexToValue(bmsID - 4)
+	z.BlockMaxSize = bSize
+
+	// Allocate the compressed/uncompressed buffers.
+	// The compressed buffer cannot exceed the uncompressed one.
+	if n := 2 * bSize; cap(z.zdata) < n {
+		z.zdata = make([]byte, n, n)
+	}
+	if debugFlag {
+		debug("header block max size id=%d size=%d", bmsID, bSize)
+	}
+	z.zdata = z.zdata[:bSize]
+	z.data = z.zdata[:cap(z.zdata)][bSize:]
+	z.idx = len(z.data)
+
+	_, _ = z.checksum.Write(buf[0:2])
+
+	if frameSize {
+		buf := buf[:8]
+		if _, err := io.ReadFull(z.src, buf); err != nil {
+			return err
+		}
+		z.Size = binary.LittleEndian.Uint64(buf)
+		z.pos += 8
+		_, _ = z.checksum.Write(buf)
+	}
+
+	// Header checksum.
+	if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
+		return err
+	}
+	z.pos++
+	if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
+		return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
+	}
+
+	z.Header.done = true
+	if debugFlag {
+		debug("header read: %v", z.Header)
+	}
+
+	return nil
+}
+
+// Read decompresses data from the underlying source into the supplied buffer.
+//
+// Since there can be multiple streams concatenated, Header values may
+// change between calls to Read(). If that is the case, no data is actually read from
+// the underlying io.Reader, to allow for potential input buffer resizing.
+func (z *Reader) Read(buf []byte) (int, error) {
+	if debugFlag {
+		debug("Read buf len=%d", len(buf))
+	}
+	if !z.Header.done {
+		if err := z.readHeader(true); err != nil {
+			return 0, err
+		}
+		if debugFlag {
+			debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
+				len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
+		}
+	}
+
+	if len(buf) == 0 {
+		return 0, nil
+	}
+
+	if z.idx == len(z.data) {
+		// No data ready for reading, process the next block.
+		if debugFlag {
+			debug("reading block from writer")
+		}
+		// Reset uncompressed buffer
+		z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
+
+		// Block length: 0 = end of frame, highest bit set: uncompressed.
+		bLen, err := z.readUint32()
+		if err != nil {
+			return 0, err
+		}
+		z.pos += 4
+
+		if bLen == 0 {
+			// End of frame reached.
+			if !z.NoChecksum {
+				// Validate the frame checksum.
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				if debugFlag {
+					debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
+				}
+				z.pos += 4
+				if h := z.checksum.Sum32(); checksum != h {
+					return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+			// Get ready for the next concatenated frame and keep the position.
+			pos := z.pos
+			z.Reset(z.src)
+			z.pos = pos
+
+			// Since multiple frames can be concatenated, check for more.
+			return 0, z.readHeader(false)
+		}
+
+		if debugFlag {
+			debug("raw block size %d", bLen)
+		}
+		if bLen&compressedBlockFlag > 0 {
+			// Uncompressed block.
+			bLen &= compressedBlockMask
+			if debugFlag {
+				debug("uncompressed block size %d", bLen)
+			}
+			if int(bLen) > cap(z.data) {
+				return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
+			}
+			z.data = z.data[:bLen]
+			if _, err := io.ReadFull(z.src, z.data); err != nil {
+				return 0, err
+			}
+			z.pos += int64(bLen)
+			if z.OnBlockDone != nil {
+				z.OnBlockDone(int(bLen))
+			}
+
+			if z.BlockChecksum {
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				z.pos += 4
+
+				if h := xxh32.ChecksumZero(z.data); h != checksum {
+					return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+		} else {
+			// Compressed block.
+			if debugFlag {
+				debug("compressed block size %d", bLen)
+			}
+			if int(bLen) > cap(z.data) {
+				return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
+			}
+			zdata := z.zdata[:bLen]
+			if _, err := io.ReadFull(z.src, zdata); err != nil {
+				return 0, err
+			}
+			z.pos += int64(bLen)
+
+			if z.BlockChecksum {
+				checksum, err := z.readUint32()
+				if err != nil {
+					return 0, err
+				}
+				z.pos += 4
+
+				if h := xxh32.ChecksumZero(zdata); h != checksum {
+					return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
+				}
+			}
+
+			n, err := UncompressBlock(zdata, z.data)
+			if err != nil {
+				return 0, err
+			}
+			z.data = z.data[:n]
+			if z.OnBlockDone != nil {
+				z.OnBlockDone(n)
+			}
+		}
+
+		if !z.NoChecksum {
+			_, _ = z.checksum.Write(z.data)
+			if debugFlag {
+				debug("current frame checksum %x", z.checksum.Sum32())
+			}
+		}
+		z.idx = 0
+	}
+
+	if z.skip > int64(len(z.data[z.idx:])) {
+		z.skip -= int64(len(z.data[z.idx:]))
+		z.dpos += int64(len(z.data[z.idx:]))
+		z.idx = len(z.data)
+		return 0, nil
+	}
+
+	z.idx += int(z.skip)
+	z.dpos += z.skip
+	z.skip = 0
+
+	n := copy(buf, z.data[z.idx:])
+	z.idx += n
+	z.dpos += int64(n)
+	if debugFlag {
+		debug("copied %d bytes to input", n)
+	}
+
+	return n, nil
+}
+
+// Seek implements io.Seeker, but supports seeking forward from the current
+// position only. Any other seek will return an error. Allows skipping output
+// bytes which aren't needed, which in some scenarios is faster than reading
+// and discarding them.
+// Note this may cause future calls to Read() to read 0 bytes if all of the
+// data they would have returned is skipped.
+func (z *Reader) Seek(offset int64, whence int) (int64, error) {
+	if offset < 0 || whence != io.SeekCurrent {
+		return z.dpos + z.skip, ErrUnsupportedSeek
+	}
+	z.skip += offset
+	return z.dpos + z.skip, nil
+}
+
+// Reset discards the Reader's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *Reader) Reset(r io.Reader) {
+	z.Header = Header{}
+	z.pos = 0
+	z.src = r
+	z.zdata = z.zdata[:0]
+	z.data = z.data[:0]
+	z.idx = 0
+	z.checksum.Reset()
+}
+
+// readUint32 reads an uint32 into the supplied buffer.
+// The idea is to make use of the already allocated buffers avoiding additional allocations.
+func (z *Reader) readUint32() (uint32, error) {
+	buf := z.buf[:4]
+	_, err := io.ReadFull(z.src, buf)
+	x := binary.LittleEndian.Uint32(buf)
+	return x, err
+}
diff --git a/vendor/github.com/pierrec/lz4/reader_legacy.go b/vendor/github.com/pierrec/lz4/reader_legacy.go
new file mode 100644
index 0000000..1670a77
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/reader_legacy.go
@@ -0,0 +1,207 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+)
+
+// ReaderLegacy implements the LZ4Demo frame decoder.
+// The Header is set after the first call to Read().
+type ReaderLegacy struct {
+	Header
+	// Handler called when a block has been successfully read.
+	// It provides the number of bytes read.
+	OnBlockDone func(size int)
+
+	lastBlock bool
+	buf       [8]byte   // Scrap buffer.
+	pos       int64     // Current position in src.
+	src       io.Reader // Source.
+	zdata     []byte    // Compressed data.
+	data      []byte    // Uncompressed data.
+	idx       int       // Index of unread bytes into data.
+	skip      int64     // Bytes to skip before next read.
+	dpos      int64     // Position in dest
+}
+
+// NewReaderLegacy returns a new LZ4Demo frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReaderLegacy(src io.Reader) *ReaderLegacy {
+	r := &ReaderLegacy{src: src}
+	return r
+}
+
+// readHeader checks the frame magic number and parses the frame descriptoz.
+// Skippable frames are supported even as a first frame although the LZ4
+// specifications recommends skippable frames not to be used as first frames.
+func (z *ReaderLegacy) readLegacyHeader() error {
+	z.lastBlock = false
+	magic, err := z.readUint32()
+	if err != nil {
+		z.pos += 4
+		if err == io.ErrUnexpectedEOF {
+			return io.EOF
+		}
+		return err
+	}
+	if magic != frameMagicLegacy {
+		return ErrInvalid
+	}
+	z.pos += 4
+
+	// Legacy has fixed 8MB blocksizes
+	// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame
+	bSize := blockSize4M * 2
+
+	// Allocate the compressed/uncompressed buffers.
+	// The compressed buffer cannot exceed the uncompressed one.
+	if n := 2 * bSize; cap(z.zdata) < n {
+		z.zdata = make([]byte, n, n)
+	}
+	if debugFlag {
+		debug("header block max size size=%d", bSize)
+	}
+	z.zdata = z.zdata[:bSize]
+	z.data = z.zdata[:cap(z.zdata)][bSize:]
+	z.idx = len(z.data)
+
+	z.Header.done = true
+	if debugFlag {
+		debug("header read: %v", z.Header)
+	}
+
+	return nil
+}
+
+// Read decompresses data from the underlying source into the supplied buffer.
+//
+// Since there can be multiple streams concatenated, Header values may
+// change between calls to Read(). If that is the case, no data is actually read from
+// the underlying io.Reader, to allow for potential input buffer resizing.
+func (z *ReaderLegacy) Read(buf []byte) (int, error) {
+	if debugFlag {
+		debug("Read buf len=%d", len(buf))
+	}
+	if !z.Header.done {
+		if err := z.readLegacyHeader(); err != nil {
+			return 0, err
+		}
+		if debugFlag {
+			debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
+				len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
+		}
+	}
+
+	if len(buf) == 0 {
+		return 0, nil
+	}
+
+	if z.idx == len(z.data) {
+		// No data ready for reading, process the next block.
+		if debugFlag {
+			debug("  reading block from writer %d %d", z.idx, blockSize4M*2)
+		}
+
+		// Reset uncompressed buffer
+		z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
+
+		bLen, err := z.readUint32()
+		if err != nil {
+			return 0, err
+		}
+		if debugFlag {
+			debug("   bLen %d (0x%x) offset = %d (0x%x)", bLen, bLen, z.pos, z.pos)
+		}
+		z.pos += 4
+
+		// Legacy blocks are always compressed, even when detrimental
+		if debugFlag {
+			debug("   compressed block size %d", bLen)
+		}
+
+		if int(bLen) > cap(z.data) {
+			return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
+		}
+		zdata := z.zdata[:bLen]
+		if _, err := io.ReadFull(z.src, zdata); err != nil {
+			return 0, err
+		}
+		z.pos += int64(bLen)
+
+		n, err := UncompressBlock(zdata, z.data)
+		if err != nil {
+			return 0, err
+		}
+
+		z.data = z.data[:n]
+		if z.OnBlockDone != nil {
+			z.OnBlockDone(n)
+		}
+
+		z.idx = 0
+
+		// Legacy blocks are fixed to 8MB, if we read a decompressed block smaller than this
+		// it means we've reached the end...
+		if n < blockSize4M*2 {
+			z.lastBlock = true
+		}
+	}
+
+	if z.skip > int64(len(z.data[z.idx:])) {
+		z.skip -= int64(len(z.data[z.idx:]))
+		z.dpos += int64(len(z.data[z.idx:]))
+		z.idx = len(z.data)
+		return 0, nil
+	}
+
+	z.idx += int(z.skip)
+	z.dpos += z.skip
+	z.skip = 0
+
+	n := copy(buf, z.data[z.idx:])
+	z.idx += n
+	z.dpos += int64(n)
+	if debugFlag {
+		debug("%v] copied %d bytes to input (%d:%d)", z.lastBlock, n, z.idx, len(z.data))
+	}
+	if z.lastBlock && len(z.data) == z.idx {
+		return n, io.EOF
+	}
+	return n, nil
+}
+
+// Seek implements io.Seeker, but supports seeking forward from the current
+// position only. Any other seek will return an error. Allows skipping output
+// bytes which aren't needed, which in some scenarios is faster than reading
+// and discarding them.
+// Note this may cause future calls to Read() to read 0 bytes if all of the
+// data they would have returned is skipped.
+func (z *ReaderLegacy) Seek(offset int64, whence int) (int64, error) {
+	if offset < 0 || whence != io.SeekCurrent {
+		return z.dpos + z.skip, ErrUnsupportedSeek
+	}
+	z.skip += offset
+	return z.dpos + z.skip, nil
+}
+
+// Reset discards the Reader's state and makes it equivalent to the
+// result of its original state from NewReader, but reading from r instead.
+// This permits reusing a Reader rather than allocating a new one.
+func (z *ReaderLegacy) Reset(r io.Reader) {
+	z.Header = Header{}
+	z.pos = 0
+	z.src = r
+	z.zdata = z.zdata[:0]
+	z.data = z.data[:0]
+	z.idx = 0
+}
+
+// readUint32 reads an uint32 into the supplied buffer.
+// The idea is to make use of the already allocated buffers avoiding additional allocations.
+func (z *ReaderLegacy) readUint32() (uint32, error) {
+	buf := z.buf[:4]
+	_, err := io.ReadFull(z.src, buf)
+	x := binary.LittleEndian.Uint32(buf)
+	return x, err
+}
diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go
new file mode 100644
index 0000000..6a60a9a
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/writer.go
@@ -0,0 +1,413 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"runtime"
+
+	"github.com/pierrec/lz4/internal/xxh32"
+)
+
+// zResult contains the results of compressing a block.
+type zResult struct {
+	size     uint32 // Block header
+	data     []byte // Compressed data
+	checksum uint32 // Data checksum
+}
+
+// Writer implements the LZ4 frame encoder.
+type Writer struct {
+	Header
+	// Handler called when a block has been successfully written out.
+	// It provides the number of bytes written.
+	OnBlockDone func(size int)
+
+	buf       [19]byte      // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes
+	dst       io.Writer     // Destination.
+	checksum  xxh32.XXHZero // Frame checksum.
+	data      []byte        // Data to be compressed + buffer for compressed data.
+	idx       int           // Index into data.
+	hashtable [winSize]int  // Hash table used in CompressBlock().
+
+	// For concurrency.
+	c   chan chan zResult // Channel for block compression goroutines and writer goroutine.
+	err error             // Any error encountered while writing to the underlying destination.
+}
+
+// NewWriter returns a new LZ4 frame encoder.
+// No access to the underlying io.Writer is performed.
+// The supplied Header is checked at the first Write.
+// It is ok to change it before the first Write but then not until a Reset() is performed.
+func NewWriter(dst io.Writer) *Writer {
+	z := new(Writer)
+	z.Reset(dst)
+	return z
+}
+
+// WithConcurrency sets the number of concurrent go routines used for compression.
+// A negative value sets the concurrency to GOMAXPROCS.
+func (z *Writer) WithConcurrency(n int) *Writer {
+	switch {
+	case n == 0 || n == 1:
+		z.c = nil
+		return z
+	case n < 0:
+		n = runtime.GOMAXPROCS(0)
+	}
+	z.c = make(chan chan zResult, n)
+	// Writer goroutine managing concurrent block compression goroutines.
+	go func() {
+		// Process next block compression item.
+		for c := range z.c {
+			// Read the next compressed block result.
+			// Waiting here ensures that the blocks are output in the order they were sent.
+			// The incoming channel is always closed as it indicates to the caller that
+			// the block has been processed.
+			res := <-c
+			n := len(res.data)
+			if n == 0 {
+				// Notify the block compression routine that we are done with its result.
+				// This is used when a sentinel block is sent to terminate the compression.
+				close(c)
+				return
+			}
+			// Write the block.
+			if err := z.writeUint32(res.size); err != nil && z.err == nil {
+				z.err = err
+			}
+			if _, err := z.dst.Write(res.data); err != nil && z.err == nil {
+				z.err = err
+			}
+			if z.BlockChecksum {
+				if err := z.writeUint32(res.checksum); err != nil && z.err == nil {
+					z.err = err
+				}
+			}
+			if isCompressed := res.size&compressedBlockFlag == 0; isCompressed {
+				// It is now safe to release the buffer as no longer in use by any goroutine.
+				putBuffer(cap(res.data), res.data)
+			}
+			if h := z.OnBlockDone; h != nil {
+				h(n)
+			}
+			close(c)
+		}
+	}()
+	return z
+}
+
+// newBuffers instantiates new buffers which size matches the one in Header.
+// The returned buffers are for decompression and compression respectively.
+func (z *Writer) newBuffers() {
+	bSize := z.Header.BlockMaxSize
+	buf := getBuffer(bSize)
+	z.data = buf[:bSize] // Uncompressed buffer is the first half.
+}
+
+// freeBuffers puts the writer's buffers back to the pool.
+func (z *Writer) freeBuffers() {
+	// Put the buffer back into the pool, if any.
+	putBuffer(z.Header.BlockMaxSize, z.data)
+	z.data = nil
+}
+
+// writeHeader builds and writes the header (magic+header) to the underlying io.Writer.
+func (z *Writer) writeHeader() error {
+	// Default to 4Mb if BlockMaxSize is not set.
+	if z.Header.BlockMaxSize == 0 {
+		z.Header.BlockMaxSize = blockSize4M
+	}
+	// The only option that needs to be validated.
+	bSize := z.Header.BlockMaxSize
+	if !isValidBlockSize(z.Header.BlockMaxSize) {
+		return fmt.Errorf("lz4: invalid block max size: %d", bSize)
+	}
+	// Allocate the compressed/uncompressed buffers.
+	// The compressed buffer cannot exceed the uncompressed one.
+	z.newBuffers()
+	z.idx = 0
+
+	// Size is optional.
+	buf := z.buf[:]
+
+	// Set the fixed size data: magic number, block max size and flags.
+	binary.LittleEndian.PutUint32(buf[0:], frameMagic)
+	flg := byte(Version << 6)
+	flg |= 1 << 5 // No block dependency.
+	if z.Header.BlockChecksum {
+		flg |= 1 << 4
+	}
+	if z.Header.Size > 0 {
+		flg |= 1 << 3
+	}
+	if !z.Header.NoChecksum {
+		flg |= 1 << 2
+	}
+	buf[4] = flg
+	buf[5] = blockSizeValueToIndex(z.Header.BlockMaxSize) << 4
+
+	// Current buffer size: magic(4) + flags(1) + block max size (1).
+	n := 6
+	// Optional items.
+	if z.Header.Size > 0 {
+		binary.LittleEndian.PutUint64(buf[n:], z.Header.Size)
+		n += 8
+	}
+
+	// The header checksum includes the flags, block max size and optional Size.
+	buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF)
+	z.checksum.Reset()
+
+	// Header ready, write it out.
+	if _, err := z.dst.Write(buf[0 : n+1]); err != nil {
+		return err
+	}
+	z.Header.done = true
+	if debugFlag {
+		debug("wrote header %v", z.Header)
+	}
+
+	return nil
+}
+
+// Write compresses data from the supplied buffer into the underlying io.Writer.
+// Write does not return until the data has been written.
+func (z *Writer) Write(buf []byte) (int, error) {
+	if !z.Header.done {
+		if err := z.writeHeader(); err != nil {
+			return 0, err
+		}
+	}
+	if debugFlag {
+		debug("input buffer len=%d index=%d", len(buf), z.idx)
+	}
+
+	zn := len(z.data)
+	var n int
+	for len(buf) > 0 {
+		if z.idx == 0 && len(buf) >= zn {
+			// Avoid a copy as there is enough data for a block.
+			if err := z.compressBlock(buf[:zn]); err != nil {
+				return n, err
+			}
+			n += zn
+			buf = buf[zn:]
+			continue
+		}
+		// Accumulate the data to be compressed.
+		m := copy(z.data[z.idx:], buf)
+		n += m
+		z.idx += m
+		buf = buf[m:]
+		if debugFlag {
+			debug("%d bytes copied to buf, current index %d", n, z.idx)
+		}
+
+		if z.idx < len(z.data) {
+			// Buffer not filled.
+			if debugFlag {
+				debug("need more data for compression")
+			}
+			return n, nil
+		}
+
+		// Buffer full.
+		if err := z.compressBlock(z.data); err != nil {
+			return n, err
+		}
+		z.idx = 0
+	}
+
+	return n, nil
+}
+
+// compressBlock compresses a block.
+func (z *Writer) compressBlock(data []byte) error {
+	if !z.NoChecksum {
+		_, _ = z.checksum.Write(data)
+	}
+
+	if z.c != nil {
+		c := make(chan zResult)
+		z.c <- c // Send now to guarantee order
+		go writerCompressBlock(c, z.Header, data)
+		return nil
+	}
+
+	zdata := z.data[z.Header.BlockMaxSize:cap(z.data)]
+	// The compressed block size cannot exceed the input's.
+	var zn int
+
+	if level := z.Header.CompressionLevel; level != 0 {
+		zn, _ = CompressBlockHC(data, zdata, level)
+	} else {
+		zn, _ = CompressBlock(data, zdata, z.hashtable[:])
+	}
+
+	var bLen uint32
+	if debugFlag {
+		debug("block compression %d => %d", len(data), zn)
+	}
+	if zn > 0 && zn < len(data) {
+		// Compressible and compressed size smaller than uncompressed: ok!
+		bLen = uint32(zn)
+		zdata = zdata[:zn]
+	} else {
+		// Uncompressed block.
+		bLen = uint32(len(data)) | compressedBlockFlag
+		zdata = data
+	}
+	if debugFlag {
+		debug("block compression to be written len=%d data len=%d", bLen, len(zdata))
+	}
+
+	// Write the block.
+	if err := z.writeUint32(bLen); err != nil {
+		return err
+	}
+	written, err := z.dst.Write(zdata)
+	if err != nil {
+		return err
+	}
+	if h := z.OnBlockDone; h != nil {
+		h(written)
+	}
+
+	if !z.BlockChecksum {
+		if debugFlag {
+			debug("current frame checksum %x", z.checksum.Sum32())
+		}
+		return nil
+	}
+	checksum := xxh32.ChecksumZero(zdata)
+	if debugFlag {
+		debug("block checksum %x", checksum)
+		defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }()
+	}
+	return z.writeUint32(checksum)
+}
+
+// Flush flushes any pending compressed data to the underlying writer.
+// Flush does not return until the data has been written.
+// If the underlying writer returns an error, Flush returns that error.
+func (z *Writer) Flush() error {
+	if debugFlag {
+		debug("flush with index %d", z.idx)
+	}
+	if z.idx == 0 {
+		return nil
+	}
+
+	data := z.data[:z.idx]
+	z.idx = 0
+	if z.c == nil {
+		return z.compressBlock(data)
+	}
+	if !z.NoChecksum {
+		_, _ = z.checksum.Write(data)
+	}
+	c := make(chan zResult)
+	z.c <- c
+	writerCompressBlock(c, z.Header, data)
+	return nil
+}
+
+func (z *Writer) close() error {
+	if z.c == nil {
+		return nil
+	}
+	// Send a sentinel block (no data to compress) to terminate the writer main goroutine.
+	c := make(chan zResult)
+	z.c <- c
+	c <- zResult{}
+	// Wait for the main goroutine to complete.
+	<-c
+	// At this point the main goroutine has shut down or is about to return.
+	z.c = nil
+	return z.err
+}
+
+// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer.
+func (z *Writer) Close() error {
+	if !z.Header.done {
+		if err := z.writeHeader(); err != nil {
+			return err
+		}
+	}
+	if err := z.Flush(); err != nil {
+		return err
+	}
+	if err := z.close(); err != nil {
+		return err
+	}
+	z.freeBuffers()
+
+	if debugFlag {
+		debug("writing last empty block")
+	}
+	if err := z.writeUint32(0); err != nil {
+		return err
+	}
+	if z.NoChecksum {
+		return nil
+	}
+	checksum := z.checksum.Sum32()
+	if debugFlag {
+		debug("stream checksum %x", checksum)
+	}
+	return z.writeUint32(checksum)
+}
+
+// Reset clears the state of the Writer z such that it is equivalent to its
+// initial state from NewWriter, but instead writing to w.
+// No access to the underlying io.Writer is performed.
+func (z *Writer) Reset(w io.Writer) {
+	n := cap(z.c)
+	_ = z.close()
+	z.freeBuffers()
+	z.Header.Reset()
+	z.dst = w
+	z.checksum.Reset()
+	z.idx = 0
+	z.err = nil
+	// reset hashtable to ensure deterministic output.
+	for i := range z.hashtable {
+		z.hashtable[i] = 0
+	}
+	z.WithConcurrency(n)
+}
+
+// writeUint32 writes a uint32 to the underlying writer.
+func (z *Writer) writeUint32(x uint32) error {
+	buf := z.buf[:4]
+	binary.LittleEndian.PutUint32(buf, x)
+	_, err := z.dst.Write(buf)
+	return err
+}
+
+// writerCompressBlock compresses data into a pooled buffer and writes its result
+// out to the input channel.
+func writerCompressBlock(c chan zResult, header Header, data []byte) {
+	zdata := getBuffer(header.BlockMaxSize)
+	// The compressed block size cannot exceed the input's.
+	var zn int
+	if level := header.CompressionLevel; level != 0 {
+		zn, _ = CompressBlockHC(data, zdata, level)
+	} else {
+		var hashTable [winSize]int
+		zn, _ = CompressBlock(data, zdata, hashTable[:])
+	}
+	var res zResult
+	if zn > 0 && zn < len(data) {
+		res.size = uint32(zn)
+		res.data = zdata[:zn]
+	} else {
+		res.size = uint32(len(data)) | compressedBlockFlag
+		res.data = data
+	}
+	if header.BlockChecksum {
+		res.checksum = xxh32.ChecksumZero(res.data)
+	}
+	c <- res
+}