// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !amd64 appengine !gc noasm

package snappy

func load32(b []byte, i int) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//	dst is long enough to hold the encoded bytes
//	1 <= len(lit) && len(lit) <= 65536
func emitLiteral(dst, lit []byte) int {
	i, n := 0, uint(len(lit)-1)
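	// The tag byte's low two bits are tagLiteral. Its upper six bits hold
	// the length minus one when that value is below 60, or the value 60 or
	// 61 to indicate that a 1- or 2-byte little-endian (length-1) follows.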
	switch {
	case n < 60:
		dst[0] = uint8(n)<<2 | tagLiteral
		i = 1
	case n < 1<<8:
		dst[0] = 60<<2 | tagLiteral
		dst[1] = uint8(n)
		i = 2
	default:
		dst[0] = 61<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		i = 3
	}
	return i + copy(dst[i:], lit)
}

// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//	dst is long enough to hold the encoded bytes
//	1 <= offset && offset <= 65535
//	4 <= length && length <= 65535
func emitCopy(dst []byte, offset, length int) int {
	i := 0
	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
	// length emitted down below is a little lower (at 60 = 64 - 4), because
	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
	for length >= 68 {
		// Emit a length 64 copy, encoded as 3 bytes.
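		// (A tagCopy2 op stores length-1 in the tag byte's upper six bits,
		// followed by the 16-bit offset in little-endian order.)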
		dst[i+0] = 63<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 64
	}
	if length > 64 {
		// Emit a length 60 copy, encoded as 3 bytes.
		dst[i+0] = 59<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= 60
	}
	if length >= 12 || offset >= 2048 {
		// Emit the remaining copy, encoded as 3 bytes.
		dst[i+0] = uint8(length-1)<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		return i + 3
	}
	// Emit the remaining copy, encoded as 2 bytes.
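	// A tagCopy1 op packs length-4 (at most 7 here) into bits 2-4 of the tag
	// byte and the high 3 bits of the 11-bit offset into bits 5-7; the low 8
	// offset bits follow in the second byte.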
	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
	dst[i+1] = uint8(offset)
	return i + 2
}

// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//	0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
	for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
	}
	return j
}

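// hash returns a cheap multiplicative hash of the four bytes packed into u.
// shift is 32 minus the log2 of the hash table size, so the result always
// fits within the table (see encodeBlock).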
func hash(u, shift uint32) uint32 {
	return (u * 0x1e35a7bd) >> shift
}

// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
	const (
		maxTableSize = 1 << 14
		// tableMask is redundant, but helps the compiler eliminate bounds
		// checks.
		tableMask = maxTableSize - 1
	)
	shift := uint32(32 - 8)
	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
		shift--
	}
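	// shift is now 32 minus log2 of the effective table size, so hash values
	// computed with it index only the first tableSize slots of table.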
	// In Go, all array elements are zero-initialized, so there is no advantage
	// to a smaller tableSize per se. However, it matches the C++ algorithm,
	// and in the asm versions of this code, we can get away with zeroing only
	// the first tableSize elements.
	var table [maxTableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s), shift)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := 32

		nextS := s
		candidate := 0
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS), shift)
			if load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
			}

			d += emitCopy(dst[d:], base-candidate, s-base)
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x>>0), shift)
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x>>8), shift)
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			if uint32(x>>8) != load32(src, candidate) {
				nextHash = hash(uint32(x>>16), shift)
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}