blob: f2f01fd881c49884585d0b8fc71df9d6fbb8b78c [file] [log] [blame]
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package wal
import (
"bufio"
"encoding/binary"
"hash"
"io"
"sync"
"go.etcd.io/etcd/pkg/crc"
"go.etcd.io/etcd/pkg/pbutil"
"go.etcd.io/etcd/raft/raftpb"
"go.etcd.io/etcd/wal/walpb"
)
const minSectorSize = 512
// frameSizeBytes is frame size in bytes, including record size and padding size.
const frameSizeBytes = 8
type decoder struct {
mu sync.Mutex
brs []*bufio.Reader
// lastValidOff file offset following the last valid decoded record
lastValidOff int64
crc hash.Hash32
}
func newDecoder(r ...io.Reader) *decoder {
readers := make([]*bufio.Reader, len(r))
for i := range r {
readers[i] = bufio.NewReader(r[i])
}
return &decoder{
brs: readers,
crc: crc.New(0, crcTable),
}
}
func (d *decoder) decode(rec *walpb.Record) error {
rec.Reset()
d.mu.Lock()
defer d.mu.Unlock()
return d.decodeRecord(rec)
}
func (d *decoder) decodeRecord(rec *walpb.Record) error {
if len(d.brs) == 0 {
return io.EOF
}
l, err := readInt64(d.brs[0])
if err == io.EOF || (err == nil && l == 0) {
// hit end of file or preallocated space
d.brs = d.brs[1:]
if len(d.brs) == 0 {
return io.EOF
}
d.lastValidOff = 0
return d.decodeRecord(rec)
}
if err != nil {
return err
}
recBytes, padBytes := decodeFrameSize(l)
data := make([]byte, recBytes+padBytes)
if _, err = io.ReadFull(d.brs[0], data); err != nil {
// ReadFull returns io.EOF only if no bytes were read
// the decoder should treat this as an ErrUnexpectedEOF instead.
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
if err := rec.Unmarshal(data[:recBytes]); err != nil {
if d.isTornEntry(data) {
return io.ErrUnexpectedEOF
}
return err
}
// skip crc checking if the record type is crcType
if rec.Type != crcType {
d.crc.Write(rec.Data)
if err := rec.Validate(d.crc.Sum32()); err != nil {
if d.isTornEntry(data) {
return io.ErrUnexpectedEOF
}
return err
}
}
// record decoded as valid; point last valid offset to end of record
d.lastValidOff += frameSizeBytes + recBytes + padBytes
return nil
}
func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) {
// the record size is stored in the lower 56 bits of the 64-bit length
recBytes = int64(uint64(lenField) & ^(uint64(0xff) << 56))
// non-zero padding is indicated by set MSb / a negative length
if lenField < 0 {
// padding is stored in lower 3 bits of length MSB
padBytes = int64((uint64(lenField) >> 56) & 0x7)
}
return recBytes, padBytes
}
// isTornEntry determines whether the last entry of the WAL was partially written
// and corrupted because of a torn write.
func (d *decoder) isTornEntry(data []byte) bool {
if len(d.brs) != 1 {
return false
}
fileOff := d.lastValidOff + frameSizeBytes
curOff := 0
chunks := [][]byte{}
// split data on sector boundaries
for curOff < len(data) {
chunkLen := int(minSectorSize - (fileOff % minSectorSize))
if chunkLen > len(data)-curOff {
chunkLen = len(data) - curOff
}
chunks = append(chunks, data[curOff:curOff+chunkLen])
fileOff += int64(chunkLen)
curOff += chunkLen
}
// if any data for a sector chunk is all 0, it's a torn write
for _, sect := range chunks {
isZero := true
for _, v := range sect {
if v != 0 {
isZero = false
break
}
}
if isZero {
return true
}
}
return false
}
func (d *decoder) updateCRC(prevCrc uint32) {
d.crc = crc.New(prevCrc, crcTable)
}
func (d *decoder) lastCRC() uint32 {
return d.crc.Sum32()
}
func (d *decoder) lastOffset() int64 { return d.lastValidOff }
func mustUnmarshalEntry(d []byte) raftpb.Entry {
var e raftpb.Entry
pbutil.MustUnmarshal(&e, d)
return e
}
func mustUnmarshalState(d []byte) raftpb.HardState {
var s raftpb.HardState
pbutil.MustUnmarshal(&s, d)
return s
}
func readInt64(r io.Reader) (int64, error) {
var n int64
err := binary.Read(r, binary.LittleEndian, &n)
return n, err
}