2019-07-23 18:26:39 +02:00
|
|
|
// Copyright 2019+ Klaus Post. All rights reserved.
|
|
|
|
// License information can be found in the LICENSE file.
|
|
|
|
// Based on work by Yann Collet, released under BSD License.
|
|
|
|
|
|
|
|
package zstd
|
|
|
|
|
|
|
|
import (
|
2022-05-02 15:00:32 +02:00
|
|
|
"bytes"
|
|
|
|
"encoding/binary"
|
2019-07-23 18:26:39 +02:00
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
2022-05-02 15:00:32 +02:00
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"path/filepath"
|
2019-07-23 18:26:39 +02:00
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/klauspost/compress/huff0"
|
2019-10-13 22:17:40 +02:00
|
|
|
"github.com/klauspost/compress/zstd/internal/xxhash"
|
2019-07-23 18:26:39 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
type blockType uint8
|
|
|
|
|
|
|
|
//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
|
|
|
|
|
|
|
|
const (
|
|
|
|
blockTypeRaw blockType = iota
|
|
|
|
blockTypeRLE
|
|
|
|
blockTypeCompressed
|
|
|
|
blockTypeReserved
|
|
|
|
)
|
|
|
|
|
|
|
|
// literalsBlockType is the 2-bit type field found in the first byte
// of a literals section.
type literalsBlockType uint8

// Literals block types, numbered by the value stored in the section header.
const (
	// literalsBlockRaw: literals are stored uncompressed.
	literalsBlockRaw literalsBlockType = iota
	// literalsBlockRLE: a single stored byte is repeated.
	literalsBlockRLE
	// literalsBlockCompressed: a Huffman table followed by compressed literals.
	literalsBlockCompressed
	// literalsBlockTreeless: compressed literals that reuse the Huffman table
	// kept in the history.
	literalsBlockTreeless
)
|
|
|
|
|
|
|
|
// Size limits used by the block decoder.
const (
	// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
	maxCompressedBlockSize = 128 << 10

	// compressedBlockOverAlloc is the number of extra bytes added
	// when allocating block buffers.
	compressedBlockOverAlloc = 16

	// maxCompressedBlockSizeAlloc is the allocation size for a
	// full-size block, including the over-allocation.
	maxCompressedBlockSizeAlloc = maxCompressedBlockSize + compressedBlockOverAlloc

	// Maximum possible block size (all Raw+Uncompressed).
	maxBlockSize = 1<<21 - 1

	// Literal size limits, see:
	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
	maxCompressedLiteralSize = 1 << 18
	maxRLELiteralSize        = 1 << 20

	// maxMatchLen is the longest match length accepted.
	maxMatchLen = 131074

	// maxSequences is the largest sequence count a sequences section
	// header can express (3-byte form: 0x7f00 plus a 16-bit value).
	maxSequences = 0x7f00 + 0xffff

	// We support slightly less than the reference decoder to be able to
	// use ints on 32 bit archs.
	maxOffsetBits = 30
)
|
|
|
|
|
|
|
|
var (
|
|
|
|
huffDecoderPool = sync.Pool{New: func() interface{} {
|
|
|
|
return &huff0.Scratch{}
|
|
|
|
}}
|
|
|
|
|
|
|
|
fseDecoderPool = sync.Pool{New: func() interface{} {
|
|
|
|
return &fseDecoder{}
|
|
|
|
}}
|
|
|
|
)
|
|
|
|
|
|
|
|
type blockDec struct {
|
|
|
|
// Raw source data of the block.
|
|
|
|
data []byte
|
|
|
|
dataStorage []byte
|
|
|
|
|
|
|
|
// Destination of the decoded data.
|
|
|
|
dst []byte
|
|
|
|
|
|
|
|
// Buffer for literals data.
|
|
|
|
literalBuf []byte
|
|
|
|
|
|
|
|
// Window size of the block.
|
|
|
|
WindowSize uint64
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
err error
|
|
|
|
|
|
|
|
// Check against this crc
|
|
|
|
checkCRC []byte
|
2020-04-10 17:39:15 +02:00
|
|
|
|
2020-06-05 22:51:30 +02:00
|
|
|
// Frame to use for singlethreaded decoding.
|
|
|
|
// Should not be used by the decoder itself since parent may be another frame.
|
|
|
|
localFrame *frameDec
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
sequence []seqVals
|
|
|
|
|
|
|
|
async struct {
|
|
|
|
newHist *history
|
|
|
|
literals []byte
|
|
|
|
seqData []byte
|
|
|
|
seqSize int // Size of uncompressed sequences
|
|
|
|
fcs uint64
|
|
|
|
}
|
|
|
|
|
2020-04-10 17:39:15 +02:00
|
|
|
// Block is RLE, this is the size.
|
|
|
|
RLESize uint32
|
|
|
|
tmp [4]byte
|
|
|
|
|
|
|
|
Type blockType
|
|
|
|
|
|
|
|
// Is this the last block of a frame?
|
|
|
|
Last bool
|
|
|
|
|
|
|
|
// Use less memory
|
|
|
|
lowMem bool
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (b *blockDec) String() string {
|
|
|
|
if b == nil {
|
|
|
|
return "<nil>"
|
|
|
|
}
|
|
|
|
return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
|
|
|
|
}
|
|
|
|
|
|
|
|
func newBlockDec(lowMem bool) *blockDec {
|
|
|
|
b := blockDec{
|
2022-03-16 12:55:03 +01:00
|
|
|
lowMem: lowMem,
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
return &b
|
|
|
|
}
|
|
|
|
|
|
|
|
// reset will reset the block.
|
|
|
|
// Input must be a start of a block and will be at the end of the block when returned.
|
|
|
|
func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
|
|
|
|
b.WindowSize = windowSize
|
2021-05-28 12:18:26 +02:00
|
|
|
tmp, err := br.readSmall(3)
|
|
|
|
if err != nil {
|
|
|
|
println("Reading block header:", err)
|
|
|
|
return err
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
|
|
|
|
b.Last = bh&1 != 0
|
|
|
|
b.Type = blockType((bh >> 1) & 3)
|
|
|
|
// find size.
|
|
|
|
cSize := int(bh >> 3)
|
2022-05-02 15:00:32 +02:00
|
|
|
maxSize := maxCompressedBlockSizeAlloc
|
2019-07-23 18:26:39 +02:00
|
|
|
switch b.Type {
|
|
|
|
case blockTypeReserved:
|
|
|
|
return ErrReservedBlockType
|
|
|
|
case blockTypeRLE:
|
2022-03-16 12:55:03 +01:00
|
|
|
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
|
|
|
|
}
|
|
|
|
return ErrWindowSizeExceeded
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
b.RLESize = uint32(cSize)
|
2020-04-24 14:26:57 +02:00
|
|
|
if b.lowMem {
|
|
|
|
maxSize = cSize
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
cSize = 1
|
|
|
|
case blockTypeCompressed:
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
println("Data size on stream:", cSize)
|
|
|
|
}
|
|
|
|
b.RLESize = 0
|
2022-05-02 15:00:32 +02:00
|
|
|
maxSize = maxCompressedBlockSizeAlloc
|
2020-04-24 14:26:57 +02:00
|
|
|
if windowSize < maxCompressedBlockSize && b.lowMem {
|
2022-05-02 15:00:32 +02:00
|
|
|
maxSize = int(windowSize) + compressedBlockOverAlloc
|
2020-04-24 14:26:57 +02:00
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
|
|
|
|
}
|
|
|
|
return ErrCompressedSizeTooBig
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
// Empty compressed blocks must at least be 2 bytes
|
|
|
|
// for Literals_Block_Type and one for Sequences_Section_Header.
|
|
|
|
if cSize < 2 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
2020-05-24 23:06:52 +02:00
|
|
|
case blockTypeRaw:
|
2022-03-16 12:55:03 +01:00
|
|
|
if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
|
|
|
|
}
|
|
|
|
return ErrWindowSizeExceeded
|
|
|
|
}
|
|
|
|
|
2019-07-23 18:26:39 +02:00
|
|
|
b.RLESize = 0
|
2020-05-24 23:06:52 +02:00
|
|
|
// We do not need a destination for raw blocks.
|
|
|
|
maxSize = -1
|
|
|
|
default:
|
|
|
|
panic("Invalid block type")
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Read block data.
|
|
|
|
if cap(b.dataStorage) < cSize {
|
2021-06-14 12:41:28 +02:00
|
|
|
if b.lowMem || cSize > maxCompressedBlockSize {
|
2022-05-02 15:00:32 +02:00
|
|
|
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
|
2019-07-23 18:26:39 +02:00
|
|
|
} else {
|
2022-05-02 15:00:32 +02:00
|
|
|
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
}
|
2020-04-24 14:26:57 +02:00
|
|
|
if cap(b.dst) <= maxSize {
|
|
|
|
b.dst = make([]byte, 0, maxSize+1)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
b.data, err = br.readBig(cSize, b.dataStorage)
|
|
|
|
if err != nil {
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-11-19 20:05:37 +01:00
|
|
|
println("Reading block:", err, "(", cSize, ")", len(b.data))
|
|
|
|
printf("%T", br)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// sendEOF will make the decoder send EOF on this frame.
|
|
|
|
func (b *blockDec) sendErr(err error) {
|
|
|
|
b.Last = true
|
|
|
|
b.Type = blockTypeReserved
|
|
|
|
b.err = err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close will release resources.
|
|
|
|
// Closed blockDec cannot be reset.
|
|
|
|
func (b *blockDec) Close() {
|
|
|
|
}
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
// decodeBuf
|
2019-07-23 18:26:39 +02:00
|
|
|
func (b *blockDec) decodeBuf(hist *history) error {
|
|
|
|
switch b.Type {
|
|
|
|
case blockTypeRLE:
|
|
|
|
if cap(b.dst) < int(b.RLESize) {
|
|
|
|
if b.lowMem {
|
|
|
|
b.dst = make([]byte, b.RLESize)
|
|
|
|
} else {
|
|
|
|
b.dst = make([]byte, maxBlockSize)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
b.dst = b.dst[:b.RLESize]
|
|
|
|
v := b.data[0]
|
|
|
|
for i := range b.dst {
|
|
|
|
b.dst[i] = v
|
|
|
|
}
|
|
|
|
hist.appendKeep(b.dst)
|
|
|
|
return nil
|
|
|
|
case blockTypeRaw:
|
|
|
|
hist.appendKeep(b.data)
|
|
|
|
return nil
|
|
|
|
case blockTypeCompressed:
|
|
|
|
saved := b.dst
|
2022-03-16 12:55:03 +01:00
|
|
|
// Append directly to history
|
|
|
|
if hist.ignoreBuffer == 0 {
|
|
|
|
b.dst = hist.b
|
|
|
|
hist.b = nil
|
|
|
|
} else {
|
|
|
|
b.dst = b.dst[:0]
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
err := b.decodeCompressed(hist)
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-10-13 22:17:40 +02:00
|
|
|
println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
if hist.ignoreBuffer == 0 {
|
|
|
|
hist.b = b.dst
|
|
|
|
b.dst = saved
|
|
|
|
} else {
|
|
|
|
hist.appendKeep(b.dst)
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
return err
|
|
|
|
case blockTypeReserved:
|
|
|
|
// Used for returning errors.
|
|
|
|
return b.err
|
|
|
|
default:
|
|
|
|
panic("Invalid block type")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
|
2019-07-23 18:26:39 +02:00
|
|
|
// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
|
|
|
|
if len(in) < 2 {
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
|
2019-07-23 18:26:39 +02:00
|
|
|
litType := literalsBlockType(in[0] & 3)
|
|
|
|
var litRegenSize int
|
|
|
|
var litCompSize int
|
|
|
|
sizeFormat := (in[0] >> 2) & 3
|
|
|
|
var fourStreams bool
|
2022-03-16 12:55:03 +01:00
|
|
|
var literals []byte
|
2019-07-23 18:26:39 +02:00
|
|
|
switch litType {
|
|
|
|
case literalsBlockRaw, literalsBlockRLE:
|
|
|
|
switch sizeFormat {
|
|
|
|
case 0, 2:
|
|
|
|
// Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
|
|
|
|
litRegenSize = int(in[0] >> 3)
|
|
|
|
in = in[1:]
|
|
|
|
case 1:
|
|
|
|
// Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
|
|
|
|
litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
|
|
|
|
in = in[2:]
|
|
|
|
case 3:
|
|
|
|
// Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
|
|
|
|
if len(in) < 3 {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
|
|
|
|
in = in[3:]
|
|
|
|
}
|
|
|
|
case literalsBlockCompressed, literalsBlockTreeless:
|
|
|
|
switch sizeFormat {
|
|
|
|
case 0, 1:
|
|
|
|
// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
|
|
|
|
if len(in) < 3 {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
|
|
|
|
litRegenSize = int(n & 1023)
|
|
|
|
litCompSize = int(n >> 10)
|
|
|
|
fourStreams = sizeFormat == 1
|
|
|
|
in = in[3:]
|
|
|
|
case 2:
|
|
|
|
fourStreams = true
|
|
|
|
if len(in) < 4 {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
|
|
|
|
litRegenSize = int(n & 16383)
|
|
|
|
litCompSize = int(n >> 14)
|
|
|
|
in = in[4:]
|
|
|
|
case 3:
|
|
|
|
fourStreams = true
|
|
|
|
if len(in) < 5 {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
|
|
|
|
litRegenSize = int(n & 262143)
|
|
|
|
litCompSize = int(n >> 18)
|
|
|
|
in = in[5:]
|
|
|
|
}
|
|
|
|
}
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-10-13 22:17:40 +02:00
|
|
|
println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
|
|
|
|
return in, ErrWindowSizeExceeded
|
|
|
|
}
|
|
|
|
|
2019-07-23 18:26:39 +02:00
|
|
|
switch litType {
|
|
|
|
case literalsBlockRaw:
|
|
|
|
if len(in) < litRegenSize {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
literals = in[:litRegenSize]
|
|
|
|
in = in[litRegenSize:]
|
|
|
|
//printf("Found %d uncompressed literals\n", litRegenSize)
|
|
|
|
case literalsBlockRLE:
|
|
|
|
if len(in) < 1 {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
if cap(b.literalBuf) < litRegenSize {
|
|
|
|
if b.lowMem {
|
|
|
|
b.literalBuf = make([]byte, litRegenSize)
|
|
|
|
} else {
|
|
|
|
if litRegenSize > maxCompressedLiteralSize {
|
|
|
|
// Exceptional
|
|
|
|
b.literalBuf = make([]byte, litRegenSize)
|
|
|
|
} else {
|
|
|
|
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
literals = b.literalBuf[:litRegenSize]
|
|
|
|
v := in[0]
|
|
|
|
for i := range literals {
|
|
|
|
literals[i] = v
|
|
|
|
}
|
|
|
|
in = in[1:]
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("Found %d RLE compressed literals\n", litRegenSize)
|
|
|
|
}
|
|
|
|
case literalsBlockTreeless:
|
|
|
|
if len(in) < litCompSize {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
// Store compressed literals, so we defer decoding until we get history.
|
|
|
|
literals = in[:litCompSize]
|
|
|
|
in = in[litCompSize:]
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("Found %d compressed literals\n", litCompSize)
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
huff := hist.huffTree
|
|
|
|
if huff == nil {
|
|
|
|
return in, errors.New("literal block was treeless, but no history was defined")
|
|
|
|
}
|
|
|
|
// Ensure we have space to store it.
|
|
|
|
if cap(b.literalBuf) < litRegenSize {
|
|
|
|
if b.lowMem {
|
|
|
|
b.literalBuf = make([]byte, 0, litRegenSize)
|
|
|
|
} else {
|
|
|
|
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
var err error
|
|
|
|
// Use our out buffer.
|
|
|
|
huff.MaxDecodedSize = maxCompressedBlockSize
|
|
|
|
if fourStreams {
|
|
|
|
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
|
|
|
} else {
|
|
|
|
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
|
|
|
|
}
|
|
|
|
// Make sure we don't leak our literals buffer
|
|
|
|
if err != nil {
|
|
|
|
println("decompressing literals:", err)
|
|
|
|
return in, err
|
|
|
|
}
|
|
|
|
if len(literals) != litRegenSize {
|
|
|
|
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
|
|
|
|
}
|
|
|
|
|
2019-07-23 18:26:39 +02:00
|
|
|
case literalsBlockCompressed:
|
|
|
|
if len(in) < litCompSize {
|
|
|
|
println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, ErrBlockTooSmall
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
literals = in[:litCompSize]
|
|
|
|
in = in[litCompSize:]
|
|
|
|
// Ensure we have space to store it.
|
|
|
|
if cap(b.literalBuf) < litRegenSize {
|
|
|
|
if b.lowMem {
|
|
|
|
b.literalBuf = make([]byte, 0, litRegenSize)
|
|
|
|
} else {
|
2022-03-16 12:55:03 +01:00
|
|
|
b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
huff := hist.huffTree
|
|
|
|
if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
|
|
|
|
huff = huffDecoderPool.Get().(*huff0.Scratch)
|
|
|
|
if huff == nil {
|
|
|
|
huff = &huff0.Scratch{}
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
var err error
|
2019-07-23 18:26:39 +02:00
|
|
|
huff, literals, err = huff0.ReadTable(literals, huff)
|
|
|
|
if err != nil {
|
|
|
|
println("reading huffman table:", err)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, err
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
hist.huffTree = huff
|
|
|
|
huff.MaxDecodedSize = maxCompressedBlockSize
|
2019-07-23 18:26:39 +02:00
|
|
|
// Use our out buffer.
|
|
|
|
if fourStreams {
|
2020-06-02 23:10:44 +02:00
|
|
|
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
|
2019-07-23 18:26:39 +02:00
|
|
|
} else {
|
2020-06-02 23:10:44 +02:00
|
|
|
literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
println("decoding compressed literals:", err)
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, err
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
// Make sure we don't leak our literals buffer
|
|
|
|
if len(literals) != litRegenSize {
|
2022-03-16 12:55:03 +01:00
|
|
|
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
|
|
|
|
}
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
hist.decoders.literals = literals
|
|
|
|
return in, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// decodeCompressed will start decompressing a block.
|
|
|
|
func (b *blockDec) decodeCompressed(hist *history) error {
|
|
|
|
in := b.data
|
|
|
|
in, err := b.decodeLiterals(in, hist)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
err = b.prepareSequences(in, hist)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if hist.decoders.nSeqs == 0 {
|
|
|
|
b.dst = append(b.dst, hist.decoders.literals...)
|
|
|
|
return nil
|
|
|
|
}
|
2022-05-02 15:00:32 +02:00
|
|
|
before := len(hist.decoders.out)
|
|
|
|
err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
|
2022-03-16 12:55:03 +01:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-05-02 15:00:32 +02:00
|
|
|
if hist.decoders.maxSyncLen > 0 {
|
|
|
|
hist.decoders.maxSyncLen += uint64(before)
|
|
|
|
hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
b.dst = hist.decoders.out
|
|
|
|
hist.recentOffsets = hist.decoders.prevOffset
|
|
|
|
return nil
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
|
|
|
if debugDecoder {
|
|
|
|
printf("prepareSequences: %d byte(s) input\n", len(in))
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
// Decode Sequences
|
|
|
|
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
|
|
|
|
if len(in) < 1 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
var nSeqs int
|
2019-07-23 18:26:39 +02:00
|
|
|
seqHeader := in[0]
|
|
|
|
switch {
|
|
|
|
case seqHeader < 128:
|
|
|
|
nSeqs = int(seqHeader)
|
|
|
|
in = in[1:]
|
|
|
|
case seqHeader < 255:
|
|
|
|
if len(in) < 2 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
|
|
|
nSeqs = int(seqHeader-128)<<8 | int(in[1])
|
|
|
|
in = in[2:]
|
|
|
|
case seqHeader == 255:
|
|
|
|
if len(in) < 3 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
|
|
|
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
|
|
|
|
in = in[3:]
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
if nSeqs == 0 && len(in) != 0 {
|
|
|
|
// When no sequences, there should not be any more data...
|
|
|
|
if debugDecoder {
|
|
|
|
printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
|
|
|
|
}
|
|
|
|
return ErrUnexpectedBlockSize
|
|
|
|
}
|
2022-01-27 12:16:16 +01:00
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
var seqs = &hist.decoders
|
|
|
|
seqs.nSeqs = nSeqs
|
2019-07-23 18:26:39 +02:00
|
|
|
if nSeqs > 0 {
|
|
|
|
if len(in) < 1 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
|
|
|
br := byteReader{b: in, off: 0}
|
|
|
|
compMode := br.Uint8()
|
|
|
|
br.advance(1)
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("Compression modes: 0b%b", compMode)
|
|
|
|
}
|
|
|
|
for i := uint(0); i < 3; i++ {
|
|
|
|
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
println("Table", tableIndex(i), "is", mode)
|
|
|
|
}
|
|
|
|
var seq *sequenceDec
|
|
|
|
switch tableIndex(i) {
|
|
|
|
case tableLiteralLengths:
|
|
|
|
seq = &seqs.litLengths
|
|
|
|
case tableOffsets:
|
|
|
|
seq = &seqs.offsets
|
|
|
|
case tableMatchLengths:
|
|
|
|
seq = &seqs.matchLengths
|
|
|
|
default:
|
|
|
|
panic("unknown table")
|
|
|
|
}
|
|
|
|
switch mode {
|
|
|
|
case compModePredefined:
|
2022-03-16 12:55:03 +01:00
|
|
|
if seq.fse != nil && !seq.fse.preDefined {
|
|
|
|
fseDecoderPool.Put(seq.fse)
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
seq.fse = &fsePredef[i]
|
|
|
|
case compModeRLE:
|
|
|
|
if br.remain() < 1 {
|
|
|
|
return ErrBlockTooSmall
|
|
|
|
}
|
|
|
|
v := br.Uint8()
|
|
|
|
br.advance(1)
|
2022-03-16 12:55:03 +01:00
|
|
|
if seq.fse == nil || seq.fse.preDefined {
|
|
|
|
seq.fse = fseDecoderPool.Get().(*fseDecoder)
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
symb, err := decSymbolValue(v, symbolTableX[i])
|
|
|
|
if err != nil {
|
|
|
|
printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
|
|
|
|
return err
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
seq.fse.setRLE(symb)
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2019-07-23 18:26:39 +02:00
|
|
|
printf("RLE set to %+v, code: %v", symb, v)
|
|
|
|
}
|
|
|
|
case compModeFSE:
|
|
|
|
println("Reading table for", tableIndex(i))
|
2022-03-16 12:55:03 +01:00
|
|
|
if seq.fse == nil || seq.fse.preDefined {
|
|
|
|
seq.fse = fseDecoderPool.Get().(*fseDecoder)
|
|
|
|
}
|
|
|
|
err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
|
2019-07-23 18:26:39 +02:00
|
|
|
if err != nil {
|
|
|
|
println("Read table error:", err)
|
|
|
|
return err
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
err = seq.fse.transform(symbolTableX[i])
|
2019-07-23 18:26:39 +02:00
|
|
|
if err != nil {
|
|
|
|
println("Transform table error:", err)
|
|
|
|
return err
|
|
|
|
}
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2022-03-16 12:55:03 +01:00
|
|
|
println("Read table ok", "symbolLen:", seq.fse.symbolLen)
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
case compModeRepeat:
|
|
|
|
seq.repeat = true
|
|
|
|
}
|
|
|
|
if br.overread() {
|
|
|
|
return io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
}
|
|
|
|
in = br.unread()
|
|
|
|
}
|
2021-06-08 14:42:57 +02:00
|
|
|
if debugDecoder {
|
2022-03-16 12:55:03 +01:00
|
|
|
println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if nSeqs == 0 {
|
2022-03-16 12:55:03 +01:00
|
|
|
if len(b.sequence) > 0 {
|
|
|
|
b.sequence = b.sequence[:0]
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
br := seqs.br
|
|
|
|
if br == nil {
|
|
|
|
br = &bitReader{}
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
|
|
|
if err := br.init(in); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
if err := seqs.initialize(br, hist, b.dst); err != nil {
|
|
|
|
println("initializing sequences:", err)
|
|
|
|
return err
|
|
|
|
}
|
2022-05-02 15:00:32 +02:00
|
|
|
// Extract blocks...
|
|
|
|
if false && hist.dict == nil {
|
|
|
|
fatalErr := func(err error) {
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
|
|
|
|
var buf bytes.Buffer
|
|
|
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
|
|
|
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
|
|
|
|
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
|
|
|
|
buf.Write(in)
|
|
|
|
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
|
|
|
}
|
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *blockDec) decodeSequences(hist *history) error {
|
|
|
|
if cap(b.sequence) < hist.decoders.nSeqs {
|
|
|
|
if b.lowMem {
|
|
|
|
b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
|
|
|
|
} else {
|
|
|
|
b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
b.sequence = b.sequence[:hist.decoders.nSeqs]
|
|
|
|
if hist.decoders.nSeqs == 0 {
|
|
|
|
hist.decoders.seqSize = len(hist.decoders.literals)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
hist.decoders.windowSize = hist.windowSize
|
|
|
|
hist.decoders.prevOffset = hist.recentOffsets
|
2022-05-02 15:00:32 +02:00
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
err := hist.decoders.decode(b.sequence)
|
|
|
|
hist.recentOffsets = hist.decoders.prevOffset
|
|
|
|
return err
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
func (b *blockDec) executeSequences(hist *history) error {
|
2020-04-24 14:26:57 +02:00
|
|
|
hbytes := hist.b
|
|
|
|
if len(hbytes) > hist.windowSize {
|
|
|
|
hbytes = hbytes[len(hbytes)-hist.windowSize:]
|
2022-03-16 12:55:03 +01:00
|
|
|
// We do not need history anymore.
|
2020-06-02 23:10:44 +02:00
|
|
|
if hist.dict != nil {
|
|
|
|
hist.dict.content = nil
|
|
|
|
}
|
2020-04-24 14:26:57 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
hist.decoders.windowSize = hist.windowSize
|
|
|
|
hist.decoders.out = b.dst[:0]
|
|
|
|
err := hist.decoders.execute(b.sequence, hbytes)
|
2019-07-23 18:26:39 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
return b.updateHistory(hist)
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
|
2022-03-16 12:55:03 +01:00
|
|
|
func (b *blockDec) updateHistory(hist *history) error {
|
2019-07-23 18:26:39 +02:00
|
|
|
if len(b.data) > maxCompressedBlockSize {
|
|
|
|
return fmt.Errorf("compressed block size too large (%d)", len(b.data))
|
|
|
|
}
|
|
|
|
// Set output and release references.
|
2022-03-16 12:55:03 +01:00
|
|
|
b.dst = hist.decoders.out
|
|
|
|
hist.recentOffsets = hist.decoders.prevOffset
|
2019-07-23 18:26:39 +02:00
|
|
|
|
|
|
|
if b.Last {
|
|
|
|
// if last block we don't care about history.
|
|
|
|
println("Last block, no history returned")
|
|
|
|
hist.b = hist.b[:0]
|
|
|
|
return nil
|
2022-03-16 12:55:03 +01:00
|
|
|
} else {
|
|
|
|
hist.append(b.dst)
|
|
|
|
if debugDecoder {
|
|
|
|
println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
|
|
|
|
}
|
2019-07-23 18:26:39 +02:00
|
|
|
}
|
2022-03-16 12:55:03 +01:00
|
|
|
hist.decoders.out, hist.decoders.literals = nil, nil
|
2019-07-23 18:26:39 +02:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|