VictoriaMetrics/vendor/github.com/klauspost/compress/flate/stateless.go

package flate

import (
	"io"
	"math"
	"sync"
)

const (
	// maxStatelessBlock bounds the number of bytes (dictionary included)
	// handed to statelessEnc in one call; the encoder tracks positions as int16.
	maxStatelessBlock = math.MaxInt16
	// dictionary will be taken from maxStatelessBlock, so limit it.
	maxStatelessDict = 8 << 10

	// slTableBits is the width, in bits, of the hash produced by hashSL.
	// slTableSize is the resulting number of hash table entries and
	// slTableShift the right shift that reduces a 32-bit product to that width.
	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)

// statelessWriter is the writer handed out by NewStatelessWriter.
// Each Write is forwarded to StatelessDeflate on its own; the struct only
// remembers the destination and whether Close has already run.
type statelessWriter struct {
	dst    io.Writer // destination of the compressed output
	closed bool      // set by Close, cleared by Reset
}

// Close emits the terminating EOF block on the first call.
// Any later call is a no-op that returns nil.
func (s *statelessWriter) Close() error {
	if !s.closed {
		s.closed = true
		// Empty input with eof set produces only the final block.
		return StatelessDeflate(s.dst, nil, true, nil)
	}
	return nil
}

// Write compresses p as an independent, non-final chunk without a dictionary.
// It reports len(p) on success and 0 together with the error on failure.
func (s *statelessWriter) Write(p []byte) (n int, err error) {
	if err = StatelessDeflate(s.dst, p, false, nil); err != nil {
		return 0, err
	}
	return len(p), nil
}

// Reset points the writer at w and marks it as open again.
func (s *statelessWriter) Reset(w io.Writer) {
	s.dst, s.closed = w, false
}

// NewStatelessWriter returns a writer that compresses every Write call
// independently and writes the result to dst.
// No memory is retained between Write calls, at the price of suboptimal
// compression and speed.
// As a consequence, the size of the individual Write calls influences the
// size of the output.
func NewStatelessWriter(dst io.Writer) io.WriteCloser {
	w := new(statelessWriter)
	w.dst = dst
	return w
}

// bitWriterPool recycles bit writers across StatelessDeflate calls.
// Fresh writers are created without a destination; callers reset them
// onto their own output after taking one from the pool.
var bitWriterPool = func() sync.Pool {
	makeWriter := func() interface{} {
		return newHuffmanBitWriter(nil)
	}
	return sync.Pool{New: makeWriter}
}()

// StatelessDeflate compresses in directly to out without retaining any state.
// Everything has been flushed to out by the time it returns.
// Up to 8KB of an optional dictionary can be given, which is presumed to
// precede the block. Longer dictionaries are truncated to their last 8KB and
// still produce valid output. A nil dictionary is perfectly fine.
// When eof is true the last block written is marked as final; otherwise an
// empty stored block is appended so the output ends byte aligned.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var toks tokens
	w := bitWriterPool.Get().(*huffmanBitWriter)
	w.reset(out)
	defer func() {
		// Drop the reference to the caller's writer before pooling.
		w.reset(nil)
		bitWriterPool.Put(w)
	}()

	if len(in) == 0 && eof {
		// Nothing to compress: a lone, empty, final stored block will do.
		w.writeStoredHeader(0, true)
		w.flush()
		return w.err
	}

	// Only the tail of an oversized dictionary is used.
	if excess := len(dict) - maxStatelessDict; excess > 0 {
		dict = dict[excess:]
	}

	for len(in) > 0 {
		// chunk is the raw input covered by this block; dictionary and chunk
		// together never exceed maxStatelessBlock bytes.
		chunk := in
		if room := maxStatelessBlock - len(dict); len(chunk) > room {
			chunk = chunk[:room]
		}
		in = in[len(chunk):]
		last := len(in) == 0
		final := eof && last

		// window is what the encoder sees: the dictionary followed by chunk.
		window := chunk
		if len(dict) > 0 {
			window = make([]byte, len(dict)+len(chunk))
			n := copy(window, dict)
			copy(window[n:], chunk)
		}
		statelessEnc(&toks, window, int16(len(dict)))

		switch {
		case toks.n == 0:
			// The encoder produced no tokens: store the chunk verbatim.
			w.writeStoredHeader(len(chunk), final)
			if w.err != nil {
				return w.err
			}
			w.writeBytes(chunk)
		case int(toks.n) > len(chunk)-len(chunk)>>4:
			// Matching removed less than 1/16th: huffman compress the block.
			w.writeBlockHuff(final, chunk, last)
		default:
			w.writeBlockDynamic(&toks, final, chunk, last)
		}

		if !last {
			// More input follows: the tail of this window is the next dictionary.
			dict = window[len(window)-maxStatelessDict:]
			toks.Reset()
		}
		if w.err != nil {
			return w.err
		}
	}

	if !eof {
		// Align, only a stored block can do that.
		w.writeStoredHeader(0, false)
	}
	w.flush()
	return w.err
}

// hashSL maps a 32-bit value onto a hash table index by multiplying with a
// fixed constant and keeping the top slTableBits bits of the 32-bit product.
func hashSL(u uint32) uint32 {
	const multiplier = 0x1e35a7bd
	product := u * multiplier
	return product >> slTableShift
}

// load3216 returns the four bytes of b starting at index i as a
// little-endian uint32.
func load3216(b []byte, i int16) uint32 {
	// Narrow to a four byte window first so the individual byte reads need
	// no further bounds checks and can be merged into a single load.
	w := b[i:][:4]
	return uint32(w[0]) | uint32(w[1])<<8 | uint32(w[2])<<16 | uint32(w[3])<<24
}

// load6416 returns the eight bytes of b starting at index i as a
// little-endian uint64.
func load6416(b []byte, i int16) uint64 {
	// Narrow to an eight byte window first so the individual byte reads need
	// no further bounds checks and can be merged into a single load.
	w := b[i:][:8]
	return uint64(w[0]) | uint64(w[1])<<8 | uint64(w[2])<<16 | uint64(w[3])<<24 |
		uint64(w[4])<<32 | uint64(w[5])<<40 | uint64(w[6])<<48 | uint64(w[7])<<56
}

// statelessEnc searches src[startAt:] for repeated 4-byte sequences using a
// hash table that lives only for this call, and appends the resulting literal
// and match tokens to dst.
//
// src[:startAt] acts as a dictionary: its positions are entered into the hash
// table so matches can point into it, but no tokens are produced for it.
// All positions are held in int16.
//
// When fewer than minNonLiteralBlockSize bytes follow startAt, dst.n is set
// to 0 and nothing is encoded. When no match is found at all, nothing is
// appended either, so a dst that was empty on entry still has dst.n == 0.
// The caller is expected to handle dst.n == 0 itself.
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// tableEntry records the position in src at which a hashed value was seen.
	type tableEntry struct {
		offset int16
	}

	// Zero-initialised, so an unused slot reads as position 0.
	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			// Slide the 4-byte window forward by one byte.
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	// s is the current search position.
	s := startAt + 1
	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// cv holds the 4 bytes at s.
	cv := load3216(src, s)

	for {
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		// Search loop: probe two positions per iteration until a 4-byte match
		// is found or the input is exhausted.
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			// The step grows by one for every 1<<skipLog bytes scanned since
			// the last emitted token.
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			// A non-positive nextS means the int16 arithmetic wrapped around.
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			// One 8-byte load supplies the 4-byte values at nextS and nextS+1.
			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				// The emitLiteral call is compiled out; the enabled branch
				// stores each pending byte as a literal token and counts it
				// in the literal histogram.
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			// Never resume before a position the search loop already indexed.
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			if uint32(x) != load3216(src, candidate.offset) {
				// No immediate match at s: go back to the search loop at s+1.
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		// Bytes after the last match are handed to emitLiteral.
		emitLiteral(dst, src[nextEmit:])
	}
}
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`package flate`

			`import (`
			`"io"`
			`"math"`
			`"sync"`
			`)`

			`const (`
			`maxStatelessBlock = math.MaxInt16`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`// dictionary will be taken from maxStatelessBlock, so limit it.`
			`maxStatelessDict = 8 << 10`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00
			`slTableBits = 13`
			`slTableSize = 1 << slTableBits`
			`slTableShift = 32 - slTableBits`
			`)`

			`type statelessWriter struct {`
			`dst io.Writer`
			`closed bool`
			`}`

			`func (s *statelessWriter) Close() error {`
			`if s.closed {`
			`return nil`
			`}`
			`s.closed = true`
			`// Emit EOF block`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`return StatelessDeflate(s.dst, nil, true, nil)`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`}`

			`func (s *statelessWriter) Write(p []byte) (n int, err error) {`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`err = StatelessDeflate(s.dst, p, false, nil)`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`if err != nil {`
			`return 0, err`
			`}`
			`return len(p), nil`
			`}`

			`func (s *statelessWriter) Reset(w io.Writer) {`
			`s.dst = w`
			`s.closed = false`
			`}`

			`// NewStatelessWriter will do compression but without maintaining any state`
			`// between Write calls.`
			`// There will be no memory kept between Write calls,`
			`// but compression and speed will be suboptimal.`
			`// Because of this, the size of actual Write calls will affect output size.`
			`func NewStatelessWriter(dst io.Writer) io.WriteCloser {`
			`return &statelessWriter{dst: dst}`
			`}`

			`// bitWriterPool contains bit writers that can be reused.`
			`var bitWriterPool = sync.Pool{`
			`New: func() interface{} {`
			`return newHuffmanBitWriter(nil)`
			`},`
			`}`

			`// StatelessDeflate allows to compress directly to a Writer without retaining state.`
			`// When returning everything will be flushed.`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block.`
			`// Longer dictionaries will be truncated and will still produce valid output.`
			`// Sending nil dictionary is perfectly fine.`
			`func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`var dst tokens`
			`bw := bitWriterPool.Get().(*huffmanBitWriter)`
			`bw.reset(out)`
			`defer func() {`
			`// don't keep a reference to our output`
			`bw.reset(nil)`
			`bitWriterPool.Put(bw)`
			`}()`
			`if eof && len(in) == 0 {`
			`// Just write an EOF block.`
			`// Could be faster...`
			`bw.writeStoredHeader(0, true)`
			`bw.flush()`
			`return bw.err`
			`}`

`make vendor-update` 2020-02-10 22:35:08 +01:00			`// Truncate dict`
			`if len(dict) > maxStatelessDict {`
			`dict = dict[len(dict)-maxStatelessDict:]`
			`}`

all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`for len(in) > 0 {`
			`todo := in`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`if len(todo) > maxStatelessBlock-len(dict) {`
			`todo = todo[:maxStatelessBlock-len(dict)]`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`}`
			`in = in[len(todo):]`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`uncompressed := todo`
			`if len(dict) > 0 {`
			`// combine dict and source`
			`bufLen := len(todo) + len(dict)`
			`combined := make([]byte, bufLen)`
			`copy(combined, dict)`
			`copy(combined[len(dict):], todo)`
			`todo = combined`
			`}`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`// Compress`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`statelessEnc(&dst, todo, int16(len(dict)))`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`isEof := eof && len(in) == 0`

			`if dst.n == 0 {`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`bw.writeStoredHeader(len(uncompressed), isEof)`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`if bw.err != nil {`
			`return bw.err`
			`}`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`bw.writeBytes(uncompressed)`
			`} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`// If we removed less than 1/16th, huffman compress the block.`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`} else {`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)`
			`}`
			`if len(in) > 0 {`
			`// Retain a dict if we have more`
			`dict = todo[len(todo)-maxStatelessDict:]`
			`dst.Reset()`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`}`
			`if bw.err != nil {`
			`return bw.err`
			`}`
			`}`
			`if !eof {`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`// Align, only a stored block can do that.`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`bw.writeStoredHeader(0, false)`
			`}`
			`bw.flush()`
			`return bw.err`
			`}`

			`func hashSL(u uint32) uint32 {`
			`return (u * 0x1e35a7bd) >> slTableShift`
			`}`

			`func load3216(b []byte, i int16) uint32 {`
			`// Help the compiler eliminate bounds checks on the read so it can be done in a single read.`
			`b = b[i:]`
			`b = b[:4]`
			`return uint32(b[0]) \| uint32(b[1])<<8 \| uint32(b[2])<<16 \| uint32(b[3])<<24`
			`}`

			`func load6416(b []byte, i int16) uint64 {`
			`// Help the compiler eliminate bounds checks on the read so it can be done in a single read.`
			`b = b[i:]`
			`b = b[:8]`
			`return uint64(b[0]) \| uint64(b[1])<<8 \| uint64(b[2])<<16 \| uint64(b[3])<<24 \|`
			`uint64(b[4])<<32 \| uint64(b[5])<<40 \| uint64(b[6])<<48 \| uint64(b[7])<<56`
			`}`

`make vendor-update` 2020-02-10 22:35:08 +01:00			`func statelessEnc(dst *tokens, src []byte, startAt int16) {`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`const (`
			`inputMargin = 12 - 1`
			`minNonLiteralBlockSize = 1 + 1 + inputMargin`
			`)`

			`type tableEntry struct {`
			`offset int16`
			`}`

			`var table [slTableSize]tableEntry`

			`// This check isn't in the Snappy implementation, but there, the caller`
			`// instead of the callee handles this case.`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`if len(src)-int(startAt) < minNonLiteralBlockSize {`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`// We do not fill the token table.`
			`// This will be picked up by caller.`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`dst.n = 0`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`return`
			`}`
`make vendor-update` 2020-02-10 22:35:08 +01:00			`// Index until startAt`
			`if startAt > 0 {`
			`cv := load3232(src, 0)`
			`for i := int16(0); i < startAt; i++ {`
			`table[hashSL(cv)] = tableEntry{offset: i}`
			`cv = (cv >> 8) \| (uint32(src[i+4]) << 24)`
			`}`
			`}`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00
`make vendor-update` 2020-02-10 22:35:08 +01:00			`s := startAt + 1`
			`nextEmit := startAt`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`// sLimit is when to stop looking for offset/length copies. The inputMargin`
			`// lets us use a fast path for emitLiteral in the main loop, while we are`
			`// looking for copies.`
			`sLimit := int16(len(src) - inputMargin)`

			`// nextEmit is where in src the next emitLiteral should start from.`
			`cv := load3216(src, s)`

			`for {`
			`const skipLog = 5`
			`const doEvery = 2`

			`nextS := s`
			`var candidate tableEntry`
			`for {`
			`nextHash := hashSL(cv)`
			`candidate = table[nextHash]`
			`nextS = s + doEvery + (s-nextEmit)>>skipLog`
			`if nextS > sLimit \|\| nextS <= 0 {`
			`goto emitRemainder`
			`}`

			`now := load6416(src, nextS)`
			`table[nextHash] = tableEntry{offset: s}`
			`nextHash = hashSL(uint32(now))`

			`if cv == load3216(src, candidate.offset) {`
			`table[nextHash] = tableEntry{offset: nextS}`
			`break`
			`}`

			`// Do one right away...`
			`cv = uint32(now)`
			`s = nextS`
			`nextS++`
			`candidate = table[nextHash]`
			`now >>= 8`
			`table[nextHash] = tableEntry{offset: s}`

			`if cv == load3216(src, candidate.offset) {`
			`table[nextHash] = tableEntry{offset: nextS}`
			`break`
			`}`
			`cv = uint32(now)`
			`s = nextS`
			`}`

			`// A 4-byte match has been found. We'll later see if more than 4 bytes`
			`// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit`
			`// them as literal bytes.`
			`for {`
			`// Invariant: we have a 4-byte match at s, and no need to emit any`
			`// literal bytes prior to s.`

			`// Extend the 4-byte match as long as possible.`
			`t := candidate.offset`
			`l := int16(matchLen(src[s+4:], src[t+4:]) + 4)`

			`// Extend backwards`
			`for t > 0 && s > nextEmit && src[t-1] == src[s-1] {`
			`s--`
			`t--`
			`l++`
			`}`
			`if nextEmit < s {`
vendor: `make vendor-update` 2022-03-16 12:55:03 +01:00			`if false {`
			`emitLiteral(dst, src[nextEmit:s])`
			`} else {`
			`for _, v := range src[nextEmit:s] {`
			`dst.tokens[dst.n] = token(v)`
			`dst.litHist[v]++`
			`dst.n++`
			`}`
			`}`
all: use `github.com/klauspost/compress/gzip` instead of `compress/gzip` `github.com/klauspost/compress/gzip` is more optimized than `compress/gzip`. This gives better gzip compression and decompression speeds. 2020-01-17 22:57:18 +01:00			`}`

			`// Save the match found`
			`dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))`
			`s += l`
			`nextEmit = s`
			`if nextS >= s {`
			`s = nextS + 1`
			`}`
			`if s >= sLimit {`
			`goto emitRemainder`
			`}`

			`// We could immediately start working at s now, but to improve`
			`// compression we first update the hash table at s-2 and at s. If`
			`// another emitCopy is not our next move, also calculate nextHash`
			`// at s+1. At least on GOARCH=amd64, these three hash calculations`
			`// are faster as one load64 call (with some shifts) instead of`
			`// three load32 calls.`
			`x := load6416(src, s-2)`
			`o := s - 2`
			`prevHash := hashSL(uint32(x))`
			`table[prevHash] = tableEntry{offset: o}`
			`x >>= 16`
			`currHash := hashSL(uint32(x))`
			`candidate = table[currHash]`
			`table[currHash] = tableEntry{offset: o + 2}`

			`if uint32(x) != load3216(src, candidate.offset) {`
			`cv = uint32(x >> 8)`
			`s++`
			`break`
			`}`
			`}`
			`}`

			`emitRemainder:`
			`if int(nextEmit) < len(src) {`
			`// If nothing was added, don't encode literals.`
			`if dst.n == 0 {`
			`return`
			`}`
			`emitLiteral(dst, src[nextEmit:])`
			`}`
			`}`