VictoriaMetrics/vendor/github.com/klauspost/compress/flate/level3.go

233 lines
5.9 KiB
Go
Raw Normal View History

package flate
2020-02-10 22:28:15 +01:00
import "fmt"
// fastEncL3
type fastEncL3 struct {
fastGen
2022-02-21 12:59:00 +01:00
table [1 << 16]tableEntryPrev
}
// Encode uses a similar algorithm to level 2, will check up to two candidates.
func (e *fastEncL3) Encode(dst *tokens, src []byte) {
const (
inputMargin = 8 - 1
minNonLiteralBlockSize = 1 + 1 + inputMargin
2022-02-21 12:59:00 +01:00
tableBits = 16
tableSize = 1 << tableBits
)
if debugDeflate && e.cur < 0 {
2020-02-10 22:28:15 +01:00
panic(fmt.Sprint("e.cur < 0: ", e.cur))
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntryPrev{}
}
e.cur = maxMatchOffset
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
for i := range e.table[:] {
v := e.table[i]
if v.Cur.offset <= minOff {
v.Cur.offset = 0
} else {
v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset
}
if v.Prev.offset <= minOff {
v.Prev.offset = 0
} else {
v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset
}
e.table[i] = v
}
e.cur = maxMatchOffset
}
s := e.addBlock(src)
// Skip if too small.
if len(src) < minNonLiteralBlockSize {
// We do not fill the token table.
// This will be picked up by caller.
dst.n = uint16(len(src))
return
}
// Override src
src = e.hist
nextEmit := s
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := int32(len(src) - inputMargin)
// nextEmit is where in src the next emitLiteral should start from.
cv := load3232(src, s)
for {
const skipLog = 6
nextS := s
var candidate tableEntry
for {
2022-02-21 12:59:00 +01:00
nextHash := hash4u(cv, tableBits)
s = nextS
nextS = s + 1 + (s-nextEmit)>>skipLog
if nextS > sLimit {
goto emitRemainder
}
candidates := e.table[nextHash]
now := load3232(src, nextS)
// Safe offset distance until s + 4...
minOffset := e.cur + s - (maxMatchOffset - 4)
e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}}
// Check both candidates
candidate = candidates.Cur
if candidate.offset < minOffset {
cv = now
// Previous will also be invalid, we have nothing.
continue
}
if cv == load3232(src, candidate.offset-e.cur) {
if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) {
break
}
// Both match and are valid, pick longest.
offset := s - (candidate.offset - e.cur)
o2 := s - (candidates.Prev.offset - e.cur)
l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:])
if l2 > l1 {
candidate = candidates.Prev
}
break
} else {
// We only check if value mismatches.
// Offset will always be invalid in other cases.
candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
break
}
}
cv = now
}
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
// Extend the 4-byte match as long as possible.
//
t := candidate.offset - e.cur
l := e.matchlenLong(s+4, t+4, src) + 4
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--
t--
l++
}
if nextEmit < s {
emitLiteral(dst, src[nextEmit:s])
}
dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
s += l
nextEmit = s
if nextS >= s {
s = nextS + 1
}
if s >= sLimit {
t += l
// Index first pair after match end.
if int(t+4) < len(src) && t > 0 {
cv := load3232(src, t)
2022-02-21 12:59:00 +01:00
nextHash := hash4u(cv, tableBits)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + t},
}
}
goto emitRemainder
}
2022-02-21 12:59:00 +01:00
// Store every 5th hash in-between.
for i := s - l + 2; i < s-5; i += 5 {
nextHash := hash4u(load3232(src, i), tableBits)
e.table[nextHash] = tableEntryPrev{
Prev: e.table[nextHash].Cur,
Cur: tableEntry{offset: e.cur + i}}
}
2022-02-21 12:59:00 +01:00
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-2 to s.
x := load6432(src, s-2)
prevHash := hash4u(uint32(x), tableBits)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 2},
}
x >>= 8
2022-02-21 12:59:00 +01:00
prevHash = hash4u(uint32(x), tableBits)
e.table[prevHash] = tableEntryPrev{
Prev: e.table[prevHash].Cur,
Cur: tableEntry{offset: e.cur + s - 1},
}
x >>= 8
2022-02-21 12:59:00 +01:00
currHash := hash4u(uint32(x), tableBits)
candidates := e.table[currHash]
cv = uint32(x)
e.table[currHash] = tableEntryPrev{
Prev: candidates.Cur,
Cur: tableEntry{offset: s + e.cur},
}
// Check both candidates
candidate = candidates.Cur
minOffset := e.cur + s - (maxMatchOffset - 4)
2022-02-21 12:59:00 +01:00
if candidate.offset > minOffset {
if cv == load3232(src, candidate.offset-e.cur) {
// Found a match...
continue
}
candidate = candidates.Prev
if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) {
2022-02-21 12:59:00 +01:00
// Match at prev...
continue
}
}
cv = uint32(x >> 8)
s++
break
}
}
emitRemainder:
if int(nextEmit) < len(src) {
// If nothing was added, don't encode literals.
if dst.n == 0 {
return
}
emitLiteral(dst, src[nextEmit:])
}
}