VictoriaMetrics/lib/encoding/int.go
Fuchun Zhang 78af9b3e30
make encoding.MarshalVarInt64s faster (#5721)
* make encoding.MarshalVarInt64s faster

* add fast path for MarshalVarInt64s

* make UnmarshalVarUint64s faster

* remove comment
2024-02-01 03:33:59 +00:00

393 lines
12 KiB
Go

package encoding
import (
"encoding/binary"
"fmt"
"math/bits"
"sync"
)
// MarshalUint16 appends the big-endian encoding of u to dst
// and returns the extended slice.
func MarshalUint16(dst []byte, u uint16) []byte {
	b := [2]byte{byte(u >> 8), byte(u)}
	return append(dst, b[:]...)
}
// UnmarshalUint16 decodes a big-endian uint16 from the first 2 bytes of src.
//
// The caller must ensure that len(src) >= 2.
func UnmarshalUint16(src []byte) uint16 {
	// binary.BigEndian is used, since it is faster than the manual conversion.
	u := binary.BigEndian.Uint16(src)
	return u
}
// MarshalUint32 appends the big-endian encoding of u to dst
// and returns the extended slice.
func MarshalUint32(dst []byte, u uint32) []byte {
	b := [4]byte{
		byte(u >> 24),
		byte(u >> 16),
		byte(u >> 8),
		byte(u),
	}
	return append(dst, b[:]...)
}
// UnmarshalUint32 decodes a big-endian uint32 from the first 4 bytes of src.
//
// The caller must ensure that len(src) >= 4.
func UnmarshalUint32(src []byte) uint32 {
	// binary.BigEndian is used, since it is faster than the manual conversion.
	u := binary.BigEndian.Uint32(src)
	return u
}
// MarshalUint64 appends the big-endian encoding of u to dst
// and returns the extended slice.
func MarshalUint64(dst []byte, u uint64) []byte {
	b := [8]byte{
		byte(u >> 56),
		byte(u >> 48),
		byte(u >> 40),
		byte(u >> 32),
		byte(u >> 24),
		byte(u >> 16),
		byte(u >> 8),
		byte(u),
	}
	return append(dst, b[:]...)
}
// UnmarshalUint64 decodes a big-endian uint64 from the first 8 bytes of src.
//
// The caller must ensure that len(src) >= 8.
func UnmarshalUint64(src []byte) uint64 {
	// binary.BigEndian is used, since it is faster than the manual conversion.
	u := binary.BigEndian.Uint64(src)
	return u
}
// MarshalInt16 appends the zig-zag encoded v to dst in big-endian byte order
// and returns the extended slice.
func MarshalInt16(dst []byte, v int16) []byte {
	// Branchless zig-zag encoding: values with small magnitude, including
	// negative ones, are mapped to small unsigned values, which must
	// improve compression.
	zz := uint16(v<<1) ^ uint16(v>>15)
	b := [2]byte{byte(zz >> 8), byte(zz)}
	return append(dst, b[:]...)
}
// UnmarshalInt16 decodes a zig-zag encoded big-endian int16
// from the first 2 bytes of src.
//
// The caller must ensure that len(src) >= 2.
func UnmarshalInt16(src []byte) int16 {
	// binary.BigEndian is used, since it is faster than the manual conversion.
	u := binary.BigEndian.Uint16(src)

	// Branchless zig-zag decoding: the lowest bit holds the sign,
	// while the remaining bits hold the (possibly inverted) magnitude.
	magnitude := int16(u >> 1)
	signMask := -int16(u & 1) // 0 for even u, -1 (all bits set) for odd u
	return magnitude ^ signMask
}
// MarshalInt64 appends the zig-zag encoded v to dst in big-endian byte order
// and returns the extended slice.
func MarshalInt64(dst []byte, v int64) []byte {
	// Branchless zig-zag encoding: values with small magnitude, including
	// negative ones, are mapped to small unsigned values, which must
	// improve compression.
	zz := uint64(v<<1) ^ uint64(v>>63)
	b := [8]byte{
		byte(zz >> 56),
		byte(zz >> 48),
		byte(zz >> 40),
		byte(zz >> 32),
		byte(zz >> 24),
		byte(zz >> 16),
		byte(zz >> 8),
		byte(zz),
	}
	return append(dst, b[:]...)
}
// UnmarshalInt64 decodes a zig-zag encoded big-endian int64
// from the first 8 bytes of src.
//
// The caller must ensure that len(src) >= 8.
func UnmarshalInt64(src []byte) int64 {
	// binary.BigEndian is used, since it is faster than the manual conversion.
	u := binary.BigEndian.Uint64(src)

	// Branchless zig-zag decoding: the lowest bit holds the sign,
	// while the remaining bits hold the (possibly inverted) magnitude.
	magnitude := int64(u >> 1)
	signMask := -int64(u & 1) // 0 for even u, -1 (all bits set) for odd u
	return magnitude ^ signMask
}
// MarshalVarInt64 appends the variable-length encoding of v to dst
// and returns the extended slice.
//
// It is a single-value convenience wrapper around MarshalVarInt64s.
func MarshalVarInt64(dst []byte, v int64) []byte {
	one := [1]int64{v}
	return MarshalVarInt64s(dst, one[:])
}
// MarshalVarInt64s appends the variable-length encoding of every value in vs
// to dst and returns the extended slice.
//
// Every value is zig-zag encoded and then split into 7-bit groups, starting
// from the least significant one. Every emitted byte except the last byte
// of a value has its highest bit set. A value occupies from 1 to 10 bytes.
func MarshalVarInt64s(dst []byte, vs []int64) []byte {
	for _, v := range vs {
		u := uint64((v << 1) ^ (v >> 63)) // zig-zag encoding without branching.
		if u < 0x80 {
			// Fast path: the value fits a single byte.
			dst = append(dst, byte(u))
			continue
		}

		// u has at least 8 significant bits here, so the expression below is
		// in the range [1..9]. It equals the number of bytes with
		// the continuation bit that precede the final byte.
		// The encoding is unrolled per length in order to emit a single append call.
		switch bits.Len64(u>>1) / 7 {
		case 1:
			dst = append(dst, byte(u)|0x80, byte(u>>7))
		case 2:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14))
		case 3:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21))
		case 4:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28))
		case 5:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28)|0x80, byte(u>>35))
		case 6:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28)|0x80, byte(u>>35)|0x80, byte(u>>42))
		case 7:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28)|0x80, byte(u>>35)|0x80, byte(u>>42)|0x80, byte(u>>49))
		case 8:
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28)|0x80, byte(u>>35)|0x80, byte(u>>42)|0x80, byte(u>>49)|0x80, byte(u>>56))
		default:
			// All the 64 bits are significant, so the maximum of 10 bytes is needed.
			dst = append(dst, byte(u)|0x80, byte(u>>7)|0x80, byte(u>>14)|0x80, byte(u>>21)|0x80, byte(u>>28)|0x80, byte(u>>35)|0x80, byte(u>>42)|0x80, byte(u>>49)|0x80, byte(u>>56)|0x80, byte(u>>63))
		}
	}
	return dst
}
// UnmarshalVarInt64 unmarshals a single variable-length encoded int64 from src.
//
// It returns the remaining tail from src, the unmarshaled value and the error
// reported by UnmarshalVarInt64s, which performs the actual decoding.
func UnmarshalVarInt64(src []byte) ([]byte, int64, error) {
	var one [1]int64
	rest, err := UnmarshalVarInt64s(one[:], src)
	return rest, one[0], err
}
// UnmarshalVarInt64s unmarshals len(dst) int64 values from src to dst
// and returns the remaining tail from src.
//
// A nil tail and a non-nil error are returned in the following cases:
//   - src ends before len(dst) values are unmarshaled;
//   - src ends in the middle of an encoded value;
//   - an encoded value occupies more than 10 bytes.
//
// The contents of dst is unspecified when an error is returned.
func UnmarshalVarInt64s(dst []int64, src []byte) ([]byte, error) {
	idx := uint(0)
	for i := range dst {
		if idx >= uint(len(src)) {
			return nil, fmt.Errorf("cannot unmarshal varint from empty data")
		}
		c := src[idx]
		idx++
		if c < 0x80 {
			// Fast path for a single-byte value.
			v := int8(c>>1) ^ (int8(c<<7) >> 7) // zig-zag decoding without branching.
			dst[i] = int64(v)
			continue
		}
		if idx < uint(len(src)) && src[idx] < 0x80 {
			// Fast path for a two-byte value.
			n := uint64(c&0x7f) | (uint64(src[idx]&0x7f) << 7)
			dst[i] = int64(n>>1) ^ (int64(n<<63) >> 63)
			idx++
			continue
		}

		// Slow path: the value occupies at least 3 bytes or it is truncated.
		// Locate the terminating byte, e.g. the first byte without the continuation bit.
		start := idx - 1
		end := idx
		for end < uint(len(src)) && src[end] >= 0x80 {
			end++
		}
		terminated := end < uint(len(src))

		// encLen is the number of bytes seen for the value:
		// all the bytes with the continuation bit plus the terminating byte, if any.
		encLen := end - start
		if terminated {
			encLen++
		}
		if encLen > 10 {
			// The length must be verified before calling unmarshalVarInt64ForOne,
			// since it panics on buffers longer than 10 bytes.
			return nil, fmt.Errorf("cannot unmarshal varint, buffer too long, len=%d", encLen)
		}
		if !terminated {
			// Do not slice src past its length - report the truncated value instead.
			return nil, fmt.Errorf("unexpected end of encoded varint at byte %d; src=%x", encLen, src[start:])
		}
		dst[i] = unmarshalVarInt64ForOne(src[start : end+1])
		idx = end + 1
	}
	return src[idx:], nil
}

// unmarshalVarInt64ForOne decodes a single zig-zag encoded varint, which occupies the whole buf.
//
// The highest bit of every byte is ignored, so the caller is responsible
// for locating the boundaries of the encoded value.
// Bits of the 10th byte, which do not fit 64 bits, are silently dropped.
//
// It panics if len(buf) is outside the range [1..10].
func unmarshalVarInt64ForOne(buf []byte) int64 {
	var n uint64
	// The decoding is unrolled per buf length in order to avoid a loop.
	switch len(buf) {
	case 1:
		n = uint64(buf[0] & 0x7F)
	case 2:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7)
	case 3:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14)
	case 4:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21)
	case 5:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28)
	case 6:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35)
	case 7:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42)
	case 8:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49)
	case 9:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49) | (uint64(buf[8]&0x7F) << 56)
	case 10:
		n = uint64(buf[0]&0x7F) | (uint64(buf[1]&0x7F) << 7) | (uint64(buf[2]&0x7F) << 14) | (uint64(buf[3]&0x7F) << 21) | (uint64(buf[4]&0x7F) << 28) | (uint64(buf[5]&0x7F) << 35) | (uint64(buf[6]&0x7F) << 42) | (uint64(buf[7]&0x7F) << 49) | (uint64(buf[8]&0x7F) << 56) | (uint64(buf[9]&0x7F) << 63)
	default:
		panic("impossible error: buf length must be 1 to 10")
	}
	return int64(n>>1) ^ (int64(n<<63) >> 63) // zig-zag decoding without branching.
}
// MarshalVarUint64 appends the variable-length encoding of u to dst
// and returns the extended slice.
//
// It is a single-value convenience wrapper around MarshalVarUint64s.
func MarshalVarUint64(dst []byte, u uint64) []byte {
	one := [1]uint64{u}
	return MarshalVarUint64s(dst, one[:])
}
// MarshalVarUint64s appends the variable-length encoding of every value in us
// to dst and returns the extended slice.
//
// Every value is split into 7-bit groups, starting from the least significant
// one. Every emitted byte except the last byte of a value has its highest bit set.
func MarshalVarUint64s(dst []byte, us []uint64) []byte {
	for _, u := range us {
		// Emit the groups, which are followed by more groups.
		// The loop is skipped for values that fit a single byte.
		for ; u >= 0x80; u >>= 7 {
			dst = append(dst, byte(u)|0x80)
		}
		// Emit the final group without the continuation bit.
		dst = append(dst, byte(u))
	}
	return dst
}
// UnmarshalVarUint64 unmarshals a single variable-length encoded uint64 from src.
//
// It returns the remaining tail from src, the unmarshaled value and the error
// reported by UnmarshalVarUint64s, which performs the actual decoding.
func UnmarshalVarUint64(src []byte) ([]byte, uint64, error) {
	var one [1]uint64
	rest, err := UnmarshalVarUint64s(one[:], src)
	return rest, one[0], err
}
// UnmarshalVarUint64s unmarshals len(dst) uint64 values from src to dst
// and returns the remaining tail from src.
//
// A nil tail and a non-nil error are returned in the following cases:
//   - src ends before len(dst) values are unmarshaled;
//   - src ends in the middle of an encoded value;
//   - an encoded value occupies more than 10 bytes.
//
// Bits of the 10th byte, which do not fit 64 bits, are silently dropped.
// The contents of dst is unspecified when an error is returned.
func UnmarshalVarUint64s(dst []uint64, src []byte) ([]byte, error) {
	idx := uint(0)
	for i := range dst {
		if idx >= uint(len(src)) {
			return nil, fmt.Errorf("cannot unmarshal varuint from empty data")
		}
		c := src[idx]
		idx++
		if c < 0x80 {
			// Fast path for a single-byte value.
			dst[i] = uint64(c)
			continue
		}

		// Slow path: accumulate 7-bit groups until a byte without the continuation bit is met.
		u := uint64(c & 0x7f)
		startIdx := idx - 1
		shift := uint8(0)
		for c >= 0x80 {
			if idx >= uint(len(src)) {
				return nil, fmt.Errorf("unexpected end of encoded varint at byte %d; src=%x", idx-startIdx, src[startIdx:])
			}
			if idx-startIdx > 9 {
				// 10 bytes with the continuation bit are already consumed.
				// Return nil tail like the other error paths do, since the tail is meaningless on error.
				return nil, fmt.Errorf("too long encoded varint; the maximum allowed length is 10 bytes; got %d bytes; src=%x",
					(idx-startIdx)+1, src[startIdx:])
			}
			c = src[idx]
			idx++
			shift += 7
			u |= uint64(c&0x7f) << shift
		}
		dst[i] = u
	}
	return src[idx:], nil
}
// MarshalBool appends a single byte to dst and returns the extended slice.
//
// The appended byte is 1 if v is true and 0 otherwise.
func MarshalBool(dst []byte, v bool) []byte {
	if v {
		return append(dst, 1)
	}
	return append(dst, 0)
}
// UnmarshalBool unmarshals bool from the first byte of src.
//
// Any non-zero byte is treated as true.
//
// The caller must ensure that len(src) >= 1.
func UnmarshalBool(src []byte) bool {
	first := src[0]
	return first != 0
}
// MarshalBytes appends len(b) encoded via MarshalVarUint64 followed by
// the contents of b to dst and returns the extended slice.
func MarshalBytes(dst, b []byte) []byte {
	withSize := MarshalVarUint64(dst, uint64(len(b)))
	return append(withSize, b...)
}
// UnmarshalBytes unmarshals length-prefixed bytes from src.
//
// The length prefix is read via UnmarshalVarUint64.
// The first returned slice is the remaining tail from src, while the second
// one holds the unmarshaled bytes. Both slices point to src, so they
// are valid until src is modified.
//
// An error is returned if the length prefix cannot be unmarshaled
// or if src contains less bytes than the length prefix states.
func UnmarshalBytes(src []byte) ([]byte, []byte, error) {
	rest, size, err := UnmarshalVarUint64(src)
	if err != nil {
		return nil, nil, fmt.Errorf("cannot unmarshal string size: %w", err)
	}
	if size > uint64(len(rest)) {
		return nil, nil, fmt.Errorf("src is too short for reading string with size %d; len(src)=%d", size, len(rest))
	}
	return rest[size:], rest[:size], nil
}
// GetInt64s returns an Int64s from the pool with len(A) set to the given size.
//
// The contents of the returned slice isn't initialized - it may contain garbage
// left from the previous use.
func GetInt64s(size int) *Int64s {
	item := int64sPool.Get()
	if item != nil {
		pooled := item.(*Int64s)
		if missing := size - cap(pooled.A); missing > 0 {
			// The pooled slice is too small - extend it to the requested size.
			pooled.A = append(pooled.A[:cap(pooled.A)], make([]int64, missing)...)
		}
		pooled.A = pooled.A[:size]
		return pooled
	}
	// The pool is empty - allocate a new object.
	return &Int64s{
		A: make([]int64, size),
	}
}
// PutInt64s returns is to the pool, so it can be re-used by subsequent GetInt64s calls.
//
// NOTE(review): is is presumably expected not to be used by the caller after the call - confirm against callers.
func PutInt64s(is *Int64s) {
int64sPool.Put(is)
}
// Int64s holds an int64 slice.
//
// It is obtained via GetInt64s and returned to the pool via PutInt64s.
type Int64s struct {
// A is the held slice. GetInt64s sets its length to the requested size.
A []int64
}
// int64sPool holds *Int64s objects passed to PutInt64s for re-use by GetInt64s.
var int64sPool sync.Pool
// GetUint64s returns an Uint64s from the pool with len(A) set to the given size.
//
// The contents of the returned slice isn't initialized - it may contain garbage
// left from the previous use.
func GetUint64s(size int) *Uint64s {
	item := uint64sPool.Get()
	if item != nil {
		pooled := item.(*Uint64s)
		if missing := size - cap(pooled.A); missing > 0 {
			// The pooled slice is too small - extend it to the requested size.
			pooled.A = append(pooled.A[:cap(pooled.A)], make([]uint64, missing)...)
		}
		pooled.A = pooled.A[:size]
		return pooled
	}
	// The pool is empty - allocate a new object.
	return &Uint64s{
		A: make([]uint64, size),
	}
}
// PutUint64s returns is to the pool, so it can be re-used by subsequent GetUint64s calls.
//
// NOTE(review): is is presumably expected not to be used by the caller after the call - confirm against callers.
func PutUint64s(is *Uint64s) {
uint64sPool.Put(is)
}
// Uint64s holds an uint64 slice.
//
// It is obtained via GetUint64s and returned to the pool via PutUint64s.
type Uint64s struct {
// A is the held slice. GetUint64s sets its length to the requested size.
A []uint64
}
// uint64sPool holds *Uint64s objects passed to PutUint64s for re-use by GetUint64s.
var uint64sPool sync.Pool
// GetUint32s returns an Uint32s from the pool with len(A) set to the given size.
//
// The contents of the returned slice isn't initialized - it may contain garbage
// left from the previous use.
func GetUint32s(size int) *Uint32s {
	item := uint32sPool.Get()
	if item != nil {
		pooled := item.(*Uint32s)
		if missing := size - cap(pooled.A); missing > 0 {
			// The pooled slice is too small - extend it to the requested size.
			pooled.A = append(pooled.A[:cap(pooled.A)], make([]uint32, missing)...)
		}
		pooled.A = pooled.A[:size]
		return pooled
	}
	// The pool is empty - allocate a new object.
	return &Uint32s{
		A: make([]uint32, size),
	}
}
// PutUint32s returns is to the pool, so it can be re-used by subsequent GetUint32s calls.
//
// NOTE(review): is is presumably expected not to be used by the caller after the call - confirm against callers.
func PutUint32s(is *Uint32s) {
uint32sPool.Put(is)
}
// Uint32s holds an uint32 slice.
//
// It is obtained via GetUint32s and returned to the pool via PutUint32s.
type Uint32s struct {
// A is the held slice. GetUint32s sets its length to the requested size.
A []uint32
}
// uint32sPool holds *Uint32s objects passed to PutUint32s for re-use by GetUint32s.
var uint32sPool sync.Pool