VictoriaMetrics/lib/mergeset/part.go

473 lines
11 KiB
Go
Raw Normal View History

2019-05-22 23:16:55 +02:00
package mergeset
import (
"fmt"
"path/filepath"
"sync"
"sync/atomic"
"time"
"unsafe"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
)
func getMaxCachedIndexBlocksPerPart() int {
maxCachedIndexBlocksPerPartOnce.Do(func() {
n := memory.Allowed() / 1024 / 1024 / 4
2019-05-22 23:16:55 +02:00
if n == 0 {
n = 10
}
maxCachedIndexBlocksPerPart = n
})
return maxCachedIndexBlocksPerPart
}
var (
	// maxCachedIndexBlocksPerPartOnce makes sure maxCachedIndexBlocksPerPart
	// is initialized exactly once by getMaxCachedIndexBlocksPerPart.
	maxCachedIndexBlocksPerPartOnce sync.Once

	// maxCachedIndexBlocksPerPart is the lazily computed limit returned by
	// getMaxCachedIndexBlocksPerPart. Read it only through that function.
	maxCachedIndexBlocksPerPart int
)
func getMaxCachedInmemoryBlocksPerPart() int {
maxCachedInmemoryBlocksPerPartOnce.Do(func() {
n := memory.Allowed() / 1024 / 1024 / 4
2019-05-22 23:16:55 +02:00
if n == 0 {
n = 10
}
maxCachedInmemoryBlocksPerPart = n
})
return maxCachedInmemoryBlocksPerPart
}
var (
	// maxCachedInmemoryBlocksPerPartOnce makes sure maxCachedInmemoryBlocksPerPart
	// is initialized exactly once by getMaxCachedInmemoryBlocksPerPart.
	maxCachedInmemoryBlocksPerPartOnce sync.Once

	// maxCachedInmemoryBlocksPerPart is the lazily computed limit returned by
	// getMaxCachedInmemoryBlocksPerPart. Read it only through that function.
	maxCachedInmemoryBlocksPerPart int
)
type part struct {
2019-05-22 23:16:55 +02:00
ph partHeader
path string
size uint64
2019-05-22 23:16:55 +02:00
mrs []metaindexRow
indexFile fs.MustReadAtCloser
itemsFile fs.MustReadAtCloser
lensFile fs.MustReadAtCloser
2019-05-22 23:16:55 +02:00
idxbCache *indexBlockCache
ibCache *inmemoryBlockCache
2019-05-22 23:16:55 +02:00
}
func openFilePart(path string) (*part, error) {
path = filepath.Clean(path)
var ph partHeader
if err := ph.ParseFromPath(path); err != nil {
return nil, fmt.Errorf("cannot parse path to part: %w", err)
2019-05-22 23:16:55 +02:00
}
metaindexPath := path + "/metaindex.bin"
metaindexFile, err := filestream.Open(metaindexPath, true)
if err != nil {
return nil, fmt.Errorf("cannot open %q: %w", metaindexPath, err)
2019-05-22 23:16:55 +02:00
}
metaindexSize := fs.MustFileSize(metaindexPath)
2019-05-22 23:16:55 +02:00
indexPath := path + "/index.bin"
indexFile := fs.MustOpenReaderAt(indexPath)
indexSize := fs.MustFileSize(indexPath)
2019-05-22 23:16:55 +02:00
itemsPath := path + "/items.bin"
itemsFile := fs.MustOpenReaderAt(itemsPath)
itemsSize := fs.MustFileSize(itemsPath)
2019-05-22 23:16:55 +02:00
lensPath := path + "/lens.bin"
lensFile := fs.MustOpenReaderAt(lensPath)
lensSize := fs.MustFileSize(lensPath)
2019-05-22 23:16:55 +02:00
size := metaindexSize + indexSize + itemsSize + lensSize
return newPart(&ph, path, size, metaindexFile, indexFile, itemsFile, lensFile)
2019-05-22 23:16:55 +02:00
}
func newPart(ph *partHeader, path string, size uint64, metaindexReader filestream.ReadCloser, indexFile, itemsFile, lensFile fs.MustReadAtCloser) (*part, error) {
2019-05-22 23:16:55 +02:00
var errors []error
mrs, err := unmarshalMetaindexRows(nil, metaindexReader)
if err != nil {
errors = append(errors, fmt.Errorf("cannot unmarshal metaindexRows: %w", err))
2019-05-22 23:16:55 +02:00
}
metaindexReader.MustClose()
var p part
p.path = path
p.size = size
p.mrs = mrs
p.indexFile = indexFile
p.itemsFile = itemsFile
p.lensFile = lensFile
2019-05-22 23:16:55 +02:00
p.ph.CopyFrom(ph)
p.idxbCache = newIndexBlockCache()
p.ibCache = newInmemoryBlockCache()
2019-05-22 23:16:55 +02:00
if len(errors) > 0 {
// Return only the first error, since it has no sense in returning all errors.
err := fmt.Errorf("error opening part %s: %w", p.path, errors[0])
2019-05-22 23:16:55 +02:00
p.MustClose()
return nil, err
}
return &p, nil
2019-05-22 23:16:55 +02:00
}
func (p *part) MustClose() {
p.indexFile.MustClose()
p.itemsFile.MustClose()
p.lensFile.MustClose()
p.idxbCache.MustClose()
p.ibCache.MustClose()
2019-05-22 23:16:55 +02:00
}
// indexBlock holds a slice of block headers.
//
// Instances are stored in indexBlockCache.
type indexBlock struct {
	// bhs is the slice of block headers; SizeBytes accounts for its full capacity.
	bhs []blockHeader
}
func (idxb *indexBlock) SizeBytes() int {
bhs := idxb.bhs[:cap(idxb.bhs)]
n := int(unsafe.Sizeof(*idxb))
for i := range bhs {
n += bhs[i].SizeBytes()
2019-05-22 23:16:55 +02:00
}
return n
2019-05-22 23:16:55 +02:00
}
// indexBlockCache caches index blocks under uint64 keys.
//
// Create it with newIndexBlockCache and release it with MustClose.
type indexBlockCache struct {
	// Atomically updated counters must go first in the struct, so they are properly
	// aligned to 8 bytes on 32-bit architectures.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
	//
	// requests counts Get calls; misses counts Get calls that found nothing.
	requests, misses uint64

	// m maps keys to cached entries. It is protected by mu.
	m  map[uint64]*indexBlockCacheEntry
	mu sync.RWMutex

	// perKeyMisses counts Get misses per key since the last reset performed by
	// the cleaner goroutine. It is protected by perKeyMissesLock.
	perKeyMisses     map[uint64]int
	perKeyMissesLock sync.Mutex

	// cleanerStopCh is closed by MustClose in order to stop the cleaner
	// goroutine; cleanerWG is used for waiting until it has exited.
	cleanerStopCh chan struct{}
	cleanerWG     sync.WaitGroup
}
type indexBlockCacheEntry struct {
// Atomically updated counters must go first in the struct, so they are properly
// aligned to 8 bytes on 32-bit architectures.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
lastAccessTime uint64
idxb *indexBlock
2019-05-22 23:16:55 +02:00
}
func newIndexBlockCache() *indexBlockCache {
var idxbc indexBlockCache
idxbc.m = make(map[uint64]*indexBlockCacheEntry)
idxbc.perKeyMisses = make(map[uint64]int)
idxbc.cleanerStopCh = make(chan struct{})
idxbc.cleanerWG.Add(1)
go func() {
defer idxbc.cleanerWG.Done()
idxbc.cleaner()
}()
return &idxbc
2019-05-22 23:16:55 +02:00
}
// MustClose stops the cleaner goroutine, waits until it exits and then
// drops the cache maps.
//
// idxbc must not be used after the call.
func (idxbc *indexBlockCache) MustClose() {
	// Closing the channel tells cleaner to return.
	close(idxbc.cleanerStopCh)
	idxbc.cleanerWG.Wait()

	idxbc.m, idxbc.perKeyMisses = nil, nil
}
// cleaner runs until cleanerStopCh is closed.
//
// Every 30 seconds it calls cleanByTimeout in order to drop entries that
// have not been accessed recently. Every 2 minutes it replaces perKeyMisses
// with an empty map pre-sized to the previous number of keys.
func (idxbc *indexBlockCache) cleaner() {
	evictTicker := time.NewTicker(30 * time.Second)
	defer evictTicker.Stop()

	missesResetTicker := time.NewTicker(2 * time.Minute)
	defer missesResetTicker.Stop()

	for {
		select {
		case <-idxbc.cleanerStopCh:
			return
		case <-evictTicker.C:
			idxbc.cleanByTimeout()
		case <-missesResetTicker.C:
			idxbc.perKeyMissesLock.Lock()
			prevLen := len(idxbc.perKeyMisses)
			idxbc.perKeyMisses = make(map[uint64]int, prevLen)
			idxbc.perKeyMissesLock.Unlock()
		}
	}
}
func (idxbc *indexBlockCache) cleanByTimeout() {
currentTime := fasttime.UnixTimestamp()
idxbc.mu.Lock()
for k, idxbe := range idxbc.m {
// Delete items accessed more than two minutes ago.
// This time should be enough for repeated queries.
if currentTime-atomic.LoadUint64(&idxbe.lastAccessTime) > 2*60 {
delete(idxbc.m, k)
}
2019-05-22 23:16:55 +02:00
}
idxbc.mu.Unlock()
2019-05-22 23:16:55 +02:00
}
func (idxbc *indexBlockCache) Get(k uint64) *indexBlock {
atomic.AddUint64(&idxbc.requests, 1)
idxbc.mu.RLock()
idxbe := idxbc.m[k]
2019-05-22 23:16:55 +02:00
idxbc.mu.RUnlock()
if idxbe != nil {
currentTime := fasttime.UnixTimestamp()
if atomic.LoadUint64(&idxbe.lastAccessTime) != currentTime {
atomic.StoreUint64(&idxbe.lastAccessTime, currentTime)
}
return idxbe.idxb
2019-05-22 23:16:55 +02:00
}
idxbc.perKeyMissesLock.Lock()
idxbc.perKeyMisses[k]++
idxbc.perKeyMissesLock.Unlock()
2019-05-22 23:16:55 +02:00
atomic.AddUint64(&idxbc.misses, 1)
return nil
}
// Put puts idxb under the key k into idxbc.
func (idxbc *indexBlockCache) Put(k uint64, idxb *indexBlock) {
idxbc.perKeyMissesLock.Lock()
doNotCache := idxbc.perKeyMisses[k] == 1
idxbc.perKeyMissesLock.Unlock()
if doNotCache {
// Do not cache ib if it has been requested only once (aka one-time-wonders items).
// This should reduce memory usage for the ibc cache.
return
}
2019-05-22 23:16:55 +02:00
idxbc.mu.Lock()
// Remove superfluous entries.
2019-05-22 23:16:55 +02:00
if overflow := len(idxbc.m) - getMaxCachedIndexBlocksPerPart(); overflow > 0 {
// Remove 10% of items from the cache.
overflow = int(float64(len(idxbc.m)) * 0.1)
for k := range idxbc.m {
delete(idxbc.m, k)
overflow--
if overflow == 0 {
break
}
}
}
// Store idxb in the cache.
idxbe := &indexBlockCacheEntry{
lastAccessTime: fasttime.UnixTimestamp(),
idxb: idxb,
}
idxbc.m[k] = idxbe
2019-05-22 23:16:55 +02:00
idxbc.mu.Unlock()
}
// Len returns the number of entries currently stored in idxbc.
func (idxbc *indexBlockCache) Len() uint64 {
	idxbc.mu.RLock()
	defer idxbc.mu.RUnlock()

	return uint64(len(idxbc.m))
}
func (idxbc *indexBlockCache) SizeBytes() uint64 {
n := 0
idxbc.mu.RLock()
for _, e := range idxbc.m {
n += e.idxb.SizeBytes()
}
idxbc.mu.RUnlock()
return uint64(n)
}
2019-05-22 23:16:55 +02:00
// Requests returns the number of Get calls made on idxbc so far.
func (idxbc *indexBlockCache) Requests() uint64 {
	total := atomic.LoadUint64(&idxbc.requests)
	return total
}
// Misses returns the number of Get calls on idxbc that found no entry.
func (idxbc *indexBlockCache) Misses() uint64 {
	total := atomic.LoadUint64(&idxbc.misses)
	return total
}
type inmemoryBlockCache struct {
// Atomically updated counters must go first in the struct, so they are properly
// aligned to 8 bytes on 32-bit architectures.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
requests uint64
misses uint64
m map[inmemoryBlockCacheKey]*inmemoryBlockCacheEntry
mu sync.RWMutex
perKeyMisses map[inmemoryBlockCacheKey]int
perKeyMissesLock sync.Mutex
cleanerStopCh chan struct{}
cleanerWG sync.WaitGroup
2019-05-22 23:16:55 +02:00
}
// inmemoryBlockCacheKey is the key type used by inmemoryBlockCache.
type inmemoryBlockCacheKey struct {
	// itemsBlockOffset is copied from blockHeader.itemsBlockOffset by Init.
	itemsBlockOffset uint64
}
// Init sets ibck to the cache key for the block described by bh,
// i.e. to bh.itemsBlockOffset.
func (ibck *inmemoryBlockCacheKey) Init(bh *blockHeader) {
	*ibck = inmemoryBlockCacheKey{
		itemsBlockOffset: bh.itemsBlockOffset,
	}
}
// inmemoryBlockCacheEntry is a single item stored in inmemoryBlockCache.
type inmemoryBlockCacheEntry struct {
	// Atomically updated counters must go first in the struct, so they are properly
	// aligned to 8 bytes on 32-bit architectures.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/212
	//
	// lastAccessTime is the fasttime.UnixTimestamp() value recorded when the
	// entry was stored or last returned by Get.
	lastAccessTime uint64

	// ib is the cached inmemory block.
	ib *inmemoryBlock
}
func newInmemoryBlockCache() *inmemoryBlockCache {
var ibc inmemoryBlockCache
ibc.m = make(map[inmemoryBlockCacheKey]*inmemoryBlockCacheEntry)
ibc.perKeyMisses = make(map[inmemoryBlockCacheKey]int)
ibc.cleanerStopCh = make(chan struct{})
ibc.cleanerWG.Add(1)
go func() {
defer ibc.cleanerWG.Done()
ibc.cleaner()
}()
return &ibc
2019-05-22 23:16:55 +02:00
}
// MustClose stops the cleaner goroutine, waits until it exits and then
// drops the cache maps.
//
// ibc must not be used after the call.
func (ibc *inmemoryBlockCache) MustClose() {
	// Closing the channel tells cleaner to return.
	close(ibc.cleanerStopCh)
	ibc.cleanerWG.Wait()

	ibc.m, ibc.perKeyMisses = nil, nil
}
// cleaner runs until cleanerStopCh is closed.
//
// Every 30 seconds it calls cleanByTimeout in order to drop entries that
// have not been accessed recently. Every 2 minutes it replaces perKeyMisses
// with an empty map pre-sized to the previous number of keys.
func (ibc *inmemoryBlockCache) cleaner() {
	evictTicker := time.NewTicker(30 * time.Second)
	defer evictTicker.Stop()

	missesResetTicker := time.NewTicker(2 * time.Minute)
	defer missesResetTicker.Stop()

	for {
		select {
		case <-ibc.cleanerStopCh:
			return
		case <-evictTicker.C:
			ibc.cleanByTimeout()
		case <-missesResetTicker.C:
			ibc.perKeyMissesLock.Lock()
			prevLen := len(ibc.perKeyMisses)
			ibc.perKeyMisses = make(map[inmemoryBlockCacheKey]int, prevLen)
			ibc.perKeyMissesLock.Unlock()
		}
	}
}
func (ibc *inmemoryBlockCache) cleanByTimeout() {
currentTime := fasttime.UnixTimestamp()
ibc.mu.Lock()
for k, ibe := range ibc.m {
// Delete items accessed more than two minutes ago.
// This time should be enough for repeated queries.
if currentTime-atomic.LoadUint64(&ibe.lastAccessTime) > 2*60 {
delete(ibc.m, k)
}
2019-05-22 23:16:55 +02:00
}
ibc.mu.Unlock()
2019-05-22 23:16:55 +02:00
}
func (ibc *inmemoryBlockCache) Get(k inmemoryBlockCacheKey) *inmemoryBlock {
atomic.AddUint64(&ibc.requests, 1)
ibc.mu.RLock()
ibe := ibc.m[k]
2019-05-22 23:16:55 +02:00
ibc.mu.RUnlock()
if ibe != nil {
currentTime := fasttime.UnixTimestamp()
if atomic.LoadUint64(&ibe.lastAccessTime) != currentTime {
atomic.StoreUint64(&ibe.lastAccessTime, currentTime)
}
return ibe.ib
2019-05-22 23:16:55 +02:00
}
ibc.perKeyMissesLock.Lock()
ibc.perKeyMisses[k]++
ibc.perKeyMissesLock.Unlock()
2019-05-22 23:16:55 +02:00
atomic.AddUint64(&ibc.misses, 1)
return nil
}
// Put puts ib under key k into ibc.
func (ibc *inmemoryBlockCache) Put(k inmemoryBlockCacheKey, ib *inmemoryBlock) {
ibc.perKeyMissesLock.Lock()
doNotCache := ibc.perKeyMisses[k] == 1
ibc.perKeyMissesLock.Unlock()
if doNotCache {
// Do not cache ib if it has been requested only once (aka one-time-wonders items).
// This should reduce memory usage for the ibc cache.
return
}
2019-05-22 23:16:55 +02:00
ibc.mu.Lock()
// Clean superfluous entries in cache.
2019-05-22 23:16:55 +02:00
if overflow := len(ibc.m) - getMaxCachedInmemoryBlocksPerPart(); overflow > 0 {
// Remove 10% of items from the cache.
overflow = int(float64(len(ibc.m)) * 0.1)
for k := range ibc.m {
delete(ibc.m, k)
overflow--
if overflow == 0 {
break
}
}
}
// Store ib in the cache.
ibe := &inmemoryBlockCacheEntry{
lastAccessTime: fasttime.UnixTimestamp(),
ib: ib,
}
ibc.m[k] = ibe
2019-05-22 23:16:55 +02:00
ibc.mu.Unlock()
}
// Len returns the number of entries currently stored in ibc.
func (ibc *inmemoryBlockCache) Len() uint64 {
	ibc.mu.RLock()
	defer ibc.mu.RUnlock()

	return uint64(len(ibc.m))
}
func (ibc *inmemoryBlockCache) SizeBytes() uint64 {
n := 0
ibc.mu.RLock()
for _, e := range ibc.m {
n += e.ib.SizeBytes()
}
ibc.mu.RUnlock()
return uint64(n)
}
2019-05-22 23:16:55 +02:00
// Requests returns the number of Get calls made on ibc so far.
func (ibc *inmemoryBlockCache) Requests() uint64 {
	total := atomic.LoadUint64(&ibc.requests)
	return total
}
// Misses returns the number of Get calls on ibc that found no entry.
func (ibc *inmemoryBlockCache) Misses() uint64 {
	total := atomic.LoadUint64(&ibc.misses)
	return total
}