2019-05-22 23:23:23 +02:00
package main
2019-05-22 23:16:55 +02:00
import (
"flag"
"fmt"
"net/http"
2020-05-16 10:59:30 +02:00
"os"
2019-05-22 23:16:55 +02:00
"strings"
"sync"
"time"
2022-06-28 13:04:14 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/servers"
2019-05-22 23:23:23 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
2020-02-10 12:26:18 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
2023-02-27 21:12:03 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2020-10-20 13:29:26 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
2019-11-12 15:29:43 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
2022-02-21 12:50:34 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
2019-05-22 23:23:23 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
2021-05-08 16:55:44 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
2022-07-21 18:58:22 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
)
var (
2023-06-09 09:46:25 +02:00
retentionPeriod = flagutil . NewDuration ( "retentionPeriod" , "1" , "Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter" )
2023-01-27 08:08:35 +01:00
httpListenAddr = flag . String ( "httpListenAddr" , ":8482" , "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol" )
useProxyProtocol = flag . Bool ( "httpListenAddr.useProxyProtocol" , false , "Whether to use proxy protocol for connections accepted at -httpListenAddr . " +
2023-03-08 10:26:53 +01:00
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . " +
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing" )
2023-02-27 21:12:03 +01:00
storageDataPath = flag . String ( "storageDataPath" , "vmstorage-data" , "Path to storage data" )
vminsertAddr = flag . String ( "vminsertAddr" , ":8400" , "TCP address to accept connections from vminsert services" )
vmselectAddr = flag . String ( "vmselectAddr" , ":8401" , "TCP address to accept connections from vmselect services" )
snapshotAuthKey = flag . String ( "snapshotAuthKey" , "" , "authKey, which must be passed in query string to /snapshot* pages" )
forceMergeAuthKey = flag . String ( "forceMergeAuthKey" , "" , "authKey, which must be passed in query string to /internal/force_merge pages" )
forceFlushAuthKey = flag . String ( "forceFlushAuthKey" , "" , "authKey, which must be passed in query string to /internal/force_flush pages" )
snapshotsMaxAge = flagutil . NewDuration ( "snapshotsMaxAge" , "0" , "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted" )
2023-02-27 21:57:22 +01:00
snapshotCreateTimeout = flag . Duration ( "snapshotCreateTimeout" , 0 , "The timeout for creating new snapshot. If set, make sure that timeout is lower than backup period" )
2019-10-31 15:16:53 +01:00
2021-01-07 23:09:00 +01:00
finalMergeDelay = flag . Duration ( "finalMergeDelay" , 0 , "The delay before starting final merge for per-month partition after no new data is ingested into it. " +
"Final merge may require additional disk IO and CPU resources. Final merge may increase query speed and reduce disk space usage in some cases. " +
"Zero value disables final merge" )
2023-04-14 05:33:33 +02:00
_ = flag . Int ( "bigMergeConcurrency" , 0 , "Deprecated: this flag does nothing. Please use -smallMergeConcurrency " +
"for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage" )
smallMergeConcurrency = flag . Int ( "smallMergeConcurrency" , 0 , "The maximum number of workers for background merges. See https://docs.victoriametrics.com/#storage . " +
"It isn't recommended tuning this flag in general case, since this may lead to uncontrolled increase in the number of parts and increased CPU usage during queries" )
2022-05-25 14:57:01 +02:00
retentionTimezoneOffset = flag . Duration ( "retentionTimezoneOffset" , 0 , "The offset for performing indexdb rotation. " +
"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. " +
"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)" )
minScrapeInterval = flag . Duration ( "dedup.minScrapeInterval" , 0 , "Leave only the last sample in every time series per each discrete interval " +
2021-07-02 14:02:24 +02:00
"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication for details" )
2021-03-15 21:38:50 +01:00
logNewSeries = flag . Bool ( "logNewSeries" , false , "Whether to log new series. This option is for debug purposes only. It can lead to performance issues " +
"when big number of new series are ingested into VictoriaMetrics" )
2021-05-20 13:15:19 +02:00
maxHourlySeries = flag . Int ( "storage.maxHourlySeries" , 0 , "The maximum number of unique series can be added to the storage during the last hour. " +
2022-08-24 12:41:53 +02:00
"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/#cardinality-limiter . " +
"See also -storage.maxDailySeries" )
2021-05-20 13:15:19 +02:00
maxDailySeries = flag . Int ( "storage.maxDailySeries" , 0 , "The maximum number of unique series can be added to the storage during the last 24 hours. " +
2022-08-24 12:41:53 +02:00
"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/#cardinality-limiter . " +
"See also -storage.maxHourlySeries" )
2021-10-08 11:52:56 +02:00
2021-10-08 12:52:56 +02:00
minFreeDiskSpaceBytes = flagutil . NewBytes ( "storage.minFreeDiskSpaceBytes" , 10e6 , "The minimum free disk space at -storageDataPath after which the storage stops accepting new data" )
2022-02-21 12:50:34 +01:00
2022-10-23 11:15:24 +02:00
cacheSizeStorageTSID = flagutil . NewBytes ( "storage.cacheSizeStorageTSID" , 0 , "Overrides max size for storage/tsid cache. " +
"See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning" )
cacheSizeIndexDBIndexBlocks = flagutil . NewBytes ( "storage.cacheSizeIndexDBIndexBlocks" , 0 , "Overrides max size for indexdb/indexBlocks cache. " +
"See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning" )
cacheSizeIndexDBDataBlocks = flagutil . NewBytes ( "storage.cacheSizeIndexDBDataBlocks" , 0 , "Overrides max size for indexdb/dataBlocks cache. " +
"See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning" )
cacheSizeIndexDBTagFilters = flagutil . NewBytes ( "storage.cacheSizeIndexDBTagFilters" , 0 , "Overrides max size for indexdb/tagFiltersToMetricIDs cache. " +
"See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning" )
2019-05-22 23:16:55 +02:00
)
2019-05-22 23:23:23 +02:00
func main ( ) {
2020-05-16 10:59:30 +02:00
// Write flags and help message to stdout, since it is easier to grep or pipe.
flag . CommandLine . SetOutput ( os . Stdout )
2020-12-03 20:40:30 +01:00
flag . Usage = usage
2020-02-10 12:26:18 +01:00
envflag . Parse ( )
2019-05-22 23:23:23 +02:00
buildinfo . Init ( )
logger . Init ( )
2022-07-22 12:35:58 +02:00
pushmetrics . Init ( )
2019-05-22 23:23:23 +02:00
2021-12-14 19:49:08 +01:00
storage . SetDedupInterval ( * minScrapeInterval )
2021-03-15 21:38:50 +01:00
storage . SetLogNewSeries ( * logNewSeries )
2020-10-07 16:35:42 +02:00
storage . SetFinalMergeDelay ( * finalMergeDelay )
2022-12-06 00:27:57 +01:00
storage . SetMergeWorkersCount ( * smallMergeConcurrency )
2022-05-25 14:57:01 +02:00
storage . SetRetentionTimezoneOffset ( * retentionTimezoneOffset )
2021-10-08 12:52:56 +02:00
storage . SetFreeDiskSpaceLimit ( minFreeDiskSpaceBytes . N )
2022-12-15 04:26:24 +01:00
storage . SetTSIDCacheSize ( cacheSizeStorageTSID . IntN ( ) )
storage . SetTagFiltersCacheSize ( cacheSizeIndexDBTagFilters . IntN ( ) )
mergeset . SetIndexBlocksCacheSize ( cacheSizeIndexDBIndexBlocks . IntN ( ) )
mergeset . SetDataBlocksCacheSize ( cacheSizeIndexDBDataBlocks . IntN ( ) )
2019-10-31 15:16:53 +01:00
2023-09-01 09:27:51 +02:00
if retentionPeriod . Duration ( ) < 24 * time . Hour {
2022-05-06 23:51:24 +02:00
logger . Fatalf ( "-retentionPeriod cannot be smaller than a day; got %s" , retentionPeriod )
}
2020-10-20 13:29:26 +02:00
logger . Infof ( "opening storage at %q with -retentionPeriod=%s" , * storageDataPath , retentionPeriod )
2019-05-22 23:16:55 +02:00
startTime := time . Now ( )
2023-09-01 09:27:51 +02:00
strg := storage . MustOpenStorage ( * storageDataPath , retentionPeriod . Duration ( ) , * maxHourlySeries , * maxDailySeries )
2022-05-02 10:00:15 +02:00
initStaleSnapshotsRemover ( strg )
2019-05-22 23:16:55 +02:00
var m storage . Metrics
2019-05-22 23:23:23 +02:00
strg . UpdateMetrics ( & m )
2019-05-22 23:16:55 +02:00
tm := & m . TableMetrics
partsCount := tm . SmallPartsCount + tm . BigPartsCount
blocksCount := tm . SmallBlocksCount + tm . BigBlocksCount
rowsCount := tm . SmallRowsCount + tm . BigRowsCount
2019-07-04 18:09:40 +02:00
sizeBytes := tm . SmallSizeBytes + tm . BigSizeBytes
2020-01-22 17:27:44 +01:00
logger . Infof ( "successfully opened storage %q in %.3f seconds; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d" ,
* storageDataPath , time . Since ( startTime ) . Seconds ( ) , partsCount , blocksCount , rowsCount , sizeBytes )
2019-05-22 23:16:55 +02:00
2019-05-22 23:23:23 +02:00
registerStorageMetrics ( strg )
2019-05-22 23:16:55 +02:00
2021-05-08 16:55:44 +02:00
common . StartUnmarshalWorkers ( )
2022-06-28 13:04:14 +02:00
vminsertSrv , err := servers . NewVMInsertServer ( * vminsertAddr , strg )
2019-05-22 23:23:23 +02:00
if err != nil {
2022-06-23 18:19:36 +02:00
logger . Fatalf ( "cannot create a server with -vminsertAddr=%s: %s" , * vminsertAddr , err )
}
2022-06-28 13:04:14 +02:00
vmselectSrv , err := servers . NewVMSelectServer ( * vmselectAddr , strg )
2022-06-23 18:19:36 +02:00
if err != nil {
logger . Fatalf ( "cannot create a server with -vmselectAddr=%s: %s" , * vmselectAddr , err )
2019-05-22 23:23:23 +02:00
}
2019-05-22 23:16:55 +02:00
2019-05-22 23:23:23 +02:00
requestHandler := newRequestHandler ( strg )
go func ( ) {
2023-01-27 08:08:35 +01:00
httpserver . Serve ( * httpListenAddr , * useProxyProtocol , requestHandler )
2019-05-22 23:23:23 +02:00
} ( )
2019-05-22 23:16:55 +02:00
2019-05-22 23:23:23 +02:00
sig := procutil . WaitForSigterm ( )
logger . Infof ( "service received signal %s" , sig )
2019-05-22 23:16:55 +02:00
app/vmstorage: add missing shutdown for http server on graceful shutdown
This could result in the following panic during graceful shutdown when `/metrics` page is requested:
http: panic serving 10.101.66.5:57366: runtime error: invalid memory address or nil pointer dereference
goroutine 2050 [running]:
net/http.(*conn).serve.func1(0xc00ef22000)
net/http/server.go:1772 +0x139
panic(0xa0fc00, 0xe91d80)
runtime/panic.go:973 +0x3e3
github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache.(*Cache).UpdateStats(0x0, 0xc0000516c8)
github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache/cache.go:224 +0x37
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*indexDB).UpdateMetrics(0xc00b931d00, 0xc02c41acf8)
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/index_db.go:258 +0x9f
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*Storage).UpdateMetrics(0xc0000bc7e0, 0xc02c41ac00)
github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/storage.go:413 +0x4c5
main.registerStorageMetrics.func1(0x0)
github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/main.go:186 +0xd9
main.registerStorageMetrics.func3(0xc00008c380)
github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/main.go:196 +0x26
main.registerStorageMetrics.func7(0xc)
github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/main.go:211 +0x26
github.com/VictoriaMetrics/metrics.(*Gauge).marshalTo(0xc000010148, 0xaa407d, 0x20, 0xb50d60, 0xc005319890)
github.com/VictoriaMetrics/metrics@v1.11.2/gauge.go:38 +0x3f
github.com/VictoriaMetrics/metrics.(*Set).WritePrometheus(0xc000084300, 0x7fd56809c940, 0xc005319860)
github.com/VictoriaMetrics/metrics@v1.11.2/set.go:51 +0x1e1
github.com/VictoriaMetrics/metrics.WritePrometheus(0x7fd56809c940, 0xc005319860, 0xa16f01)
github.com/VictoriaMetrics/metrics@v1.11.2/metrics.go:42 +0x41
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver.writePrometheusMetrics(0x7fd56809c940, 0xc005319860)
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver/metrics.go:16 +0x44
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver.handlerWrapper(0xb5a120, 0xc005319860, 0xc005018f00, 0xc00002cc90)
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver/httpserver.go:154 +0x58d
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver.gzipHandler.func1(0xb5a120, 0xc005319860, 0xc005018f00)
github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver/httpserver.go:119 +0x8e
net/http.HandlerFunc.ServeHTTP(0xc00002d110, 0xb5a660, 0xc0044141c0, 0xc005018f00)
net/http/server.go:2012 +0x44
net/http.serverHandler.ServeHTTP(0xc004414000, 0xb5a660, 0xc0044141c0, 0xc005018f00)
net/http/server.go:2807 +0xa3
net/http.(*conn).serve(0xc00ef22000, 0xb5bf60, 0xc010532080)
net/http/server.go:1895 +0x86c
created by net/http.(*Server).Serve
net/http/server.go:2933 +0x35c
2020-04-02 20:07:59 +02:00
logger . Infof ( "gracefully shutting down http service at %q" , * httpListenAddr )
startTime = time . Now ( )
if err := httpserver . Stop ( * httpListenAddr ) ; err != nil {
logger . Fatalf ( "cannot stop http service: %s" , err )
}
logger . Infof ( "successfully shut down http service in %.3f seconds" , time . Since ( startTime ) . Seconds ( ) )
2019-05-22 23:23:23 +02:00
logger . Infof ( "gracefully shutting down the service" )
startTime = time . Now ( )
2022-05-02 10:00:15 +02:00
stopStaleSnapshotsRemover ( )
2022-07-05 23:41:49 +02:00
vmselectSrv . MustStop ( )
vminsertSrv . MustStop ( )
2021-05-08 16:55:44 +02:00
common . StopUnmarshalWorkers ( )
2020-01-22 17:27:44 +01:00
logger . Infof ( "successfully shut down the service in %.3f seconds" , time . Since ( startTime ) . Seconds ( ) )
2019-05-22 23:16:55 +02:00
2019-05-22 23:23:23 +02:00
logger . Infof ( "gracefully closing the storage at %s" , * storageDataPath )
startTime = time . Now ( )
strg . MustClose ( )
2020-01-22 17:27:44 +01:00
logger . Infof ( "successfully closed the storage in %.3f seconds" , time . Since ( startTime ) . Seconds ( ) )
2019-05-22 23:16:55 +02:00
2019-11-12 15:29:43 +01:00
fs . MustStopDirRemover ( )
2019-05-22 23:23:23 +02:00
logger . Infof ( "the vmstorage has been stopped" )
2019-05-22 23:16:55 +02:00
}
2019-05-22 23:23:23 +02:00
func newRequestHandler ( strg * storage . Storage ) httpserver . RequestHandler {
return func ( w http . ResponseWriter , r * http . Request ) bool {
2020-12-14 13:02:57 +01:00
if r . URL . Path == "/" {
2023-02-23 03:58:44 +01:00
if r . Method != http . MethodGet {
2021-04-02 21:54:06 +02:00
return false
}
2023-06-01 10:26:52 +02:00
w . Header ( ) . Add ( "Content-Type" , "text/html; charset=utf-8" )
fmt . Fprintf ( w , ` vmstorage - a component of VictoriaMetrics cluster < br / >
< a href = "https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html" > docs < / a > < br >
` )
2020-10-06 14:00:38 +02:00
return true
}
2019-05-22 23:23:23 +02:00
return requestHandler ( w , r , strg )
2019-05-22 23:16:55 +02:00
}
2019-05-22 23:23:23 +02:00
}
func requestHandler ( w http . ResponseWriter , r * http . Request , strg * storage . Storage ) bool {
path := r . URL . Path
2020-09-17 11:01:53 +02:00
if path == "/internal/force_merge" {
2023-01-11 00:51:55 +01:00
if ! httpserver . CheckAuthFlag ( w , r , * forceMergeAuthKey , "forceMergeAuthKey" ) {
2020-09-17 13:21:39 +02:00
return true
}
2020-09-17 11:01:53 +02:00
// Run force merge in background
partitionNamePrefix := r . FormValue ( "partition_prefix" )
go func ( ) {
activeForceMerges . Inc ( )
defer activeForceMerges . Dec ( )
logger . Infof ( "forced merge for partition_prefix=%q has been started" , partitionNamePrefix )
startTime := time . Now ( )
if err := strg . ForceMergePartitions ( partitionNamePrefix ) ; err != nil {
logger . Errorf ( "error in forced merge for partition_prefix=%q: %s" , partitionNamePrefix , err )
return
}
logger . Infof ( "forced merge for partition_prefix=%q has been successfully finished in %.3f seconds" , partitionNamePrefix , time . Since ( startTime ) . Seconds ( ) )
} ( )
return true
}
2020-11-11 13:40:27 +01:00
if path == "/internal/force_flush" {
2023-01-11 00:51:55 +01:00
if ! httpserver . CheckAuthFlag ( w , r , * forceFlushAuthKey , "forceFlushAuthKey" ) {
2020-11-11 13:40:27 +01:00
return true
}
logger . Infof ( "flushing storage to make pending data available for reading" )
strg . DebugFlush ( )
return true
}
2019-05-22 23:16:55 +02:00
if ! strings . HasPrefix ( path , "/snapshot" ) {
return false
}
2023-01-11 00:51:55 +01:00
if ! httpserver . CheckAuthFlag ( w , r , * snapshotAuthKey , "snapshotAuthKey" ) {
2019-05-22 23:16:55 +02:00
return true
}
path = path [ len ( "/snapshot" ) : ]
switch path {
case "/create" :
2023-02-27 21:12:03 +01:00
snapshotsCreateTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2023-02-27 21:12:03 +01:00
deadline := uint64 ( 0 )
2023-02-27 21:57:22 +01:00
if * snapshotCreateTimeout > 0 {
deadline = fasttime . UnixTimestamp ( ) + uint64 ( snapshotCreateTimeout . Seconds ( ) )
2023-02-27 21:12:03 +01:00
}
snapshotPath , err := strg . CreateSnapshot ( deadline )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot create snapshot: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsCreateErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
2019-05-22 23:23:23 +02:00
fmt . Fprintf ( w , ` { "status":"ok","snapshot":%q} ` , snapshotPath )
2019-05-22 23:16:55 +02:00
return true
case "/list" :
2023-02-27 21:12:03 +01:00
snapshotsListTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:23:23 +02:00
snapshots , err := strg . ListSnapshots ( )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsListErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
fmt . Fprintf ( w , ` { "status":"ok","snapshots":[ ` )
if len ( snapshots ) > 0 {
for _ , snapshot := range snapshots [ : len ( snapshots ) - 1 ] {
fmt . Fprintf ( w , "\n%q," , snapshot )
}
fmt . Fprintf ( w , "\n%q\n" , snapshots [ len ( snapshots ) - 1 ] )
}
fmt . Fprintf ( w , ` ]} ` )
return true
case "/delete" :
2023-02-27 21:12:03 +01:00
snapshotsDeleteTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:16:55 +02:00
snapshotName := r . FormValue ( "snapshot" )
2022-11-17 00:29:43 +01:00
snapshots , err := strg . ListSnapshots ( )
if err != nil {
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
2022-11-17 00:29:43 +01:00
for _ , snName := range snapshots {
if snName == snapshotName {
if err := strg . DeleteSnapshot ( snName ) ; err != nil {
err = fmt . Errorf ( "cannot delete snapshot %q: %w" , snName , err )
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteErrorsTotal . Inc ( )
2022-11-17 00:29:43 +01:00
return true
}
fmt . Fprintf ( w , ` { "status":"ok"} ` )
return true
}
}
2023-02-27 21:57:22 +01:00
err = fmt . Errorf ( "cannot find snapshot %q" , snapshotName )
2022-11-17 00:29:43 +01:00
jsonResponseError ( w , err )
2019-05-22 23:16:55 +02:00
return true
case "/delete_all" :
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:23:23 +02:00
snapshots , err := strg . ListSnapshots ( )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
for _ , snapshotName := range snapshots {
2019-05-22 23:23:23 +02:00
if err := strg . DeleteSnapshot ( snapshotName ) ; err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot delete snapshot %q: %w" , snapshotName , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
}
fmt . Fprintf ( w , ` { "status":"ok"} ` )
return true
default :
return false
}
}
2022-05-02 10:00:15 +02:00
func initStaleSnapshotsRemover ( strg * storage . Storage ) {
staleSnapshotsRemoverCh = make ( chan struct { } )
2023-09-01 09:27:51 +02:00
if snapshotsMaxAge . Duration ( ) <= 0 {
2022-05-02 10:00:15 +02:00
return
}
2023-09-01 09:27:51 +02:00
snapshotsMaxAgeDur := snapshotsMaxAge . Duration ( )
2022-05-02 10:00:15 +02:00
staleSnapshotsRemoverWG . Add ( 1 )
go func ( ) {
defer staleSnapshotsRemoverWG . Done ( )
t := time . NewTicker ( 11 * time . Second )
defer t . Stop ( )
for {
select {
case <- staleSnapshotsRemoverCh :
return
case <- t . C :
}
2022-05-05 18:43:21 +02:00
if err := strg . DeleteStaleSnapshots ( snapshotsMaxAgeDur ) ; err != nil {
2022-05-02 10:00:15 +02:00
// Use logger.Errorf instead of logger.Fatalf in the hope the error is temporary.
logger . Errorf ( "cannot delete stale snapshots: %s" , err )
}
}
} ( )
}
// stopStaleSnapshotsRemover closes the stop channel and waits until the
// background goroutine registered in staleSnapshotsRemoverWG, if any, exits.
func stopStaleSnapshotsRemover() {
	close(staleSnapshotsRemoverCh)
	staleSnapshotsRemoverWG.Wait()
}
var (
	// staleSnapshotsRemoverCh is closed in order to stop the stale snapshots remover.
	staleSnapshotsRemoverCh chan struct{}

	// staleSnapshotsRemoverWG is used for waiting until the stale snapshots remover exits.
	staleSnapshotsRemoverWG sync.WaitGroup
)
2023-02-27 21:12:03 +01:00
var (
2023-02-27 21:57:22 +01:00
activeForceMerges = metrics . NewCounter ( "vm_active_force_merges" )
2023-02-27 21:12:03 +01:00
snapshotsCreateTotal = metrics . NewCounter ( ` vm_http_requests_total { path="/snapshot/create"} ` )
snapshotsCreateErrorsTotal = metrics . NewCounter ( ` vm_http_request_errors_total { path="/snapshot/create"} ` )
snapshotsListTotal = metrics . NewCounter ( ` vm_http_requests_total { path="/snapshot/list"} ` )
snapshotsListErrorsTotal = metrics . NewCounter ( ` vm_http_request_errors_total { path="/snapshot/list"} ` )
snapshotsDeleteTotal = metrics . NewCounter ( ` vm_http_requests_total { path="/snapshot/delete"} ` )
snapshotsDeleteErrorsTotal = metrics . NewCounter ( ` vm_http_request_errors_total { path="/snapshot/delete"} ` )
snapshotsDeleteAllTotal = metrics . NewCounter ( ` vm_http_requests_total { path="/snapshot/delete_all"} ` )
snapshotsDeleteAllErrorsTotal = metrics . NewCounter ( ` vm_http_request_errors_total { path="/snapshot/delete_all"} ` )
)
2020-09-17 11:01:53 +02:00
2019-05-22 23:16:55 +02:00
func registerStorageMetrics ( strg * storage . Storage ) {
mCache := & storage . Metrics { }
var mCacheLock sync . Mutex
var lastUpdateTime time . Time
m := func ( ) * storage . Metrics {
mCacheLock . Lock ( )
defer mCacheLock . Unlock ( )
if time . Since ( lastUpdateTime ) < time . Second {
return mCache
}
var mc storage . Metrics
strg . UpdateMetrics ( & mc )
mCache = & mc
lastUpdateTime = time . Now ( )
return mCache
}
tm := func ( ) * storage . TableMetrics {
sm := m ( )
return & sm . TableMetrics
}
idbm := func ( ) * storage . IndexDBMetrics {
sm := m ( )
return & sm . IndexDBMetrics
}
2020-04-01 22:43:09 +02:00
metrics . NewGauge ( fmt . Sprintf ( ` vm_free_disk_space_bytes { path=%q} ` , * storageDataPath ) , func ( ) float64 {
return float64 ( fs . MustGetFreeSpace ( * storageDataPath ) )
2020-04-01 22:08:58 +02:00
} )
2021-10-08 11:52:56 +02:00
metrics . NewGauge ( fmt . Sprintf ( ` vm_free_disk_space_limit_bytes { path=%q} ` , * storageDataPath ) , func ( ) float64 {
2021-10-08 12:52:56 +02:00
return float64 ( minFreeDiskSpaceBytes . N )
2021-10-08 11:52:56 +02:00
} )
2021-10-08 12:52:56 +02:00
metrics . NewGauge ( fmt . Sprintf ( ` vm_storage_is_read_only { path=%q} ` , * storageDataPath ) , func ( ) float64 {
2021-10-08 11:52:56 +02:00
if strg . IsReadOnly ( ) {
return 1
}
return 0
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_active_merges { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . ActiveInmemoryMerges )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_active_merges { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . ActiveSmallMerges )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_active_merges { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . ActiveBigMerges )
} )
metrics . NewGauge ( ` vm_active_merges { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . ActiveInmemoryMerges )
} )
metrics . NewGauge ( ` vm_active_merges { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . ActiveFileMerges )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_merges_total { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryMergesCount )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_merges_total { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallMergesCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_merges_total { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigMergesCount )
} )
metrics . NewGauge ( ` vm_merges_total { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemoryMergesCount )
} )
metrics . NewGauge ( ` vm_merges_total { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileMergesCount )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows_merged_total { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryRowsMerged )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_rows_merged_total { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallRowsMerged )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows_merged_total { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigRowsMerged )
} )
metrics . NewGauge ( ` vm_rows_merged_total { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemoryItemsMerged )
} )
metrics . NewGauge ( ` vm_rows_merged_total { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileItemsMerged )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows_deleted_total { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryRowsDeleted )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_rows_deleted_total { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallRowsDeleted )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows_deleted_total { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigRowsDeleted )
} )
2019-05-22 23:16:55 +02:00
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_part_references { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryPartsRefCount )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_part_references { type="storage/small"} ` , func ( ) float64 {
2019-05-22 23:16:55 +02:00
return float64 ( tm ( ) . SmallPartsRefCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_part_references { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigPartsRefCount )
} )
metrics . NewGauge ( ` vm_partition_references { type="storage"} ` , func ( ) float64 {
2019-05-22 23:16:55 +02:00
return float64 ( tm ( ) . PartitionsRefCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_object_references { type="indexdb"} ` , func ( ) float64 {
2019-05-22 23:16:55 +02:00
return float64 ( idbm ( ) . IndexDBRefCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_part_references { type="indexdb"} ` , func ( ) float64 {
2019-05-22 23:16:55 +02:00
return float64 ( idbm ( ) . PartsRefCount )
} )
metrics . NewGauge ( ` vm_missing_tsids_for_metric_id_total ` , func ( ) float64 {
return float64 ( idbm ( ) . MissingTSIDsForMetricID )
} )
2019-11-06 13:24:48 +01:00
metrics . NewGauge ( ` vm_index_blocks_with_metric_ids_processed_total ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksWithMetricIDsProcessed )
} )
metrics . NewGauge ( ` vm_index_blocks_with_metric_ids_incorrect_order_total ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksWithMetricIDsIncorrectOrder )
} )
2021-02-10 15:53:26 +01:00
metrics . NewGauge ( ` vm_composite_index_min_timestamp ` , func ( ) float64 {
return float64 ( idbm ( ) . MinTimestampForCompositeIndex ) / 1e3
} )
2021-02-17 18:13:38 +01:00
metrics . NewGauge ( ` vm_composite_filter_success_conversions_total ` , func ( ) float64 {
return float64 ( idbm ( ) . CompositeFilterSuccessConversions )
} )
metrics . NewGauge ( ` vm_composite_filter_missing_conversions_total ` , func ( ) float64 {
return float64 ( idbm ( ) . CompositeFilterMissingConversions )
} )
2019-05-22 23:16:55 +02:00
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_assisted_merges_total { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryAssistedMerges )
2019-05-22 23:16:55 +02:00
} )
2022-12-13 01:49:21 +01:00
metrics . NewGauge ( ` vm_assisted_merges_total { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallAssistedMerges )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_assisted_merges_total { type="indexdb/inmemory"} ` , func ( ) float64 {
2022-12-13 01:49:21 +01:00
return float64 ( idbm ( ) . InmemoryAssistedMerges )
} )
metrics . NewGauge ( ` vm_assisted_merges_total { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileAssistedMerges )
2019-05-22 23:16:55 +02:00
} )
2022-04-21 12:18:05 +02:00
metrics . NewGauge ( ` vm_indexdb_items_added_total ` , func ( ) float64 {
return float64 ( idbm ( ) . ItemsAdded )
} )
metrics . NewGauge ( ` vm_indexdb_items_added_size_bytes_total ` , func ( ) float64 {
return float64 ( idbm ( ) . ItemsAddedSizeBytes )
} )
2020-09-29 20:47:40 +02:00
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/686
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_merge_need_free_disk_space ` , func ( ) float64 {
return float64 ( tm ( ) . MergeNeedFreeDiskSpace )
2020-09-29 20:47:40 +02:00
} )
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_pending_rows { type="storage"} ` , func ( ) float64 {
return float64 ( tm ( ) . PendingRows )
} )
metrics . NewGauge ( ` vm_pending_rows { type="indexdb"} ` , func ( ) float64 {
return float64 ( idbm ( ) . PendingItems )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_parts { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryPartsCount )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_parts { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallPartsCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_parts { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigPartsCount )
} )
metrics . NewGauge ( ` vm_parts { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemoryPartsCount )
} )
metrics . NewGauge ( ` vm_parts { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FilePartsCount )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_blocks { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryBlocksCount )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_blocks { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallBlocksCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_blocks { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigBlocksCount )
} )
metrics . NewGauge ( ` vm_blocks { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemoryBlocksCount )
} )
metrics . NewGauge ( ` vm_blocks { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileBlocksCount )
2019-05-22 23:16:55 +02:00
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_data_size_bytes { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemorySizeBytes )
2019-07-04 18:09:40 +02:00
} )
metrics . NewGauge ( ` vm_data_size_bytes { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallSizeBytes )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_data_size_bytes { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigSizeBytes )
} )
metrics . NewGauge ( ` vm_data_size_bytes { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemorySizeBytes )
} )
metrics . NewGauge ( ` vm_data_size_bytes { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileSizeBytes )
2019-07-04 18:09:40 +02:00
} )
2020-10-09 12:35:48 +02:00
metrics . NewGauge ( ` vm_rows_added_to_storage_total ` , func ( ) float64 {
return float64 ( m ( ) . RowsAddedTotal )
} )
2020-02-27 22:47:05 +01:00
metrics . NewGauge ( ` vm_deduplicated_samples_total { type="merge"} ` , func ( ) float64 {
return float64 ( m ( ) . DedupsDuringMerge )
} )
2019-07-26 19:00:35 +02:00
metrics . NewGauge ( ` vm_rows_ignored_total { reason="big_timestamp"} ` , func ( ) float64 {
2019-07-26 13:10:25 +02:00
return float64 ( m ( ) . TooBigTimestampRows )
} )
2019-07-26 19:00:35 +02:00
metrics . NewGauge ( ` vm_rows_ignored_total { reason="small_timestamp"} ` , func ( ) float64 {
2019-07-26 13:10:25 +02:00
return float64 ( m ( ) . TooSmallTimestampRows )
} )
lib/storage: switch from global to per-day index for `MetricName -> TSID` mapping
Previously all the newly ingested time series were registered in global `MetricName -> TSID` index.
This index was used during data ingestion for locating the TSID (internal series id)
for the given canonical metric name (the canonical metric name consists of metric name plus all its labels sorted by label names).
The `MetricName -> TSID` index is stored on disk in order to make sure that the data
isn't lost on VictoriaMetrics restart or unclean shutdown.
The lookup in this index is relatively slow, since VictoriaMetrics needs to read the corresponding
data block from disk, unpack it, put the unpacked block into `indexdb/dataBlocks` cache,
and then search for the given `MetricName -> TSID` entry there. So VictoriaMetrics
uses in-memory cache for speeding up the lookup for active time series.
This cache is named `storage/tsid`. If this cache capacity is enough for all the currently ingested
active time series, then VictoriaMetrics works fast, since it doesn't need to read the data from disk.
VictoriaMetrics starts reading data from `MetricName -> TSID` on-disk index in the following cases:
- If `storage/tsid` cache capacity isn't enough for active time series.
Then just increase available memory for VictoriaMetrics or reduce the number of active time series
ingested into VictoriaMetrics.
- If new time series is ingested into VictoriaMetrics. In this case it cannot find
the needed entry in the `storage/tsid` cache, so it needs to consult on-disk `MetricName -> TSID` index,
since it doesn't know that the index has no corresponding entry either.
This is a typical event under high churn rate, when old time series are constantly substituted
with new time series.
Reading the data from `MetricName -> TSID` index is slow, so inserts, which lead to reading this index,
are counted as slow inserts, and they can be monitored via `vm_slow_row_inserts_total` metric exposed by VictoriaMetrics.
Prior to this commit the `MetricName -> TSID` index was global, i.e. it contained entries sorted by `MetricName`
for all the time series ever ingested into VictoriaMetrics during the configured -retentionPeriod.
This index can become very large under high churn rate and long retention. VictoriaMetrics
caches data from this index in `indexdb/dataBlocks` in-memory cache for speeding up index lookups.
The `indexdb/dataBlocks` cache may occupy significant share of available memory for storing
recently accessed blocks at `MetricName -> TSID` index when searching for newly ingested time series.
This commit switches from global `MetricName -> TSID` index to per-day index. This allows significantly
reducing the amounts of data, which needs to be cached in `indexdb/dataBlocks`, since now VictoriaMetrics
consults only the index for the current day when new time series is ingested into it.
The downside of this change is increased indexdb size on disk for workloads without high churn rate,
e.g. with static time series, which do not change over time, since now VictoriaMetrics needs to store
identical `MetricName -> TSID` entries for static time series for every day.
This change removes an optimization for reducing CPU and disk IO spikes at indexdb rotation,
since it didn't work correctly - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401 .
At the same time the change fixes the issue, which could result in lost access to time series,
which stop receiving new samples during the first hour after indexdb rotation - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2698
The issue with the increased CPU and disk IO usage during indexdb rotation will be addressed
in a separate commit according to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401#issuecomment-1553488685
This is a follow-up for 1f28b46ae9350795af41cbfc3ca0e8a5af084fce
2023-07-14 00:33:41 +02:00
metrics . NewGauge ( ` vm_timeseries_repopulated_total ` , func ( ) float64 {
return float64 ( m ( ) . TimeseriesRepopulated )
} )
2023-07-23 00:20:21 +02:00
metrics . NewGauge ( ` vm_timeseries_precreated_total ` , func ( ) float64 {
return float64 ( m ( ) . TimeseriesPreCreated )
} )
lib/storage: switch from global to per-day index for `MetricName -> TSID` mapping
Previously all the newly ingested time series were registered in global `MetricName -> TSID` index.
This index was used during data ingestion for locating the TSID (internal series id)
for the given canonical metric name (the canonical metric name consists of metric name plus all its labels sorted by label names).
The `MetricName -> TSID` index is stored on disk in order to make sure that the data
isn't lost on VictoriaMetrics restart or unclean shutdown.
The lookup in this index is relatively slow, since VictoriaMetrics needs to read the corresponding
data block from disk, unpack it, put the unpacked block into `indexdb/dataBlocks` cache,
and then search for the given `MetricName -> TSID` entry there. So VictoriaMetrics
uses in-memory cache for speeding up the lookup for active time series.
This cache is named `storage/tsid`. If this cache capacity is enough for all the currently ingested
active time series, then VictoriaMetrics works fast, since it doesn't need to read the data from disk.
VictoriaMetrics starts reading data from `MetricName -> TSID` on-disk index in the following cases:
- If `storage/tsid` cache capacity isn't enough for active time series.
Then just increase available memory for VictoriaMetrics or reduce the number of active time series
ingested into VictoriaMetrics.
- If new time series is ingested into VictoriaMetrics. In this case it cannot find
the needed entry in the `storage/tsid` cache, so it needs to consult on-disk `MetricName -> TSID` index,
since it doesn't know that the index has no corresponding entry either.
This is a typical event under high churn rate, when old time series are constantly substituted
with new time series.
Reading the data from `MetricName -> TSID` index is slow, so inserts, which lead to reading this index,
are counted as slow inserts, and they can be monitored via `vm_slow_row_inserts_total` metric exposed by VictoriaMetrics.
Prior to this commit the `MetricName -> TSID` index was global, i.e. it contained entries sorted by `MetricName`
for all the time series ever ingested into VictoriaMetrics during the configured -retentionPeriod.
This index can become very large under high churn rate and long retention. VictoriaMetrics
caches data from this index in `indexdb/dataBlocks` in-memory cache for speeding up index lookups.
The `indexdb/dataBlocks` cache may occupy significant share of available memory for storing
recently accessed blocks at `MetricName -> TSID` index when searching for newly ingested time series.
This commit switches from global `MetricName -> TSID` index to per-day index. This allows significantly
reducing the amounts of data, which needs to be cached in `indexdb/dataBlocks`, since now VictoriaMetrics
consults only the index for the current day when new time series is ingested into it.
The downside of this change is increased indexdb size on disk for workloads without high churn rate,
e.g. with static time series, which do not change over time, since now VictoriaMetrics needs to store
identical `MetricName -> TSID` entries for static time series for every day.
This change removes an optimization for reducing CPU and disk IO spikes at indexdb rotation,
since it didn't work correctly - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401 .
At the same time the change fixes the issue, which could result in lost access to time series,
which stop receiving new samples during the first hour after indexdb rotation - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2698
The issue with the increased CPU and disk IO usage during indexdb rotation will be addressed
in a separate commit according to https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401#issuecomment-1553488685
This is a follow-up for 1f28b46ae9350795af41cbfc3ca0e8a5af084fce
2023-07-14 00:33:41 +02:00
metrics . NewGauge ( ` vm_new_timeseries_created_total ` , func ( ) float64 {
return float64 ( m ( ) . NewTimeseriesCreated )
} )
2020-05-15 12:44:23 +02:00
metrics . NewGauge ( ` vm_slow_row_inserts_total ` , func ( ) float64 {
return float64 ( m ( ) . SlowRowInserts )
} )
metrics . NewGauge ( ` vm_slow_per_day_index_inserts_total ` , func ( ) float64 {
return float64 ( m ( ) . SlowPerDayIndexInserts )
} )
2020-05-15 13:11:39 +02:00
metrics . NewGauge ( ` vm_slow_metric_name_loads_total ` , func ( ) float64 {
return float64 ( m ( ) . SlowMetricNameLoads )
} )
2020-05-15 12:44:23 +02:00
2022-08-24 12:41:53 +02:00
if * maxHourlySeries > 0 {
metrics . NewGauge ( ` vm_hourly_series_limit_current_series ` , func ( ) float64 {
return float64 ( m ( ) . HourlySeriesLimitCurrentSeries )
} )
metrics . NewGauge ( ` vm_hourly_series_limit_max_series ` , func ( ) float64 {
return float64 ( m ( ) . HourlySeriesLimitMaxSeries )
} )
metrics . NewGauge ( ` vm_hourly_series_limit_rows_dropped_total ` , func ( ) float64 {
return float64 ( m ( ) . HourlySeriesLimitRowsDropped )
} )
}
if * maxDailySeries > 0 {
metrics . NewGauge ( ` vm_daily_series_limit_current_series ` , func ( ) float64 {
return float64 ( m ( ) . DailySeriesLimitCurrentSeries )
} )
metrics . NewGauge ( ` vm_daily_series_limit_max_series ` , func ( ) float64 {
return float64 ( m ( ) . DailySeriesLimitMaxSeries )
} )
metrics . NewGauge ( ` vm_daily_series_limit_rows_dropped_total ` , func ( ) float64 {
return float64 ( m ( ) . DailySeriesLimitRowsDropped )
} )
}
2021-05-20 13:15:19 +02:00
2020-09-09 22:18:32 +02:00
metrics . NewGauge ( ` vm_timestamps_blocks_merged_total ` , func ( ) float64 {
return float64 ( m ( ) . TimestampsBlocksMerged )
} )
metrics . NewGauge ( ` vm_timestamps_bytes_saved_total ` , func ( ) float64 {
return float64 ( m ( ) . TimestampsBytesSaved )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows { type="storage/inmemory"} ` , func ( ) float64 {
return float64 ( tm ( ) . InmemoryRowsCount )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_rows { type="storage/small"} ` , func ( ) float64 {
return float64 ( tm ( ) . SmallRowsCount )
} )
2022-12-06 00:27:57 +01:00
metrics . NewGauge ( ` vm_rows { type="storage/big"} ` , func ( ) float64 {
return float64 ( tm ( ) . BigRowsCount )
} )
metrics . NewGauge ( ` vm_rows { type="indexdb/inmemory"} ` , func ( ) float64 {
return float64 ( idbm ( ) . InmemoryItemsCount )
} )
metrics . NewGauge ( ` vm_rows { type="indexdb/file"} ` , func ( ) float64 {
return float64 ( idbm ( ) . FileItemsCount )
2019-05-22 23:16:55 +02:00
} )
2019-11-09 22:17:42 +01:00
metrics . NewGauge ( ` vm_date_range_search_calls_total ` , func ( ) float64 {
return float64 ( idbm ( ) . DateRangeSearchCalls )
} )
metrics . NewGauge ( ` vm_date_range_hits_total ` , func ( ) float64 {
return float64 ( idbm ( ) . DateRangeSearchHits )
} )
2021-07-30 07:37:10 +02:00
metrics . NewGauge ( ` vm_global_search_calls_total ` , func ( ) float64 {
return float64 ( idbm ( ) . GlobalSearchCalls )
} )
2019-11-09 22:17:42 +01:00
2019-12-02 19:44:18 +01:00
metrics . NewGauge ( ` vm_missing_metric_names_for_metric_id_total ` , func ( ) float64 {
return float64 ( idbm ( ) . MissingMetricNamesForMetricID )
} )
2019-11-11 12:21:05 +01:00
metrics . NewGauge ( ` vm_date_metric_id_cache_syncs_total ` , func ( ) float64 {
return float64 ( m ( ) . DateMetricIDCacheSyncsCount )
} )
metrics . NewGauge ( ` vm_date_metric_id_cache_resets_total ` , func ( ) float64 {
return float64 ( m ( ) . DateMetricIDCacheResetsCount )
} )
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/tsid"} ` , func ( ) float64 {
return float64 ( m ( ) . TSIDCacheSize )
} )
metrics . NewGauge ( ` vm_cache_entries { type="storage/metricIDs"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricIDCacheSize )
} )
metrics . NewGauge ( ` vm_cache_entries { type="storage/metricName"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricNameCacheSize )
} )
metrics . NewGauge ( ` vm_cache_entries { type="storage/date_metricID"} ` , func ( ) float64 {
return float64 ( m ( ) . DateMetricIDCacheSize )
} )
2019-06-19 17:36:47 +02:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/hour_metric_ids"} ` , func ( ) float64 {
return float64 ( m ( ) . HourMetricIDCacheSize )
} )
2020-05-12 00:06:17 +02:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/next_day_metric_ids"} ` , func ( ) float64 {
return float64 ( m ( ) . NextDayMetricIDCacheSize )
} )
2022-01-20 17:34:59 +01:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/indexBlocks"} ` , func ( ) float64 {
return float64 ( tm ( ) . IndexBlocksCacheSize )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_entries { type="indexdb/dataBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DataBlocksCacheSize )
} )
metrics . NewGauge ( ` vm_cache_entries { type="indexdb/indexBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksCacheSize )
} )
2022-10-23 11:15:24 +02:00
metrics . NewGauge ( ` vm_cache_entries { type="indexdb/tagFiltersToMetricIDs"} ` , func ( ) float64 {
return float64 ( idbm ( ) . TagFiltersToMetricIDsCacheSize )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_entries { type="storage/regexps"} ` , func ( ) float64 {
return float64 ( storage . RegexpCacheSize ( ) )
} )
2022-03-26 11:54:50 +01:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/regexpPrefixes"} ` , func ( ) float64 {
return float64 ( storage . RegexpPrefixesCacheSize ( ) )
} )
2020-08-06 15:30:15 +02:00
metrics . NewGauge ( ` vm_cache_entries { type="storage/prefetchedMetricIDs"} ` , func ( ) float64 {
2020-01-30 00:59:43 +01:00
return float64 ( m ( ) . PrefetchedMetricIDsSize )
} )
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/tsid"} ` , func ( ) float64 {
2019-07-09 23:47:29 +02:00
return float64 ( m ( ) . TSIDCacheSizeBytes )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/metricIDs"} ` , func ( ) float64 {
2019-07-09 23:47:29 +02:00
return float64 ( m ( ) . MetricIDCacheSizeBytes )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/metricName"} ` , func ( ) float64 {
2019-07-09 23:47:29 +02:00
return float64 ( m ( ) . MetricNameCacheSizeBytes )
2019-05-22 23:16:55 +02:00
} )
2022-01-20 17:34:59 +01:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/indexBlocks"} ` , func ( ) float64 {
return float64 ( tm ( ) . IndexBlocksCacheSizeBytes )
2021-02-08 23:34:18 +01:00
} )
metrics . NewGauge ( ` vm_cache_size_bytes { type="indexdb/dataBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DataBlocksCacheSizeBytes )
} )
metrics . NewGauge ( ` vm_cache_size_bytes { type="indexdb/indexBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksCacheSizeBytes )
} )
2019-11-13 16:58:05 +01:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/date_metricID"} ` , func ( ) float64 {
return float64 ( m ( ) . DateMetricIDCacheSizeBytes )
} )
2019-11-13 18:00:02 +01:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/hour_metric_ids"} ` , func ( ) float64 {
return float64 ( m ( ) . HourMetricIDCacheSizeBytes )
} )
2020-05-12 00:06:17 +02:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/next_day_metric_ids"} ` , func ( ) float64 {
return float64 ( m ( ) . NextDayMetricIDCacheSizeBytes )
} )
2022-10-23 11:15:24 +02:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="indexdb/tagFiltersToMetricIDs"} ` , func ( ) float64 {
return float64 ( idbm ( ) . TagFiltersToMetricIDsCacheSizeBytes )
2019-05-22 23:16:55 +02:00
} )
2022-03-26 11:54:50 +01:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/regexps"} ` , func ( ) float64 {
return float64 ( storage . RegexpCacheSizeBytes ( ) )
} )
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/regexpPrefixes"} ` , func ( ) float64 {
return float64 ( storage . RegexpPrefixesCacheSizeBytes ( ) )
} )
2020-01-30 00:59:43 +01:00
metrics . NewGauge ( ` vm_cache_size_bytes { type="storage/prefetchedMetricIDs"} ` , func ( ) float64 {
return float64 ( m ( ) . PrefetchedMetricIDsSizeBytes )
} )
2019-05-22 23:16:55 +02:00
2021-12-02 09:28:45 +01:00
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/tsid"} ` , func ( ) float64 {
return float64 ( m ( ) . TSIDCacheSizeMaxBytes )
} )
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/metricIDs"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricIDCacheSizeMaxBytes )
} )
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/metricName"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricNameCacheSizeMaxBytes )
} )
2022-01-20 17:34:59 +01:00
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/indexBlocks"} ` , func ( ) float64 {
return float64 ( tm ( ) . IndexBlocksCacheSizeMaxBytes )
2021-12-02 09:28:45 +01:00
} )
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="indexdb/dataBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DataBlocksCacheSizeMaxBytes )
} )
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="indexdb/indexBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksCacheSizeMaxBytes )
} )
2022-10-23 11:15:24 +02:00
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="indexdb/tagFiltersToMetricIDs"} ` , func ( ) float64 {
return float64 ( idbm ( ) . TagFiltersToMetricIDsCacheSizeMaxBytes )
2021-12-02 09:28:45 +01:00
} )
2022-03-26 11:54:50 +01:00
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/regexps"} ` , func ( ) float64 {
return float64 ( storage . RegexpCacheMaxSizeBytes ( ) )
} )
metrics . NewGauge ( ` vm_cache_size_max_bytes { type="storage/regexpPrefixes"} ` , func ( ) float64 {
return float64 ( storage . RegexpPrefixesCacheMaxSizeBytes ( ) )
} )
2021-12-02 09:28:45 +01:00
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/tsid"} ` , func ( ) float64 {
return float64 ( m ( ) . TSIDCacheRequests )
} )
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/metricIDs"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricIDCacheRequests )
} )
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/metricName"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricNameCacheRequests )
} )
2022-01-20 17:34:59 +01:00
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/indexBlocks"} ` , func ( ) float64 {
return float64 ( tm ( ) . IndexBlocksCacheRequests )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_requests_total { type="indexdb/dataBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DataBlocksCacheRequests )
} )
metrics . NewGauge ( ` vm_cache_requests_total { type="indexdb/indexBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksCacheRequests )
} )
2022-10-23 11:15:24 +02:00
metrics . NewGauge ( ` vm_cache_requests_total { type="indexdb/tagFiltersToMetricIDs"} ` , func ( ) float64 {
return float64 ( idbm ( ) . TagFiltersToMetricIDsCacheRequests )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/regexps"} ` , func ( ) float64 {
return float64 ( storage . RegexpCacheRequests ( ) )
} )
2022-03-26 11:54:50 +01:00
metrics . NewGauge ( ` vm_cache_requests_total { type="storage/regexpPrefixes"} ` , func ( ) float64 {
return float64 ( storage . RegexpPrefixesCacheRequests ( ) )
} )
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/tsid"} ` , func ( ) float64 {
return float64 ( m ( ) . TSIDCacheMisses )
} )
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/metricIDs"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricIDCacheMisses )
} )
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/metricName"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricNameCacheMisses )
} )
2022-01-20 17:34:59 +01:00
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/indexBlocks"} ` , func ( ) float64 {
return float64 ( tm ( ) . IndexBlocksCacheMisses )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_misses_total { type="indexdb/dataBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DataBlocksCacheMisses )
} )
metrics . NewGauge ( ` vm_cache_misses_total { type="indexdb/indexBlocks"} ` , func ( ) float64 {
return float64 ( idbm ( ) . IndexBlocksCacheMisses )
} )
2022-10-23 11:15:24 +02:00
metrics . NewGauge ( ` vm_cache_misses_total { type="indexdb/tagFiltersToMetricIDs"} ` , func ( ) float64 {
return float64 ( idbm ( ) . TagFiltersToMetricIDsCacheMisses )
2019-05-22 23:16:55 +02:00
} )
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/regexps"} ` , func ( ) float64 {
return float64 ( storage . RegexpCacheMisses ( ) )
} )
2022-03-26 11:54:50 +01:00
metrics . NewGauge ( ` vm_cache_misses_total { type="storage/regexpPrefixes"} ` , func ( ) float64 {
return float64 ( storage . RegexpPrefixesCacheMisses ( ) )
} )
2019-05-22 23:16:55 +02:00
metrics . NewGauge ( ` vm_deleted_metrics_total { type="indexdb"} ` , func ( ) float64 {
return float64 ( idbm ( ) . DeletedMetricsCount )
} )
metrics . NewGauge ( ` vm_cache_collisions_total { type="storage/tsid"} ` , func ( ) float64 {
return float64 ( m ( ) . TSIDCacheCollisions )
} )
metrics . NewGauge ( ` vm_cache_collisions_total { type="storage/metricName"} ` , func ( ) float64 {
return float64 ( m ( ) . MetricNameCacheCollisions )
} )
2022-07-13 11:37:04 +02:00
metrics . NewGauge ( ` vm_next_retention_seconds ` , func ( ) float64 {
return float64 ( m ( ) . NextRetentionSeconds )
} )
2019-05-22 23:16:55 +02:00
}
2020-03-10 22:51:50 +01:00
// jsonResponseError logs err and replies to the client with an
// HTTP 500 status and a JSON body carrying "status":"error" and the error
// text under "msg".
//
// The error text is escaped by jsonQuote instead of the fmt %q verb. %q emits
// Go string-literal syntax, whose \a, \v, \xNN and \UNNNNNNNN escapes are not
// valid JSON, so an error message containing such characters used to produce
// a body that JSON parsers reject.
//
// This function does not set the Content-Type header.
// NOTE(review): presumably callers set it before calling - confirm.
func jsonResponseError(w http.ResponseWriter, err error) {
	logger.Errorf("%s", err)
	w.WriteHeader(http.StatusInternalServerError)
	// fmt.Sprint is used rather than err.Error() so that a nil err cannot panic.
	fmt.Fprintf(w, ` { "status":"error","msg":%s} `, jsonQuote(fmt.Sprint(err)))
}

// jsonQuote returns s as a double-quoted JSON string literal.
//
// Double quotes and backslashes are backslash-escaped, control characters
// below U+0020 are written as \u00XX, and every other rune is copied as UTF-8.
// Invalid UTF-8 bytes in s are replaced with U+FFFD, since ranging over a
// string yields that rune for them.
func jsonQuote(s string) string {
	var b strings.Builder
	b.Grow(len(s) + 2)
	b.WriteByte('"')
	for _, r := range s {
		switch {
		case r == '"' || r == '\\':
			b.WriteByte('\\')
			b.WriteRune(r)
		case r < 0x20:
			fmt.Fprintf(&b, `\u%04x`, r)
		default:
			b.WriteRune(r)
		}
	}
	b.WriteByte('"')
	return b.String()
}
2020-12-03 20:40:30 +01:00
// usage passes a short description of vmstorage (what it stores and where its
// docs live) to flagutil.Usage.
// NOTE(review): flagutil.Usage presumably prints this text together with the
// registered command-line flags - confirm in lib/flagutil.
func usage ( ) {
const s = `
vmstorage stores time series data obtained from vminsert and returns the requested data to vmselect .
2021-04-20 19:16:17 +02:00
See the docs at https : //docs.victoriametrics.com/Cluster-VictoriaMetrics.html .
2020-12-03 20:40:30 +01:00
`
// Hand the description over to the shared usage helper.
flagutil . Usage ( s )
}