package main

import (
	"flag"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/VictoriaMetrics/metrics"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage/servers"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
	retentionPeriod  = flagutil.NewRetentionDuration("retentionPeriod", "1", "Data with timestamps outside the retentionPeriod is automatically deleted. The minimum retentionPeriod is 24h or 1d. See also -retentionFilter")
	httpListenAddrs  = flagutil.NewArrayString("httpListenAddr", "Address to listen for incoming http requests. See also -httpListenAddr.useProxyProtocol")
	useProxyProtocol = flagutil.NewArrayBool("httpListenAddr.useProxyProtocol", "Whether to use proxy protocol for connections accepted at the given -httpListenAddr . "+
		"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . "+
		"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
	storageDataPath   = flag.String("storageDataPath", "vmstorage-data", "Path to storage data")
	vminsertAddr      = flag.String("vminsertAddr", ":8400", "TCP address to accept connections from vminsert services")
	vmselectAddr      = flag.String("vmselectAddr", ":8401", "TCP address to accept connections from vmselect services")
	snapshotAuthKey   = flagutil.NewPassword("snapshotAuthKey", "authKey, which must be passed in query string to /snapshot* pages")
	forceMergeAuthKey = flagutil.NewPassword("forceMergeAuthKey", "authKey, which must be passed in query string to /internal/force_merge pages")
	forceFlushAuthKey = flagutil.NewPassword("forceFlushAuthKey", "authKey, which must be passed in query string to /internal/force_flush pages")
	snapshotsMaxAge   = flagutil.NewRetentionDuration("snapshotsMaxAge", "0", "Automatically delete snapshots older than -snapshotsMaxAge if it is set to non-zero duration. Make sure that backup process has enough time to finish the backup before the corresponding snapshot is automatically deleted")
	_                 = flag.Duration("snapshotCreateTimeout", 0, "Deprecated: this flag does nothing")
	_ = flag.Duration("finalMergeDelay", 0, "Deprecated: this flag does nothing")
	_ = flag.Int("bigMergeConcurrency", 0, "Deprecated: this flag does nothing")
	_ = flag.Int("smallMergeConcurrency", 0, "Deprecated: this flag does nothing")

	retentionTimezoneOffset = flag.Duration("retentionTimezoneOffset", 0, "The offset for performing indexdb rotation. "+
		"If set to 0, then the indexdb rotation is performed at 4am UTC time per each -retentionPeriod. "+
		"If set to 2h, then the indexdb rotation is performed at 4am EET time (the timezone with +2h offset)")
	minScrapeInterval = flag.Duration("dedup.minScrapeInterval", 0, "Leave only the last sample in every time series per each discrete interval "+
		"equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication for details")
	inmemoryDataFlushInterval = flag.Duration("inmemoryDataFlushInterval", 5*time.Second, "The interval for guaranteed saving of in-memory data to disk. "+
		"The saved data survives unclean shutdowns such as OOM crash, hardware reset, SIGKILL, etc. "+
		"Bigger intervals may help increase the lifetime of flash storage with limited write cycles (e.g. Raspberry PI). "+
		"Smaller intervals increase disk IO load. Minimum supported value is 1s")
	logNewSeries = flag.Bool("logNewSeries", false, "Whether to log new series. This option is for debug purposes only. It can lead to performance issues "+
		"when big number of new series are ingested into VictoriaMetrics")
	maxHourlySeries = flag.Int("storage.maxHourlySeries", 0, "The maximum number of unique series that can be added to the storage during the last hour. "+
		"Excess series are logged and dropped. This can be useful for limiting series cardinality. See https://docs.victoriametrics.com/#cardinality-limiter . "+
		"See also -storage.maxDailySeries")
	maxDailySeries = flag.Int("storage.maxDailySeries", 0, "The maximum number of unique series that can be added to the storage during the last 24 hours. "+
		"Excess series are logged and dropped. This can be useful for limiting series churn rate. See https://docs.victoriametrics.com/#cardinality-limiter . "+
		"See also -storage.maxHourlySeries")

	minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data")

	cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. "+
		"See https://docs.victoriametrics.com/single-server-victoriametrics/#cache-tuning")
	cacheSizeIndexDBIndexBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBIndexBlocks", 0, "Overrides max size for indexdb/indexBlocks cache. "+
		"See https://docs.victoriametrics.com/single-server-victoriametrics/#cache-tuning")
	cacheSizeIndexDBDataBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBDataBlocks", 0, "Overrides max size for indexdb/dataBlocks cache. "+
		"See https://docs.victoriametrics.com/single-server-victoriametrics/#cache-tuning")
	cacheSizeIndexDBTagFilters = flagutil.NewBytes("storage.cacheSizeIndexDBTagFilters", 0, "Overrides max size for indexdb/tagFiltersToMetricIDs cache. "+
		"See https://docs.victoriametrics.com/single-server-victoriametrics/#cache-tuning")
)
func main() {
	// Write flags and help message to stdout, since it is easier to grep or pipe.
	flag.CommandLine.SetOutput(os.Stdout)
	flag.Usage = usage
	envflag.Parse()
	buildinfo.Init()
	logger.Init()
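
	// Propagate command-line tunables to the storage and mergeset packages
	// before the storage is opened.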
	storage.SetDedupInterval(*minScrapeInterval)
	storage.SetDataFlushInterval(*inmemoryDataFlushInterval)
	storage.SetLogNewSeries(*logNewSeries)
	storage.SetRetentionTimezoneOffset(*retentionTimezoneOffset)
	storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N)
	storage.SetTSIDCacheSize(cacheSizeStorageTSID.IntN())
	storage.SetTagFiltersCacheSize(cacheSizeIndexDBTagFilters.IntN())
	mergeset.SetIndexBlocksCacheSize(cacheSizeIndexDBIndexBlocks.IntN())
	mergeset.SetDataBlocksCacheSize(cacheSizeIndexDBDataBlocks.IntN())

	if retentionPeriod.Duration() < 24*time.Hour {
		logger.Fatalf("-retentionPeriod cannot be smaller than a day; got %s", retentionPeriod)
	}
	logger.Infof("opening storage at %q with -retentionPeriod=%s", *storageDataPath, retentionPeriod)
	startTime := time.Now()
	strg := storage.MustOpenStorage(*storageDataPath, retentionPeriod.Duration(), *maxHourlySeries, *maxDailySeries)
	initStaleSnapshotsRemover(strg)
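
	// Log a summary of the freshly opened storage.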
	var m storage.Metrics
	strg.UpdateMetrics(&m)
	tm := &m.TableMetrics
	partsCount := tm.SmallPartsCount + tm.BigPartsCount
	blocksCount := tm.SmallBlocksCount + tm.BigBlocksCount
	rowsCount := tm.SmallRowsCount + tm.BigRowsCount
	sizeBytes := tm.SmallSizeBytes + tm.BigSizeBytes
	logger.Infof("successfully opened storage %q in %.3f seconds; partsCount: %d; blocksCount: %d; rowsCount: %d; sizeBytes: %d",
		*storageDataPath, time.Since(startTime).Seconds(), partsCount, blocksCount, rowsCount, sizeBytes)

	// register storage metrics
	storageMetrics := metrics.NewSet()
	storageMetrics.RegisterMetricsWriter(func(w io.Writer) {
		writeStorageMetrics(w, strg)
	})
	metrics.RegisterSet(storageMetrics)

	common.StartUnmarshalWorkers()
	vminsertSrv, err := servers.NewVMInsertServer(*vminsertAddr, strg)
	if err != nil {
		logger.Fatalf("cannot create a server with -vminsertAddr=%s: %s", *vminsertAddr, err)
	}
	vmselectSrv, err := servers.NewVMSelectServer(*vmselectAddr, strg)
	if err != nil {
		logger.Fatalf("cannot create a server with -vmselectAddr=%s: %s", *vmselectAddr, err)
	}
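
	// Serve the HTTP API at :8482 by default unless -httpListenAddr is set explicitly.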
	listenAddrs := *httpListenAddrs
	if len(listenAddrs) == 0 {
		listenAddrs = []string{":8482"}
	}
	requestHandler := newRequestHandler(strg)
	go httpserver.Serve(listenAddrs, useProxyProtocol, requestHandler)

	pushmetrics.Init()
	sig := procutil.WaitForSigterm()
	logger.Infof("service received signal %s", sig)
	pushmetrics.Stop()

	logger.Infof("gracefully shutting down http service at %q", listenAddrs)
	startTime = time.Now()
	if err := httpserver.Stop(listenAddrs); err != nil {
		logger.Fatalf("cannot stop http service: %s", err)
	}
	logger.Infof("successfully shut down http service in %.3f seconds", time.Since(startTime).Seconds())
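
	// Stop the remaining subsystems in dependency order: first the servers and
	// workers that feed the storage, then the storage itself.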
	logger.Infof("gracefully shutting down the service")
	startTime = time.Now()

	// deregister storage metrics
	metrics.UnregisterSet(storageMetrics, true)
	storageMetrics = nil

	stopStaleSnapshotsRemover()
	vmselectSrv.MustStop()
	vminsertSrv.MustStop()
	common.StopUnmarshalWorkers()
	logger.Infof("successfully shut down the service in %.3f seconds", time.Since(startTime).Seconds())

	logger.Infof("gracefully closing the storage at %s", *storageDataPath)
	startTime = time.Now()
	strg.MustClose()
	logger.Infof("successfully closed the storage in %.3f seconds", time.Since(startTime).Seconds())

	fs.MustStopDirRemover()

	logger.Infof("the vmstorage has been stopped")
}
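
// newRequestHandler returns an http request handler, which serves the root page
// and delegates all the other requests to requestHandler.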
func newRequestHandler(strg *storage.Storage) httpserver.RequestHandler {
	return func(w http.ResponseWriter, r *http.Request) bool {
		if r.URL.Path == "/" {
			if r.Method != http.MethodGet {
				return false
			}
			w.Header().Add("Content-Type", "text/html; charset=utf-8")
			fmt.Fprintf(w, `vmstorage - a component of VictoriaMetrics cluster<br/>
<a href="https://docs.victoriametrics.com/cluster-victoriametrics/">docs</a><br>
`)
			return true
		}
		return requestHandler(w, r, strg)
	}
}
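
// requestHandler serves the following endpoints:
//
//	/internal/force_merge - starts a background merge for partitions matching the optional partition_prefix query arg
//	/internal/force_flush - makes recently ingested data visible for search
//	/snapshot/create, /snapshot/list, /snapshot/delete, /snapshot/delete_all - snapshot management
//
// For example, a snapshot can be created with a request like the following
// (the authKey value must match the corresponding -snapshotAuthKey flag):
//
//	curl 'http://vmstorage:8482/snapshot/create?authKey=...'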
func requestHandler(w http.ResponseWriter, r *http.Request, strg *storage.Storage) bool {
	path := r.URL.Path
	if path == "/internal/force_merge" {
		if !httpserver.CheckAuthFlag(w, r, forceMergeAuthKey) {
			return true
		}
		// Run force merge in background
		partitionNamePrefix := r.FormValue("partition_prefix")
		go func() {
			activeForceMerges.Inc()
			defer activeForceMerges.Dec()
			logger.Infof("forced merge for partition_prefix=%q has been started", partitionNamePrefix)
			startTime := time.Now()
			if err := strg.ForceMergePartitions(partitionNamePrefix); err != nil {
				logger.Errorf("error in forced merge for partition_prefix=%q: %s", partitionNamePrefix, err)
				return
			}
			logger.Infof("forced merge for partition_prefix=%q has been successfully finished in %.3f seconds", partitionNamePrefix, time.Since(startTime).Seconds())
		}()
		return true
	}
	if path == "/internal/force_flush" {
		if !httpserver.CheckAuthFlag(w, r, forceFlushAuthKey) {
			return true
		}
		logger.Infof("flushing storage to make pending data available for reading")
		strg.DebugFlush()
		return true
	}
	if !strings.HasPrefix(path, "/snapshot") {
		return false
	}
	if !httpserver.CheckAuthFlag(w, r, snapshotAuthKey) {
		return true
	}
	path = path[len("/snapshot"):]
	switch path {
	case "/create":
		snapshotsCreateTotal.Inc()
		w.Header().Set("Content-Type", "application/json")
		snapshotPath, err := strg.CreateSnapshot()
		if err != nil {
			err = fmt.Errorf("cannot create snapshot: %w", err)
			jsonResponseError(w, err)
			snapshotsCreateErrorsTotal.Inc()
			return true
		}
		fmt.Fprintf(w, `{"status":"ok","snapshot":%s}`, stringsutil.JSONString(snapshotPath))
		return true
case "/list" :
2023-02-27 21:12:03 +01:00
snapshotsListTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:23:23 +02:00
snapshots , err := strg . ListSnapshots ( )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsListErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
fmt . Fprintf ( w , ` { "status":"ok","snapshots":[ ` )
if len ( snapshots ) > 0 {
for _ , snapshot := range snapshots [ : len ( snapshots ) - 1 ] {
fmt . Fprintf ( w , "\n%q," , snapshot )
}
fmt . Fprintf ( w , "\n%q\n" , snapshots [ len ( snapshots ) - 1 ] )
}
fmt . Fprintf ( w , ` ]} ` )
return true
case "/delete" :
2023-02-27 21:12:03 +01:00
snapshotsDeleteTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:16:55 +02:00
snapshotName := r . FormValue ( "snapshot" )
2022-11-17 00:29:43 +01:00
snapshots , err := strg . ListSnapshots ( )
if err != nil {
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
2022-11-17 00:29:43 +01:00
for _ , snName := range snapshots {
if snName == snapshotName {
if err := strg . DeleteSnapshot ( snName ) ; err != nil {
err = fmt . Errorf ( "cannot delete snapshot %q: %w" , snName , err )
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteErrorsTotal . Inc ( )
2022-11-17 00:29:43 +01:00
return true
}
fmt . Fprintf ( w , ` { "status":"ok"} ` )
return true
}
}
2023-02-27 21:57:22 +01:00
err = fmt . Errorf ( "cannot find snapshot %q" , snapshotName )
2022-11-17 00:29:43 +01:00
jsonResponseError ( w , err )
2019-05-22 23:16:55 +02:00
return true
case "/delete_all" :
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllTotal . Inc ( )
2021-11-09 17:03:50 +01:00
w . Header ( ) . Set ( "Content-Type" , "application/json" )
2019-05-22 23:23:23 +02:00
snapshots , err := strg . ListSnapshots ( )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot list snapshots: %w" , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
for _ , snapshotName := range snapshots {
2019-05-22 23:23:23 +02:00
if err := strg . DeleteSnapshot ( snapshotName ) ; err != nil {
2020-06-30 21:58:18 +02:00
err = fmt . Errorf ( "cannot delete snapshot %q: %w" , snapshotName , err )
2020-03-10 22:51:50 +01:00
jsonResponseError ( w , err )
2023-02-27 21:12:03 +01:00
snapshotsDeleteAllErrorsTotal . Inc ( )
2019-05-22 23:16:55 +02:00
return true
}
}
fmt . Fprintf ( w , ` { "status":"ok"} ` )
return true
default :
return false
}
}
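
// initStaleSnapshotsRemover starts a background goroutine, which periodically
// removes snapshots older than -snapshotsMaxAge. It must be stopped via stopStaleSnapshotsRemover.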
func initStaleSnapshotsRemover(strg *storage.Storage) {
	staleSnapshotsRemoverCh = make(chan struct{})
	if snapshotsMaxAge.Duration() <= 0 {
		return
	}
	snapshotsMaxAgeDur := snapshotsMaxAge.Duration()
	staleSnapshotsRemoverWG.Add(1)
	go func() {
		defer staleSnapshotsRemoverWG.Done()
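		// Add jitter to the check interval, so multiple vmstorage nodes
		// do not probe for stale snapshots in lockstep.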
		d := timeutil.AddJitterToDuration(time.Second * 11)
		t := time.NewTicker(d)
		defer t.Stop()
		for {
			select {
			case <-staleSnapshotsRemoverCh:
				return
			case <-t.C:
			}
			if err := strg.DeleteStaleSnapshots(snapshotsMaxAgeDur); err != nil {
				// Use logger.Errorf instead of logger.Fatalf in the hope the error is temporary.
				logger.Errorf("cannot delete stale snapshots: %s", err)
			}
		}
	}()
}

func stopStaleSnapshotsRemover() {
	close(staleSnapshotsRemoverCh)
	staleSnapshotsRemoverWG.Wait()
}

var (
	staleSnapshotsRemoverCh chan struct{}
	staleSnapshotsRemoverWG sync.WaitGroup
)
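
// activeForceMerges tracks the number of in-flight forced merges; the remaining
// counters track requests and errors for the /snapshot* endpoints.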
var (
	activeForceMerges = metrics.NewCounter("vm_active_force_merges")

	snapshotsCreateTotal          = metrics.NewCounter(`vm_http_requests_total{path="/snapshot/create"}`)
	snapshotsCreateErrorsTotal    = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/create"}`)
	snapshotsListTotal            = metrics.NewCounter(`vm_http_requests_total{path="/snapshot/list"}`)
	snapshotsListErrorsTotal      = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/list"}`)
	snapshotsDeleteTotal          = metrics.NewCounter(`vm_http_requests_total{path="/snapshot/delete"}`)
	snapshotsDeleteErrorsTotal    = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/delete"}`)
	snapshotsDeleteAllTotal       = metrics.NewCounter(`vm_http_requests_total{path="/snapshot/delete_all"}`)
	snapshotsDeleteAllErrorsTotal = metrics.NewCounter(`vm_http_request_errors_total{path="/snapshot/delete_all"}`)
)
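
// writeStorageMetrics writes metrics for the given storage strg to w
// in Prometheus text exposition format. It is invoked every time the
// metrics set registered in main() is written, e.g. on every /metrics request.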
func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
	var m storage.Metrics
	strg.UpdateMetrics(&m)
	tm := &m.TableMetrics
	idbm := &m.IndexDBMetrics

	metrics.WriteGaugeUint64(w, fmt.Sprintf(`vm_free_disk_space_bytes{path=%q}`, *storageDataPath), fs.MustGetFreeSpace(*storageDataPath))
	metrics.WriteGaugeUint64(w, fmt.Sprintf(`vm_free_disk_space_limit_bytes{path=%q}`, *storageDataPath), uint64(minFreeDiskSpaceBytes.N))

	isReadOnly := 0
	if strg.IsReadOnly() {
		isReadOnly = 1
	}
	metrics.WriteGaugeUint64(w, fmt.Sprintf(`vm_storage_is_read_only{path=%q}`, *storageDataPath), uint64(isReadOnly))

	metrics.WriteGaugeUint64(w, `vm_active_merges{type="storage/inmemory"}`, tm.ActiveInmemoryMerges)
	metrics.WriteGaugeUint64(w, `vm_active_merges{type="storage/small"}`, tm.ActiveSmallMerges)
	metrics.WriteGaugeUint64(w, `vm_active_merges{type="storage/big"}`, tm.ActiveBigMerges)
	metrics.WriteGaugeUint64(w, `vm_active_merges{type="indexdb/inmemory"}`, idbm.ActiveInmemoryMerges)
	metrics.WriteGaugeUint64(w, `vm_active_merges{type="indexdb/file"}`, idbm.ActiveFileMerges)
	metrics.WriteCounterUint64(w, `vm_merges_total{type="storage/inmemory"}`, tm.InmemoryMergesCount)
	metrics.WriteCounterUint64(w, `vm_merges_total{type="storage/small"}`, tm.SmallMergesCount)
	metrics.WriteCounterUint64(w, `vm_merges_total{type="storage/big"}`, tm.BigMergesCount)
	metrics.WriteCounterUint64(w, `vm_merges_total{type="indexdb/inmemory"}`, idbm.InmemoryMergesCount)
	metrics.WriteCounterUint64(w, `vm_merges_total{type="indexdb/file"}`, idbm.FileMergesCount)
	metrics.WriteCounterUint64(w, `vm_rows_merged_total{type="storage/inmemory"}`, tm.InmemoryRowsMerged)
	metrics.WriteCounterUint64(w, `vm_rows_merged_total{type="storage/small"}`, tm.SmallRowsMerged)
	metrics.WriteCounterUint64(w, `vm_rows_merged_total{type="storage/big"}`, tm.BigRowsMerged)
	metrics.WriteCounterUint64(w, `vm_rows_merged_total{type="indexdb/inmemory"}`, idbm.InmemoryItemsMerged)
	metrics.WriteCounterUint64(w, `vm_rows_merged_total{type="indexdb/file"}`, idbm.FileItemsMerged)
	metrics.WriteCounterUint64(w, `vm_rows_deleted_total{type="storage/inmemory"}`, tm.InmemoryRowsDeleted)
	metrics.WriteCounterUint64(w, `vm_rows_deleted_total{type="storage/small"}`, tm.SmallRowsDeleted)
	metrics.WriteCounterUint64(w, `vm_rows_deleted_total{type="storage/big"}`, tm.BigRowsDeleted)
	metrics.WriteGaugeUint64(w, `vm_part_references{type="storage/inmemory"}`, tm.InmemoryPartsRefCount)
	metrics.WriteGaugeUint64(w, `vm_part_references{type="storage/small"}`, tm.SmallPartsRefCount)
	metrics.WriteGaugeUint64(w, `vm_part_references{type="storage/big"}`, tm.BigPartsRefCount)
	metrics.WriteGaugeUint64(w, `vm_partition_references{type="storage"}`, tm.PartitionsRefCount)
	metrics.WriteGaugeUint64(w, `vm_object_references{type="indexdb"}`, idbm.IndexDBRefCount)
	metrics.WriteGaugeUint64(w, `vm_part_references{type="indexdb"}`, idbm.PartsRefCount)
	metrics.WriteCounterUint64(w, `vm_missing_tsids_for_metric_id_total`, idbm.MissingTSIDsForMetricID)
	metrics.WriteCounterUint64(w, `vm_index_blocks_with_metric_ids_processed_total`, idbm.IndexBlocksWithMetricIDsProcessed)
	metrics.WriteCounterUint64(w, `vm_index_blocks_with_metric_ids_incorrect_order_total`, idbm.IndexBlocksWithMetricIDsIncorrectOrder)
	metrics.WriteGaugeUint64(w, `vm_composite_index_min_timestamp`, idbm.MinTimestampForCompositeIndex/1e3)
	metrics.WriteCounterUint64(w, `vm_composite_filter_success_conversions_total`, idbm.CompositeFilterSuccessConversions)
	metrics.WriteCounterUint64(w, `vm_composite_filter_missing_conversions_total`, idbm.CompositeFilterMissingConversions)

	// vm_assisted_merges_total name is used for backwards compatibility.
	metrics.WriteCounterUint64(w, `vm_assisted_merges_total{type="indexdb/inmemory"}`, idbm.InmemoryPartsLimitReachedCount)

	metrics.WriteCounterUint64(w, `vm_indexdb_items_added_total`, idbm.ItemsAdded)
	metrics.WriteCounterUint64(w, `vm_indexdb_items_added_size_bytes_total`, idbm.ItemsAddedSizeBytes)
	metrics.WriteCounterUint64(w, `vm_indexdb_items_dropped_total{reason="too_long_item"}`, idbm.TooLongItemsDroppedTotal)

	metrics.WriteGaugeUint64(w, `vm_pending_rows{type="storage"}`, tm.PendingRows)
	metrics.WriteGaugeUint64(w, `vm_pending_rows{type="indexdb"}`, idbm.PendingItems)
	metrics.WriteGaugeUint64(w, `vm_parts{type="storage/inmemory"}`, tm.InmemoryPartsCount)
	metrics.WriteGaugeUint64(w, `vm_parts{type="storage/small"}`, tm.SmallPartsCount)
	metrics.WriteGaugeUint64(w, `vm_parts{type="storage/big"}`, tm.BigPartsCount)
	metrics.WriteGaugeUint64(w, `vm_parts{type="indexdb/inmemory"}`, idbm.InmemoryPartsCount)
	metrics.WriteGaugeUint64(w, `vm_parts{type="indexdb/file"}`, idbm.FilePartsCount)
	metrics.WriteGaugeUint64(w, `vm_last_partition_parts{type="storage/inmemory"}`, tm.LastPartition.InmemoryPartsCount)
	metrics.WriteGaugeUint64(w, `vm_last_partition_parts{type="storage/small"}`, tm.LastPartition.SmallPartsCount)
	metrics.WriteGaugeUint64(w, `vm_last_partition_parts{type="storage/big"}`, tm.LastPartition.BigPartsCount)
	metrics.WriteGaugeUint64(w, `vm_blocks{type="storage/inmemory"}`, tm.InmemoryBlocksCount)
	metrics.WriteGaugeUint64(w, `vm_blocks{type="storage/small"}`, tm.SmallBlocksCount)
	metrics.WriteGaugeUint64(w, `vm_blocks{type="storage/big"}`, tm.BigBlocksCount)
	metrics.WriteGaugeUint64(w, `vm_blocks{type="indexdb/inmemory"}`, idbm.InmemoryBlocksCount)
	metrics.WriteGaugeUint64(w, `vm_blocks{type="indexdb/file"}`, idbm.FileBlocksCount)
	metrics.WriteGaugeUint64(w, `vm_data_size_bytes{type="storage/inmemory"}`, tm.InmemorySizeBytes)
	metrics.WriteGaugeUint64(w, `vm_data_size_bytes{type="storage/small"}`, tm.SmallSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_data_size_bytes{type="storage/big"}`, tm.BigSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_data_size_bytes{type="indexdb/inmemory"}`, idbm.InmemorySizeBytes)
	metrics.WriteGaugeUint64(w, `vm_data_size_bytes{type="indexdb/file"}`, idbm.FileSizeBytes)
	metrics.WriteCounterUint64(w, `vm_rows_received_by_storage_total`, m.RowsReceivedTotal)
	metrics.WriteCounterUint64(w, `vm_rows_added_to_storage_total`, m.RowsAddedTotal)
	metrics.WriteCounterUint64(w, `vm_deduplicated_samples_total{type="merge"}`, m.DedupsDuringMerge)
	metrics.WriteGaugeUint64(w, `vm_snapshots`, m.SnapshotsCount)
	metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="big_timestamp"}`, m.TooBigTimestampRows)
	metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="small_timestamp"}`, m.TooSmallTimestampRows)
	metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="invalid_raw_metric_name"}`, m.InvalidRawMetricNames)
	if *maxHourlySeries > 0 {
		metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="hourly_limit_exceeded"}`, m.HourlySeriesLimitRowsDropped)
	}
	if *maxDailySeries > 0 {
		metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="daily_limit_exceeded"}`, m.DailySeriesLimitRowsDropped)
	}
	metrics.WriteCounterUint64(w, `vm_timeseries_repopulated_total`, m.TimeseriesRepopulated)
	metrics.WriteCounterUint64(w, `vm_timeseries_precreated_total`, m.TimeseriesPreCreated)
	metrics.WriteCounterUint64(w, `vm_new_timeseries_created_total`, m.NewTimeseriesCreated)
	metrics.WriteCounterUint64(w, `vm_slow_row_inserts_total`, m.SlowRowInserts)
	metrics.WriteCounterUint64(w, `vm_slow_per_day_index_inserts_total`, m.SlowPerDayIndexInserts)
	metrics.WriteCounterUint64(w, `vm_slow_metric_name_loads_total`, m.SlowMetricNameLoads)

	if *maxHourlySeries > 0 {
		metrics.WriteGaugeUint64(w, `vm_hourly_series_limit_current_series`, m.HourlySeriesLimitCurrentSeries)
		metrics.WriteGaugeUint64(w, `vm_hourly_series_limit_max_series`, m.HourlySeriesLimitMaxSeries)
		metrics.WriteCounterUint64(w, `vm_hourly_series_limit_rows_dropped_total`, m.HourlySeriesLimitRowsDropped)
	}
	if *maxDailySeries > 0 {
		metrics.WriteGaugeUint64(w, `vm_daily_series_limit_current_series`, m.DailySeriesLimitCurrentSeries)
		metrics.WriteGaugeUint64(w, `vm_daily_series_limit_max_series`, m.DailySeriesLimitMaxSeries)
		metrics.WriteCounterUint64(w, `vm_daily_series_limit_rows_dropped_total`, m.DailySeriesLimitRowsDropped)
	}

	metrics.WriteCounterUint64(w, `vm_timestamps_blocks_merged_total`, m.TimestampsBlocksMerged)
	metrics.WriteCounterUint64(w, `vm_timestamps_bytes_saved_total`, m.TimestampsBytesSaved)
	metrics.WriteGaugeUint64(w, `vm_rows{type="storage/inmemory"}`, tm.InmemoryRowsCount)
	metrics.WriteGaugeUint64(w, `vm_rows{type="storage/small"}`, tm.SmallRowsCount)
	metrics.WriteGaugeUint64(w, `vm_rows{type="storage/big"}`, tm.BigRowsCount)
	metrics.WriteGaugeUint64(w, `vm_rows{type="indexdb/inmemory"}`, idbm.InmemoryItemsCount)
	metrics.WriteGaugeUint64(w, `vm_rows{type="indexdb/file"}`, idbm.FileItemsCount)
	metrics.WriteCounterUint64(w, `vm_date_range_search_calls_total`, idbm.DateRangeSearchCalls)
	metrics.WriteCounterUint64(w, `vm_date_range_hits_total`, idbm.DateRangeSearchHits)
	metrics.WriteCounterUint64(w, `vm_global_search_calls_total`, idbm.GlobalSearchCalls)
	metrics.WriteCounterUint64(w, `vm_missing_metric_names_for_metric_id_total`, idbm.MissingMetricNamesForMetricID)
	metrics.WriteCounterUint64(w, `vm_date_metric_id_cache_syncs_total`, m.DateMetricIDCacheSyncsCount)
	metrics.WriteCounterUint64(w, `vm_date_metric_id_cache_resets_total`, m.DateMetricIDCacheResetsCount)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/tsid"}`, m.TSIDCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/metricIDs"}`, m.MetricIDCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/metricName"}`, m.MetricNameCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/date_metricID"}`, m.DateMetricIDCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/hour_metric_ids"}`, m.HourMetricIDCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/next_day_metric_ids"}`, m.NextDayMetricIDCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/indexBlocks"}`, tm.IndexBlocksCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSize)
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/regexps"}`, uint64(storage.RegexpCacheSize()))
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/regexpPrefixes"}`, uint64(storage.RegexpPrefixesCacheSize()))
	metrics.WriteGaugeUint64(w, `vm_cache_entries{type="storage/prefetchedMetricIDs"}`, m.PrefetchedMetricIDsSize)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/tsid"}`, m.TSIDCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/metricIDs"}`, m.MetricIDCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/metricName"}`, m.MetricNameCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/indexBlocks"}`, tm.IndexBlocksCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/date_metricID"}`, m.DateMetricIDCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/hour_metric_ids"}`, m.HourMetricIDCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/next_day_metric_ids"}`, m.NextDayMetricIDCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/regexps"}`, uint64(storage.RegexpCacheSizeBytes()))
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/regexpPrefixes"}`, uint64(storage.RegexpPrefixesCacheSizeBytes()))
	metrics.WriteGaugeUint64(w, `vm_cache_size_bytes{type="storage/prefetchedMetricIDs"}`, m.PrefetchedMetricIDsSizeBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/tsid"}`, m.TSIDCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/metricIDs"}`, m.MetricIDCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/metricName"}`, m.MetricNameCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/indexBlocks"}`, tm.IndexBlocksCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheSizeMaxBytes)
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/regexps"}`, uint64(storage.RegexpCacheMaxSizeBytes()))
	metrics.WriteGaugeUint64(w, `vm_cache_size_max_bytes{type="storage/regexpPrefixes"}`, uint64(storage.RegexpPrefixesCacheMaxSizeBytes()))
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/tsid"}`, m.TSIDCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/metricIDs"}`, m.MetricIDCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/metricName"}`, m.MetricNameCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/indexBlocks"}`, tm.IndexBlocksCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheRequests)
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/regexps"}`, storage.RegexpCacheRequests())
	metrics.WriteCounterUint64(w, `vm_cache_requests_total{type="storage/regexpPrefixes"}`, storage.RegexpPrefixesCacheRequests())
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/tsid"}`, m.TSIDCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/metricIDs"}`, m.MetricIDCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/metricName"}`, m.MetricNameCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/indexBlocks"}`, tm.IndexBlocksCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/dataBlocks"}`, idbm.DataBlocksCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/indexBlocks"}`, idbm.IndexBlocksCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="indexdb/tagFiltersToMetricIDs"}`, idbm.TagFiltersToMetricIDsCacheMisses)
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/regexps"}`, storage.RegexpCacheMisses())
	metrics.WriteCounterUint64(w, `vm_cache_misses_total{type="storage/regexpPrefixes"}`, storage.RegexpPrefixesCacheMisses())
	metrics.WriteCounterUint64(w, `vm_deleted_metrics_total{type="indexdb"}`, idbm.DeletedMetricsCount)
	metrics.WriteCounterUint64(w, `vm_cache_collisions_total{type="storage/tsid"}`, m.TSIDCacheCollisions)
	metrics.WriteCounterUint64(w, `vm_cache_collisions_total{type="storage/metricName"}`, m.MetricNameCacheCollisions)
	metrics.WriteGaugeUint64(w, `vm_next_retention_seconds`, m.NextRetentionSeconds)
	metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled`, tm.ScheduledDownsamplingPartitions)
	metrics.WriteGaugeUint64(w, `vm_downsampling_partitions_scheduled_size_bytes`, tm.ScheduledDownsamplingPartitionsSize)
}
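
// jsonResponseError logs err and writes it to w as a JSON error response
// with HTTP 500 status code.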
func jsonResponseError(w http.ResponseWriter, err error) {
	logger.Errorf("%s", err)
	w.WriteHeader(http.StatusInternalServerError)
	errStr := err.Error()
	fmt.Fprintf(w, `{"status":"error","msg":%s}`, stringsutil.JSONString(errStr))
}

func usage() {
	const s = `
vmstorage stores time series data obtained from vminsert and returns the requested data to vmselect.

See the docs at https://docs.victoriametrics.com/cluster-victoriametrics/ .
`
	flagutil.Usage(s)
}