app/vmstorage: add vm_slow_row_inserts_total and vm_slow_per_day_index_inserts_total metrics
for determining whether VictoriaMetrics requires more RAM for the current number of active time series
parent ab8f5545bc
commit 82ccdfaa91
@@ -910,6 +910,9 @@ The most interesting metrics are:
 * `sum(rate(vm_rows_inserted_total[5m]))` - ingestion rate, i.e. how many samples are inserted into the database per second.
 * `vm_free_disk_space_bytes` - free space left at `-storageDataPath`.
 * `sum(vm_data_size_bytes)` - the total size of data on disk.
+* `increase(vm_slow_row_inserts_total[5m])` - the number of slow inserts during the last 5 minutes.
+If this value remains high during extended periods of time, then it is likely more RAM is needed for optimal handling
+of the current number of active time series.
 
 
 ### Troubleshooting
@@ -922,8 +925,9 @@ The most interesting metrics are:
 
 * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second,
 then it is likely you have too many active time series for the current amount of RAM.
+See `vm_slow_row_inserts_total` and `vm_slow_per_day_index_inserts_total` [metrics](#monitoring).
 It is recommended increasing the amount of RAM on the node with VictoriaMetrics in order to improve
-ingestion performance.
+ingestion performance in this case.
 Another option is to increase `-memory.allowedPercent` command-line flag value. Be careful with this
 option, since too big value for `-memory.allowedPercent` may result in high I/O usage.
 
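To make the README guidance concrete, below is a minimal sketch (not part of this commit) that fetches the `/metrics` page of a single-node VictoriaMetrics instance and prints the two new counters. The `localhost:8428` address is an assumption based on the default single-node listen address; in practice you would chart `increase(vm_slow_row_inserts_total[5m])` in Prometheus or Grafana as the README suggests.

```go
// Minimal sketch: read vm_slow_row_inserts_total and
// vm_slow_per_day_index_inserts_total from the /metrics page of a
// single-node VictoriaMetrics instance. The address is an assumption.
package main

import (
	"bufio"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func main() {
	resp, err := http.Get("http://localhost:8428/metrics")
	if err != nil {
		log.Fatalf("cannot fetch /metrics: %s", err)
	}
	defer resp.Body.Close()

	// Scan the Prometheus text exposition line by line and keep only
	// the two slow-insert counters added by this commit.
	sc := bufio.NewScanner(resp.Body)
	for sc.Scan() {
		line := sc.Text()
		if strings.HasPrefix(line, "vm_slow_row_inserts_total") ||
			strings.HasPrefix(line, "vm_slow_per_day_index_inserts_total") {
			fmt.Println(line)
		}
	}
	if err := sc.Err(); err != nil {
		log.Fatalf("error reading /metrics response: %s", err)
	}
}
```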
@@ -409,6 +409,13 @@ func registerStorageMetrics() {
 		return float64(m().AddRowsConcurrencyCurrent)
 	})
 
+	metrics.NewGauge(`vm_slow_row_inserts_total`, func() float64 {
+		return float64(m().SlowRowInserts)
+	})
+	metrics.NewGauge(`vm_slow_per_day_index_inserts_total`, func() float64 {
+		return float64(m().SlowPerDayIndexInserts)
+	})
+
 	metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
 		return float64(tm().BigRowsCount)
 	})
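The gauges registered above follow the callback pattern of the `github.com/VictoriaMetrics/metrics` package: the exported value is computed on each scrape rather than stored in the gauge itself, which is why cumulative counters such as `vm_slow_row_inserts_total` can be exposed through `NewGauge` and queried with `increase(...)`. A standalone sketch of that pattern follows; the metric name, counter variable and listen address are illustrative only, not taken from this commit.

```go
// Standalone sketch of a callback-backed gauge using
// github.com/VictoriaMetrics/metrics. Names here are illustrative only.
package main

import (
	"log"
	"net/http"
	"sync/atomic"

	"github.com/VictoriaMetrics/metrics"
)

var slowInserts uint64 // would be bumped on a slow code path

func main() {
	// The callback runs on every /metrics scrape, so the exported value
	// always reflects the current counter state.
	metrics.NewGauge(`example_slow_inserts_total`, func() float64 {
		return float64(atomic.LoadUint64(&slowInserts))
	})

	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		metrics.WritePrometheus(w, false)
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}
```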
@@ -39,6 +39,9 @@ type Storage struct {
 	addRowsConcurrencyLimitTimeout uint64
 	addRowsConcurrencyDroppedRows  uint64
 
+	slowRowInserts         uint64
+	slowPerDayIndexInserts uint64
+
 	path            string
 	cachePath       string
 	retentionMonths int
@@ -323,6 +326,9 @@ type Metrics struct {
 	AddRowsConcurrencyCapacity uint64
 	AddRowsConcurrencyCurrent  uint64
 
+	SlowRowInserts         uint64
+	SlowPerDayIndexInserts uint64
+
 	TSIDCacheSize      uint64
 	TSIDCacheSizeBytes uint64
 	TSIDCacheRequests  uint64
@@ -377,6 +383,9 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
 	m.AddRowsConcurrencyCapacity = uint64(cap(addRowsConcurrencyCh))
 	m.AddRowsConcurrencyCurrent = uint64(len(addRowsConcurrencyCh))
 
+	m.SlowRowInserts += atomic.LoadUint64(&s.slowRowInserts)
+	m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
+
 	var cs fastcache.Stats
 	s.tsidCache.UpdateStats(&cs)
 	m.TSIDCacheSize += cs.EntriesCount
@@ -1095,6 +1104,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 		}
 	}
 	if pmrs != nil {
+		atomic.AddUint64(&s.slowRowInserts, uint64(len(pmrs.pmrs)))
 		// Sort pendingMetricRows by canonical metric name in order to speed up search via `is` in the loop below.
 		pendingMetricRows := pmrs.pmrs
 		sort.Slice(pendingMetricRows, func(i, j int) bool {
@@ -1294,6 +1304,7 @@ func (s *Storage) updatePerDateData(rows []rawRow) error {
 
 	// Slow path - add new (date, metricID) entries to indexDB.
 
+	atomic.AddUint64(&s.slowPerDayIndexInserts, uint64(len(pendingDateMetricIDs)))
 	// Sort pendingDateMetricIDs by (date, metricID) in order to speed up `is` search in the loop below.
 	sort.Slice(pendingDateMetricIDs, func(i, j int) bool {
 		a := pendingDateMetricIDs[i]
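Taken together, the storage changes follow one pattern: unexported `uint64` fields on `Storage` are bumped with `atomic.AddUint64` on the slow code paths and folded into the `Metrics` snapshot with `atomic.LoadUint64` in `UpdateMetrics`. The sketch below compresses that pattern into a self-contained program; the type and method names are simplified stand-ins, not the actual `lib/storage` code.

```go
// Self-contained sketch of the counter pattern used in this commit: atomic
// fields on the storage object, incremented on slow paths and snapshotted
// in UpdateMetrics. Names are simplified; this is not the real storage.go.
package main

import (
	"fmt"
	"sync/atomic"
)

type Storage struct {
	slowRowInserts         uint64
	slowPerDayIndexInserts uint64
}

type Metrics struct {
	SlowRowInserts         uint64
	SlowPerDayIndexInserts uint64
}

// addRows stands in for Storage.add: rows that fall onto the slow path
// (e.g. a cache miss) are counted atomically.
func (s *Storage) addRows(pendingRows int) {
	if pendingRows > 0 {
		atomic.AddUint64(&s.slowRowInserts, uint64(pendingRows))
	}
}

// UpdateMetrics copies the counters into the snapshot struct, mirroring
// what the real UpdateMetrics does for the /metrics gauges.
func (s *Storage) UpdateMetrics(m *Metrics) {
	m.SlowRowInserts += atomic.LoadUint64(&s.slowRowInserts)
	m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
}

func main() {
	var s Storage
	s.addRows(3)

	var m Metrics
	s.UpdateMetrics(&m)
	fmt.Println(m.SlowRowInserts) // 3
}
```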