mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-14 16:12:15 +01:00
app/vmselect/netstorage: consistently select the sample with the biggest value out of samples with identical timestamps
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333 This fix is based on https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3620 , but doesn't slow down the common case with merging replicated data blocks so significantly. Benchmark results: Before the change: BenchmarkMergeSortBlocks/replicationFactor-1-4 13968 85643 ns/op 956.53 MB/s 1700 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-2-4 10806 109171 ns/op 1500.77 MB/s 2191 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-3-4 8887 130623 ns/op 1881.45 MB/s 2660 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-4-4 7440 157348 ns/op 2082.52 MB/s 3174 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-5-4 6534 184473 ns/op 2220.38 MB/s 3612 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4 13419 85205 ns/op 961.44 MB/s 2213 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 579 1894900 ns/op 43.23 MB/s 46760 B/op 1 allocs/op After the change: BenchmarkMergeSortBlocks/replicationFactor-1-4 13832 85298 ns/op 960.40 MB/s 1716 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-2-4 8833 134222 ns/op 1220.66 MB/s 2675 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-3-4 6487 184830 ns/op 1329.65 MB/s 3636 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-4-4 4977 236318 ns/op 1386.61 MB/s 4733 B/op 1 allocs/op BenchmarkMergeSortBlocks/replicationFactor-5-4 4088 296734 ns/op 1380.36 MB/s 5761 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-bestcase-4 14083 84067 ns/op 974.47 MB/s 2110 B/op 1 allocs/op BenchmarkMergeSortBlocks/overlapped-blocks-worstcase-4 536 2043534 ns/op 40.09 MB/s 50511 B/op 1 allocs/op
This commit is contained in:
parent
7792ba3272
commit
96f04c9863
@ -525,18 +525,17 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) {
|
||||
}
|
||||
sbNext := sbh.getNextBlock()
|
||||
tsNext := sbNext.Timestamps[sbNext.NextIdx]
|
||||
topTimestamps := top.Timestamps
|
||||
topNextIdx := top.NextIdx
|
||||
if n := equalTimestampsPrefix(topTimestamps[topNextIdx:], sbNext.Timestamps[sbNext.NextIdx:]); n > 0 && dedupInterval > 0 {
|
||||
if n := equalSamplesPrefix(top, sbNext); n > 0 && dedupInterval > 0 {
|
||||
// Skip n replicated samples at top if deduplication is enabled.
|
||||
top.NextIdx = topNextIdx + n
|
||||
} else {
|
||||
// Copy samples from top to dst with timestamps not exceeding tsNext.
|
||||
top.NextIdx = topNextIdx + binarySearchTimestamps(topTimestamps[topNextIdx:], tsNext)
|
||||
dst.Timestamps = append(dst.Timestamps, topTimestamps[topNextIdx:top.NextIdx]...)
|
||||
top.NextIdx = topNextIdx + binarySearchTimestamps(top.Timestamps[topNextIdx:], tsNext)
|
||||
dst.Timestamps = append(dst.Timestamps, top.Timestamps[topNextIdx:top.NextIdx]...)
|
||||
dst.Values = append(dst.Values, top.Values[topNextIdx:top.NextIdx]...)
|
||||
}
|
||||
if top.NextIdx < len(topTimestamps) {
|
||||
if top.NextIdx < len(top.Timestamps) {
|
||||
heap.Fix(&sbh, 0)
|
||||
} else {
|
||||
heap.Pop(&sbh)
|
||||
@ -552,6 +551,14 @@ func mergeSortBlocks(dst *Result, sbh sortBlocksHeap, dedupInterval int64) {
|
||||
|
||||
var dedupsDuringSelect = metrics.NewCounter(`vm_deduplicated_samples_total{type="select"}`)
|
||||
|
||||
func equalSamplesPrefix(a, b *sortBlock) int {
|
||||
n := equalTimestampsPrefix(a.Timestamps[a.NextIdx:], b.Timestamps[b.NextIdx:])
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
return equalValuesPrefix(a.Values[a.NextIdx:a.NextIdx+n], b.Values[b.NextIdx:b.NextIdx+n])
|
||||
}
|
||||
|
||||
func equalTimestampsPrefix(a, b []int64) int {
|
||||
for i, v := range a {
|
||||
if i >= len(b) || v != b[i] {
|
||||
@ -561,6 +568,15 @@ func equalTimestampsPrefix(a, b []int64) int {
|
||||
return len(a)
|
||||
}
|
||||
|
||||
func equalValuesPrefix(a, b []float64) int {
|
||||
for i, v := range a {
|
||||
if i >= len(b) || v != b[i] {
|
||||
return i
|
||||
}
|
||||
}
|
||||
return len(a)
|
||||
}
|
||||
|
||||
func binarySearchTimestamps(timestamps []int64, ts int64) int {
|
||||
// The code has been adapted from sort.Search.
|
||||
n := len(timestamps)
|
||||
|
@ -129,21 +129,35 @@ func TestMergeSortBlocks(t *testing.T) {
|
||||
Values: []float64{9, 4.2, 2.1, 5.2, 42, 6.1},
|
||||
})
|
||||
|
||||
// Multiple blocks with identical timestamps.
|
||||
// Multiple blocks with identical timestamps and identical values.
|
||||
f([]*sortBlock{
|
||||
{
|
||||
Timestamps: []int64{1, 2, 4, 5},
|
||||
Values: []float64{9, 5.2, 6.1, 9},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{1, 2, 4},
|
||||
Values: []float64{9, 5.2, 6.1},
|
||||
},
|
||||
}, 1, &Result{
|
||||
Timestamps: []int64{1, 2, 4, 5},
|
||||
Values: []float64{9, 5.2, 6.1, 9},
|
||||
})
|
||||
|
||||
// Multiple blocks with identical timestamps.
|
||||
f([]*sortBlock{
|
||||
{
|
||||
Timestamps: []int64{1, 2, 4, 5},
|
||||
Values: []float64{9, 5.2, 6.1, 9},
|
||||
},
|
||||
{
|
||||
Timestamps: []int64{1, 2, 4},
|
||||
Values: []float64{4.2, 2.1, 42},
|
||||
},
|
||||
}, 1, &Result{
|
||||
Timestamps: []int64{1, 2, 4},
|
||||
Values: []float64{4.2, 2.1, 42},
|
||||
Timestamps: []int64{1, 2, 4, 5},
|
||||
Values: []float64{9, 5.2, 42, 9},
|
||||
})
|
||||
|
||||
// Multiple blocks with identical timestamps, disabled deduplication.
|
||||
f([]*sortBlock{
|
||||
{
|
||||
|
@ -45,6 +45,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): [dockerswarm_sd_configs](https://docs.victoriametrics.com/sd_configs.html#dockerswarm_sd_configs): properly encode `filters` field. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3579).
|
||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix possible resource leak after hot reload of the updated [consul_sd_configs](https://docs.victoriametrics.com/sd_configs.html#consul_sd_configs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3468).
|
||||
* BUGFIX: properly return label names starting from uppercase such as `CamelCaseLabel` from [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3566).
|
||||
* BUGFIX: consistently select the sample with the biggest value out of samples with identical timestamps during querying when the [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled according to [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3333). Previously random samples could be selected during querying.
|
||||
|
||||
|
||||
## [v1.85.3](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.85.3)
|
||||
|
Loading…
Reference in New Issue
Block a user