Mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git (synced 2024-12-15 08:23:34 +01:00)

lib/promscrape: follow-up for 393876e52a

- Document the change in docs/CHANGELOG.md
- Further reduce memory usage when sending stale markers by parsing the response in stream parsing mode
- Update the TestSendStaleSeries test accordingly

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3675

Parent: 8e2a8a6ae2
Commit: 71a170d404
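The core idea of the change is to stop unmarshaling the whole last scrape response at once and instead feed it through the stream parser, generating and pushing stale markers per batch of rows. Below is a minimal standalone sketch of that approach (illustrative only, not the vmagent code; the sample scrape body is hypothetical, and the real implementation is in the scrapework diff further down):

```go
package main

import (
	"bytes"
	"fmt"

	parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
)

func main() {
	// Body of the last successful scrape in Prometheus text format (hypothetical sample data).
	lastScrapeBody := "foo 1\nbar 2\nbaz 3\n"
	br := bytes.NewBufferString(lastScrapeBody)
	// ParseStream invokes the callback with batches of rows instead of
	// materializing all parsed rows in memory at once.
	err := parser.ParseStream(br, 0, false, func(rows []parser.Row) error {
		// In vmagent this is where the rows are converted to time series,
		// their values are replaced with stale markers and the result is pushed.
		fmt.Printf("received a batch of %d rows\n", len(rows))
		return nil
	}, func(errStr string) {
		fmt.Println("parse error:", errStr)
	})
	if err != nil {
		fmt.Println("cannot parse scrape body:", err)
	}
}
```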
@@ -15,8 +15,10 @@ The following tip changes can be tested by building VictoriaMetrics components f

 ## tip

+* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): reduce memory usage when sending stale markers for targets which expose a big number of metrics. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3675) issues.
 * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): propagate all the timeout-related errors from `vmstorage` to `vmselect`. Previously some timeout errors weren't returned from `vmstorage` to `vmselect`. Instead, `vmstorage` could log the error and close the connection to `vmselect`, so `vmselect` was logging cryptic errors such as `cannot execute funcName="..." on vmstorage "...": EOF`.
-* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): add support for time zone selection for older versions of browsers. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3680)
+* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): add support for time zone selection for older versions of browsers. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3680).

 ## [v1.86.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.86.2)
@@ -1,6 +1,7 @@
 package promscrape

 import (
+	"bytes"
 	"flag"
 	"fmt"
 	"io"
@@ -767,11 +768,6 @@ func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) int {

 var sendStaleSeriesConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())

-// maxStaleSeriesAtOnce defines the max number of stale series
-// to process and send at once. It prevents from excessive memory usage
-// when big number of metrics become stale at the same time.
-const maxStaleSeriesAtOnce = 1e3
-
 func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
 	// This function is CPU-bound, while it may allocate big amounts of memory.
 	// That's why it is a good idea to limit the number of concurrent calls to this function
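sendStaleSeriesConcurrencyLimitCh above is a buffered channel sized to cgroup.AvailableCPUs(). Such a channel is normally used as a counting semaphore around the CPU-bound body of sendStaleSeries (the acquiring code itself lies outside the hunks shown here). A self-contained sketch of that idiom, using runtime.NumCPU() in place of the cgroup-aware helper:

```go
package main

import (
	"fmt"
	"runtime"
	"sync"
)

// concurrencyLimitCh plays the role of sendStaleSeriesConcurrencyLimitCh:
// at most runtime.NumCPU() goroutines may hold a slot at any moment.
var concurrencyLimitCh = make(chan struct{}, runtime.NumCPU())

func cpuBoundWork(id int) {
	concurrencyLimitCh <- struct{}{}        // acquire a slot; blocks while all slots are busy
	defer func() { <-concurrencyLimitCh }() // release the slot
	fmt.Println("processing", id)
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 32; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			cpuBoundWork(id)
		}(i)
	}
	wg.Wait()
}
```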
@@ -794,37 +790,44 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
 		writeRequestCtxPool.Put(wc)
 	}()
 	if bodyString != "" {
-		wc.rows.UnmarshalWithErrLogger(bodyString, sw.logError)
-	}
-
-	srcRows := wc.rows.Rows
-	for from := 0; from < len(srcRows); from += maxStaleSeriesAtOnce {
-		to := from + maxStaleSeriesAtOnce
-		if to > len(srcRows) {
-			to = len(srcRows)
-		}
-
-		for i := range srcRows[from:to] {
-			sw.addRowToTimeseries(wc, &srcRows[i], timestamp, true)
-		}
-
-		// add auto series at the last iteration
-		if addAutoSeries && to == len(srcRows) {
-			am := &autoMetrics{}
-			sw.addAutoMetrics(am, wc, timestamp)
-		}
-
-		// Apply series limit to stale markers in order to prevent sending stale markers for newly created series.
-		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3660
-		if sw.seriesLimitExceeded {
-			sw.applySeriesLimit(wc)
-		}
-
-		series := wc.writeRequest.Timeseries
-		if len(series) == 0 {
-			continue
-		}
-		// Substitute all the values with Prometheus stale markers.
+		// Send stale markers in streaming mode in order to reduce memory usage
+		// when stale markers for targets exposing big number of metrics must be generated.
+		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3668
+		// and https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3675
+		var mu sync.Mutex
+		br := bytes.NewBufferString(bodyString)
+		err := parser.ParseStream(br, timestamp, false, func(rows []parser.Row) error {
+			mu.Lock()
+			defer mu.Unlock()
+			for i := range rows {
+				sw.addRowToTimeseries(wc, &rows[i], timestamp, true)
+			}
+			// Apply series limit to stale markers in order to prevent sending stale markers for newly created series.
+			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3660
+			if sw.seriesLimitExceeded {
+				sw.applySeriesLimit(wc)
+			}
+			// Push the collected rows to sw before returning from the callback, since they cannot be held
+			// after returning from the callback - this will result in data race.
+			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/825#issuecomment-723198247
+			setStaleMarkersForRows(wc.writeRequest.Timeseries)
+			sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
+			wc.resetNoRows()
+			return nil
+		}, sw.logError)
+		if err != nil {
+			sw.logError(fmt.Errorf("cannot send stale markers: %s", err).Error())
+		}
+	}
+	if addAutoSeries {
+		am := &autoMetrics{}
+		sw.addAutoMetrics(am, wc, timestamp)
+	}
+	setStaleMarkersForRows(wc.writeRequest.Timeseries)
+	sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
+}
+
+func setStaleMarkersForRows(series []prompbmarshal.TimeSeries) {
 	for _, tss := range series {
 		samples := tss.Samples
 		for i := range samples {
@@ -832,9 +835,6 @@ func (sw *scrapeWork) sendStaleSeries(lastScrape, currScrape string, timestamp int64, addAutoSeries bool) {
 		}
 		staleSamplesCreated.Add(len(samples))
 	}
-		sw.pushData(sw.Config.AuthToken, &wc.writeRequest)
-		wc.reset()
-	}
 }

 var staleSamplesCreated = metrics.NewCounter(`vm_promscrape_stale_samples_created_total`)
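The loop body of setStaleMarkersForRows falls outside the hunks shown above, but its purpose is stated in the removed comment: substitute all sample values with Prometheus stale markers (a special NaN with the bit pattern 0x7ff0000000000002). A hedged sketch of what such a substitution looks like; the helper VictoriaMetrics itself uses for the stale NaN is not shown in this diff:

```go
package main

import (
	"fmt"
	"math"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)

// staleNaN is the NaN bit pattern Prometheus uses as a staleness marker.
var staleNaN = math.Float64frombits(0x7ff0000000000002)

// setStaleMarkers mirrors the idea of setStaleMarkersForRows from the diff:
// every sample value in the given series is replaced with the staleness marker.
func setStaleMarkers(series []prompbmarshal.TimeSeries) {
	for _, tss := range series {
		samples := tss.Samples
		for i := range samples {
			samples[i].Value = staleNaN
		}
	}
}

func main() {
	series := []prompbmarshal.TimeSeries{
		{Samples: []prompbmarshal.Sample{{Value: 42, Timestamp: 0}}},
	}
	setStaleMarkers(series)
	fmt.Println(math.Float64bits(series[0].Samples[0].Value) == 0x7ff0000000000002) // true
}
```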
@@ -10,6 +10,7 @@ import (
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
 	parser "github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/prometheus"
 )
@@ -687,19 +688,24 @@ func TestAddRowToTimeseriesNoRelabeling(t *testing.T) {
 }

 func TestSendStaleSeries(t *testing.T) {
-	var sw scrapeWork
-	sw.Config = &ScrapeWork{
-		NoStaleMarkers: false,
-	}
-
-	var timeseriesExpectedN int
-	sw.PushData = func(at *auth.Token, wr *prompbmarshal.WriteRequest) {
-		t.Helper()
-		if len(wr.Timeseries) != timeseriesExpectedN {
-			t.Fatalf("expected to get %d stale series; got %d", timeseriesExpectedN, len(wr.Timeseries))
-		}
-	}
+	f := func(lastScrape, currScrape string, staleMarksExpected int) {
+		t.Helper()
+		var sw scrapeWork
+		sw.Config = &ScrapeWork{
+			NoStaleMarkers: false,
+		}
+		common.StartUnmarshalWorkers()
+		defer common.StopUnmarshalWorkers()
+
+		var staleMarks int
+		sw.PushData = func(at *auth.Token, wr *prompbmarshal.WriteRequest) {
+			staleMarks += len(wr.Timeseries)
+		}
+		sw.sendStaleSeries(lastScrape, currScrape, 0, false)
+		if staleMarks != staleMarksExpected {
+			t.Fatalf("unexpected number of stale marks; got %d; want %d", staleMarks, staleMarksExpected)
+		}
+	}

 	generateScrape := func(n int) string {
 		w := strings.Builder{}
 		for i := 0; i < n; i++ {
@@ -708,20 +714,13 @@ func TestSendStaleSeries(t *testing.T) {
 		return w.String()
 	}

-	timeseriesExpectedN = 0
-	sw.sendStaleSeries("", "", 0, false)
-
-	timeseriesExpectedN = 0
-	sw.sendStaleSeries(generateScrape(10), generateScrape(10), 0, false)
-
-	timeseriesExpectedN = 10
-	sw.sendStaleSeries(generateScrape(10), "", 0, false)
-
-	timeseriesExpectedN = 5
-	sw.sendStaleSeries(generateScrape(10), generateScrape(5), 0, false)
-
-	timeseriesExpectedN = maxStaleSeriesAtOnce
-	sw.sendStaleSeries(generateScrape(maxStaleSeriesAtOnce*2), "", 0, false)
+	f("", "", 0)
+	f(generateScrape(10), generateScrape(10), 0)
+	f(generateScrape(10), "", 10)
+	f("", generateScrape(10), 0)
+	f(generateScrape(10), generateScrape(3), 7)
+	f(generateScrape(3), generateScrape(10), 0)
+	f(generateScrape(20000), generateScrape(10), 19990)
 }

 func parsePromRow(data string) *parser.Row {
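As a worked illustration of the new f(lastScrape, currScrape, staleMarksExpected) helper: one stale marker is expected per series that was present in the last scrape but is missing from the current one, which is why f(generateScrape(10), generateScrape(3), 7) expects 7 markers and f(generateScrape(3), generateScrape(10), 0) expects none. A small hypothetical sketch of that contract (the countDisappeared helper and the sample scrape bodies are made up for illustration; the real logic lives inside sendStaleSeries):

```go
package main

import (
	"fmt"
	"strings"
)

// countDisappeared returns how many series names from lastScrape are missing
// in currScrape - the number of stale markers the test expects.
func countDisappeared(lastScrape, currScrape string) int {
	current := make(map[string]bool)
	for _, line := range strings.Split(strings.TrimSpace(currScrape), "\n") {
		if name, _, ok := strings.Cut(line, " "); ok {
			current[name] = true
		}
	}
	n := 0
	for _, line := range strings.Split(strings.TrimSpace(lastScrape), "\n") {
		if name, _, ok := strings.Cut(line, " "); ok && !current[name] {
			n++
		}
	}
	return n
}

func main() {
	lastScrape := "foo 1\nbar 2\nbaz 3\n" // 3 series exposed previously
	currScrape := "foo 1\n"               // only foo is still exposed
	fmt.Println(countDisappeared(lastScrape, currScrape)) // 2 stale markers expected
}
```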