app/vmctl: limit JSON line size by 10K samples (#1394)

This should reduce the maximum memory usage at VictoriaMetrics when importing time series with big number of samples.
This commit is contained in:
Aliaksandr Valialkin 2021-06-18 15:26:47 +03:00 committed by GitHub
parent eb1af09a04
commit 570f36b344
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 16 deletions

View File

@ -56,27 +56,37 @@ func (cw *cWriter) printf(format string, args ...interface{}) {
//"{"metric":{"__name__":"cpu_usage_guest","arch":"x64","hostname":"host_19",},"timestamps":[1567296000000,1567296010000],"values":[1567296000000,66]}
func (ts *TimeSeries) write(w io.Writer) (int, error) {
pointsCount := len(ts.Timestamps)
if pointsCount == 0 {
return 0, nil
}
timestamps := ts.Timestamps
values := ts.Values
cw := &cWriter{w: w}
cw.printf(`{"metric":{"__name__":%q`, ts.Name)
if len(ts.LabelPairs) > 0 {
for len(timestamps) > 0 {
// Split long lines with more than 10K samples into multiple JSON lines.
// This should limit memory usage at VictoriaMetrics during data ingestion,
// since it allocates memory for the whole JSON line and processes it in one go.
batchSize := 10000
if batchSize > len(timestamps) {
batchSize = len(timestamps)
}
timestampsBatch := timestamps[:batchSize]
valuesBatch := values[:batchSize]
timestamps = timestamps[batchSize:]
values = values[batchSize:]
cw.printf(`{"metric":{"__name__":%q`, ts.Name)
for _, lp := range ts.LabelPairs {
cw.printf(",%q:%q", lp.Name, lp.Value)
}
}
cw.printf(`},"timestamps":[`)
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%d,`, ts.Timestamps[i])
pointsCount := len(timestampsBatch)
cw.printf(`},"timestamps":[`)
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%d,`, timestampsBatch[i])
}
cw.printf(`%d],"values":[`, timestampsBatch[pointsCount-1])
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%v,`, valuesBatch[i])
}
cw.printf("%v]}\n", valuesBatch[pointsCount-1])
}
cw.printf(`%d],"values":[`, ts.Timestamps[pointsCount-1])
for i := 0; i < pointsCount-1; i++ {
cw.printf(`%v,`, ts.Values[i])
}
cw.printf("%v]}\n", ts.Values[pointsCount-1])
return cw.n, cw.err
}

View File

@ -10,6 +10,7 @@ sort: 15
* FEATURE: vmagent: change the default value for `-remoteWrite.queues` from 4 to `2 * numCPUs`. This should reduce scrape duration for highly loaded vmagent, which scrapes tens of thousands of targets. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1385).
* FEATURE: vmagent: show the number of samples the target returned during the last scrape on `/targets` and `/api/v1/targets` pages. This should simplify debugging targets, which may return too big or too low number of samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1377).
* FEATURE: vmagent: show jobs with zero discovered targets on `/targets` page. This should help debugging improperly configured scrape configs.
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): limit the number of samples per each imported JSON line. This should limit the memory usage at VictoriaMetrics when importing time series with big number of samples to it.
* BUGFIX: prevent from adding new samples to deleted time series after the rotation of the inverted index (the rotation is performed once per `-retentionPeriod`). See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1347#issuecomment-861232136) for details.
* BUGFIX: vmstorage: reduce disk write IO usage on systems with big number of CPU cores. The issue has been introduced in the release [v1.59.0](#v1590). See [this commit](aa9b56a046b6ae8083fa659df35dd5e994bf9115) and [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1338#issuecomment-863046999) for details.