app/vmctl: limit JSON line size to 10K samples (#1394)

This should reduce the maximum memory usage at VictoriaMetrics when importing time series with a big number of samples.
2025-01-20 07:19:17 +01:00 · 2021-06-18 15:26:47 +03:00 · 2021-06-18 15:26:47 +03:00 · 570f36b344
commit 570f36b344
parent eb1af09a04
2 changed files with 27 additions and 16 deletions
--- a/app/vmctl/vm/timeseries.go
+++ b/app/vmctl/vm/timeseries.go
@@ -56,27 +56,37 @@ func (cw *cWriter) printf(format string, args ...interface{}) {

 //"{"metric":{"__name__":"cpu_usage_guest","arch":"x64","hostname":"host_19",},"timestamps":[1567296000000,1567296010000],"values":[1567296000000,66]}
 func (ts *TimeSeries) write(w io.Writer) (int, error) {
-	pointsCount := len(ts.Timestamps)
-	if pointsCount == 0 {
-		return 0, nil
-	}
-
+	timestamps := ts.Timestamps
+	values := ts.Values
 	cw := &cWriter{w: w}
-	cw.printf(`{"metric":{"__name__":%q`, ts.Name)
-	if len(ts.LabelPairs) > 0 {
+	for len(timestamps) > 0 {
+		// Split long lines with more than 10K samples into multiple JSON lines.
+		// This should limit memory usage at VictoriaMetrics during data ingestion,
+		// since it allocates memory for the whole JSON line and processes it in one go.
+		batchSize := 10000
+		if batchSize > len(timestamps) {
+			batchSize = len(timestamps)
+		}
+		timestampsBatch := timestamps[:batchSize]
+		valuesBatch := values[:batchSize]
+		timestamps = timestamps[batchSize:]
+		values = values[batchSize:]
+
+		cw.printf(`{"metric":{"__name__":%q`, ts.Name)
 		for _, lp := range ts.LabelPairs {
 			cw.printf(",%q:%q", lp.Name, lp.Value)
 		}
-	}

-	cw.printf(`},"timestamps":[`)
-	for i := 0; i < pointsCount-1; i++ {
-		cw.printf(`%d,`, ts.Timestamps[i])
+		pointsCount := len(timestampsBatch)
+		cw.printf(`},"timestamps":[`)
+		for i := 0; i < pointsCount-1; i++ {
+			cw.printf(`%d,`, timestampsBatch[i])
+		}
+		cw.printf(`%d],"values":[`, timestampsBatch[pointsCount-1])
+		for i := 0; i < pointsCount-1; i++ {
+			cw.printf(`%v,`, valuesBatch[i])
+		}
+		cw.printf("%v]}\n", valuesBatch[pointsCount-1])
 	}
-	cw.printf(`%d],"values":[`, ts.Timestamps[pointsCount-1])
-	for i := 0; i < pointsCount-1; i++ {
-		cw.printf(`%v,`, ts.Values[i])
-	}
-	cw.printf("%v]}\n", ts.Values[pointsCount-1])
 	return cw.n, cw.err
 }
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -10,6 +10,7 @@ sort: 15
 * FEATURE: vmagent: change the default value for `-remoteWrite.queues` from 4 to `2 * numCPUs`. This should reduce scrape duration for highly loaded vmagent, which scrapes tens of thousands of targets. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1385).
 * FEATURE: vmagent: show the number of samples the target returned during the last scrape on `/targets` and `/api/v1/targets` pages. This should simplify debugging targets, which may return too big or too low number of samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1377).
 * FEATURE: vmagent: show jobs with zero discovered targets on `/targets` page. This should help debugging improperly configured scrape configs.
+* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): limit the number of samples per imported JSON line. This should limit the memory usage at VictoriaMetrics when importing time series with a big number of samples.

 * BUGFIX: prevent from adding new samples to deleted time series after the rotation of the inverted index (the rotation is performed once per `-retentionPeriod`). See [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1347#issuecomment-861232136) for details.
 * BUGFIX: vmstorage: reduce disk write IO usage on systems with big number of CPU cores. The issue has been introduced in the release [v1.59.0](#v1590). See [this commit](aa9b56a046b6ae8083fa659df35dd5e994bf9115) and [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1338#issuecomment-863046999) for details.