From 0cf55ded34b01e617b155d2cc8e972f10f60355d Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Fri, 24 Nov 2023 11:53:04 +0100 Subject: [PATCH] lib/protoparser: decrease `import.maxLineLen` from 100MB to 10MB (#5364) Tests showed that importing a single line with 70MB size takes 5.3GiB RSS memory for VictoriaMetrics single-node. In the scenario when user exports and imports data from one VM to another, it could possibly lead to OOM exception for destination VM. Importing a single line with 16MB size takes 1.3GiB RSS memory. Hence, the limit for `import.maxLineLen` was decreased from 100MB to 10MB to improve reliability of VictoriaMetrics during imports. Signed-off-by: hagen1778 Co-authored-by: Aliaksandr Valialkin --- README.md | 4 ++-- app/vmagent/README.md | 2 +- docs/CHANGELOG.md | 1 + docs/Cluster-VictoriaMetrics.md | 2 +- docs/README.md | 4 ++-- docs/Single-server-VictoriaMetrics.md | 4 ++-- docs/vmagent.md | 2 +- lib/protoparser/common/lines_reader.go | 2 +- lib/protoparser/vmimport/stream/streamparser.go | 2 +- 9 files changed, 12 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 6e91ecf83..d0ccbdb0f 100644 --- a/README.md +++ b/README.md @@ -1591,7 +1591,7 @@ The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line ``` Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it. -Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB). +Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 10MB). It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format). Too long JSON lines may increase RAM usage at VictoriaMetrics side. 
@@ -2625,7 +2625,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing -import.maxLineLen size The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export - Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600) + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760) -influx.databaseNames array Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb Supports an array of values separated by comma or specified via multiple flags. diff --git a/app/vmagent/README.md b/app/vmagent/README.md index d6a0a0878..62a621975 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -1,3 +1,3 @@ See vmagent docs [here](https://docs.victoriametrics.com/vmagent.html). -vmagent docs can be edited at [docs/vmagent.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmagent.md). +vmagent docs can be edited at [docs/vmagent.md](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/docs/vmagent.md). \ No newline at end of file diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 644b53cb1..8e2fd6c3b 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -38,6 +38,7 @@ The sandbox cluster installation is running under the constant load generated by Released at 2023-11-16 * FEATURE: dashboards: use `version` instead of `short_version` in version change annotation for single/cluster dashboards. 
The update should reflect version changes even if different flavours of the same release were applied (custom builds). +* FEATURE: lower limit for `import.maxLineLen` cmd-line flag from 100MB to 10MB in order to prevent excessive memory usage during data import. Please note, the line length of exported data can be limited with `max_rows_per_line` query arg passed to `/api/v1/export`. The change affects vminsert/vmagent/VictoriaMetrics single-node. * BUGFIX: fix a bug, which could result in improper results and/or to `cannot merge series: duplicate series found` error during [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query) execution. The issue has been introduced in [v1.95.0](https://docs.victoriametrics.com/CHANGELOG.html#v1950). See [this bugreport](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5332) for details. * BUGFIX: improve deadline detection when using buffered connection for communication between cluster components. Before, due to nature of a buffered connection the deadline could have been exceeded while reading or writing buffered data to connection. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5327). diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 3b68ba0b7..cae5314a5 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -949,7 +949,7 @@ Below is the output for `/path/to/vminsert -help`: Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. 
Use -pushmetrics.url for metrics pushing -import.maxLineLen size The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export - Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600) + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760) -influx.databaseNames array Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb Supports an array of values separated by comma or specified via multiple flags. diff --git a/docs/README.md b/docs/README.md index 2e56fcb28..443ba3452 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1594,7 +1594,7 @@ The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line ``` Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it. -Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB). +Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 10MB). It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format). Too long JSON lines may increase RAM usage at VictoriaMetrics side. @@ -2628,7 +2628,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. 
Use -pushmetrics.url for metrics pushing -import.maxLineLen size The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export - Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600) + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760) -influx.databaseNames array Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb Supports an array of values separated by comma or specified via multiple flags. diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 9c8f6a66a..1a234a94e 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1602,7 +1602,7 @@ The format follows [JSON streaming concept](http://ndjson.org/), e.g. each line ``` Note that every JSON object must be written in a single line, e.g. all the newline chars must be removed from it. -Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 100MB). +Every line length is limited by the value passed to `-import.maxLineLen` command-line flag (by default this is 10MB). It is recommended passing 1K-10K samples per line for achieving the maximum data ingestion performance at [/api/v1/import](#how-to-import-data-in-json-line-format). Too long JSON lines may increase RAM usage at VictoriaMetrics side. @@ -2636,7 +2636,7 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. 
Use -pushmetrics.url for metrics pushing -import.maxLineLen size The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export - Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600) + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760) -influx.databaseNames array Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb Supports an array of values separated by comma or specified via multiple flags. diff --git a/docs/vmagent.md b/docs/vmagent.md index 754e105db..e577abc1b 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -1494,7 +1494,7 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing -import.maxLineLen size The maximum length in bytes of a single line accepted by /api/v1/import; the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export - Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 104857600) + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 10485760) -influx.databaseNames array Comma-separated list of database names to return from /query and /influx/query API. This can be needed for accepting data from Telegraf plugins such as https://github.com/fangli/fluent-plugin-influxdb Supports an array of values separated by comma or specified via multiple flags. 
diff --git a/lib/protoparser/common/lines_reader.go b/lib/protoparser/common/lines_reader.go index a1fe0d3c6..04e8e33e1 100644 --- a/lib/protoparser/common/lines_reader.go +++ b/lib/protoparser/common/lines_reader.go @@ -72,7 +72,7 @@ again: // Search for the last newline in dstBuf and put the rest into tailBuf. nn := bytes.LastIndexByte(dstBuf[len(dstBuf)-n:], '\n') if nn < 0 { - // Didn't found at least a single line. + // Didn't find at least a single line. if len(dstBuf) > maxLineLen { return dstBuf, tailBuf, fmt.Errorf("too long line: more than %d bytes", maxLineLen) } diff --git a/lib/protoparser/vmimport/stream/streamparser.go b/lib/protoparser/vmimport/stream/streamparser.go index 25e259f22..23bcb5f8e 100644 --- a/lib/protoparser/vmimport/stream/streamparser.go +++ b/lib/protoparser/vmimport/stream/streamparser.go @@ -15,7 +15,7 @@ import ( "github.com/VictoriaMetrics/metrics" ) -var maxLineLen = flagutil.NewBytes("import.maxLineLen", 100*1024*1024, "The maximum length in bytes of a single line accepted by /api/v1/import; "+ +var maxLineLen = flagutil.NewBytes("import.maxLineLen", 10*1024*1024, "The maximum length in bytes of a single line accepted by /api/v1/import; "+ "the line length can be limited with 'max_rows_per_line' query arg passed to /api/v1/export") // Parse parses /api/v1/import lines from req and calls callback for the parsed rows.