app/vmstorage: add -logNewSeries command-line flag for determining the source of series churn rate

2024-11-23 12:31:07 +01:00 · 2021-03-15 22:38:50 +02:00 · 2021-03-15 22:38:50 +02:00 · dd7e82c34f
commit dd7e82c34f
parent 37323c57c9
5 changed files with 20 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -1375,6 +1375,8 @@ See the example of alerting rules for VM components [here](https://github.com/Vi
  VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
  while `topN` equals to 10.

+* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics.
+
 * VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
  This prevents from ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
  metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@ -36,6 +36,8 @@ var (
 	bigMergeConcurrency   = flag.Int("bigMergeConcurrency", 0, "The maximum number of CPU cores to use for big merges. Default value is used if set to 0")
 	smallMergeConcurrency = flag.Int("smallMergeConcurrency", 0, "The maximum number of CPU cores to use for small merges. Default value is used if set to 0")

+	logNewSeries = flag.Bool("logNewSeries", false, "Whether to log new series. This option is for debug purposes only. It can lead to performance issues "+
+		"when big number of new series are ingested into VictoriaMetrics")
 	denyQueriesOutsideRetention = flag.Bool("denyQueriesOutsideRetention", false, "Whether to deny queries outside of the configured -retentionPeriod. "+
 		"When set, then /api/v1/query_range would return '503 Service Unavailable' error for queries with 'from' value outside -retentionPeriod. "+
 		"This may be useful when multiple data sources with distinct retentions are hidden behind query-tee")
@ -71,6 +73,7 @@ func InitWithoutMetrics(resetCacheIfNeeded func(mrs []storage.MetricRow)) {
 	}

 	resetResponseCacheIfNeeded = resetCacheIfNeeded
+	storage.SetLogNewSeries(*logNewSeries)
 	storage.SetFinalMergeDelay(*finalMergeDelay)
 	storage.SetBigMergeWorkersCount(*bigMergeConcurrency)
 	storage.SetSmallMergeWorkersCount(*smallMergeConcurrency)
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -14,6 +14,7 @@
 * FEATURE: vmauth: allow using regexp paths in `url_map`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1112) for details.
 * FEATURE: accept `round_digits` query arg at `/api/v1/query` and `/api/v1/query_range` handlers. This option can be set at Prometheus datasource in Grafana for limiting the number of digits after the decimal point in response values.
 * FEATURE: add `-influx.databaseNames` command-line flag, which can be used for accepting data from some Telegraf plugins such as [fluentd plugin](https://github.com/fangli/fluent-plugin-influxdb). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1124).
+* FEATURE: add `-logNewSeries` command-line flag, which can be used for debugging the source of time series churn rate.

 * BUGFIX: vmagent: prevent from high CPU usage bug during failing scrapes with small `scrape_timeout` (less than a few seconds).
 * BUGFIX: vmagent: reduce memory usage when Kubernetes service discovery is used in big number of distinct scrape config jobs by sharing Kubernetes object cache. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1113
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@ -1375,6 +1375,8 @@ See the example of alerting rules for VM components [here](https://github.com/Vi
  VictoriaMetrics accepts optional `date=YYYY-MM-DD` and `topN=42` args on this page. By default `date` equals to the current date,
  while `topN` equals to 10.

+* New time series can be logged if `-logNewSeries` command-line flag is passed to VictoriaMetrics.
+
 * VictoriaMetrics limits the number of labels per each metric with `-maxLabelsPerTimeseries` command-line flag.
  This prevents from ingesting metrics with too many labels. It is recommended [monitoring](#monitoring) `vm_metrics_with_dropped_labels_total`
  metric in order to determine whether `-maxLabelsPerTimeseries` must be adjusted for your workload.
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@ -577,9 +577,21 @@ func (db *indexDB) createTSIDByName(dst *TSID, metricName []byte) error {
 	// on db.tb flush via invalidateTagCache flushCallback passed to OpenTable.

 	atomic.AddUint64(&db.newTimeseriesCreated, 1)
+	if logNewSeries {
+		logger.Infof("new series created: %s", mn.String())
+	}
 	return nil
 }

+// SetLogNewSeries enables (ok=true) or disables (ok=false) logging of newly created series.
+//
+// This function must be called before calling any storage functions.
+func SetLogNewSeries(ok bool) {
+	logNewSeries = ok
+}
+
+var logNewSeries = false // set by SetLogNewSeries; checked in createTSIDByName when a new series is created
+
 func (db *indexDB) generateTSID(dst *TSID, metricName []byte, mn *MetricName) error {
 	// Search the TSID in the external storage.
 	// This is usually the db from the previous period.