From 49fa92c1d0e6cfcbf39429a4bff49efa9a6b6961 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Mon, 22 Jan 2024 05:13:15 +0800 Subject: [PATCH] lib/promscrape/discovery/kubernetes: fix watcher start order for roles endpoints and endpointslice (#5557) * lib/promscrape/discovery/kubernetes: fix watcher start order for roles endpoints and endpointslice Previously the groupWatcher could be mistakenly stopped when requests for pod or services resources take too long. * remove mislead comment * docs/sd_configs.md: mention -promscrape.kubernetes.attachNodeMetadataAll flag in the description for attach_metadata section Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4640 * wip * lib/promscrape/kubernetes: prevent from stopping groupWatcher when there are in-flight apiWatcher.mustStart() calls groupWatcher is stopped if it has zero registered apiWatchers during 14 seconds. But such a groupWatcher can be still in use if apiWatcher for `role: endpoints` or `role: endpointslice` is being registered and the discovery of the associated `pod` and/or `service` objects takes longer than 14 seconds - see the beginning of groupWatcher.startWatchersForRole() function for details. Track the number of in-flight calls to apiWatcher.mustStart() and prevent from stopping the associated groupWatcher if the number of in-flight calls is non-zero. P.S. postponing the discovery of `pod` and/or `service` objects associated with `endpoints` or `endpointslice` roles isn't the best solution, since it slows down initial discovery of `endpoints` and `endpointslice` targets. * typo fix --------- Co-authored-by: Aliaksandr Valialkin --- docs/CHANGELOG.md | 1 + .../discovery/kubernetes/api_watcher.go | 30 ++++++++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 76f1082de3..3ef90abe5f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -67,6 +67,7 @@ The sandbox cluster installation is running under the constant load generated by * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): retry on import errors in `vm-native` mode. Before, retries happened only on writes into a network connection between source and destination. But errors returned by server after all the data was transmitted were logged, but not retried. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly assume role with [AWS IRSA authorization](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). Previously role chaining was not supported. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3822) for details. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): exit if there is config syntax error in [`scrape_config_files`](https://docs.victoriametrics.com/vmagent.html#loading-scrape-configs-from-multiple-files) when `-promscrape.config.strictParse=true`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5508). +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover targets for `role: endpoints` and `role: endpointslice` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some `endpoints` and `endpointslice` targets could be left undiscovered or some targets could have missing `__meta_*` labels when performing service discovery in busy Kubernetes clusters with large number of pods. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5557). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not store scrape response for target in memory when staleness markers are disabled. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5577) for details. * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a link for the statistic inaccuracy explanation in the cardinality explorer tool. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5460). * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401). diff --git a/lib/promscrape/discovery/kubernetes/api_watcher.go b/lib/promscrape/discovery/kubernetes/api_watcher.go index 4b9a12cad6..a27ad3c5fc 100644 --- a/lib/promscrape/discovery/kubernetes/api_watcher.go +++ b/lib/promscrape/discovery/kubernetes/api_watcher.go @@ -103,7 +103,9 @@ func newAPIWatcher(apiServer string, ac *promauth.Config, sdc *SDConfig, swcFunc } func (aw *apiWatcher) mustStart() { + atomic.AddInt32(&aw.gw.apiWatcherInflightStartCalls, 1) aw.gw.startWatchersForRole(aw.role, aw) + atomic.AddInt32(&aw.gw.apiWatcherInflightStartCalls, -1) } func (aw *apiWatcher) updateSwosCount(multiplier int, swosByKey map[string][]interface{}) { @@ -209,6 +211,10 @@ func (aw *apiWatcher) getScrapeWorkObjects() []interface{} { // groupWatcher watches for Kubernetes objects on the given apiServer with the given namespaces, // selectors and attachNodeMetadata using the given client. type groupWatcher struct { + // The number of in-flight apiWatcher.mustStart() calls for the given groupWatcher. + // This field is used by groupWatchersCleaner() in order to determine when the given groupWatcher can be stopped. + apiWatcherInflightStartCalls int32 + // Old Kubernetes doesn't support /apis/networking.k8s.io/v1/, so /apis/networking.k8s.io/v1beta1/ must be used instead. // This flag is used for automatic substitution of v1 API path with v1beta1 API path during requests to apiServer. useNetworkingV1Beta1 uint32 @@ -309,11 +315,7 @@ func selectorsKey(selectors []Selector) string { var ( groupWatchersLock sync.Mutex - groupWatchers = func() map[string]*groupWatcher { - gws := make(map[string]*groupWatcher) - go groupWatchersCleaner(gws) - return gws - }() + groupWatchers map[string]*groupWatcher _ = metrics.NewGauge(`vm_promscrape_discovery_kubernetes_group_watchers`, func() float64 { groupWatchersLock.Lock() @@ -323,11 +325,16 @@ var ( }) ) -func groupWatchersCleaner(gws map[string]*groupWatcher) { +func init() { + groupWatchers = make(map[string]*groupWatcher) + go groupWatchersCleaner() +} + +func groupWatchersCleaner() { for { time.Sleep(7 * time.Second) groupWatchersLock.Lock() - for key, gw := range gws { + for key, gw := range groupWatchers { gw.mu.Lock() // Calculate the number of apiWatcher instances subscribed to gw. awsTotal := 0 @@ -335,14 +342,14 @@ func groupWatchersCleaner(gws map[string]*groupWatcher) { awsTotal += len(uw.aws) + len(uw.awsPending) } - if awsTotal == 0 { - // There are no API watchers subscribed to gw. - // Stop all the urlWatcher instances at gw and drop gw from gws in this case, + if awsTotal == 0 && atomic.LoadInt32(&gw.apiWatcherInflightStartCalls) == 0 { + // There are no API watchers subscribed to gw and there are no in-flight apiWatcher.mustStart() calls. + // Stop all the urlWatcher instances at gw and drop gw from groupWatchers in this case, // but do it only on the second iteration in order to reduce urlWatcher churn // during scrape config reloads. if gw.noAPIWatchers { gw.cancel() - delete(gws, key) + delete(groupWatchers, key) } else { gw.noAPIWatchers = true } @@ -432,6 +439,7 @@ func (gw *groupWatcher) startWatchersForRole(role string, aw *apiWatcher) { if gw.attachNodeMetadata && (role == "pod" || role == "endpoints" || role == "endpointslice") { gw.startWatchersForRole("node", nil) } + paths := getAPIPathsWithNamespaces(role, gw.namespaces, gw.selectors) for _, path := range paths { apiURL := gw.apiServer + path