lib/promscrape: do not re-use previously loaded scrape targets on failed attempt to load updated scrape targets at file_sd_configs

The logic employed for re-using the previously loaded scrape targets was broken initially. The commit cc0427897c tried to fix it, but the new logic became too complex and fragile. So it is better to just remove this logic, since the targets from a temporarily broken file should eventually be loaded on the next attempts, which are made every -promscrape.fileSDCheckInterval. This also allows removing fragile hacks around the __vm_filepath label. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3989
2024-12-15 16:30:55 +01:00 · 2023-04-02 21:05:01 -07:00 · 2023-04-02 21:05:01 -07:00 · 02ceebccc0
commit 02ceebccc0
parent 6f0512a81c
5 changed files with 34 additions and 62 deletions
--- a/docs/sd_configs.md
+++ b/docs/sd_configs.md
@ -94,6 +94,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_azure_subscription_id`: the subscription ID
 * `__meta_azure_tenant_id`: the tenant ID
 The list of discovered Azure targets is refreshed at the interval, which can be configured via `-promscrape.azureSDCheckInterval` command-line flag.
 ## consul_sd_configs
@ -183,6 +184,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_consul_tagpresent_<tagname>`: "true" for every <tagname> tag of the target
 * `__meta_consul_tags`: the list of tags of the target joined by the `tag_separator`
 The list of discovered Consul targets is refreshed at the interval, which can be configured via `-promscrape.consulSDCheckInterval` command-line flag.
 ## digitalocean_sd_configs
@ -224,6 +226,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_digitalocean_tags`: the comma-separated list of tags of the droplet
 * `__meta_digitalocean_vpc`: the id of the droplet's VPC
 The list of discovered DigitalOcean targets is refreshed at the interval, which can be configured via `-promscrape.digitaloceanSDCheckInterval` command-line flag.
 ## dns_sd_configs
@ -260,6 +263,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_dns_srv_record_port`: the port field of the SRV record
 * `__meta_dns_mx_record_target`: the target field of the MX record.
 The list of discovered DNS targets is refreshed at the interval, which can be configured via `-promscrape.dnsSDCheckInterval` command-line flag.
 ## docker_sd_configs
@ -315,6 +319,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_docker_port_public`: the external port if a port-mapping exists
 * `__meta_docker_port_public_ip`: the public IP if a port-mapping exists
 The list of discovered Docker targets is refreshed at the interval, which can be configured via `-promscrape.dockerSDCheckInterval` command-line flag.
 ## dockerswarm_sd_configs
@ -444,6 +449,7 @@ One of the following roles can be configured to discover targets:
  * `__meta_dockerswarm_node_role`: the role of the node
  * `__meta_dockerswarm_node_status`: the status of the node
 The list of discovered Docker Swarm targets is refreshed at the interval, which can be configured via `-promscrape.dockerswarmSDCheckInterval` command-line flag.
 ## ec2_sd_configs
@ -526,6 +532,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_ec2_tag_<tagkey>`: each tag value of the instance
 * `__meta_ec2_vpc_id`: the ID of the VPC in which the instance is running, if available
 The list of discovered EC2 targets is refreshed at the interval, which can be configured via `-promscrape.ec2SDCheckInterval` command-line flag.
 ## eureka_sd_configs
@ -570,11 +577,11 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_eureka_app_instance_datacenterinfo_name`: the datacenter name of the app instance
 * `__meta_eureka_app_instance_datacenterinfo_metadata_<metadataname>`: the datacenter metadata
 The list of discovered Eureka targets is refreshed at the interval, which can be configured via `-promscrape.eurekaSDCheckInterval` command-line flag.
 ## file_sd_configs
 File-based service discovery reads a set of files with lists of targets to scrape.
 Scrape targets are automatically updated when the underlying files are changed; the files are re-checked with the interval configured via the `-promscrape.fileSDCheckInterval` command-line flag.
 Configuration example:
@ -625,6 +632,7 @@ The following meta labels are available on discovered targets during [relabeling
 See the [list of integrations](https://prometheus.io/docs/operating/integrations/#file-service-discovery) with `file_sd_configs`.
 The list of discovered file-based targets is refreshed at the interval, which can be configured via `-promscrape.fileSDCheckInterval` command-line flag.
 ## gce_sd_configs
@ -685,6 +693,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_gce_tags`: list of instance tags separated by tag_separator
 * `__meta_gce_zone`: the GCE zone URL in which the instance is running
 The list of discovered GCE targets is refreshed at the interval, which can be configured via `-promscrape.gceSDCheckInterval` command-line flag.
 ## http_sd_configs
@ -728,6 +737,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_url`: the URL from which the target was extracted
 The list of discovered HTTP-based targets is refreshed at the interval, which can be configured via `-promscrape.httpSDCheckInterval` command-line flag.
 ## kubernetes_sd_configs
@ -945,6 +955,8 @@ One of the following `role` types can be configured to discover targets:
  * `__meta_kubernetes_ingress_scheme`: Protocol scheme of ingress, https if TLS config is set. Defaults to http.
  * `__meta_kubernetes_ingress_path`: Path from ingress spec. Defaults to `/`.
 The list of discovered Kubernetes targets is refreshed at the interval, which can be configured via `-promscrape.kubernetesSDCheckInterval` command-line flag.
 ## kuma_sd_configs
 Kuma service discovery config allows fetching targets from the specified control plane `server` of [Kuma Service Mesh](https://kuma.io).
@ -979,6 +991,8 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_kuma_service`: the name of the service associated with the proxy
 * `__meta_kuma_label_<label_name>`: each label of target given from Kuma Control Plane
 The list of discovered Kuma targets is refreshed at the interval, which can be configured via `-promscrape.kumaSDCheckInterval` command-line flag.
 ## nomad_sd_configs
 Nomad SD configuration allows retrieving scrape targets from [HashiCorp Nomad Services](https://www.hashicorp.com/blog/nomad-service-discovery).
@ -1037,6 +1051,8 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_nomad_tagpresent_<tagname>`: "true" for every <tagname> tag of the target
 * `__meta_nomad_tags`: the list of tags of the target joined by the `tag_separator`
 The list of discovered Nomad targets is refreshed at the interval, which can be configured via `-promscrape.nomadSDCheckInterval` command-line flag.
 ## openstack_sd_configs
 OpenStack SD configuration allows retrieving scrape targets from [OpenStack Nova](https://docs.openstack.org/nova/latest/) instances.
@ -1150,6 +1166,7 @@ One of the following `role` types can be configured to discover targets:
  * `__meta_openstack_tag_<tagkey>`: each tag value of the instance.
  * `__meta_openstack_user_id`: the user account owning the tenant.
 The list of discovered OpenStack targets is refreshed at the interval, which can be configured via `-promscrape.openstackSDCheckInterval` command-line flag.
 ## static_configs
@ -1254,6 +1271,7 @@ The following meta labels are available on discovered targets during [relabeling
 * `__meta_yandexcloud_instance_private_dns_<record number>`: if configured DNS records for private IP
 * `__meta_yandexcloud_instance_public_dns_<record number>`: if configured DNS records for public IP
 The list of discovered Yandex Cloud targets is refreshed at the interval, which can be configured via `-promscrape.yandexcloudSDCheckInterval` command-line flag.
 ## scrape_configs
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@ -711,26 +711,11 @@ func (cfg *Config) getEurekaSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
 // getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
 func (cfg *Config) getFileSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork {
 	// Create a map for the previous scrape work.
 	swsMapPrev := make(map[string][]*ScrapeWork)
 	for _, sw := range prev {
 		filepath := sw.Labels.Get("__vm_filepath")
 		if len(filepath) == 0 {
 			logger.Panicf("BUG: missing `__vm_filepath` label")
 		} else {
 			// user can define many file_sd_config with the same path and it will produce the same ScrapeWorks
 			// in this case we just make key for map as job name and filepath with ":" delimiter,
 			// it will create each job with its ScrapeWorks
 			key := fmt.Sprintf("%s:%s", sw.Job(), filepath)
 			swsMapPrev[key] = append(swsMapPrev[key], sw)
 		}
 	}
 	dst := make([]*ScrapeWork, 0, len(prev))
 	for _, sc := range cfg.ScrapeConfigs {
 		configPaths := make(map[string]struct{}, len(sc.FileSDConfigs))
 		for j := range sc.FileSDConfigs {
 			sdc := &sc.FileSDConfigs[j]
-			dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc, configPaths)
+			dst = sdc.appendScrapeWork(dst, cfg.baseDir, sc.swc)
 		}
 	}
 	return dst
@ -1127,7 +1112,7 @@ func appendScrapeWorkForTargetLabels(dst []*ScrapeWork, swc *scrapeWorkConfig, t
 	return dst
 }
-func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[string][]*ScrapeWork, baseDir string, swc *scrapeWorkConfig, configPaths map[string]struct{}) []*ScrapeWork {
+func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, baseDir string, swc *scrapeWorkConfig) []*ScrapeWork {
 	metaLabels := promutils.GetLabels()
 	defer promutils.PutLabels(metaLabels)
 	for _, file := range sdc.Files {
@ -1138,31 +1123,15 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[stri
 			paths, err = filepath.Glob(pathPattern)
 			if err != nil {
 				// Do not return this error, since other files may contain valid scrape configs.
-				logger.Errorf("invalid pattern %q in `files` section: %s; skipping it", file, err)
+				logger.Errorf("invalid pattern %q in `file_sd_config->files` section of job_name=%q: %s; skipping it", file, swc.jobName, err)
 				continue
 			}
 		}
 		for _, path := range paths {
 			// make a key as for previous ScrapeWorks (swsMapPrev map[string][]*ScrapeWork) and show to user
 			// warning about identical file_sd_config.
 			// We skip it because it will make dst with duplicated ScrapeWork.
 			key := fmt.Sprintf("%s:%s", swc.jobName, path)
 			if _, ok := configPaths[key]; ok {
 				logger.Warnf("file_sd_config contains multiple references to %q, ignoring duplicated entry. please check -promscrape.config and remove duplicated configurations", path)
 				continue
 			}
 			configPaths[key] = struct{}{}
 			stcs, err := loadStaticConfigs(path)
 			if err != nil {
 				// Do not return this error, since other paths may contain valid scrape configs.
-				if sws := swsMapPrev[key]; sws != nil {
+				logger.Errorf("cannot load file %q for job_name=%q at `file_sd_configs`: %s; skipping this file", path, swc.jobName, err)
 					// Re-use the previous valid scrape work for this path.
 					logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
 					dst = append(dst, sws...)
 				} else {
 					logger.Errorf("skipping loading `static_configs` from %q because of error: %s", path, err)
 				}
 				continue
 			}
 			pathShort := path
@ -1174,7 +1143,6 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []*ScrapeWork, swsMapPrev map[stri
 			}
 			metaLabels.Reset()
 			metaLabels.Add("__meta_filepath", pathShort)
 			metaLabels.Add("__vm_filepath", path) // This label is needed for internal promscrape logic
 			for i := range stcs {
 				dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)
 			}
--- a/lib/promscrape/config_test.go
+++ b/lib/promscrape/config_test.go
@ -739,16 +739,6 @@ func TestGetFileSDScrapeWorkSuccess(t *testing.T) {
 		}
 		resetNonEssentialFields(sws)
 		// Remove `__vm_filepath` label, since its value depends on the current working dir.
 		for _, sw := range sws {
 			labels := sw.Labels.GetLabels()
 			for j := range labels {
 				label := &labels[j]
 				if label.Name == "__vm_filepath" {
 					label.Value = ""
 				}
 			}
 		}
 		if !reflect.DeepEqual(sws, expectedSws) {
 			t.Fatalf("unexpected scrapeWork; got\n%+v\nwant\n%+v", sws, expectedSws)
 		}
@ -772,7 +762,6 @@ scrape_configs:
 			ScrapeTimeout:   defaultScrapeTimeout,
 			HonorTimestamps: true,
 			Labels: promutils.NewLabelsFromMap(map[string]string{
 				"__vm_filepath": "",
 				"instance": "host1:80",
 				"job":      "foo",
 				"qwe":      "rty",
@ -787,7 +776,6 @@ scrape_configs:
 			ScrapeTimeout:   defaultScrapeTimeout,
 			HonorTimestamps: true,
 			Labels: promutils.NewLabelsFromMap(map[string]string{
 				"__vm_filepath": "",
 				"instance": "host2:80",
 				"job":      "foo",
 				"qwe":      "rty",
@ -802,7 +790,6 @@ scrape_configs:
 			ScrapeTimeout:   defaultScrapeTimeout,
 			HonorTimestamps: true,
 			Labels: promutils.NewLabelsFromMap(map[string]string{
 				"__vm_filepath": "",
 				"instance": "localhost:9090",
 				"job":      "foo",
 				"yml":      "test",
--- a/lib/promutils/labels.go
+++ b/lib/promutils/labels.go
@ -255,8 +255,7 @@ func (x *Labels) RemoveLabelsWithDoubleUnderscorePrefix() {
 	dst := x.Labels[:0]
 	for _, label := range src {
 		name := label.Name
-		// A hack: do not delete __vm_filepath label, since it is used by internal logic for FileSDConfig.
+		if strings.HasPrefix(name, "__") {
 		if strings.HasPrefix(name, "__") && name != "__vm_filepath" {
 			continue
 		}
 		dst = append(dst, label)
--- a/lib/promutils/labels_test.go
+++ b/lib/promutils/labels_test.go
@ -172,6 +172,6 @@ func TestLabelsRemoveLabelsWithDoubleUnderscorePrefix(t *testing.T) {
 	}
 	f(`{}`, `{}`)
 	f(`{foo="bar"}`, `{foo="bar"}`)
-	f(`{__meta_foo="bar",a="b",__name__="foo",__vm_filepath="aa"}`, `{a="b",__vm_filepath="aa"}`)
+	f(`{__meta_foo="bar",a="b",__name__="foo",__vm_filepath="aa"}`, `{a="b"}`)
 	f(`{__meta_foo="bdffr",foo="bar",__meta_xxx="basd"}`, `{foo="bar"}`)
 }