lib/promscrape: add -promscrape.suppressDuplicateScrapeTargetErrors command-line flag in order to suppress duplicate scrape target errors

Also show the original labels for duplicate targets in the error message in order to simplify debugging the issue.

The `/targets` endpoint now accepts an optional `show_original_labels=1` query arg, which shows the original labels for each target.
This may simplify debugging of target relabeling.
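
For illustration, a minimal Go sketch of exercising the new query arg; the default vmagent address `localhost:8429` and this standalone client are assumptions, not part of the commit:

```go
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func main() {
	// show_original_labels=1 appends `originalLabels=...` to every target line,
	// making it easy to compare pre- and post-relabeling labels.
	resp, err := http.Get("http://localhost:8429/targets?show_original_labels=1")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body))
}
```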

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/651
Aliaksandr Valialkin 2020-10-08 18:50:22 +03:00
parent 9b0a5c1028
commit 71ea4935de
8 changed files with 53 additions and 19 deletions


@@ -210,6 +210,8 @@ Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) f
 If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard.
 `vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
+`/targets` handler accepts optional `show_original_labels=1` query arg, which shows the original labels per each target
+before applying relabeling. This information may be useful for debugging target relabeling.
 ### Troubleshooting
@@ -237,7 +239,8 @@ If you have suggestions, improvements or found a bug - feel free to open an issu
 Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports
-or they use init container.
+or they use init container. These errors can be either suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` or fixed. If you prefer fixing these
+errors, then see available options below:
 The following `relabel_configs` section may help determining `__meta_*` labels resulting in duplicate targets:
 ```yml


@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"net/http"
 	"os"
+	"strconv"
 	"strings"
 	"time"
@@ -207,7 +208,8 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
 	case "/targets":
 		promscrapeTargetsRequests.Inc()
 		w.Header().Set("Content-Type", "text/plain")
-		promscrape.WriteHumanReadableTargetsStatus(w)
+		showOriginalLabels, _ := strconv.ParseBool(r.FormValue("show_original_labels"))
+		promscrape.WriteHumanReadableTargetsStatus(w, showOriginalLabels)
 		return true
 	case "/-/reload":
 		promscrapeConfigReloadRequests.Inc()
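
Note that `strconv.ParseBool` accepts `1`, `t`, `true`, `TRUE`, etc., which is why `show_original_labels=1` works; the ignored error simply leaves the flag false for missing or malformed values. A quick standalone check:

```go
package main

import (
	"fmt"
	"strconv"
)

func main() {
	for _, s := range []string{"1", "true", "TRUE", "0", "", "yes"} {
		v, err := strconv.ParseBool(s)
		// "" and "yes" are not valid bools, so v stays false and err is non-nil.
		fmt.Printf("%q -> %v (err: %v)\n", s, v, err)
	}
}
```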


@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"strconv"
 	"strings"
 	"sync/atomic"
@@ -155,7 +156,8 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
 	case "/targets":
 		promscrapeTargetsRequests.Inc()
 		w.Header().Set("Content-Type", "text/plain")
-		promscrape.WriteHumanReadableTargetsStatus(w)
+		showOriginalLabels, _ := strconv.ParseBool(r.FormValue("show_original_labels"))
+		promscrape.WriteHumanReadableTargetsStatus(w, showOriginalLabels)
 		return true
 	case "/-/reload":
 		promscrapeConfigReloadRequests.Inc()


@@ -210,6 +210,8 @@ Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) f
 If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard.
 `vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format.
+`/targets` handler accepts optional `show_original_labels=1` query arg, which shows the original labels per each target
+before applying relabeling. This information may be useful for debugging target relabeling.
 ### Troubleshooting
@@ -237,7 +239,8 @@ If you have suggestions, improvements or found a bug - feel free to open an issu
 Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports
-or they use init container.
+or they use init container. These errors can be either suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` or fixed. If you prefer fixing these
+errors, then see available options below:
 The following `relabel_configs` section may help determining `__meta_*` labels resulting in duplicate targets:
 ```yml


@@ -597,6 +597,8 @@ func (stc *StaticConfig) appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfi
 func appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, target string, extraLabels, metaLabels map[string]string) ([]ScrapeWork, error) {
 	labels := mergeLabels(swc.jobName, swc.scheme, target, swc.metricsPath, extraLabels, swc.externalLabels, metaLabels, swc.params)
+	originalLabels := append([]prompbmarshal.Label{}, labels...)
+	promrelabel.SortLabels(originalLabels)
 	labels = promrelabel.ApplyRelabelConfigs(labels, 0, swc.relabelConfigs, false)
 	labels = promrelabel.RemoveMetaLabels(labels[:0], labels)
 	if len(labels) == 0 {
@@ -648,6 +650,7 @@ func appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, target string, ex
 		ScrapeTimeout:        swc.scrapeTimeout,
 		HonorLabels:          swc.honorLabels,
 		HonorTimestamps:      swc.honorTimestamps,
+		OriginalLabels:       originalLabels,
 		Labels:               labels,
 		AuthConfig:           swc.authConfig,
 		MetricRelabelConfigs: swc.metricRelabelConfigs,
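
The `append([]prompbmarshal.Label{}, labels...)` copy above is what preserves the pre-relabeling snapshot: the subsequent relabeling steps reuse the `labels` backing array in place (note the `labels[:0]` argument). A self-contained sketch of the pattern, with a simplified stand-in `Label` type:

```go
package main

import (
	"fmt"
	"strings"
)

// Label is a simplified stand-in for prompbmarshal.Label.
type Label struct{ Name, Value string }

func main() {
	labels := []Label{{"__meta_kubernetes_pod_name", "app-0"}, {"job", "k8s"}}

	// Appending to a fresh empty slice forces a full copy, so the
	// in-place filtering below cannot corrupt the snapshot.
	original := append([]Label{}, labels...)

	// Simulate meta-label removal: filter into labels[:0], reusing the backing array.
	kept := labels[:0]
	for _, l := range labels {
		if !strings.HasPrefix(l.Name, "__meta_") {
			kept = append(kept, l)
		}
	}

	fmt.Println(original) // [{__meta_kubernetes_pod_name app-0} {job k8s}]
	fmt.Println(kept)     // [{job k8s}]
}
```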


@@ -38,6 +38,9 @@ var (
 		"See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config for details")
 	promscrapeConfigFile = flag.String("promscrape.config", "", "Optional path to Prometheus config file with 'scrape_configs' section containing targets to scrape. "+
 		"See https://victoriametrics.github.io/#how-to-scrape-prometheus-exporters-such-as-node-exporter for details")
+	suppressDuplicateScrapeTargetErrors = flag.Bool("promscrape.suppressDuplicateScrapeTargetErrors", false, "Whether to suppress `duplicate scrape target` errors; "+
+		"see https://victoriametrics.github.io/vmagent.html#troubleshooting for details")
 )

 // CheckConfig checks -promscrape.config for errors and unsupported options.
@@ -265,18 +268,22 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
 	additionsCount := 0
 	deletionsCount := 0
-	swsMap := make(map[string]bool, len(sws))
+	swsMap := make(map[string][]prompbmarshal.Label, len(sws))
 	for i := range sws {
 		sw := &sws[i]
 		key := sw.key()
-		if swsMap[key] {
-			logger.Errorf("skipping duplicate scrape target with identical labels; endpoint=%s, labels=%s; "+
-				"make sure service discovery and relabeling is set up properly; "+
-				"see also https://victoriametrics.github.io/vmagent.html#troubleshooting",
-				sw.ScrapeURL, sw.LabelsString())
+		originalLabels := swsMap[key]
+		if originalLabels != nil {
+			if !*suppressDuplicateScrapeTargetErrors {
+				logger.Errorf("skipping duplicate scrape target with identical labels; endpoint=%s, labels=%s; "+
+					"make sure service discovery and relabeling is set up properly; "+
+					"see also https://victoriametrics.github.io/vmagent.html#troubleshooting; "+
+					"original labels for target1: %s; original labels for target2: %s",
+					sw.ScrapeURL, sw.LabelsString(), promLabelsString(originalLabels), promLabelsString(sw.OriginalLabels))
+			}
 			continue
 		}
-		swsMap[key] = true
+		swsMap[key] = sw.OriginalLabels
 		if sg.m[key] != nil {
 			// The scraper for the given key already exists.
 			continue
@@ -297,7 +304,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) {
 	// Stop deleted scrapers, which are missing in sws.
 	for key, sc := range sg.m {
-		if !swsMap[key] {
+		if swsMap[key] == nil {
 			close(sc.stopCh)
 			delete(sg.m, key)
 			deletionsCount++
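
Distilled, the updated `update` logic keeps a key-to-original-labels map so that the first target wins and each duplicate can report both sides' pre-relabeling labels. A runnable sketch with simplified stand-ins for `ScrapeWork` and `prompbmarshal.Label` (not the real types):

```go
package main

import "fmt"

type label struct{ Name, Value string }

// scrapeWork is a simplified stand-in; key mimics ScrapeWork.key().
type scrapeWork struct {
	key            string
	originalLabels []label
}

// dedup keeps the first target per key, mirroring scraperGroup.update above.
func dedup(sws []scrapeWork, suppressErrors bool) []scrapeWork {
	seen := make(map[string][]label, len(sws))
	var kept []scrapeWork
	for _, sw := range sws {
		if prev, ok := seen[sw.key]; ok {
			if !suppressErrors {
				// Showing both originals usually pinpoints the offending __meta_* label.
				fmt.Printf("duplicate target %q: target1=%v; target2=%v\n", sw.key, prev, sw.originalLabels)
			}
			continue
		}
		seen[sw.key] = sw.originalLabels
		kept = append(kept, sw)
	}
	return kept
}

func main() {
	sws := []scrapeWork{
		{"http://pod:8080/metrics", []label{{"__meta_kubernetes_pod_container_port_number", "8080"}}},
		{"http://pod:8080/metrics", []label{{"__meta_kubernetes_pod_container_port_number", "8081"}}},
	}
	fmt.Println(len(dedup(sws, false))) // 1
}
```

The real code stores `sw.OriginalLabels` in the map and treats a nil entry as "absent", which is why the deletion loop above checks `swsMap[key] == nil` instead of the old boolean lookup.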


@@ -47,6 +47,11 @@ type ScrapeWork struct {
 	// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
 	HonorTimestamps bool
+	// OriginalLabels contains original labels before relabeling.
+	//
+	// These labels are needed for relabeling troubleshooting at /targets page.
+	OriginalLabels []prompbmarshal.Label
 	// Labels to add to the scraped metrics.
 	//
 	// The list contains at least the following labels according to https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config
@@ -85,6 +90,7 @@ type ScrapeWork struct {
 //
 // it can be used for comparing for equality two ScrapeWork objects.
 func (sw *ScrapeWork) key() string {
+	// Do not take into account OriginalLabels.
 	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
 		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v",
 		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
@@ -107,11 +113,16 @@ func (sw *ScrapeWork) Job() string {
 // LabelsString returns labels in Prometheus format for the given sw.
 func (sw *ScrapeWork) LabelsString() string {
-	labels := make([]string, 0, len(sw.Labels))
-	for _, label := range promrelabel.FinalizeLabels(nil, sw.Labels) {
-		labels = append(labels, fmt.Sprintf("%s=%q", label.Name, label.Value))
+	labelsFinalized := promrelabel.FinalizeLabels(nil, sw.Labels)
+	return promLabelsString(labelsFinalized)
+}
+
+func promLabelsString(labels []prompbmarshal.Label) string {
+	a := make([]string, 0, len(labels))
+	for _, label := range labels {
+		a = append(a, fmt.Sprintf("%s=%q", label.Name, label.Value))
 	}
-	return "{" + strings.Join(labels, ", ") + "}"
+	return "{" + strings.Join(a, ", ") + "}"
 }

 type scrapeWork struct {
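
For reference, the extracted `promLabelsString` renders labels in the standard Prometheus exposition style; a small usage sketch with a stand-in `Label` type:

```go
package main

import (
	"fmt"
	"strings"
)

// Label stands in for prompbmarshal.Label.
type Label struct{ Name, Value string }

// promLabelsString mirrors the helper extracted in the diff above.
func promLabelsString(labels []Label) string {
	a := make([]string, 0, len(labels))
	for _, label := range labels {
		a = append(a, fmt.Sprintf("%s=%q", label.Name, label.Value))
	}
	return "{" + strings.Join(a, ", ") + "}"
}

func main() {
	labels := []Label{{"instance", "host:9100"}, {"job", "node"}}
	fmt.Println(promLabelsString(labels))
	// Output: {instance="host:9100", job="node"}
}
```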


@@ -11,8 +11,8 @@ import (
 var tsmGlobal = newTargetStatusMap()

 // WriteHumanReadableTargetsStatus writes human-readable status for all the scrape targets to w.
-func WriteHumanReadableTargetsStatus(w io.Writer) {
-	tsmGlobal.WriteHumanReadable(w)
+func WriteHumanReadableTargetsStatus(w io.Writer, showOriginalLabels bool) {
+	tsmGlobal.WriteHumanReadable(w, showOriginalLabels)
 }

 type targetStatusMap struct {
@@ -73,7 +73,7 @@ func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
 	return count
 }

-func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
+func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer, showOriginalLabels bool) {
 	byJob := make(map[string][]targetStatus)
 	tsm.mu.Lock()
 	for _, st := range tsm.m {
@@ -111,6 +111,9 @@ func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
 			state = "down"
 		}
 		labelsStr := st.sw.LabelsString()
+		if showOriginalLabels {
+			labelsStr += ", originalLabels=" + promLabelsString(st.sw.OriginalLabels)
+		}
 		lastScrape := st.getDurationFromLastScrape()
 		errMsg := ""
 		if st.err != nil {