mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-23 08:41:15 +01:00
6ec582acb9
This is a common error whith improperly configured target autodiscovery and/or relabeling. This error leads to duplicate scraping of the same targets with the same set of labels, which leads to duplicate samples in time series.
151 lines
3.4 KiB
Go
151 lines
3.4 KiB
Go
package promscrape
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
|
|
)
|
|
|
|
var tsmGlobal = newTargetStatusMap()
|
|
|
|
// WriteHumanReadableTargetsStatus writes human-readable status for all the scrape targets to w.
|
|
func WriteHumanReadableTargetsStatus(w io.Writer) {
|
|
tsmGlobal.WriteHumanReadable(w)
|
|
}
|
|
|
|
type targetStatusMap struct {
|
|
mu sync.Mutex
|
|
m map[uint64]targetStatus
|
|
}
|
|
|
|
func newTargetStatusMap() *targetStatusMap {
|
|
return &targetStatusMap{
|
|
m: make(map[uint64]targetStatus),
|
|
}
|
|
}
|
|
|
|
func (tsm *targetStatusMap) Reset() {
|
|
tsm.mu.Lock()
|
|
tsm.m = make(map[uint64]targetStatus)
|
|
tsm.mu.Unlock()
|
|
}
|
|
|
|
func (tsm *targetStatusMap) RegisterAll(sws []ScrapeWork) {
|
|
tsm.mu.Lock()
|
|
for i := range sws {
|
|
sw := &sws[i]
|
|
tsm.m[sw.ID] = targetStatus{
|
|
sw: sw,
|
|
}
|
|
}
|
|
tsm.mu.Unlock()
|
|
}
|
|
|
|
func (tsm *targetStatusMap) UnregisterAll(sws []ScrapeWork) {
|
|
tsm.mu.Lock()
|
|
for i := range sws {
|
|
delete(tsm.m, sws[i].ID)
|
|
}
|
|
tsm.mu.Unlock()
|
|
}
|
|
|
|
func (tsm *targetStatusMap) Update(sw *ScrapeWork, up bool, scrapeTime, scrapeDuration int64, err error) {
|
|
tsm.mu.Lock()
|
|
tsm.m[sw.ID] = targetStatus{
|
|
sw: sw,
|
|
up: up,
|
|
scrapeTime: scrapeTime,
|
|
scrapeDuration: scrapeDuration,
|
|
err: err,
|
|
}
|
|
tsm.mu.Unlock()
|
|
}
|
|
|
|
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
|
byJob := make(map[string][]targetStatus)
|
|
tsm.mu.Lock()
|
|
for _, st := range tsm.m {
|
|
job := st.sw.Job()
|
|
byJob[job] = append(byJob[job], st)
|
|
}
|
|
tsm.mu.Unlock()
|
|
|
|
var jss []jobStatus
|
|
for job, statuses := range byJob {
|
|
jss = append(jss, jobStatus{
|
|
job: job,
|
|
statuses: statuses,
|
|
})
|
|
}
|
|
sort.Slice(jss, func(i, j int) bool {
|
|
return jss[i].job < jss[j].job
|
|
})
|
|
|
|
targetsByEndpoint := make(map[string]int)
|
|
for _, js := range jss {
|
|
sts := js.statuses
|
|
sort.Slice(sts, func(i, j int) bool {
|
|
return sts[i].sw.ScrapeURL < sts[j].sw.ScrapeURL
|
|
})
|
|
ups := 0
|
|
for _, st := range sts {
|
|
if st.up {
|
|
ups++
|
|
}
|
|
}
|
|
fmt.Fprintf(w, "job=%q (%d/%d up)\n", js.job, ups, len(sts))
|
|
for _, st := range sts {
|
|
state := "up"
|
|
if !st.up {
|
|
state = "down"
|
|
}
|
|
var labels []string
|
|
for _, label := range promrelabel.FinalizeLabels(nil, st.sw.Labels) {
|
|
labels = append(labels, fmt.Sprintf("%s=%q", label.Name, label.Value))
|
|
}
|
|
labelsStr := "{" + strings.Join(labels, ", ") + "}"
|
|
lastScrape := st.getDurationFromLastScrape()
|
|
errMsg := ""
|
|
if st.err != nil {
|
|
errMsg = st.err.Error()
|
|
}
|
|
fmt.Fprintf(w, "\tstate=%s, endpoint=%s, labels=%s, last_scrape=%.3fs ago, scrape_duration=%.3fs, error=%q\n",
|
|
state, st.sw.ScrapeURL, labelsStr, lastScrape.Seconds(), float64(st.scrapeDuration)/1000, errMsg)
|
|
key := fmt.Sprintf("endpoint=%s, labels=%s", st.sw.ScrapeURL, labelsStr)
|
|
targetsByEndpoint[key]++
|
|
}
|
|
}
|
|
fmt.Fprintf(w, "\n")
|
|
|
|
// Check whether there are targets with duplicate endpoints and labels.
|
|
for key, n := range targetsByEndpoint {
|
|
if n <= 1 {
|
|
continue
|
|
}
|
|
fmt.Fprintf(w, "!!! Scrape config error: %d duplicate targets with identical endpoint and labels found:\n", n)
|
|
fmt.Fprintf(w, "\t%s\n", key)
|
|
}
|
|
}
|
|
|
|
type jobStatus struct {
|
|
job string
|
|
statuses []targetStatus
|
|
}
|
|
|
|
type targetStatus struct {
|
|
sw *ScrapeWork
|
|
up bool
|
|
scrapeTime int64
|
|
scrapeDuration int64
|
|
err error
|
|
}
|
|
|
|
func (st *targetStatus) getDurationFromLastScrape() time.Duration {
|
|
return time.Since(time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6))
|
|
}
|