lib/promscrape: reload only modified scrapers on config changes

This should improve scrape stability when a large number of targets is scraped and these targets change frequently.
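
In short, each discovered target is reduced to a string key built from its full scrape configuration (see `ScrapeWork.key()` in the diff below), and on every config refresh only scrapers whose keys appear or disappear are started or stopped, while unchanged targets keep running. A condensed, illustrative sketch of that logic follows (the `updateSketch` name is made up here; locking, metrics and duplicate-target handling from the real `scraperGroup.update` are omitted):

func (sg *scraperGroup) updateSketch(sws []ScrapeWork) {
	swsMap := make(map[string]bool, len(sws))
	for i := range sws {
		sw := &sws[i]
		key := sw.key() // encodes ScrapeURL, intervals, labels, relabel configs, etc.
		swsMap[key] = true
		if sg.m[key] != nil {
			continue // unchanged target: keep the already running scraper
		}
		// New target: start a scraper for it.
		sc := newScraper(sw, sg.pushData)
		sg.wg.Add(1)
		go func() {
			defer sg.wg.Done()
			sc.sw.run(sc.stopCh)
		}()
		sg.m[key] = sc
	}
	// Stop scrapers for targets which are missing in the new config.
	for key, sc := range sg.m {
		if !swsMap[key] {
			close(sc.stopCh)
			delete(sg.m, key)
		}
	}
}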

Thanks to @xbsura for the idea and initial implementation attempts at the following pull requests:

- https://github.com/VictoriaMetrics/VictoriaMetrics/pull/449
- https://github.com/VictoriaMetrics/VictoriaMetrics/pull/458
- https://github.com/VictoriaMetrics/VictoriaMetrics/pull/459
- https://github.com/VictoriaMetrics/VictoriaMetrics/pull/460
Aliaksandr Valialkin 2020-05-03 12:41:13 +03:00
parent 8f591b848a
commit f422203e10
7 changed files with 254 additions and 348 deletions

View File

@@ -1,6 +1,7 @@
 package promauth
 
 import (
+	"bytes"
 	"crypto/tls"
 	"crypto/x509"
 	"encoding/base64"
@@ -40,6 +41,27 @@ type Config struct {
 	TLSInsecureSkipVerify bool
 }
 
+// String returns human-(un)readable representation for cfg.
+func (ac *Config) String() string {
+	return fmt.Sprintf("Authorization=%s, TLSRootCA=%s, TLSCertificate=%s, TLSServerName=%s, TLSInsecureSkipVerify=%v",
+		ac.Authorization, ac.tlsRootCAString(), ac.tlsCertificateString(), ac.TLSServerName, ac.TLSInsecureSkipVerify)
+}
+
+func (ac *Config) tlsRootCAString() string {
+	if ac.TLSRootCA == nil {
+		return ""
+	}
+	data := ac.TLSRootCA.Subjects()
+	return string(bytes.Join(data, []byte("\n")))
+}
+
+func (ac *Config) tlsCertificateString() string {
+	if ac.TLSCertificate == nil {
+		return ""
+	}
+	return string(bytes.Join(ac.TLSCertificate.Certificate, []byte("\n")))
+}
+
 // NewTLSConfig returns new TLS config for the given ac.
 func (ac *Config) NewTLSConfig() *tls.Config {
 	tlsCfg := &tls.Config{

View File

@@ -1,6 +1,7 @@
 package promrelabel
 
 import (
+	"fmt"
 	"regexp"
 	"strconv"
 	"strings"
@@ -24,6 +25,12 @@ type ParsedRelabelConfig struct {
 	Action string
 }
 
+// String returns human-readable representation for prc.
+func (prc *ParsedRelabelConfig) String() string {
+	return fmt.Sprintf("SourceLabels=%s, Separator=%s, TargetLabel=%s, Regex=%s, Modulus=%d, Replacement=%s, Action=%s",
+		prc.SourceLabels, prc.Separator, prc.TargetLabel, prc.Regex.String(), prc.Modulus, prc.Replacement, prc.Action)
+}
+
 // ApplyRelabelConfigs applies prcs to labels starting from the labelsOffset.
 //
 // If isFinalize is set, then FinalizeLabels is called on the labels[labelsOffset:].

View File

@@ -143,38 +143,6 @@ func unmarshalMaybeStrict(data []byte, dst interface{}) error {
 	return err
 }
 
-func (cfg *Config) kubernetesSDConfigsCount() int {
-	n := 0
-	for i := range cfg.ScrapeConfigs {
-		n += len(cfg.ScrapeConfigs[i].KubernetesSDConfigs)
-	}
-	return n
-}
-
-func (cfg *Config) ec2SDConfigsCount() int {
-	n := 0
-	for i := range cfg.ScrapeConfigs {
-		n += len(cfg.ScrapeConfigs[i].EC2SDConfigs)
-	}
-	return n
-}
-
-func (cfg *Config) gceSDConfigsCount() int {
-	n := 0
-	for i := range cfg.ScrapeConfigs {
-		n += len(cfg.ScrapeConfigs[i].GCESDConfigs)
-	}
-	return n
-}
-
-func (cfg *Config) fileSDConfigsCount() int {
-	n := 0
-	for i := range cfg.ScrapeConfigs {
-		n += len(cfg.ScrapeConfigs[i].FileSDConfigs)
-	}
-	return n
-}
-
 // getKubernetesSDScrapeWork returns `kubernetes_sd_configs` ScrapeWork from cfg.
 func (cfg *Config) getKubernetesSDScrapeWork() []ScrapeWork {
 	var dst []ScrapeWork
@@ -215,16 +183,16 @@ func (cfg *Config) getGCESDScrapeWork() []ScrapeWork {
 }
 
 // getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
-func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) []ScrapeWork {
+func (cfg *Config) getFileSDScrapeWork(swsPrev []ScrapeWork) []ScrapeWork {
 	// Create a map for the previous scrape work.
-	swPrev := make(map[string][]ScrapeWork)
-	for i := range prev {
-		sw := &prev[i]
+	swsMapPrev := make(map[string][]ScrapeWork)
+	for i := range swsPrev {
+		sw := &swsPrev[i]
 		filepath := promrelabel.GetLabelValueByName(sw.Labels, "__vm_filepath")
 		if len(filepath) == 0 {
 			logger.Panicf("BUG: missing `__vm_filepath` label")
 		} else {
-			swPrev[filepath] = append(swPrev[filepath], *sw)
+			swsMapPrev[filepath] = append(swsMapPrev[filepath], *sw)
 		}
 	}
 	var dst []ScrapeWork
@@ -232,7 +200,7 @@ func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) []ScrapeWork {
 		sc := &cfg.ScrapeConfigs[i]
 		for j := range sc.FileSDConfigs {
 			sdc := &sc.FileSDConfigs[j]
-			dst = sdc.appendScrapeWork(dst, swPrev, cfg.baseDir, sc.swc)
+			dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc)
 		}
 	}
 	return dst
@@ -377,7 +345,7 @@ func appendScrapeWorkForTargetLabels(dst []ScrapeWork, swc *scrapeWorkConfig, ta
 	return dst
 }
 
-func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
+func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swsMapPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
 	for _, file := range sdc.Files {
 		pathPattern := getFilepath(baseDir, file)
 		paths := []string{pathPattern}
@@ -394,7 +362,7 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
 		stcs, err := loadStaticConfigs(path)
 		if err != nil {
 			// Do not return this error, since other paths may contain valid scrape configs.
-			if sws := swPrev[path]; sws != nil {
+			if sws := swsMapPrev[path]; sws != nil {
 				// Re-use the previous valid scrape work for this path.
 				logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
 				dst = append(dst, sws...)
@@ -412,7 +380,7 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
 		}
 		metaLabels := map[string]string{
 			"__meta_filepath": pathShort,
-			"__vm_filepath":   pathShort, // This label is needed for internal promscrape logic
+			"__vm_filepath":   path, // This label is needed for internal promscrape logic
 		}
 		for i := range stcs {
 			dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)

View File

@@ -46,8 +46,8 @@ func TestLoadConfig(t *testing.T) {
 	if err != nil {
 		t.Fatalf("unexpected error: %s", err)
 	}
-	if n := cfg.fileSDConfigsCount(); n != 2 {
-		t.Fatalf("unexpected number of `file_sd_configs`; got %d; want %d; cfg:\n%#v", n, 2, cfg)
+	if cfg == nil {
+		t.Fatalf("expecting non-nil config")
 	}
 
 	// Try loading non-existing file
@@ -1169,3 +1169,17 @@ scrape_configs:
 }
 
 var defaultRegexForRelabelConfig = regexp.MustCompile("^(.*)$")
+
+func equalStaticConfigForScrapeWorks(a, b []ScrapeWork) bool {
+	if len(a) != len(b) {
+		return false
+	}
+	for i := range a {
+		keyA := a[i].key()
+		keyB := b[i].key()
+		if keyA != keyB {
+			return false
+		}
+	}
+	return true
+}

View File

@@ -3,6 +3,7 @@ package promscrape
 import (
 	"bytes"
 	"flag"
+	"fmt"
 	"os"
 	"os/signal"
 	"sync"
@@ -60,8 +61,6 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
 		// Nothing to scrape.
 		return
 	}
-	sighupCh := make(chan os.Signal, 1)
-	signal.Notify(sighupCh, syscall.SIGHUP)
 
 	logger.Infof("reading Prometheus configs from %q", configFile)
 	cfg, data, err := loadConfig(configFile)
@@ -69,43 +68,24 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
 		logger.Fatalf("cannot read %q: %s", configFile, err)
 	}
 
+	scs := newScrapeConfigs(pushData)
+	scs.add("static_configs", 0, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getStaticScrapeWork() })
+	scs.add("file_sd_configs", *fileSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getFileSDScrapeWork(swsPrev) })
+	scs.add("kubernetes_sd_configs", *kubernetesSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getKubernetesSDScrapeWork() })
+	scs.add("ec2_sd_configs", *ec2SDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getEC2SDScrapeWork() })
+	scs.add("gce_sd_configs", *gceSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getGCESDScrapeWork() })
+
+	sighupCh := make(chan os.Signal, 1)
+	signal.Notify(sighupCh, syscall.SIGHUP)
+
 	var tickerCh <-chan time.Time
 	if *configCheckInterval > 0 {
 		ticker := time.NewTicker(*configCheckInterval)
 		tickerCh = ticker.C
 		defer ticker.Stop()
 	}
-
-	mustStop := false
-	for !mustStop {
-		stopCh := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			runStaticScrapers(cfg, pushData, stopCh)
-		}()
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			runFileSDScrapers(cfg, pushData, stopCh)
-		}()
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			runKubernetesSDScrapers(cfg, pushData, stopCh)
-		}()
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			runEC2SDScrapers(cfg, pushData, stopCh)
-		}()
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			runGCESDScrapers(cfg, pushData, stopCh)
-		}()
-
+	for {
+		scs.updateConfig(cfg)
 	waitForChans:
 		select {
 		case <-sighupCh:
@@ -134,281 +114,194 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
 			cfg = cfgNew
 			data = dataNew
 		case <-globalStopCh:
-			mustStop = true
+			logger.Infof("stopping Prometheus scrapers")
+			startTime := time.Now()
+			scs.stop()
+			logger.Infof("stopped Prometheus scrapers in %.3f seconds", time.Since(startTime).Seconds())
+			return
 		}
-
-		if !mustStop {
-			logger.Infof("found changes in %q; applying these changes", configFile)
-		}
-		logger.Infof("stopping Prometheus scrapers")
-		startTime := time.Now()
-		close(stopCh)
-		wg.Wait()
-		logger.Infof("stopped Prometheus scrapers in %.3f seconds", time.Since(startTime).Seconds())
+		logger.Infof("found changes in %q; applying these changes", configFile)
 		configReloads.Inc()
 	}
 }
 
 var configReloads = metrics.NewCounter(`vm_promscrape_config_reloads_total`)
 
-func runStaticScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	sws := cfg.getStaticScrapeWork()
-	if len(sws) == 0 {
-		return
-	}
-	logger.Infof("starting %d scrapers for `static_config` targets", len(sws))
-	staticTargets.Set(uint64(len(sws)))
-	runScrapeWorkers(sws, pushData, stopCh)
-	staticTargets.Set(0)
-	logger.Infof("stopped all the %d scrapers for `static_config` targets", len(sws))
-}
-
-var staticTargets = metrics.NewCounter(`vm_promscrape_targets{type="static"}`)
-
-func runKubernetesSDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	if cfg.kubernetesSDConfigsCount() == 0 {
-		return
-	}
-	sws := cfg.getKubernetesSDScrapeWork()
-	ticker := time.NewTicker(*kubernetesSDCheckInterval)
-	defer ticker.Stop()
-	mustStop := false
-	for !mustStop {
-		localStopCh := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func(sws []ScrapeWork) {
-			defer wg.Done()
-			logger.Infof("starting %d scrapers for `kubernetes_sd_config` targets", len(sws))
-			kubernetesSDTargets.Set(uint64(len(sws)))
-			runScrapeWorkers(sws, pushData, localStopCh)
-			kubernetesSDTargets.Set(0)
-			logger.Infof("stopped all the %d scrapers for `kubernetes_sd_config` targets", len(sws))
-		}(sws)
-
-	waitForChans:
-		select {
-		case <-ticker.C:
-			swsNew := cfg.getKubernetesSDScrapeWork()
-			if equalStaticConfigForScrapeWorks(swsNew, sws) {
-				// Nothing changed, continue waiting for updated scrape work
-				goto waitForChans
-			}
-			logger.Infof("restarting scrapers for changed `kubernetes_sd_config` targets")
-			sws = swsNew
-		case <-stopCh:
-			mustStop = true
-		}
-		close(localStopCh)
-		wg.Wait()
-		kubernetesSDReloads.Inc()
-	}
-}
-
-var (
-	kubernetesSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="kubernetes_sd"}`)
-	kubernetesSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="kubernetes_sd"}`)
-)
-
-func runEC2SDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	if cfg.ec2SDConfigsCount() == 0 {
-		return
-	}
-	sws := cfg.getEC2SDScrapeWork()
-	ticker := time.NewTicker(*ec2SDCheckInterval)
-	defer ticker.Stop()
-	mustStop := false
-	for !mustStop {
-		localStopCh := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func(sws []ScrapeWork) {
-			defer wg.Done()
-			logger.Infof("starting %d scrapers for `ec2_sd_config` targets", len(sws))
-			ec2SDTargets.Set(uint64(len(sws)))
-			runScrapeWorkers(sws, pushData, localStopCh)
-			ec2SDTargets.Set(0)
-			logger.Infof("stopped all the %d scrapers for `ec2_sd_config` targets", len(sws))
-		}(sws)
-
-	waitForChans:
-		select {
-		case <-ticker.C:
-			swsNew := cfg.getEC2SDScrapeWork()
-			if equalStaticConfigForScrapeWorks(swsNew, sws) {
-				// Nothing changed, continue waiting for updated scrape work
-				goto waitForChans
-			}
-			logger.Infof("restarting scrapers for changed `ec2_sd_config` targets")
-			sws = swsNew
-		case <-stopCh:
-			mustStop = true
-		}
-		close(localStopCh)
-		wg.Wait()
-		ec2SDReloads.Inc()
-	}
-}
-
-var (
-	ec2SDTargets = metrics.NewCounter(`vm_promscrape_targets{type="ec2_sd"}`)
-	ec2SDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="ec2_sd"}`)
-)
-
-func runGCESDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	if cfg.gceSDConfigsCount() == 0 {
-		return
-	}
-	sws := cfg.getGCESDScrapeWork()
-	ticker := time.NewTicker(*gceSDCheckInterval)
-	defer ticker.Stop()
-	mustStop := false
-	for !mustStop {
-		localStopCh := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func(sws []ScrapeWork) {
-			defer wg.Done()
-			logger.Infof("starting %d scrapers for `gce_sd_config` targets", len(sws))
-			gceSDTargets.Set(uint64(len(sws)))
-			runScrapeWorkers(sws, pushData, localStopCh)
-			gceSDTargets.Set(0)
-			logger.Infof("stopped all the %d scrapers for `gce_sd_config` targets", len(sws))
-		}(sws)
-
-	waitForChans:
-		select {
-		case <-ticker.C:
-			swsNew := cfg.getGCESDScrapeWork()
-			if equalStaticConfigForScrapeWorks(swsNew, sws) {
-				// Nothing changed, continue waiting for updated scrape work
-				goto waitForChans
-			}
-			logger.Infof("restarting scrapers for changed `gce_sd_config` targets")
-			sws = swsNew
-		case <-stopCh:
-			mustStop = true
-		}
-		close(localStopCh)
-		wg.Wait()
-		gceSDReloads.Inc()
-	}
-}
-
-var (
-	gceSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="gce_sd"}`)
-	gceSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="gce_sd"}`)
-)
-
-func runFileSDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	if cfg.fileSDConfigsCount() == 0 {
-		return
-	}
-	sws := cfg.getFileSDScrapeWork(nil)
-	ticker := time.NewTicker(*fileSDCheckInterval)
-	defer ticker.Stop()
-	mustStop := false
-	for !mustStop {
-		localStopCh := make(chan struct{})
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func(sws []ScrapeWork) {
-			defer wg.Done()
-			logger.Infof("starting %d scrapers for `file_sd_config` targets", len(sws))
-			fileSDTargets.Set(uint64(len(sws)))
-			runScrapeWorkers(sws, pushData, localStopCh)
-			fileSDTargets.Set(0)
-			logger.Infof("stopped all the %d scrapers for `file_sd_config` targets", len(sws))
-		}(sws)
-
-	waitForChans:
-		select {
-		case <-ticker.C:
-			swsNew := cfg.getFileSDScrapeWork(sws)
-			if equalStaticConfigForScrapeWorks(swsNew, sws) {
-				// Nothing changed, continue waiting for updated scrape work
-				goto waitForChans
-			}
-			logger.Infof("restarting scrapers for changed `file_sd_config` targets")
-			sws = swsNew
-		case <-stopCh:
-			mustStop = true
-		}
-		close(localStopCh)
-		wg.Wait()
-		fileSDReloads.Inc()
-	}
-}
-
-var (
-	fileSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="file_sd"}`)
-	fileSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="file_sd"}`)
-)
-
-func equalStaticConfigForScrapeWorks(as, bs []ScrapeWork) bool {
-	if len(as) != len(bs) {
-		return false
-	}
-	for i := range as {
-		if !equalStaticConfigForScrapeWork(&as[i], &bs[i]) {
-			return false
-		}
-	}
-	return true
-}
-
-func equalStaticConfigForScrapeWork(a, b *ScrapeWork) bool {
-	// `static_config` can change only ScrapeURL and Labels. So compare only them.
-	if a.ScrapeURL != b.ScrapeURL {
-		return false
-	}
-	if !equalLabels(a.Labels, b.Labels) {
-		return false
-	}
-	return true
-}
-
-func equalLabels(as, bs []prompbmarshal.Label) bool {
-	if len(as) != len(bs) {
-		return false
-	}
-	for i := range as {
-		if !equalLabel(&as[i], &bs[i]) {
-			return false
-		}
-	}
-	return true
-}
-
-func equalLabel(a, b *prompbmarshal.Label) bool {
-	if a.Name != b.Name {
-		return false
-	}
-	if a.Value != b.Value {
-		return false
-	}
-	return true
-}
-
-// runScrapeWorkers runs sws.
-//
-// This function returns after closing stopCh.
-func runScrapeWorkers(sws []ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
-	tsmGlobal.RegisterAll(sws)
-	var wg sync.WaitGroup
-	for i := range sws {
-		cfg := &sws[i]
-		c := newClient(cfg)
-		var sw scrapeWork
-		sw.Config = *cfg
-		sw.ReadData = c.ReadData
-		sw.PushData = pushData
-		wg.Add(1)
-		go func() {
-			defer wg.Done()
-			sw.run(stopCh)
-		}()
-	}
-	wg.Wait()
-	tsmGlobal.UnregisterAll(sws)
-}
+type scrapeConfigs struct {
+	pushData func(wr *prompbmarshal.WriteRequest)
+	wg       sync.WaitGroup
+	stopCh   chan struct{}
+	scfgs    []*scrapeConfig
+}
+
+func newScrapeConfigs(pushData func(wr *prompbmarshal.WriteRequest)) *scrapeConfigs {
+	return &scrapeConfigs{
+		pushData: pushData,
+		stopCh:   make(chan struct{}),
+	}
+}
+
+func (scs *scrapeConfigs) add(name string, checkInterval time.Duration, getScrapeWork func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork) {
+	scfg := &scrapeConfig{
+		name:          name,
+		pushData:      scs.pushData,
+		getScrapeWork: getScrapeWork,
+		checkInterval: checkInterval,
+		cfgCh:         make(chan *Config, 1),
+		stopCh:        scs.stopCh,
+	}
+	scs.wg.Add(1)
+	go func() {
+		defer scs.wg.Done()
+		scfg.run()
+	}()
+	scs.scfgs = append(scs.scfgs, scfg)
+}
+
+func (scs *scrapeConfigs) updateConfig(cfg *Config) {
+	for _, scfg := range scs.scfgs {
+		scfg.cfgCh <- cfg
+	}
+}
+
+func (scs *scrapeConfigs) stop() {
+	close(scs.stopCh)
+	scs.wg.Wait()
+	scs.scfgs = nil
+}
+
+type scrapeConfig struct {
+	name          string
+	pushData      func(wr *prompbmarshal.WriteRequest)
+	getScrapeWork func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork
+	checkInterval time.Duration
+	cfgCh         chan *Config
+	stopCh        <-chan struct{}
+}
+
+func (scfg *scrapeConfig) run() {
+	sg := newScraperGroup(scfg.name, scfg.pushData)
+	defer sg.stop()
+
+	var tickerCh <-chan time.Time
+	if scfg.checkInterval > 0 {
+		ticker := time.NewTicker(scfg.checkInterval)
+		defer ticker.Stop()
+		tickerCh = ticker.C
+	}
+
+	cfg := <-scfg.cfgCh
+	var swsPrev []ScrapeWork
+	for {
+		sws := scfg.getScrapeWork(cfg, swsPrev)
+		sg.update(sws)
+		swsPrev = sws
+		select {
+		case <-scfg.stopCh:
+			return
+		case cfg = <-scfg.cfgCh:
+		case <-tickerCh:
+		}
+	}
+}
+
+type scraperGroup struct {
+	name         string
+	wg           sync.WaitGroup
+	mLock        sync.Mutex
+	m            map[string]*scraper
+	pushData     func(wr *prompbmarshal.WriteRequest)
+	changesCount *metrics.Counter
+}
+
+func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest)) *scraperGroup {
+	sg := &scraperGroup{
+		name:         name,
+		m:            make(map[string]*scraper),
+		pushData:     pushData,
+		changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
+	}
+	metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 {
+		sg.mLock.Lock()
+		n := len(sg.m)
+		sg.mLock.Unlock()
+		return float64(n)
+	})
+	return sg
+}
+
+func (sg *scraperGroup) stop() {
+	sg.mLock.Lock()
+	for _, sc := range sg.m {
+		close(sc.stopCh)
+	}
+	sg.m = nil
+	sg.mLock.Unlock()
+	sg.wg.Wait()
+}
+
+func (sg *scraperGroup) update(sws []ScrapeWork) {
+	sg.mLock.Lock()
+	defer sg.mLock.Unlock()
+
+	additionsCount := 0
+	deletionsCount := 0
+	swsMap := make(map[string]bool, len(sws))
+	for i := range sws {
+		sw := &sws[i]
+		key := sw.key()
+		if swsMap[key] {
+			logger.Errorf("skipping duplicate scrape target with identical labels; endpoint=%s, labels=%s; make sure service discovery and relabeling is set up properly",
+				sw.ScrapeURL, sw.LabelsString())
+			continue
+		}
+		swsMap[key] = true
+		if sg.m[key] != nil {
+			// The scraper for the given key already exists.
+			continue
+		}
+
+		// Start a scraper for the missing key.
+		sc := newScraper(sw, sg.pushData)
+		sg.wg.Add(1)
+		go func() {
+			defer sg.wg.Done()
+			sc.sw.run(sc.stopCh)
+			tsmGlobal.Unregister(sw)
+		}()
+		tsmGlobal.Register(sw)
+		sg.m[key] = sc
+		additionsCount++
+	}
+
+	// Stop deleted scrapers, which are missing in sws.
+	for key, sc := range sg.m {
+		if !swsMap[key] {
+			close(sc.stopCh)
+			delete(sg.m, key)
+			deletionsCount++
+		}
+	}
+
+	if additionsCount > 0 || deletionsCount > 0 {
+		sg.changesCount.Add(additionsCount + deletionsCount)
+		logger.Infof("%s: added targets: %d, removed targets: %d; total targets: %d", sg.name, additionsCount, deletionsCount, len(sg.m))
+	}
+}
+
+type scraper struct {
+	sw     scrapeWork
+	stopCh chan struct{}
+}
+
+func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
+	sc := &scraper{
+		stopCh: make(chan struct{}),
+	}
+	c := newClient(sw)
+	sc.sw.Config = *sw
+	sc.sw.ReadData = c.ReadData
+	sc.sw.PushData = pushData
+	return sc
+}

View File

@@ -68,6 +68,25 @@ type ScrapeWork struct {
 	SampleLimit int
 }
 
+// key returns unique identifier for the given sw.
+//
+// it can be used for comparing for equality two ScrapeWork objects.
+func (sw *ScrapeWork) key() string {
+	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
+		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d",
+		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
+		sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit)
+	return key
+}
+
+func (sw *ScrapeWork) metricRelabelConfigsString() string {
+	var sb strings.Builder
+	for _, prc := range sw.MetricRelabelConfigs {
+		fmt.Fprintf(&sb, "%s", prc.String())
+	}
+	return sb.String()
+}
+
 // Job returns job for the ScrapeWork
 func (sw *ScrapeWork) Job() string {
 	return promrelabel.GetLabelValueByName(sw.Labels, "job")

View File

@@ -32,22 +32,17 @@ func (tsm *targetStatusMap) Reset() {
 	tsm.mu.Unlock()
 }
 
-func (tsm *targetStatusMap) RegisterAll(sws []ScrapeWork) {
+func (tsm *targetStatusMap) Register(sw *ScrapeWork) {
 	tsm.mu.Lock()
-	for i := range sws {
-		sw := &sws[i]
-		tsm.m[sw.ID] = targetStatus{
-			sw: sw,
-		}
+	tsm.m[sw.ID] = targetStatus{
+		sw: sw,
 	}
 	tsm.mu.Unlock()
 }
 
-func (tsm *targetStatusMap) UnregisterAll(sws []ScrapeWork) {
+func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
 	tsm.mu.Lock()
-	for i := range sws {
-		delete(tsm.m, sws[i].ID)
-	}
+	delete(tsm.m, sw.ID)
 	tsm.mu.Unlock()
 }
@@ -83,7 +78,6 @@ func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
 		return jss[i].job < jss[j].job
 	})
 
-	targetsByEndpoint := make(map[string]int)
 	for _, js := range jss {
 		sts := js.statuses
 		sort.Slice(sts, func(i, j int) bool {
@@ -109,20 +103,9 @@ func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
 			}
 			fmt.Fprintf(w, "\tstate=%s, endpoint=%s, labels=%s, last_scrape=%.3fs ago, scrape_duration=%.3fs, error=%q\n",
 				state, st.sw.ScrapeURL, labelsStr, lastScrape.Seconds(), float64(st.scrapeDuration)/1000, errMsg)
-			key := fmt.Sprintf("endpoint=%s, labels=%s", st.sw.ScrapeURL, labelsStr)
-			targetsByEndpoint[key]++
 		}
 	}
 	fmt.Fprintf(w, "\n")
-
-	// Check whether there are targets with duplicate endpoints and labels.
-	for key, n := range targetsByEndpoint {
-		if n <= 1 {
-			continue
-		}
-		fmt.Fprintf(w, "!!! Scrape config error: %d duplicate targets with identical endpoint and labels found:\n", n)
-		fmt.Fprintf(w, "\t%s\n", key)
-	}
 }
 
 type jobStatus struct {