mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-15 00:13:30 +01:00
lib/promscrape: reload only modified scrapers on config changes
This should improve scrape stability when big number of targets are scraped and these targets are frequently changed. Thanks to @xbsura for the idea and initial implementation attempts at the following pull requests: - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/449 - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/458 - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/459 - https://github.com/VictoriaMetrics/VictoriaMetrics/pull/460
This commit is contained in:
parent
8f591b848a
commit
f422203e10
@ -1,6 +1,7 @@
|
|||||||
package promauth
|
package promauth
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"crypto/x509"
|
"crypto/x509"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
@ -40,6 +41,27 @@ type Config struct {
|
|||||||
TLSInsecureSkipVerify bool
|
TLSInsecureSkipVerify bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String returns human-(un)readable representation for cfg.
|
||||||
|
func (ac *Config) String() string {
|
||||||
|
return fmt.Sprintf("Authorization=%s, TLSRootCA=%s, TLSCertificate=%s, TLSServerName=%s, TLSInsecureSkipVerify=%v",
|
||||||
|
ac.Authorization, ac.tlsRootCAString(), ac.tlsCertificateString(), ac.TLSServerName, ac.TLSInsecureSkipVerify)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ac *Config) tlsRootCAString() string {
|
||||||
|
if ac.TLSRootCA == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
data := ac.TLSRootCA.Subjects()
|
||||||
|
return string(bytes.Join(data, []byte("\n")))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ac *Config) tlsCertificateString() string {
|
||||||
|
if ac.TLSCertificate == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return string(bytes.Join(ac.TLSCertificate.Certificate, []byte("\n")))
|
||||||
|
}
|
||||||
|
|
||||||
// NewTLSConfig returns new TLS config for the given ac.
|
// NewTLSConfig returns new TLS config for the given ac.
|
||||||
func (ac *Config) NewTLSConfig() *tls.Config {
|
func (ac *Config) NewTLSConfig() *tls.Config {
|
||||||
tlsCfg := &tls.Config{
|
tlsCfg := &tls.Config{
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package promrelabel
|
package promrelabel
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -24,6 +25,12 @@ type ParsedRelabelConfig struct {
|
|||||||
Action string
|
Action string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// String returns human-readable representation for prc.
|
||||||
|
func (prc *ParsedRelabelConfig) String() string {
|
||||||
|
return fmt.Sprintf("SourceLabels=%s, Separator=%s, TargetLabel=%s, Regex=%s, Modulus=%d, Replacement=%s, Action=%s",
|
||||||
|
prc.SourceLabels, prc.Separator, prc.TargetLabel, prc.Regex.String(), prc.Modulus, prc.Replacement, prc.Action)
|
||||||
|
}
|
||||||
|
|
||||||
// ApplyRelabelConfigs applies prcs to labels starting from the labelsOffset.
|
// ApplyRelabelConfigs applies prcs to labels starting from the labelsOffset.
|
||||||
//
|
//
|
||||||
// If isFinalize is set, then FinalizeLabels is called on the labels[labelsOffset:].
|
// If isFinalize is set, then FinalizeLabels is called on the labels[labelsOffset:].
|
||||||
|
@ -143,38 +143,6 @@ func unmarshalMaybeStrict(data []byte, dst interface{}) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cfg *Config) kubernetesSDConfigsCount() int {
|
|
||||||
n := 0
|
|
||||||
for i := range cfg.ScrapeConfigs {
|
|
||||||
n += len(cfg.ScrapeConfigs[i].KubernetesSDConfigs)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cfg *Config) ec2SDConfigsCount() int {
|
|
||||||
n := 0
|
|
||||||
for i := range cfg.ScrapeConfigs {
|
|
||||||
n += len(cfg.ScrapeConfigs[i].EC2SDConfigs)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cfg *Config) gceSDConfigsCount() int {
|
|
||||||
n := 0
|
|
||||||
for i := range cfg.ScrapeConfigs {
|
|
||||||
n += len(cfg.ScrapeConfigs[i].GCESDConfigs)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
func (cfg *Config) fileSDConfigsCount() int {
|
|
||||||
n := 0
|
|
||||||
for i := range cfg.ScrapeConfigs {
|
|
||||||
n += len(cfg.ScrapeConfigs[i].FileSDConfigs)
|
|
||||||
}
|
|
||||||
return n
|
|
||||||
}
|
|
||||||
|
|
||||||
// getKubernetesSDScrapeWork returns `kubernetes_sd_configs` ScrapeWork from cfg.
|
// getKubernetesSDScrapeWork returns `kubernetes_sd_configs` ScrapeWork from cfg.
|
||||||
func (cfg *Config) getKubernetesSDScrapeWork() []ScrapeWork {
|
func (cfg *Config) getKubernetesSDScrapeWork() []ScrapeWork {
|
||||||
var dst []ScrapeWork
|
var dst []ScrapeWork
|
||||||
@ -215,16 +183,16 @@ func (cfg *Config) getGCESDScrapeWork() []ScrapeWork {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
|
// getFileSDScrapeWork returns `file_sd_configs` ScrapeWork from cfg.
|
||||||
func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) []ScrapeWork {
|
func (cfg *Config) getFileSDScrapeWork(swsPrev []ScrapeWork) []ScrapeWork {
|
||||||
// Create a map for the previous scrape work.
|
// Create a map for the previous scrape work.
|
||||||
swPrev := make(map[string][]ScrapeWork)
|
swsMapPrev := make(map[string][]ScrapeWork)
|
||||||
for i := range prev {
|
for i := range swsPrev {
|
||||||
sw := &prev[i]
|
sw := &swsPrev[i]
|
||||||
filepath := promrelabel.GetLabelValueByName(sw.Labels, "__vm_filepath")
|
filepath := promrelabel.GetLabelValueByName(sw.Labels, "__vm_filepath")
|
||||||
if len(filepath) == 0 {
|
if len(filepath) == 0 {
|
||||||
logger.Panicf("BUG: missing `__vm_filepath` label")
|
logger.Panicf("BUG: missing `__vm_filepath` label")
|
||||||
} else {
|
} else {
|
||||||
swPrev[filepath] = append(swPrev[filepath], *sw)
|
swsMapPrev[filepath] = append(swsMapPrev[filepath], *sw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
var dst []ScrapeWork
|
var dst []ScrapeWork
|
||||||
@ -232,7 +200,7 @@ func (cfg *Config) getFileSDScrapeWork(prev []ScrapeWork) []ScrapeWork {
|
|||||||
sc := &cfg.ScrapeConfigs[i]
|
sc := &cfg.ScrapeConfigs[i]
|
||||||
for j := range sc.FileSDConfigs {
|
for j := range sc.FileSDConfigs {
|
||||||
sdc := &sc.FileSDConfigs[j]
|
sdc := &sc.FileSDConfigs[j]
|
||||||
dst = sdc.appendScrapeWork(dst, swPrev, cfg.baseDir, sc.swc)
|
dst = sdc.appendScrapeWork(dst, swsMapPrev, cfg.baseDir, sc.swc)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return dst
|
return dst
|
||||||
@ -377,7 +345,7 @@ func appendScrapeWorkForTargetLabels(dst []ScrapeWork, swc *scrapeWorkConfig, ta
|
|||||||
return dst
|
return dst
|
||||||
}
|
}
|
||||||
|
|
||||||
func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
|
func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swsMapPrev map[string][]ScrapeWork, baseDir string, swc *scrapeWorkConfig) []ScrapeWork {
|
||||||
for _, file := range sdc.Files {
|
for _, file := range sdc.Files {
|
||||||
pathPattern := getFilepath(baseDir, file)
|
pathPattern := getFilepath(baseDir, file)
|
||||||
paths := []string{pathPattern}
|
paths := []string{pathPattern}
|
||||||
@ -394,7 +362,7 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
|
|||||||
stcs, err := loadStaticConfigs(path)
|
stcs, err := loadStaticConfigs(path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Do not return this error, since other paths may contain valid scrape configs.
|
// Do not return this error, since other paths may contain valid scrape configs.
|
||||||
if sws := swPrev[path]; sws != nil {
|
if sws := swsMapPrev[path]; sws != nil {
|
||||||
// Re-use the previous valid scrape work for this path.
|
// Re-use the previous valid scrape work for this path.
|
||||||
logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
|
logger.Errorf("keeping the previously loaded `static_configs` from %q because of error when re-loading the file: %s", path, err)
|
||||||
dst = append(dst, sws...)
|
dst = append(dst, sws...)
|
||||||
@ -412,7 +380,7 @@ func (sdc *FileSDConfig) appendScrapeWork(dst []ScrapeWork, swPrev map[string][]
|
|||||||
}
|
}
|
||||||
metaLabels := map[string]string{
|
metaLabels := map[string]string{
|
||||||
"__meta_filepath": pathShort,
|
"__meta_filepath": pathShort,
|
||||||
"__vm_filepath": pathShort, // This label is needed for internal promscrape logic
|
"__vm_filepath": path, // This label is needed for internal promscrape logic
|
||||||
}
|
}
|
||||||
for i := range stcs {
|
for i := range stcs {
|
||||||
dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)
|
dst = stcs[i].appendScrapeWork(dst, swc, metaLabels)
|
||||||
|
@ -46,8 +46,8 @@ func TestLoadConfig(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("unexpected error: %s", err)
|
t.Fatalf("unexpected error: %s", err)
|
||||||
}
|
}
|
||||||
if n := cfg.fileSDConfigsCount(); n != 2 {
|
if cfg == nil {
|
||||||
t.Fatalf("unexpected number of `file_sd_configs`; got %d; want %d; cfg:\n%#v", n, 2, cfg)
|
t.Fatalf("expecting non-nil config")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try loading non-existing file
|
// Try loading non-existing file
|
||||||
@ -1169,3 +1169,17 @@ scrape_configs:
|
|||||||
}
|
}
|
||||||
|
|
||||||
var defaultRegexForRelabelConfig = regexp.MustCompile("^(.*)$")
|
var defaultRegexForRelabelConfig = regexp.MustCompile("^(.*)$")
|
||||||
|
|
||||||
|
func equalStaticConfigForScrapeWorks(a, b []ScrapeWork) bool {
|
||||||
|
if len(a) != len(b) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range a {
|
||||||
|
keyA := a[i].key()
|
||||||
|
keyB := b[i].key()
|
||||||
|
if keyA != keyB {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
@ -3,6 +3,7 @@ package promscrape
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"flag"
|
"flag"
|
||||||
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/signal"
|
"os/signal"
|
||||||
"sync"
|
"sync"
|
||||||
@ -60,8 +61,6 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
|||||||
// Nothing to scrape.
|
// Nothing to scrape.
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
sighupCh := make(chan os.Signal, 1)
|
|
||||||
signal.Notify(sighupCh, syscall.SIGHUP)
|
|
||||||
|
|
||||||
logger.Infof("reading Prometheus configs from %q", configFile)
|
logger.Infof("reading Prometheus configs from %q", configFile)
|
||||||
cfg, data, err := loadConfig(configFile)
|
cfg, data, err := loadConfig(configFile)
|
||||||
@ -69,43 +68,24 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
|||||||
logger.Fatalf("cannot read %q: %s", configFile, err)
|
logger.Fatalf("cannot read %q: %s", configFile, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
scs := newScrapeConfigs(pushData)
|
||||||
|
scs.add("static_configs", 0, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getStaticScrapeWork() })
|
||||||
|
scs.add("file_sd_configs", *fileSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getFileSDScrapeWork(swsPrev) })
|
||||||
|
scs.add("kubernetes_sd_configs", *kubernetesSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getKubernetesSDScrapeWork() })
|
||||||
|
scs.add("ec2_sd_configs", *ec2SDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getEC2SDScrapeWork() })
|
||||||
|
scs.add("gce_sd_configs", *gceSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getGCESDScrapeWork() })
|
||||||
|
|
||||||
|
sighupCh := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sighupCh, syscall.SIGHUP)
|
||||||
|
|
||||||
var tickerCh <-chan time.Time
|
var tickerCh <-chan time.Time
|
||||||
if *configCheckInterval > 0 {
|
if *configCheckInterval > 0 {
|
||||||
ticker := time.NewTicker(*configCheckInterval)
|
ticker := time.NewTicker(*configCheckInterval)
|
||||||
tickerCh = ticker.C
|
tickerCh = ticker.C
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
}
|
}
|
||||||
|
for {
|
||||||
mustStop := false
|
scs.updateConfig(cfg)
|
||||||
for !mustStop {
|
|
||||||
stopCh := make(chan struct{})
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
runStaticScrapers(cfg, pushData, stopCh)
|
|
||||||
}()
|
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
runFileSDScrapers(cfg, pushData, stopCh)
|
|
||||||
}()
|
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
runKubernetesSDScrapers(cfg, pushData, stopCh)
|
|
||||||
}()
|
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
runEC2SDScrapers(cfg, pushData, stopCh)
|
|
||||||
}()
|
|
||||||
wg.Add(1)
|
|
||||||
go func() {
|
|
||||||
defer wg.Done()
|
|
||||||
runGCESDScrapers(cfg, pushData, stopCh)
|
|
||||||
}()
|
|
||||||
|
|
||||||
waitForChans:
|
waitForChans:
|
||||||
select {
|
select {
|
||||||
case <-sighupCh:
|
case <-sighupCh:
|
||||||
@ -134,281 +114,194 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest)
|
|||||||
cfg = cfgNew
|
cfg = cfgNew
|
||||||
data = dataNew
|
data = dataNew
|
||||||
case <-globalStopCh:
|
case <-globalStopCh:
|
||||||
mustStop = true
|
logger.Infof("stopping Prometheus scrapers")
|
||||||
|
startTime := time.Now()
|
||||||
|
scs.stop()
|
||||||
|
logger.Infof("stopped Prometheus scrapers in %.3f seconds", time.Since(startTime).Seconds())
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
logger.Infof("found changes in %q; applying these changes", configFile)
|
||||||
if !mustStop {
|
|
||||||
logger.Infof("found changes in %q; applying these changes", configFile)
|
|
||||||
}
|
|
||||||
logger.Infof("stopping Prometheus scrapers")
|
|
||||||
startTime := time.Now()
|
|
||||||
close(stopCh)
|
|
||||||
wg.Wait()
|
|
||||||
logger.Infof("stopped Prometheus scrapers in %.3f seconds", time.Since(startTime).Seconds())
|
|
||||||
configReloads.Inc()
|
configReloads.Inc()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var configReloads = metrics.NewCounter(`vm_promscrape_config_reloads_total`)
|
var configReloads = metrics.NewCounter(`vm_promscrape_config_reloads_total`)
|
||||||
|
|
||||||
func runStaticScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
type scrapeConfigs struct {
|
||||||
sws := cfg.getStaticScrapeWork()
|
pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
if len(sws) == 0 {
|
wg sync.WaitGroup
|
||||||
return
|
stopCh chan struct{}
|
||||||
}
|
scfgs []*scrapeConfig
|
||||||
logger.Infof("starting %d scrapers for `static_config` targets", len(sws))
|
|
||||||
staticTargets.Set(uint64(len(sws)))
|
|
||||||
runScrapeWorkers(sws, pushData, stopCh)
|
|
||||||
staticTargets.Set(0)
|
|
||||||
logger.Infof("stopped all the %d scrapers for `static_config` targets", len(sws))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
var staticTargets = metrics.NewCounter(`vm_promscrape_targets{type="static"}`)
|
func newScrapeConfigs(pushData func(wr *prompbmarshal.WriteRequest)) *scrapeConfigs {
|
||||||
|
return &scrapeConfigs{
|
||||||
func runKubernetesSDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
pushData: pushData,
|
||||||
if cfg.kubernetesSDConfigsCount() == 0 {
|
stopCh: make(chan struct{}),
|
||||||
return
|
|
||||||
}
|
}
|
||||||
sws := cfg.getKubernetesSDScrapeWork()
|
}
|
||||||
ticker := time.NewTicker(*kubernetesSDCheckInterval)
|
|
||||||
defer ticker.Stop()
|
func (scs *scrapeConfigs) add(name string, checkInterval time.Duration, getScrapeWork func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork) {
|
||||||
mustStop := false
|
scfg := &scrapeConfig{
|
||||||
for !mustStop {
|
name: name,
|
||||||
localStopCh := make(chan struct{})
|
pushData: scs.pushData,
|
||||||
var wg sync.WaitGroup
|
getScrapeWork: getScrapeWork,
|
||||||
wg.Add(1)
|
checkInterval: checkInterval,
|
||||||
go func(sws []ScrapeWork) {
|
cfgCh: make(chan *Config, 1),
|
||||||
defer wg.Done()
|
stopCh: scs.stopCh,
|
||||||
logger.Infof("starting %d scrapers for `kubernetes_sd_config` targets", len(sws))
|
}
|
||||||
kubernetesSDTargets.Set(uint64(len(sws)))
|
scs.wg.Add(1)
|
||||||
runScrapeWorkers(sws, pushData, localStopCh)
|
go func() {
|
||||||
kubernetesSDTargets.Set(0)
|
defer scs.wg.Done()
|
||||||
logger.Infof("stopped all the %d scrapers for `kubernetes_sd_config` targets", len(sws))
|
scfg.run()
|
||||||
}(sws)
|
}()
|
||||||
waitForChans:
|
scs.scfgs = append(scs.scfgs, scfg)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (scs *scrapeConfigs) updateConfig(cfg *Config) {
|
||||||
|
for _, scfg := range scs.scfgs {
|
||||||
|
scfg.cfgCh <- cfg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (scs *scrapeConfigs) stop() {
|
||||||
|
close(scs.stopCh)
|
||||||
|
scs.wg.Wait()
|
||||||
|
scs.scfgs = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type scrapeConfig struct {
|
||||||
|
name string
|
||||||
|
pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
|
getScrapeWork func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork
|
||||||
|
checkInterval time.Duration
|
||||||
|
cfgCh chan *Config
|
||||||
|
stopCh <-chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (scfg *scrapeConfig) run() {
|
||||||
|
sg := newScraperGroup(scfg.name, scfg.pushData)
|
||||||
|
defer sg.stop()
|
||||||
|
|
||||||
|
var tickerCh <-chan time.Time
|
||||||
|
if scfg.checkInterval > 0 {
|
||||||
|
ticker := time.NewTicker(scfg.checkInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
tickerCh = ticker.C
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg := <-scfg.cfgCh
|
||||||
|
var swsPrev []ScrapeWork
|
||||||
|
for {
|
||||||
|
sws := scfg.getScrapeWork(cfg, swsPrev)
|
||||||
|
sg.update(sws)
|
||||||
|
swsPrev = sws
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-scfg.stopCh:
|
||||||
swsNew := cfg.getKubernetesSDScrapeWork()
|
return
|
||||||
if equalStaticConfigForScrapeWorks(swsNew, sws) {
|
case cfg = <-scfg.cfgCh:
|
||||||
// Nothing changed, continue waiting for updated scrape work
|
case <-tickerCh:
|
||||||
goto waitForChans
|
|
||||||
}
|
|
||||||
logger.Infof("restarting scrapers for changed `kubernetes_sd_config` targets")
|
|
||||||
sws = swsNew
|
|
||||||
case <-stopCh:
|
|
||||||
mustStop = true
|
|
||||||
}
|
|
||||||
|
|
||||||
close(localStopCh)
|
|
||||||
wg.Wait()
|
|
||||||
kubernetesSDReloads.Inc()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
kubernetesSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="kubernetes_sd"}`)
|
|
||||||
kubernetesSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="kubernetes_sd"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
func runEC2SDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
|
||||||
if cfg.ec2SDConfigsCount() == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sws := cfg.getEC2SDScrapeWork()
|
|
||||||
ticker := time.NewTicker(*ec2SDCheckInterval)
|
|
||||||
defer ticker.Stop()
|
|
||||||
mustStop := false
|
|
||||||
for !mustStop {
|
|
||||||
localStopCh := make(chan struct{})
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(1)
|
|
||||||
go func(sws []ScrapeWork) {
|
|
||||||
defer wg.Done()
|
|
||||||
logger.Infof("starting %d scrapers for `ec2_sd_config` targets", len(sws))
|
|
||||||
ec2SDTargets.Set(uint64(len(sws)))
|
|
||||||
runScrapeWorkers(sws, pushData, localStopCh)
|
|
||||||
ec2SDTargets.Set(0)
|
|
||||||
logger.Infof("stopped all the %d scrapers for `ec2_sd_config` targets", len(sws))
|
|
||||||
}(sws)
|
|
||||||
waitForChans:
|
|
||||||
select {
|
|
||||||
case <-ticker.C:
|
|
||||||
swsNew := cfg.getEC2SDScrapeWork()
|
|
||||||
if equalStaticConfigForScrapeWorks(swsNew, sws) {
|
|
||||||
// Nothing changed, continue waiting for updated scrape work
|
|
||||||
goto waitForChans
|
|
||||||
}
|
|
||||||
logger.Infof("restarting scrapers for changed `ec2_sd_config` targets")
|
|
||||||
sws = swsNew
|
|
||||||
case <-stopCh:
|
|
||||||
mustStop = true
|
|
||||||
}
|
|
||||||
|
|
||||||
close(localStopCh)
|
|
||||||
wg.Wait()
|
|
||||||
ec2SDReloads.Inc()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
ec2SDTargets = metrics.NewCounter(`vm_promscrape_targets{type="ec2_sd"}`)
|
|
||||||
ec2SDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="ec2_sd"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
func runGCESDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
|
||||||
if cfg.gceSDConfigsCount() == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sws := cfg.getGCESDScrapeWork()
|
|
||||||
ticker := time.NewTicker(*gceSDCheckInterval)
|
|
||||||
defer ticker.Stop()
|
|
||||||
mustStop := false
|
|
||||||
for !mustStop {
|
|
||||||
localStopCh := make(chan struct{})
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(1)
|
|
||||||
go func(sws []ScrapeWork) {
|
|
||||||
defer wg.Done()
|
|
||||||
logger.Infof("starting %d scrapers for `gce_sd_config` targets", len(sws))
|
|
||||||
gceSDTargets.Set(uint64(len(sws)))
|
|
||||||
runScrapeWorkers(sws, pushData, localStopCh)
|
|
||||||
gceSDTargets.Set(0)
|
|
||||||
logger.Infof("stopped all the %d scrapers for `gce_sd_config` targets", len(sws))
|
|
||||||
}(sws)
|
|
||||||
waitForChans:
|
|
||||||
select {
|
|
||||||
case <-ticker.C:
|
|
||||||
swsNew := cfg.getGCESDScrapeWork()
|
|
||||||
if equalStaticConfigForScrapeWorks(swsNew, sws) {
|
|
||||||
// Nothing changed, continue waiting for updated scrape work
|
|
||||||
goto waitForChans
|
|
||||||
}
|
|
||||||
logger.Infof("restarting scrapers for changed `gce_sd_config` targets")
|
|
||||||
sws = swsNew
|
|
||||||
case <-stopCh:
|
|
||||||
mustStop = true
|
|
||||||
}
|
|
||||||
|
|
||||||
close(localStopCh)
|
|
||||||
wg.Wait()
|
|
||||||
gceSDReloads.Inc()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
gceSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="gce_sd"}`)
|
|
||||||
gceSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="gce_sd"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
func runFileSDScrapers(cfg *Config, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
|
||||||
if cfg.fileSDConfigsCount() == 0 {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
sws := cfg.getFileSDScrapeWork(nil)
|
|
||||||
ticker := time.NewTicker(*fileSDCheckInterval)
|
|
||||||
defer ticker.Stop()
|
|
||||||
mustStop := false
|
|
||||||
for !mustStop {
|
|
||||||
localStopCh := make(chan struct{})
|
|
||||||
var wg sync.WaitGroup
|
|
||||||
wg.Add(1)
|
|
||||||
go func(sws []ScrapeWork) {
|
|
||||||
defer wg.Done()
|
|
||||||
logger.Infof("starting %d scrapers for `file_sd_config` targets", len(sws))
|
|
||||||
fileSDTargets.Set(uint64(len(sws)))
|
|
||||||
runScrapeWorkers(sws, pushData, localStopCh)
|
|
||||||
fileSDTargets.Set(0)
|
|
||||||
logger.Infof("stopped all the %d scrapers for `file_sd_config` targets", len(sws))
|
|
||||||
}(sws)
|
|
||||||
waitForChans:
|
|
||||||
select {
|
|
||||||
case <-ticker.C:
|
|
||||||
swsNew := cfg.getFileSDScrapeWork(sws)
|
|
||||||
if equalStaticConfigForScrapeWorks(swsNew, sws) {
|
|
||||||
// Nothing changed, continue waiting for updated scrape work
|
|
||||||
goto waitForChans
|
|
||||||
}
|
|
||||||
logger.Infof("restarting scrapers for changed `file_sd_config` targets")
|
|
||||||
sws = swsNew
|
|
||||||
case <-stopCh:
|
|
||||||
mustStop = true
|
|
||||||
}
|
|
||||||
|
|
||||||
close(localStopCh)
|
|
||||||
wg.Wait()
|
|
||||||
fileSDReloads.Inc()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var (
|
|
||||||
fileSDTargets = metrics.NewCounter(`vm_promscrape_targets{type="file_sd"}`)
|
|
||||||
fileSDReloads = metrics.NewCounter(`vm_promscrape_reloads_total{type="file_sd"}`)
|
|
||||||
)
|
|
||||||
|
|
||||||
func equalStaticConfigForScrapeWorks(as, bs []ScrapeWork) bool {
|
|
||||||
if len(as) != len(bs) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
for i := range as {
|
|
||||||
if !equalStaticConfigForScrapeWork(&as[i], &bs[i]) {
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func equalStaticConfigForScrapeWork(a, b *ScrapeWork) bool {
|
type scraperGroup struct {
|
||||||
// `static_config` can change only ScrapeURL and Labels. So compare only them.
|
name string
|
||||||
if a.ScrapeURL != b.ScrapeURL {
|
wg sync.WaitGroup
|
||||||
return false
|
mLock sync.Mutex
|
||||||
}
|
m map[string]*scraper
|
||||||
if !equalLabels(a.Labels, b.Labels) {
|
pushData func(wr *prompbmarshal.WriteRequest)
|
||||||
return false
|
changesCount *metrics.Counter
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func equalLabels(as, bs []prompbmarshal.Label) bool {
|
func newScraperGroup(name string, pushData func(wr *prompbmarshal.WriteRequest)) *scraperGroup {
|
||||||
if len(as) != len(bs) {
|
sg := &scraperGroup{
|
||||||
return false
|
name: name,
|
||||||
|
m: make(map[string]*scraper),
|
||||||
|
pushData: pushData,
|
||||||
|
changesCount: metrics.NewCounter(fmt.Sprintf(`vm_promscrape_config_changes_total{type=%q}`, name)),
|
||||||
}
|
}
|
||||||
for i := range as {
|
metrics.NewGauge(fmt.Sprintf(`vm_promscrape_targets{type=%q}`, name), func() float64 {
|
||||||
if !equalLabel(&as[i], &bs[i]) {
|
sg.mLock.Lock()
|
||||||
return false
|
n := len(sg.m)
|
||||||
}
|
sg.mLock.Unlock()
|
||||||
}
|
return float64(n)
|
||||||
return true
|
})
|
||||||
|
return sg
|
||||||
}
|
}
|
||||||
|
|
||||||
func equalLabel(a, b *prompbmarshal.Label) bool {
|
func (sg *scraperGroup) stop() {
|
||||||
if a.Name != b.Name {
|
sg.mLock.Lock()
|
||||||
return false
|
for _, sc := range sg.m {
|
||||||
|
close(sc.stopCh)
|
||||||
}
|
}
|
||||||
if a.Value != b.Value {
|
sg.m = nil
|
||||||
return false
|
sg.mLock.Unlock()
|
||||||
}
|
sg.wg.Wait()
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// runScrapeWorkers runs sws.
|
func (sg *scraperGroup) update(sws []ScrapeWork) {
|
||||||
//
|
sg.mLock.Lock()
|
||||||
// This function returns after closing stopCh.
|
defer sg.mLock.Unlock()
|
||||||
func runScrapeWorkers(sws []ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest), stopCh <-chan struct{}) {
|
|
||||||
tsmGlobal.RegisterAll(sws)
|
additionsCount := 0
|
||||||
var wg sync.WaitGroup
|
deletionsCount := 0
|
||||||
|
swsMap := make(map[string]bool, len(sws))
|
||||||
for i := range sws {
|
for i := range sws {
|
||||||
cfg := &sws[i]
|
sw := &sws[i]
|
||||||
c := newClient(cfg)
|
key := sw.key()
|
||||||
var sw scrapeWork
|
if swsMap[key] {
|
||||||
sw.Config = *cfg
|
logger.Errorf("skipping duplicate scrape target with identical labels; endpoint=%s, labels=%s; make sure service discovery and relabeling is set up properly",
|
||||||
sw.ReadData = c.ReadData
|
sw.ScrapeURL, sw.LabelsString())
|
||||||
sw.PushData = pushData
|
continue
|
||||||
wg.Add(1)
|
}
|
||||||
|
swsMap[key] = true
|
||||||
|
if sg.m[key] != nil {
|
||||||
|
// The scraper for the given key already exists.
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start a scraper for the missing key.
|
||||||
|
sc := newScraper(sw, sg.pushData)
|
||||||
|
sg.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer sg.wg.Done()
|
||||||
sw.run(stopCh)
|
sc.sw.run(sc.stopCh)
|
||||||
|
tsmGlobal.Unregister(sw)
|
||||||
}()
|
}()
|
||||||
|
tsmGlobal.Register(sw)
|
||||||
|
sg.m[key] = sc
|
||||||
|
additionsCount++
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop deleted scrapers, which are missing in sws.
|
||||||
|
for key, sc := range sg.m {
|
||||||
|
if !swsMap[key] {
|
||||||
|
close(sc.stopCh)
|
||||||
|
delete(sg.m, key)
|
||||||
|
deletionsCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if additionsCount > 0 || deletionsCount > 0 {
|
||||||
|
sg.changesCount.Add(additionsCount + deletionsCount)
|
||||||
|
logger.Infof("%s: added targets: %d, removed targets: %d; total targets: %d", sg.name, additionsCount, deletionsCount, len(sg.m))
|
||||||
}
|
}
|
||||||
wg.Wait()
|
}
|
||||||
tsmGlobal.UnregisterAll(sws)
|
|
||||||
|
type scraper struct {
|
||||||
|
sw scrapeWork
|
||||||
|
stopCh chan struct{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newScraper(sw *ScrapeWork, pushData func(wr *prompbmarshal.WriteRequest)) *scraper {
|
||||||
|
sc := &scraper{
|
||||||
|
stopCh: make(chan struct{}),
|
||||||
|
}
|
||||||
|
c := newClient(sw)
|
||||||
|
sc.sw.Config = *sw
|
||||||
|
sc.sw.ReadData = c.ReadData
|
||||||
|
sc.sw.PushData = pushData
|
||||||
|
return sc
|
||||||
}
|
}
|
||||||
|
@ -68,6 +68,25 @@ type ScrapeWork struct {
|
|||||||
SampleLimit int
|
SampleLimit int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// key returns unique identifier for the given sw.
|
||||||
|
//
|
||||||
|
// it can be used for comparing for equality two ScrapeWork objects.
|
||||||
|
func (sw *ScrapeWork) key() string {
|
||||||
|
key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
|
||||||
|
"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d",
|
||||||
|
sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
|
||||||
|
sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit)
|
||||||
|
return key
|
||||||
|
}
|
||||||
|
|
||||||
|
func (sw *ScrapeWork) metricRelabelConfigsString() string {
|
||||||
|
var sb strings.Builder
|
||||||
|
for _, prc := range sw.MetricRelabelConfigs {
|
||||||
|
fmt.Fprintf(&sb, "%s", prc.String())
|
||||||
|
}
|
||||||
|
return sb.String()
|
||||||
|
}
|
||||||
|
|
||||||
// Job returns job for the ScrapeWork
|
// Job returns job for the ScrapeWork
|
||||||
func (sw *ScrapeWork) Job() string {
|
func (sw *ScrapeWork) Job() string {
|
||||||
return promrelabel.GetLabelValueByName(sw.Labels, "job")
|
return promrelabel.GetLabelValueByName(sw.Labels, "job")
|
||||||
|
@ -32,22 +32,17 @@ func (tsm *targetStatusMap) Reset() {
|
|||||||
tsm.mu.Unlock()
|
tsm.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tsm *targetStatusMap) RegisterAll(sws []ScrapeWork) {
|
func (tsm *targetStatusMap) Register(sw *ScrapeWork) {
|
||||||
tsm.mu.Lock()
|
tsm.mu.Lock()
|
||||||
for i := range sws {
|
tsm.m[sw.ID] = targetStatus{
|
||||||
sw := &sws[i]
|
sw: sw,
|
||||||
tsm.m[sw.ID] = targetStatus{
|
|
||||||
sw: sw,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
tsm.mu.Unlock()
|
tsm.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tsm *targetStatusMap) UnregisterAll(sws []ScrapeWork) {
|
func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
|
||||||
tsm.mu.Lock()
|
tsm.mu.Lock()
|
||||||
for i := range sws {
|
delete(tsm.m, sw.ID)
|
||||||
delete(tsm.m, sws[i].ID)
|
|
||||||
}
|
|
||||||
tsm.mu.Unlock()
|
tsm.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,7 +78,6 @@ func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
|||||||
return jss[i].job < jss[j].job
|
return jss[i].job < jss[j].job
|
||||||
})
|
})
|
||||||
|
|
||||||
targetsByEndpoint := make(map[string]int)
|
|
||||||
for _, js := range jss {
|
for _, js := range jss {
|
||||||
sts := js.statuses
|
sts := js.statuses
|
||||||
sort.Slice(sts, func(i, j int) bool {
|
sort.Slice(sts, func(i, j int) bool {
|
||||||
@ -109,20 +103,9 @@ func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer) {
|
|||||||
}
|
}
|
||||||
fmt.Fprintf(w, "\tstate=%s, endpoint=%s, labels=%s, last_scrape=%.3fs ago, scrape_duration=%.3fs, error=%q\n",
|
fmt.Fprintf(w, "\tstate=%s, endpoint=%s, labels=%s, last_scrape=%.3fs ago, scrape_duration=%.3fs, error=%q\n",
|
||||||
state, st.sw.ScrapeURL, labelsStr, lastScrape.Seconds(), float64(st.scrapeDuration)/1000, errMsg)
|
state, st.sw.ScrapeURL, labelsStr, lastScrape.Seconds(), float64(st.scrapeDuration)/1000, errMsg)
|
||||||
key := fmt.Sprintf("endpoint=%s, labels=%s", st.sw.ScrapeURL, labelsStr)
|
|
||||||
targetsByEndpoint[key]++
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fmt.Fprintf(w, "\n")
|
fmt.Fprintf(w, "\n")
|
||||||
|
|
||||||
// Check whether there are targets with duplicate endpoints and labels.
|
|
||||||
for key, n := range targetsByEndpoint {
|
|
||||||
if n <= 1 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
fmt.Fprintf(w, "!!! Scrape config error: %d duplicate targets with identical endpoint and labels found:\n", n)
|
|
||||||
fmt.Fprintf(w, "\t%s\n", key)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type jobStatus struct {
|
type jobStatus struct {
|
||||||
|
Loading…
Reference in New Issue
Block a user