diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 5d6df50dfd..125decbc2b 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -138,17 +138,15 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh `vmagent` doesn't support `role_arn` config param yet. * `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE). See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details. - `vmagent` provides the following additional functionality `gce_sd_config`: + `vmagent` provides the following additional functionality for `gce_sd_config`: * if `project` arg is missing, then `vmagent` uses the project for the instance where it runs; * if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs; * if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project; * `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`. * `consul_sd_configs` - for scraping targets registered in Consul. See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details. - -The following service discovery mechanisms will be added to `vmagent` soon: - -* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) +* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA). + See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details. File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`. diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 84b85a74dc..d78b4db272 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -263,6 +263,7 @@ Currently the following [scrape_config](https://prometheus.io/docs/prometheus/la * [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) +* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) In the future other `*_sd_config` types will be supported. diff --git a/docs/vmagent.md b/docs/vmagent.md index 5d6df50dfd..125decbc2b 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -138,17 +138,15 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh `vmagent` doesn't support `role_arn` config param yet. * `gce_sd_configs` - for scraping targets in Google Compute Engine (GCE). See [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) for details. - `vmagent` provides the following additional functionality `gce_sd_config`: + `vmagent` provides the following additional functionality for `gce_sd_config`: * if `project` arg is missing, then `vmagent` uses the project for the instance where it runs; * if `zone` arg is missing, then `vmagent` uses the zone for the instance where it runs; * if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project; * `zone` may contain arbitrary number of zones, i.e. `zone: [us-east1-a, us-east1-b]`. * `consul_sd_configs` - for scraping targets registered in Consul. See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details. - -The following service discovery mechanisms will be added to `vmagent` soon: - -* [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) +* `dns_sd_configs` - for scraping targets discovered from DNS records (SRV, A and AAAA). + See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details. File feature requests at [our issue tracker](https://github.com/VictoriaMetrics/VictoriaMetrics/issues) if you need other service discovery mechanisms to be supported by `vmagent`. diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 5cfaa4051c..2d07a69182 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -15,6 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/ec2" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/gce" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/kubernetes" @@ -64,6 +65,7 @@ type ScrapeConfig struct { FileSDConfigs []FileSDConfig `yaml:"file_sd_configs"` KubernetesSDConfigs []kubernetes.SDConfig `yaml:"kubernetes_sd_configs"` ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs"` + DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs"` EC2SDConfigs []ec2.SDConfig `yaml:"ec2_sd_configs"` GCESDConfigs []gce.SDConfig `yaml:"gce_sd_configs"` RelabelConfigs []promrelabel.RelabelConfig `yaml:"relabel_configs"` @@ -171,6 +173,19 @@ func (cfg *Config) getConsulSDScrapeWork() []ScrapeWork { return dst } +// getDNSSDScrapeWork returns `dns_sd_configs` ScrapeWork from cfg. +func (cfg *Config) getDNSSDScrapeWork() []ScrapeWork { + var dst []ScrapeWork + for i := range cfg.ScrapeConfigs { + sc := &cfg.ScrapeConfigs[i] + for j := range sc.DNSSDConfigs { + sdc := &sc.DNSSDConfigs[j] + dst = appendDNSScrapeWork(dst, sdc, sc.swc) + } + } + return dst +} + // getEC2SDScrapeWork returns `ec2_sd_configs` ScrapeWork from cfg. func (cfg *Config) getEC2SDScrapeWork() []ScrapeWork { var dst []ScrapeWork @@ -318,7 +333,7 @@ type scrapeWorkConfig struct { func appendKubernetesScrapeWork(dst []ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) []ScrapeWork { targetLabels, err := kubernetes.GetLabels(sdc, baseDir) if err != nil { - logger.Errorf("error when discovering kubernetes nodes for `job_name` %q: %s; skipping it", swc.jobName, err) + logger.Errorf("error when discovering kubernetes targets for `job_name` %q: %s; skipping it", swc.jobName, err) return dst } return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "kubernetes_sd_config") @@ -327,16 +342,25 @@ func appendKubernetesScrapeWork(dst []ScrapeWork, sdc *kubernetes.SDConfig, base func appendConsulScrapeWork(dst []ScrapeWork, sdc *consul.SDConfig, baseDir string, swc *scrapeWorkConfig) []ScrapeWork { targetLabels, err := consul.GetLabels(sdc, baseDir) if err != nil { - logger.Errorf("error when discovering consul nodes for `job_name` %q: %s; skipping it", swc.jobName, err) + logger.Errorf("error when discovering consul targets for `job_name` %q: %s; skipping it", swc.jobName, err) return dst } return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "consul_sd_config") } +func appendDNSScrapeWork(dst []ScrapeWork, sdc *dns.SDConfig, swc *scrapeWorkConfig) []ScrapeWork { + targetLabels, err := dns.GetLabels(sdc) + if err != nil { + logger.Errorf("error when discovering dns targets for `job_name` %q: %s; skipping it", swc.jobName, err) + return dst + } + return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "dns_sd_config") +} + func appendEC2ScrapeWork(dst []ScrapeWork, sdc *ec2.SDConfig, swc *scrapeWorkConfig) []ScrapeWork { targetLabels, err := ec2.GetLabels(sdc) if err != nil { - logger.Errorf("error when discovering ec2 nodes for `job_name` %q: %s; skipping it", swc.jobName, err) + logger.Errorf("error when discovering ec2 targets for `job_name` %q: %s; skipping it", swc.jobName, err) return dst } return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "ec2_sd_config") @@ -345,7 +369,7 @@ func appendEC2ScrapeWork(dst []ScrapeWork, sdc *ec2.SDConfig, swc *scrapeWorkCon func appendGCEScrapeWork(dst []ScrapeWork, sdc *gce.SDConfig, swc *scrapeWorkConfig) []ScrapeWork { targetLabels, err := gce.GetLabels(sdc) if err != nil { - logger.Errorf("error when discovering gce nodes for `job_name` %q: %s; skippint it", swc.jobName, err) + logger.Errorf("error when discovering gce targets for `job_name` %q: %s; skippint it", swc.jobName, err) return dst } return appendScrapeWorkForTargetLabels(dst, swc, targetLabels, "gce_sd_config") diff --git a/lib/promscrape/discovery/dns/dns.go b/lib/promscrape/discovery/dns/dns.go new file mode 100644 index 0000000000..1bf7eb1b1d --- /dev/null +++ b/lib/promscrape/discovery/dns/dns.go @@ -0,0 +1,134 @@ +package dns + +import ( + "context" + "fmt" + "net" + "strings" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +// SDConfig represents service discovery config for DNS. +// +// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config +type SDConfig struct { + Names []string `yaml:"names"` + Type string `yaml:"type"` + Port *int `yaml:"port"` + // RefreshInterval time.Duration `yaml:"refresh_interval"` + // refresh_interval is obtained from `-promscrape.dnsSDCheckInterval` command-line option. +} + +// GetLabels returns DNS labels according to sdc. +func GetLabels(sdc *SDConfig) ([]map[string]string, error) { + if len(sdc.Names) == 0 { + return nil, fmt.Errorf("`names` cannot be empty in `dns_sd_config`") + } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + typ := sdc.Type + if typ == "" { + typ = "SRV" + } + typ = strings.ToUpper(typ) + switch typ { + case "SRV": + ms := getSRVAddrLabels(ctx, sdc) + return ms, nil + case "A", "AAAA": + return getAAddrLabels(ctx, sdc, typ) + default: + return nil, fmt.Errorf("unexpected `type` in `dns_sd_config`: %q; supported values: SRV, A, AAAA", typ) + } +} + +func getSRVAddrLabels(ctx context.Context, sdc *SDConfig) []map[string]string { + type result struct { + name string + as []*net.SRV + err error + } + ch := make(chan result, len(sdc.Names)) + for _, name := range sdc.Names { + go func(name string) { + _, as, err := resolver.LookupSRV(ctx, "", "", name) + ch <- result{ + name: name, + as: as, + err: err, + } + }(name) + } + var ms []map[string]string + for range sdc.Names { + r := <-ch + if r.err != nil { + logger.Errorf("error in SRV lookup for %q; skipping it; error: %s", r.name, r.err) + continue + } + for _, a := range r.as { + target := a.Target + for strings.HasSuffix(target, ".") { + target = target[:len(target)-1] + } + ms = appendAddrLabels(ms, r.name, target, int(a.Port)) + } + } + return ms +} + +func getAAddrLabels(ctx context.Context, sdc *SDConfig, lookupType string) ([]map[string]string, error) { + if sdc.Port == nil { + return nil, fmt.Errorf("missing `port` in `dns_sd_config`") + } + port := *sdc.Port + type result struct { + name string + ips []net.IPAddr + err error + } + ch := make(chan result, len(sdc.Names)) + for _, name := range sdc.Names { + go func(name string) { + ips, err := resolver.LookupIPAddr(ctx, name) + ch <- result{ + name: name, + ips: ips, + err: err, + } + }(name) + } + var ms []map[string]string + for range sdc.Names { + r := <-ch + if r.err != nil { + logger.Errorf("error in %s lookup for %q: %s", lookupType, r.name, r.err) + continue + } + for _, ip := range r.ips { + isIPv4 := ip.IP.To4() != nil + if lookupType == "AAAA" && isIPv4 || lookupType == "A" && !isIPv4 { + continue + } + ms = appendAddrLabels(ms, r.name, ip.IP.String(), port) + } + } + return ms, nil +} + +func appendAddrLabels(ms []map[string]string, name, target string, port int) []map[string]string { + addr := discoveryutils.JoinHostPort(target, port) + m := map[string]string{ + "__address__": addr, + "__meta_dns_name": name, + } + return append(ms, m) +} + +var resolver = &net.Resolver{ + PreferGo: true, + StrictErrors: true, +} diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go index 8d67226311..54751283ac 100644 --- a/lib/promscrape/scraper.go +++ b/lib/promscrape/scraper.go @@ -24,6 +24,9 @@ var ( consulSDCheckInterval = flag.Duration("promscrape.consulSDCheckInterval", 30*time.Second, "Interval for checking for changes in consul. "+ "This works only if `consul_sd_configs` is configured in '-promscrape.config' file. "+ "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config for details") + dnsSDCheckInterval = flag.Duration("promscrape.dnsSDCheckInterval", 30*time.Second, "Interval for checking for changes in dns. "+ + "This works only if `dns_sd_configs` is configured in '-promscrape.config' file. "+ + "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config for details") ec2SDCheckInterval = flag.Duration("promscrape.ec2SDCheckInterval", time.Minute, "Interval for checking for changes in ec2. "+ "This works only if `ec2_sd_configs` is configured in '-promscrape.config' file. "+ "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config for details") @@ -74,6 +77,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest) scs.add("file_sd_configs", *fileSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getFileSDScrapeWork(swsPrev) }) scs.add("kubernetes_sd_configs", *kubernetesSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getKubernetesSDScrapeWork() }) scs.add("consul_sd_configs", *consulSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getConsulSDScrapeWork() }) + scs.add("dns_sd_configs", *dnsSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getDNSSDScrapeWork() }) scs.add("ec2_sd_configs", *ec2SDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getEC2SDScrapeWork() }) scs.add("gce_sd_configs", *gceSDCheckInterval, func(cfg *Config, swsPrev []ScrapeWork) []ScrapeWork { return cfg.getGCESDScrapeWork() })