app/vmagent: add max_scrape_size to scrape config (#6434)

Related to
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6429

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Andrii Chubatiuk 2024-06-20 14:58:42 +03:00 committed by GitHub
parent bc37b279aa
commit 1e83598be3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 770 additions and 661 deletions

View File

@ -40,6 +40,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add `idleConnTimeout` flag set to 50s by default. It should reduce the probability of `broken pipe` or `connection reset by peer` errors in vmauth logs.
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add auto request retry for trivial network errors, such as `broken pipe` and `connection reset` for requests to the configured backends.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): increase default value of `-promscrape.maxDroppedTargets` command-line flag to 10_000 from 1000. This makes it easier to track down large number of dropped targets.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `max_scrape_size` parameter to a scrape config for setting a custom scrape limit for a job. The new [automatically generated metric](https://docs.victoriametrics.com/vmagent/#automatically-generated-metrics) `scrape_response_size_bytes` was added to reflect the response size of the target.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/): check for ranged vector arguments in non-rollup expressions when `-search.disableImplicitConversion` or `-search.logImplicitConversion` are enabled. For example, `sum(up[5m])` or `absent(up[5m])` will fail to execute if these flags are set.
* FEATURE: [vmsingle](https://docs.victoriametrics.com/single-server-victoriametrics/): validate that rollup expressions has ranged vector arguments passed when `-search.disableImplicitConversion` or `-search.logImplicitConversion` are enabled. For example, `rate(metric)` or `count_over_time(metric)` will fail to execute if these flags are set.
* FEATURE: [vmalert-tool](https://docs.victoriametrics.com/vmalert-tool/): support file path with hierarchical patterns and regexpes, and http url in unittest cmd-line flag `-files`, e.g. `-files="http://<some-server-addr>/path/to/rules"` or `-files="dir/**/*.yaml"`.

View File

@ -1692,6 +1692,14 @@ scrape_configs:
#
# scrape_timeout: <duration>
# max_scrape_size is an optional parameter for limiting the response size in bytes from scraped targets.
# By default, uses limit from -promscrape.maxScrapeSize command-line flag.
# Example values:
# - "10MiB" - 10 * 1024 * 1024 bytes
# - "100MB" - 100 * 1000 * 1000 bytes
#
# max_scrape_size: <size>
# metrics_path is the path to fetch metrics from targets.
# By default, metrics are fetched from "/metrics" path.
#

View File

@ -486,6 +486,14 @@ and attaches `instance`, `job` and other target-specific labels to these metrics
scrape_duration_seconds > 1.5
```
* `scrape_response_size_bytes` - response size in bytes for the given target. This allows to monitor amount of data scraped
and to adjust `max_scrape_size` for scraped targets. For example, the following [MetricsQL query](https://docs.victoriametrics.com/metricsql/)
returns targets with scrape response > 10MiB:
```metricsql
max_scrape_size > 10MiB
```
* `scrape_timeout_seconds` - the configured timeout for the current scrape target (aka `scrape_timeout`).
This allows detecting targets with scrape durations close to the configured scrape timeout.
For example, the following [MetricsQL query](https://docs.victoriametrics.com/metricsql/) returns targets (identified by `instance` label),

View File

@ -53,79 +53,77 @@ func (b *Bytes) Set(value string) error {
return nil
}
value = normalizeBytesString(value)
n, err := parseBytes(value)
if err != nil {
return err
}
b.N = n
b.valueString = value
return nil
}
// ParseBytes returns int64 in bytes of parsed string with unit suffix
func ParseBytes(value string) (int64, error) {
value = normalizeBytesString(value)
return parseBytes(value)
}
func parseBytes(value string) (int64, error) {
switch {
case strings.HasSuffix(value, "KB"):
f, err := strconv.ParseFloat(value[:len(value)-2], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1000)
b.valueString = value
return nil
return int64(f * 1000), nil
case strings.HasSuffix(value, "MB"):
f, err := strconv.ParseFloat(value[:len(value)-2], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1000 * 1000)
b.valueString = value
return nil
return int64(f * 1000 * 1000), nil
case strings.HasSuffix(value, "GB"):
f, err := strconv.ParseFloat(value[:len(value)-2], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1000 * 1000 * 1000)
b.valueString = value
return nil
return int64(f * 1000 * 1000 * 1000), nil
case strings.HasSuffix(value, "TB"):
f, err := strconv.ParseFloat(value[:len(value)-2], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1000 * 1000 * 1000 * 1000)
b.valueString = value
return nil
return int64(f * 1000 * 1000 * 1000 * 1000), nil
case strings.HasSuffix(value, "KiB"):
f, err := strconv.ParseFloat(value[:len(value)-3], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1024)
b.valueString = value
return nil
return int64(f * 1024), nil
case strings.HasSuffix(value, "MiB"):
f, err := strconv.ParseFloat(value[:len(value)-3], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1024 * 1024)
b.valueString = value
return nil
return int64(f * 1024 * 1024), nil
case strings.HasSuffix(value, "GiB"):
f, err := strconv.ParseFloat(value[:len(value)-3], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1024 * 1024 * 1024)
b.valueString = value
return nil
return int64(f * 1024 * 1024 * 1024), nil
case strings.HasSuffix(value, "TiB"):
f, err := strconv.ParseFloat(value[:len(value)-3], 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f * 1024 * 1024 * 1024 * 1024)
b.valueString = value
return nil
return int64(f * 1024 * 1024 * 1024 * 1024), nil
default:
f, err := strconv.ParseFloat(value, 64)
if err != nil {
return err
return 0, err
}
b.N = int64(f)
b.valueString = value
return nil
return int64(f), nil
}
}

View File

@ -18,8 +18,6 @@ import (
)
var (
maxScrapeSize = flagutil.NewBytes("promscrape.maxScrapeSize", 16*1024*1024, "The maximum size of scrape response in bytes to process from Prometheus targets. "+
"Bigger responses are rejected")
maxResponseHeadersSize = flagutil.NewBytes("promscrape.maxResponseHeadersSize", 4096, "The maximum size of http response headers from Prometheus scrape targets")
disableCompression = flag.Bool("promscrape.disableCompression", false, "Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. "+
"This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. "+
@ -41,6 +39,7 @@ type client struct {
scrapeTimeoutSecondsStr string
setHeaders func(req *http.Request) error
setProxyHeaders func(req *http.Request) error
maxScrapeSize int64
}
func newClient(ctx context.Context, sw *ScrapeWork) (*client, error) {
@ -91,6 +90,7 @@ func newClient(ctx context.Context, sw *ScrapeWork) (*client, error) {
scrapeTimeoutSecondsStr: fmt.Sprintf("%.3f", sw.ScrapeTimeout.Seconds()),
setHeaders: setHeaders,
setProxyHeaders: setProxyHeaders,
maxScrapeSize: sw.MaxScrapeSize,
}
return c, nil
}
@ -149,7 +149,7 @@ func (c *client) ReadData(dst *bytesutil.ByteBuffer) error {
// Read the data from resp.Body
r := &io.LimitedReader{
R: resp.Body,
N: maxScrapeSize.N,
N: c.maxScrapeSize,
}
_, err = dst.ReadFrom(r)
_ = resp.Body.Close()
@ -160,10 +160,11 @@ func (c *client) ReadData(dst *bytesutil.ByteBuffer) error {
}
return fmt.Errorf("cannot read data from %s: %w", c.scrapeURL, err)
}
if int64(len(dst.B)) >= maxScrapeSize.N {
if int64(len(dst.B)) >= c.maxScrapeSize {
maxScrapeSizeExceeded.Inc()
return fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d; "+
"either reduce the response size for the target or increase -promscrape.maxScrapeSize command-line flag value", c.scrapeURL, maxScrapeSize.N)
return fmt.Errorf("the response from %q exceeds -promscrape.maxScrapeSize=%d or max_scrape_size in a scrape config. "+
"Possible solutions are: reduce the response size for the target, increase -promscrape.maxScrapeSize command-line flag, "+
"increase max_scrape_size value in scrape config", c.scrapeURL, maxScrapeSize.N)
}
return nil
}

View File

@ -16,6 +16,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs/fscore"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
@ -76,6 +77,8 @@ var (
clusterName = flag.String("promscrape.cluster.name", "", "Optional name of the cluster. If multiple vmagent clusters scrape the same targets, "+
"then each cluster must have unique name in order to properly de-duplicate samples received from these clusters. "+
"See https://docs.victoriametrics.com/vmagent/#scraping-big-number-of-targets for more info")
maxScrapeSize = flagutil.NewBytes("promscrape.maxScrapeSize", 16*1024*1024, "The maximum size of scrape response in bytes to process from Prometheus targets. "+
"Bigger responses are rejected")
)
var clusterMemberID int
@ -269,6 +272,7 @@ type ScrapeConfig struct {
JobName string `yaml:"job_name"`
ScrapeInterval *promutils.Duration `yaml:"scrape_interval,omitempty"`
ScrapeTimeout *promutils.Duration `yaml:"scrape_timeout,omitempty"`
MaxScrapeSize string `yaml:"max_scrape_size,omitempty"`
MetricsPath string `yaml:"metrics_path,omitempty"`
HonorLabels bool `yaml:"honor_labels,omitempty"`
@ -845,6 +849,14 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1281#issuecomment-840538907
scrapeTimeout = scrapeInterval
}
var err error
mss := maxScrapeSize.N
if len(sc.MaxScrapeSize) > 0 {
mss, err = flagutil.ParseBytes(sc.MaxScrapeSize)
if err != nil {
return nil, fmt.Errorf("unexpected `max_scrape_size` value %q for `job_name` %q`: %w", sc.MaxScrapeSize, jobName, err)
}
}
honorLabels := sc.HonorLabels
honorTimestamps := sc.HonorTimestamps
denyRedirects := false
@ -897,6 +909,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
scrapeIntervalString: scrapeInterval.String(),
scrapeTimeout: scrapeTimeout,
scrapeTimeoutString: scrapeTimeout.String(),
maxScrapeSize: mss,
jobName: jobName,
metricsPath: metricsPath,
scheme: scheme,
@ -927,6 +940,7 @@ type scrapeWorkConfig struct {
scrapeIntervalString string
scrapeTimeout time.Duration
scrapeTimeoutString string
maxScrapeSize int64
jobName string
metricsPath string
scheme string
@ -1201,6 +1215,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
ScrapeURL: scrapeURL,
ScrapeInterval: scrapeInterval,
ScrapeTimeout: scrapeTimeout,
MaxScrapeSize: swc.maxScrapeSize,
HonorLabels: swc.honorLabels,
HonorTimestamps: swc.honorTimestamps,
DenyRedirects: swc.denyRedirects,

View File

@ -227,6 +227,7 @@ scrape_configs:
ScrapeURL: "http://host1:80/metric/path1?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host1:80",
"job": "abc",
@ -237,6 +238,7 @@ scrape_configs:
ScrapeURL: "https://host2:443/metric/path2?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host2:443",
"job": "abc",
@ -247,6 +249,7 @@ scrape_configs:
ScrapeURL: "http://host3:1234/metric/path3?arg1=value1&x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host3:1234",
"job": "abc",
@ -257,6 +260,7 @@ scrape_configs:
ScrapeURL: "https://host4:1234/foo/bar?x=y",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host4:1234",
"job": "abc",
@ -295,6 +299,7 @@ scrape_configs:
ScrapeURL: "http://black:9115/probe?module=dns_udp_example&target=8.8.8.8",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "8.8.8.8",
"job": "blackbox",
@ -457,6 +462,7 @@ scrape_configs:
ScrapeURL: "http://host1:80/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host1:80",
"job": "foo",
@ -468,6 +474,7 @@ scrape_configs:
ScrapeURL: "http://host2:80/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "host2:80",
"job": "foo",
@ -479,6 +486,7 @@ scrape_configs:
ScrapeURL: "http://localhost:9090/abc/de",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "localhost:9090",
"job": "foo",
@ -685,6 +693,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -706,6 +715,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -727,6 +737,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -748,6 +759,7 @@ scrape_configs:
ScrapeURL: "http://s:80/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "s:80",
"job": "aa",
@ -766,6 +778,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -787,6 +800,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -834,6 +848,7 @@ scrape_configs:
ScrapeURL: "https://foo.bar:443/foo/bar?p=x%26y&p=%3D",
ScrapeInterval: 54 * time.Second,
ScrapeTimeout: 5 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
HonorLabels: true,
HonorTimestamps: true,
DenyRedirects: true,
@ -849,6 +864,7 @@ scrape_configs:
ScrapeURL: "https://aaa:443/foo/bar?p=x%26y&p=%3D",
ScrapeInterval: 54 * time.Second,
ScrapeTimeout: 5 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
HonorLabels: true,
HonorTimestamps: true,
DenyRedirects: true,
@ -864,6 +880,7 @@ scrape_configs:
ScrapeURL: "http://1.2.3.4:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 8 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "1.2.3.4:80",
"job": "qwer",
@ -874,6 +891,7 @@ scrape_configs:
ScrapeURL: "http://foobar:80/metrics",
ScrapeInterval: 8 * time.Second,
ScrapeTimeout: 8 * time.Second,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foobar:80",
"job": "asdf",
@ -921,6 +939,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics?x=keep_me",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"hash": "82",
"instance": "foo.bar:1234",
@ -962,6 +981,7 @@ scrape_configs:
ScrapeURL: "mailto://foo.bar:1234/abc.de?a=b",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "fake.addr",
"job": "https",
@ -973,6 +993,7 @@ scrape_configs:
scrape_configs:
- job_name: foo
scheme: https
max_scrape_size: 0
relabel_configs:
- action: keep
source_labels: [__address__]
@ -994,6 +1015,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: 0,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "3",
@ -1005,6 +1027,7 @@ scrape_configs:
f(`
scrape_configs:
- job_name: foo
max_scrape_size: 8MiB
metric_relabel_configs:
- source_labels: [foo]
target_label: abc
@ -1015,6 +1038,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: 8 * 1024 * 1024,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1032,6 +1056,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1049,6 +1074,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "foo",
@ -1080,6 +1106,7 @@ scrape_configs:
ScrapeURL: "http://pp:80/metrics?a=c&a=xy",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"foo": "bar",
"instance": "pp:80",
@ -1130,6 +1157,7 @@ scrape_configs:
ScrapeURL: "http://127.0.0.1:9116/snmp?module=if_mib&target=192.168.1.2",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "192.168.1.2",
"job": "snmp",
@ -1158,6 +1186,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metricspath",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
"job": "path wo slash",
@ -1184,6 +1213,7 @@ scrape_configs:
ScrapeTimeout: time.Hour * 24,
ScrapeAlignInterval: time.Hour * 24,
ScrapeOffset: time.Hour * 24 * 2,
MaxScrapeSize: maxScrapeSize.N,
NoStaleMarkers: true,
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",
@ -1206,6 +1236,7 @@ scrape_configs:
ScrapeURL: "http://foo.bar:1234/metrics",
ScrapeInterval: defaultScrapeInterval,
ScrapeTimeout: defaultScrapeTimeout,
MaxScrapeSize: maxScrapeSize.N,
jobNameOriginal: "foo",
Labels: promutils.NewLabelsFromMap(map[string]string{
"instance": "foo.bar:1234",

View File

@ -53,6 +53,9 @@ type ScrapeWork struct {
// Timeout for scraping the ScrapeURL.
ScrapeTimeout time.Duration
// MaxScrapeSize sets max amount of data, that can be scraped by a job
MaxScrapeSize int64
// How to deal with conflicting labels.
// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config
HonorLabels bool
@ -500,6 +503,7 @@ func (sw *scrapeWork) processDataOneShot(scrapeTimestamp, realTimestamp int64, b
am := &autoMetrics{
up: up,
scrapeDurationSeconds: scrapeDurationSeconds,
scrapeResponseSize: float64(len(bodyString)),
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
@ -519,7 +523,7 @@ func (sw *scrapeWork) processDataOneShot(scrapeTimestamp, realTimestamp int64, b
sw.storeLastScrape(body)
}
sw.finalizeLastScrape()
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), samplesScraped, err)
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), float64(len(bodyString)), samplesScraped, err)
return err
}
@ -580,6 +584,7 @@ func (sw *scrapeWork) processDataInStreamMode(scrapeTimestamp, realTimestamp int
am := &autoMetrics{
up: up,
scrapeDurationSeconds: scrapeDurationSeconds,
scrapeResponseSize: float64(len(bodyString)),
samplesScraped: samplesScraped,
samplesPostRelabeling: samplesPostRelabeling,
seriesAdded: seriesAdded,
@ -598,7 +603,7 @@ func (sw *scrapeWork) processDataInStreamMode(scrapeTimestamp, realTimestamp int
sw.storeLastScrape(body.B)
}
sw.finalizeLastScrape()
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), samplesScraped, err)
tsmGlobal.Update(sw, up == 1, realTimestamp, int64(scrapeDurationSeconds*1000), float64(len(bodyString)), samplesScraped, err)
// Do not track active series in streaming mode, since this may need too big amounts of memory
// when the target exports too big number of metrics.
return err
@ -812,6 +817,7 @@ func (sw *scrapeWork) getLabelsHash(labels []prompbmarshal.Label) uint64 {
type autoMetrics struct {
up int
scrapeDurationSeconds float64
scrapeResponseSize float64
samplesScraped int
samplesPostRelabeling int
seriesAdded int
@ -824,7 +830,7 @@ func isAutoMetric(s string) bool {
"scrape_samples_post_metric_relabeling", "scrape_series_added",
"scrape_timeout_seconds", "scrape_samples_limit",
"scrape_series_limit_samples_dropped", "scrape_series_limit",
"scrape_series_current":
"scrape_series_current", "scrape_response_size_bytes":
return true
}
return false
@ -833,6 +839,7 @@ func isAutoMetric(s string) bool {
func (sw *scrapeWork) addAutoMetrics(am *autoMetrics, wc *writeRequestCtx, timestamp int64) {
sw.addAutoTimeseries(wc, "up", float64(am.up), timestamp)
sw.addAutoTimeseries(wc, "scrape_duration_seconds", am.scrapeDurationSeconds, timestamp)
sw.addAutoTimeseries(wc, "scrape_response_size_bytes", am.scrapeResponseSize, timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_scraped", float64(am.samplesScraped), timestamp)
sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", float64(am.samplesPostRelabeling), timestamp)
sw.addAutoTimeseries(wc, "scrape_series_added", float64(am.seriesAdded), timestamp)

View File

@ -77,6 +77,7 @@ func TestScrapeWorkScrapeInternalFailure(t *testing.T) {
dataExpected := `
up 0 123
scrape_samples_scraped 0 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 0 123
scrape_series_added 0 123
@ -181,6 +182,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
}, `
up 1 123
scrape_samples_scraped 0 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 0 123
scrape_series_added 0 123
@ -196,6 +198,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
abc -2 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 51 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
@ -215,6 +218,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
abc{foo="x"} -2 123
up{foo="x"} 1 123
scrape_samples_scraped{foo="x"} 2 123
scrape_response_size_bytes{foo="x"} 36 123
scrape_duration_seconds{foo="x"} 0 123
scrape_samples_post_metric_relabeling{foo="x"} 2 123
scrape_series_added{foo="x"} 2 123
@ -234,6 +238,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{exported_job="aa",job="override",x="1",a="b",y="2"} -3e4 123
up{job="override"} 1 123
scrape_samples_scraped{job="override"} 2 123
scrape_response_size_bytes{job="override"} 80 123
scrape_duration_seconds{job="override"} 0 123
scrape_samples_post_metric_relabeling{job="override"} 2 123
scrape_series_added{job="override"} 2 123
@ -255,6 +260,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
test_with_instance{instance="some_instance",job="some_job",label="val2"} 1555 123
up{instance="foobar",job="xxx"} 1 123
scrape_samples_scraped{instance="foobar",job="xxx"} 2 123
scrape_response_size_bytes{instance="foobar",job="xxx"} 158 123
scrape_duration_seconds{instance="foobar",job="xxx"} 0 123
scrape_samples_post_metric_relabeling{instance="foobar",job="xxx"} 2 123
scrape_series_added{instance="foobar",job="xxx"} 2 123
@ -275,6 +281,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
test_with_instance{exported_instance="some_instance",exported_job="some_job",instance="foobar",job="xxx",label="val2"} 1555 123
up{instance="foobar",job="xxx"} 1 123
scrape_samples_scraped{instance="foobar",job="xxx"} 2 123
scrape_response_size_bytes{instance="foobar",job="xxx"} 158 123
scrape_duration_seconds{instance="foobar",job="xxx"} 0 123
scrape_samples_post_metric_relabeling{instance="foobar",job="xxx"} 2 123
scrape_series_added{instance="foobar",job="xxx"} 2 123
@ -294,6 +301,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{job="aa",a="b"} -3e4 123
up{job="override"} 1 123
scrape_samples_scraped{job="override"} 2 123
scrape_response_size_bytes{job="override"} 68 123
scrape_duration_seconds{job="override"} 0 123
scrape_samples_post_metric_relabeling{job="override"} 2 123
scrape_series_added{job="override"} 2 123
@ -322,6 +330,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",job="xx",instance="foo.com/xx"} -3e4 123
up{job="xx"} 1 123
scrape_samples_scraped{job="xx"} 2 123
scrape_response_size_bytes{job="xx"} 49 123
scrape_duration_seconds{job="xx"} 0 123
scrape_samples_post_metric_relabeling{job="xx"} 2 123
scrape_series_added{job="xx"} 2 123
@ -352,6 +361,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
foo{bar="baz",job="xx",instance="foo.com"} 34.44 123
up{job="xx",instance="foo.com"} 1 123
scrape_samples_scraped{job="xx",instance="foo.com"} 4 123
scrape_response_size_bytes{job="xx",instance="foo.com"} 106 123
scrape_duration_seconds{job="xx",instance="foo.com"} 0 123
scrape_samples_post_metric_relabeling{job="xx",instance="foo.com"} 1 123
scrape_series_added{job="xx",instance="foo.com"} 4 123
@ -371,6 +381,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
exported_scrape_series_added 3.435 123
up 1 123
scrape_duration_seconds 0 123
scrape_response_size_bytes 76 123
scrape_samples_scraped 3 123
scrape_samples_post_metric_relabeling 3 123
scrape_timeout_seconds 42 123
@ -389,6 +400,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
scrape_series_added 3.435 123
up 1 123
scrape_samples_scraped 3 123
scrape_response_size_bytes 76 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 3 123
scrape_series_added 3 123
@ -406,6 +418,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_limit 2 123
scrape_response_size_bytes 49 123
scrape_samples_scraped 2 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
@ -424,6 +437,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
}, `
up 0 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 0 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_samples_limit 1 123
@ -445,6 +459,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
bar{a="b",c="d"} -3e4 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 49 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123
@ -464,6 +479,7 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) {
foo{bar="baz"} 34.44 123
up 1 123
scrape_samples_scraped 2 123
scrape_response_size_bytes 49 123
scrape_duration_seconds 0 123
scrape_samples_post_metric_relabeling 2 123
scrape_series_added 2 123

View File

@ -178,7 +178,7 @@ func (tsm *targetStatusMap) Unregister(sw *scrapeWork) {
tsm.mu.Unlock()
}
func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDuration int64, samplesScraped int, err error) {
func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDuration int64, scrapeResponseSize float64, samplesScraped int, err error) {
jobName := sw.Config.jobNameOriginal
tsm.mu.Lock()
@ -197,6 +197,7 @@ func (tsm *targetStatusMap) Update(sw *scrapeWork, up bool, scrapeTime, scrapeDu
ts.scrapeTime = scrapeTime
ts.scrapeDuration = scrapeDuration
ts.samplesScraped = samplesScraped
ts.scrapeResponseSize = scrapeResponseSize
ts.scrapesTotal++
if !up {
ts.scrapesFailed++
@ -295,14 +296,15 @@ func writeLabelsJSON(w io.Writer, labels *promutils.Labels) {
}
type targetStatus struct {
sw *scrapeWork
up bool
scrapeTime int64
scrapeDuration int64
samplesScraped int
scrapesTotal int
scrapesFailed int
err error
sw *scrapeWork
up bool
scrapeTime int64
scrapeDuration int64
scrapeResponseSize float64
samplesScraped int
scrapesTotal int
scrapesFailed int
err error
}
func (ts *targetStatus) getDurationFromLastScrape() string {
@ -313,6 +315,13 @@ func (ts *targetStatus) getDurationFromLastScrape() string {
return fmt.Sprintf("%.3fs ago", d.Seconds())
}
func (ts *targetStatus) getSizeFromLastScrape() string {
if ts.scrapeResponseSize <= 0 {
return "never scraped"
}
return fmt.Sprintf("%.3f kb", float64(ts.scrapeResponseSize)/1024)
}
type droppedTargets struct {
mu sync.Mutex
m map[uint64]droppedTarget

View File

@ -29,6 +29,7 @@
scrapes_failed={%d ts.scrapesFailed %},{% space %}
last_scrape={%s ts.getDurationFromLastScrape() %},{% space %}
scrape_duration={%d int(ts.scrapeDuration) %}ms,{% space %}
scrape_response_size={%s ts.getSizeFromLastScrape() %},{% space %}
samples_scraped={%d ts.samplesScraped %},{% space %}
error={% if ts.err != nil %}{%s= ts.err.Error() %}{% endif %}
{% newline %}
@ -217,6 +218,7 @@
<th scope="col" title="total scrape errors">Errors</th>
<th scope="col" title="the time of the last scrape">Last Scrape</th>
<th scope="col" title="the duration of the last scrape">Duration</th>
<th scope="col" title="the size of the last scrape">Last Scrape Size</th>
<th scope="col" title="the number of metrics scraped during the last scrape">Samples</th>
<th scope="col" title="error from the last scrape (if any)">Last error</th>
</tr>
@ -269,8 +271,9 @@
{% endif %}
<td>{%d ts.scrapesTotal %}</td>
<td>{%d ts.scrapesFailed %}</td>
<td>{%s ts.getDurationFromLastScrape() %}
<td>{%s ts.getDurationFromLastScrape() %}</td>
<td>{%d int(ts.scrapeDuration) %}ms</td>
<td>{%s ts.getSizeFromLastScrape() %}</td>
<td>{%d ts.samplesScraped %}</td>
<td>{% if ts.err != nil %}{%s ts.err.Error() %}{% endif %}</td>
</tr>

File diff suppressed because it is too large Load Diff