diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 95154d3dbc..52cdccf5a2 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -37,7 +37,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-enableMultitenantHandlers` command-line flag, which allows receiving data via [VictoriaMetrics cluster urls](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) at `vmagent` and converting [tenant ids](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) to (`vm_account_id`, `vm_project_id`) labels before sending the data to the configured `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/vmagent.html#multitenancy) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration).
-* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): show all the dropped targets together with the reason why they are dropped at `http://vmagent:8429/service-discovery` page. Previously targets, which were dropped because of [target sharding](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets) weren't displayed on this page. This could complicate service discovery debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5389).
+* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): show all the dropped targets, together with the reason why they were dropped, on the `http://vmagent:8429/service-discovery` page. Previously, targets that were dropped because of [target sharding](https://docs.victoriametrics.com/vmagent.html#scraping-big-number-of-targets) weren't displayed on this page, which could complicate service discovery debugging. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5389) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4018).
* FEATURE: reduce the default value for `-import.maxLineLen` command-line flag from 100MB to 10MB in order to prevent excessive memory usage during data import via [/api/v1/import](https://docs.victoriametrics.com/#how-to-import-data-in-json-line-format).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `keep_if_contains` and `drop_if_contains` relabeling actions. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): export `vm_promscrape_scrape_pool_targets` [metric](https://docs.victoriametrics.com/vmagent.html#monitoring) to track the number of targets each scrape job discovers. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5311).
diff --git a/docs/vmagent.md b/docs/vmagent.md
index 99291b3182..b650558071 100644
--- a/docs/vmagent.md
+++ b/docs/vmagent.md
@@ -810,6 +810,11 @@ start a cluster of three `vmagent` instances, where each target is scraped by tw
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
```
+Every `vmagent` instance in the cluster exposes all the discovered targets on the `http://vmagent:8429/service-discovery` page.
+Every discovered target on this page is shown with its status: `UP`, `DOWN` or `DROPPED` (together with the reason why the target has been dropped).
+If a target is dropped because it is sharded to other `vmagent` instances in the cluster, then the status column contains
+the `-promscrape.cluster.memberNum` values of the `vmagent` instances where the given target is scraped.
+
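+The following is a minimal standalone Go sketch of how these member numbers are derived from a target's labels
+(the names here are illustrative; the actual logic lives in `getClusterMemberNumsForScrapeWork` in `lib/promscrape/config.go`):
+
+```go
+package main
+
+import (
+	"fmt"
+
+	"github.com/cespare/xxhash/v2"
+)
+
+// memberNumsForTarget returns the -promscrape.cluster.memberNum values
+// of the vmagent instances which scrape the target identified by key.
+func memberNumsForTarget(key string, membersCount, replicationFactor int) []int {
+	if membersCount <= 1 {
+		return []int{0}
+	}
+	if replicationFactor < 1 {
+		replicationFactor = 1
+	}
+	// A target is assigned to replicationFactor consecutive members,
+	// starting from the hash of its labels modulo the cluster size.
+	idx := int(xxhash.Sum64String(key) % uint64(membersCount))
+	nums := make([]int, replicationFactor)
+	for i := range nums {
+		nums[i] = idx
+		idx = (idx + 1) % membersCount
+	}
+	return nums
+}
+
+func main() {
+	fmt.Println(memberNumsForTarget("foo", 3, 2)) // prints: [2 0]
+}
+```
+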
If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at remote storage pointed by `-remoteWrite.url`.
The `-dedup.minScrapeInterval` must be set to the `scrape_interval` configured at `-promscrape.config`.
See [these docs](https://docs.victoriametrics.com/#deduplication) for details.
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go
index adeaa5d815..5f8cb9e17b 100644
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@@ -6,6 +6,7 @@ import (
"fmt"
"net/url"
"path/filepath"
+ "slices"
"sort"
"strconv"
"strings"
@@ -1020,25 +1021,24 @@ func appendScrapeWorkKey(dst []byte, labels *promutils.Labels) []byte {
return dst
}
-func needSkipScrapeWork(key string, membersCount, replicasCount, memberNum int) bool {
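+// getClusterMemberNumsForScrapeWork returns the list of -promscrape.cluster.memberNum values
+// for the cluster members which must scrape the target with the given key.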
+func getClusterMemberNumsForScrapeWork(key string, membersCount, replicasCount int) []int {
if membersCount <= 1 {
- return false
+ return []int{0}
}
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(key))
idx := int(h % uint64(membersCount))
if replicasCount < 1 {
replicasCount = 1
}
+ memberNums := make([]int, replicasCount)
for i := 0; i < replicasCount; i++ {
- if idx == memberNum {
- return false
- }
+ memberNums[i] = idx
idx++
if idx >= membersCount {
idx = 0
}
}
- return true
+ return memberNums
}
var scrapeWorkKeyBufPool bytesutil.ByteBufferPool
@@ -1056,7 +1056,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
if labels.Len() == 0 {
// Drop target without labels.
originalLabels = sortOriginalLabelsIfNeeded(originalLabels)
- droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonRelabeling)
+ droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonRelabeling, nil)
return nil, nil
}
@@ -1067,11 +1067,11 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
if *clusterMembersCount > 1 {
bb := scrapeWorkKeyBufPool.Get()
bb.B = appendScrapeWorkKey(bb.B[:0], labels)
- needSkip := needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor, clusterMemberID)
+ memberNums := getClusterMemberNumsForScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor)
scrapeWorkKeyBufPool.Put(bb)
- if needSkip {
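+		// The target is assigned to other cluster members - register it as dropped because of sharding.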
+ if !slices.Contains(memberNums, clusterMemberID) {
originalLabels = sortOriginalLabelsIfNeeded(originalLabels)
- droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonSharding)
+ droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonSharding, memberNums)
return nil, nil
}
}
@@ -1079,7 +1079,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
if scrapeURL == "" {
// Drop target without URL.
originalLabels = sortOriginalLabelsIfNeeded(originalLabels)
- droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonMissingScrapeURL)
+ droppedTargetsMap.Register(originalLabels, swc.relabelConfigs, targetDropReasonMissingScrapeURL, nil)
return nil, nil
}
if _, err := url.Parse(scrapeURL); err != nil {
diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go
index fa5e8b2463..64473229d9 100644
--- a/lib/promscrape/config_test.go
+++ b/lib/promscrape/config_test.go
@@ -110,31 +110,30 @@ scrape_configs:
`)
}
-func TestNeedSkipScrapeWork(t *testing.T) {
- f := func(key string, membersCount, replicationFactor, memberNum int, needSkipExpected bool) {
+func TestGetClusterMemberNumsForScrapeWork(t *testing.T) {
+ f := func(key string, membersCount, replicationFactor int, expectedMemberNums []int) {
t.Helper()
- needSkip := needSkipScrapeWork(key, membersCount, replicationFactor, memberNum)
- if needSkip != needSkipExpected {
- t.Fatalf("unexpected needSkipScrapeWork(key=%q, membersCount=%d, replicationFactor=%d, memberNum=%d); got %v; want %v",
- key, membersCount, replicationFactor, memberNum, needSkip, needSkipExpected)
+ memberNums := getClusterMemberNumsForScrapeWork(key, membersCount, replicationFactor)
+ if !reflect.DeepEqual(memberNums, expectedMemberNums) {
+ t.Fatalf("unexpected memberNums; got %d; want %d", memberNums, expectedMemberNums)
}
}
// Disabled clustering
- f("foo", 0, 0, 0, false)
- f("foo", 0, 0, 1, false)
+ f("foo", 0, 0, []int{0})
+	f("foo", 1, 0, []int{0})
// A cluster with 2 nodes with disabled replication
- f("foo", 2, 0, 0, true)
- f("foo", 2, 0, 1, false)
+ f("baz", 2, 0, []int{0})
+ f("foo", 2, 0, []int{1})
// A cluster with 2 nodes with replicationFactor=2
- f("foo", 2, 2, 0, false)
- f("foo", 2, 2, 1, false)
+ f("baz", 2, 2, []int{0, 1})
+ f("foo", 2, 2, []int{1, 0})
// A cluster with 3 nodes with replicationFactor=2
- f("foo", 3, 2, 0, false)
- f("foo", 3, 2, 1, true)
- f("foo", 3, 2, 2, false)
+ f("abc", 3, 2, []int{0, 1})
+ f("bar", 3, 2, []int{1, 2})
+ f("foo", 3, 2, []int{2, 0})
}
func TestLoadStaticConfigs(t *testing.T) {
diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go
index f61cc47586..2ad7b26980 100644
--- a/lib/promscrape/scraper.go
+++ b/lib/promscrape/scraper.go
@@ -370,7 +370,7 @@ func (sg *scraperGroup) update(sws []*ScrapeWork) {
"original labels for target1: %s; original labels for target2: %s",
sw.ScrapeURL, sw.Labels.String(), originalLabels.String(), sw.OriginalLabels.String())
}
- droppedTargetsMap.Register(sw.OriginalLabels, sw.RelabelConfigs, targetDropReasonDuplicate)
+ droppedTargetsMap.Register(sw.OriginalLabels, sw.RelabelConfigs, targetDropReasonDuplicate, nil)
continue
}
swsMap[key] = sw.OriginalLabels
diff --git a/lib/promscrape/targetstatus.go b/lib/promscrape/targetstatus.go
index 3433be9ba8..06a5314410 100644
--- a/lib/promscrape/targetstatus.go
+++ b/lib/promscrape/targetstatus.go
@@ -318,9 +318,10 @@ type droppedTargets struct {
}
type droppedTarget struct {
- originalLabels *promutils.Labels
- relabelConfigs *promrelabel.ParsedConfigs
- dropReason targetDropReason
+ originalLabels *promutils.Labels
+ relabelConfigs *promrelabel.ParsedConfigs
+ dropReason targetDropReason
+ clusterMemberNums []int
}
type targetDropReason string
@@ -352,7 +353,7 @@ func (dt *droppedTargets) getTargetsList() []droppedTarget {
//
// The relabelConfigs must contain relabel configs, which were applied to originalLabels.
// The reason must contain the reason why the target has been dropped.
-func (dt *droppedTargets) Register(originalLabels *promutils.Labels, relabelConfigs *promrelabel.ParsedConfigs, reason targetDropReason) {
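+// The clusterMemberNums must contain the list of -promscrape.cluster.memberNum values for the cluster members,
+// which scrape the given target; it is nil if the target wasn't dropped because of sharding.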
+func (dt *droppedTargets) Register(originalLabels *promutils.Labels, relabelConfigs *promrelabel.ParsedConfigs, reason targetDropReason, clusterMemberNums []int) {
if originalLabels == nil {
// Do not register target without originalLabels. This is the case when *dropOriginalLabels is set to true.
return
@@ -361,9 +362,10 @@ func (dt *droppedTargets) Register(originalLabels *promutils.Labels, relabelConf
key := labelsHash(originalLabels)
dt.mu.Lock()
dt.m[key] = droppedTarget{
- originalLabels: originalLabels,
- relabelConfigs: relabelConfigs,
- dropReason: reason,
+ originalLabels: originalLabels,
+ relabelConfigs: relabelConfigs,
+ dropReason: reason,
+ clusterMemberNums: clusterMemberNums,
}
if len(dt.m) >= *maxDroppedTargets {
for k := range dt.m {
@@ -588,10 +590,11 @@ type targetsStatusResult struct {
}
type targetLabels struct {
- up bool
- originalLabels *promutils.Labels
- labels *promutils.Labels
- dropReason targetDropReason
+ up bool
+ originalLabels *promutils.Labels
+ labels *promutils.Labels
+ dropReason targetDropReason
+ clusterMemberNums []int
}
type targetLabelsByJob struct {
jobName string
@@ -681,8 +684,9 @@ func (tsr *targetsStatusResult) getTargetLabelsByJob() []*targetLabelsByJob {
}
m.droppedTargets++
m.targets = append(m.targets, targetLabels{
- originalLabels: dt.originalLabels,
- dropReason: dt.dropReason,
+ originalLabels: dt.originalLabels,
+ dropReason: dt.dropReason,
+ clusterMemberNums: dt.clusterMemberNums,
})
}
a := make([]*targetLabelsByJob, 0, len(byJob))
diff --git a/lib/promscrape/targetstatus.qtpl b/lib/promscrape/targetstatus.qtpl
index c07f8cc760..d4206ee2d2 100644
--- a/lib/promscrape/targetstatus.qtpl
+++ b/lib/promscrape/targetstatus.qtpl
@@ -330,6 +330,15 @@
DOWN
{% else %}
DROPPED ({%s string(t.dropReason) %})
+ {% if len(t.clusterMemberNums) > 0 %}
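+    {% comment %} List the -promscrape.cluster.memberNum values of the vmagent instances which scrape this target. {% endcomment %}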
+
+
+ exists at{% space %}
+ {% for i, memberNum := range t.clusterMemberNums %}
+ {%d memberNum %}
+ {% if i+1 < len(t.clusterMemberNums) %},{% space %}{% endif %}
+ {% endfor %}
+ {% endif %}
{% endif %}