diff --git a/CHANGELOG.md b/CHANGELOG.md index 8941405de..45fdd0817 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/781 * FEATURE: vmalert: add `-dryRun` command-line option for validating the provided config files without the need to start `vmalert` service. * FEATURE: accept optional third argument of string type at `topk_*` and `bottomk_*` functions. This is label name for additional time series to return with the sum of time series outside top/bottom K. See [MetricsQL docs](https://victoriametrics.github.io/MetricsQL.html) for more details. +* FEATURE: vmagent: expose `/api/v1/targets` page according to [the corresponding Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets). + See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/643 * BUGFIX: vmagent: properly handle OpenStack endpoint ending with `v3.0` such as `https://ostack.example.com:5000/v3.0` in the same way as Prometheus does. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/728#issuecomment-709914803 diff --git a/README.md b/README.md index 16b85aa26..40b212b6d 100644 --- a/README.md +++ b/README.md @@ -495,6 +495,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h * [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) * [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) * [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) +* [/api/v1/targets](https://prometheus.io/docs/prometheus/latest/querying/api/#targets) - see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for more details. These handlers can be queried from Prometheus-compatible clients such as Grafana or curl. diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 428ea6e97..ed1c90b12 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -211,9 +211,13 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview. If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard. -`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. -`/targets` handler accepts optional `show_original_labels=1` query arg, which shows the original labels per each target -before applying relabeling. This information may be useful for debugging target relabeling. +`vmagent` also exports target statuses at the following handlers: + +* `http://vmagent-host:8429/targets`. This handler returns human-readable plaintext status for every active target. +This page is convenient to query from command line with `wget`, `curl` or similar tools. +It accepts optional `show_original_labels=1` query arg, which shows the original labels per each target before applying relabeling. +This information may be useful for debugging target relabeling. +* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets). ### Troubleshooting @@ -224,7 +228,8 @@ before applying relabeling. This information may be useful for debugging target since `vmagent` establishes at least a single TCP connection per each target. * When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed - by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`. + by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets` + and `http://vmagent-host:8429/api/v1/targets`. * It is recommended to increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows. diff --git a/app/vmagent/main.go b/app/vmagent/main.go index 2a7432ca4..d3fa1d6b9 100644 --- a/app/vmagent/main.go +++ b/app/vmagent/main.go @@ -211,6 +211,12 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool { showOriginalLabels, _ := strconv.ParseBool(r.FormValue("show_original_labels")) promscrape.WriteHumanReadableTargetsStatus(w, showOriginalLabels) return true + case "/api/v1/targets": + promscrapeAPIV1TargetsRequests.Inc() + w.Header().Set("Content-Type", "application/json") + state := r.FormValue("state") + promscrape.WriteAPIV1Targets(w, state) + return true case "/-/reload": promscrapeConfigReloadRequests.Inc() procutil.SelfSIGHUP() @@ -241,7 +247,8 @@ var ( influxQueryRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/query", protocol="influx"}`) - promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`) + promscrapeTargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/targets"}`) + promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/api/v1/targets"}`) promscrapeConfigReloadRequests = metrics.NewCounter(`vmagent_http_requests_total{path="/-/reload"}`) ) diff --git a/app/vminsert/main.go b/app/vminsert/main.go index 616ced4b7..504922c94 100644 --- a/app/vminsert/main.go +++ b/app/vminsert/main.go @@ -159,6 +159,12 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { showOriginalLabels, _ := strconv.ParseBool(r.FormValue("show_original_labels")) promscrape.WriteHumanReadableTargetsStatus(w, showOriginalLabels) return true + case "/api/v1/targets": + promscrapeAPIV1TargetsRequests.Inc() + w.Header().Set("Content-Type", "application/json") + state := r.FormValue("state") + promscrape.WriteAPIV1Targets(w, state) + return true case "/-/reload": promscrapeConfigReloadRequests.Inc() procutil.SelfSIGHUP() @@ -191,7 +197,8 @@ var ( influxQueryRequests = metrics.NewCounter(`vm_http_requests_total{path="/query", protocol="influx"}`) - promscrapeTargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/targets"}`) + promscrapeTargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/targets"}`) + promscrapeAPIV1TargetsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/targets"}`) promscrapeConfigReloadRequests = metrics.NewCounter(`vm_http_requests_total{path="/-/reload"}`) diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 16b85aa26..40b212b6d 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -495,6 +495,7 @@ VictoriaMetrics supports the following handlers from [Prometheus querying API](h * [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) * [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) * [/api/v1/status/tsdb](https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats) +* [/api/v1/targets](https://prometheus.io/docs/prometheus/latest/querying/api/#targets) - see [these docs](#how-to-scrape-prometheus-exporters-such-as-node-exporter) for more details. These handlers can be queried from Prometheus-compatible clients such as Grafana or curl. diff --git a/docs/vmagent.md b/docs/vmagent.md index 428ea6e97..ed1c90b12 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -211,9 +211,13 @@ either via `vmagent` itself or via Prometheus, so the exported metrics could be Use official [Grafana dashboard](https://grafana.com/grafana/dashboards/12683) for `vmagent` state overview. If you have suggestions, improvements or found a bug - feel free to open an issue on github or add review to the dashboard. -`vmagent` also exports target statuses at `http://vmagent-host:8429/targets` page in plaintext format. -`/targets` handler accepts optional `show_original_labels=1` query arg, which shows the original labels per each target -before applying relabeling. This information may be useful for debugging target relabeling. +`vmagent` also exports target statuses at the following handlers: + +* `http://vmagent-host:8429/targets`. This handler returns human-readable plaintext status for every active target. +This page is convenient to query from command line with `wget`, `curl` or similar tools. +It accepts optional `show_original_labels=1` query arg, which shows the original labels per each target before applying relabeling. +This information may be useful for debugging target relabeling. +* `http://vmagent-host:8429/api/v1/targets`. This handler returns data compatible with [the corresponding page from Prometheus API](https://prometheus.io/docs/prometheus/latest/querying/api/#targets). ### Troubleshooting @@ -224,7 +228,8 @@ before applying relabeling. This information may be useful for debugging target since `vmagent` establishes at least a single TCP connection per each target. * When `vmagent` scrapes many unreliable targets, it can flood error log with scrape errors. These errors can be suppressed - by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets`. + by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets` + and `http://vmagent-host:8429/api/v1/targets`. * It is recommended to increase `-remoteWrite.queues` if `vmagent_remotewrite_pending_data_bytes` metric exported at `http://vmagent-host:8429/metrics` page constantly grows. diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index c174414ce..ac8c11f96 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -642,6 +642,7 @@ func appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, target string, ex labels = promrelabel.RemoveMetaLabels(labels[:0], labels) if len(labels) == 0 { // Drop target without labels. + droppedTargetsMap.Register(originalLabels) return dst, nil } // See https://www.robustperception.io/life-of-a-label @@ -652,10 +653,12 @@ func appendScrapeWork(dst []ScrapeWork, swc *scrapeWorkConfig, target string, ex addressRelabeled := promrelabel.GetLabelValueByName(labels, "__address__") if len(addressRelabeled) == 0 { // Drop target without scrape address. + droppedTargetsMap.Register(originalLabels) return dst, nil } if strings.Contains(addressRelabeled, "/") { // Drop target with '/' + droppedTargetsMap.Register(originalLabels) return dst, nil } addressRelabeled = addMissingPort(schemeRelabeled, addressRelabeled) diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go index bd611c13e..4451820cd 100644 --- a/lib/promscrape/scraper.go +++ b/lib/promscrape/scraper.go @@ -284,6 +284,7 @@ func (sg *scraperGroup) update(sws []ScrapeWork) { "original labels for target1: %s; original labels for target2: %s", sw.ScrapeURL, sw.LabelsString(), promLabelsString(originalLabels), promLabelsString(sw.OriginalLabels)) } + droppedTargetsMap.Register(sw.OriginalLabels) continue } swsMap[key] = sw.OriginalLabels diff --git a/lib/promscrape/targetstatus.go b/lib/promscrape/targetstatus.go index 03777a244..48240368c 100644 --- a/lib/promscrape/targetstatus.go +++ b/lib/promscrape/targetstatus.go @@ -6,6 +6,10 @@ import ( "sort" "sync" "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" ) var tsmGlobal = newTargetStatusMap() @@ -15,6 +19,26 @@ func WriteHumanReadableTargetsStatus(w io.Writer, showOriginalLabels bool) { tsmGlobal.WriteHumanReadable(w, showOriginalLabels) } +// WriteAPIV1Targets writes /api/v1/targets to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets +func WriteAPIV1Targets(w io.Writer, state string) { + if state == "" { + state = "any" + } + fmt.Fprintf(w, `{"status":"success","data":{"activeTargets":`) + if state == "active" || state == "any" { + tsmGlobal.WriteActiveTargetsJSON(w) + } else { + fmt.Fprintf(w, `[]`) + } + fmt.Fprintf(w, `,"droppedTargets":`) + if state == "dropped" || state == "any" { + droppedTargetsMap.WriteDroppedTargetsJSON(w) + } else { + fmt.Fprintf(w, `[]`) + } + fmt.Fprintf(w, `}}`) +} + type targetStatusMap struct { mu sync.Mutex m map[uint64]targetStatus @@ -73,6 +97,66 @@ func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int { return count } +// WriteActiveTargetsJSON writes `activeTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets +func (tsm *targetStatusMap) WriteActiveTargetsJSON(w io.Writer) { + tsm.mu.Lock() + type keyStatus struct { + key string + st targetStatus + } + kss := make([]keyStatus, 0, len(tsm.m)) + for _, st := range tsm.m { + key := promLabelsString(st.sw.OriginalLabels) + kss = append(kss, keyStatus{ + key: key, + st: st, + }) + } + tsm.mu.Unlock() + + sort.Slice(kss, func(i, j int) bool { + return kss[i].key < kss[j].key + }) + fmt.Fprintf(w, `[`) + for i, ks := range kss { + st := ks.st + fmt.Fprintf(w, `{"discoveredLabels":`) + writeLabelsJSON(w, st.sw.OriginalLabels) + fmt.Fprintf(w, `,"labels":`) + labelsFinalized := promrelabel.FinalizeLabels(nil, st.sw.Labels) + writeLabelsJSON(w, labelsFinalized) + fmt.Fprintf(w, `,"scrapePool":%q`, st.sw.Job()) + fmt.Fprintf(w, `,"scrapeUrl":%q`, st.sw.ScrapeURL) + errMsg := "" + if st.err != nil { + errMsg = st.err.Error() + } + fmt.Fprintf(w, `,"lastError":%q`, errMsg) + fmt.Fprintf(w, `,"lastScrape":%q`, time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6).Format(time.RFC3339Nano)) + fmt.Fprintf(w, `,"lastScrapeDuration":%g`, (time.Millisecond * time.Duration(st.scrapeDuration)).Seconds()) + state := "up" + if !st.up { + state = "down" + } + fmt.Fprintf(w, `,"health":%q}`, state) + if i+1 < len(kss) { + fmt.Fprintf(w, `,`) + } + } + fmt.Fprintf(w, `]`) +} + +func writeLabelsJSON(w io.Writer, labels []prompbmarshal.Label) { + fmt.Fprintf(w, `{`) + for i, label := range labels { + fmt.Fprintf(w, "%q:%q", label.Name, label.Value) + if i+1 < len(labels) { + fmt.Fprintf(w, `,`) + } + } + fmt.Fprintf(w, `}`) +} + func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer, showOriginalLabels bool) { byJob := make(map[string][]targetStatus) tsm.mu.Lock() @@ -143,3 +227,69 @@ type targetStatus struct { func (st *targetStatus) getDurationFromLastScrape() time.Duration { return time.Since(time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6)) } + +type droppedTargets struct { + mu sync.Mutex + m map[string]droppedTarget + lastCleanupTime uint64 +} + +type droppedTarget struct { + originalLabels []prompbmarshal.Label + deadline uint64 +} + +func (dt *droppedTargets) Register(originalLabels []prompbmarshal.Label) { + key := promLabelsString(originalLabels) + currentTime := fasttime.UnixTimestamp() + dt.mu.Lock() + dt.m[key] = droppedTarget{ + originalLabels: originalLabels, + deadline: currentTime + 10*60, + } + if currentTime-dt.lastCleanupTime > 60 { + for k, v := range dt.m { + if currentTime > v.deadline { + delete(dt.m, k) + } + } + dt.lastCleanupTime = currentTime + } + dt.mu.Unlock() +} + +// WriteDroppedTargetsJSON writes `droppedTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets +func (dt *droppedTargets) WriteDroppedTargetsJSON(w io.Writer) { + dt.mu.Lock() + type keyStatus struct { + key string + originalLabels []prompbmarshal.Label + } + kss := make([]keyStatus, 0, len(dt.m)) + for _, v := range dt.m { + key := promLabelsString(v.originalLabels) + kss = append(kss, keyStatus{ + key: key, + originalLabels: v.originalLabels, + }) + } + dt.mu.Unlock() + + sort.Slice(kss, func(i, j int) bool { + return kss[i].key < kss[j].key + }) + fmt.Fprintf(w, `[`) + for i, ks := range kss { + fmt.Fprintf(w, `{"discoveredLabels":`) + writeLabelsJSON(w, ks.originalLabels) + fmt.Fprintf(w, `}`) + if i+1 < len(kss) { + fmt.Fprintf(w, `,`) + } + } + fmt.Fprintf(w, `]`) +} + +var droppedTargetsMap = &droppedTargets{ + m: make(map[string]droppedTarget), +}