VictoriaMetrics/lib/promscrape/targetstatus.go

307 lines
7.7 KiB
Go
Raw Normal View History

package promscrape
import (
"flag"
"fmt"
"io"
"sort"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)
// maxDroppedTargets is the -promscrape.maxDroppedTargets command-line flag.
// droppedTargets.Register stops adding new entries once the number of tracked
// dropped targets reaches this value; already tracked entries are still refreshed.
var maxDroppedTargets = flag.Int("promscrape.maxDroppedTargets", 1000, "The maximum number of `droppedTargets` shown at /api/v1/targets page. "+
"Increase this value if your setup drops more scrape targets during relabeling and you need investigating labels for all the dropped targets. "+
"Note that the increased number of tracked dropped targets may result in increased memory usage")
// tsmGlobal is the package-wide registry of scrape target statuses.
// It backs WriteHumanReadableTargetsStatus and the `activeTargets` part of WriteAPIV1Targets.
var tsmGlobal = newTargetStatusMap()
// WriteHumanReadableTargetsStatus writes human-readable status for all the scrape targets to w.
//
// The status is taken from the package-wide tsmGlobal registry.
// If showOriginalLabels is set, then the original labels of every target
// are appended to its labels in the output.
func WriteHumanReadableTargetsStatus(w io.Writer, showOriginalLabels bool) {
tsmGlobal.WriteHumanReadable(w, showOriginalLabels)
}
// WriteAPIV1Targets writes /api/v1/targets to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
//
// state selects which target lists are populated:
//
//   - "active"  - only `activeTargets` is filled, `droppedTargets` is an empty array;
//   - "dropped" - only `droppedTargets` is filled, `activeTargets` is an empty array;
//   - "any" or an empty string - both lists are filled.
//
// Any other state value results in both lists being empty arrays.
func WriteAPIV1Targets(w io.Writer, state string) {
	var withActive, withDropped bool
	switch state {
	case "", "any":
		withActive, withDropped = true, true
	case "active":
		withActive = true
	case "dropped":
		withDropped = true
	}

	fmt.Fprint(w, `{"status":"success","data":{"activeTargets":`)
	if withActive {
		tsmGlobal.WriteActiveTargetsJSON(w)
	} else {
		fmt.Fprint(w, `[]`)
	}

	fmt.Fprint(w, `,"droppedTargets":`)
	if withDropped {
		droppedTargetsMap.WriteDroppedTargetsJSON(w)
	} else {
		fmt.Fprint(w, `[]`)
	}

	fmt.Fprint(w, `}}`)
}
// targetStatusMap holds the most recent status for every registered scrape target.
//
// All the methods lock mu while accessing m.
type targetStatusMap struct {
// mu protects m.
mu sync.Mutex
// m maps ScrapeWork.ID to the latest status recorded for that target.
m map[uint64]targetStatus
}
// newTargetStatusMap returns an empty targetStatusMap, which is ready to use.
func newTargetStatusMap() *targetStatusMap {
	var tsm targetStatusMap
	tsm.m = make(map[uint64]targetStatus)
	return &tsm
}
// Reset drops all the tracked target statuses by replacing the underlying map with a fresh one.
func (tsm *targetStatusMap) Reset() {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	tsm.m = make(map[uint64]targetStatus)
}
// Register starts tracking sw.
//
// A copy of sw is stored under sw.ID with zero values for the rest of status fields
// (the target is reported as down until Update is called for it).
// Any status previously stored under the same ID is overwritten.
func (tsm *targetStatusMap) Register(sw *ScrapeWork) {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	var st targetStatus
	st.sw = *sw
	tsm.m[sw.ID] = st
}
// Unregister stops tracking the target identified by sw.ID.
//
// It is a no-op if there is no such target in the map.
func (tsm *targetStatusMap) Unregister(sw *ScrapeWork) {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	delete(tsm.m, sw.ID)
}
// Update records the outcome of the latest scrape for sw.
//
// The status stored under sw.ID is replaced as a whole with a copy of sw
// plus the given group, up flag, scrapeTime, scrapeDuration and err.
// Other code in this file interprets scrapeTime as Unix time in milliseconds
// and scrapeDuration as a duration in milliseconds.
func (tsm *targetStatusMap) Update(sw *ScrapeWork, group string, up bool, scrapeTime, scrapeDuration int64, err error) {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	var st targetStatus
	st.sw = *sw
	st.up = up
	st.scrapeGroup = group
	st.scrapeTime = scrapeTime
	st.scrapeDuration = scrapeDuration
	st.err = err
	tsm.m[sw.ID] = st
}
// StatusByGroup returns the number of targets in the given scrape group
// whose `up` state equals the given up value.
func (tsm *targetStatusMap) StatusByGroup(group string, up bool) int {
	tsm.mu.Lock()
	defer tsm.mu.Unlock()

	matched := 0
	for _, status := range tsm.m {
		if status.scrapeGroup != group || status.up != up {
			continue
		}
		matched++
	}
	return matched
}
// WriteActiveTargetsJSON writes `activeTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
//
// The statuses are copied under the lock and then serialized without holding it.
// Targets are ordered by the string representation of their original labels,
// so the output is stable across calls.
//
// All the string values are written as valid JSON string literals.
// Write errors are ignored.
func (tsm *targetStatusMap) WriteActiveTargetsJSON(w io.Writer) {
	type keyStatus struct {
		key string
		st  targetStatus
	}

	tsm.mu.Lock()
	kss := make([]keyStatus, 0, len(tsm.m))
	for _, st := range tsm.m {
		kss = append(kss, keyStatus{
			key: promLabelsString(st.sw.OriginalLabels),
			st:  st,
		})
	}
	tsm.mu.Unlock()

	sort.Slice(kss, func(i, j int) bool {
		return kss[i].key < kss[j].key
	})

	fmt.Fprint(w, `[`)
	for i, ks := range kss {
		if i > 0 {
			fmt.Fprint(w, `,`)
		}
		st := ks.st

		fmt.Fprint(w, `{"discoveredLabels":`)
		writeLabelsJSON(w, st.sw.OriginalLabels)

		fmt.Fprint(w, `,"labels":`)
		labelsFinalized := promrelabel.FinalizeLabels(nil, st.sw.Labels)
		writeLabelsJSON(w, labelsFinalized)

		fmt.Fprint(w, `,"scrapePool":`)
		writeJSONStringLiteral(w, st.sw.Job())

		fmt.Fprint(w, `,"scrapeUrl":`)
		writeJSONStringLiteral(w, st.sw.ScrapeURL)

		// The error message may contain arbitrary bytes,
		// so it must be escaped according to JSON rules.
		errMsg := ""
		if st.err != nil {
			errMsg = st.err.Error()
		}
		fmt.Fprint(w, `,"lastError":`)
		writeJSONStringLiteral(w, errMsg)

		// scrapeTime is Unix time in milliseconds: split it into seconds and nanoseconds.
		lastScrape := time.Unix(st.scrapeTime/1000, (st.scrapeTime%1000)*1e6)
		fmt.Fprint(w, `,"lastScrape":`)
		writeJSONStringLiteral(w, lastScrape.Format(time.RFC3339Nano))

		// scrapeDuration is in milliseconds, while the API reports seconds.
		fmt.Fprintf(w, `,"lastScrapeDuration":%g`, (time.Millisecond * time.Duration(st.scrapeDuration)).Seconds())

		state := "up"
		if !st.up {
			state = "down"
		}
		fmt.Fprint(w, `,"health":`)
		writeJSONStringLiteral(w, state)
		fmt.Fprint(w, `}`)
	}
	fmt.Fprint(w, `]`)
}

// writeLabelsJSON writes labels to w as a JSON object mapping label names to label values.
//
// Labels are written in the given order. Names and values are escaped
// according to JSON rules. Write errors are ignored.
func writeLabelsJSON(w io.Writer, labels []prompbmarshal.Label) {
	fmt.Fprint(w, `{`)
	for i, label := range labels {
		if i > 0 {
			fmt.Fprint(w, `,`)
		}
		writeJSONStringLiteral(w, label.Name)
		fmt.Fprint(w, `:`)
		writeJSONStringLiteral(w, label.Value)
	}
	fmt.Fprint(w, `}`)
}

// writeJSONStringLiteral writes s to w as a double-quoted JSON string literal.
//
// It is used instead of the `%q` formatting verb, since `%q` emits Go-syntax escapes
// such as `\a`, `\v`, `\x00` or `\U0001xxxx`, which are not valid JSON.
//
// Double quotes, backslashes and control chars below 0x20 are escaped.
// Invalid UTF-8 bytes are replaced with U+FFFD. All the other chars are written as is.
func writeJSONStringLiteral(w io.Writer, s string) {
	const hexDigits = "0123456789abcdef"

	buf := make([]byte, 0, len(s)+2)
	buf = append(buf, '"')
	// Ranging over a string yields U+FFFD for every invalid UTF-8 byte,
	// so the resulting buffer always contains valid UTF-8.
	for _, r := range s {
		switch {
		case r == '"', r == '\\':
			buf = append(buf, '\\', byte(r))
		case r == '\n':
			buf = append(buf, '\\', 'n')
		case r == '\r':
			buf = append(buf, '\\', 'r')
		case r == '\t':
			buf = append(buf, '\\', 't')
		case r < 0x20:
			buf = append(buf, '\\', 'u', '0', '0', hexDigits[r>>4], hexDigits[r&0x0f])
		default:
			buf = append(buf, string(r)...)
		}
	}
	buf = append(buf, '"')

	// Write errors are intentionally ignored, like for the other writes to w in this file.
	_, _ = w.Write(buf)
}
// WriteHumanReadable writes a plain-text report on all the tracked targets to w.
//
// Targets are grouped by job; jobs are sorted by name and targets inside every job
// are sorted by scrape url. Every job starts with a `job="..." (ups/total up)` line,
// which is followed by a tab-indented line per target.
// If showOriginalLabels is set, then the original labels are appended to target labels.
// The report ends with an empty line.
func (tsm *targetStatusMap) WriteHumanReadable(w io.Writer, showOriginalLabels bool) {
	// Take a snapshot of statuses grouped by job, so the lock isn't held during formatting.
	grouped := make(map[string][]targetStatus)
	tsm.mu.Lock()
	for _, status := range tsm.m {
		name := status.sw.Job()
		grouped[name] = append(grouped[name], status)
	}
	tsm.mu.Unlock()

	jobs := make([]jobStatus, 0, len(grouped))
	for name, targets := range grouped {
		jobs = append(jobs, jobStatus{
			job:      name,
			statuses: targets,
		})
	}
	sort.Slice(jobs, func(a, b int) bool {
		return jobs[a].job < jobs[b].job
	})

	for _, job := range jobs {
		targets := job.statuses
		sort.Slice(targets, func(a, b int) bool {
			return targets[a].sw.ScrapeURL < targets[b].sw.ScrapeURL
		})

		healthy := 0
		for _, target := range targets {
			if target.up {
				healthy++
			}
		}
		fmt.Fprintf(w, "job=%q (%d/%d up)\n", job.job, healthy, len(targets))

		for _, target := range targets {
			health := "down"
			if target.up {
				health = "up"
			}

			labels := target.sw.LabelsString()
			if showOriginalLabels {
				labels += ", originalLabels=" + promLabelsString(target.sw.OriginalLabels)
			}

			sinceLastScrape := target.getDurationFromLastScrape()

			var lastErr string
			if target.err != nil {
				lastErr = target.err.Error()
			}

			// scrapeDuration is stored in milliseconds, while the report shows seconds.
			scrapeSeconds := float64(target.scrapeDuration) / 1000
			fmt.Fprintf(w, "\tstate=%s, endpoint=%s, labels=%s, last_scrape=%.3fs ago, scrape_duration=%.3fs, error=%q\n",
				health, target.sw.ScrapeURL, labels, sinceLastScrape.Seconds(), scrapeSeconds, lastErr)
		}
	}
	fmt.Fprintf(w, "\n")
}
// jobStatus groups target statuses belonging to a single job.
//
// It is used by targetStatusMap.WriteHumanReadable for sorting and printing targets per job.
type jobStatus struct {
// job is the job name, as returned by ScrapeWork.Job().
job string
// statuses contains statuses for all the targets of the job.
statuses []targetStatus
}
// targetStatus is the latest known state of a single scrape target.
//
// It is stored by value in targetStatusMap.m.
type targetStatus struct {
// sw is a copy of the ScrapeWork describing the target.
sw ScrapeWork
// up shows whether the target is reported as "up" (true) or "down" (false).
up bool
// scrapeGroup is the group name passed to targetStatusMap.Update.
// It is used for filtering in targetStatusMap.StatusByGroup.
scrapeGroup string
// scrapeTime is the time of the last scrape. It is interpreted as Unix time in milliseconds.
// It is zero for targets, which were registered but haven't been updated yet.
scrapeTime int64
// scrapeDuration is the duration of the last scrape. It is interpreted as milliseconds.
scrapeDuration int64
// err is the error passed to the last targetStatusMap.Update call. It may be nil.
err error
}
// getDurationFromLastScrape returns the time elapsed since st.scrapeTime.
//
// st.scrapeTime is interpreted as Unix time in milliseconds, so a zero scrapeTime
// results in the duration elapsed since the Unix epoch.
func (st *targetStatus) getDurationFromLastScrape() time.Duration {
	// Split milliseconds into whole seconds and the remaining nanoseconds.
	secs := st.scrapeTime / 1000
	nsecs := (st.scrapeTime % 1000) * 1e6
	lastScrape := time.Unix(secs, nsecs)
	return time.Since(lastScrape)
}
// droppedTargets tracks recently registered dropped targets for showing them at /api/v1/targets.
//
// All the methods lock mu while accessing the rest of fields.
type droppedTargets struct {
// mu protects m and lastCleanupTime.
mu sync.Mutex
// m maps promLabelsString(originalLabels) to the tracked dropped target.
m map[string]droppedTarget
// lastCleanupTime is the fasttime.UnixTimestamp() value observed during the last removal
// of expired entries from m (presumably Unix time in seconds - verify against fasttime).
lastCleanupTime uint64
}
// droppedTarget is a single entry tracked by droppedTargets.
type droppedTarget struct {
// originalLabels are the labels passed to droppedTargets.Register for the target.
originalLabels []prompbmarshal.Label
// deadline is the fasttime.UnixTimestamp() value after which the entry may be removed
// during cleanup. It is moved forward every time the target is registered again.
deadline uint64
}
// Register records the target with the given originalLabels as dropped.
//
// If the target is already tracked, then its expiration deadline is moved forward.
// Otherwise it is added only if the number of tracked targets is below -promscrape.maxDroppedTargets.
// Additionally, expired entries are removed if more than cleanupInterval timestamp units
// have passed since the previous cleanup.
//
// The originalLabels slice is stored as is, without copying.
func (dt *droppedTargets) Register(originalLabels []prompbmarshal.Label) {
	const (
		// entryTTL is added to the current timestamp in order to obtain the entry deadline.
		entryTTL = 10 * 60
		// cleanupInterval is the minimum delay between removals of expired entries.
		cleanupInterval = 60
	)

	key := promLabelsString(originalLabels)
	now := fasttime.UnixTimestamp()

	dt.mu.Lock()
	defer dt.mu.Unlock()

	entry, tracked := dt.m[key]
	switch {
	case tracked:
		entry.deadline = now + entryTTL
		dt.m[key] = entry
	case len(dt.m) < *maxDroppedTargets:
		dt.m[key] = droppedTarget{
			originalLabels: originalLabels,
			deadline:       now + entryTTL,
		}
	}

	if now-dt.lastCleanupTime <= cleanupInterval {
		return
	}
	for k, v := range dt.m {
		if v.deadline < now {
			delete(dt.m, k)
		}
	}
	dt.lastCleanupTime = now
}
// WriteDroppedTargetsJSON writes `droppedTargets` contents to w according to https://prometheus.io/docs/prometheus/latest/querying/api/#targets
//
// The tracked targets are copied under the lock and then serialized without holding it.
// The output is a JSON array of `{"discoveredLabels":{...}}` objects ordered
// by the string representation of the original labels.
func (dt *droppedTargets) WriteDroppedTargetsJSON(w io.Writer) {
	type sortedTarget struct {
		sortKey string
		labels  []prompbmarshal.Label
	}

	dt.mu.Lock()
	targets := make([]sortedTarget, 0, len(dt.m))
	for _, tracked := range dt.m {
		targets = append(targets, sortedTarget{
			sortKey: promLabelsString(tracked.originalLabels),
			labels:  tracked.originalLabels,
		})
	}
	dt.mu.Unlock()

	sort.Slice(targets, func(a, b int) bool {
		return targets[a].sortKey < targets[b].sortKey
	})

	fmt.Fprint(w, `[`)
	for idx := range targets {
		if idx > 0 {
			fmt.Fprint(w, `,`)
		}
		fmt.Fprint(w, `{"discoveredLabels":`)
		writeLabelsJSON(w, targets[idx].labels)
		fmt.Fprint(w, `}`)
	}
	fmt.Fprint(w, `]`)
}
// droppedTargetsMap is the package-wide registry of dropped targets.
// It backs the `droppedTargets` part of WriteAPIV1Targets.
var droppedTargetsMap = &droppedTargets{
m: make(map[string]droppedTarget),
}