mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-06 08:02:17 +01:00
app/vmselect: add per-tenant /api/v1/status/top_queries
handler
This commit is contained in:
parent
0e739efc88
commit
5bbf200de2
@ -198,6 +198,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
|
||||
and `YYYY-MM-DD` is the date for collecting the stats. By default the stats is collected for the current day.
|
||||
- `api/v1/status/active_queries` - for currently executed active queries. Note that every `vmselect` maintains an independent list of active queries,
|
||||
which is returned in the response.
|
||||
- `api/v1/status/top_queries` - for listing the most frequently executed queries and queries taking the most duration.
|
||||
|
||||
* URLs for [Graphite Metrics API](https://graphite-api.readthedocs.io/en/latest/api.html#the-metrics-api): `http://<vmselect>:8481/select/<accountID>/graphite/<suffix>`, where:
|
||||
- `<accountID>` is an arbitrary number identifying data namespace for query (aka tenant)
|
||||
@ -214,7 +215,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
|
||||
- `tags/autoComplete/values` - returns tag values matching the given `valuePrefix` and/or `expr`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support).
|
||||
- `tags/delSeries` - deletes series matching the given `path`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#removing-series-from-the-tagdb).
|
||||
|
||||
* URL for query stats across all tenants: `http://<vmselect>:8481/api/v1/status/top_queries`. It returns query lists with the most frequently executed queries and queries taking the most duration.
|
||||
* URL for query stats across all tenants: `http://<vmselect>:8481/api/v1/status/top_queries`. It lists with the most frequently executed queries and queries taking the most duration.
|
||||
|
||||
* URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`.
|
||||
Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
|
||||
|
@ -172,10 +172,10 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
return true
|
||||
}
|
||||
if path == "/api/v1/status/top_queries" {
|
||||
topQueriesRequests.Inc()
|
||||
if err := prometheus.QueryStatsHandler(startTime, w, r); err != nil {
|
||||
topQueriesErrors.Inc()
|
||||
sendPrometheusError(w, r, fmt.Errorf("cannot query status endpoint: %w", err))
|
||||
globalTopQueriesRequests.Inc()
|
||||
if err := prometheus.QueryStatsHandler(startTime, nil, w, r); err != nil {
|
||||
globalTopQueriesErrors.Inc()
|
||||
sendPrometheusError(w, r, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
@ -295,6 +295,14 @@ func selectHandler(startTime time.Time, w http.ResponseWriter, r *http.Request,
|
||||
statusActiveQueriesRequests.Inc()
|
||||
promql.WriteActiveQueries(w)
|
||||
return true
|
||||
case "prometheus/api/v1/status/top_queries":
|
||||
topQueriesRequests.Inc()
|
||||
if err := prometheus.QueryStatsHandler(startTime, at, w, r); err != nil {
|
||||
topQueriesErrors.Inc()
|
||||
sendPrometheusError(w, r, err)
|
||||
return true
|
||||
}
|
||||
return true
|
||||
case "prometheus/api/v1/export":
|
||||
exportRequests.Inc()
|
||||
if err := prometheus.ExportHandler(startTime, at, w, r); err != nil {
|
||||
@ -501,11 +509,14 @@ var (
|
||||
statusTSDBRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/status/tsdb"}`)
|
||||
statusTSDBErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/status/tsdb"}`)
|
||||
|
||||
topQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/top_queries"}`)
|
||||
topQueriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/top_queries"}`)
|
||||
|
||||
statusActiveQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}prometheus/api/v1/status/active_queries"}`)
|
||||
|
||||
topQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/status/top_queries"}`)
|
||||
topQueriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/status/top_queries"}`)
|
||||
|
||||
globalTopQueriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/status/top_queries"}`)
|
||||
globalTopQueriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/status/top_queries"}`)
|
||||
|
||||
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
|
||||
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
|
||||
|
||||
|
@ -1329,7 +1329,7 @@ func getLatencyOffsetMilliseconds() int64 {
|
||||
}
|
||||
|
||||
// QueryStatsHandler returns query stats at `/api/v1/status/top_queries`
|
||||
func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
|
||||
func QueryStatsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
return fmt.Errorf("cannot parse form values: %w", err)
|
||||
}
|
||||
@ -1346,10 +1346,15 @@ func QueryStatsHandler(startTime time.Time, w http.ResponseWriter, r *http.Reque
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse `maxLifetime` arg: %w", err)
|
||||
}
|
||||
maxLifetime := time.Duration(maxLifetimeMsecs) * time.Millisecond
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
bw := bufferedwriter.Get(w)
|
||||
defer bufferedwriter.Put(bw)
|
||||
querystats.WriteJSONQueryStats(bw, topN, time.Duration(maxLifetimeMsecs)*time.Millisecond)
|
||||
if at == nil {
|
||||
querystats.WriteJSONQueryStats(bw, topN, maxLifetime)
|
||||
} else {
|
||||
querystats.WriteJSONQueryStatsForAccountProject(bw, topN, at.AccountID, at.ProjectID, maxLifetime)
|
||||
}
|
||||
if err := bw.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -39,7 +39,17 @@ func RegisterQuery(accountID, projectID uint32, query string, timeRangeMsecs int
|
||||
// WriteJSONQueryStats writes query stats to given writer in json format.
|
||||
func WriteJSONQueryStats(w io.Writer, topN int, maxLifetime time.Duration) {
|
||||
initOnce.Do(initQueryStats)
|
||||
qsTracker.writeJSONQueryStats(w, topN, maxLifetime)
|
||||
qsTracker.writeJSONQueryStats(w, topN, nil, maxLifetime)
|
||||
}
|
||||
|
||||
// WriteJSONQueryStatsForAccountProject writes query stats for the given (accountID, projectID) to given writer in json format.
|
||||
func WriteJSONQueryStatsForAccountProject(w io.Writer, topN int, accountID, projectID uint32, maxLifetime time.Duration) {
|
||||
initOnce.Do(initQueryStats)
|
||||
apFilter := &accountProjectFilter{
|
||||
accountID: accountID,
|
||||
projectID: projectID,
|
||||
}
|
||||
qsTracker.writeJSONQueryStats(w, topN, apFilter, maxLifetime)
|
||||
}
|
||||
|
||||
// queryStatsTracker holds statistics for queries
|
||||
@ -65,6 +75,11 @@ type queryStatKey struct {
|
||||
timeRangeSecs int64
|
||||
}
|
||||
|
||||
type accountProjectFilter struct {
|
||||
accountID uint32
|
||||
projectID uint32
|
||||
}
|
||||
|
||||
func initQueryStats() {
|
||||
recordsCount := *lastQueriesCount
|
||||
if recordsCount <= 0 {
|
||||
@ -78,12 +93,12 @@ func initQueryStats() {
|
||||
}
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, maxLifetime time.Duration) {
|
||||
func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, apFilter *accountProjectFilter, maxLifetime time.Duration) {
|
||||
fmt.Fprintf(w, `{"topN":"%d","maxLifetime":%q,`, topN, maxLifetime)
|
||||
fmt.Fprintf(w, `"search.queryStats.lastQueriesCount":%d,`, *lastQueriesCount)
|
||||
fmt.Fprintf(w, `"search.queryStats.minQueryDuration":%q,`, *minQueryDuration)
|
||||
fmt.Fprintf(w, `"topByCount":[`)
|
||||
topByCount := qst.getTopByCount(topN, maxLifetime)
|
||||
topByCount := qst.getTopByCount(topN, apFilter, maxLifetime)
|
||||
for i, r := range topByCount {
|
||||
fmt.Fprintf(w, `{"accountID":%d,"projectID":%d,"query":%q,"timeRangeSeconds":%d,"count":%d}`, r.accountID, r.projectID, r.query, r.timeRangeSecs, r.count)
|
||||
if i+1 < len(topByCount) {
|
||||
@ -91,7 +106,7 @@ func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, maxLife
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, `],"topByAvgDuration":[`)
|
||||
topByAvgDuration := qst.getTopByAvgDuration(topN, maxLifetime)
|
||||
topByAvgDuration := qst.getTopByAvgDuration(topN, apFilter, maxLifetime)
|
||||
for i, r := range topByAvgDuration {
|
||||
fmt.Fprintf(w, `{"accountID":%d,"projectID":%d,"query":%q,"timeRangeSeconds":%d,"avgDurationSeconds":%.3f}`,
|
||||
r.accountID, r.projectID, r.query, r.timeRangeSecs, r.duration.Seconds())
|
||||
@ -100,7 +115,7 @@ func (qst *queryStatsTracker) writeJSONQueryStats(w io.Writer, topN int, maxLife
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, `],"topBySumDuration":[`)
|
||||
topBySumDuration := qst.getTopBySumDuration(topN, maxLifetime)
|
||||
topBySumDuration := qst.getTopBySumDuration(topN, apFilter, maxLifetime)
|
||||
for i, r := range topBySumDuration {
|
||||
fmt.Fprintf(w, `{"accountID":%d,"projectID":%d,"query":%q,"timeRangeSeconds":%d,"sumDurationSeconds":%.3f}`,
|
||||
r.accountID, r.projectID, r.query, r.timeRangeSecs, r.duration.Seconds())
|
||||
@ -136,21 +151,34 @@ func (qst *queryStatsTracker) registerQuery(accountID, projectID uint32, query s
|
||||
r.duration = duration
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) getTopByCount(topN int, maxLifetime time.Duration) []queryStatByCount {
|
||||
func (r *queryStatRecord) matches(apFilter *accountProjectFilter, currentTime time.Time, maxLifetime time.Duration) bool {
|
||||
if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime {
|
||||
return false
|
||||
}
|
||||
if apFilter != nil && (apFilter.accountID != r.accountID || apFilter.projectID != r.projectID) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (r *queryStatRecord) key() queryStatKey {
|
||||
return queryStatKey{
|
||||
accountID: r.accountID,
|
||||
projectID: r.projectID,
|
||||
query: r.query,
|
||||
timeRangeSecs: r.timeRangeSecs,
|
||||
}
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) getTopByCount(topN int, apFilter *accountProjectFilter, maxLifetime time.Duration) []queryStatByCount {
|
||||
currentTime := time.Now()
|
||||
qst.mu.Lock()
|
||||
m := make(map[queryStatKey]int)
|
||||
for _, r := range qst.a {
|
||||
if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime {
|
||||
continue
|
||||
if r.matches(apFilter, currentTime, maxLifetime) {
|
||||
k := r.key()
|
||||
m[k] = m[k] + 1
|
||||
}
|
||||
k := queryStatKey{
|
||||
accountID: r.accountID,
|
||||
projectID: r.projectID,
|
||||
query: r.query,
|
||||
timeRangeSecs: r.timeRangeSecs,
|
||||
}
|
||||
m[k] = m[k] + 1
|
||||
}
|
||||
qst.mu.Unlock()
|
||||
|
||||
@ -181,7 +209,7 @@ type queryStatByCount struct {
|
||||
count int
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) getTopByAvgDuration(topN int, maxLifetime time.Duration) []queryStatByDuration {
|
||||
func (qst *queryStatsTracker) getTopByAvgDuration(topN int, apFilter *accountProjectFilter, maxLifetime time.Duration) []queryStatByDuration {
|
||||
currentTime := time.Now()
|
||||
qst.mu.Lock()
|
||||
type countSum struct {
|
||||
@ -190,19 +218,13 @@ func (qst *queryStatsTracker) getTopByAvgDuration(topN int, maxLifetime time.Dur
|
||||
}
|
||||
m := make(map[queryStatKey]countSum)
|
||||
for _, r := range qst.a {
|
||||
if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime {
|
||||
continue
|
||||
if r.matches(apFilter, currentTime, maxLifetime) {
|
||||
k := r.key()
|
||||
ks := m[k]
|
||||
ks.count++
|
||||
ks.sum += r.duration
|
||||
m[k] = ks
|
||||
}
|
||||
k := queryStatKey{
|
||||
accountID: r.accountID,
|
||||
projectID: r.projectID,
|
||||
query: r.query,
|
||||
timeRangeSecs: r.timeRangeSecs,
|
||||
}
|
||||
ks := m[k]
|
||||
ks.count++
|
||||
ks.sum += r.duration
|
||||
m[k] = ks
|
||||
}
|
||||
qst.mu.Unlock()
|
||||
|
||||
@ -233,21 +255,15 @@ type queryStatByDuration struct {
|
||||
duration time.Duration
|
||||
}
|
||||
|
||||
func (qst *queryStatsTracker) getTopBySumDuration(topN int, maxLifetime time.Duration) []queryStatByDuration {
|
||||
func (qst *queryStatsTracker) getTopBySumDuration(topN int, apFilter *accountProjectFilter, maxLifetime time.Duration) []queryStatByDuration {
|
||||
currentTime := time.Now()
|
||||
qst.mu.Lock()
|
||||
m := make(map[queryStatKey]time.Duration)
|
||||
for _, r := range qst.a {
|
||||
if r.query == "" || currentTime.Sub(r.registerTime) > maxLifetime {
|
||||
continue
|
||||
if r.matches(apFilter, currentTime, maxLifetime) {
|
||||
k := r.key()
|
||||
m[k] = m[k] + r.duration
|
||||
}
|
||||
k := queryStatKey{
|
||||
accountID: r.accountID,
|
||||
projectID: r.projectID,
|
||||
query: r.query,
|
||||
timeRangeSecs: r.timeRangeSecs,
|
||||
}
|
||||
m[k] = m[k] + r.duration
|
||||
}
|
||||
qst.mu.Unlock()
|
||||
|
||||
|
@ -198,6 +198,7 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
|
||||
and `YYYY-MM-DD` is the date for collecting the stats. By default the stats is collected for the current day.
|
||||
- `api/v1/status/active_queries` - for currently executed active queries. Note that every `vmselect` maintains an independent list of active queries,
|
||||
which is returned in the response.
|
||||
- `api/v1/status/top_queries` - for listing the most frequently executed queries and queries taking the most duration.
|
||||
|
||||
* URLs for [Graphite Metrics API](https://graphite-api.readthedocs.io/en/latest/api.html#the-metrics-api): `http://<vmselect>:8481/select/<accountID>/graphite/<suffix>`, where:
|
||||
- `<accountID>` is an arbitrary number identifying data namespace for query (aka tenant)
|
||||
@ -214,6 +215,8 @@ or [an alternative dashboard for VictoriaMetrics cluster](https://grafana.com/gr
|
||||
- `tags/autoComplete/values` - returns tag values matching the given `valuePrefix` and/or `expr`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support).
|
||||
- `tags/delSeries` - deletes series matching the given `path`. See [these docs](https://graphite.readthedocs.io/en/stable/tags.html#removing-series-from-the-tagdb).
|
||||
|
||||
* URL for query stats across all tenants: `http://<vmselect>:8481/api/v1/status/top_queries`. It lists with the most frequently executed queries and queries taking the most duration.
|
||||
|
||||
* URL for time series deletion: `http://<vmselect>:8481/delete/<accountID>/prometheus/api/v1/admin/tsdb/delete_series?match[]=<timeseries_selector_for_delete>`.
|
||||
Note that the `delete_series` handler should be used only in exceptional cases such as deletion of accidentally ingested incorrect time series. It shouldn't
|
||||
be used on a regular basis, since it carries non-zero overhead.
|
||||
|
Loading…
Reference in New Issue
Block a user