app/vmselect: add /api/v1/labels/count handler for quick detection of labels with the maximum number of distinct values

author Aliaksandr Valialkin 2019-06-10 18:55:20 +03:00
parent 547bcdce63
commit 75a0acf72d
7 changed files with 396 additions and 12 deletions
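The new endpoint returns a JSON object mapping every label name to the number of distinct values it has (subject to the configured search limits), which makes it easy to spot the highest-cardinality labels. A minimal client sketch follows; the vmselect address, port and tenant prefix are placeholders, and in the cluster version the handler is registered under /select/<accountID>/prometheus/api/v1/labels/count, as shown in the routing change below:

package main

import (
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
)

func main() {
	// Placeholder URL: adjust the vmselect address and the tenant prefix as needed.
	resp, err := http.Get("http://localhost:8481/select/0/prometheus/api/v1/labels/count")
	if err != nil {
		log.Fatalf("cannot query /api/v1/labels/count: %s", err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Fatalf("cannot read response body: %s", err)
	}
	// Expected shape: {"status":"success","data":{"__name__":1234,"instance":56,...}}
	fmt.Println(string(body))
}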

View File

@@ -185,6 +185,15 @@ func selectHandler(w http.ResponseWriter, r *http.Request, p *httpserver.Path, a
return true
}
return true
case "prometheus/api/v1/labels/count":
labelsCountRequests.Inc()
httpserver.EnableCORS(w, r)
if err := prometheus.LabelsCountHandler(at, w, r); err != nil {
labelsCountErrors.Inc()
sendPrometheusError(w, r, err)
return true
}
return true
case "prometheus/api/v1/export":
exportRequests.Inc()
if err := prometheus.ExportHandler(at, w, r); err != nil {
@@ -250,6 +259,9 @@ var (
labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels"}`)
labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels"}`)
labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/labels/count"}`)
labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/select/{}/prometheus/api/v1/labels/count"}`)
deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)
deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/delete/{}/prometheus/api/v1/admin/tsdb/delete_series"}`)

View File

@@ -506,7 +506,7 @@ func GetLabelValues(at *auth.Token, labelName string, deadline Deadline) ([]stri
if len(errors) > 0 {
if len(labelValues) == 0 {
// Return only the first error, since returning all of them makes no sense.
return nil, true, fmt.Errorf("error occurred during fetching labels: %s", errors[0])
return nil, true, fmt.Errorf("error occurred during fetching label values: %s", errors[0])
}
// Just log errors and return partial results.
@@ -514,11 +514,11 @@ func GetLabelValues(at *auth.Token, labelName string, deadline Deadline) ([]stri
// if certain storageNodes are temporarily unavailable.
partialLabelValuesResults.Inc()
// Log only the first error, since logging all of them makes no sense.
logger.Errorf("certain storageNodes are unhealthy when fetching labels: %s", errors[0])
logger.Errorf("certain storageNodes are unhealthy when fetching label values: %s", errors[0])
isPartialResult = true
}
// Deduplicate labels
// Deduplicate label values
labelValues = deduplicateStrings(labelValues)
// Sort labelValues like Prometheus does
@@ -527,6 +527,102 @@ func GetLabelValues(at *auth.Token, labelName string, deadline Deadline) ([]stri
return labelValues, isPartialResult, nil
}
// GetLabelEntries returns all the label entries for at until the given deadline.
func GetLabelEntries(at *auth.Token, deadline Deadline) ([]storage.TagEntry, bool, error) {
// Send the query to all the storage nodes in parallel.
type nodeResult struct {
labelEntries []storage.TagEntry
err error
}
resultsCh := make(chan nodeResult, len(storageNodes))
for _, sn := range storageNodes {
go func(sn *storageNode) {
sn.labelEntriesRequests.Inc()
labelEntries, err := sn.getLabelEntries(at.AccountID, at.ProjectID, deadline)
if err != nil {
sn.labelEntriesRequestErrors.Inc()
err = fmt.Errorf("cannot get label entries from vmstorage %s: %s", sn.connPool.Addr(), err)
}
resultsCh <- nodeResult{
labelEntries: labelEntries,
err: err,
}
}(sn)
}
// Collect results
var labelEntries []storage.TagEntry
var errors []error
for i := 0; i < len(storageNodes); i++ {
// There is no need for a timer here, since all the goroutines executing
// sn.getLabelEntries must be finished by the deadline.
nr := <-resultsCh
if nr.err != nil {
errors = append(errors, nr.err)
continue
}
labelEntries = append(labelEntries, nr.labelEntries...)
}
isPartialResult := false
if len(errors) > 0 {
if len(labelEntries) == 0 {
// Return only the first error, since returning all of them makes no sense.
return nil, true, fmt.Errorf("error occurred during fetching label entries: %s", errors[0])
}
// Just log errors and return partial results.
// This allows vmselect to degrade gracefully
// when certain storageNodes are temporarily unavailable.
partialLabelEntriesResults.Inc()
// Log only the first error, since logging all of them makes no sense.
logger.Errorf("certain storageNodes are unhealthy when fetching label entries: %s", errors[0])
isPartialResult = true
}
// Deduplicate label entries
labelEntries = deduplicateLabelEntries(labelEntries)
// Substitute "" with "__name__"
for i := range labelEntries {
e := &labelEntries[i]
if e.Key == "" {
e.Key = "__name__"
}
}
// Sort labelEntries by the number of label values in each entry.
sort.Slice(labelEntries, func(i, j int) bool {
a, b := labelEntries[i].Values, labelEntries[j].Values
if len(a) < len(b) {
return true
}
if len(a) > len(b) {
return false
}
return labelEntries[i].Key < labelEntries[j].Key
})
return labelEntries, isPartialResult, nil
}
func deduplicateLabelEntries(src []storage.TagEntry) []storage.TagEntry {
m := make(map[string][]string, len(src))
for i := range src {
e := &src[i]
m[e.Key] = append(m[e.Key], e.Values...)
}
dst := make([]storage.TagEntry, 0, len(m))
for key, values := range m {
values := deduplicateStrings(values)
sort.Strings(values)
dst = append(dst, storage.TagEntry{
Key: key,
Values: values,
})
}
return dst
}
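Each storage node returns its own TagEntry list, so the same key may arrive several times with overlapping value sets; deduplicateLabelEntries collapses them into one entry per key with a deduplicated, sorted value list. A standalone sketch of that behavior (mergeLabelEntries is a simplified re-implementation written purely for illustration; TagEntry is inlined so the snippet compiles on its own):

package main

import (
	"fmt"
	"sort"
)

// TagEntry mirrors storage.TagEntry for this illustration only.
type TagEntry struct {
	Key    string
	Values []string
}

// mergeLabelEntries behaves like deduplicateLabelEntries above: values for the
// same key are concatenated, deduplicated and sorted.
func mergeLabelEntries(src []TagEntry) []TagEntry {
	m := make(map[string]map[string]bool, len(src))
	for _, e := range src {
		if m[e.Key] == nil {
			m[e.Key] = make(map[string]bool)
		}
		for _, v := range e.Values {
			m[e.Key][v] = true
		}
	}
	dst := make([]TagEntry, 0, len(m))
	for key, valueSet := range m {
		values := make([]string, 0, len(valueSet))
		for v := range valueSet {
			values = append(values, v)
		}
		sort.Strings(values)
		dst = append(dst, TagEntry{Key: key, Values: values})
	}
	return dst
}

func main() {
	// Two storage nodes both report the "job" label with overlapping values.
	merged := mergeLabelEntries([]TagEntry{
		{Key: "job", Values: []string{"node_exporter", "vmagent"}},
		{Key: "job", Values: []string{"vmagent", "vmselect"}},
		{Key: "instance", Values: []string{"host1:9100"}},
	})
	// Entry order here depends on map iteration; GetLabelEntries above sorts
	// the merged entries by their number of values afterwards.
	fmt.Println(merged)
}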
func deduplicateStrings(a []string) []string {
m := make(map[string]bool, len(a))
for _, s := range a {
@@ -707,6 +803,12 @@ type storageNode struct {
// The number of errors during requests to labelValues.
labelValuesRequestErrors *metrics.Counter
// The number of requests to labelEntries.
labelEntriesRequests *metrics.Counter
// The number of errors during requests to labelEntries.
labelEntriesRequestErrors *metrics.Counter
// The number of requests to seriesCount.
seriesCountRequests *metrics.Counter
@@ -786,6 +888,26 @@ func (sn *storageNode) getLabelValues(accountID, projectID uint32, labelName str
return labelValues, nil
}
func (sn *storageNode) getLabelEntries(accountID, projectID uint32, deadline Deadline) ([]storage.TagEntry, error) {
var tagEntries []storage.TagEntry
f := func(bc *handshake.BufferedConn) error {
tes, err := sn.getLabelEntriesOnConn(bc, accountID, projectID)
if err != nil {
return err
}
tagEntries = tes
return nil
}
if err := sn.execOnConn("labelEntries", f, deadline); err != nil {
// Try again before giving up.
tagEntries = nil
if err = sn.execOnConn("labelEntries", f, deadline); err != nil {
return nil, err
}
}
return tagEntries, nil
}
func (sn *storageNode) getSeriesCount(accountID, projectID uint32, deadline Deadline) (uint64, error) {
var n uint64
f := func(bc *handshake.BufferedConn) error {
@@ -903,7 +1025,7 @@ func (sn *storageNode) deleteMetricsOnConn(bc *handshake.BufferedConn, requestDa
return int(deletedCount), nil
}
const maxLabelsSize = 16 * 1024 * 1024
const maxLabelSize = 16 * 1024 * 1024
func (sn *storageNode) getLabelsOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) ([]string, error) {
// Send the request to sn.
@@ -929,7 +1051,7 @@ func (sn *storageNode) getLabelsOnConn(bc *handshake.BufferedConn, accountID, pr
// Read response
var labels []string
for {
buf, err = readBytes(buf[:0], bc, maxLabelsSize)
buf, err = readBytes(buf[:0], bc, maxLabelSize)
if err != nil {
return nil, fmt.Errorf("cannot read labels: %s", err)
}
@@ -968,20 +1090,74 @@ func (sn *storageNode) getLabelValuesOnConn(bc *handshake.BufferedConn, accountI
}
// Read response
labelValues, _, err := readLabelValues(buf, bc)
if err != nil {
return nil, err
}
return labelValues, nil
}
func readLabelValues(buf []byte, bc *handshake.BufferedConn) ([]string, []byte, error) {
var labelValues []string
for {
var err error
buf, err = readBytes(buf[:0], bc, maxLabelValueSize)
if err != nil {
return nil, fmt.Errorf("cannot read labelValue: %s", err)
return nil, buf, fmt.Errorf("cannot read labelValue: %s", err)
}
if len(buf) == 0 {
// Reached the end of the response
return labelValues, nil
return labelValues, buf, nil
}
labelValues = append(labelValues, string(buf))
}
}
func (sn *storageNode) getLabelEntriesOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) ([]storage.TagEntry, error) {
// Send the request to sn.
if err := writeUint32(bc, accountID); err != nil {
return nil, fmt.Errorf("cannot send accountID=%d to conn: %s", accountID, err)
}
if err := writeUint32(bc, projectID); err != nil {
return nil, fmt.Errorf("cannot send projectID=%d to conn: %s", projectID, err)
}
if err := bc.Flush(); err != nil {
return nil, fmt.Errorf("cannot flush request to conn: %s", err)
}
// Read response error.
buf, err := readBytes(nil, bc, maxErrorMessageSize)
if err != nil {
return nil, fmt.Errorf("cannot read error message: %s", err)
}
if len(buf) > 0 {
return nil, &errRemote{msg: string(buf)}
}
// Read response
var labelEntries []storage.TagEntry
for {
buf, err = readBytes(buf[:0], bc, maxLabelSize)
if err != nil {
return nil, fmt.Errorf("cannot read label: %s", err)
}
if len(buf) == 0 {
// Reached the end of the response
return labelEntries, nil
}
label := string(buf)
var values []string
values, buf, err = readLabelValues(buf, bc)
if err != nil {
return nil, fmt.Errorf("cannot read values for label %q: %s", label, err)
}
labelEntries = append(labelEntries, storage.TagEntry{
Key: label,
Values: values,
})
}
}
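Taken together with the vmstorage side further below (processVMSelectLabelEntries), the labelEntries RPC response is a flat sequence of strings written with ctx.writeString. A sketch of the framing, reconstructed from the code in this diff:

request:  accountID (uint32), projectID (uint32)
response: errorMessage                      (empty string on success)
          key1, value1, value2, ..., ""     (one entry: label name, its values, end-of-values marker)
          key2, value1, ..., ""
          ...
          ""                                (end-of-response marker)

Empty label values are skipped on the vmstorage side (see writeLabelValues below), so an empty string in the value position always marks the end of that entry's values.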
func (sn *storageNode) getSeriesCountOnConn(bc *handshake.BufferedConn, accountID, projectID uint32) (uint64, error) {
// Send the request to sn.
if err := writeUint32(bc, accountID); err != nil {
@@ -1135,6 +1311,8 @@ func InitStorageNodes(addrs []string) {
labelsRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labels", type="rpcClient", name="vmselect", addr=%q}`, addr)),
labelValuesRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
labelValuesRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labelValues", type="rpcClient", name="vmselect", addr=%q}`, addr)),
labelEntriesRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="labelEntries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
labelEntriesRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="labelEntries", type="rpcClient", name="vmselect", addr=%q}`, addr)),
seriesCountRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
seriesCountRequestErrors: metrics.NewCounter(fmt.Sprintf(`vm_request_errors_total{action="seriesCount", type="rpcClient", name="vmselect", addr=%q}`, addr)),
searchRequests: metrics.NewCounter(fmt.Sprintf(`vm_requests_total{action="search", type="rpcClient", name="vmselect", addr=%q}`, addr)),
@@ -1155,10 +1333,11 @@ func Stop() {
}
var (
partialLabelsResults = metrics.NewCounter(`vm_partial_labels_results_total{name="vmselect"}`)
partialLabelValuesResults = metrics.NewCounter(`vm_partial_label_values_results_total{name="vmselect"}`)
partialSeriesCountResults = metrics.NewCounter(`vm_partial_series_count_results_total{name="vmselect"}`)
partialSearchResults = metrics.NewCounter(`vm_partial_search_results_total{name="vmselect"}`)
partialLabelsResults = metrics.NewCounter(`vm_partial_labels_results_total{name="vmselect"}`)
partialLabelValuesResults = metrics.NewCounter(`vm_partial_label_values_results_total{name="vmselect"}`)
partialLabelEntriesResults = metrics.NewCounter(`vm_partial_label_entries_results_total{name="vmselect"}`)
partialSeriesCountResults = metrics.NewCounter(`vm_partial_series_count_results_total{name="vmselect"}`)
partialSearchResults = metrics.NewCounter(`vm_partial_search_results_total{name="vmselect"}`)
)
// The maximum number of concurrent queries per storageNode.

View File

@@ -0,0 +1,17 @@
{% import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" %}
{% stripspace %}
LabelsCountResponse generates the response for /api/v1/labels/count.
{% func LabelsCountResponse(labelEntries []storage.TagEntry) %}
{
"status":"success",
"data":{
{% for i, e := range labelEntries %}
{%q= e.Key %}:{%d= len(e.Values) %}
{% if i+1 < len(labelEntries) %},{% endif %}
{% endfor %}
}
}
{% endfunc %}
{% endstripspace %}
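For two hypothetical label entries ("__name__" with 3 values and "job" with 2), the template above renders output of this shape:

{"status":"success","data":{"__name__":3,"job":2}}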

View File

@@ -0,0 +1,74 @@
// Code generated by qtc from "labels_count_response.qtpl". DO NOT EDIT.
// See https://github.com/valyala/quicktemplate for details.
//line app/vmselect/prometheus/labels_count_response.qtpl:1
package prometheus
//line app/vmselect/prometheus/labels_count_response.qtpl:1
import "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
// LabelsCountResponse generates the response for /api/v1/labels/count.
//line app/vmselect/prometheus/labels_count_response.qtpl:5
import (
qtio422016 "io"
qt422016 "github.com/valyala/quicktemplate"
)
//line app/vmselect/prometheus/labels_count_response.qtpl:5
var (
_ = qtio422016.Copy
_ = qt422016.AcquireByteBuffer
)
//line app/vmselect/prometheus/labels_count_response.qtpl:5
func StreamLabelsCountResponse(qw422016 *qt422016.Writer, labelEntries []storage.TagEntry) {
//line app/vmselect/prometheus/labels_count_response.qtpl:5
qw422016.N().S(`{"status":"success","data":{`)
//line app/vmselect/prometheus/labels_count_response.qtpl:9
for i, e := range labelEntries {
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().Q(e.Key)
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().S(`:`)
//line app/vmselect/prometheus/labels_count_response.qtpl:10
qw422016.N().D(len(e.Values))
//line app/vmselect/prometheus/labels_count_response.qtpl:11
if i+1 < len(labelEntries) {
//line app/vmselect/prometheus/labels_count_response.qtpl:11
qw422016.N().S(`,`)
//line app/vmselect/prometheus/labels_count_response.qtpl:11
}
//line app/vmselect/prometheus/labels_count_response.qtpl:12
}
//line app/vmselect/prometheus/labels_count_response.qtpl:12
qw422016.N().S(`}}`)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}
//line app/vmselect/prometheus/labels_count_response.qtpl:15
func WriteLabelsCountResponse(qq422016 qtio422016.Writer, labelEntries []storage.TagEntry) {
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qw422016 := qt422016.AcquireWriter(qq422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
StreamLabelsCountResponse(qw422016, labelEntries)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qt422016.ReleaseWriter(qw422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}
//line app/vmselect/prometheus/labels_count_response.qtpl:15
func LabelsCountResponse(labelEntries []storage.TagEntry) string {
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qb422016 := qt422016.AcquireByteBuffer()
//line app/vmselect/prometheus/labels_count_response.qtpl:15
WriteLabelsCountResponse(qb422016, labelEntries)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qs422016 := string(qb422016.B)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
qt422016.ReleaseByteBuffer(qb422016)
//line app/vmselect/prometheus/labels_count_response.qtpl:15
return qs422016
//line app/vmselect/prometheus/labels_count_response.qtpl:15
}

View File

@@ -289,6 +289,23 @@ func LabelValuesHandler(at *auth.Token, labelName string, w http.ResponseWriter,
var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/label/{}/values"}`)
// LabelsCountHandler processes /api/v1/labels/count request.
func LabelsCountHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error {
startTime := time.Now()
deadline := getDeadline(r)
labelEntries, _, err := netstorage.GetLabelEntries(at, deadline)
if err != nil {
return fmt.Errorf(`cannot obtain label entries: %s`, err)
}
w.Header().Set("Content-Type", "application/json")
WriteLabelsCountResponse(w, labelEntries)
labelsCountDuration.UpdateDuration(startTime)
return nil
}
var labelsCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels/count"}`)
// LabelsHandler processes /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names

View File

@@ -447,6 +447,8 @@ func (s *Server) processVMSelectRequest(ctx *vmselectRequestCtx) error {
return s.processVMSelectSearchQuery(ctx)
case "labelValues":
return s.processVMSelectLabelValues(ctx)
case "labelEntries":
return s.processVMSelectLabelEntries(ctx)
case "labels":
return s.processVMSelectLabels(ctx)
case "seriesCount":
@@ -587,7 +589,10 @@ func (s *Server) processVMSelectLabelValues(ctx *vmselectRequestCtx) error {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send labelValues to vmselect
return writeLabelValues(ctx, labelValues)
}
func writeLabelValues(ctx *vmselectRequestCtx, labelValues []string) error {
for _, labelValue := range labelValues {
if len(labelValue) == 0 {
// Skip empty label values, since they are meaningless for Prometheus.
@@ -597,6 +602,52 @@ func (s *Server) processVMSelectLabelValues(ctx *vmselectRequestCtx) error {
return fmt.Errorf("cannot write labelValue %q: %s", labelValue, err)
}
}
// Send 'end of label values' marker
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send 'end of response' marker")
}
return nil
}
func (s *Server) processVMSelectLabelEntries(ctx *vmselectRequestCtx) error {
vmselectLabelEntriesRequests.Inc()
// Read request
accountID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read accountID: %s", err)
}
projectID, err := ctx.readUint32()
if err != nil {
return fmt.Errorf("cannot read projectID: %s", err)
}
// Perform the request
labelEntries, err := s.storage.SearchTagEntries(accountID, projectID, *maxTagKeysPerSearch, *maxTagValuesPerSearch)
if err != nil {
// Send the error message to vmselect.
errMsg := fmt.Sprintf("error during label entries search: %s", err)
if err := ctx.writeString(errMsg); err != nil {
return fmt.Errorf("cannot send error message: %s", err)
}
return nil
}
// Send an empty error message to vmselect.
if err := ctx.writeString(""); err != nil {
return fmt.Errorf("cannot send empty error message: %s", err)
}
// Send labelEntries to vmselect
for i := range labelEntries {
e := &labelEntries[i]
if err := ctx.writeString(e.Key); err != nil {
return fmt.Errorf("cannot write label %q: %s", e.Key, err)
}
if err := writeLabelValues(ctx, e.Values); err != nil {
return fmt.Errorf("cannot write label values for %q: %s", e.Key, err)
}
}
// Send 'end of response' marker
if err := ctx.writeString(""); err != nil {
@@ -715,6 +766,7 @@ var (
vmselectDeleteMetricsRequests = metrics.NewCounter("vm_vmselect_delete_metrics_requests_total")
vmselectLabelsRequests = metrics.NewCounter("vm_vmselect_labels_requests_total")
vmselectLabelValuesRequests = metrics.NewCounter("vm_vmselect_label_values_requests_total")
vmselectLabelEntriesRequests = metrics.NewCounter("vm_vmselect_label_entries_requests_total")
vmselectSeriesCountRequests = metrics.NewCounter("vm_vmselect_series_count_requests_total")
vmselectSearchQueryRequests = metrics.NewCounter("vm_vmselect_search_query_requests_total")
vmselectMetricBlocksRead = metrics.NewCounter("vm_vmselect_metric_blocks_read_total")

View File

@@ -516,6 +516,39 @@ func (s *Storage) SearchTagValues(accountID, projectID uint32, tagKey []byte, ma
return s.idb().SearchTagValues(accountID, projectID, tagKey, maxTagValues)
}
// SearchTagEntries returns a list of (tagName -> tagValues) for (accountID, projectID).
func (s *Storage) SearchTagEntries(accountID, projectID uint32, maxTagKeys, maxTagValues int) ([]TagEntry, error) {
idb := s.idb()
keys, err := idb.SearchTagKeys(accountID, projectID, maxTagKeys)
if err != nil {
return nil, fmt.Errorf("cannot search tag keys: %s", err)
}
// Sort keys for faster seeks below
sort.Strings(keys)
tes := make([]TagEntry, len(keys))
for i, key := range keys {
values, err := idb.SearchTagValues(accountID, projectID, []byte(key), maxTagValues)
if err != nil {
return nil, fmt.Errorf("cannot search values for tag %q: %s", key, err)
}
te := &tes[i]
te.Key = key
te.Values = values
}
return tes, nil
}
// TagEntry contains (tagName -> tagValues) mapping
type TagEntry struct {
// Key is tagName
Key string
// Values contains all the values for Key.
Values []string
}
// GetSeriesCount returns the approximate number of unique time series for the given (accountID, projectID).
//
// It includes the deleted series too and may count the same series