From 12ac255dae8956ca395e3ae7d61053024448e810 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 8 Jun 2022 21:05:17 +0300 Subject: [PATCH] lib/querytracer: make it easier to use by passing trace context message to New and NewChild The context message can be extended by calling Donef. If there is no need to extend the message, then just call Done. --- README.md | 2 +- app/vmselect/main.go | 2 +- app/vmselect/netstorage/netstorage.go | 64 +++++++++---------- .../prometheus/label_values_response.qtpl | 4 +- .../prometheus/label_values_response.qtpl.go | 12 ++-- app/vmselect/prometheus/labels_response.qtpl | 4 +- .../prometheus/labels_response.qtpl.go | 12 ++-- app/vmselect/prometheus/prometheus.go | 24 +++---- app/vmselect/promql/eval.go | 39 +++++------ app/vmselect/promql/rollup_result_cache.go | 8 +-- docs/README.md | 2 +- docs/Single-server-VictoriaMetrics.md | 2 +- lib/querytracer/tracer.go | 52 ++++++++++----- lib/querytracer/tracer_test.go | 46 ++++++------- lib/storage/search.go | 4 +- lib/storage/storage.go | 12 ++-- 16 files changed, 151 insertions(+), 138 deletions(-) diff --git a/README.md b/README.md index d126a4861..084367ee3 100644 --- a/README.md +++ b/README.md @@ -1425,7 +1425,7 @@ In this case VictoriaMetrics puts query trace into `trace` field in the output J For example, the following command: ```bash -curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq -r '.trace' +curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq '.trace' ``` would return the following trace: diff --git a/app/vmselect/main.go b/app/vmselect/main.go index 87e205ef5..2ff484b51 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -93,7 +93,7 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { startTime := time.Now() defer requestDuration.UpdateDuration(startTime) tracerEnabled := searchutils.GetBool(r, "trace") - qt := querytracer.New(tracerEnabled) + qt := querytracer.New(tracerEnabled, r.URL.Path) // Limit the number of concurrent queries. select { diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index 533c85aff..86046011f 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -194,7 +194,7 @@ var resultPool sync.Pool // // rss becomes unusable after the call to RunParallel. func (rss *Results) RunParallel(qt *querytracer.Tracer, f func(rs *Result, workerID uint) error) error { - qt = qt.NewChild() + qt = qt.NewChild("parallel process of fetched data") defer rss.mustClose() // Spin up local workers. @@ -256,7 +256,7 @@ func (rss *Results) RunParallel(qt *querytracer.Tracer, f func(rs *Result, worke close(workCh) } workChsWG.Wait() - qt.Donef("parallel process of fetched data: series=%d, samples=%d", seriesProcessedTotal, rowsProcessedTotal) + qt.Donef("series=%d, samples=%d", seriesProcessedTotal, rowsProcessedTotal) return firstErr } @@ -598,8 +598,8 @@ func (sbh *sortBlocksHeap) Pop() interface{} { // DeleteSeries deletes time series matching the given tagFilterss. func DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline searchutils.Deadline) (int, error) { - qt = qt.NewChild() - defer qt.Donef("delete series: %s", sq) + qt = qt.NewChild("delete series: %s", sq) + defer qt.Done() tr := storage.TimeRange{ MinTimestamp: sq.MinTimestamp, MaxTimestamp: sq.MaxTimestamp, @@ -613,8 +613,8 @@ func DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline sear // GetLabelsOnTimeRange returns labels for the given tr until the given deadline. func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get labels on timeRange=%s", &tr) + qt = qt.NewChild("get labels on timeRange=%s", &tr) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -637,8 +637,8 @@ func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, deadline // GetGraphiteTags returns Graphite tags until the given deadline. func GetGraphiteTags(qt *querytracer.Tracer, filter string, limit int, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get graphite tags: filter=%s, limit=%d", filter, limit) + qt = qt.NewChild("get graphite tags: filter=%s, limit=%d", filter, limit) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -684,8 +684,8 @@ func hasString(a []string, s string) bool { // GetLabels returns labels until the given deadline. func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get labels") + qt = qt.NewChild("get labels") + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -709,8 +709,8 @@ func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string, // GetLabelValuesOnTimeRange returns label values for the given labelName on the given tr // until the given deadline. func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get values for label %s on a timeRange %s", labelName, &tr) + qt = qt.NewChild("get values for label %s on a timeRange %s", labelName, &tr) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -731,8 +731,8 @@ func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr stor // GetGraphiteTagValues returns tag values for the given tagName until the given deadline. func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit int, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get graphite tag values for tagName=%s, filter=%s, limit=%d", tagName, filter, limit) + qt = qt.NewChild("get graphite tag values for tagName=%s, filter=%s, limit=%d", tagName, filter, limit) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -758,8 +758,8 @@ func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit // GetLabelValues returns label values for the given labelName // until the given deadline. func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get values for label %s", labelName) + qt = qt.NewChild("get values for label %s", labelName) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -782,8 +782,8 @@ func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchuti // // It can be used for implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find func GetTagValueSuffixes(qt *querytracer.Tracer, tr storage.TimeRange, tagKey, tagValuePrefix string, delimiter byte, deadline searchutils.Deadline) ([]string, error) { - qt = qt.NewChild() - defer qt.Donef("get tag value suffixes for tagKey=%s, tagValuePrefix=%s, timeRange=%s", tagKey, tagValuePrefix, &tr) + qt = qt.NewChild("get tag value suffixes for tagKey=%s, tagValuePrefix=%s, timeRange=%s", tagKey, tagValuePrefix, &tr) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -802,8 +802,8 @@ func GetTagValueSuffixes(qt *querytracer.Tracer, tr storage.TimeRange, tagKey, t // GetLabelEntries returns all the label entries until the given deadline. func GetLabelEntries(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]storage.TagEntry, error) { - qt = qt.NewChild() - defer qt.Donef("get label entries") + qt = qt.NewChild("get label entries") + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -836,8 +836,8 @@ func GetLabelEntries(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]s // GetTSDBStatusForDate returns tsdb status according to https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats func GetTSDBStatusForDate(qt *querytracer.Tracer, deadline searchutils.Deadline, date uint64, topN, maxMetrics int) (*storage.TSDBStatus, error) { - qt = qt.NewChild() - defer qt.Donef("get tsdb stats for date=%d, topN=%d", date, topN) + qt = qt.NewChild("get tsdb stats for date=%d, topN=%d", date, topN) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -852,8 +852,8 @@ func GetTSDBStatusForDate(qt *querytracer.Tracer, deadline searchutils.Deadline, // // It accepts aribtrary filters on time series in sq. func GetTSDBStatusWithFilters(qt *querytracer.Tracer, deadline searchutils.Deadline, sq *storage.SearchQuery, topN int) (*storage.TSDBStatus, error) { - qt = qt.NewChild() - defer qt.Donef("get tsdb stats: %s, topN=%d", sq, topN) + qt = qt.NewChild("get tsdb stats: %s, topN=%d", sq, topN) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -875,8 +875,8 @@ func GetTSDBStatusWithFilters(qt *querytracer.Tracer, deadline searchutils.Deadl // GetSeriesCount returns the number of unique series. func GetSeriesCount(qt *querytracer.Tracer, deadline searchutils.Deadline) (uint64, error) { - qt = qt.NewChild() - defer qt.Donef("get series count") + qt = qt.NewChild("get series count") + defer qt.Done() if deadline.Exceeded() { return 0, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } @@ -909,8 +909,8 @@ var ssPool sync.Pool // It is the responsibility of f to call b.UnmarshalData before reading timestamps and values from the block. // It is the responsibility of f to filter blocks according to the given tr. func ExportBlocks(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline searchutils.Deadline, f func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error) error { - qt = qt.NewChild() - defer qt.Donef("export blocks: %s", sq) + qt = qt.NewChild("export blocks: %s", sq) + defer qt.Done() if deadline.Exceeded() { return fmt.Errorf("timeout exceeded before starting data export: %s", deadline.String()) } @@ -1021,8 +1021,8 @@ var exportWorkPool = &sync.Pool{ // SearchMetricNames returns all the metric names matching sq until the given deadline. func SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline searchutils.Deadline) ([]storage.MetricName, error) { - qt = qt.NewChild() - defer qt.Donef("fetch metric names: %s", sq) + qt = qt.NewChild("fetch metric names: %s", sq) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting to search metric names: %s", deadline.String()) } @@ -1051,8 +1051,8 @@ func SearchMetricNames(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline // // Results.RunParallel or Results.Cancel must be called on the returned Results. func ProcessSearchQuery(qt *querytracer.Tracer, sq *storage.SearchQuery, fetchData bool, deadline searchutils.Deadline) (*Results, error) { - qt = qt.NewChild() - defer qt.Donef("fetch matching series: %s, fetchData=%v", sq, fetchData) + qt = qt.NewChild("fetch matching series: %s, fetchData=%v", sq, fetchData) + defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } diff --git a/app/vmselect/prometheus/label_values_response.qtpl b/app/vmselect/prometheus/label_values_response.qtpl index 24bfbb72b..29d539dcf 100644 --- a/app/vmselect/prometheus/label_values_response.qtpl +++ b/app/vmselect/prometheus/label_values_response.qtpl @@ -6,7 +6,7 @@ LabelValuesResponse generates response for /api/v1/label//values . See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values -{% func LabelValuesResponse(labelValues []string, qt *querytracer.Tracer, qtDone func()) %} +{% func LabelValuesResponse(labelValues []string, qt *querytracer.Tracer) %} { "status":"success", "data":[ @@ -17,7 +17,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-va ] {% code qt.Printf("generate response for %d label values", len(labelValues)) - qtDone() + qt.Done() %} {%= dumpQueryTrace(qt) %} } diff --git a/app/vmselect/prometheus/label_values_response.qtpl.go b/app/vmselect/prometheus/label_values_response.qtpl.go index b8757ce16..aca097828 100644 --- a/app/vmselect/prometheus/label_values_response.qtpl.go +++ b/app/vmselect/prometheus/label_values_response.qtpl.go @@ -25,7 +25,7 @@ var ( ) //line app/vmselect/prometheus/label_values_response.qtpl:9 -func StreamLabelValuesResponse(qw422016 *qt422016.Writer, labelValues []string, qt *querytracer.Tracer, qtDone func()) { +func StreamLabelValuesResponse(qw422016 *qt422016.Writer, labelValues []string, qt *querytracer.Tracer) { //line app/vmselect/prometheus/label_values_response.qtpl:9 qw422016.N().S(`{"status":"success","data":[`) //line app/vmselect/prometheus/label_values_response.qtpl:13 @@ -44,7 +44,7 @@ func StreamLabelValuesResponse(qw422016 *qt422016.Writer, labelValues []string, qw422016.N().S(`]`) //line app/vmselect/prometheus/label_values_response.qtpl:19 qt.Printf("generate response for %d label values", len(labelValues)) - qtDone() + qt.Done() //line app/vmselect/prometheus/label_values_response.qtpl:22 streamdumpQueryTrace(qw422016, qt) @@ -54,22 +54,22 @@ func StreamLabelValuesResponse(qw422016 *qt422016.Writer, labelValues []string, } //line app/vmselect/prometheus/label_values_response.qtpl:24 -func WriteLabelValuesResponse(qq422016 qtio422016.Writer, labelValues []string, qt *querytracer.Tracer, qtDone func()) { +func WriteLabelValuesResponse(qq422016 qtio422016.Writer, labelValues []string, qt *querytracer.Tracer) { //line app/vmselect/prometheus/label_values_response.qtpl:24 qw422016 := qt422016.AcquireWriter(qq422016) //line app/vmselect/prometheus/label_values_response.qtpl:24 - StreamLabelValuesResponse(qw422016, labelValues, qt, qtDone) + StreamLabelValuesResponse(qw422016, labelValues, qt) //line app/vmselect/prometheus/label_values_response.qtpl:24 qt422016.ReleaseWriter(qw422016) //line app/vmselect/prometheus/label_values_response.qtpl:24 } //line app/vmselect/prometheus/label_values_response.qtpl:24 -func LabelValuesResponse(labelValues []string, qt *querytracer.Tracer, qtDone func()) string { +func LabelValuesResponse(labelValues []string, qt *querytracer.Tracer) string { //line app/vmselect/prometheus/label_values_response.qtpl:24 qb422016 := qt422016.AcquireByteBuffer() //line app/vmselect/prometheus/label_values_response.qtpl:24 - WriteLabelValuesResponse(qb422016, labelValues, qt, qtDone) + WriteLabelValuesResponse(qb422016, labelValues, qt) //line app/vmselect/prometheus/label_values_response.qtpl:24 qs422016 := string(qb422016.B) //line app/vmselect/prometheus/label_values_response.qtpl:24 diff --git a/app/vmselect/prometheus/labels_response.qtpl b/app/vmselect/prometheus/labels_response.qtpl index 9db265c3b..4e5ab6279 100644 --- a/app/vmselect/prometheus/labels_response.qtpl +++ b/app/vmselect/prometheus/labels_response.qtpl @@ -6,7 +6,7 @@ LabelsResponse generates response for /api/v1/labels . See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names -{% func LabelsResponse(labels []string, qt *querytracer.Tracer, qtDone func()) %} +{% func LabelsResponse(labels []string, qt *querytracer.Tracer) %} { "status":"success", "data":[ @@ -17,7 +17,7 @@ See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-nam ] {% code qt.Printf("generate response for %d labels", len(labels)) - qtDone() + qt.Done() %} {%= dumpQueryTrace(qt) %} } diff --git a/app/vmselect/prometheus/labels_response.qtpl.go b/app/vmselect/prometheus/labels_response.qtpl.go index d7540d5d5..8c6853bb3 100644 --- a/app/vmselect/prometheus/labels_response.qtpl.go +++ b/app/vmselect/prometheus/labels_response.qtpl.go @@ -25,7 +25,7 @@ var ( ) //line app/vmselect/prometheus/labels_response.qtpl:9 -func StreamLabelsResponse(qw422016 *qt422016.Writer, labels []string, qt *querytracer.Tracer, qtDone func()) { +func StreamLabelsResponse(qw422016 *qt422016.Writer, labels []string, qt *querytracer.Tracer) { //line app/vmselect/prometheus/labels_response.qtpl:9 qw422016.N().S(`{"status":"success","data":[`) //line app/vmselect/prometheus/labels_response.qtpl:13 @@ -44,7 +44,7 @@ func StreamLabelsResponse(qw422016 *qt422016.Writer, labels []string, qt *queryt qw422016.N().S(`]`) //line app/vmselect/prometheus/labels_response.qtpl:19 qt.Printf("generate response for %d labels", len(labels)) - qtDone() + qt.Done() //line app/vmselect/prometheus/labels_response.qtpl:22 streamdumpQueryTrace(qw422016, qt) @@ -54,22 +54,22 @@ func StreamLabelsResponse(qw422016 *qt422016.Writer, labels []string, qt *queryt } //line app/vmselect/prometheus/labels_response.qtpl:24 -func WriteLabelsResponse(qq422016 qtio422016.Writer, labels []string, qt *querytracer.Tracer, qtDone func()) { +func WriteLabelsResponse(qq422016 qtio422016.Writer, labels []string, qt *querytracer.Tracer) { //line app/vmselect/prometheus/labels_response.qtpl:24 qw422016 := qt422016.AcquireWriter(qq422016) //line app/vmselect/prometheus/labels_response.qtpl:24 - StreamLabelsResponse(qw422016, labels, qt, qtDone) + StreamLabelsResponse(qw422016, labels, qt) //line app/vmselect/prometheus/labels_response.qtpl:24 qt422016.ReleaseWriter(qw422016) //line app/vmselect/prometheus/labels_response.qtpl:24 } //line app/vmselect/prometheus/labels_response.qtpl:24 -func LabelsResponse(labels []string, qt *querytracer.Tracer, qtDone func()) string { +func LabelsResponse(labels []string, qt *querytracer.Tracer) string { //line app/vmselect/prometheus/labels_response.qtpl:24 qb422016 := qt422016.AcquireByteBuffer() //line app/vmselect/prometheus/labels_response.qtpl:24 - WriteLabelsResponse(qb422016, labels, qt, qtDone) + WriteLabelsResponse(qb422016, labels, qt) //line app/vmselect/prometheus/labels_response.qtpl:24 qs422016 := string(qb422016.B) //line app/vmselect/prometheus/labels_response.qtpl:24 diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index ecd4341eb..560599f05 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -361,7 +361,7 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, ep *exportPara if err != nil { return fmt.Errorf("cannot fetch data for %q: %w", sq, err) } - qtChild := qt.NewChild() + qtChild := qt.NewChild("background export format=%s", format) go func() { err := rss.RunParallel(qtChild, func(rs *netstorage.Result, workerID uint) error { if err := bw.Error(); err != nil { @@ -376,12 +376,12 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, ep *exportPara exportBlockPool.Put(xb) return nil }) - qtChild.Donef("background export format=%s", format) + qtChild.Done() close(resultsCh) doneCh <- err }() } else { - qtChild := qt.NewChild() + qtChild := qt.NewChild("background export format=%s", format) go func() { err := netstorage.ExportBlocks(qtChild, sq, ep.deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error { if err := bw.Error(); err != nil { @@ -400,7 +400,7 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, ep *exportPara exportBlockPool.Put(xb) return nil }) - qtChild.Donef("background export format=%s", format) + qtChild.Done() close(resultsCh) doneCh <- err }() @@ -535,10 +535,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s w.Header().Set("Content-Type", "application/json") bw := bufferedwriter.Get(w) defer bufferedwriter.Put(bw) - qtDone := func() { - qt.Donef("/api/v1/labels") - } - WriteLabelValuesResponse(bw, labelValues, qt, qtDone) + WriteLabelValuesResponse(bw, labelValues, qt) if err := bw.Flush(); err != nil { return fmt.Errorf("canot flush label values to remote client: %w", err) } @@ -791,10 +788,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW w.Header().Set("Content-Type", "application/json") bw := bufferedwriter.Get(w) defer bufferedwriter.Put(bw) - qtDone := func() { - qt.Donef("/api/v1/labels") - } - WriteLabelsResponse(bw, labels, qt, qtDone) + WriteLabelsResponse(bw, labels, qt) if err := bw.Flush(); err != nil { return fmt.Errorf("cannot send labels response to remote client: %w", err) } @@ -914,7 +908,7 @@ func SeriesHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW } sq := storage.NewSearchQuery(start, end, tagFilterss, *maxSeriesLimit) qtDone := func() { - qt.Donef("/api/v1/series: start=%d, end=%d", start, end) + qt.Donef("start=%d, end=%d", start, end) } if end-start > 24*3600*1000 { // It is cheaper to call SearchMetricNames on time ranges exceeding a day. @@ -1101,7 +1095,7 @@ func QueryHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWr bw := bufferedwriter.Get(w) defer bufferedwriter.Put(bw) qtDone := func() { - qt.Donef("/api/v1/query: query=%s, time=%d: series=%d", query, start, len(result)) + qt.Donef("query=%s, time=%d: series=%d", query, start, len(result)) } WriteQueryResponse(bw, result, qt, qtDone) if err := bw.Flush(); err != nil { @@ -1199,7 +1193,7 @@ func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.Respo bw := bufferedwriter.Get(w) defer bufferedwriter.Put(bw) qtDone := func() { - qt.Donef("/api/v1/query_range: start=%d, end=%d, step=%d, query=%q: series=%d", start, end, step, query, len(result)) + qt.Donef("start=%d, end=%d, step=%d, query=%q: series=%d", start, end, step, query, len(result)) } WriteQueryRangeResponse(bw, result, qt, qtDone) if err := bw.Flush(); err != nil { diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go index 3a1e33216..a0a26deaa 100644 --- a/app/vmselect/promql/eval.go +++ b/app/vmselect/promql/eval.go @@ -193,22 +193,23 @@ func getTimestamps(start, end, step int64) []int64 { } func evalExpr(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) ([]*timeseries, error) { - qt = qt.NewChild() + if qt.Enabled() { + query := e.AppendString(nil) + mayCache := ec.mayCache() + qt = qt.NewChild("eval: query=%s, timeRange=[%d..%d], step=%d, mayCache=%v", query, ec.Start, ec.End, ec.Step, mayCache) + } rv, err := evalExprInternal(qt, ec, e) if err != nil { return nil, err } if qt.Enabled() { - query := e.AppendString(nil) seriesCount := len(rv) pointsPerSeries := 0 if len(rv) > 0 { pointsPerSeries = len(rv[0].Timestamps) } pointsCount := seriesCount * pointsPerSeries - mayCache := ec.mayCache() - qt.Donef("eval: query=%s, timeRange=[%d..%d], step=%d, mayCache=%v: series=%d, points=%d, pointsPerSeries=%d", - query, ec.Start, ec.End, ec.Step, mayCache, seriesCount, pointsCount, pointsPerSeries) + qt.Donef("series=%d, points=%d, pointsPerSeries=%d", seriesCount, pointsCount, pointsPerSeries) } return rv, nil } @@ -234,9 +235,9 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) if fe, ok := e.(*metricsql.FuncExpr); ok { nrf := getRollupFunc(fe.Name) if nrf == nil { - qtChild := qt.NewChild() + qtChild := qt.NewChild("transform %s()", fe.Name) rv, err := evalTransformFunc(qtChild, ec, fe) - qtChild.Donef("transform %s(): series=%d", fe.Name, len(rv)) + qtChild.Donef("series=%d", len(rv)) return rv, err } args, re, err := evalRollupFuncArgs(qt, ec, fe) @@ -254,15 +255,15 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr) return rv, nil } if ae, ok := e.(*metricsql.AggrFuncExpr); ok { - qtChild := qt.NewChild() + qtChild := qt.NewChild("aggregate %s()", ae.Name) rv, err := evalAggrFunc(qtChild, ec, ae) - qtChild.Donef("aggregate %s(): series=%d", ae.Name, len(rv)) + qtChild.Donef("series=%d", len(rv)) return rv, err } if be, ok := e.(*metricsql.BinaryOpExpr); ok { - qtChild := qt.NewChild() + qtChild := qt.NewChild("binary op %q", be.Op) rv, err := evalBinaryOp(qtChild, ec, be) - qtChild.Donef("binary op %q: series=%d", be.Op, len(rv)) + qtChild.Donef("series=%d", len(rv)) return rv, err } if ne, ok := e.(*metricsql.NumberExpr); ok { @@ -724,8 +725,8 @@ func aggregateAbsentOverTime(ec *EvalConfig, expr metricsql.Expr, tss []*timeser func evalRollupFuncWithSubquery(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf rollupFunc, expr metricsql.Expr, re *metricsql.RollupExpr) ([]*timeseries, error) { // TODO: determine whether to use rollupResultCacheV here. - qt = qt.NewChild() - defer qt.Donef("subquery") + qt = qt.NewChild("subquery") + defer qt.Done() step := re.Step.Duration(ec.Step) if step == 0 { step = ec.Step @@ -855,9 +856,9 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa expr metricsql.Expr, me *metricsql.MetricExpr, iafc *incrementalAggrFuncContext, windowExpr *metricsql.DurationExpr) ([]*timeseries, error) { var rollupMemorySize int64 window := windowExpr.Duration(ec.Step) - qt = qt.NewChild() + qt = qt.NewChild("rollup %s(): timeRange=[%d..%d], step=%d, window=%d", funcName, ec.Start, ec.End, ec.Step, window) defer func() { - qt.Donef("rollup %s(): timeRange=[%d..%d], step=%d, window=%d, neededMemoryBytes=%d", funcName, ec.Start, ec.End, ec.Step, window, rollupMemorySize) + qt.Donef("neededMemoryBytes=%d", rollupMemorySize) }() if me.IsEmpty() { return evalNumber(ec, nan), nil @@ -972,8 +973,8 @@ func getRollupMemoryLimiter() *memoryLimiter { func evalRollupWithIncrementalAggregate(qt *querytracer.Tracer, funcName string, keepMetricNames bool, iafc *incrementalAggrFuncContext, rss *netstorage.Results, rcs []*rollupConfig, preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) { - qt = qt.NewChild() - defer qt.Donef("rollup %s() with incremental aggregation %s() over %d series", funcName, iafc.ae.Name, rss.Len()) + qt = qt.NewChild("rollup %s() with incremental aggregation %s() over %d series", funcName, iafc.ae.Name, rss.Len()) + defer qt.Done() err := rss.RunParallel(qt, func(rs *netstorage.Result, workerID uint) error { rs.Values, rs.Timestamps = dropStaleNaNs(funcName, rs.Values, rs.Timestamps) preFunc(rs.Values, rs.Timestamps) @@ -1007,8 +1008,8 @@ func evalRollupWithIncrementalAggregate(qt *querytracer.Tracer, funcName string, func evalRollupNoIncrementalAggregate(qt *querytracer.Tracer, funcName string, keepMetricNames bool, rss *netstorage.Results, rcs []*rollupConfig, preFunc func(values []float64, timestamps []int64), sharedTimestamps []int64) ([]*timeseries, error) { - qt = qt.NewChild() - defer qt.Donef("rollup %s() over %d series", funcName, rss.Len()) + qt = qt.NewChild("rollup %s() over %d series", funcName, rss.Len()) + defer qt.Done() tss := make([]*timeseries, 0, rss.Len()*len(rcs)) var tssLock sync.Mutex err := rss.RunParallel(qt, func(rs *netstorage.Result, workerID uint) error { diff --git a/app/vmselect/promql/rollup_result_cache.go b/app/vmselect/promql/rollup_result_cache.go index b5449afa4..d005402e3 100644 --- a/app/vmselect/promql/rollup_result_cache.go +++ b/app/vmselect/promql/rollup_result_cache.go @@ -200,10 +200,10 @@ func ResetRollupResultCache() { } func (rrc *rollupResultCache) Get(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64) (tss []*timeseries, newStart int64) { - qt = qt.NewChild() if qt.Enabled() { query := expr.AppendString(nil) - defer qt.Donef("rollup cache get: query=%s, timeRange=[%d..%d], step=%d, window=%d", query, ec.Start, ec.End, ec.Step, window) + qt = qt.NewChild("rollup cache get: query=%s, timeRange=[%d..%d], step=%d, window=%d", query, ec.Start, ec.End, ec.Step, window) + defer qt.Done() } if !ec.mayCache() { qt.Printf("do not fetch series from cache, since it is disabled in the current context") @@ -296,10 +296,10 @@ func (rrc *rollupResultCache) Get(qt *querytracer.Tracer, ec *EvalConfig, expr m var resultBufPool bytesutil.ByteBufferPool func (rrc *rollupResultCache) Put(qt *querytracer.Tracer, ec *EvalConfig, expr metricsql.Expr, window int64, tss []*timeseries) { - qt = qt.NewChild() if qt.Enabled() { query := expr.AppendString(nil) - defer qt.Donef("rollup cache put: query=%s, timeRange=[%d..%d], step=%d, window=%d, series=%d", query, ec.Start, ec.End, ec.Step, window, len(tss)) + qt = qt.NewChild("rollup cache put: query=%s, timeRange=[%d..%d], step=%d, window=%d, series=%d", query, ec.Start, ec.End, ec.Step, window, len(tss)) + defer qt.Done() } if len(tss) == 0 || !ec.mayCache() { qt.Printf("do not store series to cache, since it is disabled in the current context") diff --git a/docs/README.md b/docs/README.md index d126a4861..084367ee3 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1425,7 +1425,7 @@ In this case VictoriaMetrics puts query trace into `trace` field in the output J For example, the following command: ```bash -curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq -r '.trace' +curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq '.trace' ``` would return the following trace: diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index e47210099..c6b8e65c3 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1429,7 +1429,7 @@ In this case VictoriaMetrics puts query trace into `trace` field in the output J For example, the following command: ```bash -curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq -r '.trace' +curl http://localhost:8428/api/v1/query_range -d 'query=2*rand()' -d 'start=-1h' -d 'step=1m' -d 'trace=1' | jq '.trace' ``` would return the following trace: diff --git a/lib/querytracer/tracer.go b/lib/querytracer/tracer.go index 0e22234c3..a6467973a 100644 --- a/lib/querytracer/tracer.go +++ b/lib/querytracer/tracer.go @@ -15,16 +15,16 @@ var denyQueryTracing = flag.Bool("denyQueryTracing", false, "Whether to disable // Tracer represents query tracer. // // It must be created via New call. -// Each created tracer must be finalized via Donef call. +// Each created tracer must be finalized via Done or Donef call. // // Tracer may contain sub-tracers (branches) in order to build tree-like execution order. // Call Tracer.NewChild func for adding sub-tracer. type Tracer struct { // startTime is the time when Tracer was created startTime time.Time - // doneTime is the time when Donef was called + // doneTime is the time when Done or Donef was called doneTime time.Time - // message is the message generated by Printf or Donef call. + // message is the message generated by NewChild, Printf or Donef call. message string // children is a list of children Tracer objects children []*Tracer @@ -33,16 +33,17 @@ type Tracer struct { span *span } -// New creates a new instance of the tracer. +// New creates a new instance of the tracer with the given fmt.Sprintf(format, args...) message. // // If enabled isn't set, then all function calls to the returned object will be no-op. // -// Donef must be called when the tracer should be finished. -func New(enabled bool) *Tracer { +// Done or Donef must be called when the tracer should be finished. +func New(enabled bool, format string, args ...interface{}) *Tracer { if *denyQueryTracing || !enabled { return nil } return &Tracer{ + message: fmt.Sprintf(format, args...), startTime: time.Now(), } } @@ -52,26 +53,43 @@ func (t *Tracer) Enabled() bool { return t != nil } -// NewChild adds a new child Tracer to t. +// NewChild adds a new child Tracer to t with the given fmt.Sprintf(format, args...) message. +// +// The returned child must be closed via Done or Donef calls. // // NewChild cannot be called from concurrent goroutines. // Create children tracers from a single goroutine and then pass them // to concurrent goroutines. -func (t *Tracer) NewChild() *Tracer { +func (t *Tracer) NewChild(format string, args ...interface{}) *Tracer { if t == nil { return nil } - if t.message != "" { + if !t.doneTime.IsZero() { panic(fmt.Errorf("BUG: NewChild() cannot be called after Donef(%q) call", t.message)) } child := &Tracer{ + message: fmt.Sprintf(format, args...), startTime: time.Now(), } t.children = append(t.children, child) return child } -// Donef finishes t. +// Done finishes t. +// +// Done cannot be called multiple times. +// Other Tracer functions cannot be called after Done call. +func (t *Tracer) Done() { + if t == nil { + return + } + if !t.doneTime.IsZero() { + panic(fmt.Errorf("BUG: Donef(%q) already called", t.message)) + } + t.doneTime = time.Now() +} + +// Donef appends the given fmt.Sprintf(format, args..) message to t and finished it. // // Donef cannot be called multiple times. // Other Tracer functions cannot be called after Donef call. @@ -79,21 +97,21 @@ func (t *Tracer) Donef(format string, args ...interface{}) { if t == nil { return } - if t.message != "" { - panic(fmt.Errorf("BUG: Donef() already called with message %q", t.message)) + if !t.doneTime.IsZero() { + panic(fmt.Errorf("BUG: Donef(%q) already called", t.message)) } - t.message = fmt.Sprintf(format, args...) + t.message += ": " + fmt.Sprintf(format, args...) t.doneTime = time.Now() } -// Printf adds new message to t. +// Printf adds new fmt.Sprintf(format, args...) message to t. // // Printf cannot be called from concurrent goroutines. func (t *Tracer) Printf(format string, args ...interface{}) { if t == nil { return } - if t.message != "" { + if !t.doneTime.IsZero() { panic(fmt.Errorf("BUG: Printf() cannot be called after Done(%q) call", t.message)) } now := time.Now() @@ -177,8 +195,8 @@ func (t *Tracer) toSpanInternal(prevTime time.Time) (*span, time.Time) { // tracer with children msg := t.message doneTime := t.doneTime - if msg == "" { - msg = "missing Tracer.Donef() call" + if doneTime.IsZero() { + msg += ": missing Tracer.Done() call" doneTime = t.getLastChildDoneTime(t.startTime) } d := doneTime.Sub(t.startTime) diff --git a/lib/querytracer/tracer_test.go b/lib/querytracer/tracer_test.go index 8e8660aa5..fa278d2b7 100644 --- a/lib/querytracer/tracer_test.go +++ b/lib/querytracer/tracer_test.go @@ -6,21 +6,21 @@ import ( ) func TestTracerDisabled(t *testing.T) { - qt := New(false) + qt := New(false, "test") if qt.Enabled() { t.Fatalf("query tracer must be disabled") } - qtChild := qt.NewChild() + qtChild := qt.NewChild("child done %d", 456) if qtChild.Enabled() { t.Fatalf("query tracer must be disabled") } qtChild.Printf("foo %d", 123) - qtChild.Donef("child done %d", 456) + qtChild.Done() qt.Printf("parent %d", 789) if err := qt.AddJSON([]byte("foobar")); err != nil { t.Fatalf("unexpected error in AddJSON: %s", err) } - qt.Donef("test") + qt.Done() s := qt.String() if s != "" { t.Fatalf("unexpected trace; got %s; want empty", s) @@ -32,20 +32,20 @@ func TestTracerDisabled(t *testing.T) { } func TestTracerEnabled(t *testing.T) { - qt := New(true) + qt := New(true, "test") if !qt.Enabled() { t.Fatalf("query tracer must be enabled") } - qtChild := qt.NewChild() + qtChild := qt.NewChild("child done %d", 456) if !qtChild.Enabled() { t.Fatalf("child query tracer must be enabled") } qtChild.Printf("foo %d", 123) - qtChild.Donef("child done %d", 456) + qtChild.Done() qt.Printf("parent %d", 789) - qt.Donef("test") + qt.Donef("foo %d", 33) s := qt.String() - sExpected := `- 0ms: test + sExpected := `- 0ms: test: foo 33 | - 0ms: child done 456 | | - 0ms: foo 123 | - 0ms: parent 789 @@ -56,9 +56,9 @@ func TestTracerEnabled(t *testing.T) { } func TestTracerMultiline(t *testing.T) { - qt := New(true) + qt := New(true, "line1\nline2") qt.Printf("line3\nline4\n") - qt.Donef("line1\nline2") + qt.Done() s := qt.String() sExpected := `- 0ms: line1 | line2 @@ -71,18 +71,18 @@ func TestTracerMultiline(t *testing.T) { } func TestTracerToJSON(t *testing.T) { - qt := New(true) + qt := New(true, "test") if !qt.Enabled() { t.Fatalf("query tracer must be enabled") } - qtChild := qt.NewChild() + qtChild := qt.NewChild("child done %d", 456) if !qtChild.Enabled() { t.Fatalf("child query tracer must be enabled") } qtChild.Printf("foo %d", 123) - qtChild.Donef("child done %d", 456) + qtChild.Done() qt.Printf("parent %d", 789) - qt.Donef("test") + qt.Done() s := qt.ToJSON() sExpected := `{"duration_msec":0,"message":"test","children":[` + `{"duration_msec":0,"message":"child done 456","children":[` + @@ -94,11 +94,11 @@ func TestTracerToJSON(t *testing.T) { } func TestTraceAddJSON(t *testing.T) { - qtChild := New(true) + qtChild := New(true, "child") qtChild.Printf("foo") - qtChild.Donef("child") + qtChild.Done() jsonTrace := qtChild.ToJSON() - qt := New(true) + qt := New(true, "parent") qt.Printf("first_line") if err := qt.AddJSON([]byte(jsonTrace)); err != nil { t.Fatalf("unexpected error in AddJSON: %s", err) @@ -107,7 +107,7 @@ func TestTraceAddJSON(t *testing.T) { if err := qt.AddJSON(nil); err != nil { t.Fatalf("unexpected error in AddJSON(nil): %s", err) } - qt.Donef("parent") + qt.Done() s := qt.String() sExpected := `- 0ms: parent | - 0ms: first_line @@ -131,15 +131,15 @@ func TestTraceAddJSON(t *testing.T) { } func TestTraceMissingDonef(t *testing.T) { - qt := New(true) + qt := New(true, "parent") qt.Printf("parent printf") - qtChild := qt.NewChild() + qtChild := qt.NewChild("child") qtChild.Printf("child printf") qt.Printf("another parent printf") s := qt.String() - sExpected := `- 0ms: missing Tracer.Donef() call + sExpected := `- 0ms: parent: missing Tracer.Done() call | - 0ms: parent printf -| - 0ms: missing Tracer.Donef() call +| - 0ms: child: missing Tracer.Done() call | | - 0ms: child printf | - 0ms: another parent printf ` diff --git a/lib/storage/search.go b/lib/storage/search.go index d74788e21..761b19845 100644 --- a/lib/storage/search.go +++ b/lib/storage/search.go @@ -141,8 +141,8 @@ func (s *Search) reset() { // // Init returns the upper bound on the number of found time series. func (s *Search) Init(qt *querytracer.Tracer, storage *Storage, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) int { - qt = qt.NewChild() - defer qt.Donef("init series search: filters=%s, timeRange=%s", tfss, &tr) + qt = qt.NewChild("init series search: filters=%s, timeRange=%s", tfss, &tr) + defer qt.Done() if s.needClosing { logger.Panicf("BUG: missing MustClose call before the next call to Init") } diff --git a/lib/storage/storage.go b/lib/storage/storage.go index 6807efca2..f968d0663 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1061,8 +1061,8 @@ func nextRetentionDuration(retentionMsecs int64) time.Duration { // SearchMetricNames returns metric names matching the given tfss on the given tr. func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]MetricName, error) { - qt = qt.NewChild() - defer qt.Donef("search for matching metric names") + qt = qt.NewChild("search for matching metric names") + defer qt.Done() tsids, err := s.searchTSIDs(qt, tfss, tr, maxMetrics, deadline) if err != nil { return nil, err @@ -1104,8 +1104,8 @@ func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, // searchTSIDs returns sorted TSIDs for the given tfss and the given tr. func (s *Storage) searchTSIDs(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]TSID, error) { - qt = qt.NewChild() - defer qt.Donef("search for matching series ids") + qt = qt.NewChild("search for matching series ids") + defer qt.Done() // Do not cache tfss -> tsids here, since the caching is performed // on idb level. @@ -1154,8 +1154,8 @@ var ( // // This should speed-up further searchMetricNameWithCache calls for metricIDs from tsids. func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, tsids []TSID, deadline uint64) error { - qt = qt.NewChild() - defer qt.Donef("prefetch metric names for %d series ids", len(tsids)) + qt = qt.NewChild("prefetch metric names for %d series ids", len(tsids)) + defer qt.Done() if len(tsids) == 0 { qt.Printf("nothing to prefetch") return nil