2024-01-14 21:46:06 +01:00
|
|
|
package prompb
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/easyproto"
|
|
|
|
)
|
|
|
|
|
|
|
|
// WriteRequest represents Prometheus remote write API request.
|
|
|
|
type WriteRequest struct {
|
|
|
|
// Timeseries is a list of time series in the given WriteRequest
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
Timeseries []TimeSeries
|
|
|
|
|
|
|
|
labelsPool []Label
|
|
|
|
samplesPool []Sample
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Reset resets wr for subsequent re-use.
|
|
|
|
func (wr *WriteRequest) Reset() {
|
|
|
|
tss := wr.Timeseries
|
|
|
|
for i := range tss {
|
|
|
|
tss[i] = TimeSeries{}
|
|
|
|
}
|
|
|
|
wr.Timeseries = tss[:0]
|
|
|
|
|
|
|
|
labelsPool := wr.labelsPool
|
|
|
|
for i := range labelsPool {
|
|
|
|
labelsPool[i] = Label{}
|
|
|
|
}
|
|
|
|
wr.labelsPool = labelsPool[:0]
|
|
|
|
|
|
|
|
samplesPool := wr.samplesPool
|
|
|
|
for i := range samplesPool {
|
|
|
|
samplesPool[i] = Sample{}
|
|
|
|
}
|
|
|
|
wr.samplesPool = samplesPool[:0]
|
|
|
|
}
|
|
|
|
|
|
|
|
// TimeSeries is a timeseries.
|
|
|
|
type TimeSeries struct {
|
|
|
|
// Labels is a list of labels for the given TimeSeries
|
|
|
|
Labels []Label
|
|
|
|
|
|
|
|
// Samples is a list of samples for the given TimeSeries
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
Samples []Sample
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// Sample is a timeseries sample.
|
|
|
|
type Sample struct {
|
|
|
|
// Value is sample value.
|
|
|
|
Value float64
|
|
|
|
|
|
|
|
// Timestamp is unix timestamp for the sample in milliseconds.
|
|
|
|
Timestamp int64
|
|
|
|
}
|
|
|
|
|
|
|
|
// Label is a timeseries label.
|
|
|
|
type Label struct {
|
|
|
|
// Name is label name.
|
|
|
|
Name string
|
|
|
|
|
|
|
|
// Value is label value.
|
|
|
|
Value string
|
|
|
|
}
|
|
|
|
|
|
|
|
// UnmarshalProtobuf unmarshals wr from src.
|
|
|
|
//
|
|
|
|
// src mustn't change while wr is in use, since wr points to src.
|
|
|
|
func (wr *WriteRequest) UnmarshalProtobuf(src []byte) (err error) {
|
|
|
|
wr.Reset()
|
|
|
|
|
|
|
|
// message WriteRequest {
|
|
|
|
// repeated TimeSeries timeseries = 1;
|
|
|
|
// }
|
|
|
|
tss := wr.Timeseries
|
|
|
|
labelsPool := wr.labelsPool
|
|
|
|
samplesPool := wr.samplesPool
|
|
|
|
var fc easyproto.FieldContext
|
|
|
|
for len(src) > 0 {
|
|
|
|
src, err = fc.NextField(src)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot read the next field: %w", err)
|
|
|
|
}
|
|
|
|
switch fc.FieldNum {
|
|
|
|
case 1:
|
|
|
|
data, ok := fc.MessageData()
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("cannot read timeseries data")
|
|
|
|
}
|
|
|
|
if len(tss) < cap(tss) {
|
|
|
|
tss = tss[:len(tss)+1]
|
|
|
|
} else {
|
|
|
|
tss = append(tss, TimeSeries{})
|
|
|
|
}
|
|
|
|
ts := &tss[len(tss)-1]
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
labelsPool, samplesPool, err = ts.unmarshalProtobuf(data, labelsPool, samplesPool)
|
2024-01-14 21:46:06 +01:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot unmarshal timeseries: %w", err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
wr.Timeseries = tss
|
|
|
|
wr.labelsPool = labelsPool
|
|
|
|
wr.samplesPool = samplesPool
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
func (ts *TimeSeries) unmarshalProtobuf(src []byte, labelsPool []Label, samplesPool []Sample) ([]Label, []Sample, error) {
|
2024-01-14 21:46:06 +01:00
|
|
|
// message TimeSeries {
|
|
|
|
// repeated Label labels = 1;
|
|
|
|
// repeated Sample samples = 2;
|
|
|
|
// }
|
|
|
|
labelsPoolLen := len(labelsPool)
|
|
|
|
samplesPoolLen := len(samplesPool)
|
|
|
|
var fc easyproto.FieldContext
|
|
|
|
for len(src) > 0 {
|
|
|
|
var err error
|
|
|
|
src, err = fc.NextField(src)
|
|
|
|
if err != nil {
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, fmt.Errorf("cannot read the next field: %w", err)
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
switch fc.FieldNum {
|
|
|
|
case 1:
|
|
|
|
data, ok := fc.MessageData()
|
|
|
|
if !ok {
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, fmt.Errorf("cannot read label data")
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
if len(labelsPool) < cap(labelsPool) {
|
|
|
|
labelsPool = labelsPool[:len(labelsPool)+1]
|
|
|
|
} else {
|
|
|
|
labelsPool = append(labelsPool, Label{})
|
|
|
|
}
|
|
|
|
label := &labelsPool[len(labelsPool)-1]
|
|
|
|
if err := label.unmarshalProtobuf(data); err != nil {
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, fmt.Errorf("cannot unmarshal label: %w", err)
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
case 2:
|
|
|
|
data, ok := fc.MessageData()
|
|
|
|
if !ok {
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, fmt.Errorf("cannot read the sample data")
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
if len(samplesPool) < cap(samplesPool) {
|
|
|
|
samplesPool = samplesPool[:len(samplesPool)+1]
|
|
|
|
} else {
|
|
|
|
samplesPool = append(samplesPool, Sample{})
|
|
|
|
}
|
|
|
|
sample := &samplesPool[len(samplesPool)-1]
|
|
|
|
if err := sample.unmarshalProtobuf(data); err != nil {
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, fmt.Errorf("cannot unmarshal sample: %w", err)
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ts.Labels = labelsPool[labelsPoolLen:]
|
|
|
|
ts.Samples = samplesPool[samplesPoolLen:]
|
Revert "Exemplar support (#5982)"
This reverts commit 5a3abfa0414ab495cbc34a58146b540aa8289636.
Reason for revert: exemplars aren't in wide use because they have numerous issues which prevent their adoption (see below).
Adding support for examplars into VictoriaMetrics introduces non-trivial code changes. These code changes need to be supported forever
once the release of VictoriaMetrics with exemplar support is published. That's why I don't think this is a good feature despite
that the source code of the reverted commit has an excellent quality. See https://docs.victoriametrics.com/goals/ .
Issues with Prometheus exemplars:
- Prometheus still has only experimental support for exemplars after more than three years since they were introduced.
It stores exemplars in memory, so they are lost after Prometheus restart. This doesn't look like production-ready feature.
See https://github.com/prometheus/docs/blob/0a2f3b37940e2949eefe752ed7b6c768e0b00128/content/docs/instrumenting/exposition_formats.md?plain=1#L153-L159
and https://prometheus.io/docs/prometheus/latest/feature_flags/#exemplars-storage
- It is very non-trivial to expose exemplars alongside metrics in your application, since the official Prometheus SDKs
for metrics' exposition ( https://prometheus.io/docs/instrumenting/clientlibs/ ) either have very hard-to-use API
for exposing histograms or do not have this API at all. For example, try figuring out how to expose exemplars
via https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus .
- It looks like exemplars are supported for Histogram metric types only -
see https://pkg.go.dev/github.com/prometheus/client_golang@v1.19.1/prometheus#Timer.ObserveDurationWithExemplar .
Exemplars aren't supported for Counter, Gauge and Summary metric types.
- Grafana has very poor support for Prometheus exemplars. It looks like it supports exemplars only when the query
contains histogram_quantile() function. It queries exemplars via special Prometheus API -
https://prometheus.io/docs/prometheus/latest/querying/api/#querying-exemplars - (which is still marked as experimental, btw.)
and then displays all the returned exemplars on the graph as special dots. The issue is that this doesn't work
in production in most cases when the histogram_quantile() is calculated over thousands of histogram buckets
exposed by big number of application instances. Every histogram bucket may expose an exemplar on every timestamp shown on the graph.
This makes the graph unusable, since it is litterally filled with thousands of exemplar dots.
Neither Prometheus API nor Grafana doesn't provide the ability to filter out unneeded exemplars.
- Exemplars are usually connected to traces. While traces are good for some
I doubt exemplars will become production-ready in the near future because of the issues outlined above.
Alternative to exemplars:
Exemplars are marketed as a silver bullet for the correlation between metrics, traces and logs -
just click the exemplar dot on some graph in Grafana and instantly see the corresponding trace or log entry!
This doesn't work as expected in production as shown above. Are there better solutions, which work in production?
Yes - just use time-based and label-based correlation between metrics, traces and logs. Assign the same `job`
and `instance` labels to metrics, logs and traces, so you can quickly find the needed trace or log entry
by these labes on the time range with the anomaly on metrics' graph.
2024-07-03 15:30:11 +02:00
|
|
|
return labelsPool, samplesPool, nil
|
2024-01-14 21:46:06 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func (lbl *Label) unmarshalProtobuf(src []byte) (err error) {
|
|
|
|
// message Label {
|
|
|
|
// string name = 1;
|
|
|
|
// string value = 2;
|
|
|
|
// }
|
|
|
|
var fc easyproto.FieldContext
|
|
|
|
for len(src) > 0 {
|
|
|
|
src, err = fc.NextField(src)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot read the next field: %w", err)
|
|
|
|
}
|
|
|
|
switch fc.FieldNum {
|
|
|
|
case 1:
|
|
|
|
name, ok := fc.String()
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("cannot read label name")
|
|
|
|
}
|
|
|
|
lbl.Name = name
|
|
|
|
case 2:
|
|
|
|
value, ok := fc.String()
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("cannot read label value")
|
|
|
|
}
|
|
|
|
lbl.Value = value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Sample) unmarshalProtobuf(src []byte) (err error) {
|
|
|
|
// message Sample {
|
|
|
|
// double value = 1;
|
|
|
|
// int64 timestamp = 2;
|
|
|
|
// }
|
|
|
|
var fc easyproto.FieldContext
|
|
|
|
for len(src) > 0 {
|
|
|
|
src, err = fc.NextField(src)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("cannot read the next field: %w", err)
|
|
|
|
}
|
|
|
|
switch fc.FieldNum {
|
|
|
|
case 1:
|
|
|
|
value, ok := fc.Double()
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("cannot read sample value")
|
|
|
|
}
|
|
|
|
s.Value = value
|
|
|
|
case 2:
|
|
|
|
timestamp, ok := fc.Int64()
|
|
|
|
if !ok {
|
|
|
|
return fmt.Errorf("cannot read sample timestamp")
|
|
|
|
}
|
|
|
|
s.Timestamp = timestamp
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|