mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-23 12:31:07 +01:00
lib/promrelabel: optimize matching for commonly used regex patterns in if
option
The following regex patterns are optimized: - literal string match, e.g. "foo" - prefix match, e.g. "foo.*" and "foo.+" - substring match, e.g. ".*foo.*" and ".+foo.+" - alternate values match, e.g. "foo|bar|baz"
This commit is contained in:
parent
0ad3bbadd3
commit
7afe8450fc
@ -23,7 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
||||
|
||||
* FEATURE: return shorter error messages to Grafana and to other clients requesting [/api/v1/query](https://docs.victoriametrics.com/keyConcepts.html#instant-query) and [/api/v1/query_range](https://docs.victoriametrics.com/keyConcepts.html#range-query) endpoints. This should simplify reading these errors by humans. The long error message with full context is still written to logs.
|
||||
* FEATURE: add the ability to fine-tune the number of points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation. This can be done with the `-search.maxPointsSubqueryPerTimeseries` command-line flag. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2922).
|
||||
* FEATURE: improve the performance for `action: keep`, `action: drop`, `action: labelkeep` and `action: labeldrop` relabeling rules for `regex` containing the list of matching values. For example, `regex: "foo|bar|baz"`.
|
||||
* FEATURE: improve the performance for relabeling rules with commonly used regular expressions in `regex` and `if` fields such as `some_string`, `prefix.*`, `prefix.+`, `foo|bar|baz`, `.*foo.*` and `.+foo.+`.
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add ability to accept [multitenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) data via OpenTSDB `/api/put` protocol at `/insert/<tenantID>/opentsdb/api/put` http endpoint if [multitenant support](https://docs.victoriametrics.com/vmagent.html#multitenancy) is enabled at `vmagent`. Thanks to @chengjianyun for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3015).
|
||||
* FEATURE: [monitoring](https://docs.victoriametrics.com/#monitoring): expose `vm_hourly_series_limit_max_series`, `vm_hourly_series_limit_current_series`, `vm_daily_series_limit_max_series` and `vm_daily_series_limit_current_series` metrics when `-search.maxHourlySeries` or `-search.maxDailySeries` limits are set. This allows alerting when the number of unique series reaches the configured limits. See [these docs](https://docs.victoriametrics.com/#cardinality-limiter) for details.
|
||||
* FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): reduce the amount of logging at `vmstorage` when `vmselect` connects/disconnects to `vmstorage`.
|
||||
|
@ -3,10 +3,10 @@ package promrelabel
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"regexp"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||
"github.com/VictoriaMetrics/metricsql"
|
||||
)
|
||||
|
||||
@ -105,7 +105,7 @@ type labelFilter struct {
|
||||
value string
|
||||
|
||||
// re contains compiled regexp for `=~` and `!~` op.
|
||||
re *regexp.Regexp
|
||||
re *regexutil.PromRegex
|
||||
}
|
||||
|
||||
func newLabelFilter(mlf *metricsql.LabelFilter) (*labelFilter, error) {
|
||||
@ -115,10 +115,7 @@ func newLabelFilter(mlf *metricsql.LabelFilter) (*labelFilter, error) {
|
||||
value: mlf.Value,
|
||||
}
|
||||
if lf.op == "=~" || lf.op == "!~" {
|
||||
// PromQL regexps are anchored by default.
|
||||
// See https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors
|
||||
reString := "^(?:" + lf.value + ")$"
|
||||
re, err := regexp.Compile(reString)
|
||||
re, err := regexutil.NewPromRegex(lf.value)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse regexp for %s: %w", mlf.AppendString(nil), err)
|
||||
}
|
||||
@ -134,9 +131,9 @@ func (lf *labelFilter) match(labels []prompbmarshal.Label) bool {
|
||||
case "!=":
|
||||
return !lf.equalValue(labels)
|
||||
case "=~":
|
||||
return lf.equalRegexp(labels)
|
||||
return lf.matchRegexp(labels)
|
||||
case "!~":
|
||||
return !lf.equalRegexp(labels)
|
||||
return !lf.matchRegexp(labels)
|
||||
default:
|
||||
logger.Panicf("BUG: unexpected operation for label filter: %s", lf.op)
|
||||
}
|
||||
@ -161,7 +158,7 @@ func (lf *labelFilter) equalValue(labels []prompbmarshal.Label) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (lf *labelFilter) equalRegexp(labels []prompbmarshal.Label) bool {
|
||||
func (lf *labelFilter) matchRegexp(labels []prompbmarshal.Label) bool {
|
||||
labelNameMatches := 0
|
||||
for _, label := range labels {
|
||||
if toCanonicalLabelName(label.Name) != lf.label {
|
||||
|
119
lib/regexutil/promregex.go
Normal file
119
lib/regexutil/promregex.go
Normal file
@ -0,0 +1,119 @@
|
||||
package regexutil
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// PromRegex matches strings against a Prometheus-like regex, using cheap
// string operations instead of the regexp engine for common regex shapes.
//
// The following regex shapes are optimized:
//
//   - plain string such as "foobar"
//   - alternate strings such as "foo|bar|baz"
//   - prefix match such as "foo.*" or "foo.+"
//   - substring match such as ".*foo.*" or ".+bar.+"
type PromRegex struct {
	// prefix is the literal text the regex starts with.
	// For example, prefix="foo" for regex="foo(a|b)".
	prefix string

	// suffix is the part of the regex left after removing prefix.
	// For example, suffix="a|b" for regex="foo(a|b)".
	suffix string

	// substrDotStar is the literal "string" when suffix=".*string.*".
	// It is empty when suffix doesn't have this form.
	substrDotStar string

	// substrDotPlus is the literal "string" when suffix=".+string.+".
	// It is empty when suffix doesn't have this form.
	substrDotPlus string

	// orValues holds the alternate values of suffix.
	// For example, orValues=["foo","bar","baz"] for suffix="foo|bar|baz".
	orValues []string

	// reSuffix is suffix compiled with anchors: "^(?:suffix)$".
	reSuffix *regexp.Regexp
}
|
||||
|
||||
// NewPromRegex returns PromRegex for the given expr.
|
||||
func NewPromRegex(expr string) (*PromRegex, error) {
|
||||
if _, err := regexp.Compile(expr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
prefix, suffix := Simplify(expr)
|
||||
orValues := GetOrValues(suffix)
|
||||
substrDotStar := getSubstringLiteral(suffix, ".*")
|
||||
substrDotPlus := getSubstringLiteral(suffix, ".+")
|
||||
// It is expected that Optimize returns valid regexp in suffix, so use MustCompile here.
|
||||
// Anchor suffix to the beginning and the end of the matching string.
|
||||
suffixExpr := "^(?:" + suffix + ")$"
|
||||
reSuffix := regexp.MustCompile(suffixExpr)
|
||||
pr := &PromRegex{
|
||||
prefix: prefix,
|
||||
suffix: suffix,
|
||||
substrDotStar: substrDotStar,
|
||||
substrDotPlus: substrDotPlus,
|
||||
orValues: orValues,
|
||||
reSuffix: reSuffix,
|
||||
}
|
||||
return pr, nil
|
||||
}
|
||||
|
||||
// MatchString retruns true if s matches pr.
|
||||
//
|
||||
// The pr is automatically anchored to the beginning and to the end
|
||||
// of the matching string with '^' and '$'.
|
||||
func (pr *PromRegex) MatchString(s string) bool {
|
||||
if !strings.HasPrefix(s, pr.prefix) {
|
||||
// Fast path - s has another prefix than pr.
|
||||
return false
|
||||
}
|
||||
s = s[len(pr.prefix):]
|
||||
if len(pr.orValues) > 0 {
|
||||
// Fast path - pr contains only alternate strings such as 'foo|bar|baz'
|
||||
for _, v := range pr.orValues {
|
||||
if s == v {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
if pr.substrDotStar != "" {
|
||||
// Fast path - pr contains ".*someText.*"
|
||||
return strings.Contains(s, pr.substrDotStar)
|
||||
}
|
||||
if pr.substrDotPlus != "" {
|
||||
// Fast path - pr contains ".+someText.+"
|
||||
n := strings.Index(s, pr.substrDotPlus)
|
||||
return n > 0 && n + len(pr.substrDotPlus) < len(s)
|
||||
}
|
||||
switch pr.suffix {
|
||||
case ".*":
|
||||
// Fast path - the pr contains "prefix.*"
|
||||
return true
|
||||
case ".+":
|
||||
// Fast path - the pr contains "prefix.+"
|
||||
return len(s) > 0
|
||||
}
|
||||
// Fall back to slow path by matching the original regexp.
|
||||
return pr.reSuffix.MatchString(s)
|
||||
}
|
||||
|
||||
func getSubstringLiteral(expr, prefixSuffix string) string {
|
||||
if !strings.HasPrefix(expr, prefixSuffix) {
|
||||
return ""
|
||||
}
|
||||
expr = expr[len(prefixSuffix):]
|
||||
if !strings.HasSuffix(expr, prefixSuffix) {
|
||||
return ""
|
||||
}
|
||||
expr = expr[:len(expr)-len(prefixSuffix)]
|
||||
prefix, suffix := Simplify(expr)
|
||||
if suffix != "" {
|
||||
return ""
|
||||
}
|
||||
return prefix
|
||||
}
|
||||
|
90
lib/regexutil/promregex_test.go
Normal file
90
lib/regexutil/promregex_test.go
Normal file
@ -0,0 +1,90 @@
|
||||
package regexutil
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPromRegexParseFailure(t *testing.T) {
|
||||
f := func(expr string) {
|
||||
t.Helper()
|
||||
pr, err := NewPromRegex(expr)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error for expr=%s", expr)
|
||||
}
|
||||
if pr != nil {
|
||||
t.Fatalf("expecting nil pr for expr=%s", expr)
|
||||
}
|
||||
}
|
||||
f("fo[bar")
|
||||
f("foo(bar")
|
||||
}
|
||||
|
||||
func TestPromRegex(t *testing.T) {
|
||||
f := func(expr, s string, resultExpected bool) {
|
||||
t.Helper()
|
||||
pr, err := NewPromRegex(expr)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
result := pr.MatchString(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result when matching %s against %s; got %v; want %v", expr, s, result, resultExpected)
|
||||
}
|
||||
|
||||
// Make sure the result is the same for regular regexp
|
||||
exprAnchored := "^(?:" + expr + ")$"
|
||||
re := regexp.MustCompile(exprAnchored)
|
||||
result = re.MatchString(s)
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected result when matching %s against %s during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
|
||||
}
|
||||
}
|
||||
f("", "", true)
|
||||
f("", "foo", false)
|
||||
f("foo", "", false)
|
||||
f(".*", "", true)
|
||||
f(".*", "foo", true)
|
||||
f(".+", "", false)
|
||||
f(".+", "foo", true)
|
||||
f("foo.*", "bar", false)
|
||||
f("foo.*", "foo", true)
|
||||
f("foo.*", "foobar", true)
|
||||
f("foo.+", "bar", false)
|
||||
f("foo.+", "foo", false)
|
||||
f("foo.+", "foobar", true)
|
||||
f("foo|bar", "", false)
|
||||
f("foo|bar", "a", false)
|
||||
f("foo|bar", "foo", true)
|
||||
f("foo|bar", "bar", true)
|
||||
f("foo|bar", "foobar", false)
|
||||
f("foo(bar|baz)", "a", false)
|
||||
f("foo(bar|baz)", "foobar", true)
|
||||
f("foo(bar|baz)", "foobaz", true)
|
||||
f("foo(bar|baz)", "foobaza", false)
|
||||
f("foo(bar|baz)", "foobal", false)
|
||||
f("^foo|b(ar)$", "foo", true)
|
||||
f("^foo|b(ar)$", "bar", true)
|
||||
f("^foo|b(ar)$", "ar", false)
|
||||
f(".*foo.*", "foo", true)
|
||||
f(".*foo.*", "afoobar", true)
|
||||
f(".*foo.*", "abc", false)
|
||||
f("foo.*bar.*", "foobar", true)
|
||||
f("foo.*bar.*", "foo_bar_", true)
|
||||
f("foo.*bar.*", "foobaz", false)
|
||||
f(".+foo.+", "foo", false)
|
||||
f(".+foo.+", "afoobar", true)
|
||||
f(".+foo.+", "afoo", false)
|
||||
f(".+foo.+", "abc", false)
|
||||
f("foo.+bar.+", "foobar", false)
|
||||
f("foo.+bar.+", "foo_bar_", true)
|
||||
f("foo.+bar.+", "foobaz", false)
|
||||
f(".+foo.*", "foo", false)
|
||||
f(".+foo.*", "afoo", true)
|
||||
f(".+foo.*", "afoobar", true)
|
||||
f(".*(a|b).*", "a", true)
|
||||
f(".*(a|b).*", "ax", true)
|
||||
f(".*(a|b).*", "xa", true)
|
||||
f(".*(a|b).*", "xay", true)
|
||||
f(".*(a|b).*", "xzy", false)
|
||||
}
|
102
lib/regexutil/promregex_timing_test.go
Normal file
102
lib/regexutil/promregex_timing_test.go
Normal file
@ -0,0 +1,102 @@
|
||||
package regexutil
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkPromRegexMatchString(b *testing.B) {
|
||||
b.Run("unpotimized-noprefix-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "xbar.*|baz", "xbarz", true)
|
||||
})
|
||||
b.Run("unpotimized-noprefix-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "xbar.*|baz", "zfoobarz", false)
|
||||
})
|
||||
b.Run("unpotimized-prefix-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo(bar.*|baz)", "foobarz", true)
|
||||
})
|
||||
b.Run("unpotimized-prefix-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo(bar.*|baz)", "zfoobarz", false)
|
||||
})
|
||||
b.Run("literal-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo", "foo", true)
|
||||
})
|
||||
b.Run("literal-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo", "bar", false)
|
||||
})
|
||||
b.Run("prefix-dot-star-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo.*", "foobar", true)
|
||||
})
|
||||
b.Run("prefix-dot-star-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo.*", "afoobar", false)
|
||||
})
|
||||
b.Run("prefix-dot-plus-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo.+", "foobar", true)
|
||||
})
|
||||
b.Run("prefix-dot-plus-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo.+", "afoobar", false)
|
||||
})
|
||||
b.Run("or-values-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo|bar|baz", "baz", true)
|
||||
})
|
||||
b.Run("or-values-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "foo|bar|baz", "abaz", false)
|
||||
})
|
||||
b.Run("prefix-or-values-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "x(foo|bar|baz)", "xbaz", true)
|
||||
})
|
||||
b.Run("prefix-or-values-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "x(foo|bar|baz)", "abaz", false)
|
||||
})
|
||||
b.Run("substring-dot-star-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, ".*foo.*", "afoobar", true)
|
||||
})
|
||||
b.Run("substring-dot-star-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, ".*foo.*", "abarbaz", false)
|
||||
})
|
||||
b.Run("substring-dot-plus-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, ".+foo.+", "afoobar", true)
|
||||
})
|
||||
b.Run("substring-dot-plus-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, ".+foo.+", "abarbaz", false)
|
||||
})
|
||||
b.Run("prefix-substring-dot-star-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "a.*foo.*", "afoobar", true)
|
||||
})
|
||||
b.Run("prefix-substring-dot-star-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "a.*foo.*", "abarbaz", false)
|
||||
})
|
||||
b.Run("prefix-substring-dot-plus-match", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "a.+foo.+", "abfoobar", true)
|
||||
})
|
||||
b.Run("prefix-substring-dot-plus-mismatch", func(b *testing.B) {
|
||||
benchmarkPromRegexMatchString(b, "a.+foo.+", "abarbaz", false)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkPromRegexMatchString(b *testing.B, expr, s string, resultExpected bool) {
|
||||
pr, err := NewPromRegex(expr)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error: %s", err))
|
||||
}
|
||||
re := regexp.MustCompile("^(?:" + expr + ")$")
|
||||
f := func(b *testing.B, matchString func(s string) bool) {
|
||||
b.SetBytes(1)
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
result := matchString(s)
|
||||
if result != resultExpected {
|
||||
panic(fmt.Errorf("unexpected result when matching %s against %s; got %v; want %v", s, expr, result, resultExpected))
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
b.Run("PromRegex", func(b *testing.B) {
|
||||
f(b, pr.MatchString)
|
||||
})
|
||||
b.Run("StandardRegex", func(b *testing.B) {
|
||||
f(b, re.MatchString)
|
||||
})
|
||||
}
|
Loading…
Reference in New Issue
Block a user