From fb2071a01eed1b6112c7741f528c44739dd03810 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 13 Nov 2023 18:23:36 +0100 Subject: [PATCH] lib/regexutil: properly handle alternate regexps surrounded by .+ or .* Previously the following regexps were improperly handled: .+foo|bar.+ .*foo|bar.* This could lead to unexpected regexp match results. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 Thanks to @Haleygo for the initial attempt to fix the issue at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5308 --- docs/CHANGELOG.md | 1 + lib/regexutil/promregex.go | 16 ++++++++++++++++ lib/regexutil/promregex_test.go | 33 +++++++++++++++++++++++++++++++-- lib/regexutil/regexutil_test.go | 8 ++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ab5cebf337..f878ccfbf7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -97,6 +97,7 @@ The sandbox cluster installation is running under the constant load generated by * BUGFIX: dashboards/cluster: fix description about `max` threshold for `Concurrent selects` panel. Before, it was mistakenly implying that `max` is equal to the double of available CPUs. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): bump hard-coded limit for search query size at `vmstorage` from 1MB to 5MB. The change should be more suitable for real-world scenarios and protect vmstorage from excessive memory usage. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5154) for details * BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): fix error when creating an incremental backup with the `-origin` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5144) for details. +* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) with `regex`, which start and end with `.+` or `.*` and which contain alternate sub-regexps. For example, `.+;|;.+` or `.*foo|bar|baz.*`. Previously such regexps were improperly parsed, which could result in undexpected relabeling results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover Kubernetes targets via [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some targets and some labels could be skipped during service discovery because of the bug introduced in [v1.93.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.5) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4850). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5216) for more details. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix vmagent ignoring configuration reload for streaming aggregation if it was started with empty streaming aggregation config. Thanks to @aluode99 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5178). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not scrape targets if the corresponding [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to files with invalid auth configs. Previously the targets were scraped without properly set auth headers in this case. Now targets are scraped only after the files are updated with valid auth configs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153). diff --git a/lib/regexutil/promregex.go b/lib/regexutil/promregex.go index eafc422fed..11313b6d50 100644 --- a/lib/regexutil/promregex.go +++ b/lib/regexutil/promregex.go @@ -2,6 +2,7 @@ package regexutil import ( "regexp" + "regexp/syntax" "strings" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" @@ -105,7 +106,22 @@ func (pr *PromRegex) MatchString(s string) bool { return pr.reSuffixMatcher.Match(s) } +// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix. +// +// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo". +// +// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix +// or if the regex part surrounded by prefixSuffix contains alternate regexps. func getSubstringLiteral(expr, prefixSuffix string) string { + // Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix. + sre, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return "" + } + if sre.Op == syntax.OpAlternate { + return "" + } + if !strings.HasPrefix(expr, prefixSuffix) { return "" } diff --git a/lib/regexutil/promregex_test.go b/lib/regexutil/promregex_test.go index 20a04312ad..83cea682fa 100644 --- a/lib/regexutil/promregex_test.go +++ b/lib/regexutil/promregex_test.go @@ -29,7 +29,7 @@ func TestPromRegex(t *testing.T) { } result := pr.MatchString(s) if result != resultExpected { - t.Fatalf("unexpected result when matching %s against %s; got %v; want %v", expr, s, result, resultExpected) + t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected) } // Make sure the result is the same for regular regexp @@ -37,7 +37,7 @@ func TestPromRegex(t *testing.T) { re := regexp.MustCompile(exprAnchored) result = re.MatchString(s) if result != resultExpected { - t.Fatalf("unexpected result when matching %s against %s during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) + t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) } } f("", "", true) @@ -89,4 +89,33 @@ func TestPromRegex(t *testing.T) { f(".*(a|b).*", "xzy", false) f("^(?:true)$", "true", true) f("^(?:true)$", "false", false) + + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 + f(".+;|;.+", ";", false) + f(".+;|;.+", "foo", false) + f(".+;|;.+", "foo;bar", false) + f(".+;|;.+", "foo;", true) + f(".+;|;.+", ";foo", true) + f(".+foo|bar|baz.+", "foo", false) + f(".+foo|bar|baz.+", "afoo", true) + f(".+foo|bar|baz.+", "fooa", false) + f(".+foo|bar|baz.+", "afooa", false) + f(".+foo|bar|baz.+", "bar", true) + f(".+foo|bar|baz.+", "abar", false) + f(".+foo|bar|baz.+", "abara", false) + f(".+foo|bar|baz.+", "bara", false) + f(".+foo|bar|baz.+", "baz", false) + f(".+foo|bar|baz.+", "baza", true) + f(".+foo|bar|baz.+", "abaz", false) + f(".+foo|bar|baz.+", "abaza", false) + f(".+foo|bar|baz.+", "afoo|bar|baza", false) + f(".+(foo|bar|baz).+", "abara", true) + f(".+(foo|bar|baz).+", "afooa", true) + f(".+(foo|bar|baz).+", "abaza", true) + + f(".*;|;.*", ";", true) + f(".*;|;.*", "foo", false) + f(".*;|;.*", "foo;bar", false) + f(".*;|;.*", "foo;", true) + f(".*;|;.*", ";foo", true) } diff --git a/lib/regexutil/regexutil_test.go b/lib/regexutil/regexutil_test.go index e45781d25e..7357e115f4 100644 --- a/lib/regexutil/regexutil_test.go +++ b/lib/regexutil/regexutil_test.go @@ -109,6 +109,14 @@ func TestSimplify(t *testing.T) { // The transformed regexp mustn't match barx f("(foo|bar$)x*", "", "(?:foo|bar$)x*") + + // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297 + f(".+;|;.+", "", ".+;|;.+") + f("^(.+);|;(.+)$", "", ".+;|;.+") + f("^(.+);$|^;(.+)$", "", ".+;|;.+") + f(".*;|;.*", "", ".*;|;.*") + f("^(.*);|;(.*)$", "", ".*;|;.*") + f("^(.*);$|^;(.*)$", "", ".*;|;.*") } func TestRemoveStartEndAnchors(t *testing.T) {