lib/regexutil: properly handle alternate regexps surrounded by .+ or .*

Previously the following regexps were improperly handled:

  .+foo|bar.+
  .*foo|bar.*

This could lead to unexpected regexp match results.

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297

Thanks to @Haleygo for the initial attempt to fix the issue at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5308
This commit is contained in:
Aliaksandr Valialkin 2023-11-13 18:23:36 +01:00
parent 8256937d5e
commit fb2071a01e
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
4 changed files with 56 additions and 2 deletions

View File

@ -97,6 +97,7 @@ The sandbox cluster installation is running under the constant load generated by
* BUGFIX: dashboards/cluster: fix description about `max` threshold for `Concurrent selects` panel. Before, it was mistakenly implying that `max` is equal to the double of available CPUs. * BUGFIX: dashboards/cluster: fix description about `max` threshold for `Concurrent selects` panel. Before, it was mistakenly implying that `max` is equal to the double of available CPUs.
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): bump hard-coded limit for search query size at `vmstorage` from 1MB to 5MB. The change should be more suitable for real-world scenarios and protect vmstorage from excessive memory usage. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5154) for details * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): bump hard-coded limit for search query size at `vmstorage` from 1MB to 5MB. The change should be more suitable for real-world scenarios and protect vmstorage from excessive memory usage. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5154) for details
* BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): fix error when creating an incremental backup with the `-origin` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5144) for details. * BUGFIX: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): fix error when creating an incremental backup with the `-origin` command-line flag. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5144) for details.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly apply [relabeling](https://docs.victoriametrics.com/vmagent.html#relabeling) with `regex`, which start and end with `.+` or `.*` and which contain alternate sub-regexps. For example, `.+;|;.+` or `.*foo|bar|baz.*`. Previously such regexps were improperly parsed, which could result in undexpected relabeling results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover Kubernetes targets via [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some targets and some labels could be skipped during service discovery because of the bug introduced in [v1.93.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.5) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4850). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5216) for more details. * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly discover Kubernetes targets via [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs). Previously some targets and some labels could be skipped during service discovery because of the bug introduced in [v1.93.5](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.93.5) when implementing [this feature](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4850). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5216) for more details.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix vmagent ignoring configuration reload for streaming aggregation if it was started with empty streaming aggregation config. Thanks to @aluode99 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5178). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): fix vmagent ignoring configuration reload for streaming aggregation if it was started with empty streaming aggregation config. Thanks to @aluode99 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5178).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not scrape targets if the corresponding [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to files with invalid auth configs. Previously the targets were scraped without properly set auth headers in this case. Now targets are scraped only after the files are updated with valid auth configs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not scrape targets if the corresponding [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to files with invalid auth configs. Previously the targets were scraped without properly set auth headers in this case. Now targets are scraped only after the files are updated with valid auth configs. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153).

View File

@ -2,6 +2,7 @@ package regexutil
import ( import (
"regexp" "regexp"
"regexp/syntax"
"strings" "strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -105,7 +106,22 @@ func (pr *PromRegex) MatchString(s string) bool {
return pr.reSuffixMatcher.Match(s) return pr.reSuffixMatcher.Match(s)
} }
// getSubstringLiteral returns regex part from expr surrounded by prefixSuffix.
//
// For example, if expr=".+foo.+" and prefixSuffix=".+", then the function returns "foo".
//
// An empty string is returned if expr doesn't contain the given prefixSuffix prefix and suffix
// or if the regex part surrounded by prefixSuffix contains alternate regexps.
func getSubstringLiteral(expr, prefixSuffix string) string { func getSubstringLiteral(expr, prefixSuffix string) string {
// Verify that the expr doesn't contain alternate regexps. In this case it is unsafe removing prefix and suffix.
sre, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return ""
}
if sre.Op == syntax.OpAlternate {
return ""
}
if !strings.HasPrefix(expr, prefixSuffix) { if !strings.HasPrefix(expr, prefixSuffix) {
return "" return ""
} }

View File

@ -29,7 +29,7 @@ func TestPromRegex(t *testing.T) {
} }
result := pr.MatchString(s) result := pr.MatchString(s)
if result != resultExpected { if result != resultExpected {
t.Fatalf("unexpected result when matching %s against %s; got %v; want %v", expr, s, result, resultExpected) t.Fatalf("unexpected result when matching %q against %q; got %v; want %v", expr, s, result, resultExpected)
} }
// Make sure the result is the same for regular regexp // Make sure the result is the same for regular regexp
@ -37,7 +37,7 @@ func TestPromRegex(t *testing.T) {
re := regexp.MustCompile(exprAnchored) re := regexp.MustCompile(exprAnchored)
result = re.MatchString(s) result = re.MatchString(s)
if result != resultExpected { if result != resultExpected {
t.Fatalf("unexpected result when matching %s against %s during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected) t.Fatalf("unexpected result when matching %q against %q during sanity check; got %v; want %v", exprAnchored, s, result, resultExpected)
} }
} }
f("", "", true) f("", "", true)
@ -89,4 +89,33 @@ func TestPromRegex(t *testing.T) {
f(".*(a|b).*", "xzy", false) f(".*(a|b).*", "xzy", false)
f("^(?:true)$", "true", true) f("^(?:true)$", "true", true)
f("^(?:true)$", "false", false) f("^(?:true)$", "false", false)
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", ";", false)
f(".+;|;.+", "foo", false)
f(".+;|;.+", "foo;bar", false)
f(".+;|;.+", "foo;", true)
f(".+;|;.+", ";foo", true)
f(".+foo|bar|baz.+", "foo", false)
f(".+foo|bar|baz.+", "afoo", true)
f(".+foo|bar|baz.+", "fooa", false)
f(".+foo|bar|baz.+", "afooa", false)
f(".+foo|bar|baz.+", "bar", true)
f(".+foo|bar|baz.+", "abar", false)
f(".+foo|bar|baz.+", "abara", false)
f(".+foo|bar|baz.+", "bara", false)
f(".+foo|bar|baz.+", "baz", false)
f(".+foo|bar|baz.+", "baza", true)
f(".+foo|bar|baz.+", "abaz", false)
f(".+foo|bar|baz.+", "abaza", false)
f(".+foo|bar|baz.+", "afoo|bar|baza", false)
f(".+(foo|bar|baz).+", "abara", true)
f(".+(foo|bar|baz).+", "afooa", true)
f(".+(foo|bar|baz).+", "abaza", true)
f(".*;|;.*", ";", true)
f(".*;|;.*", "foo", false)
f(".*;|;.*", "foo;bar", false)
f(".*;|;.*", "foo;", true)
f(".*;|;.*", ";foo", true)
} }

View File

@ -109,6 +109,14 @@ func TestSimplify(t *testing.T) {
// The transformed regexp mustn't match barx // The transformed regexp mustn't match barx
f("(foo|bar$)x*", "", "(?:foo|bar$)x*") f("(foo|bar$)x*", "", "(?:foo|bar$)x*")
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5297
f(".+;|;.+", "", ".+;|;.+")
f("^(.+);|;(.+)$", "", ".+;|;.+")
f("^(.+);$|^;(.+)$", "", ".+;|;.+")
f(".*;|;.*", "", ".*;|;.*")
f("^(.*);|;(.*)$", "", ".*;|;.*")
f("^(.*);$|^;(.*)$", "", ".*;|;.*")
} }
func TestRemoveStartEndAnchors(t *testing.T) { func TestRemoveStartEndAnchors(t *testing.T) {