lib/regexutil: add Simplify() function for simplifying the regular expression

This commit is contained in:
Aliaksandr Valialkin 2022-08-26 11:57:12 +03:00
parent b373661988
commit 0ad3bbadd3
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
8 changed files with 368 additions and 212 deletions

View File

@ -197,14 +197,19 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
if rc.Separator != nil { if rc.Separator != nil {
separator = *rc.Separator separator = *rc.Separator
} }
action := strings.ToLower(rc.Action)
if action == "" {
action = "replace"
}
targetLabel := rc.TargetLabel targetLabel := rc.TargetLabel
regexCompiled := defaultRegexForRelabelConfig regexCompiled := defaultRegexForRelabelConfig
regexOriginalCompiled := defaultOriginalRegexForRelabelConfig regexOriginalCompiled := defaultOriginalRegexForRelabelConfig
var regexOrValues []string var regexOrValues []string
if rc.Regex != nil { if rc.Regex != nil && !isDefaultRegex(rc.Regex.S) {
regex := regexutil.RemoveStartEndAnchors(rc.Regex.S) regex := rc.Regex.S
regexOrig := regex regexOrig := regex
if rc.Action != "replace_all" && rc.Action != "labelmap_all" { if rc.Action != "replace_all" && rc.Action != "labelmap_all" {
regex = regexutil.RemoveStartEndAnchors(regex)
regex = "^(?:" + regex + ")$" regex = "^(?:" + regex + ")$"
} }
re, err := regexp.Compile(regex) re, err := regexp.Compile(regex)
@ -232,10 +237,6 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
if rc.Labels != nil { if rc.Labels != nil {
graphiteLabelRules = newGraphiteLabelRules(rc.Labels) graphiteLabelRules = newGraphiteLabelRules(rc.Labels)
} }
action := rc.Action
if action == "" {
action = "replace"
}
switch action { switch action {
case "graphite": case "graphite":
if graphiteMatchTemplate == nil { if graphiteMatchTemplate == nil {
@ -354,3 +355,11 @@ func parseRelabelConfig(rc *RelabelConfig) (*parsedRelabelConfig, error) {
hasLabelReferenceInReplacement: strings.Contains(replacement, "{{"), hasLabelReferenceInReplacement: strings.Contains(replacement, "{{"),
}, nil }, nil
} }
// isDefaultRegex reports whether expr is equivalent to the catch-all ".*" regexp.
//
// The expr is passed through regexutil.Simplify; the result is true only when
// the returned literal prefix is empty and the remaining expression is
// exactly ".*".
func isDefaultRegex(expr string) bool {
	literalPrefix, tail := regexutil.Simplify(expr)
	return literalPrefix == "" && tail == ".*"
}

View File

@ -455,3 +455,21 @@ func TestParseRelabelConfigsFailure(t *testing.T) {
}) })
}) })
} }
// TestIsDefaultRegex checks isDefaultRegex against a table of expressions,
// stopping at the first mismatch.
func TestIsDefaultRegex(t *testing.T) {
	cases := []struct {
		expr string
		want bool
	}{
		{"", false},
		{"foo", false},
		{".+", false},
		{"a.*", false},
		{".*", true},
		{"(.*)", true},
		{"^.*$", true},
		{"(?:.*)", true},
	}
	for _, tc := range cases {
		if got := isDefaultRegex(tc.expr); got != tc.want {
			t.Fatalf("unexpected result for isDefaultRegex(%q); got %v; want %v", tc.expr, got, tc.want)
		}
	}
}

View File

@ -1,11 +1,10 @@
package regexutil package regexutil
import ( import (
"fmt"
"regexp/syntax" "regexp/syntax"
"sort" "sort"
"strings" "strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
) )
// RemoveStartEndAnchors removes '^' at the start of expr and '$' at the end of the expr. // RemoveStartEndAnchors removes '^' at the start of expr and '$' at the end of the expr.
@ -28,15 +27,26 @@ func RemoveStartEndAnchors(expr string) string {
// It returns an empty list if it is impossible to extract "or" values from the regexp. // It returns an empty list if it is impossible to extract "or" values from the regexp.
func GetOrValues(expr string) []string { func GetOrValues(expr string) []string {
expr = RemoveStartEndAnchors(expr) expr = RemoveStartEndAnchors(expr)
sre, err := syntax.Parse(expr, syntax.Perl) prefix, tailExpr := Simplify(expr)
if tailExpr == "" {
return []string{prefix}
}
sre, err := syntax.Parse(tailExpr, syntax.Perl)
if err != nil { if err != nil {
logger.Panicf("BUG: unexpected error when parsing verified expr=%q: %s", expr, err) panic(fmt.Errorf("BUG: unexpected error when parsing verified tailExpr=%q: %w", tailExpr, err))
} }
orValues := getOrValuesExt(sre) orValues := getOrValuesExt(sre)
// Sort orValues for faster index seek later // Sort orValues for faster index seek later
sort.Strings(orValues) sort.Strings(orValues)
if len(prefix) > 0 {
// Add prefix to orValues
for i, orValue := range orValues {
orValues[i] = prefix + orValue
}
}
return orValues return orValues
} }
@ -51,8 +61,6 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
return []string{string(sre.Rune)} return []string{string(sre.Rune)}
case syntax.OpEmptyMatch: case syntax.OpEmptyMatch:
return []string{""} return []string{""}
case syntax.OpBeginText, syntax.OpEndText:
return []string{""}
case syntax.OpAlternate: case syntax.OpAlternate:
a := make([]string, 0, len(sre.Sub)) a := make([]string, 0, len(sre.Sub))
for _, reSub := range sre.Sub { for _, reSub := range sre.Sub {
@ -90,6 +98,9 @@ func getOrValuesExt(sre *syntax.Regexp) []string {
if len(prefixes) == 0 { if len(prefixes) == 0 {
return nil return nil
} }
if len(sre.Sub) == 1 {
return prefixes
}
sre.Sub = sre.Sub[1:] sre.Sub = sre.Sub[1:]
suffixes := getOrValuesExt(sre) suffixes := getOrValuesExt(sre)
if len(suffixes) == 0 { if len(suffixes) == 0 {
@ -120,3 +131,129 @@ func isLiteral(sre *syntax.Regexp) bool {
} }
const maxOrValues = 100 const maxOrValues = 100
// Simplify splits expr into a plaintext prefix and a simplified remaining
// regular expression.
//
// The '^' and '$' anchors at the beginning and the end of the expression
// are dropped from the returned regular expression.
//
// Capturing parens are removed from the expr during simplification,
// so Simplify cannot be used when capturing parens are necessary.
//
// If expr cannot be parsed or the simplified expression cannot be compiled,
// then the whole expr is returned as the prefix with an empty remainder.
func Simplify(expr string) (string, string) {
	parsed, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		// Unparsable regexp - hand it back unchanged as the prefix.
		return expr, ""
	}
	parsed = simplifyRegexp(parsed, false)
	switch {
	case parsed == emptyRegexp:
		return "", ""
	case isLiteral(parsed):
		return string(parsed.Rune), ""
	}

	var prefix string
	if parsed.Op == syntax.OpConcat && isLiteral(parsed.Sub[0]) {
		// Peel the leading literal off the concatenation.
		prefix = string(parsed.Sub[0].Rune)
		parsed.Sub = parsed.Sub[1:]
		if len(parsed.Sub) == 0 {
			return prefix, ""
		}
		// The remainder now follows a prefix, so its leading anchors must be kept.
		parsed = simplifyRegexp(parsed, true)
	}
	if _, err := syntax.Compile(parsed); err != nil {
		// Uncompilable regexp - hand the original expr back as the prefix.
		return expr, ""
	}

	// Strip the verbose forms emitted by Regexp.String().
	// The substitutions are applied one after another, in this order.
	tail := parsed.String()
	for _, subst := range [...][2]string{
		{"(?:)", ""},
		{"(?-s:.)", "."},
		{"(?-m:$)", "$"},
	} {
		tail = strings.ReplaceAll(tail, subst[0], subst[1])
	}
	return prefix, tail
}
// simplifyRegexp repeatedly simplifies sre until its string form stops changing.
//
// Every pass runs simplifyRegexpExt followed by syntax.Regexp.Simplify and
// replaces a result consisting solely of a begin-text or end-text anchor
// with emptyRegexp. When the string form changed, it is re-parsed before
// the next pass. hasPrefix is forwarded to simplifyRegexpExt.
func simplifyRegexp(sre *syntax.Regexp, hasPrefix bool) *syntax.Regexp {
	prev := sre.String()
	for {
		sre = simplifyRegexpExt(sre, hasPrefix, false).Simplify()
		switch sre.Op {
		case syntax.OpBeginText, syntax.OpEndText:
			sre = emptyRegexp
		}
		cur := sre.String()
		if cur == prev {
			// Fixpoint reached.
			return sre
		}
		reparsed, err := syntax.Parse(cur, syntax.Perl)
		if err != nil {
			panic(fmt.Errorf("BUG: cannot parse simplified regexp %q: %w", cur, err))
		}
		sre, prev = reparsed, cur
	}
}
// simplifyRegexpExt performs a single recursive simplification pass over sre.
//
// The tree is modified in place and the (possibly different) root is returned.
// emptyRegexp is returned when the sub-expression collapses to an empty match.
//
// hasPrefix and hasSuffix tell whether sre is preceded / followed by something
// in the enclosing expression. Leading '^' and trailing '$' anchors are dropped
// from a concatenation only when the corresponding flag is false.
func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
switch sre.Op {
case syntax.OpCapture:
// Substitute all the capture regexps with non-capture regexps.
// The node keeps its single sub-expression; only its Op is rewritten.
sre.Op = syntax.OpAlternate
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
return sre
case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
// A repetition of an empty sub-expression is itself empty.
sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
if sre.Sub[0] == emptyRegexp {
return emptyRegexp
}
return sre
case syntax.OpAlternate:
// Do not remove empty captures from OpAlternate, since this may break regexp.
for i, sub := range sre.Sub {
sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
}
return sre
case syntax.OpConcat:
// Filter out empty sub-expressions in place. subs shares the backing array
// with sre.Sub; at most one item is appended per visited item.
subs := sre.Sub[:0]
for i, sub := range sre.Sub {
// A sub-expression has a prefix if the whole concatenation has one or if
// a non-empty sub-expression has already been kept before it.
// It has a suffix if the whole concatenation has one or if it isn't the last item.
sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
if sub != emptyRegexp {
subs = append(subs, sub)
}
}
sre.Sub = subs
// Remove anchors from the beginning and the end of regexp, since they
// will be added later.
if !hasPrefix {
for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
sre.Sub = sre.Sub[1:]
}
}
if !hasSuffix {
for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
sre.Sub = sre.Sub[:len(sre.Sub)-1]
}
}
if len(sre.Sub) == 0 {
return emptyRegexp
}
if len(sre.Sub) == 1 {
// A concatenation of a single item is replaced by the item itself.
return sre.Sub[0]
}
return sre
case syntax.OpEmptyMatch:
// Normalize every empty match to the shared sentinel.
return emptyRegexp
default:
return sre
}
}
// emptyRegexp is the shared sentinel for an empty match.
//
// simplifyRegexpExt returns it for sub-expressions that collapse to nothing,
// and callers detect it by comparing pointers.
var emptyRegexp = &syntax.Regexp{
Op: syntax.OpEmptyMatch,
}

View File

@ -23,6 +23,7 @@ func TestGetOrValues(t *testing.T) {
f("foo.*", nil) f("foo.*", nil)
f(".*", nil) f(".*", nil)
f("foo|.*", nil) f("foo|.*", nil)
f("(fo((o)))|(bar)", []string{"bar", "foo"})
f("foobar", []string{"foobar"}) f("foobar", []string{"foobar"})
f("z|x|c", []string{"c", "x", "z"}) f("z|x|c", []string{"c", "x", "z"})
f("foo|bar", []string{"bar", "foo"}) f("foo|bar", []string{"bar", "foo"})
@ -41,8 +42,71 @@ func TestGetOrValues(t *testing.T) {
f("^foo|bar$", []string{"bar", "foo"}) f("^foo|bar$", []string{"bar", "foo"})
f("^(foo|bar)$", []string{"bar", "foo"}) f("^(foo|bar)$", []string{"bar", "foo"})
f("^a(foo|b(?:a|r))$", []string{"aba", "abr", "afoo"}) f("^a(foo|b(?:a|r))$", []string{"aba", "abr", "afoo"})
// This is incorrect conversion, because the regexp matches nothing. f("^a(foo$|b(?:a$|r))$", []string{"aba", "abr", "afoo"})
// It is OK for now, since such regexps are uncommon in practice. f("^a(^foo|bar$)z$", nil)
// TODO: properly handle this case. }
f("^a(^foo|bar$)z$", []string{"abarz", "afooz"})
func TestSimplify(t *testing.T) {
f := func(s, expectedPrefix, expectedSuffix string) {
t.Helper()
prefix, suffix := Simplify(s)
if prefix != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
}
if suffix != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
}
}
f("", "", "")
f("^", "", "")
f("$", "", "")
f("^()$", "", "")
f("^(?:)$", "", "")
f("^foo|^bar$|baz", "", "foo|ba[rz]")
f("^(foo$|^bar)$", "", "foo|bar")
f("^a(foo$|bar)$", "a", "foo|bar")
f("^a(^foo|bar$)z$", "a", "(?:\\Afoo|bar$)z")
f("foobar", "foobar", "")
f("foo$|^foobar", "foo", "|bar")
f("^(foo$|^foobar)$", "foo", "|bar")
f("foobar|foobaz", "fooba", "[rz]")
f("(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
f("(тестЧЧ|тест)", "тест", "ЧЧ|")
f("foo(bar|baz|bana)", "fooba", "[rz]|na")
f("^foobar|foobaz", "fooba", "[rz]")
f("^foobar|^foobaz$", "fooba", "[rz]")
f("foobar|foobaz", "fooba", "[rz]")
f("(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa.*")
f("foo[bar]+", "foo", "[a-br]+")
f("foo[a-z]+", "foo", "[a-z]+")
f("foo[bar]*", "foo", "[a-br]*")
f("foo[a-z]*", "foo", "[a-z]*")
f("foo[x]+", "foo", "x+")
f("foo[^x]+", "foo", "[^x]+")
f("foo[x]*", "foo", "x*")
f("foo[^x]*", "foo", "[^x]*")
f("foo[x]*bar", "foo", "x*bar")
f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f("foo.+bar", "foo", ".+bar")
f("a(b|c.*).+", "a", "(?:b|c.*).+")
f("ab|ac", "a", "[b-c]")
f("(?i)xyz", "", "(?i:XYZ)")
f("(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)")
f("(?i)up.+x", "", "(?i:UP).+(?i:X)")
f("(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")
// test invalid regexps
f("a(", "a(", "")
f("a[", "a[", "")
f("a[]", "a[]", "")
f("a{", "a{", "")
f("a{}", "a{}", "")
f("invalid(regexp", "invalid(regexp", "")
// The transformed regexp mustn't match aba
f("a?(^ba|c)", "", "a?(?:\\Aba|c)")
// The transformed regexp mustn't match barx
f("(foo|bar$)x*", "", "(?:foo|bar$)x*")
} }

View File

@ -66,8 +66,8 @@ func (tag *Tag) copyFrom(src *Tag) {
tag.Value = append(tag.Value[:0], src.Value...) tag.Value = append(tag.Value[:0], src.Value...)
} }
func marshalTagValueNoTrailingTagSeparator(dst, src []byte) []byte { func marshalTagValueNoTrailingTagSeparator(dst []byte, src string) []byte {
dst = marshalTagValue(dst, src) dst = marshalTagValue(dst, bytesutil.ToUnsafeBytes(src))
// Remove trailing tagSeparatorChar // Remove trailing tagSeparatorChar
return dst[:len(dst)-1] return dst[:len(dst)-1]
} }

View File

@ -363,7 +363,7 @@ func (tf *tagFilter) InitFromGraphiteQuery(commonPrefix, query []byte, paths []s
tf.regexpPrefix = prefix tf.regexpPrefix = prefix
tf.prefix = append(tf.prefix[:0], commonPrefix...) tf.prefix = append(tf.prefix[:0], commonPrefix...)
tf.prefix = marshalTagValue(tf.prefix, nil) tf.prefix = marshalTagValue(tf.prefix, nil)
tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, []byte(prefix)) tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, prefix)
tf.orSuffixes = append(tf.orSuffixes[:0], orSuffixes...) tf.orSuffixes = append(tf.orSuffixes[:0], orSuffixes...)
tf.reSuffixMatch, tf.matchCost = newMatchFuncForOrSuffixes(orSuffixes) tf.reSuffixMatch, tf.matchCost = newMatchFuncForOrSuffixes(orSuffixes)
} }
@ -419,15 +419,15 @@ func (tf *tagFilter) Init(commonPrefix, key, value []byte, isNegative, isRegexp
tf.prefix = append(tf.prefix, commonPrefix...) tf.prefix = append(tf.prefix, commonPrefix...)
tf.prefix = marshalTagValue(tf.prefix, key) tf.prefix = marshalTagValue(tf.prefix, key)
var expr []byte var expr string
prefix := tf.value prefix := bytesutil.ToUnsafeString(tf.value)
if tf.isRegexp { if tf.isRegexp {
prefix, expr = getRegexpPrefix(tf.value) prefix, expr = simplifyRegexp(prefix)
if len(expr) == 0 { if len(expr) == 0 {
tf.value = append(tf.value[:0], prefix...) tf.value = append(tf.value[:0], prefix...)
tf.isRegexp = false tf.isRegexp = false
} else { } else {
tf.regexpPrefix = string(prefix) tf.regexpPrefix = prefix
} }
} }
tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, prefix) tf.prefix = marshalTagValueNoTrailingTagSeparator(tf.prefix, prefix)
@ -508,22 +508,22 @@ func RegexpCacheMisses() uint64 {
return regexpCache.Misses() return regexpCache.Misses()
} }
func getRegexpFromCache(expr []byte) (*regexpCacheValue, error) { func getRegexpFromCache(expr string) (*regexpCacheValue, error) {
if rcv := regexpCache.GetEntry(bytesutil.ToUnsafeString(expr)); rcv != nil { if rcv := regexpCache.GetEntry(expr); rcv != nil {
// Fast path - the regexp found in the cache. // Fast path - the regexp found in the cache.
return rcv.(*regexpCacheValue), nil return rcv.(*regexpCacheValue), nil
} }
// Slow path - build the regexp. // Slow path - build the regexp.
exprOrig := string(expr) exprOrig := expr
expr = []byte(tagCharsRegexpEscaper.Replace(exprOrig)) expr = tagCharsRegexpEscaper.Replace(exprOrig)
exprStr := fmt.Sprintf("^(%s)$", expr) exprStr := fmt.Sprintf("^(%s)$", expr)
re, err := regexp.Compile(exprStr) re, err := regexp.Compile(exprStr)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid regexp %q: %w", exprStr, err) return nil, fmt.Errorf("invalid regexp %q: %w", exprStr, err)
} }
sExpr := string(expr) sExpr := expr
orValues := regexutil.GetOrValues(sExpr) orValues := regexutil.GetOrValues(sExpr)
var reMatch func(b []byte) bool var reMatch func(b []byte) bool
var reCost uint64 var reCost uint64
@ -835,22 +835,28 @@ func (rcv *regexpCacheValue) SizeBytes() int {
return rcv.sizeBytes return rcv.sizeBytes
} }
func getRegexpPrefix(b []byte) ([]byte, []byte) { func simplifyRegexp(expr string) (string, string) {
// Fast path - search the prefix in the cache. // It is safe to pass the expr constructed via bytesutil.ToUnsafeString()
if ps := prefixesCache.GetEntry(bytesutil.ToUnsafeString(b)); ps != nil { // to GetEntry() here.
if ps := prefixesCache.GetEntry(expr); ps != nil {
// Fast path - the simplified expr is found in the cache.
ps := ps.(*prefixSuffix) ps := ps.(*prefixSuffix)
return ps.prefix, ps.suffix return ps.prefix, ps.suffix
} }
// Slow path - extract the regexp prefix from b. // Slow path - simplify the expr.
prefix, suffix := extractRegexpPrefix(b)
// Make a copy of expr before using it,
// since it may be constructed via bytesutil.ToUnsafeString()
expr = string(append([]byte{}, expr...))
prefix, suffix := regexutil.Simplify(expr)
// Put the prefix and the suffix to the cache. // Put the prefix and the suffix to the cache.
ps := &prefixSuffix{ ps := &prefixSuffix{
prefix: prefix, prefix: prefix,
suffix: suffix, suffix: suffix,
} }
prefixesCache.PutEntry(string(b), ps) prefixesCache.PutEntry(expr, ps)
return prefix, suffix return prefix, suffix
} }
@ -897,120 +903,11 @@ func RegexpPrefixesCacheMisses() uint64 {
} }
type prefixSuffix struct { type prefixSuffix struct {
prefix []byte prefix string
suffix []byte suffix string
} }
// SizeBytes implements lrucache.Entry interface // SizeBytes implements lrucache.Entry interface
func (ps *prefixSuffix) SizeBytes() int { func (ps *prefixSuffix) SizeBytes() int {
return cap(ps.prefix) + cap(ps.suffix) + int(unsafe.Sizeof(*ps)) return len(ps.prefix) + len(ps.suffix) + int(unsafe.Sizeof(*ps))
}
// extractRegexpPrefix splits the regexp b into a literal prefix and
// the remaining regular expression.
//
// It returns (b, nil) when b cannot be parsed or when the simplified
// regexp cannot be compiled, and (nil, nil) when b simplifies to an empty match.
// When the whole regexp is a literal, it is returned as the prefix
// with a nil remainder.
func extractRegexpPrefix(b []byte) ([]byte, []byte) {
	sre, err := syntax.Parse(string(b), syntax.Perl)
	if err != nil {
		// Cannot parse the regexp. Return it all as prefix.
		return b, nil
	}
	sre = simplifyRegexp(sre)
	if sre == emptyRegexp {
		return nil, nil
	}
	if isLiteral(sre) {
		return []byte(string(sre.Rune)), nil
	}
	var prefix []byte
	if sre.Op == syntax.OpConcat {
		sub0 := sre.Sub[0]
		if isLiteral(sub0) {
			prefix = []byte(string(sub0.Rune))
			sre.Sub = sre.Sub[1:]
			if len(sre.Sub) == 0 {
				// Nothing is left after the literal, so the literal is the whole
				// expression. Return it instead of silently dropping it.
				return prefix, nil
			}
		}
	}
	if _, err := syntax.Compile(sre); err != nil {
		// Cannot compile the regexp. Return it all as prefix.
		return b, nil
	}
	return prefix, []byte(sre.String())
}
// simplifyRegexp repeatedly simplifies sre until its string form stops changing.
//
// Every pass runs simplifyRegexpExt followed by syntax.Regexp.Simplify and
// replaces a result consisting solely of a begin-text or end-text anchor
// with emptyRegexp. When the string form changed, it is re-parsed before
// the next pass.
func simplifyRegexp(sre *syntax.Regexp) *syntax.Regexp {
	prev := sre.String()
	for {
		sre = simplifyRegexpExt(sre, false, false).Simplify()
		switch sre.Op {
		case syntax.OpBeginText, syntax.OpEndText:
			sre = emptyRegexp
		}
		cur := sre.String()
		if cur == prev {
			// Fixpoint reached.
			return sre
		}
		reparsed, err := syntax.Parse(cur, syntax.Perl)
		if err != nil {
			logger.Panicf("BUG: cannot parse simplified regexp %q: %s", cur, err)
		}
		sre, prev = reparsed, cur
	}
}
// simplifyRegexpExt performs a single recursive simplification pass over sre.
//
// The tree is modified in place and the (possibly different) root is returned.
// emptyRegexp is returned when the sub-expression collapses to an empty match.
//
// hasPrefix and hasSuffix tell whether sre is preceded / followed by something
// in the enclosing expression. Leading '^' and trailing '$' anchors are dropped
// from a concatenation only when the corresponding flag is false.
func simplifyRegexpExt(sre *syntax.Regexp, hasPrefix, hasSuffix bool) *syntax.Regexp {
	switch sre.Op {
	case syntax.OpCapture:
		// Substitute all the capture regexps with non-capture regexps.
		sre.Op = syntax.OpAlternate
		sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
		if sre.Sub[0] == emptyRegexp {
			return emptyRegexp
		}
		return sre
	case syntax.OpStar, syntax.OpPlus, syntax.OpQuest, syntax.OpRepeat:
		sre.Sub[0] = simplifyRegexpExt(sre.Sub[0], hasPrefix, hasSuffix)
		if sre.Sub[0] == emptyRegexp {
			return emptyRegexp
		}
		return sre
	case syntax.OpAlternate:
		// Do not remove empty captures from OpAlternate, since this may break regexp.
		for i, sub := range sre.Sub {
			sre.Sub[i] = simplifyRegexpExt(sub, hasPrefix, hasSuffix)
		}
		return sre
	case syntax.OpConcat:
		// Filter out empty sub-expressions in place. subs shares the backing array
		// with sre.Sub; at most one item is appended per visited item.
		subs := sre.Sub[:0]
		for i, sub := range sre.Sub {
			// The flags of the enclosing expression must be propagated to the children.
			// Otherwise anchors nested in the first or the last item
			// (e.g. "a((^b)c)" or "(x(a$))b") are treated as if they were located
			// at the edge of the whole regexp and are incorrectly dropped.
			sub = simplifyRegexpExt(sub, hasPrefix || len(subs) > 0, hasSuffix || i+1 < len(sre.Sub))
			if sub != emptyRegexp {
				subs = append(subs, sub)
			}
		}
		sre.Sub = subs
		// Remove anchors from the beginning and the end of regexp, since they
		// will be added later.
		if !hasPrefix {
			for len(sre.Sub) > 0 && sre.Sub[0].Op == syntax.OpBeginText {
				sre.Sub = sre.Sub[1:]
			}
		}
		if !hasSuffix {
			for len(sre.Sub) > 0 && sre.Sub[len(sre.Sub)-1].Op == syntax.OpEndText {
				sre.Sub = sre.Sub[:len(sre.Sub)-1]
			}
		}
		if len(sre.Sub) == 0 {
			return emptyRegexp
		}
		return sre
	case syntax.OpEmptyMatch:
		// Normalize every empty match to the shared sentinel.
		return emptyRegexp
	default:
		return sre
	}
}
var emptyRegexp = &syntax.Regexp{
Op: syntax.OpEmptyMatch,
} }

View File

@ -675,26 +675,11 @@ func TestGetCommonPrefix(t *testing.T) {
f([]string{"foo1", "foo2", "foo34"}, "foo") f([]string{"foo1", "foo2", "foo34"}, "foo")
} }
// TestExtractRegexpPrefix checks the prefix and the suffix returned by
// extractRegexpPrefix for a table of expressions, stopping at the first mismatch.
func TestExtractRegexpPrefix(t *testing.T) {
	cases := []struct {
		expr       string
		wantPrefix string
		wantSuffix string
	}{
		{"", "", ""},
		{"foobar", "foobar", ""},
	}
	for _, tc := range cases {
		gotPrefix, gotSuffix := extractRegexpPrefix([]byte(tc.expr))
		if string(gotPrefix) != tc.wantPrefix {
			t.Fatalf("unexpected prefix for %q; got %q; want %q", tc.expr, gotPrefix, tc.wantPrefix)
		}
		if string(gotSuffix) != tc.wantSuffix {
			t.Fatalf("unexpected suffix for %q; got %q; want %q", tc.expr, gotSuffix, tc.wantSuffix)
		}
	}
}
func TestGetRegexpFromCache(t *testing.T) { func TestGetRegexpFromCache(t *testing.T) {
f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string, suffixExpected string) { f := func(s string, orValuesExpected, expectedMatches, expectedMismatches []string, suffixExpected string) {
t.Helper() t.Helper()
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
rcv, err := getRegexpFromCache([]byte(s)) rcv, err := getRegexpFromCache(s)
if err != nil { if err != nil {
t.Fatalf("unexpected error for s=%q: %s", s, err) t.Fatalf("unexpected error for s=%q: %s", s, err)
} }
@ -764,7 +749,7 @@ func TestTagFilterMatchSuffix(t *testing.T) {
var tf tagFilter var tf tagFilter
tvNoTrailingTagSeparator := func(s string) string { tvNoTrailingTagSeparator := func(s string) string {
return string(marshalTagValueNoTrailingTagSeparator(nil, []byte(s))) return string(marshalTagValueNoTrailingTagSeparator(nil, s))
} }
init := func(value string, isNegative, isRegexp bool, expectedPrefix string) { init := func(value string, isNegative, isRegexp bool, expectedPrefix string) {
t.Helper() t.Helper()
@ -1145,75 +1130,75 @@ func TestTagFilterMatchSuffix(t *testing.T) {
}) })
} }
func TestGetRegexpPrefix(t *testing.T) { func TestSimplifyRegexp(t *testing.T) {
f := func(t *testing.T, s, expectedPrefix, expectedSuffix string) { f := func(s, expectedPrefix, expectedSuffix string) {
t.Helper() t.Helper()
prefix, suffix := getRegexpPrefix([]byte(s)) prefix, suffix := simplifyRegexp(s)
if string(prefix) != expectedPrefix { if prefix != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix) t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
} }
if string(suffix) != expectedSuffix { if suffix != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix) t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
} }
// Get the prefix from cache. // Get the prefix from cache.
prefix, suffix = getRegexpPrefix([]byte(s)) prefix, suffix = simplifyRegexp(s)
if string(prefix) != expectedPrefix { if prefix != expectedPrefix {
t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix) t.Fatalf("unexpected prefix for s=%q; got %q; want %q", s, prefix, expectedPrefix)
} }
if string(suffix) != expectedSuffix { if suffix != expectedSuffix {
t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix) t.Fatalf("unexpected suffix for s=%q; got %q; want %q", s, suffix, expectedSuffix)
} }
} }
f(t, "", "", "") f("", "", "")
f(t, "^", "", "") f("^", "", "")
f(t, "$", "", "") f("$", "", "")
f(t, "^()$", "", "") f("^()$", "", "")
f(t, "^(?:)$", "", "") f("^(?:)$", "", "")
f(t, "foobar", "foobar", "") f("foobar", "foobar", "")
f(t, "foo$|^foobar", "foo", "(?:(?:)|bar)") f("foo$|^foobar", "foo", "|bar")
f(t, "^(foo$|^foobar)$", "foo", "(?:(?:)|bar)") f("^(foo$|^foobar)$", "foo", "|bar")
f(t, "foobar|foobaz", "fooba", "[rz]") f("foobar|foobaz", "fooba", "[rz]")
f(t, "(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x") f("(fo|(zar|bazz)|x)", "", "fo|zar|bazz|x")
f(t, "(тестЧЧ|тест)", "тест", "(?:ЧЧ|(?:))") f("(тестЧЧ|тест)", "тест", "ЧЧ|")
f(t, "foo(bar|baz|bana)", "fooba", "(?:[rz]|na)") f("foo(bar|baz|bana)", "fooba", "[rz]|na")
f(t, "^foobar|foobaz", "fooba", "[rz]") f("^foobar|foobaz", "fooba", "[rz]")
f(t, "^foobar|^foobaz$", "fooba", "[rz]") f("^foobar|^foobaz$", "fooba", "[rz]")
f(t, "foobar|foobaz", "fooba", "[rz]") f("foobar|foobaz", "fooba", "[rz]")
f(t, "(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa(?-s:.)*") f("(?:^foobar|^foobaz)aa.*", "fooba", "[rz]aa.*")
f(t, "foo[bar]+", "foo", "[a-br]+") f("foo[bar]+", "foo", "[a-br]+")
f(t, "foo[a-z]+", "foo", "[a-z]+") f("foo[a-z]+", "foo", "[a-z]+")
f(t, "foo[bar]*", "foo", "[a-br]*") f("foo[bar]*", "foo", "[a-br]*")
f(t, "foo[a-z]*", "foo", "[a-z]*") f("foo[a-z]*", "foo", "[a-z]*")
f(t, "foo[x]+", "foo", "x+") f("foo[x]+", "foo", "x+")
f(t, "foo[^x]+", "foo", "[^x]+") f("foo[^x]+", "foo", "[^x]+")
f(t, "foo[x]*", "foo", "x*") f("foo[x]*", "foo", "x*")
f(t, "foo[^x]*", "foo", "[^x]*") f("foo[^x]*", "foo", "[^x]*")
f(t, "foo[x]*bar", "foo", "x*bar") f("foo[x]*bar", "foo", "x*bar")
f(t, "fo\\Bo[x]*bar?", "fo", "\\Box*bar?") f("fo\\Bo[x]*bar?", "fo", "\\Box*bar?")
f(t, "foo.+bar", "foo", "(?-s:.)+bar") f("foo.+bar", "foo", ".+bar")
f(t, "a(b|c.*).+", "a", "(?:b|c(?-s:.)*)(?-s:.)+") f("a(b|c.*).+", "a", "(?:b|c.*).+")
f(t, "ab|ac", "a", "[b-c]") f("ab|ac", "a", "[b-c]")
f(t, "(?i)xyz", "", "(?i:XYZ)") f("(?i)xyz", "", "(?i:XYZ)")
f(t, "(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)") f("(?i)foo|bar", "", "(?i:FOO)|(?i:BAR)")
f(t, "(?i)up.+x", "", "(?i:UP)(?-s:.)+(?i:X)") f("(?i)up.+x", "", "(?i:UP).+(?i:X)")
f(t, "(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)") f("(?smi)xy.*z$", "", "(?i:XY)(?s:.)*(?i:Z)(?m:$)")
// test invalid regexps // test invalid regexps
f(t, "a(", "a(", "") f("a(", "a(", "")
f(t, "a[", "a[", "") f("a[", "a[", "")
f(t, "a[]", "a[]", "") f("a[]", "a[]", "")
f(t, "a{", "a{", "") f("a{", "a{", "")
f(t, "a{}", "a{}", "") f("a{}", "a{}", "")
f(t, "invalid(regexp", "invalid(regexp", "") f("invalid(regexp", "invalid(regexp", "")
// The transformed regexp mustn't match aba // The transformed regexp mustn't match aba
f(t, "a?(^ba|c)", "", "a?(?:\\Aba|c)") f("a?(^ba|c)", "", "a?(?:\\Aba|c)")
// The transformed regexp mustn't match barx // The transformed regexp mustn't match barx
f(t, "(foo|bar$)x*", "", "(?:foo|bar(?-m:$))x*") f("(foo|bar$)x*", "", "(?:foo|bar$)x*")
} }
func TestTagFiltersString(t *testing.T) { func TestTagFiltersString(t *testing.T) {

View File

@ -32,6 +32,29 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) {
} }
}) })
}) })
b.Run("regexp-any-suffix-match-anchored", func(b *testing.B) {
key := []byte("^foo.*$")
isNegative := false
isRegexp := true
suffix := marshalTagValue(nil, []byte("ojksdfds"))
b.ReportAllocs()
b.SetBytes(int64(1))
b.RunParallel(func(pb *testing.PB) {
var tf tagFilter
if err := tf.Init(nil, nil, key, isNegative, isRegexp); err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
for pb.Next() {
ok, err := tf.matchSuffix(suffix)
if err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
if !ok {
logger.Panicf("BUG: unexpected suffix mismatch")
}
}
})
})
b.Run("regexp-any-nonzero-suffix-match", func(b *testing.B) { b.Run("regexp-any-nonzero-suffix-match", func(b *testing.B) {
key := []byte("foo.+") key := []byte("foo.+")
isNegative := false isNegative := false
@ -55,6 +78,29 @@ func BenchmarkTagFilterMatchSuffix(b *testing.B) {
} }
}) })
}) })
b.Run("regexp-any-nonzero-suffix-match-anchored", func(b *testing.B) {
key := []byte("^foo.+$")
isNegative := false
isRegexp := true
suffix := marshalTagValue(nil, []byte("ojksdfds"))
b.ReportAllocs()
b.SetBytes(int64(1))
b.RunParallel(func(pb *testing.PB) {
var tf tagFilter
if err := tf.Init(nil, nil, key, isNegative, isRegexp); err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
for pb.Next() {
ok, err := tf.matchSuffix(suffix)
if err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
if !ok {
logger.Panicf("BUG: unexpected suffix mismatch")
}
}
})
})
b.Run("regexp-any-nonzero-suffix-mismatch", func(b *testing.B) { b.Run("regexp-any-nonzero-suffix-mismatch", func(b *testing.B) {
key := []byte("foo.+") key := []byte("foo.+")
isNegative := false isNegative := false