VictoriaMetrics/vendor/github.com/bmatcuk/doublestar/v4/match.go
2023-10-31 20:19:51 +01:00

382 lines
11 KiB
Go

package doublestar
import (
"path/filepath"
"unicode/utf8"
)
// Match reports whether name matches the shell pattern.
// The pattern syntax is:
//
// pattern:
// { term }
// term:
// '*' matches any sequence of non-path-separators
// '/**/' matches zero or more directories
// '?' matches any single non-path-separator character
// '[' [ '^' '!' ] { character-range } ']'
// character class (must be non-empty)
// starting with `^` or `!` negates the class
// '{' { term } [ ',' { term } ... ] '}'
// alternatives
// c matches character c (c != '*', '?', '\\', '[')
// '\\' c matches character c
//
// character-range:
// c matches character c (c != '\\', '-', ']')
// '\\' c matches character c
// lo '-' hi matches character c for lo <= c <= hi
//
// Match returns true if `name` matches the file name `pattern`. `name` and
// `pattern` are split on forward slash (`/`) characters and may be relative or
// absolute.
//
// Match requires pattern to match all of name, not just a substring.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed.
//
// A doublestar (`**`) should appear surrounded by path separators such as
// `/**/`. A mid-pattern doublestar (`**`) behaves like bash's globstar
// option: a pattern such as `path/to/**.txt` would return the same results as
// `path/to/*.txt`. The pattern you're looking for is `path/to/**/*.txt`.
//
// Note: this is meant as a drop-in replacement for path.Match() which
// always uses '/' as the path separator. If you want to support systems
// which use a different path separator (such as Windows), what you want
// is PathMatch(). Alternatively, you can run filepath.ToSlash() on both
// pattern and name and then use this function.
//
// Note: users should _not_ count on the returned error,
// doublestar.ErrBadPattern, being equal to path.ErrBadPattern.
//
func Match(pattern, name string) (bool, error) {
return matchWithSeparator(pattern, name, '/', true)
}
// PathMatch returns true if `name` matches the file name `pattern`. The
// difference between Match and PathMatch is that PathMatch will automatically
// use your system's path separator to split `name` and `pattern`. On systems
// where the path separator is `'\'`, escaping will be disabled.
//
// Note: this is meant as a drop-in replacement for filepath.Match(). It
// assumes that both `pattern` and `name` are using the system's path
// separator. If you can't be sure of that, use filepath.ToSlash() on both
// `pattern` and `name`, and then use the Match() function instead.
//
func PathMatch(pattern, name string) (bool, error) {
return matchWithSeparator(pattern, name, filepath.Separator, true)
}
func matchWithSeparator(pattern, name string, separator rune, validate bool) (matched bool, err error) {
return doMatchWithSeparator(pattern, name, separator, validate, -1, -1, -1, -1, 0, 0)
}
func doMatchWithSeparator(pattern, name string, separator rune, validate bool, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, patIdx, nameIdx int) (matched bool, err error) {
patLen := len(pattern)
nameLen := len(name)
startOfSegment := true
MATCH:
for nameIdx < nameLen {
if patIdx < patLen {
switch pattern[patIdx] {
case '*':
if patIdx++; patIdx < patLen && pattern[patIdx] == '*' {
// doublestar - must begin with a path separator, otherwise we'll
// treat it like a single star like bash
patIdx++
if startOfSegment {
if patIdx >= patLen {
// pattern ends in `/**`: return true
return true, nil
}
// doublestar must also end with a path separator, otherwise we're
// just going to treat the doublestar as a single star like bash
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
if patRune == separator {
patIdx += patRuneLen
doublestarPatternBacktrack = patIdx
doublestarNameBacktrack = nameIdx
starPatternBacktrack = -1
starNameBacktrack = -1
continue
}
}
}
startOfSegment = false
starPatternBacktrack = patIdx
starNameBacktrack = nameIdx
continue
case '?':
startOfSegment = false
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
if nameRune == separator {
// `?` cannot match the separator
break
}
patIdx++
nameIdx += nameRuneLen
continue
case '[':
startOfSegment = false
if patIdx++; patIdx >= patLen {
// class didn't end
return false, ErrBadPattern
}
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
matched := false
negate := pattern[patIdx] == '!' || pattern[patIdx] == '^'
if negate {
patIdx++
}
if patIdx >= patLen || pattern[patIdx] == ']' {
// class didn't end or empty character class
return false, ErrBadPattern
}
last := utf8.MaxRune
for patIdx < patLen && pattern[patIdx] != ']' {
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
// match a range
if last < utf8.MaxRune && patRune == '-' && patIdx < patLen && pattern[patIdx] != ']' {
if pattern[patIdx] == '\\' {
// next character is escaped
patIdx++
}
patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
if last <= nameRune && nameRune <= patRune {
matched = true
break
}
// didn't match range - reset `last`
last = utf8.MaxRune
continue
}
// not a range - check if the next rune is escaped
if patRune == '\\' {
patRune, patRuneLen = utf8.DecodeRuneInString(pattern[patIdx:])
patIdx += patRuneLen
}
// check if the rune matches
if patRune == nameRune {
matched = true
break
}
// no matches yet
last = patRune
}
if matched == negate {
// failed to match - if we reached the end of the pattern, that means
// we never found a closing `]`
if patIdx >= patLen {
return false, ErrBadPattern
}
break
}
closingIdx := indexUnescapedByte(pattern[patIdx:], ']', true)
if closingIdx == -1 {
// no closing `]`
return false, ErrBadPattern
}
patIdx += closingIdx + 1
nameIdx += nameRuneLen
continue
case '{':
startOfSegment = false
beforeIdx := patIdx
patIdx++
closingIdx := indexMatchedClosingAlt(pattern[patIdx:], separator != '\\')
if closingIdx == -1 {
// no closing `}`
return false, ErrBadPattern
}
closingIdx += patIdx
for {
commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
if commaIdx == -1 {
break
}
commaIdx += patIdx
result, err := doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:commaIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
if result || err != nil {
return result, err
}
patIdx = commaIdx + 1
}
return doMatchWithSeparator(pattern[:beforeIdx]+pattern[patIdx:closingIdx]+pattern[closingIdx+1:], name, separator, validate, doublestarPatternBacktrack, doublestarNameBacktrack, starPatternBacktrack, starNameBacktrack, beforeIdx, nameIdx)
case '\\':
if separator != '\\' {
// next rune is "escaped" in the pattern - literal match
if patIdx++; patIdx >= patLen {
// pattern ended
return false, ErrBadPattern
}
}
fallthrough
default:
patRune, patRuneLen := utf8.DecodeRuneInString(pattern[patIdx:])
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
if patRune != nameRune {
if separator != '\\' && patIdx > 0 && pattern[patIdx-1] == '\\' {
// if this rune was meant to be escaped, we need to move patIdx
// back to the backslash before backtracking or validating below
patIdx--
}
break
}
patIdx += patRuneLen
nameIdx += nameRuneLen
startOfSegment = patRune == separator
continue
}
}
if starPatternBacktrack >= 0 {
// `*` backtrack, but only if the `name` rune isn't the separator
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[starNameBacktrack:])
if nameRune != separator {
starNameBacktrack += nameRuneLen
patIdx = starPatternBacktrack
nameIdx = starNameBacktrack
startOfSegment = false
continue
}
}
if doublestarPatternBacktrack >= 0 {
// `**` backtrack, advance `name` past next separator
nameIdx = doublestarNameBacktrack
for nameIdx < nameLen {
nameRune, nameRuneLen := utf8.DecodeRuneInString(name[nameIdx:])
nameIdx += nameRuneLen
if nameRune == separator {
doublestarNameBacktrack = nameIdx
patIdx = doublestarPatternBacktrack
startOfSegment = true
continue MATCH
}
}
}
if validate && patIdx < patLen && !doValidatePattern(pattern[patIdx:], separator) {
return false, ErrBadPattern
}
return false, nil
}
if nameIdx < nameLen {
// we reached the end of `pattern` before the end of `name`
return false, nil
}
// we've reached the end of `name`; we've successfully matched if we've also
// reached the end of `pattern`, or if the rest of `pattern` can match a
// zero-length string
return isZeroLengthPattern(pattern[patIdx:], separator)
}
func isZeroLengthPattern(pattern string, separator rune) (ret bool, err error) {
// `/**`, `**/`, and `/**/` are special cases - a pattern such as `path/to/a/**` or `path/to/a/**/`
// *should* match `path/to/a` because `a` might be a directory
if pattern == "" ||
pattern == "*" ||
pattern == "**" ||
pattern == string(separator)+"**" ||
pattern == "**"+string(separator) ||
pattern == string(separator)+"**"+string(separator) {
return true, nil
}
if pattern[0] == '{' {
closingIdx := indexMatchedClosingAlt(pattern[1:], separator != '\\')
if closingIdx == -1 {
// no closing '}'
return false, ErrBadPattern
}
closingIdx += 1
patIdx := 1
for {
commaIdx := indexNextAlt(pattern[patIdx:closingIdx], separator != '\\')
if commaIdx == -1 {
break
}
commaIdx += patIdx
ret, err = isZeroLengthPattern(pattern[patIdx:commaIdx]+pattern[closingIdx+1:], separator)
if ret || err != nil {
return
}
patIdx = commaIdx + 1
}
return isZeroLengthPattern(pattern[patIdx:closingIdx]+pattern[closingIdx+1:], separator)
}
// no luck - validate the rest of the pattern
if !doValidatePattern(pattern, separator) {
return false, ErrBadPattern
}
return false, nil
}
// Finds the index of the first unescaped byte `c`, or negative 1.
func indexUnescapedByte(s string, c byte, allowEscaping bool) int {
l := len(s)
for i := 0; i < l; i++ {
if allowEscaping && s[i] == '\\' {
// skip next byte
i++
} else if s[i] == c {
return i
}
}
return -1
}
// Assuming the byte before the beginning of `s` is an opening `{`, this
// function will find the index of the matching `}`. That is, it'll skip over
// any nested `{}` and account for escaping
func indexMatchedClosingAlt(s string, allowEscaping bool) int {
alts := 1
l := len(s)
for i := 0; i < l; i++ {
if allowEscaping && s[i] == '\\' {
// skip next byte
i++
} else if s[i] == '{' {
alts++
} else if s[i] == '}' {
if alts--; alts == 0 {
return i
}
}
}
return -1
}