mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-12 12:46:23 +01:00
lib/logstorage: pre-calculate hashes from tokens used in bloom filter search
Previously, per-token hashes for per-block bloom filters were recalculated for every scanned block. This could be slow when the number of tokens or the number of blocks to scan is large. Pre-calculate the token hashes once and then reuse them when searching in bloom filters. This improves performance by 2.5x for `in(...)` filters with many values.
This commit is contained in:
parent
6fe0a2700e
commit
7dcce1ca02
@ -15,6 +15,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||
|
||||
## tip
|
||||
|
||||
* FEATURE: optimize [multi-exact queries](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter) with many phrases to search. For example, `ip:in(path:="/foo/bar" | keep ip)` when there are many unique values for the `ip` field among log entries with the `/foo/bar` path.
|
||||
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add support for displaying the top 5 log streams in the hits graph. The remaining log streams are grouped into an "other" label. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6545).
|
||||
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add the ability to customize the graph display with options for bar, line, stepped line, and points.
|
||||
* FEATURE: [web UI](https://docs.victoriametrics.com/victorialogs/querying/#web-ui): add fields for setting AccountID and ProjectID. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6631).
|
||||
|
@ -72,49 +72,64 @@ func (bf *bloomFilter) mustInit(tokens []string) {
|
||||
|
||||
// bloomFilterAdd adds the given tokens to the bloom filter bits.
//
// Each token contributes bloomFilterHashesCount hashes; they are generated by
// appendTokensHashes into a scratch buffer obtained from encoding.GetUint64s,
// which is handed back via encoding.PutUint64s before returning.
//
// NOTE(review): h % maxBits divides by zero when bits is empty - presumably
// callers always pass a non-empty bits slice; confirm at the call sites.
|
||||
func bloomFilterAdd(bits []uint64, tokens []string) {
|
||||
hashesCount := len(tokens) * bloomFilterHashesCount
|
||||
a := encoding.GetUint64s(hashesCount)
|
||||
a.A = appendTokensHashes(a.A[:0], tokens)
|
||||
|
||||
maxBits := uint64(len(bits)) * 64
|
||||
for _, h := range a.A {
|
||||
// Map the hash to a bit position: i is the 64-bit word index, j is the bit offset inside that word.
idx := h % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
// Store the word only when the bit is not set yet.
if (w & mask) == 0 {
|
||||
bits[i] = w | mask
|
||||
}
|
||||
}
|
||||
|
||||
encoding.PutUint64s(a)
|
||||
}
|
||||
|
||||
// appendTokensHashes appends hashes for the given tokens to dst and returns the result.
|
||||
//
|
||||
// the appended hashes can be then passed to bloomFilter.containsAll().
|
||||
func appendTokensHashes(dst []uint64, tokens []string) []uint64 {
|
||||
dstLen := len(dst)
|
||||
hashesCount := len(tokens) * bloomFilterHashesCount
|
||||
|
||||
dst = slicesutil.SetLength(dst, dstLen+hashesCount)
|
||||
dst = dst[:dstLen]
|
||||
|
||||
var buf [8]byte
|
||||
hp := (*uint64)(unsafe.Pointer(&buf[0]))
|
||||
for _, token := range tokens {
|
||||
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
|
||||
for i := 0; i < bloomFilterHashesCount; i++ {
|
||||
hi := xxhash.Sum64(buf[:])
|
||||
h := xxhash.Sum64(buf[:])
|
||||
(*hp)++
|
||||
idx := hi % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
bits[i] = w | mask
|
||||
}
|
||||
dst = append(dst, h)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// containsAll returns true if bf contains all the given tokens.
|
||||
func (bf *bloomFilter) containsAll(tokens []string) bool {
|
||||
// containsAll returns true if bf contains all the given tokens hashes generated by appendTokensHashes.
|
||||
func (bf *bloomFilter) containsAll(hashes []uint64) bool {
|
||||
bits := bf.bits
|
||||
if len(bits) == 0 {
|
||||
return true
|
||||
}
|
||||
maxBits := uint64(len(bits)) * 64
|
||||
var buf [8]byte
|
||||
hp := (*uint64)(unsafe.Pointer(&buf[0]))
|
||||
for _, token := range tokens {
|
||||
*hp = xxhash.Sum64(bytesutil.ToUnsafeBytes(token))
|
||||
for i := 0; i < bloomFilterHashesCount; i++ {
|
||||
hi := xxhash.Sum64(buf[:])
|
||||
(*hp)++
|
||||
idx := hi % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
// The token is missing
|
||||
return false
|
||||
}
|
||||
for _, h := range hashes {
|
||||
idx := h % maxBits
|
||||
i := idx / 64
|
||||
j := idx % 64
|
||||
mask := uint64(1) << j
|
||||
w := bits[i]
|
||||
if (w & mask) == 0 {
|
||||
// The token is missing
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
|
@ -14,8 +14,9 @@ func TestBloomFilter(t *testing.T) {
|
||||
if err := bf.unmarshal(data); err != nil {
|
||||
t.Fatalf("unexpected error when unmarshaling bloom filter: %s", err)
|
||||
}
|
||||
if !bf.containsAll(tokens) {
|
||||
t.Fatalf("bloomFilterContains must return true for the added tokens")
|
||||
tokensHashes := appendTokensHashes(nil, tokens)
|
||||
if !bf.containsAll(tokensHashes) {
|
||||
t.Fatalf("containsAll must return true for the added tokens")
|
||||
}
|
||||
}
|
||||
f(nil)
|
||||
@ -67,7 +68,8 @@ func TestBloomFilterFalsePositive(t *testing.T) {
|
||||
falsePositives := 0
|
||||
for i := range tokens {
|
||||
token := fmt.Sprintf("non-existing-token_%d", i)
|
||||
if bf.containsAll([]string{token}) {
|
||||
tokensHashes := appendTokensHashes(nil, []string{token})
|
||||
if bf.containsAll(tokensHashes) {
|
||||
falsePositives++
|
||||
}
|
||||
}
|
||||
|
@ -18,8 +18,9 @@ type filterAnd struct {
|
||||
}
|
||||
|
||||
type fieldTokens struct {
|
||||
field string
|
||||
tokens []string
|
||||
field string
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fa *filterAnd) String() string {
|
||||
@ -76,16 +77,16 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, fieldTokens := range byFieldTokens {
|
||||
fieldName := fieldTokens.field
|
||||
tokens := fieldTokens.tokens
|
||||
for _, ft := range byFieldTokens {
|
||||
fieldName := ft.field
|
||||
tokens := ft.tokens
|
||||
|
||||
v := bs.csh.getConstColumnValue(fieldName)
|
||||
if v != "" {
|
||||
if !matchStringByAllTokens(v, tokens) {
|
||||
return false
|
||||
if matchStringByAllTokens(v, tokens) {
|
||||
continue
|
||||
}
|
||||
continue
|
||||
return false
|
||||
}
|
||||
|
||||
ch := bs.csh.getColumnHeader(fieldName)
|
||||
@ -94,12 +95,12 @@ func (fa *filterAnd) matchBloomFilters(bs *blockSearch) bool {
|
||||
}
|
||||
|
||||
if ch.valueType == valueTypeDict {
|
||||
if !matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
|
||||
return false
|
||||
if matchDictValuesByAllTokens(ch.valuesDict.values, tokens) {
|
||||
continue
|
||||
}
|
||||
continue
|
||||
return false
|
||||
}
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@ -170,8 +171,9 @@ func (fa *filterAnd) initByFieldTokens() {
|
||||
}
|
||||
|
||||
byFieldTokens = append(byFieldTokens, fieldTokens{
|
||||
field: fieldName,
|
||||
tokens: tokens,
|
||||
field: fieldName,
|
||||
tokens: tokens,
|
||||
tokensHashes: appendTokensHashes(nil, tokens),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -24,11 +24,9 @@ type filterAnyCasePhrase struct {
|
||||
phraseUppercaseOnce sync.Once
|
||||
phraseUppercase string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
|
||||
tokensUppercaseOnce sync.Once
|
||||
tokensUppercase []string
|
||||
tokensOnce sync.Once
|
||||
tokensHashes []uint64
|
||||
tokensHashesUppercase []uint64
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) String() string {
|
||||
@ -39,27 +37,25 @@ func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) getTokens() []string {
|
||||
func (fp *filterAnyCasePhrase) getTokensHashes() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
return fp.tokensHashes
|
||||
}
|
||||
|
||||
// getTokensHashesUppercase returns fp.tokensHashesUppercase.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the initialization is performed at most once per filter.
func (fp *filterAnyCasePhrase) getTokensHashesUppercase() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokensHashesUppercase
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) initTokens() {
|
||||
fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
|
||||
}
|
||||
tokens := tokenizeStrings(nil, []string{fp.phrase})
|
||||
fp.tokensHashes = appendTokensHashes(nil, tokens)
|
||||
|
||||
func (fp *filterAnyCasePhrase) getTokensUppercase() []string {
|
||||
fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
|
||||
return fp.tokensUppercase
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) initTokensUppercase() {
|
||||
tokens := fp.getTokens()
|
||||
tokensUppercase := make([]string, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tokensUppercase[i] = strings.ToUpper(token)
|
||||
}
|
||||
fp.tokensUppercase = tokensUppercase
|
||||
fp.tokensHashesUppercase = appendTokensHashes(nil, tokensUppercase)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) getPhraseLowercase() string {
|
||||
@ -109,7 +105,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fp.getTokens()
|
||||
tokens := fp.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -130,7 +126,7 @@ func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
matchIPv4ByPhrase(bs, ch, bm, phraseLowercase, tokens)
|
||||
case valueTypeTimestampISO8601:
|
||||
phraseUppercase := fp.getPhraseUppercase()
|
||||
tokensUppercase := fp.getTokensUppercase()
|
||||
tokensUppercase := fp.getTokensHashesUppercase()
|
||||
matchTimestampISO8601ByPhrase(bs, ch, bm, phraseUppercase, tokensUppercase)
|
||||
default:
|
||||
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
||||
|
@ -25,11 +25,9 @@ type filterAnyCasePrefix struct {
|
||||
prefixUppercaseOnce sync.Once
|
||||
prefixUppercase string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
|
||||
tokensUppercaseOnce sync.Once
|
||||
tokensUppercase []string
|
||||
tokensOnce sync.Once
|
||||
tokensHashes []uint64
|
||||
tokensUppercaseHashes []uint64
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) String() string {
|
||||
@ -43,27 +41,25 @@ func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) getTokens() []string {
|
||||
func (fp *filterAnyCasePrefix) getTokensHashes() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
return fp.tokensHashes
|
||||
}
|
||||
|
||||
// getTokensUppercaseHashes returns fp.tokensUppercaseHashes.
//
// fp.initTokens is run through fp.tokensOnce before the field is read,
// so the initialization is performed at most once per filter.
func (fp *filterAnyCasePrefix) getTokensUppercaseHashes() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokensUppercaseHashes
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) initTokens() {
|
||||
fp.tokens = getTokensSkipLast(fp.prefix)
|
||||
}
|
||||
tokens := getTokensSkipLast(fp.prefix)
|
||||
fp.tokensHashes = appendTokensHashes(nil, tokens)
|
||||
|
||||
func (fp *filterAnyCasePrefix) getTokensUppercase() []string {
|
||||
fp.tokensUppercaseOnce.Do(fp.initTokensUppercase)
|
||||
return fp.tokensUppercase
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) initTokensUppercase() {
|
||||
tokens := fp.getTokens()
|
||||
tokensUppercase := make([]string, len(tokens))
|
||||
for i, token := range tokens {
|
||||
tokensUppercase[i] = strings.ToUpper(token)
|
||||
}
|
||||
fp.tokensUppercase = tokensUppercase
|
||||
fp.tokensUppercaseHashes = appendTokensHashes(nil, tokensUppercase)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) getPrefixLowercase() string {
|
||||
@ -110,7 +106,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fp.getTokens()
|
||||
tokens := fp.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -131,7 +127,7 @@ func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
matchIPv4ByPrefix(bs, ch, bm, prefixLowercase, tokens)
|
||||
case valueTypeTimestampISO8601:
|
||||
prefixUppercase := fp.getPrefixUppercase()
|
||||
tokensUppercase := fp.getTokensUppercase()
|
||||
tokensUppercase := fp.getTokensUppercaseHashes()
|
||||
matchTimestampISO8601ByPrefix(bs, ch, bm, prefixUppercase, tokensUppercase)
|
||||
default:
|
||||
logger.Panicf("FATAL: %s: unknown valueType=%d", bs.partPath(), ch.valueType)
|
||||
|
@ -16,8 +16,9 @@ type filterExact struct {
|
||||
fieldName string
|
||||
value string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fe *filterExact) String() string {
|
||||
@ -33,8 +34,14 @@ func (fe *filterExact) getTokens() []string {
|
||||
return fe.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns the pre-calculated hashes for the tokens of fe.value.
//
// The hashes are initialized lazily: fe.initTokens is run through fe.tokensOnce,
// so it is executed at most once per filter.
func (fe *filterExact) getTokensHashes() []uint64 {
|
||||
fe.tokensOnce.Do(fe.initTokens)
|
||||
return fe.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens fills fe.tokens with the tokens obtained from fe.value via tokenizeStrings
// and fills fe.tokensHashes with the hashes generated for these tokens by appendTokensHashes.
//
// It is invoked via fe.tokensOnce.Do (see getTokensHashes).
func (fe *filterExact) initTokens() {
|
||||
fe.tokens = tokenizeStrings(nil, []string{fe.value})
|
||||
fe.tokensHashes = appendTokensHashes(nil, fe.tokens)
|
||||
}
|
||||
|
||||
func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
@ -186,7 +193,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fe.getTokens()
|
||||
tokens := fe.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -212,7 +219,7 @@ func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
|
||||
func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
|
||||
n, ok := tryParseTimestampISO8601(value)
|
||||
if !ok || n < int64(ch.minValue) || n > int64(ch.maxValue) {
|
||||
bm.resetBits()
|
||||
@ -224,7 +231,7 @@ func matchTimestampISO8601ByExactValue(bs *blockSearch, ch *columnHeader, bm *bi
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
|
||||
func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
|
||||
n, ok := tryParseIPv4(value)
|
||||
if !ok || uint64(n) < ch.minValue || uint64(n) > ch.maxValue {
|
||||
bm.resetBits()
|
||||
@ -236,7 +243,7 @@ func matchIPv4ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
|
||||
func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
|
||||
f, ok := tryParseFloat64(value)
|
||||
if !ok || f < math.Float64frombits(ch.minValue) || f > math.Float64frombits(ch.maxValue) {
|
||||
bm.resetBits()
|
||||
@ -262,7 +269,7 @@ func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap,
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []string) {
|
||||
func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -272,7 +279,7 @@ func matchStringByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, valu
|
||||
})
|
||||
}
|
||||
|
||||
func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n < ch.minValue || n > ch.maxValue {
|
||||
bm.resetBits()
|
||||
@ -284,7 +291,7 @@ func matchUint8ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n < ch.minValue || n > ch.maxValue {
|
||||
bm.resetBits()
|
||||
@ -296,7 +303,7 @@ func matchUint16ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n < ch.minValue || n > ch.maxValue {
|
||||
bm.resetBits()
|
||||
@ -308,7 +315,7 @@ func matchUint32ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n < ch.minValue || n > ch.maxValue {
|
||||
bm.resetBits()
|
||||
@ -320,7 +327,7 @@ func matchUint64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []string) {
|
||||
func matchBinaryValue(bs *blockSearch, ch *columnHeader, bm *bitmap, binValue []byte, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
@ -15,8 +15,9 @@ type filterExactPrefix struct {
|
||||
fieldName string
|
||||
prefix string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) String() string {
|
||||
@ -32,8 +33,14 @@ func (fep *filterExactPrefix) getTokens() []string {
|
||||
return fep.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns the pre-calculated hashes for the tokens of fep.prefix.
//
// The hashes are initialized lazily: fep.initTokens is run through fep.tokensOnce,
// so it is executed at most once per filter.
func (fep *filterExactPrefix) getTokensHashes() []uint64 {
|
||||
fep.tokensOnce.Do(fep.initTokens)
|
||||
return fep.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens fills fep.tokens with the result of getTokensSkipLast(fep.prefix)
// and fills fep.tokensHashes with the hashes generated for these tokens by appendTokensHashes.
//
// It is invoked via fep.tokensOnce.Do (see getTokensHashes).
//
// NOTE(review): getTokensSkipLast presumably omits the trailing token, since it may be
// an incomplete word in a prefix - confirm against its definition.
func (fep *filterExactPrefix) initTokens() {
|
||||
fep.tokens = getTokensSkipLast(fep.prefix)
|
||||
fep.tokensHashes = appendTokensHashes(nil, fep.tokens)
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
@ -62,7 +69,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fep.getTokens()
|
||||
tokens := fep.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -88,7 +95,7 @@ func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
return
|
||||
}
|
||||
@ -105,11 +112,11 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
return
|
||||
}
|
||||
if prefix < "0" || prefix > "9" || len(tokens) > 3 || !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
if prefix < "0" || prefix > "9" || len(tokens) > 3*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
@ -122,12 +129,12 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
// An empty prefix matches all the values
|
||||
return
|
||||
}
|
||||
if len(tokens) > 2 || !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
if len(tokens) > 2*bloomFilterHashesCount || !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
@ -153,7 +160,7 @@ func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap,
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -163,7 +170,7 @@ func matchStringByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
|
||||
})
|
||||
}
|
||||
|
||||
func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
|
||||
return
|
||||
}
|
||||
@ -176,7 +183,7 @@ func matchUint8ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
|
||||
return
|
||||
}
|
||||
@ -189,7 +196,7 @@ func matchUint16ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
|
||||
return
|
||||
}
|
||||
@ -202,7 +209,7 @@ func matchUint32ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchMinMaxExactPrefix(ch, bm, prefix, tokens) {
|
||||
return
|
||||
}
|
||||
@ -215,7 +222,7 @@ func matchUint64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pre
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []string) bool {
|
||||
func matchMinMaxExactPrefix(ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) bool {
|
||||
if prefix == "" {
|
||||
// An empty prefix matches all the values
|
||||
return false
|
||||
|
@ -28,9 +28,9 @@ type filterIn struct {
|
||||
// qFieldName must be set to field name for obtaining values from if q is non-nil.
|
||||
qFieldName string
|
||||
|
||||
tokensOnce sync.Once
|
||||
commonTokens []string
|
||||
tokenSets [][]string
|
||||
tokensOnce sync.Once
|
||||
commonTokensHashes []uint64
|
||||
tokenSetsHashes [][]uint64
|
||||
|
||||
stringValuesOnce sync.Once
|
||||
stringValues map[string]struct{}
|
||||
@ -76,16 +76,21 @@ func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fi.fieldName)
|
||||
}
|
||||
|
||||
func (fi *filterIn) getTokens() ([]string, [][]string) {
|
||||
func (fi *filterIn) getTokensHashes() ([]uint64, [][]uint64) {
|
||||
fi.tokensOnce.Do(fi.initTokens)
|
||||
return fi.commonTokens, fi.tokenSets
|
||||
return fi.commonTokensHashes, fi.tokenSetsHashes
|
||||
}
|
||||
|
||||
func (fi *filterIn) initTokens() {
|
||||
commonTokens, tokenSets := getCommonTokensAndTokenSets(fi.values)
|
||||
|
||||
fi.commonTokens = commonTokens
|
||||
fi.tokenSets = tokenSets
|
||||
fi.commonTokensHashes = appendTokensHashes(nil, commonTokens)
|
||||
|
||||
tokenSetsHashes := make([][]uint64, len(tokenSets))
|
||||
for i, tokens := range tokenSets {
|
||||
tokenSetsHashes[i] = appendTokensHashes(nil, tokens)
|
||||
}
|
||||
fi.tokenSetsHashes = tokenSetsHashes
|
||||
}
|
||||
|
||||
func (fi *filterIn) getStringValues() map[string]struct{} {
|
||||
@ -374,7 +379,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
commonTokens, tokenSets := fi.getTokens()
|
||||
commonTokens, tokenSets := fi.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -409,7 +414,7 @@ func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []string, tokenSets [][]string) {
|
||||
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, commonTokens []uint64, tokenSets [][]uint64) {
|
||||
if len(values) == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -424,7 +429,7 @@ func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[str
|
||||
})
|
||||
}
|
||||
|
||||
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []string, tokenSets [][]string) bool {
|
||||
func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, commonTokens []uint64, tokenSets [][]uint64) bool {
|
||||
if len(commonTokens) > 0 {
|
||||
if !matchBloomFilterAllTokens(bs, ch, commonTokens) {
|
||||
return false
|
||||
@ -511,6 +516,9 @@ func getCommonTokens(tokenSets [][]string) []string {
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(m) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
tokens := make([]string, 0, len(m))
|
||||
for token := range m {
|
||||
|
@ -89,9 +89,9 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, fieldTokens := range byFieldTokens {
|
||||
fieldName := fieldTokens.field
|
||||
tokens := fieldTokens.tokens
|
||||
for _, ft := range byFieldTokens {
|
||||
fieldName := ft.field
|
||||
tokens := ft.tokens
|
||||
|
||||
v := bs.csh.getConstColumnValue(fieldName)
|
||||
if v != "" {
|
||||
@ -112,7 +112,7 @@ func (fo *filterOr) matchBloomFilters(bs *blockSearch) bool {
|
||||
}
|
||||
continue
|
||||
}
|
||||
if matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
if matchBloomFilterAllTokens(bs, ch, ft.tokensHashes) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
@ -190,8 +190,9 @@ func (fo *filterOr) initByFieldTokens() {
|
||||
break
|
||||
}
|
||||
byFieldTokens = append(byFieldTokens, fieldTokens{
|
||||
field: fieldName,
|
||||
tokens: commonTokens,
|
||||
field: fieldName,
|
||||
tokens: commonTokens,
|
||||
tokensHashes: appendTokensHashes(nil, commonTokens),
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -24,8 +24,9 @@ type filterPhrase struct {
|
||||
fieldName string
|
||||
phrase string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) String() string {
|
||||
@ -41,8 +42,14 @@ func (fp *filterPhrase) getTokens() []string {
|
||||
return fp.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns the pre-calculated hashes for the tokens of fp.phrase.
//
// The hashes are initialized lazily: fp.initTokens is run through fp.tokensOnce,
// so it is executed at most once per filter.
func (fp *filterPhrase) getTokensHashes() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens fills fp.tokens with the tokens obtained from fp.phrase via tokenizeStrings
// and fills fp.tokensHashes with the hashes generated for these tokens by appendTokensHashes.
//
// It is invoked via fp.tokensOnce.Do (see getTokensHashes).
func (fp *filterPhrase) initTokens() {
|
||||
fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
|
||||
fp.tokensHashes = appendTokensHashes(nil, fp.tokens)
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
@ -73,7 +80,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fp.getTokens()
|
||||
tokens := fp.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -99,7 +106,7 @@ func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
_, ok := tryParseTimestampISO8601(phrase)
|
||||
if ok {
|
||||
// Fast path - the phrase contains complete timestamp, so we can use exact search
|
||||
@ -121,7 +128,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
_, ok := tryParseIPv4(phrase)
|
||||
if ok {
|
||||
// Fast path - phrase contains the full IP address, so we can use exact matching
|
||||
@ -145,7 +152,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
// The phrase may contain a part of the floating-point number.
|
||||
// For example, `foo:"123"` must match `123`, `123.456` and `-0.123`.
|
||||
// This means we cannot search in binary representation of floating-point numbers.
|
||||
@ -187,7 +194,7 @@ func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phra
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []string) {
|
||||
func matchStringByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -288,7 +295,7 @@ func visitValues(bs *blockSearch, ch *columnHeader, bm *bitmap, f func(value str
|
||||
})
|
||||
}
|
||||
|
||||
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []string) bool {
|
||||
func matchBloomFilterAllTokens(bs *blockSearch, ch *columnHeader, tokens []uint64) bool {
|
||||
if len(tokens) == 0 {
|
||||
return true
|
||||
}
|
||||
|
@ -19,8 +19,9 @@ type filterPrefix struct {
|
||||
fieldName string
|
||||
prefix string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) String() string {
|
||||
@ -39,8 +40,14 @@ func (fp *filterPrefix) getTokens() []string {
|
||||
return fp.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns the pre-calculated hashes for the tokens of fp.prefix.
//
// The hashes are initialized lazily: fp.initTokens is run through fp.tokensOnce,
// so it is executed at most once per filter.
func (fp *filterPrefix) getTokensHashes() []uint64 {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens fills fp.tokens with the result of getTokensSkipLast(fp.prefix)
// and fp.tokensHashes with the result of appendTokensHashes for these tokens.
//
// It is invoked through fp.tokensOnce by getTokensHashes.
func (fp *filterPrefix) initTokens() {
|
||||
fp.tokens = getTokensSkipLast(fp.prefix)
|
||||
fp.tokensHashes = appendTokensHashes(nil, fp.tokens)
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
|
||||
@ -68,7 +75,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fp.getTokens()
|
||||
tokens := fp.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -94,7 +101,7 @@ func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
// Fast path - all the timestamp values match an empty prefix aka `*`
|
||||
return
|
||||
@ -115,7 +122,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
// Fast path - all the ipv4 values match an empty prefix aka `*`
|
||||
return
|
||||
@ -136,7 +143,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if prefix == "" {
|
||||
// Fast path - all the float64 values match an empty prefix aka `*`
|
||||
return
|
||||
@ -177,7 +184,7 @@ func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pref
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []string) {
|
||||
func matchStringByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
@ -16,8 +16,9 @@ type filterRegexp struct {
|
||||
fieldName string
|
||||
re *regexutil.Regex
|
||||
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) String() string {
|
||||
@ -33,12 +34,18 @@ func (fr *filterRegexp) getTokens() []string {
|
||||
return fr.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns fr.tokensHashes.
//
// The tokens and their hashes are initialized via fr.initTokens,
// which is run through fr.tokensOnce, so repeated calls reuse the stored hashes.
func (fr *filterRegexp) getTokensHashes() []uint64 {
|
||||
fr.tokensOnce.Do(fr.initTokens)
|
||||
return fr.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens derives tokens from the literals returned by fr.re.GetLiterals().
//
// Every literal is replaced in place with skipFirstLastToken(literal),
// then the literals are tokenized into fr.tokens via tokenizeStrings,
// and fr.tokensHashes is filled by appendTokensHashes for these tokens.
//
// It is invoked through fr.tokensOnce by getTokensHashes.
func (fr *filterRegexp) initTokens() {
|
||||
literals := fr.re.GetLiterals()
|
||||
for i, literal := range literals {
|
||||
literals[i] = skipFirstLastToken(literal)
|
||||
}
|
||||
fr.tokens = tokenizeStrings(nil, literals)
|
||||
fr.tokensHashes = appendTokensHashes(nil, fr.tokens)
|
||||
}
|
||||
|
||||
func skipFirstLastToken(s string) string {
|
||||
@ -89,7 +96,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fr.getTokens()
|
||||
tokens := fr.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -115,7 +122,7 @@ func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -128,7 +135,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -141,7 +148,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexu
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -167,7 +174,7 @@ func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -177,7 +184,7 @@ func matchStringByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
||||
})
|
||||
}
|
||||
|
||||
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -190,7 +197,7 @@ func matchUint8ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regex
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -203,7 +210,7 @@ func matchUint16ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -216,7 +223,7 @@ func matchUint32ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *rege
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []string) {
|
||||
func matchUint64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexutil.Regex, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
@ -15,8 +15,9 @@ type filterSequence struct {
|
||||
fieldName string
|
||||
phrases []string
|
||||
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensOnce sync.Once
|
||||
tokens []string
|
||||
tokensHashes []uint64
|
||||
|
||||
nonEmptyPhrasesOnce sync.Once
|
||||
nonEmptyPhrases []string
|
||||
@ -40,10 +41,15 @@ func (fs *filterSequence) getTokens() []string {
|
||||
return fs.tokens
|
||||
}
|
||||
|
||||
// getTokensHashes returns fs.tokensHashes.
//
// The tokens and their hashes are initialized via fs.initTokens,
// which is run through fs.tokensOnce, so repeated calls reuse the stored hashes.
func (fs *filterSequence) getTokensHashes() []uint64 {
|
||||
fs.tokensOnce.Do(fs.initTokens)
|
||||
return fs.tokensHashes
|
||||
}
|
||||
|
||||
// initTokens tokenizes the phrases returned by fs.getNonEmptyPhrases() into fs.tokens
// and fills fs.tokensHashes with the result of appendTokensHashes for these tokens.
//
// NOTE(review): this hunk appears to show both the previous form
// (a local `tokens` variable assigned to fs.tokens) and the new direct assignment
// to fs.tokens; confirm against the committed file which lines remain.
func (fs *filterSequence) initTokens() {
|
||||
phrases := fs.getNonEmptyPhrases()
|
||||
tokens := tokenizeStrings(nil, phrases)
|
||||
fs.tokens = tokens
|
||||
fs.tokens = tokenizeStrings(nil, phrases)
|
||||
fs.tokensHashes = appendTokensHashes(nil, fs.tokens)
|
||||
}
|
||||
|
||||
func (fs *filterSequence) getNonEmptyPhrases() []string {
|
||||
@ -100,7 +106,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
return
|
||||
}
|
||||
|
||||
tokens := fs.getTokens()
|
||||
tokens := fs.getTokensHashes()
|
||||
|
||||
switch ch.valueType {
|
||||
case valueTypeString:
|
||||
@ -126,7 +132,7 @@ func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
}
|
||||
|
||||
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) == 1 {
|
||||
matchTimestampISO8601ByPhrase(bs, ch, bm, phrases[0], tokens)
|
||||
return
|
||||
@ -145,7 +151,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) == 1 {
|
||||
matchIPv4ByPhrase(bs, ch, bm, phrases[0], tokens)
|
||||
return
|
||||
@ -166,7 +172,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -197,7 +203,7 @@ func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, ph
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
|
||||
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []string) {
|
||||
func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if !matchBloomFilterAllTokens(bs, ch, tokens) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -207,7 +213,7 @@ func matchStringBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
||||
})
|
||||
}
|
||||
|
||||
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) > 1 {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -215,7 +221,7 @@ func matchUint8BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases
|
||||
matchUint8ByExactValue(bs, ch, bm, phrases[0], tokens)
|
||||
}
|
||||
|
||||
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) > 1 {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -223,7 +229,7 @@ func matchUint16BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
||||
matchUint16ByExactValue(bs, ch, bm, phrases[0], tokens)
|
||||
}
|
||||
|
||||
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) > 1 {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -231,7 +237,7 @@ func matchUint32BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
||||
matchUint32ByExactValue(bs, ch, bm, phrases[0], tokens)
|
||||
}
|
||||
|
||||
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases, tokens []string) {
|
||||
func matchUint64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string, tokens []uint64) {
|
||||
if len(phrases) > 1 {
|
||||
bm.resetBits()
|
||||
return
|
||||
|
Loading…
Reference in New Issue
Block a user