VictoriaMetrics/lib/logstorage/storage_search.go

1292 lines
32 KiB
Go
Raw Normal View History

package logstorage
import (
"context"
2024-05-20 04:08:30 +02:00
"fmt"
"math"
"slices"
"sort"
2024-05-20 04:08:30 +02:00
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
2024-05-20 04:08:30 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// genericSearchOptions contain options used for search.
type genericSearchOptions struct {
// tenantIDs must contain the list of tenantIDs for the search.
tenantIDs []TenantID
2024-06-20 03:08:07 +02:00
// streamIDs is an optional sorted list of streamIDs for the search.
// If it is empty, then the search is performed by tenantIDs
streamIDs []streamID
// minTimestamp is the minimum timestamp for the search
minTimestamp int64
// maxTimestamp is the maximum timestamp for the search
maxTimestamp int64
// filter is the filter to use for the search
filter filter
// neededColumnNames contains names of columns to return in the result
neededColumnNames []string
// unneededColumnNames contains names of columns, which mustn't be returned in the result.
//
// This list is consulted if needAllColumns=true
unneededColumnNames []string
// needAllColumns is set to true when all the columns except of unneededColumnNames must be returned in the result
needAllColumns bool
}
type searchOptions struct {
// Optional sorted list of tenantIDs for the search.
// If it is empty, then the search is performed by streamIDs
tenantIDs []TenantID
// Optional sorted list of streamIDs for the search.
// If it is empty, then the search is performed by tenantIDs
streamIDs []streamID
// minTimestamp is the minimum timestamp for the search
minTimestamp int64
// maxTimestamp is the maximum timestamp for the search
maxTimestamp int64
// filter is the filter to use for the search
filter filter
// neededColumnNames contains names of columns to return in the result
neededColumnNames []string
// unneededColumnNames contains names of columns, which mustn't be returned in the result.
//
// This list is consulted when needAllColumns=true.
unneededColumnNames []string
// needAllColumns is set to true when all the columns except of unneededColumnNames must be returned in the result
needAllColumns bool
}
2024-05-20 04:08:30 +02:00
// WriteBlockFunc must write a block with the given timestamps and columns.
//
// WriteBlockFunc cannot hold references to timestamps and columns after returning.
type WriteBlockFunc func(workerID uint, timestamps []int64, columns []BlockColumn)
// RunQuery runs the given q and calls writeBlock for results.
2024-05-20 04:08:30 +02:00
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) error {
writeBlockResult := func(workerID uint, br *blockResult) {
if br.rowsLen == 0 {
2024-05-20 04:08:30 +02:00
return
}
brs := getBlockRows()
csDst := brs.cs
2024-05-20 04:08:30 +02:00
cs := br.getColumns()
for _, c := range cs {
values := c.getValues(br)
csDst = append(csDst, BlockColumn{
Name: c.name,
Values: values,
})
}
timestamps := br.getTimestamps()
writeBlock(workerID, timestamps, csDst)
brs.cs = csDst
putBlockRows(brs)
2024-05-20 04:08:30 +02:00
}
return s.runQuery(ctx, tenantIDs, q, writeBlockResult)
2024-05-20 04:08:30 +02:00
}
2024-05-20 04:08:30 +02:00
func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlockResultFunc func(workerID uint, br *blockResult)) error {
qNew, err := s.initFilterInValues(ctx, tenantIDs, q)
if err != nil {
return err
}
qNew, err = s.initJoinMaps(ctx, tenantIDs, qNew)
if err != nil {
return err
}
q = qNew
2024-06-24 23:27:12 +02:00
streamIDs := q.getStreamIDs()
sort.Slice(streamIDs, func(i, j int) bool {
return streamIDs[i].less(&streamIDs[j])
})
minTimestamp, maxTimestamp := q.GetFilterTimeRange()
2024-05-20 04:08:30 +02:00
neededColumnNames, unneededColumnNames := q.getNeededColumns()
so := &genericSearchOptions{
tenantIDs: tenantIDs,
2024-06-20 03:08:07 +02:00
streamIDs: streamIDs,
minTimestamp: minTimestamp,
maxTimestamp: maxTimestamp,
2024-05-20 04:08:30 +02:00
filter: q.f,
neededColumnNames: neededColumnNames,
unneededColumnNames: unneededColumnNames,
needAllColumns: slices.Contains(neededColumnNames, "*"),
}
workersCount := cgroup.AvailableCPUs()
ppMain := newDefaultPipeProcessor(writeBlockResultFunc)
pp := ppMain
stopCh := ctx.Done()
cancels := make([]func(), len(q.pipes))
pps := make([]pipeProcessor, len(q.pipes))
var errPipe error
for i := len(q.pipes) - 1; i >= 0; i-- {
p := q.pipes[i]
ctxChild, cancel := context.WithCancel(ctx)
pp = p.newPipeProcessor(workersCount, stopCh, cancel, pp)
pcp, ok := pp.(*pipeStreamContextProcessor)
if ok {
pcp.init(s, neededColumnNames, unneededColumnNames)
if i > 0 {
errPipe = fmt.Errorf("[%s] pipe must go after [%s] filter; now it goes after the [%s] pipe", p, q.f, q.pipes[i-1])
}
}
stopCh = ctxChild.Done()
ctx = ctxChild
cancels[i] = cancel
pps[i] = pp
}
if errPipe == nil {
s.search(workersCount, so, stopCh, pp.writeBlock)
}
var errFlush error
for i, pp := range pps {
if err := pp.flush(); err != nil && errFlush == nil {
errFlush = err
}
cancel := cancels[i]
cancel()
}
if err := ppMain.flush(); err != nil && errFlush == nil {
errFlush = err
}
if errPipe != nil {
return errPipe
}
return errFlush
}
2024-05-20 04:08:30 +02:00
// GetFieldNames returns field names from q results for the given tenantIDs.
2024-05-24 03:06:55 +02:00
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
2024-05-20 04:08:30 +02:00
pipes := append([]pipe{}, q.pipes...)
2024-05-24 03:06:55 +02:00
pipeStr := "field_names"
2024-05-20 04:08:30 +02:00
lex := newLexer(pipeStr)
2024-05-22 21:01:20 +02:00
2024-05-20 04:08:30 +02:00
pf, err := parsePipeFieldNames(lex)
if err != nil {
2024-05-22 21:01:20 +02:00
logger.Panicf("BUG: unexpected error when parsing 'field_names' pipe at [%s]: %s", pipeStr, err)
2024-05-20 04:08:30 +02:00
}
pf.isFirstPipe = len(pipes) == 0
2024-05-22 21:01:20 +02:00
2024-05-24 03:06:55 +02:00
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
pipes = append(pipes, pf)
q = &Query{
f: q.f,
pipes: pipes,
2024-05-22 21:01:20 +02:00
}
2024-05-24 03:06:55 +02:00
return s.runValuesWithHitsQuery(ctx, tenantIDs, q)
}
func (s *Storage) getJoinMap(ctx context.Context, tenantIDs []TenantID, q *Query, byFields []string) (map[string][][]Field, error) {
// TODO: track memory usage
m := make(map[string][][]Field)
var mLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {
if br.rowsLen == 0 {
return
}
cs := br.getColumns()
columnNames := make([]string, len(cs))
for i := range cs {
columnNames[i] = strings.Clone(cs[i].name)
}
byValues := make([]string, len(byFields))
var tmpBuf []byte
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
fields := make([]Field, 0, len(cs))
clear(byValues)
for j := range cs {
name := columnNames[j]
v := cs[j].getValueAtRow(br, rowIdx)
if cIdx := slices.Index(byFields, name); cIdx >= 0 {
byValues[cIdx] = v
continue
}
if v == "" {
continue
}
fields = append(fields, Field{
Name: name,
Value: strings.Clone(v),
})
}
tmpBuf = marshalStrings(tmpBuf[:0], byValues)
k := string(tmpBuf)
mLock.Lock()
m[k] = append(m[k], fields)
mLock.Unlock()
}
}
if err := s.runQuery(ctx, tenantIDs, q, writeBlockResult); err != nil {
return nil, err
}
return m, nil
}
func marshalStrings(dst []byte, a []string) []byte {
for _, v := range a {
dst = encoding.MarshalBytes(dst, bytesutil.ToUnsafeBytes(v))
}
return dst
}
2024-05-24 03:06:55 +02:00
func (s *Storage) getFieldValuesNoHits(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string) ([]string, error) {
// TODO: track memory usage
2024-05-24 03:06:55 +02:00
pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName)
pipeStr := fmt.Sprintf("uniq by (%s)", quotedFieldName)
lex := newLexer(pipeStr)
pu, err := parsePipeUniq(lex)
2024-05-22 21:01:20 +02:00
if err != nil {
2024-05-24 03:06:55 +02:00
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe at [%s]: %s", pipeStr, err)
2024-05-22 21:01:20 +02:00
}
2024-05-24 03:06:55 +02:00
2024-05-22 21:01:20 +02:00
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
2024-05-24 03:06:55 +02:00
pipes = append(pipes, pu)
2024-05-20 04:08:30 +02:00
q = &Query{
f: q.f,
pipes: pipes,
}
2024-05-24 03:06:55 +02:00
var values []string
var valuesLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {
if br.rowsLen == 0 {
2024-05-24 03:06:55 +02:00
return
}
cs := br.getColumns()
if len(cs) != 1 {
logger.Panicf("BUG: expecting one column; got %d columns", len(cs))
}
columnValues := cs[0].getValues(br)
columnValuesCopy := make([]string, len(columnValues))
for i := range columnValues {
columnValuesCopy[i] = strings.Clone(columnValues[i])
}
valuesLock.Lock()
values = append(values, columnValuesCopy...)
valuesLock.Unlock()
}
if err := s.runQuery(ctx, tenantIDs, q, writeBlockResult); err != nil {
return nil, err
}
return values, nil
2024-05-20 04:08:30 +02:00
}
2024-05-24 03:06:55 +02:00
// GetFieldValues returns unique values with the number of hits for the given fieldName returned by q for the given tenantIDs.
2024-05-20 04:08:30 +02:00
//
2024-05-22 21:01:20 +02:00
// If limit > 0, then up to limit unique values are returned.
2024-05-24 03:06:55 +02:00
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
2024-05-22 21:01:20 +02:00
pipes := append([]pipe{}, q.pipes...)
quotedFieldName := quoteTokenIfNeeded(fieldName)
2024-05-28 19:29:41 +02:00
pipeStr := fmt.Sprintf("field_values %s limit %d", quotedFieldName, limit)
2024-05-22 21:01:20 +02:00
lex := newLexer(pipeStr)
2024-05-20 04:08:30 +02:00
2024-05-28 19:29:41 +02:00
pu, err := parsePipeFieldValues(lex)
2024-05-22 21:01:20 +02:00
if err != nil {
2024-05-28 19:29:41 +02:00
logger.Panicf("BUG: unexpected error when parsing 'field_values' pipe at [%s]: %s", pipeStr, err)
2024-05-22 21:01:20 +02:00
}
2024-05-20 04:08:30 +02:00
2024-05-22 21:01:20 +02:00
if !lex.isEnd() {
logger.Panicf("BUG: unexpected tail left after parsing pipes [%s]: %q", pipeStr, lex.s)
}
2024-05-24 03:06:55 +02:00
pipes = append(pipes, pu)
2024-05-22 21:01:20 +02:00
q = &Query{
f: q.f,
pipes: pipes,
2024-05-20 04:08:30 +02:00
}
2024-05-24 03:06:55 +02:00
return s.runValuesWithHitsQuery(ctx, tenantIDs, q)
}
// ValueWithHits contains value and hits.
type ValueWithHits struct {
Value string
Hits uint64
}
func toValuesWithHits(m map[string]*uint64) []ValueWithHits {
results := make([]ValueWithHits, 0, len(m))
for k, pHits := range m {
results = append(results, ValueWithHits{
Value: k,
Hits: *pHits,
})
}
sortValuesWithHits(results)
return results
}
func sortValuesWithHits(results []ValueWithHits) {
slices.SortFunc(results, func(a, b ValueWithHits) int {
if a.Hits == b.Hits {
if a.Value == b.Value {
return 0
}
if lessString(a.Value, b.Value) {
return -1
}
return 1
}
// Sort in descending order of hits
if a.Hits < b.Hits {
return 1
}
return -1
})
2024-05-20 04:08:30 +02:00
}
2024-05-25 21:36:16 +02:00
// GetStreamFieldNames returns stream field names from q results for the given tenantIDs.
func (s *Storage) GetStreamFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
2024-05-22 21:01:20 +02:00
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
2024-05-20 04:08:30 +02:00
}
2024-05-22 21:01:20 +02:00
2024-05-24 03:06:55 +02:00
m := make(map[string]*uint64)
2024-05-25 21:36:16 +02:00
forEachStreamField(streams, func(f Field, hits uint64) {
pHits := m[f.Name]
if pHits == nil {
2024-05-25 21:36:16 +02:00
nameCopy := strings.Clone(f.Name)
2024-05-24 03:06:55 +02:00
hitsLocal := uint64(0)
pHits = &hitsLocal
m[nameCopy] = pHits
2024-05-22 21:01:20 +02:00
}
2024-05-24 03:06:55 +02:00
*pHits += hits
2024-05-22 21:01:20 +02:00
})
2024-05-24 03:06:55 +02:00
names := toValuesWithHits(m)
2024-05-22 21:01:20 +02:00
return names, nil
}
2024-05-25 21:36:16 +02:00
// GetStreamFieldValues returns stream field values for the given fieldName from q results for the given tenantIDs.
2024-05-22 21:01:20 +02:00
//
2024-05-25 21:36:16 +02:00
// If limit > 9, then up to limit unique values are returned.
func (s *Storage) GetStreamFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]ValueWithHits, error) {
2024-05-22 21:01:20 +02:00
streams, err := s.GetStreams(ctx, tenantIDs, q, math.MaxUint64)
if err != nil {
return nil, err
2024-05-20 04:08:30 +02:00
}
2024-05-22 21:01:20 +02:00
2024-05-24 03:06:55 +02:00
m := make(map[string]*uint64)
2024-05-25 21:36:16 +02:00
forEachStreamField(streams, func(f Field, hits uint64) {
if f.Name != fieldName {
2024-05-22 21:01:20 +02:00
return
}
pHits := m[f.Value]
if pHits == nil {
2024-05-25 21:36:16 +02:00
valueCopy := strings.Clone(f.Value)
2024-05-24 03:06:55 +02:00
hitsLocal := uint64(0)
pHits = &hitsLocal
m[valueCopy] = pHits
2024-05-22 21:01:20 +02:00
}
2024-05-24 03:06:55 +02:00
*pHits += hits
2024-05-22 21:01:20 +02:00
})
2024-05-24 03:06:55 +02:00
values := toValuesWithHits(m)
if limit > 0 && uint64(len(values)) > limit {
2024-05-22 21:01:20 +02:00
values = values[:limit]
}
return values, nil
}
// GetStreams returns streams from q results for the given tenantIDs.
//
// If limit > 0, then up to limit unique streams are returned.
2024-05-24 03:06:55 +02:00
func (s *Storage) GetStreams(ctx context.Context, tenantIDs []TenantID, q *Query, limit uint64) ([]ValueWithHits, error) {
2024-05-22 21:01:20 +02:00
return s.GetFieldValues(ctx, tenantIDs, q, "_stream", limit)
2024-05-20 04:08:30 +02:00
}
2024-06-27 14:18:42 +02:00
// GetStreamIDs returns stream_id field values from q results for the given tenantIDs.
//
// If limit > 0, then up to limit unique streams are returned.
func (s *Storage) GetStreamIDs(ctx context.Context, tenantIDs []TenantID, q *Query, limit uint64) ([]ValueWithHits, error) {
return s.GetFieldValues(ctx, tenantIDs, q, "_stream_id", limit)
}
2024-05-24 03:06:55 +02:00
func (s *Storage) runValuesWithHitsQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]ValueWithHits, error) {
var results []ValueWithHits
var resultsLock sync.Mutex
writeBlockResult := func(_ uint, br *blockResult) {
if br.rowsLen == 0 {
2024-05-20 04:08:30 +02:00
return
}
cs := br.getColumns()
2024-05-24 03:06:55 +02:00
if len(cs) != 2 {
logger.Panicf("BUG: expecting two columns; got %d columns", len(cs))
2024-05-20 04:08:30 +02:00
}
2024-05-24 03:06:55 +02:00
columnValues := cs[0].getValues(br)
columnHits := cs[1].getValues(br)
valuesWithHits := make([]ValueWithHits, len(columnValues))
for i := range columnValues {
x := &valuesWithHits[i]
hits, _ := tryParseUint64(columnHits[i])
x.Value = strings.Clone(columnValues[i])
x.Hits = hits
2024-05-20 04:08:30 +02:00
}
2024-05-24 03:06:55 +02:00
resultsLock.Lock()
results = append(results, valuesWithHits...)
resultsLock.Unlock()
2024-05-20 04:08:30 +02:00
}
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
if err != nil {
return nil, err
}
2024-05-24 03:06:55 +02:00
sortValuesWithHits(results)
2024-05-20 04:08:30 +02:00
2024-05-24 03:06:55 +02:00
return results, nil
2024-05-20 04:08:30 +02:00
}
func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
if !hasFilterInWithQueryForFilter(q.f) && !hasFilterInWithQueryForPipes(q.pipes) {
return q, nil
}
getFieldValues := func(q *Query, fieldName string) ([]string, error) {
2024-05-24 03:06:55 +02:00
return s.getFieldValuesNoHits(ctx, tenantIDs, q, fieldName)
2024-05-20 04:08:30 +02:00
}
cache := make(map[string][]string)
fNew, err := initFilterInValuesForFilter(cache, q.f, getFieldValues)
if err != nil {
return nil, err
}
pipesNew, err := initFilterInValuesForPipes(cache, q.pipes, getFieldValues)
if err != nil {
return nil, err
}
qNew := &Query{
f: fNew,
pipes: pipesNew,
}
return qNew, nil
}
type getJoinMapFunc func(q *Query, byFields []string) (map[string][][]Field, error)
func (s *Storage) initJoinMaps(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
if !hasJoinPipes(q.pipes) {
return q, nil
}
getJoinMap := func(q *Query, byFields []string) (map[string][][]Field, error) {
return s.getJoinMap(ctx, tenantIDs, q, byFields)
}
pipesNew := make([]pipe, len(q.pipes))
for i := range q.pipes {
p := q.pipes[i]
if pj, ok := p.(*pipeJoin); ok {
pNew, err := pj.initJoinMap(getJoinMap)
if err != nil {
return nil, err
}
p = pNew
}
pipesNew[i] = p
}
qNew := &Query{
f: q.f,
pipes: pipesNew,
}
return qNew, nil
}
func hasJoinPipes(pipes []pipe) bool {
for _, p := range pipes {
if _, ok := p.(*pipeJoin); ok {
return true
}
}
return false
}
2024-05-22 21:01:20 +02:00
func (iff *ifFilter) hasFilterInWithQuery() bool {
if iff == nil {
return false
}
return hasFilterInWithQueryForFilter(iff.f)
}
2024-05-20 04:08:30 +02:00
func hasFilterInWithQueryForFilter(f filter) bool {
2024-05-22 21:01:20 +02:00
if f == nil {
return false
}
2024-05-20 04:08:30 +02:00
visitFunc := func(f filter) bool {
2024-06-24 23:27:12 +02:00
switch t := f.(type) {
case *filterIn:
return t.needExecuteQuery
case *filterStreamID:
return t.needExecuteQuery
default:
return false
}
2024-05-20 04:08:30 +02:00
}
return visitFilter(f, visitFunc)
}
func hasFilterInWithQueryForPipes(pipes []pipe) bool {
for _, p := range pipes {
2024-05-25 21:36:16 +02:00
if p.hasFilterInWithQuery() {
return true
2024-05-20 04:08:30 +02:00
}
}
return false
}
type getFieldValuesFunc func(q *Query, fieldName string) ([]string, error)
2024-05-22 21:01:20 +02:00
func (iff *ifFilter) initFilterInValues(cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) (*ifFilter, error) {
if iff == nil {
return nil, nil
}
f, err := initFilterInValuesForFilter(cache, iff.f, getFieldValuesFunc)
if err != nil {
return nil, err
}
iffNew := *iff
iffNew.f = f
return &iffNew, nil
}
2024-05-20 04:08:30 +02:00
func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldValuesFunc getFieldValuesFunc) (filter, error) {
2024-05-22 21:01:20 +02:00
if f == nil {
return nil, nil
}
2024-05-20 04:08:30 +02:00
visitFunc := func(f filter) bool {
2024-06-24 23:27:12 +02:00
switch t := f.(type) {
case *filterIn:
return t.needExecuteQuery
case *filterStreamID:
return t.needExecuteQuery
default:
return false
}
2024-05-20 04:08:30 +02:00
}
copyFunc := func(f filter) (filter, error) {
2024-06-24 23:27:12 +02:00
switch t := f.(type) {
case *filterIn:
values, err := getValuesForQuery(t.q, t.qFieldName, cache, getFieldValuesFunc)
if err != nil {
return nil, fmt.Errorf("cannot obtain unique values for %s: %w", t, err)
}
2024-05-20 04:08:30 +02:00
2024-06-24 23:27:12 +02:00
fiNew := &filterIn{
fieldName: t.fieldName,
q: t.q,
values: values,
}
return fiNew, nil
case *filterStreamID:
values, err := getValuesForQuery(t.q, t.qFieldName, cache, getFieldValuesFunc)
2024-05-20 04:08:30 +02:00
if err != nil {
2024-06-24 23:27:12 +02:00
return nil, fmt.Errorf("cannot obtain unique values for %s: %w", t, err)
2024-05-20 04:08:30 +02:00
}
2024-06-24 23:27:12 +02:00
// convert values to streamID list
streamIDs := make([]streamID, 0, len(values))
for _, v := range values {
var sid streamID
if sid.tryUnmarshalFromString(v) {
streamIDs = append(streamIDs, sid)
}
}
fsNew := &filterStreamID{
streamIDs: streamIDs,
q: t.q,
}
return fsNew, nil
default:
return f, nil
2024-05-20 04:08:30 +02:00
}
}
return copyFilter(f, visitFunc, copyFunc)
}
2024-06-24 23:27:12 +02:00
func getValuesForQuery(q *Query, qFieldName string, cache map[string][]string, getFieldValuesFunc getFieldValuesFunc) ([]string, error) {
qStr := q.String()
values, ok := cache[qStr]
if ok {
return values, nil
}
vs, err := getFieldValuesFunc(q, qFieldName)
if err != nil {
return nil, err
}
cache[qStr] = vs
return vs, nil
}
2024-05-20 04:08:30 +02:00
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
pipesNew := make([]pipe, len(pipes))
for i, p := range pipes {
2024-05-25 21:36:16 +02:00
pNew, err := p.initFilterInValues(cache, getFieldValuesFunc)
if err != nil {
return nil, err
2024-05-20 04:08:30 +02:00
}
2024-05-25 21:36:16 +02:00
pipesNew[i] = pNew
2024-05-20 04:08:30 +02:00
}
return pipesNew, nil
}
type blockRows struct {
cs []BlockColumn
}
func (brs *blockRows) reset() {
cs := brs.cs
for i := range cs {
cs[i].reset()
}
brs.cs = cs[:0]
}
func getBlockRows() *blockRows {
v := blockRowsPool.Get()
if v == nil {
return &blockRows{}
}
return v.(*blockRows)
}
func putBlockRows(brs *blockRows) {
brs.reset()
blockRowsPool.Put(brs)
}
var blockRowsPool sync.Pool
// BlockColumn is a single column of a block of data
type BlockColumn struct {
// Name is the column name
Name string
// Values is column values
Values []string
}
func (c *BlockColumn) reset() {
c.Name = ""
c.Values = nil
}
// searchResultFunc must process sr.
//
// The callback is called at the worker with the given workerID.
type searchResultFunc func(workerID uint, br *blockResult)
// search searches for the matching rows according to so.
//
2024-05-20 04:08:30 +02:00
// It calls processBlockResult for each matching block.
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
// Spin up workers
var wgWorkers sync.WaitGroup
workCh := make(chan *blockSearchWorkBatch, workersCount)
wgWorkers.Add(workersCount)
for i := 0; i < workersCount; i++ {
go func(workerID uint) {
bs := getBlockSearch()
2024-05-20 04:08:30 +02:00
bm := getBitmap(0)
for bswb := range workCh {
bsws := bswb.bsws
for i := range bsws {
bsw := &bsws[i]
if needStop(stopCh) {
// The search has been canceled. Just skip all the scheduled work in order to save CPU time.
bsw.reset()
continue
}
2024-05-20 04:08:30 +02:00
bs.search(bsw, bm)
if bs.br.rowsLen > 0 {
processBlockResult(workerID, &bs.br)
}
bsw.reset()
}
bswb.bsws = bswb.bsws[:0]
putBlockSearchWorkBatch(bswb)
}
putBlockSearch(bs)
2024-05-20 04:08:30 +02:00
putBitmap(bm)
wgWorkers.Done()
}(uint(i))
}
// Select partitions according to the selected time range
s.partitionsLock.Lock()
ptws := s.partitions
minDay := so.minTimestamp / nsecsPerDay
n := sort.Search(len(ptws), func(i int) bool {
return ptws[i].day >= minDay
})
ptws = ptws[n:]
maxDay := so.maxTimestamp / nsecsPerDay
n = sort.Search(len(ptws), func(i int) bool {
return ptws[i].day > maxDay
})
ptws = ptws[:n]
// Copy the selected partitions, so they don't interfere with s.partitions.
ptws = append([]*partitionWrapper{}, ptws...)
for _, ptw := range ptws {
ptw.incRef()
}
s.partitionsLock.Unlock()
// Obtain common filterStream from f
sf, f := getCommonStreamFilter(so.filter)
// Schedule concurrent search across matching partitions.
psfs := make([]partitionSearchFinalizer, len(ptws))
var wgSearchers sync.WaitGroup
for i, ptw := range ptws {
partitionSearchConcurrencyLimitCh <- struct{}{}
wgSearchers.Add(1)
go func(idx int, pt *partition) {
psfs[idx] = pt.search(sf, f, so, workCh, stopCh)
wgSearchers.Done()
<-partitionSearchConcurrencyLimitCh
}(i, ptw.pt)
}
wgSearchers.Wait()
// Wait until workers finish their work
close(workCh)
wgWorkers.Wait()
// Finalize partition search
for _, psf := range psfs {
psf()
}
// Decrement references to partitions
for _, ptw := range ptws {
ptw.decRef()
}
}
// partitionSearchConcurrencyLimitCh limits the number of concurrent searches in partition.
//
// This is needed for limiting memory usage under high load.
var partitionSearchConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs())
type partitionSearchFinalizer func()
func (pt *partition) search(sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
if needStop(stopCh) {
// Do not spend CPU time on search, since it is already stopped.
return func() {}
}
tenantIDs := so.tenantIDs
var streamIDs []streamID
if sf != nil {
streamIDs = pt.idb.searchStreamIDs(tenantIDs, sf)
2024-06-20 03:08:07 +02:00
if len(so.streamIDs) > 0 {
streamIDs = intersectStreamIDs(streamIDs, so.streamIDs)
}
tenantIDs = nil
} else if len(so.streamIDs) > 0 {
streamIDs = getStreamIDsForTenantIDs(so.streamIDs, tenantIDs)
tenantIDs = nil
}
if hasStreamFilters(f) {
f = initStreamFilters(tenantIDs, pt.idb, f)
}
soInternal := &searchOptions{
tenantIDs: tenantIDs,
streamIDs: streamIDs,
minTimestamp: so.minTimestamp,
maxTimestamp: so.maxTimestamp,
filter: f,
neededColumnNames: so.neededColumnNames,
unneededColumnNames: so.unneededColumnNames,
needAllColumns: so.needAllColumns,
}
return pt.ddb.search(soInternal, workCh, stopCh)
}
2024-06-20 03:08:07 +02:00
func intersectStreamIDs(a, b []streamID) []streamID {
m := make(map[streamID]struct{}, len(b))
for _, streamID := range b {
m[streamID] = struct{}{}
}
result := make([]streamID, 0, len(a))
for _, streamID := range a {
if _, ok := m[streamID]; ok {
result = append(result, streamID)
}
}
return result
}
func getStreamIDsForTenantIDs(streamIDs []streamID, tenantIDs []TenantID) []streamID {
m := make(map[TenantID]struct{}, len(tenantIDs))
for _, tenantID := range tenantIDs {
m[tenantID] = struct{}{}
}
result := make([]streamID, 0, len(streamIDs))
for _, streamID := range streamIDs {
if _, ok := m[streamID.tenantID]; ok {
result = append(result, streamID)
}
}
return result
}
func hasStreamFilters(f filter) bool {
2024-05-20 04:08:30 +02:00
visitFunc := func(f filter) bool {
_, ok := f.(*filterStream)
return ok
}
2024-05-20 04:08:30 +02:00
return visitFilter(f, visitFunc)
}
func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
2024-05-20 04:08:30 +02:00
visitFunc := func(f filter) bool {
_, ok := f.(*filterStream)
return ok
}
copyFunc := func(f filter) (filter, error) {
fs := f.(*filterStream)
fsNew := &filterStream{
f: fs.f,
tenantIDs: tenantIDs,
idb: idb,
}
2024-05-20 04:08:30 +02:00
return fsNew, nil
}
2024-05-20 04:08:30 +02:00
f, err := copyFilter(f, visitFunc, copyFunc)
if err != nil {
logger.Panicf("BUG: unexpected error: %s", err)
}
2024-05-20 04:08:30 +02:00
return f
}
func (ddb *datadb) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
// Select parts with data for the given time range
ddb.partsLock.Lock()
pws := appendPartsInTimeRange(nil, ddb.bigParts, so.minTimestamp, so.maxTimestamp)
pws = appendPartsInTimeRange(pws, ddb.smallParts, so.minTimestamp, so.maxTimestamp)
pws = appendPartsInTimeRange(pws, ddb.inmemoryParts, so.minTimestamp, so.maxTimestamp)
// Increase references to the searched parts, so they aren't deleted during search.
// References to the searched parts must be decremented by calling the returned partitionSearchFinalizer.
for _, pw := range pws {
pw.incRef()
}
ddb.partsLock.Unlock()
// Apply search to matching parts
for _, pw := range pws {
pw.p.search(so, workCh, stopCh)
}
return func() {
for _, pw := range pws {
pw.decRef()
}
}
}
func (p *part) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) {
bhss := getBlockHeaders()
if len(so.tenantIDs) > 0 {
p.searchByTenantIDs(so, bhss, workCh, stopCh)
} else {
p.searchByStreamIDs(so, bhss, workCh, stopCh)
}
putBlockHeaders(bhss)
}
func getBlockHeaders() *blockHeaders {
v := blockHeadersPool.Get()
if v == nil {
return &blockHeaders{}
}
return v.(*blockHeaders)
}
func putBlockHeaders(bhss *blockHeaders) {
bhss.reset()
blockHeadersPool.Put(bhss)
}
var blockHeadersPool sync.Pool
type blockHeaders struct {
bhs []blockHeader
}
func (bhss *blockHeaders) reset() {
bhs := bhss.bhs
for i := range bhs {
bhs[i].reset()
}
bhss.bhs = bhs[:0]
}
func (p *part) searchByTenantIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) {
// it is assumed that tenantIDs are sorted
tenantIDs := so.tenantIDs
bswb := getBlockSearchWorkBatch()
scheduleBlockSearch := func(bh *blockHeader) bool {
if bswb.appendBlockSearchWork(p, so, bh) {
return true
}
select {
case <-stopCh:
return false
case workCh <- bswb:
bswb = getBlockSearchWorkBatch()
return true
}
}
// it is assumed that ibhs are sorted
ibhs := p.indexBlockHeaders
for len(ibhs) > 0 && len(tenantIDs) > 0 {
if needStop(stopCh) {
return
}
// locate tenantID equal or bigger than the tenantID in ibhs[0]
tenantID := &tenantIDs[0]
if tenantID.less(&ibhs[0].streamID.tenantID) {
tenantID = &ibhs[0].streamID.tenantID
n := sort.Search(len(tenantIDs), func(i int) bool {
return !tenantIDs[i].less(tenantID)
})
if n == len(tenantIDs) {
tenantIDs = nil
break
}
tenantID = &tenantIDs[n]
tenantIDs = tenantIDs[n:]
}
// locate indexBlockHeader with equal or bigger tenantID than the given tenantID
n := 0
if ibhs[0].streamID.tenantID.less(tenantID) {
n = sort.Search(len(ibhs), func(i int) bool {
return !ibhs[i].streamID.tenantID.less(tenantID)
})
// The end of ibhs[n-1] may contain blocks for the given tenantID, so move it backwards
n--
}
ibh := &ibhs[n]
ibhs = ibhs[n+1:]
if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
// Skip the ibh, since it doesn't contain entries on the requested time range
continue
}
bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)
bhs := bhss.bhs
for len(bhs) > 0 {
// search for blocks with the given tenantID
n = sort.Search(len(bhs), func(i int) bool {
return !bhs[i].streamID.tenantID.less(tenantID)
})
bhs = bhs[n:]
for len(bhs) > 0 && bhs[0].streamID.tenantID.equal(tenantID) {
bh := &bhs[0]
bhs = bhs[1:]
th := &bh.timestampsHeader
if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
continue
}
if !scheduleBlockSearch(bh) {
return
}
}
if len(bhs) == 0 {
break
}
// search for the next tenantID, which can potentially match tenantID from bhs[0]
tenantID = &bhs[0].streamID.tenantID
n = sort.Search(len(tenantIDs), func(i int) bool {
return !tenantIDs[i].less(tenantID)
})
if n == len(tenantIDs) {
tenantIDs = nil
break
}
tenantID = &tenantIDs[n]
tenantIDs = tenantIDs[n:]
}
}
// Flush the remaining work
select {
case <-stopCh:
case workCh <- bswb:
}
}
func (p *part) searchByStreamIDs(so *searchOptions, bhss *blockHeaders, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) {
// it is assumed that streamIDs are sorted
streamIDs := so.streamIDs
bswb := getBlockSearchWorkBatch()
scheduleBlockSearch := func(bh *blockHeader) bool {
if bswb.appendBlockSearchWork(p, so, bh) {
return true
}
select {
case <-stopCh:
return false
case workCh <- bswb:
bswb = getBlockSearchWorkBatch()
return true
}
}
// it is assumed that ibhs are sorted
ibhs := p.indexBlockHeaders
for len(ibhs) > 0 && len(streamIDs) > 0 {
if needStop(stopCh) {
return
}
// locate streamID equal or bigger than the streamID in ibhs[0]
streamID := &streamIDs[0]
if streamID.less(&ibhs[0].streamID) {
streamID = &ibhs[0].streamID
n := sort.Search(len(streamIDs), func(i int) bool {
return !streamIDs[i].less(streamID)
})
if n == len(streamIDs) {
streamIDs = nil
break
}
streamID = &streamIDs[n]
streamIDs = streamIDs[n:]
}
// locate indexBlockHeader with equal or bigger streamID than the given streamID
n := 0
if ibhs[0].streamID.less(streamID) {
n = sort.Search(len(ibhs), func(i int) bool {
return !ibhs[i].streamID.less(streamID)
})
// The end of ibhs[n-1] may contain blocks for the given streamID, so move it backwards.
n--
}
ibh := &ibhs[n]
ibhs = ibhs[n+1:]
if so.minTimestamp > ibh.maxTimestamp || so.maxTimestamp < ibh.minTimestamp {
// Skip the ibh, since it doesn't contain entries on the requested time range
continue
}
bhss.bhs = ibh.mustReadBlockHeaders(bhss.bhs[:0], p)
bhs := bhss.bhs
for len(bhs) > 0 {
// search for blocks with the given streamID
n = sort.Search(len(bhs), func(i int) bool {
return !bhs[i].streamID.less(streamID)
})
bhs = bhs[n:]
for len(bhs) > 0 && bhs[0].streamID.equal(streamID) {
bh := &bhs[0]
bhs = bhs[1:]
th := &bh.timestampsHeader
if so.minTimestamp > th.maxTimestamp || so.maxTimestamp < th.minTimestamp {
continue
}
if !scheduleBlockSearch(bh) {
return
}
}
if len(bhs) == 0 {
break
}
// search for the next streamID, which can potentially match streamID from bhs[0]
streamID = &bhs[0].streamID
n = sort.Search(len(streamIDs), func(i int) bool {
return !streamIDs[i].less(streamID)
})
if n == len(streamIDs) {
streamIDs = nil
break
}
streamID = &streamIDs[n]
streamIDs = streamIDs[n:]
}
}
// Flush the remaining work
select {
case <-stopCh:
case workCh <- bswb:
}
}
func appendPartsInTimeRange(dst, src []*partWrapper, minTimestamp, maxTimestamp int64) []*partWrapper {
for _, pw := range src {
if maxTimestamp < pw.p.ph.MinTimestamp || minTimestamp > pw.p.ph.MaxTimestamp {
continue
}
dst = append(dst, pw)
}
return dst
}
func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
switch t := f.(type) {
case *filterAnd:
filters := t.filters
for i, filter := range filters {
sf, ok := filter.(*filterStream)
if ok && !sf.f.isEmpty() {
// Remove sf from filters, since it doesn't filter out anything then.
fa := &filterAnd{
filters: append(filters[:i:i], filters[i+1:]...),
}
return sf.f, fa
}
}
case *filterStream:
return t.f, &filterNoop{}
}
return nil, f
}
2024-05-25 21:36:16 +02:00
func forEachStreamField(streams []ValueWithHits, f func(f Field, hits uint64)) {
var fields []Field
2024-05-24 03:06:55 +02:00
for i := range streams {
2024-05-22 21:01:20 +02:00
var err error
2024-05-25 21:36:16 +02:00
fields, err = parseStreamFields(fields[:0], streams[i].Value)
2024-05-22 21:01:20 +02:00
if err != nil {
continue
}
2024-05-24 03:06:55 +02:00
hits := streams[i].Hits
2024-05-25 21:36:16 +02:00
for j := range fields {
f(fields[j], hits)
2024-05-22 21:01:20 +02:00
}
}
}
2024-05-25 21:36:16 +02:00
func parseStreamFields(dst []Field, s string) ([]Field, error) {
2024-05-22 21:01:20 +02:00
if len(s) == 0 || s[0] != '{' {
return dst, fmt.Errorf("missing '{' at the beginning of stream name")
}
s = s[1:]
if len(s) == 0 || s[len(s)-1] != '}' {
return dst, fmt.Errorf("missing '}' at the end of stream name")
}
s = s[:len(s)-1]
if len(s) == 0 {
return dst, nil
}
for {
n := strings.Index(s, `="`)
if n < 0 {
2024-05-25 21:36:16 +02:00
return dst, fmt.Errorf("cannot find field value in double quotes at [%s]", s)
2024-05-22 21:01:20 +02:00
}
name := s[:n]
s = s[n+1:]
2024-05-24 03:06:55 +02:00
value, nOffset := tryUnquoteString(s, "")
2024-05-22 21:01:20 +02:00
if nOffset < 0 {
2024-05-25 21:36:16 +02:00
return dst, fmt.Errorf("cannot find parse field value in double quotes at [%s]", s)
2024-05-22 21:01:20 +02:00
}
s = s[nOffset:]
dst = append(dst, Field{
Name: name,
Value: value,
})
if len(s) == 0 {
return dst, nil
}
if s[0] != ',' {
return dst, fmt.Errorf("missing ',' after %s=%q", name, value)
}
s = s[1:]
}
}