VictoriaMetrics/lib/logstorage/pipe_field_names.go

package logstorage

import (
	"fmt"
	"strings"
	"unsafe"
)

// pipeFieldNames processes '| field_names' pipe.
//
// The pipe emits one row per seen field name. Every row contains two columns:
// the field name (written into the resultName column) and the "hits" counter for this name.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe
type pipeFieldNames struct {
	// resultName is the name of the column to write field names to.
	//
	// parsePipeFieldNames sets it to "name" when the query doesn't specify the name explicitly.
	resultName string

	// isFirstPipe is set to true if '| field_names' pipe is the first in the query.
	//
	// This allows skipping loading of _time column: updateNeededFields() marks _time as unneeded,
	// while flush() reports _time with the same hits as _stream.
	isFirstPipe bool
}

// String returns the LogsQL representation of pf.
//
// The ' as <resultName>' suffix is added only if resultName differs from "name".
func (pf *pipeFieldNames) String() string {
	const pipeName = "field_names"

	if pf.resultName == "name" {
		return pipeName
	}
	return pipeName + " as " + quoteTokenIfNeeded(pf.resultName)
}

// canLiveTail always returns false for the 'field_names' pipe.
//
// NOTE(review): presumably live tailing is unsupported because the results
// are emitted only from flush(), after all the input blocks are processed - confirm.
func (pf *pipeFieldNames) canLiveTail() bool {
	return false
}

// updateNeededFields marks all the fields as needed and clears unneededFields.
//
// If the pipe is the first in the query, then _time is registered as unneeded.
func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {
	// Every field must be seen by the pipe in order to be counted.
	neededFields.add("*")
	unneededFields.reset()

	if !pf.isFirstPipe {
		return
	}

	// Hits for _time are derived in flush(), so the column itself isn't needed.
	unneededFields.add("_time")
}

// optimize is a no-op for the 'field_names' pipe.
func (pf *pipeFieldNames) optimize() {
	// nothing to do
}

// hasFilterInWithQuery always returns false for the 'field_names' pipe.
//
// NOTE(review): presumably this reports the presence of 'in(<query>)' filters inside the pipe;
// pipeFieldNames holds no filters at all - confirm against the pipe interface.
func (pf *pipeFieldNames) hasFilterInWithQuery() bool {
	return false
}

// initFilterInValues returns pf as is and never returns an error.
//
// Both arguments are ignored.
func (pf *pipeFieldNames) initFilterInValues(_ map[string][]string, _ getFieldValuesFunc) (pipe, error) {
	return pf, nil
}

// newPipeProcessor returns a processor for pf, which sends its results to ppNext.
//
// The processor gets workersCount shards; writeBlock() picks the shard by workerID.
// The third argument is ignored.
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
	return &pipeFieldNamesProcessor{
		pf:     pf,
		stopCh: stopCh,
		ppNext: ppNext,

		shards: make([]pipeFieldNamesProcessorShard, workersCount),
	}
}

// pipeFieldNamesProcessor collects per-field hits in writeBlock() and emits them to ppNext in flush().
type pipeFieldNamesProcessor struct {
	// pf is the pipe this processor has been created for.
	pf *pipeFieldNames
	// stopCh is checked in flush(); nothing is written to ppNext if needStop(stopCh) returns true.
	stopCh <-chan struct{}
	// ppNext receives the resulting blocks.
	ppNext pipeProcessor

	// shards holds per-worker state; it is indexed by workerID in writeBlock().
	shards []pipeFieldNamesProcessorShard
}

// pipeFieldNamesProcessorShard is a single shard of pipeFieldNamesProcessor state.
//
// The shard size is padded to a multiple of 128 bytes.
type pipeFieldNamesProcessorShard struct {
	pipeFieldNamesProcessorShardNopad

	// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
	_ [128 - unsafe.Sizeof(pipeFieldNamesProcessorShardNopad{})%128]byte
}

// pipeFieldNamesProcessorShardNopad contains the shard state without the trailing padding.
type pipeFieldNamesProcessorShardNopad struct {
	// m holds hits per each field name
	//
	// It is lazily allocated by getM(), so it must be accessed via getM().
	m map[string]*uint64
}

// getM returns the map with per-field hits for the shard.
//
// The map is allocated on the first call.
func (shard *pipeFieldNamesProcessorShard) getM() map[string]*uint64 {
	if shard.m != nil {
		return shard.m
	}

	shard.m = make(map[string]*uint64)
	return shard.m
}

// writeBlock adds the number of rows in br to the hits of every column present in br.
//
// The hits are accumulated in the shard selected by workerID. Empty blocks are ignored.
func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
	if br.rowsLen == 0 {
		return
	}

	hitsByName := pfp.shards[workerID].getM()

	columns := br.getColumns()

	// Assume that every column is set for all the rows in the block.
	// This is much faster than reading all the column values and counting non-empty rows.
	blockHits := uint64(br.rowsLen)

	for _, col := range columns {
		counter, found := hitsByName[col.name]
		if !found {
			// Clone the name before storing it as a map key, so the key doesn't reference col.name.
			counter = new(uint64)
			hitsByName[strings.Clone(col.name)] = counter
		}
		*counter += blockHits
	}
}

// flush merges per-shard hits and writes them to ppNext as (resultName, "hits") rows.
//
// Nothing is written if the stop has been requested via stopCh.
// The returned error is always nil.
func (pfp *pipeFieldNamesProcessor) flush() error {
	if needStop(pfp.stopCh) {
		return nil
	}

	// Merge the state from all the shards into the map of the first shard.
	total := pfp.shards[0].getM()
	for i := 1; i < len(pfp.shards); i++ {
		for fieldName, src := range pfp.shards[i].getM() {
			if dst, found := total[fieldName]; found {
				*dst += *src
			} else {
				total[fieldName] = src
			}
		}
	}

	if pfp.pf.isFirstPipe {
		// The _time column is skipped when the pipe is the first in the query,
		// so report it with the same hits as the _stream field (zero if _stream is missing).
		timeHits := total["_stream"]
		if timeHits == nil {
			timeHits = new(uint64)
		}
		total["_time"] = timeHits
	}

	// Write the merged hits to ppNext.
	wctx := &pipeFieldNamesWriteContext{
		pfp: pfp,
	}
	wctx.rcs[0].name = pfp.pf.resultName
	wctx.rcs[1].name = "hits"

	for fieldName, counter := range total {
		wctx.writeRow(fieldName, string(marshalUint64String(nil, *counter)))
	}
	wctx.flush()

	return nil
}

// pipeFieldNamesWriteContext buffers result rows and sends them in blocks to pfp.ppNext.
type pipeFieldNamesWriteContext struct {
	// pfp is the processor whose ppNext receives the flushed blocks.
	pfp *pipeFieldNamesProcessor
	// rcs holds the two result columns: rcs[0] receives field names, rcs[1] receives hits.
	rcs [2]resultColumn
	// br is the block, which is filled from rcs and passed to ppNext on every flush().
	br blockResult

	// rowsCount is the number of rows in the current block
	rowsCount int

	// valuesLen is the total length of values in the current block
	//
	// writeRow() flushes the block when valuesLen reaches 1_000_000.
	valuesLen int
}

// writeRow appends the (name, hits) row to the current block.
//
// The block is flushed as soon as the total length of the buffered values reaches 1_000_000 bytes.
func (wctx *pipeFieldNamesWriteContext) writeRow(name, hits string) {
	nameColumn, hitsColumn := &wctx.rcs[0], &wctx.rcs[1]
	nameColumn.addValue(name)
	hitsColumn.addValue(hits)

	wctx.rowsCount++
	wctx.valuesLen += len(name) + len(hits)
	if wctx.valuesLen < 1_000_000 {
		return
	}
	wctx.flush()
}

// flush sends the buffered rows to ppNext and resets wctx for the next block.
//
// The block is always passed to ppNext.writeBlock() with workerID=0.
func (wctx *pipeFieldNamesWriteContext) flush() {
	// Remember the number of buffered rows and reset the counters.
	rowsCount := wctx.rowsCount
	wctx.rowsCount = 0
	wctx.valuesLen = 0

	// Flush rcs to ppNext
	wctx.br.setResultColumns(wctx.rcs[:], rowsCount)
	wctx.pfp.ppNext.writeBlock(0, &wctx.br)

	// Drop the flushed data only after ppNext has processed the block.
	wctx.br.reset()
	for i := range wctx.rcs {
		wctx.rcs[i].resetValues()
	}
}

// parsePipeFieldNames parses 'field_names [[as] resultName]' pipe from lex.
//
// The result name defaults to "name" when 'field_names' is followed by '|' or by the end of the query.
// An error is returned if lex isn't positioned at 'field_names' or if the result name cannot be parsed.
func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
	if !lex.isKeyword("field_names") {
		return nil, fmt.Errorf("expecting 'field_names'; got %q", lex.token)
	}
	lex.nextToken()

	pf := &pipeFieldNames{
		resultName: "name",
	}

	// The 'as' keyword in front of the result name is optional.
	hasAs := lex.isKeyword("as")
	if hasAs {
		lex.nextToken()
	}

	// The result name is mandatory after 'as'; otherwise it is parsed
	// only if the pipe isn't terminated right after 'field_names'.
	if hasAs || !lex.isKeyword("", "|") {
		name, err := parseFieldName(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
		}
		pf.resultName = name
	}

	return pf, nil
}
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`package logstorage`

			`import (`
			`"fmt"`
			`"strings"`
			`"unsafe"`
			`)`

			`// pipeFieldNames processes '\| field_names' pipe.`
			`//`
lib/logstorage: add `blocks_count` pipe This pipe is useful for debugging purposes when the number of processed blocks must be calculated for the given query: <query> \| blocks_count This helps detecting the root cause of query performance slowdown in cases like https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070 2024-09-25 19:15:32 +02:00			`// See https://docs.victoriametrics.com/victorialogs/logsql/#field_names-pipe`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`type pipeFieldNames struct {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`// resultName is an optional name of the column to write results to.`
			`// By default results are written into 'name' column.`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`resultName string`

			`// isFirstPipe is set to true if '\| field_names' pipe is the first in the query.`
			`//`
			`// This allows skipping loading of _time column.`
			`isFirstPipe bool`
			`}`

			`func (pf *pipeFieldNames) String() string {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`s := "field_names"`
			`if pf.resultName != "name" {`
			`s += " as " + quoteTokenIfNeeded(pf.resultName)`
			`}`
			`return s`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`

lib/logstorage: work-in-progress 2024-06-27 14:18:42 +02:00			`func (pf *pipeFieldNames) canLiveTail() bool {`
			`return false`
			`}`

lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {`
			`neededFields.add("*")`
			`unneededFields.reset()`

			`if pf.isFirstPipe {`
			`unneededFields.add("_time")`
			`}`
			`}`

lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`func (pf *pipeFieldNames) optimize() {`
			`// nothing to do`
			`}`

			`func (pf *pipeFieldNames) hasFilterInWithQuery() bool {`
			`return false`
			`}`

lib/logstorage: fix golangci-lint warnings 2024-05-26 02:01:32 +02:00			`func (pf *pipeFieldNames) initFilterInValues(_ map[string][]string, _ getFieldValuesFunc) (pipe, error) {`
lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`return pf, nil`
			`}`

			`func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`shards := make([]pipeFieldNamesProcessorShard, workersCount)`

			`pfp := &pipeFieldNamesProcessor{`
			`pf: pf,`
			`stopCh: stopCh,`
lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`ppNext: ppNext,`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00
			`shards: shards,`
			`}`
			`return pfp`
			`}`

			`type pipeFieldNamesProcessor struct {`
			`pf *pipeFieldNames`
			`stopCh <-chan struct{}`
lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`ppNext pipeProcessor`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00
			`shards []pipeFieldNamesProcessorShard`
			`}`

			`type pipeFieldNamesProcessorShard struct {`
			`pipeFieldNamesProcessorShardNopad`

			`// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .`
			`_ [128 - unsafe.Sizeof(pipeFieldNamesProcessorShardNopad{})%128]byte`
			`}`

			`type pipeFieldNamesProcessorShardNopad struct {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`// m holds hits per each field name`
			`m map[string]*uint64`
			`}`

			`func (shard pipeFieldNamesProcessorShard) getM() map[string]uint64 {`
			`if shard.m == nil {`
			`shard.m = make(map[string]*uint64)`
			`}`
			`return shard.m`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`

			`func (pfp pipeFieldNamesProcessor) writeBlock(workerID uint, br blockResult) {`
lib/logstorage: read timestamps column when it is really needed during query execution Previously timestamps column was read unconditionally on every query. This could significantly slow down queries, which do not need reading this column like in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070 . 2024-09-25 16:16:53 +02:00			`if br.rowsLen == 0 {`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`return`
			`}`

			`shard := &pfp.shards[workerID]`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`m := shard.getM()`

lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`cs := br.getColumns()`
			`for _, c := range cs {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`pHits, ok := m[c.name]`
			`if !ok {`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`nameCopy := strings.Clone(c.name)`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`hits := uint64(0)`
			`pHits = &hits`
			`m[nameCopy] = pHits`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00
			`// Assume that the column is set for all the rows in the block.`
			`// This is much faster than reading all the column values and counting non-empty rows.`
lib/logstorage: read timestamps column when it is really needed during query execution Previously timestamps column was read unconditionally on every query. This could significantly slow down queries, which do not need reading this column like in https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7070 . 2024-09-25 16:16:53 +02:00			`*pHits += uint64(br.rowsLen)`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`
			`}`

			`func (pfp *pipeFieldNamesProcessor) flush() error {`
			`if needStop(pfp.stopCh) {`
			`return nil`
			`}`

			`// merge state across shards`
			`shards := pfp.shards`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`m := shards[0].getM()`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`shards = shards[1:]`
			`for i := range shards {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`for name, pHitsSrc := range shards[i].getM() {`
			`pHits, ok := m[name]`
			`if !ok {`
			`m[name] = pHitsSrc`
			`} else {`
			`pHits += pHitsSrc`
			`}`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`
			`}`
			`if pfp.pf.isFirstPipe {`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`pHits := m["_stream"]`
			`if pHits == nil {`
			`hits := uint64(0)`
			`pHits = &hits`
			`}`
			`m["_time"] = pHits`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`

			`// write result`
			`wctx := &pipeFieldNamesWriteContext{`
			`pfp: pfp,`
			`}`
			`wctx.rcs[0].name = pfp.pf.resultName`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`wctx.rcs[1].name = "hits"`

			`for name, pHits := range m {`
			`hits := string(marshalUint64String(nil, *pHits))`
			`wctx.writeRow(name, hits)`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`
			`wctx.flush()`

			`return nil`
			`}`

			`type pipeFieldNamesWriteContext struct {`
			`pfp *pipeFieldNamesProcessor`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`rcs [2]resultColumn`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`br blockResult`

lib/logstorage: work-in-progress 2024-05-22 21:01:20 +02:00			`// rowsCount is the number of rows in the current block`
			`rowsCount int`

			`// valuesLen is the total length of values in the current block`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`valuesLen int`
			`}`

lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`func (wctx *pipeFieldNamesWriteContext) writeRow(name, hits string) {`
			`wctx.rcs[0].addValue(name)`
			`wctx.rcs[1].addValue(hits)`
			`wctx.valuesLen += len(name) + len(hits)`
lib/logstorage: work-in-progress 2024-05-22 21:01:20 +02:00			`wctx.rowsCount++`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`if wctx.valuesLen >= 1_000_000 {`
			`wctx.flush()`
			`}`
			`}`

			`func (wctx *pipeFieldNamesWriteContext) flush() {`
			`br := &wctx.br`

			`wctx.valuesLen = 0`

lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`// Flush rcs to ppNext`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`br.setResultColumns(wctx.rcs[:], wctx.rowsCount)`
lib/logstorage: work-in-progress 2024-05-22 21:01:20 +02:00			`wctx.rowsCount = 0`
lib/logstorage: work-in-progress 2024-05-25 21:36:16 +02:00			`wctx.pfp.ppNext.writeBlock(0, br)`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`br.reset()`
			`wctx.rcs[0].resetValues()`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`wctx.rcs[1].resetValues()`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`

			`func parsePipeFieldNames(lex lexer) (pipeFieldNames, error) {`
			`if !lex.isKeyword("field_names") {`
			`return nil, fmt.Errorf("expecting 'field_names'; got %q", lex.token)`
			`}`
			`lex.nextToken()`

lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`resultName := "name"`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`if lex.isKeyword("as") {`
			`lex.nextToken()`
lib/logstorage: work-in-progress 2024-05-24 03:06:55 +02:00			`name, err := parseFieldName(lex)`
			`if err != nil {`
			`return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)`
			`}`
			`resultName = name`
			`} else if !lex.isKeyword("", "\|") {`
			`name, err := parseFieldName(lex)`
			`if err != nil {`
			`return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)`
			`}`
			`resultName = name`
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`}`

			`pf := &pipeFieldNames{`
			`resultName: resultName,`
			`}`
			`return pf, nil`
			`}`