VictoriaMetrics/lib/logstorage/pipe_unpack_json.go

package logstorage

import (
	"fmt"
	"unsafe"
)

// pipeUnpackJSON processes '| unpack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe
type pipeUnpackJSON struct {
	// fromField is the name of the column whose values are parsed as JSON.
	// parsePipeUnpackJSON sets it to "_msg" when the pipe has no 'from' clause.
	fromField string

	// resultPrefix is handed to the JSON parser together with every value to parse.
	// NOTE(review): presumably it is prepended to the names of the unpacked fields - confirm in JSONParser.
	resultPrefix string
}

// String returns the textual form of the pipe.
//
// The result always starts with "unpack_json". The " from <field>" part is added
// only when isMsgFieldName reports false for the source field, and the
// " result_prefix <prefix>" part is added only for a non-empty prefix.
func (pu *pipeUnpackJSON) String() string {
	result := "unpack_json"
	if from := pu.fromField; !isMsgFieldName(from) {
		result = result + " from " + quoteTokenIfNeeded(from)
	}
	if prefix := pu.resultPrefix; prefix != "" {
		result = result + " result_prefix " + quoteTokenIfNeeded(prefix)
	}
	return result
}

// updateNeededFields makes sure the source field of the pipe is requested.
//
// When neededFields does not contain the "*" entry, the source field is added to it.
// Otherwise the source field is removed from unneededFields.
func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
	if !neededFields.contains("*") {
		neededFields.add(pu.fromField)
		return
	}
	unneededFields.remove(pu.fromField)
}

// newPipeProcessor returns a processor for pu with workersCount shards,
// which hands ppBase to the per-shard write context on every block.
//
// The second and the third arguments are ignored.
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
	return &pipeUnpackJSONProcessor{
		pu:     pu,
		ppBase: ppBase,

		// One shard per worker; writeBlock indexes this slice by workerID.
		shards: make([]pipeUnpackJSONProcessorShard, workersCount),
	}
}

// pipeUnpackJSONProcessor is the processor created by pipeUnpackJSON.newPipeProcessor.
type pipeUnpackJSONProcessor struct {
	// pu holds the pipe settings (source field and result prefix) read by writeBlock.
	pu *pipeUnpackJSON

	// ppBase is passed to the shard's write context at the start of every writeBlock call.
	// NOTE(review): presumably this is the next processor, which receives the written rows - confirm in pipeUnpackWriteContext.
	ppBase pipeProcessor

	// shards contains per-worker state; writeBlock selects an entry by workerID.
	shards []pipeUnpackJSONProcessorShard
}

// pipeUnpackJSONProcessorShard is the per-worker state of pipeUnpackJSONProcessor.
//
// It embeds pipeUnpackJSONProcessorShardNopad and appends padding bytes,
// so the padded part of the struct occupies a multiple of 128 bytes.
type pipeUnpackJSONProcessorShard struct {
	pipeUnpackJSONProcessorShardNopad

	// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
	//
	// Note that a full 128 bytes of padding are added when the size
	// of the unpadded struct is already a multiple of 128.
	_ [128 - unsafe.Sizeof(pipeUnpackJSONProcessorShardNopad{})%128]byte
}

// pipeUnpackJSONProcessorShardNopad contains the actual per-worker state
// without the padding added by pipeUnpackJSONProcessorShard.
type pipeUnpackJSONProcessorShardNopad struct {
	// p is the JSON parser used by parseJSON. writeBlock resets it after every processed block.
	p JSONParser

	// wctx is the write context, which writeBlock initializes for every block,
	// feeds with rows and flushes at the end of the block.
	wctx pipeUnpackWriteContext
}

// parseJSON parses v with the shard's JSON parser and returns the parser's Fields.
//
// It returns nil if v is empty, if v doesn't start with '{' or if the parser reports an error.
// resultPrefix is passed to the parser as is.
func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {
	startsWithBrace := len(v) > 0 && v[0] == '{'
	if !startsWithBrace {
		// v cannot be a JSON object, so do not bother the parser.
		return nil
	}

	err := shard.p.ParseLogMessageNoResetBuf(v, resultPrefix)
	if err != nil {
		// v cannot be parsed; treat it as a value without fields to unpack.
		return nil
	}
	return shard.p.Fields
}

// writeBlock parses JSON from the source field of every row in br and passes
// each row index together with the parsed fields to the write context
// of the shard selected by workerID.
//
// Blocks without timestamps are skipped. The write context is flushed
// and the shard's JSON parser is reset before returning.
func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
	if len(br.timestamps) == 0 {
		return
	}

	shard := &pup.shards[workerID]
	shard.wctx.init(br, pup.ppBase)

	prefix := pup.pu.resultPrefix
	col := br.getColumnByName(pup.pu.fromField)
	if !col.isConst {
		rowValues := col.getValues(br)
		var parsed []Field
		for rowIdx, cur := range rowValues {
			// Parse only when the value differs from the previous row's value;
			// runs of identical values reuse the previously parsed fields.
			if rowIdx == 0 || rowValues[rowIdx-1] != cur {
				parsed = shard.parseJSON(cur, prefix)
			}
			shard.wctx.writeRow(rowIdx, parsed)
		}
	} else {
		// The column holds a single value for all the rows, so parse it once.
		parsed := shard.parseJSON(col.valuesEncoded[0], prefix)
		for rowIdx := range br.timestamps {
			shard.wctx.writeRow(rowIdx, parsed)
		}
	}

	shard.wctx.flush()
	shard.p.reset()
}

// flush does nothing and always returns nil.
//
// There is nothing to flush here, since writeBlock flushes the shard's
// write context at the end of every processed block.
func (pup *pipeUnpackJSONProcessor) flush() error {
	return nil
}

// parsePipeUnpackJSON parses the 'unpack_json' pipe from lex.
//
// The expected form is:
//
//	unpack_json [from <field>] [result_prefix <prefix>]
//
// The source field is "_msg" and the result prefix is empty unless
// the corresponding clause is present.
//
// An error is returned if the current token isn't the 'unpack_json' keyword
// or if the value of any clause cannot be parsed.
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
	if !lex.isKeyword("unpack_json") {
		return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_json")
	}
	lex.nextToken()

	// Start with the defaults; the optional clauses below override them.
	pu := &pipeUnpackJSON{
		fromField:    "_msg",
		resultPrefix: "",
	}

	if lex.isKeyword("from") {
		lex.nextToken()
		name, err := parseFieldName(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
		}
		pu.fromField = name
	}

	if lex.isKeyword("result_prefix") {
		lex.nextToken()
		prefix, err := getCompoundToken(lex)
		if err != nil {
			return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
		}
		pu.resultPrefix = prefix
	}

	return pu, nil
}
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`package logstorage`

			`import (`
			`"fmt"`
			`"unsafe"`
			`)`

			`// pipeUnpackJSON processes '\| unpack_json ...' pipe.`
			`//`
			`// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe`
			`type pipeUnpackJSON struct {`
			`fromField string`

			`resultPrefix string`
			`}`

			`func (pu *pipeUnpackJSON) String() string {`
			`s := "unpack_json"`
			`if !isMsgFieldName(pu.fromField) {`
			`s += " from " + quoteTokenIfNeeded(pu.fromField)`
			`}`
			`if pu.resultPrefix != "" {`
			`s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)`
			`}`
			`return s`
			`}`

			`func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {`
			`if neededFields.contains("*") {`
			`unneededFields.remove(pu.fromField)`
			`} else {`
			`neededFields.add(pu.fromField)`
			`}`
			`}`

			`func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {`
			`shards := make([]pipeUnpackJSONProcessorShard, workersCount)`

			`pup := &pipeUnpackJSONProcessor{`
			`pu: pu,`
			`ppBase: ppBase,`

			`shards: shards,`
			`}`
			`return pup`
			`}`

			`type pipeUnpackJSONProcessor struct {`
			`pu *pipeUnpackJSON`
			`ppBase pipeProcessor`

			`shards []pipeUnpackJSONProcessorShard`
			`}`

			`type pipeUnpackJSONProcessorShard struct {`
			`pipeUnpackJSONProcessorShardNopad`

			`// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .`
			`_ [128 - unsafe.Sizeof(pipeUnpackJSONProcessorShardNopad{})%128]byte`
			`}`

			`type pipeUnpackJSONProcessorShardNopad struct {`
			`p JSONParser`

			`wctx pipeUnpackWriteContext`
			`}`

			`func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {`
			`if len(v) == 0 \|\| v[0] != '{' {`
			`// This isn't a JSON object`
			`return nil`
			`}`
			`if err := shard.p.ParseLogMessageNoResetBuf(v, resultPrefix); err != nil {`
			`// Cannot parse v`
			`return nil`
			`}`
			`return shard.p.Fields`
			`}`

			`func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {`
			`if len(br.timestamps) == 0 {`
			`return`
			`}`

			`resultPrefix := pup.pu.resultPrefix`
			`shard := &pup.shards[workerID]`
			`wctx := &shard.wctx`
			`wctx.init(br, pup.ppBase)`

			`c := br.getColumnByName(pup.pu.fromField)`
			`if c.isConst {`
			`v := c.valuesEncoded[0]`
			`extraFields := shard.parseJSON(v, resultPrefix)`
			`for rowIdx := range br.timestamps {`
			`wctx.writeRow(rowIdx, extraFields)`
			`}`
			`} else {`
			`values := c.getValues(br)`
			`var extraFields []Field`
			`for i, v := range values {`
			`if i == 0 \|\| values[i-1] != v {`
			`extraFields = shard.parseJSON(v, resultPrefix)`
			`}`
			`wctx.writeRow(i, extraFields)`
			`}`
			`}`

			`wctx.flush()`
			`shard.p.reset()`
			`}`

			`func (pup *pipeUnpackJSONProcessor) flush() error {`
			`return nil`
			`}`

			`func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {`
			`if !lex.isKeyword("unpack_json") {`
			`return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_json")`
			`}`
			`lex.nextToken()`

			`fromField := "_msg"`
			`if lex.isKeyword("from") {`
			`lex.nextToken()`
			`f, err := parseFieldName(lex)`
			`if err != nil {`
			`return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)`
			`}`
			`fromField = f`
			`}`

			`resultPrefix := ""`
			`if lex.isKeyword("result_prefix") {`
			`lex.nextToken()`
			`p, err := getCompoundToken(lex)`
			`if err != nil {`
			`return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)`
			`}`
			`resultPrefix = p`
			`}`

			`pu := &pipeUnpackJSON{`
			`fromField: fromField,`
			`resultPrefix: resultPrefix,`
			`}`
			`return pu, nil`
			`}`