mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 07:19:17 +01:00
lib/logstorage: add len
pipe for calculating byte length of log field values
(cherry picked from commit 364f084b43
)
This commit is contained in:
parent
4d91e536f1
commit
7a44614e0b
@ -15,6 +15,9 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
|
||||
|
||||
## tip
|
||||
|
||||
* FEATURE: [vlogscli](https://docs.victoriametrics.com/victorialogs/querying/vlogscli/): preserve `less` output after the exit from scrolling mode. This should help re-using previous query results in subsequent queries.
|
||||
* FEATURE: add [`len` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#len-pipe) for calculating the length for the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value in bytes.
|
||||
|
||||
## [v0.33.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.33.0-victorialogs)
|
||||
|
||||
Released at 2024-10-01
|
||||
|
@ -1304,6 +1304,7 @@ LogsQL supports the following pipes:
|
||||
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
- [`filter`](#filter-pipe) applies additional [filters](#filters) to results.
|
||||
- [`format`](#format-pipe) formats output field from input [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
- [`len`](#len-pipe) calculates byte length of the given [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) value.
|
||||
- [`limit`](#limit-pipe) limits the number selected logs.
|
||||
- [`math`](#math-pipe) performs mathematical calculations over [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
- [`offset`](#offset-pipe) skips the given number of selected logs.
|
||||
@ -1758,6 +1759,22 @@ only if `ip` and `host` [fields](https://docs.victoriametrics.com/victorialogs/k
|
||||
_time:5m | format if (ip:* and host:*) "request from <ip>:<host>" as message
|
||||
```
|
||||
|
||||
### len pipe
|
||||
|
||||
The `| len(field) as result` pipe stores byte length of the given `field` value into the `result` field.
|
||||
For example, the following query shows top 5 log entries with the maximum byte length of `_msg` field across
|
||||
logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | len(_msg) as msg_len | sort by (msg_len desc) | limit 1
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`sum_len` stats function](#sum-len-stats)
|
||||
- [`sort` pipe](#sort-pipe)
|
||||
- [`limit` pipe](#limit-pipe)
|
||||
|
||||
### limit pipe
|
||||
|
||||
If only a subset of selected logs must be processed, then `| limit N` [pipe](#pipes) can be used, where `N` can contain any [supported integer numeric value](#numeric-values).
|
||||
@ -3027,10 +3044,10 @@ See also:
|
||||
|
||||
### sum_len stats
|
||||
|
||||
`sum_len(field1, ..., fieldN)` [stats pipe function](#stats-pipe-functions) calculates the sum of lengths of all the values
|
||||
`sum_len(field1, ..., fieldN)` [stats pipe function](#stats-pipe-functions) calculates the sum of byte lengths of all the values
|
||||
for the given [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
|
||||
For example, the following query returns the sum of lengths of [`_msg` fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
For example, the following query returns the sum of byte lengths of [`_msg` fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
across all the logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
@ -3040,6 +3057,7 @@ _time:5m | stats sum_len(_msg) messages_len
|
||||
See also:
|
||||
|
||||
- [`count`](#count-stats)
|
||||
- [`len` pipe](#len-pipe)
|
||||
|
||||
### uniq_values stats
|
||||
|
||||
|
@ -978,6 +978,10 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`foo | field_names y`, `foo | field_names as y`)
|
||||
f(`foo | field_names`, `foo | field_names`)
|
||||
|
||||
// field_values pipe
|
||||
f(`* | field_values x`, `* | field_values x`)
|
||||
f(`* | field_values (x)`, `* | field_values x`)
|
||||
|
||||
// blocks_count pipe
|
||||
f(`foo | blocks_count as x`, `foo | blocks_count as x`)
|
||||
f(`foo | blocks_count y`, `foo | blocks_count as y`)
|
||||
@ -999,6 +1003,14 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`* | rm foo`, `* | delete foo`)
|
||||
f(`* | DELETE foo, bar`, `* | delete foo, bar`)
|
||||
|
||||
// len pipe
|
||||
f(`* | len(x)`, `* | len(x)`)
|
||||
f(`* | len(x) as _msg`, `* | len(x)`)
|
||||
f(`* | len(x) y`, `* | len(x) as y`)
|
||||
f(`* | len ( x ) as y`, `* | len(x) as y`)
|
||||
f(`* | len x y`, `* | len(x) as y`)
|
||||
f(`* | len x as y`, `* | len(x) as y`)
|
||||
|
||||
// limit and head pipe
|
||||
f(`foo | limit`, `foo | limit 10`)
|
||||
f(`foo | head`, `foo | limit 10`)
|
||||
@ -1519,6 +1531,12 @@ func TestParseQueryFailure(t *testing.T) {
|
||||
f(`foo | delete foo,`)
|
||||
f(`foo | delete foo,,`)
|
||||
|
||||
// invalid len pipe
|
||||
f(`foo | len`)
|
||||
f(`foo | len(`)
|
||||
f(`foo | len()`)
|
||||
f(`foo | len (x) y z`)
|
||||
|
||||
// invalid limit pipe value
|
||||
f(`foo | limit bar`)
|
||||
f(`foo | limit -123`)
|
||||
@ -2078,6 +2096,7 @@ func TestQueryCanReturnLastNResults(t *testing.T) {
|
||||
f("* | rm x", true)
|
||||
f("* | stats count() rows", false)
|
||||
f("* | sort by (x)", false)
|
||||
f("* | len(x)", true)
|
||||
f("* | limit 10", false)
|
||||
f("* | offset 10", false)
|
||||
f("* | uniq (x)", false)
|
||||
@ -2114,6 +2133,7 @@ func TestQueryCanLiveTail(t *testing.T) {
|
||||
f("* | field_values a", false)
|
||||
f("* | filter foo", true)
|
||||
f("* | format 'a<b>c'", true)
|
||||
f("* | len(x)", true)
|
||||
f("* | limit 10", false)
|
||||
f("* | math a/b as c", true)
|
||||
f("* | offset 10", false)
|
||||
|
@ -165,6 +165,12 @@ func parsePipe(lex *lexer) (pipe, error) {
|
||||
return nil, fmt.Errorf("cannot parse 'format' pipe: %w", err)
|
||||
}
|
||||
return pf, nil
|
||||
case lex.isKeyword("len"):
|
||||
pl, err := parsePipeLen(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'len' pipe: %w", err)
|
||||
}
|
||||
return pl, nil
|
||||
case lex.isKeyword("limit", "head"):
|
||||
pl, err := parsePipeLimit(lex)
|
||||
if err != nil {
|
||||
@ -301,6 +307,7 @@ var pipeNames = func() map[string]struct{} {
|
||||
"fields", "keep",
|
||||
"filter", "where",
|
||||
"format",
|
||||
"len",
|
||||
"limit", "head",
|
||||
"math", "eval",
|
||||
"offset", "skip",
|
||||
|
@ -77,7 +77,7 @@ func parsePipeFieldValues(lex *lexer) (*pipeFieldValues, error) {
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
field, err := parseFieldName(lex)
|
||||
field, err := parseFieldNameWithOptionalParens(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse field name for 'field_values': %w", err)
|
||||
}
|
||||
|
@ -53,7 +53,7 @@ func TestPipeFieldValues(t *testing.T) {
|
||||
},
|
||||
})
|
||||
|
||||
f("field_values b", [][]Field{
|
||||
f("field_values (b)", [][]Field{
|
||||
{
|
||||
{"a", `2`},
|
||||
{"b", `3`},
|
||||
|
174
lib/logstorage/pipe_len.go
Normal file
174
lib/logstorage/pipe_len.go
Normal file
@ -0,0 +1,174 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
// pipeLen processes '| len ...' pipe.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#len-pipe
|
||||
type pipeLen struct {
|
||||
fieldName string
|
||||
resultField string
|
||||
}
|
||||
|
||||
func (pl *pipeLen) String() string {
|
||||
s := "len(" + quoteTokenIfNeeded(pl.fieldName) + ")"
|
||||
if !isMsgFieldName(pl.resultField) {
|
||||
s += " as " + quoteTokenIfNeeded(pl.resultField)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (pl *pipeLen) canLiveTail() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
func (pl *pipeLen) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
if !unneededFields.contains(pl.resultField) {
|
||||
unneededFields.add(pl.resultField)
|
||||
unneededFields.remove(pl.fieldName)
|
||||
}
|
||||
} else {
|
||||
if neededFields.contains(pl.resultField) {
|
||||
neededFields.remove(pl.resultField)
|
||||
neededFields.add(pl.fieldName)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pl *pipeLen) optimize() {
|
||||
// Nothing to do
|
||||
}
|
||||
|
||||
func (pl *pipeLen) hasFilterInWithQuery() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func (pl *pipeLen) initFilterInValues(_ map[string][]string, _ getFieldValuesFunc) (pipe, error) {
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
func (pl *pipeLen) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppNext pipeProcessor) pipeProcessor {
|
||||
return &pipeLenProcessor{
|
||||
pl: pl,
|
||||
ppNext: ppNext,
|
||||
|
||||
shards: make([]pipeLenProcessorShard, workersCount),
|
||||
}
|
||||
}
|
||||
|
||||
type pipeLenProcessor struct {
|
||||
pl *pipeLen
|
||||
ppNext pipeProcessor
|
||||
|
||||
shards []pipeLenProcessorShard
|
||||
}
|
||||
|
||||
type pipeLenProcessorShard struct {
|
||||
pipeLenProcessorShardNopad
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(pipeLenProcessorShardNopad{})%128]byte
|
||||
}
|
||||
|
||||
type pipeLenProcessorShardNopad struct {
|
||||
a arena
|
||||
rc resultColumn
|
||||
}
|
||||
|
||||
func (plp *pipeLenProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if br.rowsLen == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
shard := &plp.shards[workerID]
|
||||
shard.rc.name = plp.pl.resultField
|
||||
|
||||
c := br.getColumnByName(plp.pl.fieldName)
|
||||
if c.isConst {
|
||||
// Fast path for const column
|
||||
vLen := len(c.valuesEncoded[0])
|
||||
shard.a.b = marshalUint64String(shard.a.b[:0], uint64(vLen))
|
||||
vLenStr := bytesutil.ToUnsafeString(shard.a.b)
|
||||
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
|
||||
shard.rc.addValue(vLenStr)
|
||||
}
|
||||
} else {
|
||||
// Slow path for other columns
|
||||
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
vLen := len(v)
|
||||
aLen := len(shard.a.b)
|
||||
shard.a.b = marshalUint64String(shard.a.b, uint64(vLen))
|
||||
vLenStr := bytesutil.ToUnsafeString(shard.a.b[aLen:])
|
||||
shard.rc.addValue(vLenStr)
|
||||
}
|
||||
}
|
||||
|
||||
// Write the result to ppNext
|
||||
br.addResultColumn(&shard.rc)
|
||||
plp.ppNext.writeBlock(workerID, br)
|
||||
|
||||
shard.a.reset()
|
||||
shard.rc.reset()
|
||||
}
|
||||
|
||||
func (plp *pipeLenProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePipeLen(lex *lexer) (*pipeLen, error) {
|
||||
if !lex.isKeyword("len") {
|
||||
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "len")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
fieldName, err := parseFieldNameWithOptionalParens(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse field name for 'len' pipe: %w", err)
|
||||
}
|
||||
|
||||
// parse optional 'as ...` part
|
||||
resultField := "_msg"
|
||||
if lex.isKeyword("as") {
|
||||
lex.nextToken()
|
||||
}
|
||||
if !lex.isKeyword("|", ")", "") {
|
||||
field, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse result field after 'len(%s)': %w", quoteTokenIfNeeded(fieldName), err)
|
||||
}
|
||||
resultField = field
|
||||
}
|
||||
|
||||
pl := &pipeLen{
|
||||
fieldName: fieldName,
|
||||
resultField: resultField,
|
||||
}
|
||||
|
||||
return pl, nil
|
||||
}
|
||||
|
||||
func parseFieldNameWithOptionalParens(lex *lexer) (string, error) {
|
||||
hasParens := false
|
||||
if lex.isKeyword("(") {
|
||||
lex.nextToken()
|
||||
hasParens = true
|
||||
}
|
||||
fieldName, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if hasParens {
|
||||
if !lex.isKeyword(")") {
|
||||
return "", fmt.Errorf("missing ')' after '%s'", quoteTokenIfNeeded(fieldName))
|
||||
}
|
||||
lex.nextToken()
|
||||
}
|
||||
return fieldName, nil
|
||||
}
|
92
lib/logstorage/pipe_len_test.go
Normal file
92
lib/logstorage/pipe_len_test.go
Normal file
@ -0,0 +1,92 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParsePipeLenSuccess(t *testing.T) {
|
||||
f := func(pipeStr string) {
|
||||
t.Helper()
|
||||
expectParsePipeSuccess(t, pipeStr)
|
||||
}
|
||||
|
||||
f(`len(foo)`)
|
||||
f(`len(foo) as bar`)
|
||||
}
|
||||
|
||||
func TestParsePipeLenFailure(t *testing.T) {
|
||||
f := func(pipeStr string) {
|
||||
t.Helper()
|
||||
expectParsePipeFailure(t, pipeStr)
|
||||
}
|
||||
|
||||
f(`len`)
|
||||
f(`len(`)
|
||||
f(`len()`)
|
||||
f(`len(x) y z`)
|
||||
}
|
||||
|
||||
func TestPipeLen(t *testing.T) {
|
||||
f := func(pipeStr string, rows, rowsExpected [][]Field) {
|
||||
t.Helper()
|
||||
expectPipeResults(t, pipeStr, rows, rowsExpected)
|
||||
}
|
||||
|
||||
f(`len(foo) x`, [][]Field{
|
||||
{
|
||||
{"foo", `abcde`},
|
||||
{"baz", "1234567890"},
|
||||
},
|
||||
{
|
||||
{"foo", `abc`},
|
||||
{"bar", `de`},
|
||||
},
|
||||
{
|
||||
{"baz", "xyz"},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"foo", `abcde`},
|
||||
{"baz", "1234567890"},
|
||||
{"x", "5"},
|
||||
},
|
||||
{
|
||||
{"foo", `abc`},
|
||||
{"bar", `de`},
|
||||
{"x", "3"},
|
||||
},
|
||||
{
|
||||
{"baz", "xyz"},
|
||||
{"x", "0"},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestPipeLenUpdateNeededFields(t *testing.T) {
|
||||
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f(`len(y) x`, "*", "", "*", "x")
|
||||
f(`len(x) x`, "*", "", "*", "")
|
||||
|
||||
// unneeded fields do not intersect with output field
|
||||
f(`len(y) as x`, "*", "f1,f2", "*", "f1,f2,x")
|
||||
f(`len(x) as x`, "*", "f1,f2", "*", "f1,f2")
|
||||
|
||||
// unneeded fields intersect with output field
|
||||
f(`len(z) as x`, "*", "x,y", "*", "x,y")
|
||||
f(`len(y) as x`, "*", "x,y", "*", "x,y")
|
||||
f(`len(x) as x`, "*", "x,y", "*", "x,y")
|
||||
|
||||
// needed fields do not intersect with output field
|
||||
f(`len(y) as z`, "x,y", "", "x,y", "")
|
||||
f(`len(z) as z`, "x,y", "", "x,y", "")
|
||||
|
||||
// needed fields intersect with output field
|
||||
f(`len (z) as f2`, "f2,y", "", "y,z", "")
|
||||
f(`len (y) as f2`, "f2,y", "", "y", "")
|
||||
f(`len (y) as y`, "f2,y", "", "f2,y", "")
|
||||
}
|
Loading…
Reference in New Issue
Block a user