mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 07:19:17 +01:00
lib/logstorage: work-in-progress
This commit is contained in:
parent
3661373cc2
commit
ad505a7a9a
@ -20,7 +20,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
@ -210,8 +209,8 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
|
||||
return false, fmt.Errorf(`missing log message after the "create" or "index" command`)
|
||||
}
|
||||
line = sc.Bytes()
|
||||
p := logjson.GetParser()
|
||||
if err := p.ParseLogMessage(line); err != nil {
|
||||
p := logstorage.GetJSONParser()
|
||||
if err := p.ParseLogMessage(line, ""); err != nil {
|
||||
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
|
||||
}
|
||||
|
||||
@ -224,7 +223,7 @@ func readBulkLine(sc *bufio.Scanner, timeField, msgField string,
|
||||
}
|
||||
p.RenameField(msgField, "_msg")
|
||||
processLogMessage(ts, p.Fields)
|
||||
logjson.PutParser(p)
|
||||
logstorage.PutJSONParser(p)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
@ -12,7 +12,6 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
||||
@ -105,8 +104,8 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
|
||||
line = sc.Bytes()
|
||||
}
|
||||
|
||||
p := logjson.GetParser()
|
||||
if err := p.ParseLogMessage(line); err != nil {
|
||||
p := logstorage.GetJSONParser()
|
||||
if err := p.ParseLogMessage(line, ""); err != nil {
|
||||
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
|
||||
}
|
||||
ts, err := extractTimestampFromFields(timeField, p.Fields)
|
||||
@ -118,7 +117,7 @@ func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage f
|
||||
}
|
||||
p.RenameField(msgField, "_msg")
|
||||
processLogMessage(ts, p.Fields)
|
||||
logjson.PutParser(p)
|
||||
logstorage.PutJSONParser(p)
|
||||
|
||||
return true, nil
|
||||
}
|
||||
|
@ -50,7 +50,8 @@ Below is an example output:
|
||||
"u64_0": "4810489083243239145",
|
||||
"float_0": "1.868",
|
||||
"ip_0": "250.34.75.125",
|
||||
"timestamp_0": "1799-03-16T01:34:18.311Z"
|
||||
"timestamp_0": "1799-03-16T01:34:18.311Z",
|
||||
"json_0": "{\"foo\":\"bar_3\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":32}"
|
||||
}
|
||||
{
|
||||
"_time": "2024-05-08T14:34:00.854Z",
|
||||
@ -70,7 +71,8 @@ Below is an example output:
|
||||
"u64_0": "6593354256620219850",
|
||||
"float_0": "1.085",
|
||||
"ip_0": "253.151.88.158",
|
||||
"timestamp_0": "2042-10-05T16:42:57.082Z"
|
||||
"timestamp_0": "2042-10-05T16:42:57.082Z",
|
||||
"json_0": "{\"foo\":\"bar_5\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":27}"
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -51,6 +51,8 @@ var (
|
||||
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
|
||||
timestampFieldsPerLog = flag.Int("timestampFieldsPerLog", 1, "The number of fields with ISO8601 timestamps per each log entry; "+
|
||||
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
|
||||
jsonFieldsPerLog = flag.Int("jsonFieldsPerLog", 1, "The number of JSON fields to generate per each log entry; "+
|
||||
"see https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model")
|
||||
|
||||
statInterval = flag.Duration("statInterval", 10*time.Second, "The interval between publishing the stats")
|
||||
)
|
||||
@ -263,6 +265,9 @@ func generateLogsAtTimestamp(bw *bufio.Writer, workerID int, ts int64, firstStre
|
||||
timestamp := toISO8601(int64(rand.Uint64()))
|
||||
fmt.Fprintf(bw, `,"timestamp_%d":"%s"`, j, timestamp)
|
||||
}
|
||||
for j := 0; j < *jsonFieldsPerLog; j++ {
|
||||
fmt.Fprintf(bw, `,"json_%d":"{\"foo\":\"bar_%d\",\"baz\":{\"a\":[\"x\",\"y\"]},\"f3\":NaN,\"f4\":%d}"`, j, rand.Intn(10), rand.Intn(100))
|
||||
}
|
||||
fmt.Fprintf(bw, "}\n")
|
||||
|
||||
logEntriesCount.Add(1)
|
||||
|
17
app/vlselect/logsql/field_names_response.qtpl
Normal file
17
app/vlselect/logsql/field_names_response.qtpl
Normal file
@ -0,0 +1,17 @@
|
||||
{% stripspace %}
|
||||
|
||||
// FieldNamesResponse formats /select/logsql/field_names response
|
||||
{% func FieldNamesResponse(names []string) %}
|
||||
{
|
||||
"names":[
|
||||
{% if len(names) > 0 %}
|
||||
{%q= names[0] %}
|
||||
{% for _, v := range names[1:] %}
|
||||
,{%q= v %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
]
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
69
app/vlselect/logsql/field_names_response.qtpl.go
Normal file
69
app/vlselect/logsql/field_names_response.qtpl.go
Normal file
@ -0,0 +1,69 @@
|
||||
// Code generated by qtc from "field_names_response.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
// FieldNamesResponse formats /select/logsql/field_names response
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:4
|
||||
package logsql
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:4
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:4
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:4
|
||||
func StreamFieldNamesResponse(qw422016 *qt422016.Writer, names []string) {
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:4
|
||||
qw422016.N().S(`{"names":[`)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:7
|
||||
if len(names) > 0 {
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:8
|
||||
qw422016.N().Q(names[0])
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:9
|
||||
for _, v := range names[1:] {
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:9
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:10
|
||||
qw422016.N().Q(v)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:11
|
||||
}
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:12
|
||||
}
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:12
|
||||
qw422016.N().S(`]}`)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
func WriteFieldNamesResponse(qq422016 qtio422016.Writer, names []string) {
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
StreamFieldNamesResponse(qw422016, names)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
func FieldNamesResponse(names []string) string {
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
WriteFieldNamesResponse(qb422016, names)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
return qs422016
|
||||
//line app/vlselect/logsql/field_names_response.qtpl:15
|
||||
}
|
17
app/vlselect/logsql/field_values_response.qtpl
Normal file
17
app/vlselect/logsql/field_values_response.qtpl
Normal file
@ -0,0 +1,17 @@
|
||||
{% stripspace %}
|
||||
|
||||
// FieldValuesResponse formats /select/logsql/field_values response
|
||||
{% func FieldValuesResponse(values []string) %}
|
||||
{
|
||||
"values":[
|
||||
{% if len(values) > 0 %}
|
||||
{%q= values[0] %}
|
||||
{% for _, v := range values[1:] %}
|
||||
,{%q= v %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
]
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
69
app/vlselect/logsql/field_values_response.qtpl.go
Normal file
69
app/vlselect/logsql/field_values_response.qtpl.go
Normal file
@ -0,0 +1,69 @@
|
||||
// Code generated by qtc from "field_values_response.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
// FieldValuesResponse formats /select/logsql/field_values response
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:4
|
||||
package logsql
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:4
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:4
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:4
|
||||
func StreamFieldValuesResponse(qw422016 *qt422016.Writer, values []string) {
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:4
|
||||
qw422016.N().S(`{"values":[`)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:7
|
||||
if len(values) > 0 {
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:8
|
||||
qw422016.N().Q(values[0])
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:9
|
||||
for _, v := range values[1:] {
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:9
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:10
|
||||
qw422016.N().Q(v)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:11
|
||||
}
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:12
|
||||
}
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:12
|
||||
qw422016.N().S(`]}`)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
func WriteFieldValuesResponse(qq422016 qtio422016.Writer, values []string) {
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
StreamFieldValuesResponse(qw422016, values)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
func FieldValuesResponse(values []string) string {
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
WriteFieldValuesResponse(qb422016, values)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
return qs422016
|
||||
//line app/vlselect/logsql/field_values_response.qtpl:15
|
||||
}
|
69
app/vlselect/logsql/hits_response.qtpl
Normal file
69
app/vlselect/logsql/hits_response.qtpl
Normal file
@ -0,0 +1,69 @@
|
||||
{% import (
|
||||
"slices"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
) %}
|
||||
|
||||
{% stripspace %}
|
||||
|
||||
// LabelsForHits formats labels for /select/logsql/hits response
|
||||
{% func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) %}
|
||||
{
|
||||
{% if len(columns) > 0 %}
|
||||
{%q= columns[0].Name %}:{%q= columns[0].Values[rowIdx] %}
|
||||
{% for _, c := range columns[1:] %}
|
||||
,{%q= c.Name %}:{%q= c.Values[rowIdx] %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% func HitsSeries(m map[string]*hitsSeries) %}
|
||||
{
|
||||
{% code
|
||||
sortedKeys := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
sortedKeys = append(sortedKeys, k)
|
||||
}
|
||||
slices.Sort(sortedKeys)
|
||||
%}
|
||||
"hits":[
|
||||
{% if len(sortedKeys) > 0 %}
|
||||
{%= hitsSeriesLine(m, sortedKeys[0]) %}
|
||||
{% for _, k := range sortedKeys[1:] %}
|
||||
,{%= hitsSeriesLine(m, k) %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
]
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% func hitsSeriesLine(m map[string]*hitsSeries, k string) %}
|
||||
{
|
||||
{% code
|
||||
hs := m[k]
|
||||
hs.sort()
|
||||
timestamps := hs.timestamps
|
||||
values := hs.values
|
||||
%}
|
||||
"fields":{%s= k %},
|
||||
"timestamps":[
|
||||
{% if len(timestamps) > 0 %}
|
||||
{%q= timestamps[0] %}
|
||||
{% for _, ts := range timestamps[1:] %}
|
||||
,{%q= ts %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
],
|
||||
"values":[
|
||||
{% if len(values) > 0 %}
|
||||
{%s= values[0] %}
|
||||
{% for _, v := range values[1:] %}
|
||||
,{%s= v %}
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
]
|
||||
}
|
||||
{% endfunc %}
|
||||
|
||||
{% endstripspace %}
|
219
app/vlselect/logsql/hits_response.qtpl.go
Normal file
219
app/vlselect/logsql/hits_response.qtpl.go
Normal file
@ -0,0 +1,219 @@
|
||||
// Code generated by qtc from "hits_response.qtpl". DO NOT EDIT.
|
||||
// See https://github.com/valyala/quicktemplate for details.
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:1
|
||||
package logsql
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:1
|
||||
import (
|
||||
"slices"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
)
|
||||
|
||||
// LabelsForHits formats labels for /select/logsql/hits response
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:10
|
||||
import (
|
||||
qtio422016 "io"
|
||||
|
||||
qt422016 "github.com/valyala/quicktemplate"
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:10
|
||||
var (
|
||||
_ = qtio422016.Copy
|
||||
_ = qt422016.AcquireByteBuffer
|
||||
)
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:10
|
||||
func StreamLabelsForHits(qw422016 *qt422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:10
|
||||
qw422016.N().S(`{`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:12
|
||||
if len(columns) > 0 {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:13
|
||||
qw422016.N().Q(columns[0].Name)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:13
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:13
|
||||
qw422016.N().Q(columns[0].Values[rowIdx])
|
||||
//line app/vlselect/logsql/hits_response.qtpl:14
|
||||
for _, c := range columns[1:] {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:14
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:15
|
||||
qw422016.N().Q(c.Name)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:15
|
||||
qw422016.N().S(`:`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:15
|
||||
qw422016.N().Q(c.Values[rowIdx])
|
||||
//line app/vlselect/logsql/hits_response.qtpl:16
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:17
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:17
|
||||
qw422016.N().S(`}`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
func WriteLabelsForHits(qq422016 qtio422016.Writer, columns []logstorage.BlockColumn, rowIdx int) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
StreamLabelsForHits(qw422016, columns, rowIdx)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
func LabelsForHits(columns []logstorage.BlockColumn, rowIdx int) string {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
WriteLabelsForHits(qb422016, columns, rowIdx)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
return qs422016
|
||||
//line app/vlselect/logsql/hits_response.qtpl:19
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:21
|
||||
func StreamHitsSeries(qw422016 *qt422016.Writer, m map[string]*hitsSeries) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:21
|
||||
qw422016.N().S(`{`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:24
|
||||
sortedKeys := make([]string, 0, len(m))
|
||||
for k := range m {
|
||||
sortedKeys = append(sortedKeys, k)
|
||||
}
|
||||
slices.Sort(sortedKeys)
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:29
|
||||
qw422016.N().S(`"hits":[`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:31
|
||||
if len(sortedKeys) > 0 {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:32
|
||||
streamhitsSeriesLine(qw422016, m, sortedKeys[0])
|
||||
//line app/vlselect/logsql/hits_response.qtpl:33
|
||||
for _, k := range sortedKeys[1:] {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:33
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:34
|
||||
streamhitsSeriesLine(qw422016, m, k)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:35
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:36
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:36
|
||||
qw422016.N().S(`]}`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
func WriteHitsSeries(qq422016 qtio422016.Writer, m map[string]*hitsSeries) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
StreamHitsSeries(qw422016, m)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
func HitsSeries(m map[string]*hitsSeries) string {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
WriteHitsSeries(qb422016, m)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
return qs422016
|
||||
//line app/vlselect/logsql/hits_response.qtpl:39
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:41
|
||||
func streamhitsSeriesLine(qw422016 *qt422016.Writer, m map[string]*hitsSeries, k string) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:41
|
||||
qw422016.N().S(`{`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:44
|
||||
hs := m[k]
|
||||
hs.sort()
|
||||
timestamps := hs.timestamps
|
||||
values := hs.values
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:48
|
||||
qw422016.N().S(`"fields":`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:49
|
||||
qw422016.N().S(k)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:49
|
||||
qw422016.N().S(`,"timestamps":[`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:51
|
||||
if len(timestamps) > 0 {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:52
|
||||
qw422016.N().Q(timestamps[0])
|
||||
//line app/vlselect/logsql/hits_response.qtpl:53
|
||||
for _, ts := range timestamps[1:] {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:53
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:54
|
||||
qw422016.N().Q(ts)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:55
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:56
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:56
|
||||
qw422016.N().S(`],"values":[`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:59
|
||||
if len(values) > 0 {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:60
|
||||
qw422016.N().S(values[0])
|
||||
//line app/vlselect/logsql/hits_response.qtpl:61
|
||||
for _, v := range values[1:] {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:61
|
||||
qw422016.N().S(`,`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:62
|
||||
qw422016.N().S(v)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:63
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:64
|
||||
}
|
||||
//line app/vlselect/logsql/hits_response.qtpl:64
|
||||
qw422016.N().S(`]}`)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
func writehitsSeriesLine(qq422016 qtio422016.Writer, m map[string]*hitsSeries, k string) {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
qw422016 := qt422016.AcquireWriter(qq422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
streamhitsSeriesLine(qw422016, m, k)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
qt422016.ReleaseWriter(qw422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
}
|
||||
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
func hitsSeriesLine(m map[string]*hitsSeries, k string) string {
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
qb422016 := qt422016.AcquireByteBuffer()
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
writehitsSeriesLine(qb422016, m, k)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
qs422016 := string(qb422016.B)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
qt422016.ReleaseByteBuffer(qb422016)
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
return qs422016
|
||||
//line app/vlselect/logsql/hits_response.qtpl:67
|
||||
}
|
@ -5,6 +5,10 @@ import (
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
|
||||
@ -15,44 +19,196 @@ import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
)
|
||||
|
||||
// ProcessHitsRequest handles /select/logsql/hits request.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats
|
||||
func ProcessHitsRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
|
||||
q, tenantIDs, err := parseCommonArgs(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Obtain step
|
||||
stepStr := r.FormValue("step")
|
||||
if stepStr == "" {
|
||||
stepStr = "1d"
|
||||
}
|
||||
step, err := promutils.ParseDuration(stepStr)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot parse 'step' arg: %s", err)
|
||||
return
|
||||
}
|
||||
if step <= 0 {
|
||||
httpserver.Errorf(w, r, "'step' must be bigger than zero")
|
||||
}
|
||||
|
||||
// Obtain offset
|
||||
offsetStr := r.FormValue("offset")
|
||||
if offsetStr == "" {
|
||||
offsetStr = "0s"
|
||||
}
|
||||
offset, err := promutils.ParseDuration(offsetStr)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot parse 'offset' arg: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Obtain field entries
|
||||
fields := r.Form["field"]
|
||||
|
||||
// Prepare the query
|
||||
q.AddCountByTimePipe(int64(step), int64(offset), fields)
|
||||
q.Optimize()
|
||||
|
||||
var mLock sync.Mutex
|
||||
m := make(map[string]*hitsSeries)
|
||||
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
|
||||
if len(columns) == 0 || len(columns[0].Values) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
timestampValues := columns[0].Values
|
||||
hitsValues := columns[len(columns)-1].Values
|
||||
columns = columns[1 : len(columns)-1]
|
||||
|
||||
bb := blockResultPool.Get()
|
||||
for i := range timestamps {
|
||||
timestampStr := strings.Clone(timestampValues[i])
|
||||
hitsStr := strings.Clone(hitsValues[i])
|
||||
|
||||
bb.Reset()
|
||||
WriteLabelsForHits(bb, columns, i)
|
||||
|
||||
mLock.Lock()
|
||||
hs, ok := m[string(bb.B)]
|
||||
if !ok {
|
||||
k := string(bb.B)
|
||||
hs = &hitsSeries{}
|
||||
m[k] = hs
|
||||
}
|
||||
hs.timestamps = append(hs.timestamps, timestampStr)
|
||||
hs.values = append(hs.values, hitsStr)
|
||||
mLock.Unlock()
|
||||
}
|
||||
blockResultPool.Put(bb)
|
||||
}
|
||||
|
||||
// Execute the query
|
||||
if err := vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock); err != nil {
|
||||
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Write response
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteHitsSeries(w, m)
|
||||
}
|
||||
|
||||
// hitsSeries holds the points of a single series as two parallel slices:
// timestamps[i] and values[i] describe the same point.
//
// It implements sort.Interface, ordering the points by timestamp string.
type hitsSeries struct {
	timestamps []string
	values     []string
}

// sort orders the points by timestamp, keeping timestamps and values paired.
func (hs *hitsSeries) sort() {
	var points sort.Interface = hs
	sort.Sort(points)
}

// Len returns the number of points in the series.
func (hs *hitsSeries) Len() int {
	n := len(hs.timestamps)
	return n
}

// Swap exchanges the points at positions i and j in both parallel slices.
func (hs *hitsSeries) Swap(i, j int) {
	ts, vs := hs.timestamps, hs.values
	ts[i], ts[j] = ts[j], ts[i]
	vs[i], vs[j] = vs[j], vs[i]
}

// Less reports whether the timestamp at position i sorts before the one at position j.
// The timestamps are compared as plain strings.
func (hs *hitsSeries) Less(i, j int) bool {
	ts := hs.timestamps
	return ts[j] > ts[i]
}
|
||||
|
||||
// ProcessFieldNamesRequest handles /select/logsql/field_names request.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names
|
||||
func ProcessFieldNamesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
|
||||
q, tenantIDs, err := parseCommonArgs(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Obtain field names for the given query
|
||||
q.Optimize()
|
||||
fieldNames, err := vlstorage.GetFieldNames(ctx, tenantIDs, q)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot obtain field names: %s", err)
|
||||
return
|
||||
}
|
||||
|
||||
slices.Sort(fieldNames)
|
||||
|
||||
// Write results
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteFieldNamesResponse(w, fieldNames)
|
||||
}
|
||||
|
||||
// ProcessFieldValuesRequest handles /select/logsql/field_values request.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values
|
||||
func ProcessFieldValuesRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
|
||||
q, tenantIDs, err := parseCommonArgs(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse fieldName query arg
|
||||
fieldName := r.FormValue("field_name")
|
||||
if fieldName == "" {
|
||||
httpserver.Errorf(w, r, "missing 'field_name' query arg")
|
||||
return
|
||||
}
|
||||
|
||||
// Parse limit query arg
|
||||
limit, err := httputils.GetInt(r, "limit")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
if limit < 0 {
|
||||
limit = 0
|
||||
}
|
||||
|
||||
// Obtain unique values for the given field
|
||||
q.Optimize()
|
||||
values, err := vlstorage.GetFieldValues(ctx, tenantIDs, q, fieldName, uint64(limit))
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot obtain values for field %q: %s", fieldName, err)
|
||||
return
|
||||
}
|
||||
|
||||
if limit == 0 || len(values) < limit {
|
||||
// Sort values only if their number is below the limit.
|
||||
// Otherwise there is little sense in sorting, since the query may return
|
||||
// different subset of values on every execution.
|
||||
slices.Sort(values)
|
||||
}
|
||||
|
||||
// Write results
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
WriteFieldValuesResponse(w, values)
|
||||
}
|
||||
|
||||
// ProcessQueryRequest handles /select/logsql/query request.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/querying/#http-api
|
||||
func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Request) {
|
||||
// Extract tenantID
|
||||
tenantID, err := logstorage.GetTenantIDFromRequest(r)
|
||||
q, tenantIDs, err := parseCommonArgs(r)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse query
|
||||
qStr := r.FormValue("query")
|
||||
q, err := logstorage.ParseQuery(qStr)
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot parse query [%s]: %s", qStr, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Parse optional start and end args
|
||||
start, okStart, err := getTimeNsec(r, "start")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
end, okEnd, err := getTimeNsec(r, "end")
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "%s", err)
|
||||
return
|
||||
}
|
||||
if okStart || okEnd {
|
||||
if !okStart {
|
||||
start = math.MinInt64
|
||||
}
|
||||
if !okEnd {
|
||||
end = math.MaxInt64
|
||||
}
|
||||
q.AddTimeFilter(start, end)
|
||||
}
|
||||
|
||||
// Parse limit query arg
|
||||
limit, err := httputils.GetInt(r, "limit")
|
||||
if err != nil {
|
||||
@ -62,14 +218,11 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
|
||||
if limit > 0 {
|
||||
q.AddPipeLimit(uint64(limit))
|
||||
}
|
||||
q.Optimize()
|
||||
|
||||
tenantIDs := []logstorage.TenantID{tenantID}
|
||||
|
||||
bw := getBufferedWriter(w)
|
||||
|
||||
writeBlock := func(_ uint, timestamps []int64, columns []logstorage.BlockColumn) {
|
||||
if len(columns) == 0 {
|
||||
if len(columns) == 0 || len(columns[0].Values) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
@ -81,20 +234,57 @@ func ProcessQueryRequest(ctx context.Context, w http.ResponseWriter, r *http.Req
|
||||
blockResultPool.Put(bb)
|
||||
}
|
||||
|
||||
w.Header().Set("Content-Type", "application/stream+json; charset=utf-8")
|
||||
w.Header().Set("Content-Type", "application/stream+json")
|
||||
q.Optimize()
|
||||
err = vlstorage.RunQuery(ctx, tenantIDs, q, writeBlock)
|
||||
|
||||
bw.FlushIgnoreErrors()
|
||||
putBufferedWriter(bw)
|
||||
|
||||
if err != nil {
|
||||
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", qStr, err)
|
||||
httpserver.Errorf(w, r, "cannot execute query [%s]: %s", q, err)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var blockResultPool bytesutil.ByteBufferPool
|
||||
|
||||
func parseCommonArgs(r *http.Request) (*logstorage.Query, []logstorage.TenantID, error) {
|
||||
// Extract tenantID
|
||||
tenantID, err := logstorage.GetTenantIDFromRequest(r)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot obtain tenanID: %w", err)
|
||||
}
|
||||
tenantIDs := []logstorage.TenantID{tenantID}
|
||||
|
||||
// Parse query
|
||||
qStr := r.FormValue("query")
|
||||
q, err := logstorage.ParseQuery(qStr)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("cannot parse query [%s]: %s", qStr, err)
|
||||
}
|
||||
|
||||
// Parse optional start and end args
|
||||
start, okStart, err := getTimeNsec(r, "start")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
end, okEnd, err := getTimeNsec(r, "end")
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
if okStart || okEnd {
|
||||
if !okStart {
|
||||
start = math.MinInt64
|
||||
}
|
||||
if !okEnd {
|
||||
end = math.MaxInt64
|
||||
}
|
||||
q.AddTimeFilter(start, end)
|
||||
}
|
||||
|
||||
return q, tenantIDs, nil
|
||||
}
|
||||
|
||||
func getTimeNsec(r *http.Request, argName string) (int64, bool, error) {
|
||||
s := r.FormValue(argName)
|
||||
if s == "" {
|
||||
|
@ -140,12 +140,27 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case path == "/logsql/query":
|
||||
switch path {
|
||||
case "/logsql/query":
|
||||
logsqlQueryRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
logsql.ProcessQueryRequest(ctx, w, r)
|
||||
return true
|
||||
case "/logsql/field_values":
|
||||
logsqlFieldValuesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
logsql.ProcessFieldValuesRequest(ctx, w, r)
|
||||
return true
|
||||
case "/logsql/field_names":
|
||||
logsqlFieldNamesRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
logsql.ProcessFieldNamesRequest(ctx, w, r)
|
||||
return true
|
||||
case "/logsql/hits":
|
||||
logsqlHitsRequests.Inc()
|
||||
httpserver.EnableCORS(w, r)
|
||||
logsql.ProcessHitsRequest(ctx, w, r)
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
@ -165,5 +180,8 @@ func getMaxQueryDuration(r *http.Request) time.Duration {
|
||||
}
|
||||
|
||||
var (
|
||||
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
|
||||
logsqlQueryRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
|
||||
logsqlFieldValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
|
||||
logsqlFieldNamesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_names"}`)
|
||||
logsqlHitsRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
|
||||
)
|
||||
|
@ -107,10 +107,22 @@ func MustAddRows(lr *logstorage.LogRows) {
|
||||
}
|
||||
|
||||
// RunQuery runs the given q and calls writeBlock for the returned data blocks
|
||||
func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock func(workerID uint, timestamps []int64, columns []logstorage.BlockColumn)) error {
|
||||
func RunQuery(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, writeBlock logstorage.WriteBlockFunc) error {
|
||||
return strg.RunQuery(ctx, tenantIDs, q, writeBlock)
|
||||
}
|
||||
|
||||
// GetFieldNames executes q and returns field names seen in results.
|
||||
func GetFieldNames(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query) ([]string, error) {
|
||||
return strg.GetFieldNames(ctx, tenantIDs, q)
|
||||
}
|
||||
|
||||
// GetFieldValues executes q and returns unique values for the fieldName seen in results.
|
||||
//
|
||||
// If limit > 0, then up to limit unique values are returned.
|
||||
func GetFieldValues(ctx context.Context, tenantIDs []logstorage.TenantID, q *logstorage.Query, fieldName string, limit uint64) ([]string, error) {
|
||||
return strg.GetFieldValues(ctx, tenantIDs, q, fieldName, limit)
|
||||
}
|
||||
|
||||
func writeStorageMetrics(w io.Writer, strg *logstorage.Storage) {
|
||||
var ss logstorage.StorageStats
|
||||
strg.UpdateStats(&ss)
|
||||
|
@ -19,6 +19,25 @@ according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/QuickSta
|
||||
|
||||
## tip
|
||||
|
||||
## [v0.8.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.8.0-victorialogs)
|
||||
|
||||
Released at 2024-05-20
|
||||
|
||||
* FEATURE: add ability to extract JSON fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe).
|
||||
* FEATURE: add ability to extract [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe).
|
||||
* FEATURE: add ability to extract arbitrary text from [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into the output fields. See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe).
|
||||
* FEATURE: add ability to put arbitrary [queries](https://docs.victoriametrics.com/victorialogs/logsql/#query-syntax) inside [`in()` filter](https://docs.victoriametrics.com/victorialogs/logsql/#multi-exact-filter).
|
||||
* FEATURE: add support for post-filtering of query results with [`filter` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe).
|
||||
* FEATURE: allow applying individual [filters](https://docs.victoriametrics.com/victorialogs/logsql/#filters) per each [stats function](https://docs.victoriametrics.com/victorialogs/logsql/#stats-pipe-functions). See [these docs](https://docs.victoriametrics.com/victorialogs/logsql/#stats-with-additional-filters).
|
||||
* FEATURE: allow passing string values to [`min`](https://docs.victoriametrics.com/victorialogs/logsql/#min-stats) and [`max`](https://docs.victoriametrics.com/victorialogs/logsql/#max-stats) functions. Previously only numeric values could be passed to them.
|
||||
* FEATURE: speed up [`sort ... limit N` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) for typical cases.
|
||||
* FEATURE: allow using more convenient syntax for [`range` filters](https://docs.victoriametrics.com/victorialogs/logsql/#range-filter) if upper or lower bound isn't needed. For example, it is possible to write `response_size:>=10KiB` instead of `response_size:range[10KiB, inf)`, or `temperature:<42` instead of `temperature:range(-inf, 42)`.
|
||||
* FEATURE: add `/select/logsql/hits` HTTP endpoint for returning the number of matching logs per the given time bucket over the selected time range. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats) for details.
|
||||
* FEATURE: add `/select/logsql/field_names` HTTP endpoint for returning [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) names from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-names) for details.
|
||||
* FEATURE: add `/select/logsql/field_values` HTTP endpoint for returning unique values for the given [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) obtained from results of the given query. See [these docs](https://docs.victoriametrics.com/victorialogs/querying/#querying-field-values) for details.
|
||||
|
||||
* BUGFIX: properly take into account `offset` in [`sort` pipe](https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe) when it already has `limit`. For example, `_time:5m | sort by (foo) offset 20 limit 10`.
|
||||
|
||||
## [v0.7.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.7.0-victorialogs)
|
||||
|
||||
Released at 2024-05-15
|
||||
|
@ -184,7 +184,7 @@ For example, the following query selects all the logs for the last 5 minutes by
|
||||
_time:5m
|
||||
```
|
||||
|
||||
Additionally to filters, LogsQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as `pipes`.
|
||||
Additionally to filters, LogsQL query may contain arbitrary mix of optional actions for processing the selected logs. These actions are delimited by `|` and are known as [`pipes`](#pipes).
|
||||
For example, the following query uses [`stats` pipe](#stats-pipe) for returning the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
with the `error` [word](#word) for the last 5 minutes:
|
||||
|
||||
@ -213,7 +213,6 @@ single quotes `'` and backticks:
|
||||
|
||||
If in doubt, it is recommended to quote field names and filter args.
|
||||
|
||||
|
||||
The list of LogsQL filters:
|
||||
|
||||
- [Time filter](#time-filter) - matches logs with [`_time` field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#time-field) in the given time range
|
||||
@ -653,16 +652,16 @@ log.level:in("error", "fatal")
|
||||
|
||||
It works very fast for long lists passed to `in()`.
|
||||
|
||||
The future VictoriaLogs versions will allow passing arbitrary [queries](#query-syntax) into `in()` filter.
|
||||
For example, the following query selects all the logs for the last hour for users, who visited pages with `admin` [word](#word) in the `path`
|
||||
It is possible to pass arbitrary [query](#query-syntax) inside `in(...)` filter in order to match against the results of this query.
|
||||
The query inside `in(...)` must end with [`fields`](#fields-pipe) pipe containing a single field name, so VictoriaLogs could
|
||||
fetch results from this field. For example, the following query selects all the logs for the last 5 minutes for users,
|
||||
who visited pages with `admin` [word](#word) in the `path` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
during the last day:
|
||||
|
||||
```logsql
|
||||
_time:1h AND user_id:in(_time:1d AND path:admin | fields user_id)
|
||||
_time:5m AND user_id:in(_time:1d AND path:admin | fields user_id)
|
||||
```
|
||||
|
||||
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
|
||||
|
||||
See also:
|
||||
|
||||
- [Exact filter](#exact-filter)
|
||||
@ -825,6 +824,18 @@ for searching for log entries with request durations exceeding 4.2 seconds:
|
||||
request.duration:range(4.2, Inf)
|
||||
```
|
||||
|
||||
This query can be shortened to:
|
||||
|
||||
```logsql
|
||||
request.duration:>4.2
|
||||
```
|
||||
|
||||
The following query returns logs with request durations smaller or equal to 1.5 seconds:
|
||||
|
||||
```logsql
|
||||
request.duration:<=1.5
|
||||
```
|
||||
|
||||
The lower and the upper bounds of the range are excluded by default. If they must be included, then substitute the corresponding
|
||||
parentheses with square brackets. For example:
|
||||
|
||||
@ -832,17 +843,13 @@ parentheses with square brackets. For example:
|
||||
- `range(1, 10]` includes `10` in the matching range
|
||||
- `range[1, 10]` includes `1` and `10` in the matching range
|
||||
|
||||
The range boundaries can be expressed in the following forms:
|
||||
|
||||
- Hexadecimal form. For example, `range(0xff, 0xABCD)`.
|
||||
- Binary form. For example, `range(0b100110, 0b11111101)`
|
||||
- Integer form with `_` delimiters for better readability. For example, `range(1_000, 2_345_678)`.
|
||||
The range boundaries can contain any [supported numeric values](#numeric-values).
|
||||
|
||||
Note that the `range()` filter doesn't match [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
with non-numeric values alongside numeric values. For example, `range(1, 10)` doesn't match `the request took 4.2 seconds`
|
||||
[log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field), since the `4.2` number is surrounded by other text.
|
||||
Extract the numeric value from the message with `parse(_msg, "the request took <request_duration> seconds")` [transformation](#transformations)
|
||||
and then apply the `range()` [post-filter](#post-filters) to the extracted `request_duration` field.
|
||||
and then apply the `range()` [filter pipe](#filter-pipe) to the extracted `request_duration` field.
|
||||
|
||||
Performance tips:
|
||||
|
||||
@ -884,7 +891,7 @@ user.ip:ipv4_range("1.2.3.4")
|
||||
Note that the `ipv4_range()` doesn't match a string with IPv4 address if this string contains other text. For example, `ipv4_range("127.0.0.0/24")`
|
||||
doesn't match `request from 127.0.0.1: done` [log message](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#message-field),
|
||||
since the `127.0.0.1` ip is surrounded by other text. Extract the IP from the message with `parse(_msg, "request from <ip>: done")` [transformation](#transformations)
|
||||
and then apply the `ipv4_range()` [post-filter](#post-filters) to the extracted `ip` field.
|
||||
and then apply the `ipv4_range()` [filter pipe](#filter-pipe) to the extracted `ip` field.
|
||||
|
||||
Hints:
|
||||
|
||||
@ -1045,13 +1052,18 @@ LogsQL supports the following pipes:
|
||||
|
||||
- [`copy`](#copy-pipe) copies [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`delete`](#delete-pipe) deletes [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`extract`](#extract-pipe) extracts the specified text into the given log fields.
|
||||
- [`field_names`](#field_names-pipe) returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`fields`](#fields-pipe) selects the given set of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`filter`](#filter-pipe) applies additional [filters](#filters) to results.
|
||||
- [`limit`](#limit-pipe) limits the number of selected logs.
|
||||
- [`offset`](#offset-pipe) skips the given number of selected logs.
|
||||
- [`rename`](#rename-pipe) renames [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`sort`](#sort-pipe) sorts logs by the given [fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`stats`](#stats-pipe) calculates various stats over the selected logs.
|
||||
- [`uniq`](#uniq-pipe) returns unique log entries.
|
||||
- [`unpack_json`](#unpack_json-pipe) unpacks JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- [`unpack_logfmt`](#unpack_logfmt-pipe) unpacks [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
|
||||
### copy pipe
|
||||
|
||||
@ -1096,6 +1108,97 @@ See also:
|
||||
- [`rename` pipe](#rename-pipe)
|
||||
- [`fields` pipe](#fields-pipe)
|
||||
|
||||
### extract pipe
|
||||
|
||||
`| extract from field_name "pattern"` [pipe](#pipes) allows extracting additional fields specified in the `pattern` from the given
|
||||
`field_name` [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). Existing log fields remain unchanged
|
||||
after the `| extract ...` pipe.
|
||||
|
||||
`| extract ...` pipe can be useful for extracting additional fields needed for further data processing with other pipes such as [`stats` pipe](#stats-pipe) or [`sort` pipe](#sort-pipe).
|
||||
|
||||
For example, the following query selects logs with the `error` [word](#word) for the last day,
|
||||
extracts ip address from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) into `ip` field and then calculates top 10 ip addresses
|
||||
with the biggest number of logs:
|
||||
|
||||
```logsql
|
||||
_time:1d error | extract from _msg "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
|
||||
```
|
||||
|
||||
It is expected that `_msg` field contains `ip=...` substring, which ends with space. For example, `error from ip=1.2.3.4, user_id=42`.
|
||||
|
||||
If the `| extract ...` pipe is applied to [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field), then the `from _msg` part can be omitted.
|
||||
For example, the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:1d error | extract "ip=<ip> " | stats by (ip) count() logs | sort by (logs) desc limit 10
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [format for extract pipe pattern](#format-for-extract-pipe-pattern)
|
||||
- [`unpack_json` pipe](#unpack_json-pipe)
|
||||
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
|
||||
|
||||
#### Format for extract pipe pattern
|
||||
|
||||
The `pattern` part from [`| extract from src_field "pattern"` pipe](#extract-pipe) may contain arbitrary text, which matches as is to the `src_field` value.
|
||||
Additionally to arbitrary text, the `pattern` may contain placeholders in the form `<...>`, which match any strings, including empty strings.
|
||||
Placeholders may be named, such as `<ip>`, or anonymous, such as `<_>`. Named placeholders extract the matching text into
|
||||
the corresponding [log field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
Anonymous placeholders are useful for skipping arbitrary text during pattern matching.
|
||||
|
||||
For example, if [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) contains the following text:
|
||||
|
||||
```
|
||||
1.2.3.4 GET /foo/bar?baz 404 "Mozilla foo bar baz" some tail here
|
||||
```
|
||||
|
||||
Then the following `| extract ...` [pipe](#pipes) can be used for extracting `ip`, `path` and `user_agent` fields from it:
|
||||
|
||||
```
|
||||
| extract '<ip> <_> <path> <_> "<user_agent>"'
|
||||
```
|
||||
|
||||
Note that the user-agent part of the log message is in double quotes. This means that it may contain special chars, including escaped double quote, e.g. `\"`.
|
||||
This may break proper matching of the string in double quotes.
|
||||
|
||||
VictoriaLogs automatically detects the whole string in quotes and automatically decodes it if the first char in the placeholder is double quote or backtick.
|
||||
So it is better to use the following `pattern` for proper matching of quoted strings:
|
||||
|
||||
```
|
||||
| extract "<ip> <_> <path> <_> <user_agent>"
|
||||
```
|
||||
|
||||
Note that the `user_agent` now matches double quotes, but VictoriaLogs automatically unquotes the matching string before storing it in the `user_agent` field.
|
||||
This property is useful for extracting JSON strings. For example, the following `pattern` properly extracts the `message` JSON string into `msg` field:
|
||||
|
||||
```
|
||||
| extract '"message":<msg>'
|
||||
```
|
||||
|
||||
If some special chars such as `<` must be matched by the `pattern`, then they can be [html-escaped](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references).
|
||||
For example, the following `pattern` properly matches `a < 123.456` text:
|
||||
|
||||
```
|
||||
| extract "<left> &lt; <right>"
|
||||
```
|
||||
|
||||
### field_names pipe
|
||||
|
||||
Sometimes it may be needed to get all the field names for the selected results. This may be done with `| field_names ...` [pipe](#pipes).
|
||||
For example, the following query returns all the names of [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
from the logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | field_names as names
|
||||
```
|
||||
|
||||
Field names are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
|
||||
|
||||
See also:
|
||||
|
||||
- [`uniq` pipe](#uniq-pipe)
|
||||
|
||||
### fields pipe
|
||||
|
||||
By default all the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) are returned in the response.
|
||||
@ -1112,6 +1215,23 @@ See also:
|
||||
- [`rename` pipe](#rename-pipe)
|
||||
- [`delete` pipe](#delete-pipe)
|
||||
|
||||
### filter pipe
|
||||
|
||||
Sometimes it is needed to apply additional filters on the calculated results. This can be done with `| filter ...` [pipe](#pipes).
|
||||
The `filter` pipe can contain arbitrary [filters](#filters).
|
||||
|
||||
For example, the following query returns `host` [field](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) values
|
||||
if the number of log messages with the `error` [word](#word) for them over the last hour exceeds `1_000`:
|
||||
|
||||
```logsql
|
||||
_time:1h error | stats by (host) count() logs_count | filter logs_count:> 1_000
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`stats` pipe](#stats-pipe)
|
||||
- [`sort` pipe](#sort-pipe)
|
||||
|
||||
### limit pipe
|
||||
|
||||
If only a subset of selected logs must be processed, then `| limit N` [pipe](#pipes) can be used, where `N` can contain any [supported integer numeric value](#numeric-values).
|
||||
@ -1200,6 +1320,12 @@ The reverse order can be applied globally via `desc` keyword after `by(...)` cla
|
||||
_time:5m | sort by (foo, bar) desc
|
||||
```
|
||||
|
||||
The `by` keyword can be skipped in `sort ...` pipe. For example, the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:5m | sort (foo, bar) desc
|
||||
```
|
||||
|
||||
Sorting of big number of logs can consume a lot of CPU time and memory. Sometimes it is enough to return the first `N` entries with the biggest
|
||||
or the smallest values. This can be done by adding `limit N` to the end of `sort ...` pipe.
|
||||
Such a query consumes lower amounts of memory when sorting big number of logs, since it keeps in memory only `N` log entries.
|
||||
@ -1232,35 +1358,6 @@ See also:
|
||||
- [`limit` pipe](#limit-pipe)
|
||||
- [`offset` pipe](#offset-pipe)
|
||||
|
||||
### uniq pipe
|
||||
|
||||
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
|
||||
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (ip)
|
||||
```
|
||||
|
||||
It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
|
||||
are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (host, path)
|
||||
```
|
||||
|
||||
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
|
||||
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
|
||||
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (host, path) limit 100
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`uniq_values` stats function](#uniq_values-stats)
|
||||
|
||||
### stats pipe
|
||||
|
||||
`| stats ...` pipe allows calculating various stats over the selected logs. For example, the following LogsQL query
|
||||
@ -1293,6 +1390,13 @@ _time:5m | stats count() logs_total, count_uniq(_stream) streams_total
|
||||
|
||||
See also:
|
||||
|
||||
- [stats by fields](#stats-by-fields)
|
||||
- [stats by time buckets](#stats-by-time-buckets)
|
||||
- [stats by time buckets with timezone offset](#stats-by-time-buckets-with-timezone-offset)
|
||||
- [stats by field buckets](#stats-by-field-buckets)
|
||||
- [stats by IPv4 buckets](#stats-by-ipv4-buckets)
|
||||
- [stats with additional filters](#stats-with-additional-filters)
|
||||
- [stats pipe functions](#stats-pipe-functions)
|
||||
- [`sort` pipe](#sort-pipe)
|
||||
|
||||
|
||||
@ -1316,6 +1420,12 @@ grouped by `(host, path)` fields:
|
||||
_time:5m | stats by (host, path) count() logs_total, count_uniq(ip) ips_total
|
||||
```
|
||||
|
||||
The `by` keyword can be skipped in `stats ...` pipe. For example, the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:5m | stats (host, path) count() logs_total, count_uniq(ip) ips_total
|
||||
```
|
||||
|
||||
#### Stats by time buckets
|
||||
|
||||
The following syntax can be used for calculating stats grouped by time buckets:
|
||||
@ -1384,6 +1494,139 @@ extracted from the `ip` [log field](https://docs.victoriametrics.com/victorialog
|
||||
_time:5m | stats by (ip:/24) count() requests_per_subnet
|
||||
```
|
||||
|
||||
#### Stats with additional filters
|
||||
|
||||
Sometimes it is needed to calculate stats on different subsets of matching logs. This can be done by inserting `if (<any_filters>)` condition
|
||||
between [stats function](#stats-pipe-functions) and `result_name`, where `any_filters` can contain arbitrary [filters](#filters).
|
||||
For example, the following query calculates individually the number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
with `GET`, `POST` and `PUT` [words](#word), additionally to the total number of logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | stats
|
||||
count() if (GET) gets,
|
||||
count() if (POST) posts,
|
||||
count() if (PUT) puts,
|
||||
count() total
|
||||
```
|
||||
|
||||
### uniq pipe
|
||||
|
||||
`| uniq ...` pipe allows returning only unique results over the selected logs. For example, the following LogsQL query
|
||||
returns unique values for `ip` [log field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (ip)
|
||||
```
|
||||
|
||||
It is possible to specify multiple fields inside `by(...)` clause. In this case all the unique sets for the given fields
|
||||
are returned. For example, the following query returns all the unique `(host, path)` pairs for the logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (host, path)
|
||||
```
|
||||
|
||||
The unique entries are returned in arbitrary order. Use [`sort` pipe](#sort-pipe) in order to sort them if needed.
|
||||
|
||||
Unique entries are stored in memory during query execution. Big number of unique selected entries may require a lot of memory.
|
||||
Sometimes it is enough to return up to `N` unique entries. This can be done by adding `limit N` after `by (...)` clause.
|
||||
This allows limiting memory usage. For example, the following query returns up to 100 unique `(host, path)` pairs for the logs over the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq by (host, path) limit 100
|
||||
```
|
||||
|
||||
The `by` keyword can be skipped in `uniq ...` pipe. For example, the following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:5m | uniq (host, path) limit 100
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`uniq_values` stats function](#uniq_values-stats)
|
||||
|
||||
### unpack_json pipe
|
||||
|
||||
`| unpack_json from field_name` pipe unpacks `{"k1":"v1", ..., "kN":"vN"}` JSON from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
into `k1`, ... `kN` field names with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
|
||||
|
||||
Nested JSON is unpacked according to the rules defined [here](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
|
||||
For example, the following query unpacks JSON fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) across logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_json from _msg
|
||||
```
|
||||
|
||||
The `from _msg` part can be omitted when JSON fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
|
||||
The following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_json
|
||||
```
|
||||
|
||||
If you want to make sure that the unpacked JSON fields do not clash with the existing fields, then specify common prefix for all the fields extracted from JSON,
|
||||
by adding `result_prefix "prefix_name"` to `unpack_json`. For example, the following query adds `foo_` prefix for all the unpacked fields
|
||||
from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_json result_prefix "foo_"
|
||||
```
|
||||
|
||||
Performance tip: if you need extracting a single field from long JSON, it is faster to use [`extract` pipe](#extract-pipe). For example, the following query extracts `"ip"` field from JSON
|
||||
stored in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
|
||||
|
||||
```
|
||||
_time:5m | extract '"ip":<field_value>'
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`unpack_logfmt` pipe](#unpack_logfmt-pipe)
|
||||
- [`extract` pipe](#extract-pipe)
|
||||
|
||||
### unpack_logfmt pipe
|
||||
|
||||
`| unpack_logfmt from field_name` pipe unpacks `k1=v1 ... kN=vN` [logfmt](https://brandur.org/logfmt) fields
|
||||
from the given `field_name` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) into `k1`, ... `kN` field names
|
||||
with the corresponding `v1`, ..., `vN` values. It overrides existing fields with names from the `k1`, ..., `kN` list. Other fields remain untouched.
|
||||
|
||||
For example, the following query unpacks [logfmt](https://brandur.org/logfmt) fields from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
across logs for the last 5 minutes:
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_logfmt from _msg
|
||||
```
|
||||
|
||||
The `from _msg` part can be omitted when [logfmt](https://brandur.org/logfmt) fields are unpacked from the [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field).
|
||||
The following query is equivalent to the previous one:
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_logfmt
|
||||
```
|
||||
|
||||
If you want to make sure that the unpacked [logfmt](https://brandur.org/logfmt) fields do not clash with the existing fields, then specify common prefix for all the fields extracted from logfmt,
|
||||
by adding `result_prefix "prefix_name"` to `unpack_logfmt`. For example, the following query adds `foo_` prefix for all the unpacked fields
|
||||
from [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
|
||||
|
||||
```logsql
|
||||
_time:5m | unpack_logfmt result_prefix "foo_"
|
||||
```
|
||||
|
||||
Performance tip: if you need extracting a single field from long [logfmt](https://brandur.org/logfmt) line, it is faster to use [`extract` pipe](#extract-pipe).
|
||||
For example, the following query extracts `"ip"` field from [logfmt](https://brandur.org/logfmt) line stored
|
||||
in [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field):
|
||||
|
||||
```
|
||||
_time:5m | extract ' ip=<field_value>'
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [`unpack_json` pipe](#unpack_json-pipe)
|
||||
- [`extract` pipe](#extract-pipe)
|
||||
|
||||
## stats pipe functions
|
||||
|
||||
LogsQL supports the following functions for [`stats` pipe](#stats-pipe):
|
||||
@ -1505,9 +1748,8 @@ See also:
|
||||
|
||||
### max stats
|
||||
|
||||
`max(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the maximum value across
|
||||
`max(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the maximum value across
|
||||
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
Non-numeric values are ignored.
|
||||
|
||||
For example, the following query returns the maximum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
@ -1543,9 +1785,8 @@ See also:
|
||||
|
||||
### min stats
|
||||
|
||||
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) calculates the minimum value across
|
||||
`min(field1, ..., fieldN)` [stats pipe](#stats-pipe) returns the minimum value across
|
||||
all the mentioned [log fields](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model).
|
||||
Non-numeric values are ignored.
|
||||
|
||||
For example, the following query returns the minimum value for the `duration` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
over logs for the last 5 minutes:
|
||||
@ -1678,48 +1919,37 @@ See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) fo
|
||||
|
||||
## Transformations
|
||||
|
||||
It is possible to perform various transformations on the [selected log entries](#filters) at client side
|
||||
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
LogsQL supports the following transformations on the log entries selected with [filters](#filters):
|
||||
|
||||
LogsQL will support the following transformations for the [selected](#filters) log entries:
|
||||
- Extracting arbitrary text from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
|
||||
See [these docs](#extract-pipe) for details.
|
||||
- Unpacking JSON fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_json-pipe).
|
||||
- Unpacking [logfmt](https://brandur.org/logfmt) fields from [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model). See [these docs](#unpack_logfmt-pipe).
|
||||
|
||||
LogsQL will support the following transformations in the future:
|
||||
|
||||
- Extracting the specified fields from text [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model) according to the provided pattern.
|
||||
- Extracting the specified fields from JSON strings stored inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- Extracting the specified fields from [logfmt](https://brandur.org/logfmt) strings stored
|
||||
inside [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- Creating a new field from existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
according to the provided format.
|
||||
- Creating a new field according to math calculations over existing [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model).
|
||||
- Parsing duration strings into floating-point seconds for further [stats calculations](#stats-pipe).
|
||||
- Creating a boolean field with the result of arbitrary [post-filters](#post-filters) applied to the current fields.
|
||||
- Creating an integer field with the length of the given field value. This can be useful for [stats calculations](#stats-pipe).
|
||||
|
||||
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
|
||||
|
||||
It is also possible to perform various transformations on the [selected log entries](#filters) at client side
|
||||
with `jq`, `awk`, `cut`, etc. Unix commands according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
|
||||
## Post-filters
|
||||
|
||||
It is possible to perform post-filtering on the [selected log entries](#filters) at client side with `grep` or similar Unix commands
|
||||
Post-filtering of query results can be performed at any step by using [`filter` pipe](#filter-pipe).
|
||||
|
||||
It is also possible to perform post-filtering of the [selected log entries](#filters) at client side with `grep` and similar Unix commands
|
||||
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
|
||||
LogsQL will support post-filtering on the original [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
and fields created by various [transformations](#transformations). The following post-filters will be supported:
|
||||
|
||||
- Full-text [filtering](#filters).
|
||||
- [Logical filtering](#logical-filter).
|
||||
|
||||
See the [Roadmap](https://docs.victoriametrics.com/VictoriaLogs/Roadmap.html) for details.
|
||||
|
||||
## Stats
|
||||
|
||||
Stats over the selected logs can be calculated via [`stats` pipe](#stats-pipe).
|
||||
|
||||
LogsQL will support calculating the following additional stats based on the [log fields](https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model)
|
||||
and fields created by [transformations](#transformations):
|
||||
|
||||
It will be possible specifying an optional condition [filter](#post-filters) when calculating the stats.
|
||||
For example, `sum(response_size) if (is_admin:true)` calculates the total response size for admins only.
|
||||
|
||||
It is possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
|
||||
It is also possible to perform stats calculations on the [selected log entries](#filters) at client side with `sort`, `uniq`, etc. Unix commands
|
||||
according to [these docs](https://docs.victoriametrics.com/VictoriaLogs/querying/#command-line).
|
||||
|
||||
## Sorting
|
||||
|
@ -35,15 +35,13 @@ The following functionality is planned in the future versions of VictoriaLogs:
|
||||
- Syslog
|
||||
- Journald (systemd)
|
||||
- Add missing functionality to [LogsQL](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html):
|
||||
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
|
||||
- [Transformation functions](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#transformations).
|
||||
- [Post-filtering](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#post-filters).
|
||||
- The ability to use subqueries inside [in()](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#multi-exact-filter) function.
|
||||
- [Stream context](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#stream-context).
|
||||
- Live tailing for [LogsQL filters](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html#filters) aka `tail -f`.
|
||||
- Web UI with the following abilities:
|
||||
- Explore the ingested logs ([partially done](https://docs.victoriametrics.com/VictoriaLogs/querying/#web-ui)).
|
||||
- Build graphs over time for the ingested logs.
|
||||
- Integration with Grafana.
|
||||
- Build graphs over time for the ingested logs via [hits HTTP API](https://docs.victoriametrics.com/victorialogs/querying/#querying-hits-stats).
|
||||
- Integration with Grafana ([partially done](https://github.com/VictoriaMetrics/victorialogs-datasource)).
|
||||
- Ability to make instant snapshots and backups in the way [similar to VictoriaMetrics](https://docs.victoriametrics.com/#how-to-work-with-snapshots).
|
||||
- Cluster version of VictoriaLogs.
|
||||
- Ability to store data to object storage (such as S3, GCS, Minio).
|
||||
|
@ -88,6 +88,188 @@ curl http://localhost:9428/select/logsql/query -H 'AccountID: 12' -H 'ProjectID:
|
||||
The number of requests to `/select/logsql/query` can be [monitored](https://docs.victoriametrics.com/VictoriaLogs/#monitoring)
|
||||
with `vl_http_requests_total{path="/select/logsql/query"}` metric.
|
||||
|
||||
### Querying hits stats
|
||||
|
||||
VictoriaLogs provides `/select/logsql/hits?query=<query>&start=<start>&end=<end>&step=<step>` HTTP endpoint, which returns the number
|
||||
of matching log entries for the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]`
|
||||
time range grouped by `<step>` buckets. The returned results are sorted by time.
|
||||
|
||||
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
|
||||
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
|
||||
|
||||
The `<step>` arg can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#stats-by-time-buckets).
|
||||
If `<step>` is missing, then it equals to `1d` (one day).
|
||||
|
||||
For example, the following command returns per-hour number of [log messages](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field)
|
||||
with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) over logs for the last 3 hours:
|
||||
|
||||
```sh
|
||||
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=3h' -d 'step=1h'
|
||||
```
|
||||
|
||||
Below is an example JSON output returned from this endpoint:
|
||||
|
||||
```json
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"fields": {},
|
||||
"timestamps": [
|
||||
"2024-01-01T00:00:00Z",
|
||||
"2024-01-01T01:00:00Z",
|
||||
"2024-01-01T02:00:00Z"
|
||||
],
|
||||
"values": [
|
||||
410339,
|
||||
450311,
|
||||
899506
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Additionally, the `offset=<offset>` arg can be passed to `/select/logsql/hits` in order to group buckets according to the given timezone offset.
|
||||
The `<offset>` can contain values in [the format specified here](https://docs.victoriametrics.com/victorialogs/logsql/#duration-values).
|
||||
For example, the following command returns per-day number of logs with `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
|
||||
over the last week in New York time zone (`-4h`):
|
||||
|
||||
```sh
|
||||
curl http://localhost:9428/select/logsql/hits -d 'query=error' -d 'start=1w' -d 'step=1d' -d 'offset=-4h'
|
||||
```
|
||||
|
||||
Additionally, any number of `field=<field_name>` args can be passed to `/select/logsql/hits` for grouping hits buckets by the mentioned `<field_name>` fields.
|
||||
For example, the following command groups hits by `level` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model) in addition to the provided `step`:
|
||||
|
||||
```sh
|
||||
curl http://localhost:9428/select/logsql/hits -d 'query=*' -d 'start=3h' -d 'step=1h' -d 'field=level'
|
||||
```
|
||||
|
||||
The grouped fields are put inside `"fields"` object:
|
||||
|
||||
```json
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"fields": {
|
||||
"level": "error"
|
||||
},
|
||||
"timestamps": [
|
||||
"2024-01-01T00:00:00Z",
|
||||
"2024-01-01T01:00:00Z",
|
||||
"2024-01-01T02:00:00Z"
|
||||
],
|
||||
"values": [
|
||||
25,
|
||||
20,
|
||||
15
|
||||
]
|
||||
},
|
||||
{
|
||||
"fields": {
|
||||
"level": "info"
|
||||
},
|
||||
"timestamps": [
|
||||
"2024-01-01T00:00:00Z",
|
||||
"2024-01-01T01:00:00Z",
|
||||
"2024-01-01T02:00:00Z"
|
||||
],
|
||||
"values": [
|
||||
25625,
|
||||
35043,
|
||||
25230
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [Querying field names](#querying-field-names)
|
||||
- [Querying field values](#querying-field-values)
|
||||
- [HTTP API](#http-api)
|
||||
|
||||
|
||||
### Querying field names
|
||||
|
||||
VictoriaLogs provides `/select/logsql/field_names?query=<query>&start=<start>&end=<end>` HTTP endpoint, which returns field names
|
||||
from result of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
|
||||
|
||||
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
|
||||
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
|
||||
|
||||
For example, the following command returns field names across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word)
|
||||
for the last 5 minutes:
|
||||
|
||||
```sh
|
||||
curl http://localhost:9428/select/logsql/field_names -d 'query=error' -d 'start=5m'
|
||||
```
|
||||
|
||||
Below is an example JSON output returned from this endpoint:
|
||||
|
||||
```json
|
||||
{
|
||||
"names": [
|
||||
"_msg",
|
||||
"_stream",
|
||||
"_time",
|
||||
"host",
|
||||
"level",
|
||||
"location"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
See also:
|
||||
|
||||
- [Querying field values](#querying-field-values)
|
||||
- [Querying hits stats](#querying-hits-stats)
|
||||
- [HTTP API](#http-api)
|
||||
|
||||
### Querying field values
|
||||
|
||||
VictoriaLogs provides `/select/logsql/field_values?query=<query>&field_name=<fieldName>&start=<start>&end=<end>` HTTP endpoint, which returns
|
||||
unique values for the given `<fieldName>` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
from results of the given `<query>` [LogsQL query](https://docs.victoriametrics.com/victorialogs/logsql/) on the given `[<start> ... <end>]` time range.
|
||||
|
||||
The `<start>` and `<end>` args can contain values in [any supported format](https://docs.victoriametrics.com/#timestamp-formats).
|
||||
If `<start>` is missing, then it equals to the minimum timestamp across logs stored in VictoriaLogs.
|
||||
If `<end>` is missing, then it equals to the maximum timestamp across logs stored in VictoriaLogs.
|
||||
|
||||
For example, the following command returns the unique values for `host` [field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#data-model)
|
||||
across logs with the `error` [word](https://docs.victoriametrics.com/victorialogs/logsql/#word) for the last 5 minutes:
|
||||
|
||||
```sh
|
||||
curl http://localhost:9428/select/logsql/field_values -d 'query=error' -d 'field_name=host' -d 'start=5m'
|
||||
```
|
||||
|
||||
Below is an example JSON output returned from this endpoint:
|
||||
|
||||
```json
|
||||
{
|
||||
"values": [
|
||||
"host_0",
|
||||
"host_1",
|
||||
"host_10",
|
||||
"host_100",
|
||||
"host_1000"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
The `/select/logsql/field_values` endpoint supports optional `limit=N` query arg, which allows limiting the number of returned values to `N`.
|
||||
The endpoint returns arbitrary subset of values if their number exceeds `N`, so `limit=N` cannot be used for pagination over big number of field values.
|
||||
|
||||
See also:
|
||||
|
||||
- [Querying field names](#querying-field-names)
|
||||
- [Querying hits stats](#querying-hits-stats)
|
||||
- [HTTP API](#http-api)
|
||||
|
||||
|
||||
## Web UI
|
||||
|
||||
VictoriaLogs provides a simple Web UI for logs [querying](https://docs.victoriametrics.com/VictoriaLogs/LogsQL.html) and exploration
|
||||
|
@ -1,71 +0,0 @@
|
||||
package logjson
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
)
|
||||
|
||||
func TestParserFailure(t *testing.T) {
|
||||
f := func(data string) {
|
||||
t.Helper()
|
||||
|
||||
p := GetParser()
|
||||
err := p.ParseLogMessage([]byte(data))
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
PutParser(p)
|
||||
}
|
||||
f("")
|
||||
f("{foo")
|
||||
f("[1,2,3]")
|
||||
f(`{"foo",}`)
|
||||
}
|
||||
|
||||
func TestParserSuccess(t *testing.T) {
|
||||
f := func(data string, fieldsExpected []logstorage.Field) {
|
||||
t.Helper()
|
||||
|
||||
p := GetParser()
|
||||
err := p.ParseLogMessage([]byte(data))
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
|
||||
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
|
||||
}
|
||||
PutParser(p)
|
||||
}
|
||||
|
||||
f("{}", nil)
|
||||
f(`{"foo":"bar"}`, []logstorage.Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
})
|
||||
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, []logstorage.Field{
|
||||
{
|
||||
Name: "foo.bar",
|
||||
Value: "baz",
|
||||
},
|
||||
{
|
||||
Name: "a",
|
||||
Value: "1",
|
||||
},
|
||||
{
|
||||
Name: "b",
|
||||
Value: "true",
|
||||
},
|
||||
{
|
||||
Name: "c",
|
||||
Value: "[1,2]",
|
||||
},
|
||||
{
|
||||
Name: "d",
|
||||
Value: "false",
|
||||
},
|
||||
})
|
||||
}
|
@ -4,6 +4,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/slicesutil"
|
||||
)
|
||||
|
||||
func getArena() *arena {
|
||||
@ -29,8 +30,12 @@ func (a *arena) reset() {
|
||||
a.b = a.b[:0]
|
||||
}
|
||||
|
||||
func (a *arena) preallocate(n int) {
|
||||
a.b = slicesutil.ExtendCapacity(a.b, n)
|
||||
}
|
||||
|
||||
func (a *arena) sizeBytes() int {
|
||||
return len(a.b)
|
||||
return cap(a.b)
|
||||
}
|
||||
|
||||
func (a *arena) copyBytes(b []byte) []byte {
|
||||
@ -41,9 +46,8 @@ func (a *arena) copyBytes(b []byte) []byte {
|
||||
ab := a.b
|
||||
abLen := len(ab)
|
||||
ab = append(ab, b...)
|
||||
result := ab[abLen:]
|
||||
a.b = ab
|
||||
return result
|
||||
return ab[abLen:]
|
||||
}
|
||||
|
||||
func (a *arena) copyBytesToString(b []byte) string {
|
||||
|
@ -11,8 +11,8 @@ func TestArena(t *testing.T) {
|
||||
|
||||
for i := 0; i < 10; i++ {
|
||||
a := getArena()
|
||||
if n := a.sizeBytes(); n != 0 {
|
||||
t.Fatalf("unexpected non-zero size of empty arena: %d", n)
|
||||
if n := len(a.b); n != 0 {
|
||||
t.Fatalf("unexpected non-zero length of empty arena: %d", n)
|
||||
}
|
||||
|
||||
// add values to arena
|
||||
@ -35,9 +35,12 @@ func TestArena(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
if n := a.sizeBytes(); n != valuesLen {
|
||||
if n := len(a.b); n != valuesLen {
|
||||
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
|
||||
}
|
||||
if n := a.sizeBytes(); n < valuesLen {
|
||||
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
|
||||
}
|
||||
|
||||
// Try allocating slices with different lengths
|
||||
bs := make([]string, 100)
|
||||
@ -47,9 +50,12 @@ func TestArena(t *testing.T) {
|
||||
t.Fatalf("unexpected len(b); got %d; want %d", len(b), j)
|
||||
}
|
||||
valuesLen += j
|
||||
if n := a.sizeBytes(); n != valuesLen {
|
||||
if n := len(a.b); n != valuesLen {
|
||||
t.Fatalf("unexpected arena size; got %d; want %d", n, valuesLen)
|
||||
}
|
||||
if n := a.sizeBytes(); n < valuesLen {
|
||||
t.Fatalf("unexpected arena capacity; got %d; want at least %d", n, valuesLen)
|
||||
}
|
||||
for k := range b {
|
||||
b[k] = byte(k)
|
||||
}
|
||||
|
@ -45,6 +45,8 @@ func (bm *bitmap) copyFrom(src *bitmap) {
|
||||
}
|
||||
|
||||
func (bm *bitmap) init(bitsLen int) {
|
||||
bm.reset()
|
||||
|
||||
a := bm.a
|
||||
wordsLen := (bitsLen + 63) / 64
|
||||
a = slicesutil.SetLength(a, wordsLen)
|
||||
@ -126,6 +128,7 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
|
||||
if word == 0 {
|
||||
continue
|
||||
}
|
||||
wordNew := word
|
||||
for j := 0; j < 64; j++ {
|
||||
mask := uint64(1) << j
|
||||
if (word & mask) == 0 {
|
||||
@ -136,9 +139,42 @@ func (bm *bitmap) forEachSetBit(f func(idx int) bool) {
|
||||
break
|
||||
}
|
||||
if !f(idx) {
|
||||
a[i] &= ^mask
|
||||
wordNew &= ^mask
|
||||
}
|
||||
}
|
||||
if word != wordNew {
|
||||
a[i] = wordNew
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// forEachSetBitReadonly calls f for each set bit
|
||||
func (bm *bitmap) forEachSetBitReadonly(f func(idx int)) {
|
||||
if bm.areAllBitsSet() {
|
||||
n := bm.bitsLen
|
||||
for i := 0; i < n; i++ {
|
||||
f(i)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
a := bm.a
|
||||
bitsLen := bm.bitsLen
|
||||
for i, word := range a {
|
||||
if word == 0 {
|
||||
continue
|
||||
}
|
||||
for j := 0; j < 64; j++ {
|
||||
mask := uint64(1) << j
|
||||
if (word & mask) == 0 {
|
||||
continue
|
||||
}
|
||||
idx := i*64 + j
|
||||
if idx >= bitsLen {
|
||||
break
|
||||
}
|
||||
f(idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@ func TestBitmap(t *testing.T) {
|
||||
|
||||
// Make sure that all the bits are set.
|
||||
nextIdx := 0
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
bm.forEachSetBitReadonly(func(idx int) {
|
||||
if idx >= i {
|
||||
t.Fatalf("index must be smaller than %d", i)
|
||||
}
|
||||
@ -40,7 +40,6 @@ func TestBitmap(t *testing.T) {
|
||||
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
|
||||
}
|
||||
nextIdx++
|
||||
return true
|
||||
})
|
||||
|
||||
if !bm.areAllBitsSet() {
|
||||
@ -66,12 +65,11 @@ func TestBitmap(t *testing.T) {
|
||||
}
|
||||
|
||||
nextIdx = 1
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
bm.forEachSetBitReadonly(func(idx int) {
|
||||
if idx != nextIdx {
|
||||
t.Fatalf("unexpected idx; got %d; want %d", idx, nextIdx)
|
||||
}
|
||||
nextIdx += 2
|
||||
return true
|
||||
})
|
||||
|
||||
// Clear all the bits
|
||||
@ -93,9 +91,8 @@ func TestBitmap(t *testing.T) {
|
||||
}
|
||||
|
||||
bitsCount := 0
|
||||
bm.forEachSetBit(func(_ int) bool {
|
||||
bm.forEachSetBitReadonly(func(_ int) {
|
||||
bitsCount++
|
||||
return true
|
||||
})
|
||||
if bitsCount != 0 {
|
||||
t.Fatalf("unexpected non-zero number of set bits remained: %d", bitsCount)
|
||||
|
131
lib/logstorage/bitmap_timing_test.go
Normal file
131
lib/logstorage/bitmap_timing_test.go
Normal file
@ -0,0 +1,131 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkBitmapForEachSetBitReadonly(b *testing.B) {
|
||||
const bitsLen = 64 * 1024
|
||||
|
||||
b.Run("no-zero-bits", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
benchmarkBitmapForEachSetBitReadonly(b, bm)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("half-zero-bits", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx%2 == 0
|
||||
})
|
||||
benchmarkBitmapForEachSetBitReadonly(b, bm)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("one-set-bit", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx == bitsLen/2
|
||||
})
|
||||
benchmarkBitmapForEachSetBitReadonly(b, bm)
|
||||
putBitmap(bm)
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkBitmapForEachSetBit(b *testing.B) {
|
||||
const bitsLen = 64 * 1024
|
||||
|
||||
b.Run("no-zero-bits-noclear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
benchmarkBitmapForEachSetBit(b, bm, false)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("no-zero-bits-clear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
benchmarkBitmapForEachSetBit(b, bm, true)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("half-zero-bits-noclear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx%2 == 0
|
||||
})
|
||||
benchmarkBitmapForEachSetBit(b, bm, false)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("half-zero-bits-clear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx%2 == 0
|
||||
})
|
||||
benchmarkBitmapForEachSetBit(b, bm, true)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("one-set-bit-noclear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx == bitsLen/2
|
||||
})
|
||||
benchmarkBitmapForEachSetBit(b, bm, false)
|
||||
putBitmap(bm)
|
||||
})
|
||||
b.Run("one-set-bit-clear", func(b *testing.B) {
|
||||
bm := getBitmap(bitsLen)
|
||||
bm.setBits()
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return idx == bitsLen/2
|
||||
})
|
||||
benchmarkBitmapForEachSetBit(b, bm, true)
|
||||
putBitmap(bm)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkBitmapForEachSetBitReadonly(b *testing.B, bm *bitmap) {
|
||||
b.SetBytes(int64(bm.bitsLen))
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
bmLocal := getBitmap(bm.bitsLen)
|
||||
n := 0
|
||||
for pb.Next() {
|
||||
bmLocal.copyFrom(bm)
|
||||
bmLocal.forEachSetBitReadonly(func(idx int) {
|
||||
n++
|
||||
})
|
||||
}
|
||||
putBitmap(bmLocal)
|
||||
GlobalSink.Add(uint64(n))
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkBitmapForEachSetBit(b *testing.B, bm *bitmap, isClearBits bool) {
|
||||
b.SetBytes(int64(bm.bitsLen))
|
||||
b.ReportAllocs()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
bmLocal := getBitmap(bm.bitsLen)
|
||||
n := 0
|
||||
for pb.Next() {
|
||||
bmLocal.copyFrom(bm)
|
||||
bmLocal.forEachSetBit(func(idx int) bool {
|
||||
n++
|
||||
return !isClearBits
|
||||
})
|
||||
if isClearBits {
|
||||
if !bmLocal.isZero() {
|
||||
panic("BUG: bitmap must have no set bits")
|
||||
}
|
||||
} else {
|
||||
if bmLocal.isZero() {
|
||||
panic("BUG: bitmap must have some set bits")
|
||||
}
|
||||
}
|
||||
}
|
||||
putBitmap(bmLocal)
|
||||
GlobalSink.Add(uint64(n))
|
||||
})
|
||||
}
|
File diff suppressed because it is too large
Load Diff
@ -146,7 +146,7 @@ func (bs *blockSearch) partPath() string {
|
||||
return bs.bsw.p.path
|
||||
}
|
||||
|
||||
func (bs *blockSearch) search(bsw *blockSearchWork) {
|
||||
func (bs *blockSearch) search(bsw *blockSearchWork, bm *bitmap) {
|
||||
bs.reset()
|
||||
|
||||
bs.bsw = bsw
|
||||
@ -154,23 +154,22 @@ func (bs *blockSearch) search(bsw *blockSearchWork) {
|
||||
bs.csh.initFromBlockHeader(&bs.a, bsw.p, &bsw.bh)
|
||||
|
||||
// search rows matching the given filter
|
||||
bm := getBitmap(int(bsw.bh.rowsCount))
|
||||
defer putBitmap(bm)
|
||||
|
||||
bm.init(int(bsw.bh.rowsCount))
|
||||
bm.setBits()
|
||||
bs.bsw.so.filter.apply(bs, bm)
|
||||
bs.bsw.so.filter.applyToBlockSearch(bs, bm)
|
||||
|
||||
bs.br.mustInit(bs, bm)
|
||||
if bm.isZero() {
|
||||
// The filter doesn't match any logs in the current block.
|
||||
return
|
||||
}
|
||||
|
||||
bs.br.mustInit(bs, bm)
|
||||
|
||||
// fetch the requested columns to bs.br.
|
||||
if bs.bsw.so.needAllColumns {
|
||||
bs.br.fetchAllColumns(bs, bm)
|
||||
bs.br.initAllColumns(bs, bm)
|
||||
} else {
|
||||
bs.br.fetchRequestedColumns(bs, bm)
|
||||
bs.br.initRequestedColumns(bs, bm)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,7 +37,22 @@ func (fs fieldsSet) getAll() []string {
|
||||
return a
|
||||
}
|
||||
|
||||
func (fs fieldsSet) addFields(fields []string) {
|
||||
for _, f := range fields {
|
||||
fs.add(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs fieldsSet) removeFields(fields []string) {
|
||||
for _, f := range fields {
|
||||
fs.remove(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs fieldsSet) contains(field string) bool {
|
||||
if field == "" {
|
||||
field = "_msg"
|
||||
}
|
||||
_, ok := fs[field]
|
||||
if !ok {
|
||||
_, ok = fs["*"]
|
||||
@ -45,28 +60,19 @@ func (fs fieldsSet) contains(field string) bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
func (fs fieldsSet) removeAll(fields []string) {
|
||||
for _, f := range fields {
|
||||
fs.remove(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs fieldsSet) remove(field string) {
|
||||
if field == "*" {
|
||||
fs.reset()
|
||||
return
|
||||
}
|
||||
if !fs.contains("*") {
|
||||
if field == "" {
|
||||
field = "_msg"
|
||||
}
|
||||
delete(fs, field)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs fieldsSet) addAll(fields []string) {
|
||||
for _, f := range fields {
|
||||
fs.add(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs fieldsSet) add(field string) {
|
||||
if fs.contains("*") {
|
||||
return
|
||||
@ -76,5 +82,8 @@ func (fs fieldsSet) add(field string) {
|
||||
fs["*"] = struct{}{}
|
||||
return
|
||||
}
|
||||
if field == "" {
|
||||
field = "_msg"
|
||||
}
|
||||
fs[field] = struct{}{}
|
||||
}
|
||||
|
@ -17,9 +17,10 @@ func TestFieldsSet(t *testing.T) {
|
||||
}
|
||||
fs.add("foo")
|
||||
fs.add("bar")
|
||||
fs.add("")
|
||||
s := fs.String()
|
||||
if s != "[bar,foo]" {
|
||||
t.Fatalf("unexpected String() result; got %s; want %s", s, "[bar,foo]")
|
||||
if s != "[_msg,bar,foo]" {
|
||||
t.Fatalf("unexpected String() result; got %s; want %s", s, "[_msg,bar,foo]")
|
||||
}
|
||||
if !fs.contains("foo") {
|
||||
t.Fatalf("fs must contain foo")
|
||||
@ -27,6 +28,12 @@ func TestFieldsSet(t *testing.T) {
|
||||
if !fs.contains("bar") {
|
||||
t.Fatalf("fs must contain bar")
|
||||
}
|
||||
if !fs.contains("") {
|
||||
t.Fatalf("fs must contain _msg")
|
||||
}
|
||||
if !fs.contains("_msg") {
|
||||
t.Fatalf("fs must contain _msg")
|
||||
}
|
||||
if fs.contains("baz") {
|
||||
t.Fatalf("fs musn't contain baz")
|
||||
}
|
||||
@ -41,6 +48,13 @@ func TestFieldsSet(t *testing.T) {
|
||||
if fs.contains("bar") {
|
||||
t.Fatalf("fs mustn't contain bar")
|
||||
}
|
||||
fs.remove("")
|
||||
if fs.contains("") {
|
||||
t.Fatalf("fs mustn't contain _msg")
|
||||
}
|
||||
if fs.contains("_msg") {
|
||||
t.Fatalf("fs mustn't contain _msg")
|
||||
}
|
||||
|
||||
// verify *
|
||||
fs.add("*")
|
||||
@ -59,25 +73,25 @@ func TestFieldsSet(t *testing.T) {
|
||||
t.Fatalf("fs must be empty")
|
||||
}
|
||||
|
||||
// verify addAll, getAll, removeAll
|
||||
fs.addAll([]string{"foo", "bar"})
|
||||
if !fs.contains("foo") || !fs.contains("bar") {
|
||||
t.Fatalf("fs must contain foo and bar")
|
||||
// verify addFields, removeFields, getAll
|
||||
fs.addFields([]string{"foo", "bar", "_msg"})
|
||||
if !fs.contains("foo") || !fs.contains("bar") || !fs.contains("_msg") {
|
||||
t.Fatalf("fs must contain foo, bar and _msg")
|
||||
}
|
||||
a := fs.getAll()
|
||||
if !reflect.DeepEqual(a, []string{"bar", "foo"}) {
|
||||
t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"bar", "foo"})
|
||||
if !reflect.DeepEqual(a, []string{"_msg", "bar", "foo"}) {
|
||||
t.Fatalf("unexpected result from getAll(); got %q; want %q", a, []string{"_msg", "bar", "foo"})
|
||||
}
|
||||
fs.removeAll([]string{"bar", "baz"})
|
||||
if fs.contains("bar") || fs.contains("baz") {
|
||||
t.Fatalf("fs mustn't contain bar and baz")
|
||||
fs.removeFields([]string{"bar", "baz", "_msg"})
|
||||
if fs.contains("bar") || fs.contains("baz") || fs.contains("_msg") {
|
||||
t.Fatalf("fs mustn't contain bar, baz and _msg")
|
||||
}
|
||||
if !fs.contains("foo") {
|
||||
t.Fatalf("fs must contain foo")
|
||||
}
|
||||
|
||||
// verify clone
|
||||
fs.addAll([]string{"foo", "bar", "baz"})
|
||||
fs.addFields([]string{"foo", "bar", "baz"})
|
||||
fsStr := fs.String()
|
||||
fsCopy := fs.clone()
|
||||
fsCopyStr := fsCopy.String()
|
||||
|
@ -5,6 +5,104 @@ type filter interface {
|
||||
// String returns string representation of the filter
|
||||
String() string
|
||||
|
||||
// apply must update bm according to the filter applied to the given bs block
|
||||
apply(bs *blockSearch, bm *bitmap)
|
||||
// updateNeededFields must update neededFields with fields needed for the filter
|
||||
updateNeededFields(neededFields fieldsSet)
|
||||
|
||||
// applyToBlockSearch must update bm according to the filter applied to the given bs block
|
||||
applyToBlockSearch(bs *blockSearch, bm *bitmap)
|
||||
|
||||
// applyToBlockResult must update bm according to the filter applied to the given br block
|
||||
applyToBlockResult(br *blockResult, bm *bitmap)
|
||||
}
|
||||
|
||||
// visitFilter sequentially calls visitFunc for filters inside f.
|
||||
//
|
||||
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
|
||||
// It returns the result of the last visitFunc call.
|
||||
func visitFilter(f filter, visitFunc func(f filter) bool) bool {
|
||||
switch t := f.(type) {
|
||||
case *filterAnd:
|
||||
return visitFilters(t.filters, visitFunc)
|
||||
case *filterOr:
|
||||
return visitFilters(t.filters, visitFunc)
|
||||
case *filterNot:
|
||||
return visitFilter(t.f, visitFunc)
|
||||
default:
|
||||
return visitFunc(f)
|
||||
}
|
||||
}
|
||||
|
||||
// visitFilters calls visitFunc per each filter in filters.
|
||||
//
|
||||
// It stops calling visitFunc on the remaining filters as soon as visitFunc returns true.
|
||||
// It returns the result of the last visitFunc call.
|
||||
func visitFilters(filters []filter, visitFunc func(f filter) bool) bool {
|
||||
for _, f := range filters {
|
||||
if visitFilter(f, visitFunc) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// copyFilter recursively copies f filters with the help of copyFunc if visitFunc returns true for them.
|
||||
//
|
||||
// It doesn't copy other filters by returning them as is.
|
||||
func copyFilter(f filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) (filter, error) {
|
||||
switch t := f.(type) {
|
||||
case *filterAnd:
|
||||
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fa := &filterAnd{
|
||||
filters: filters,
|
||||
}
|
||||
return fa, nil
|
||||
case *filterOr:
|
||||
filters, err := copyFilters(t.filters, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fo := &filterOr{
|
||||
filters: filters,
|
||||
}
|
||||
return fo, nil
|
||||
case *filterNot:
|
||||
f, err := copyFilter(t.f, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fn := &filterNot{
|
||||
f: f,
|
||||
}
|
||||
return fn, nil
|
||||
default:
|
||||
if !visitFunc(t) {
|
||||
// Nothing to copy
|
||||
return t, nil
|
||||
}
|
||||
return copyFunc(t)
|
||||
}
|
||||
}
|
||||
|
||||
// copyFilters recursively copies filters with the help of copyfunc if visitFunc returns true for them.
|
||||
//
|
||||
// It doesn't copy other filters by returning them as is.
|
||||
func copyFilters(filters []filter, visitFunc func(f filter) bool, copyFunc func(f filter) (filter, error)) ([]filter, error) {
|
||||
if !visitFilters(filters, visitFunc) {
|
||||
// Nothing to copy
|
||||
return filters, nil
|
||||
}
|
||||
|
||||
// Copy filters.
|
||||
filtersNew := make([]filter, len(filters))
|
||||
for i, f := range filters {
|
||||
fNew, err := copyFilter(f, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filtersNew[i] = fNew
|
||||
}
|
||||
return filtersNew, nil
|
||||
}
|
||||
|
@ -31,7 +31,24 @@ func (fa *filterAnd) String() string {
|
||||
return strings.Join(a, " ")
|
||||
}
|
||||
|
||||
func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fa *filterAnd) updateNeededFields(neededFields fieldsSet) {
|
||||
for _, f := range fa.filters {
|
||||
f.updateNeededFields(neededFields)
|
||||
}
|
||||
}
|
||||
|
||||
func (fa *filterAnd) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
for _, f := range fa.filters {
|
||||
f.applyToBlockResult(br, bm)
|
||||
if bm.isZero() {
|
||||
// Shortcut - there is no need in applying the remaining filters,
|
||||
// since the result will be zero anyway.
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (fa *filterAnd) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
if !fa.matchMessageBloomFilter(bs) {
|
||||
// Fast path - fa doesn't match _msg bloom filter.
|
||||
bm.resetBits()
|
||||
@ -40,7 +57,7 @@ func (fa *filterAnd) apply(bs *blockSearch, bm *bitmap) {
|
||||
|
||||
// Slow path - verify every filter separately.
|
||||
for _, f := range fa.filters {
|
||||
f.apply(bs, bm)
|
||||
f.applyToBlockSearch(bs, bm)
|
||||
if bm.isZero() {
|
||||
// Shortcut - there is no need in applying the remaining filters,
|
||||
// since the result will be zero anyway.
|
||||
|
@ -29,6 +29,10 @@ func (fp *filterAnyCasePhrase) String() string {
|
||||
return fmt.Sprintf("%si(%s)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.phrase))
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) getTokens() []string {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
@ -47,7 +51,12 @@ func (fp *filterAnyCasePhrase) initPhraseLowercase() {
|
||||
fp.phraseLowercase = strings.ToLower(fp.phrase)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fp *filterAnyCasePhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
phraseLowercase := fp.getPhraseLowercase()
|
||||
applyToBlockResultGeneric(br, bm, fp.fieldName, phraseLowercase, matchAnyCasePhrase)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fp.fieldName
|
||||
phraseLowercase := fp.getPhraseLowercase()
|
||||
|
||||
@ -100,10 +109,12 @@ func (fp *filterAnyCasePhrase) apply(bs *blockSearch, bm *bitmap) {
|
||||
|
||||
func matchValuesDictByAnyCasePhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phraseLowercase string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchAnyCasePhrase(v, phraseLowercase) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -33,6 +33,10 @@ func (fp *filterAnyCasePrefix) String() string {
|
||||
return fmt.Sprintf("%si(%s*)", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) getTokens() []string {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
@ -51,7 +55,12 @@ func (fp *filterAnyCasePrefix) initPrefixLowercase() {
|
||||
fp.prefixLowercase = strings.ToLower(fp.prefix)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fp *filterAnyCasePrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
prefixLowercase := fp.getPrefixLowercase()
|
||||
applyToBlockResultGeneric(br, bm, fp.fieldName, prefixLowercase, matchAnyCasePrefix)
|
||||
}
|
||||
|
||||
func (fp *filterAnyCasePrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fp.fieldName
|
||||
prefixLowercase := fp.getPrefixLowercase()
|
||||
|
||||
@ -101,10 +110,12 @@ func (fp *filterAnyCasePrefix) apply(bs *blockSearch, bm *bitmap) {
|
||||
|
||||
func matchValuesDictByAnyCasePrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefixLowercase string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchAnyCasePrefix(v, prefixLowercase) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -24,6 +24,10 @@ func (fe *filterExact) String() string {
|
||||
return fmt.Sprintf("%sexact(%s)", quoteFieldNameIfNeeded(fe.fieldName), quoteTokenIfNeeded(fe.value))
|
||||
}
|
||||
|
||||
func (fe *filterExact) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fe.fieldName)
|
||||
}
|
||||
|
||||
func (fe *filterExact) getTokens() []string {
|
||||
fe.tokensOnce.Do(fe.initTokens)
|
||||
return fe.tokens
|
||||
@ -33,7 +37,133 @@ func (fe *filterExact) initTokens() {
|
||||
fe.tokens = tokenizeStrings(nil, []string{fe.value})
|
||||
}
|
||||
|
||||
func (fe *filterExact) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fe *filterExact) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
value := fe.value
|
||||
|
||||
c := br.getColumnByName(fe.fieldName)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if v != value {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
matchColumnByExactValue(br, bm, c, value)
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
matchColumnByExactValue(br, bm, c, value)
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if v == value {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
n, ok := tryParseUint64(value)
|
||||
if !ok || n >= (1<<8) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
nNeeded := uint8(n)
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := unmarshalUint8(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeUint16:
|
||||
n, ok := tryParseUint64(value)
|
||||
if !ok || n >= (1<<16) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
nNeeded := uint16(n)
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := unmarshalUint16(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeUint32:
|
||||
n, ok := tryParseUint64(value)
|
||||
if !ok || n >= (1<<32) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
nNeeded := uint32(n)
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := unmarshalUint32(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeUint64:
|
||||
nNeeded, ok := tryParseUint64(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := unmarshalUint64(valuesEncoded[idx])
|
||||
return n == nNeeded
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
fNeeded, ok := tryParseFloat64(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
f := unmarshalFloat64(valuesEncoded[idx])
|
||||
return f == fNeeded
|
||||
})
|
||||
case valueTypeIPv4:
|
||||
ipNeeded, ok := tryParseIPv4(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
ip := unmarshalIPv4(valuesEncoded[idx])
|
||||
return ip == ipNeeded
|
||||
})
|
||||
case valueTypeTimestampISO8601:
|
||||
timestampNeeded, ok := tryParseTimestampISO8601(value)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
timestamp := unmarshalTimestampISO8601(valuesEncoded[idx])
|
||||
return timestamp == timestampNeeded
|
||||
})
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func matchColumnByExactValue(br *blockResult, bm *bitmap, c *blockResultColumn, value string) {
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return values[idx] == value
|
||||
})
|
||||
}
|
||||
|
||||
func (fe *filterExact) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fe.fieldName
|
||||
value := fe.value
|
||||
|
||||
@ -121,10 +251,12 @@ func matchFloat64ByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, val
|
||||
|
||||
func matchValuesDictByExactValue(bs *blockSearch, ch *columnHeader, bm *bitmap, value string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if v == value {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -23,6 +23,10 @@ func (fep *filterExactPrefix) String() string {
|
||||
return fmt.Sprintf("%sexact(%s*)", quoteFieldNameIfNeeded(fep.fieldName), quoteTokenIfNeeded(fep.prefix))
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fep.fieldName)
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) getTokens() []string {
|
||||
fep.tokensOnce.Do(fep.initTokens)
|
||||
return fep.tokens
|
||||
@ -32,7 +36,11 @@ func (fep *filterExactPrefix) initTokens() {
|
||||
fep.tokens = getTokensSkipLast(fep.prefix)
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fep *filterExactPrefix) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
applyToBlockResultGeneric(br, bm, fep.fieldName, fep.prefix, matchExactPrefix)
|
||||
}
|
||||
|
||||
func (fep *filterExactPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fep.fieldName
|
||||
prefix := fep.prefix
|
||||
|
||||
@ -91,7 +99,7 @@ func matchTimestampISO8601ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *b
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return matchExactPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -108,7 +116,7 @@ func matchIPv4ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefi
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchExactPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -126,7 +134,7 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchExactPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -134,10 +142,12 @@ func matchFloat64ByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, pr
|
||||
|
||||
func matchValuesDictByExactPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchExactPrefix(v, prefix) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -18,6 +18,15 @@ type filterIn struct {
|
||||
fieldName string
|
||||
values []string
|
||||
|
||||
// needExecuteQuery is set to true if q must be executed for populating values before filter execution.
|
||||
needExecuteQuery bool
|
||||
|
||||
// If q is non-nil, then values must be populated from q before filter execution.
|
||||
q *Query
|
||||
|
||||
// qFieldName must be set to field name for obtaining values from if q is non-nil.
|
||||
qFieldName string
|
||||
|
||||
tokenSetsOnce sync.Once
|
||||
tokenSets [][]string
|
||||
|
||||
@ -47,12 +56,22 @@ type filterIn struct {
|
||||
}
|
||||
|
||||
func (fi *filterIn) String() string {
|
||||
values := fi.values
|
||||
a := make([]string, len(values))
|
||||
for i, value := range values {
|
||||
a[i] = quoteTokenIfNeeded(value)
|
||||
args := ""
|
||||
if fi.q != nil {
|
||||
args = fi.q.String()
|
||||
} else {
|
||||
values := fi.values
|
||||
a := make([]string, len(values))
|
||||
for i, value := range values {
|
||||
a[i] = quoteTokenIfNeeded(value)
|
||||
}
|
||||
args = strings.Join(a, ",")
|
||||
}
|
||||
return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), strings.Join(a, ","))
|
||||
return fmt.Sprintf("%sin(%s)", quoteFieldNameIfNeeded(fi.fieldName), args)
|
||||
}
|
||||
|
||||
func (fi *filterIn) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fi.fieldName)
|
||||
}
|
||||
|
||||
func (fi *filterIn) getTokenSets() [][]string {
|
||||
@ -249,7 +268,95 @@ func (fi *filterIn) initTimestampISO8601Values() {
|
||||
fi.timestampISO8601Values = m
|
||||
}
|
||||
|
||||
func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fi *filterIn) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
if len(fi.values) == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName(fi.fieldName)
|
||||
if c.isConst {
|
||||
stringValues := fi.getStringValues()
|
||||
v := c.valuesEncoded[0]
|
||||
if _, ok := stringValues[v]; !ok {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
fi.matchColumnByStringValues(br, bm, c)
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
fi.matchColumnByStringValues(br, bm, c)
|
||||
case valueTypeDict:
|
||||
stringValues := fi.getStringValues()
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if _, ok := stringValues[v]; ok {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
binValues := fi.getUint8Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeUint16:
|
||||
binValues := fi.getUint16Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeUint32:
|
||||
binValues := fi.getUint32Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeUint64:
|
||||
binValues := fi.getUint64Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeFloat64:
|
||||
binValues := fi.getFloat64Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeIPv4:
|
||||
binValues := fi.getIPv4Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
case valueTypeTimestampISO8601:
|
||||
binValues := fi.getTimestampISO8601Values()
|
||||
matchColumnByBinValues(br, bm, c, binValues)
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (fi *filterIn) matchColumnByStringValues(br *blockResult, bm *bitmap, c *blockResultColumn) {
|
||||
stringValues := fi.getStringValues()
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
_, ok := stringValues[v]
|
||||
return ok
|
||||
})
|
||||
}
|
||||
|
||||
func matchColumnByBinValues(br *blockResult, bm *bitmap, c *blockResultColumn, binValues map[string]struct{}) {
|
||||
if len(binValues) == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
_, ok := binValues[v]
|
||||
return ok
|
||||
})
|
||||
}
|
||||
|
||||
func (fi *filterIn) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fi.fieldName
|
||||
|
||||
if len(fi.values) == 0 {
|
||||
@ -314,6 +421,10 @@ func (fi *filterIn) apply(bs *blockSearch, bm *bitmap) {
|
||||
}
|
||||
|
||||
func matchAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}, tokenSets [][]string) {
|
||||
if len(values) == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if !matchBloomFilterAnyTokenSet(bs, ch, tokenSets) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -344,10 +455,12 @@ func matchBloomFilterAnyTokenSet(bs *blockSearch, ch *columnHeader, tokenSets []
|
||||
|
||||
func matchValuesDictByAnyValue(bs *blockSearch, ch *columnHeader, bm *bitmap, values map[string]struct{}) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if _, ok := values[v]; ok {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -3,8 +3,6 @@ package logstorage
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@ -18,12 +16,83 @@ type filterIPv4Range struct {
|
||||
}
|
||||
|
||||
func (fr *filterIPv4Range) String() string {
|
||||
minValue := string(encoding.MarshalUint32(nil, fr.minValue))
|
||||
maxValue := string(encoding.MarshalUint32(nil, fr.maxValue))
|
||||
return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), toIPv4String(nil, minValue), toIPv4String(nil, maxValue))
|
||||
minValue := marshalIPv4String(nil, fr.minValue)
|
||||
maxValue := marshalIPv4String(nil, fr.maxValue)
|
||||
return fmt.Sprintf("%sipv4_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), minValue, maxValue)
|
||||
}
|
||||
|
||||
func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fr *filterIPv4Range) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterIPv4Range) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
|
||||
if minValue > maxValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName(fr.fieldName)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !matchIPv4Range(v, minValue, maxValue) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
return matchIPv4Range(v, minValue, maxValue)
|
||||
})
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if matchIPv4Range(v, minValue, maxValue) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
bm.resetBits()
|
||||
case valueTypeUint16:
|
||||
bm.resetBits()
|
||||
case valueTypeUint32:
|
||||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
ip := unmarshalIPv4(valuesEncoded[idx])
|
||||
return ip >= minValue && ip <= maxValue
|
||||
})
|
||||
case valueTypeTimestampISO8601:
|
||||
bm.resetBits()
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (fr *filterIPv4Range) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fr.fieldName
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
@ -75,10 +144,12 @@ func (fr *filterIPv4Range) apply(bs *blockSearch, bm *bitmap) {
|
||||
|
||||
func matchValuesDictByIPv4Range(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue uint32) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchIPv4Range(v, minValue, maxValue) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
@ -108,8 +179,7 @@ func matchIPv4ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, m
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := encoding.UnmarshalUint32(b)
|
||||
n := unmarshalIPv4(v)
|
||||
return n >= minValue && n <= maxValue
|
||||
})
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ package logstorage
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@ -22,7 +21,101 @@ func (fr *filterLenRange) String() string {
|
||||
return quoteFieldNameIfNeeded(fr.fieldName) + "len_range" + fr.stringRepr
|
||||
}
|
||||
|
||||
func (fr *filterLenRange) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fr *filterLenRange) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterLenRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
minLen := fr.minLen
|
||||
maxLen := fr.maxLen
|
||||
|
||||
if minLen > maxLen {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName(fr.fieldName)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !matchLenRange(v, minLen, maxLen) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if matchLenRange(v, minLen, maxLen) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
if minLen > 3 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeUint16:
|
||||
if minLen > 5 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeUint32:
|
||||
if minLen > 10 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeUint64:
|
||||
if minLen > 20 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeFloat64:
|
||||
if minLen > 24 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeIPv4:
|
||||
if minLen > uint64(len("255.255.255.255")) || maxLen < uint64(len("0.0.0.0")) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByLenRange(br, bm, c, minLen, maxLen)
|
||||
case valueTypeTimestampISO8601:
|
||||
matchTimestampISO8601ByLenRange(bm, minLen, maxLen)
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func matchColumnByLenRange(br *blockResult, bm *bitmap, c *blockResultColumn, minLen, maxLen uint64) {
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
return matchLenRange(v, minLen, maxLen)
|
||||
})
|
||||
}
|
||||
|
||||
func (fr *filterLenRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fr.fieldName
|
||||
minLen := fr.minLen
|
||||
maxLen := fr.maxLen
|
||||
@ -89,7 +182,7 @@ func matchIPv4ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchLenRange(s, minLen, maxLen)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -103,7 +196,7 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchLenRange(s, minLen, maxLen)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -111,10 +204,12 @@ func matchFloat64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLe
|
||||
|
||||
func matchValuesDictByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchLenRange(v, minLen, maxLen) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
@ -127,6 +222,10 @@ func matchStringByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
|
||||
}
|
||||
|
||||
func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
if minLen > 3 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if !matchMinMaxValueLen(ch, minLen, maxLen) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -141,6 +240,10 @@ func matchUint8ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen,
|
||||
}
|
||||
|
||||
func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
if minLen > 5 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if !matchMinMaxValueLen(ch, minLen, maxLen) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -155,6 +258,10 @@ func matchUint16ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
|
||||
}
|
||||
|
||||
func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
if minLen > 10 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if !matchMinMaxValueLen(ch, minLen, maxLen) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -169,6 +276,10 @@ func matchUint32ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen
|
||||
}
|
||||
|
||||
func matchUint64ByLenRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minLen, maxLen uint64) {
|
||||
if minLen > 20 || maxLen == 0 {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
if !matchMinMaxValueLen(ch, minLen, maxLen) {
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -191,12 +302,10 @@ func matchMinMaxValueLen(ch *columnHeader, minLen, maxLen uint64) bool {
|
||||
bb := bbPool.Get()
|
||||
defer bbPool.Put(bb)
|
||||
|
||||
bb.B = marshalUint64(bb.B[:0], ch.minValue)
|
||||
s := bytesutil.ToUnsafeString(bb.B)
|
||||
if maxLen < uint64(len(s)) {
|
||||
bb.B = marshalUint64String(bb.B[:0], ch.minValue)
|
||||
if maxLen < uint64(len(bb.B)) {
|
||||
return false
|
||||
}
|
||||
bb.B = marshalUint64(bb.B[:0], ch.maxValue)
|
||||
s = bytesutil.ToUnsafeString(bb.B)
|
||||
return minLen <= uint64(len(s))
|
||||
bb.B = marshalUint64String(bb.B[:0], ch.maxValue)
|
||||
return minLen <= uint64(len(bb.B))
|
||||
}
|
||||
|
@ -8,6 +8,14 @@ func (fn *filterNoop) String() string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func (fn *filterNoop) apply(_ *blockSearch, _ *bitmap) {
|
||||
func (fn *filterNoop) updateNeededFields(_ fieldsSet) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
func (fn *filterNoop) applyToBlockResult(_ *blockResult, _ *bitmap) {
|
||||
// nothing to do
|
||||
}
|
||||
|
||||
func (fn *filterNoop) applyToBlockSearch(_ *blockSearch, _ *bitmap) {
|
||||
// nothing to do
|
||||
}
|
||||
|
@ -16,12 +16,26 @@ func (fn *filterNot) String() string {
|
||||
return "!" + s
|
||||
}
|
||||
|
||||
func (fn *filterNot) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fn *filterNot) updateNeededFields(neededFields fieldsSet) {
|
||||
fn.f.updateNeededFields(neededFields)
|
||||
}
|
||||
|
||||
func (fn *filterNot) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
// Minimize the number of rows to check by the filter by applying it
|
||||
// only to the rows, which match the bm, e.g. they may change the bm result.
|
||||
bmTmp := getBitmap(bm.bitsLen)
|
||||
bmTmp.copyFrom(bm)
|
||||
fn.f.apply(bs, bmTmp)
|
||||
fn.f.applyToBlockResult(br, bmTmp)
|
||||
bm.andNot(bmTmp)
|
||||
putBitmap(bmTmp)
|
||||
}
|
||||
|
||||
func (fn *filterNot) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
// Minimize the number of rows to check by the filter by applying it
|
||||
// only to the rows, which match the bm, e.g. they may change the bm result.
|
||||
bmTmp := getBitmap(bm.bitsLen)
|
||||
bmTmp.copyFrom(bm)
|
||||
fn.f.applyToBlockSearch(bs, bmTmp)
|
||||
bm.andNot(bmTmp)
|
||||
putBitmap(bmTmp)
|
||||
}
|
||||
|
@ -21,7 +21,13 @@ func (fo *filterOr) String() string {
|
||||
return strings.Join(a, " or ")
|
||||
}
|
||||
|
||||
func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fo *filterOr) updateNeededFields(neededFields fieldsSet) {
|
||||
for _, f := range fo.filters {
|
||||
f.updateNeededFields(neededFields)
|
||||
}
|
||||
}
|
||||
|
||||
func (fo *filterOr) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
bmResult := getBitmap(bm.bitsLen)
|
||||
bmTmp := getBitmap(bm.bitsLen)
|
||||
for _, f := range fo.filters {
|
||||
@ -36,7 +42,30 @@ func (fo *filterOr) apply(bs *blockSearch, bm *bitmap) {
|
||||
// since the result already matches all the values from the block.
|
||||
break
|
||||
}
|
||||
f.apply(bs, bmTmp)
|
||||
f.applyToBlockResult(br, bmTmp)
|
||||
bmResult.or(bmTmp)
|
||||
}
|
||||
putBitmap(bmTmp)
|
||||
bm.copyFrom(bmResult)
|
||||
putBitmap(bmResult)
|
||||
}
|
||||
|
||||
func (fo *filterOr) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
bmResult := getBitmap(bm.bitsLen)
|
||||
bmTmp := getBitmap(bm.bitsLen)
|
||||
for _, f := range fo.filters {
|
||||
// Minimize the number of rows to check by the filter by checking only
|
||||
// the rows, which may change the output bm:
|
||||
// - bm matches them, e.g. the caller wants to get them
|
||||
// - bmResult doesn't match them, e.g. all the previous OR filters didn't match them
|
||||
bmTmp.copyFrom(bm)
|
||||
bmTmp.andNot(bmResult)
|
||||
if bmTmp.isZero() {
|
||||
// Shortcut - there is no need in applying the remaining filters,
|
||||
// since the result already matches all the values from the block.
|
||||
break
|
||||
}
|
||||
f.applyToBlockSearch(bs, bmTmp)
|
||||
bmResult.or(bmTmp)
|
||||
}
|
||||
putBitmap(bmTmp)
|
||||
|
@ -32,6 +32,10 @@ func (fp *filterPhrase) String() string {
|
||||
return quoteFieldNameIfNeeded(fp.fieldName) + quoteTokenIfNeeded(fp.phrase)
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) getTokens() []string {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
@ -41,7 +45,11 @@ func (fp *filterPhrase) initTokens() {
|
||||
fp.tokens = tokenizeStrings(nil, []string{fp.phrase})
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fp *filterPhrase) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
applyToBlockResultGeneric(br, bm, fp.fieldName, fp.phrase, matchPhrase)
|
||||
}
|
||||
|
||||
func (fp *filterPhrase) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fp.fieldName
|
||||
phrase := fp.phrase
|
||||
|
||||
@ -107,7 +115,7 @@ func matchTimestampISO8601ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return matchPhrase(s, phrase)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -131,7 +139,7 @@ func matchIPv4ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase str
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchPhrase(s, phrase)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -160,7 +168,7 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchPhrase(s, phrase)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -168,10 +176,12 @@ func matchFloat64ByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase
|
||||
|
||||
func matchValuesDictByPhrase(bs *blockSearch, ch *columnHeader, bm *bitmap, phrase string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchPhrase(v, phrase) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
@ -249,7 +259,7 @@ func getPhrasePos(s, phrase string) int {
|
||||
}
|
||||
|
||||
func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encodedValues []byte) {
|
||||
if len(encodedValues) == 0 {
|
||||
if bytes.IndexByte(encodedValues, 1) < 0 {
|
||||
// Fast path - the phrase is missing in the valuesDict
|
||||
bm.resetBits()
|
||||
return
|
||||
@ -259,8 +269,11 @@ func matchEncodedValuesDict(bs *blockSearch, ch *columnHeader, bm *bitmap, encod
|
||||
if len(v) != 1 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for dict value: got %d; want 1", bs.partPath(), len(v))
|
||||
}
|
||||
n := bytes.IndexByte(encodedValues, v[0])
|
||||
return n >= 0
|
||||
idx := v[0]
|
||||
if int(idx) >= len(encodedValues) {
|
||||
logger.Panicf("FATAL: %s: too big index for dict value; got %d; must be smaller than %d", bs.partPath(), idx, len(encodedValues))
|
||||
}
|
||||
return encodedValues[idx] == 1
|
||||
})
|
||||
}
|
||||
|
||||
@ -294,26 +307,107 @@ func isMsgFieldName(fieldName string) bool {
|
||||
return fieldName == "" || fieldName == "_msg"
|
||||
}
|
||||
|
||||
func toFloat64StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
func toFloat64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
bb.B = toFloat64String(bb.B[:0], v)
|
||||
f := unmarshalFloat64(v)
|
||||
bb.B = marshalFloat64String(bb.B[:0], f)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
func toIPv4StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
func toIPv4String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of IPv4: got %d; want 4", bs.partPath(), len(v))
|
||||
}
|
||||
bb.B = toIPv4String(bb.B[:0], v)
|
||||
ip := unmarshalIPv4(v)
|
||||
bb.B = marshalIPv4String(bb.B[:0], ip)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
func toTimestampISO8601StringExt(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
func toTimestampISO8601String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of ISO8601 timestamp: got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
bb.B = toTimestampISO8601String(bb.B[:0], v)
|
||||
timestamp := unmarshalTimestampISO8601(v)
|
||||
bb.B = marshalTimestampISO8601String(bb.B[:0], timestamp)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
func applyToBlockResultGeneric(br *blockResult, bm *bitmap, fieldName, phrase string, matchFunc func(v, phrase string) bool) {
|
||||
c := br.getColumnByName(fieldName)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !matchFunc(v, phrase) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if matchFunc(v, phrase) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n >= (1<<8) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeUint16:
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n >= (1<<16) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeUint32:
|
||||
n, ok := tryParseUint64(phrase)
|
||||
if !ok || n >= (1<<32) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeUint64:
|
||||
_, ok := tryParseUint64(phrase)
|
||||
if !ok {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeFloat64:
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeIPv4:
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
case valueTypeTimestampISO8601:
|
||||
matchColumnByPhraseGeneric(br, bm, c, phrase, matchFunc)
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func matchColumnByPhraseGeneric(br *blockResult, bm *bitmap, c *blockResultColumn, phrase string, matchFunc func(v, phrase string) bool) {
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
return matchFunc(values[idx], phrase)
|
||||
})
|
||||
}
|
||||
|
@ -7,7 +7,6 @@ import (
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@ -31,6 +30,10 @@ func (fp *filterPrefix) String() string {
|
||||
return fmt.Sprintf("%s%s*", quoteFieldNameIfNeeded(fp.fieldName), quoteTokenIfNeeded(fp.prefix))
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fp.fieldName)
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) getTokens() []string {
|
||||
fp.tokensOnce.Do(fp.initTokens)
|
||||
return fp.tokens
|
||||
@ -40,7 +43,11 @@ func (fp *filterPrefix) initTokens() {
|
||||
fp.tokens = getTokensSkipLast(fp.prefix)
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fp *filterPrefix) applyToBlockResult(bs *blockResult, bm *bitmap) {
|
||||
applyToBlockResultGeneric(bs, bm, fp.fieldName, fp.prefix, matchPrefix)
|
||||
}
|
||||
|
||||
func (fp *filterPrefix) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fp.fieldName
|
||||
prefix := fp.prefix
|
||||
|
||||
@ -102,7 +109,7 @@ func matchTimestampISO8601ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return matchPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -123,7 +130,7 @@ func matchIPv4ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix str
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -151,7 +158,7 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchPrefix(s, prefix)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -159,10 +166,12 @@ func matchFloat64ByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix
|
||||
|
||||
func matchValuesDictByPrefix(bs *blockSearch, ch *columnHeader, bm *bitmap, prefix string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchPrefix(v, prefix) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
@ -321,8 +330,8 @@ func toUint8String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string {
|
||||
if len(v) != 1 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
|
||||
}
|
||||
n := uint64(v[0])
|
||||
bb.B = marshalUint64(bb.B[:0], n)
|
||||
n := unmarshalUint8(v)
|
||||
bb.B = marshalUint8String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
@ -330,9 +339,8 @@ func toUint16String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
|
||||
if len(v) != 2 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint16(b))
|
||||
bb.B = marshalUint64(bb.B[:0], n)
|
||||
n := unmarshalUint16(v)
|
||||
bb.B = marshalUint16String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
@ -340,9 +348,8 @@ func toUint32String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint32 number: got %d; want 4", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint32(b))
|
||||
bb.B = marshalUint64(bb.B[:0], n)
|
||||
n := unmarshalUint32(v)
|
||||
bb.B = marshalUint32String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
@ -350,8 +357,7 @@ func toUint64String(bs *blockSearch, bb *bytesutil.ByteBuffer, v string) string
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint64 number: got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := encoding.UnmarshalUint64(b)
|
||||
bb.B = marshalUint64(bb.B[:0], n)
|
||||
n := unmarshalUint64(v)
|
||||
bb.B = marshalUint64String(bb.B[:0], n)
|
||||
return bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
@ -3,8 +3,6 @@ package logstorage
|
||||
import (
|
||||
"math"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
@ -13,17 +11,132 @@ import (
|
||||
// Example LogsQL: `fieldName:range(minValue, maxValue]`
|
||||
type filterRange struct {
|
||||
fieldName string
|
||||
minValue float64
|
||||
maxValue float64
|
||||
|
||||
minValue float64
|
||||
maxValue float64
|
||||
|
||||
stringRepr string
|
||||
}
|
||||
|
||||
func (fr *filterRange) String() string {
|
||||
return quoteFieldNameIfNeeded(fr.fieldName) + "range" + fr.stringRepr
|
||||
return quoteFieldNameIfNeeded(fr.fieldName) + fr.stringRepr
|
||||
}
|
||||
|
||||
func (fr *filterRange) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fr *filterRange) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
|
||||
if minValue > maxValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName(fr.fieldName)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !matchRange(v, minValue, maxValue) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
return matchRange(v, minValue, maxValue)
|
||||
})
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if matchRange(v, minValue, maxValue) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
|
||||
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
n := uint64(unmarshalUint8(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
case valueTypeUint16:
|
||||
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
|
||||
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
n := uint64(unmarshalUint16(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
case valueTypeUint32:
|
||||
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
|
||||
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
n := uint64(unmarshalUint32(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
case valueTypeUint64:
|
||||
minValueUint, maxValueUint := toUint64Range(minValue, maxValue)
|
||||
if maxValue < 0 || minValueUint > c.maxValue || maxValueUint < c.minValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
n := unmarshalUint64(v)
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
case valueTypeFloat64:
|
||||
if minValue > math.Float64frombits(c.maxValue) || maxValue < math.Float64frombits(c.minValue) {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
f := unmarshalFloat64(v)
|
||||
return f >= minValue && f <= maxValue
|
||||
})
|
||||
case valueTypeTimestampISO8601:
|
||||
bm.resetBits()
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (fr *filterRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fr.fieldName
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
@ -83,19 +196,19 @@ func matchFloat64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of floating-point number: got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := encoding.UnmarshalUint64(b)
|
||||
f := math.Float64frombits(n)
|
||||
f := unmarshalFloat64(v)
|
||||
return f >= minValue && f <= maxValue
|
||||
})
|
||||
}
|
||||
|
||||
func matchValuesDictByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue float64) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchRange(v, minValue, maxValue) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
@ -118,7 +231,7 @@ func matchUint8ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
|
||||
if len(v) != 1 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 1", bs.partPath(), len(v))
|
||||
}
|
||||
n := uint64(v[0])
|
||||
n := uint64(unmarshalUint8(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -135,8 +248,7 @@ func matchUint16ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
|
||||
if len(v) != 2 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint16 number: got %d; want 2", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint16(b))
|
||||
n := uint64(unmarshalUint16(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -153,8 +265,7 @@ func matchUint32ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
|
||||
if len(v) != 4 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 4", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := uint64(encoding.UnmarshalUint32(b))
|
||||
n := uint64(unmarshalUint32(v))
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -171,8 +282,7 @@ func matchUint64ByRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue,
|
||||
if len(v) != 8 {
|
||||
logger.Panicf("FATAL: %s: unexpected length for binary representation of uint8 number: got %d; want 8", bs.partPath(), len(v))
|
||||
}
|
||||
b := bytesutil.ToUnsafeBytes(v)
|
||||
n := encoding.UnmarshalUint64(b)
|
||||
n := unmarshalUint64(v)
|
||||
return n >= minValueUint && n <= maxValueUint
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
|
@ -19,7 +19,18 @@ func (fr *filterRegexp) String() string {
|
||||
return fmt.Sprintf("%sre(%q)", quoteFieldNameIfNeeded(fr.fieldName), fr.re.String())
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fr *filterRegexp) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
re := fr.re
|
||||
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
|
||||
return re.MatchString(v)
|
||||
})
|
||||
}
|
||||
|
||||
func (fr *filterRegexp) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fr.fieldName
|
||||
re := fr.re
|
||||
|
||||
@ -69,7 +80,7 @@ func (fr *filterRegexp) apply(bs *blockSearch, bm *bitmap) {
|
||||
func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return re.MatchString(s)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -78,7 +89,7 @@ func matchTimestampISO8601ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap
|
||||
func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return re.MatchString(s)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -87,7 +98,7 @@ func matchIPv4ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp
|
||||
func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return re.MatchString(s)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -95,10 +106,12 @@ func matchFloat64ByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *reg
|
||||
|
||||
func matchValuesDictByRegexp(bs *blockSearch, ch *columnHeader, bm *bitmap, re *regexp.Regexp) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if re.MatchString(v) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -31,6 +31,10 @@ func (fs *filterSequence) String() string {
|
||||
return fmt.Sprintf("%sseq(%s)", quoteFieldNameIfNeeded(fs.fieldName), strings.Join(a, ","))
|
||||
}
|
||||
|
||||
func (fs *filterSequence) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fs.fieldName)
|
||||
}
|
||||
|
||||
func (fs *filterSequence) getTokens() []string {
|
||||
fs.tokensOnce.Do(fs.initTokens)
|
||||
return fs.tokens
|
||||
@ -58,7 +62,18 @@ func (fs *filterSequence) initNonEmptyPhrases() {
|
||||
fs.nonEmptyPhrases = result
|
||||
}
|
||||
|
||||
func (fs *filterSequence) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fs *filterSequence) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
phrases := fs.getNonEmptyPhrases()
|
||||
if len(phrases) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
applyToBlockResultGeneric(br, bm, fs.fieldName, "", func(v, _ string) bool {
|
||||
return matchSequence(v, phrases)
|
||||
})
|
||||
}
|
||||
|
||||
func (fs *filterSequence) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fs.fieldName
|
||||
phrases := fs.getNonEmptyPhrases()
|
||||
|
||||
@ -124,7 +139,7 @@ func matchTimestampISO8601BySequence(bs *blockSearch, ch *columnHeader, bm *bitm
|
||||
// Slow path - phrases contain incomplete timestamp. Search over string representation of the timestamp.
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return matchSequence(s, phrases)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -145,7 +160,7 @@ func matchIPv4BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases,
|
||||
// the ip to string before searching for prefix there.
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchSequence(s, phrases)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -163,7 +178,7 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
|
||||
// of floating-point numbers :(
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchSequence(s, phrases)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -171,10 +186,12 @@ func matchFloat64BySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phras
|
||||
|
||||
func matchValuesDictBySequence(bs *blockSearch, ch *columnHeader, bm *bitmap, phrases []string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchSequence(v, phrases) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -2,6 +2,8 @@ package logstorage
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// filterStream is the filter for `_stream:{...}`
|
||||
@ -27,6 +29,10 @@ func (fs *filterStream) String() string {
|
||||
return "_stream:" + s
|
||||
}
|
||||
|
||||
func (fs *filterStream) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add("_stream")
|
||||
}
|
||||
|
||||
func (fs *filterStream) getStreamIDs() map[streamID]struct{} {
|
||||
fs.streamIDsOnce.Do(fs.initStreamIDs)
|
||||
return fs.streamIDs
|
||||
@ -41,7 +47,66 @@ func (fs *filterStream) initStreamIDs() {
|
||||
fs.streamIDs = m
|
||||
}
|
||||
|
||||
func (fs *filterStream) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fs *filterStream) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
if fs.f.isEmpty() {
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName("_stream")
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !fs.f.matchStreamName(v) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
return fs.f.matchStreamName(v)
|
||||
})
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if fs.f.matchStreamName(v) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
bm.resetBits()
|
||||
case valueTypeUint16:
|
||||
bm.resetBits()
|
||||
case valueTypeUint32:
|
||||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
bm.resetBits()
|
||||
case valueTypeTimestampISO8601:
|
||||
bm.resetBits()
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (fs *filterStream) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
if fs.f.isEmpty() {
|
||||
return
|
||||
}
|
||||
|
@ -22,7 +22,25 @@ func (fr *filterStringRange) String() string {
|
||||
return fmt.Sprintf("%sstring_range(%s, %s)", quoteFieldNameIfNeeded(fr.fieldName), quoteTokenIfNeeded(fr.minValue), quoteTokenIfNeeded(fr.maxValue))
|
||||
}
|
||||
|
||||
func (fr *filterStringRange) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (fr *filterStringRange) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add(fr.fieldName)
|
||||
}
|
||||
|
||||
func (fr *filterStringRange) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
|
||||
if minValue > maxValue {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
applyToBlockResultGeneric(br, bm, fr.fieldName, "", func(v, _ string) bool {
|
||||
return matchStringRange(v, minValue, maxValue)
|
||||
})
|
||||
}
|
||||
|
||||
func (fr *filterStringRange) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
fieldName := fr.fieldName
|
||||
minValue := fr.minValue
|
||||
maxValue := fr.maxValue
|
||||
@ -81,7 +99,7 @@ func matchTimestampISO8601ByStringRange(bs *blockSearch, ch *columnHeader, bm *b
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toTimestampISO8601StringExt(bs, bb, v)
|
||||
s := toTimestampISO8601String(bs, bb, v)
|
||||
return matchStringRange(s, minValue, maxValue)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -95,7 +113,7 @@ func matchIPv4ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minVa
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toIPv4StringExt(bs, bb, v)
|
||||
s := toIPv4String(bs, bb, v)
|
||||
return matchStringRange(s, minValue, maxValue)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -109,7 +127,7 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
|
||||
|
||||
bb := bbPool.Get()
|
||||
visitValues(bs, ch, bm, func(v string) bool {
|
||||
s := toFloat64StringExt(bs, bb, v)
|
||||
s := toFloat64String(bs, bb, v)
|
||||
return matchStringRange(s, minValue, maxValue)
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
@ -117,10 +135,12 @@ func matchFloat64ByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, mi
|
||||
|
||||
func matchValuesDictByStringRange(bs *blockSearch, ch *columnHeader, bm *bitmap, minValue, maxValue string) {
|
||||
bb := bbPool.Get()
|
||||
for i, v := range ch.valuesDict.values {
|
||||
for _, v := range ch.valuesDict.values {
|
||||
c := byte(0)
|
||||
if matchStringRange(v, minValue, maxValue) {
|
||||
bb.B = append(bb.B, byte(i))
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
matchEncodedValuesDict(bs, ch, bm, bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
@ -197,11 +197,6 @@ func testFilterMatchForStorage(t *testing.T, s *Storage, tenantID TenantID, f fi
|
||||
}
|
||||
workersCount := 3
|
||||
s.search(workersCount, so, nil, func(_ uint, br *blockResult) {
|
||||
// Verify tenantID
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
t.Fatalf("unexpected tenantID in blockResult; got %s; want %s", &br.streamID.tenantID, &tenantID)
|
||||
}
|
||||
|
||||
// Verify columns
|
||||
cs := br.getColumns()
|
||||
if len(cs) != 1 {
|
||||
|
@ -1,8 +1,12 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// filterTime filters by time.
|
||||
//
|
||||
// It is expressed as `_time:(start, end]` in LogsQL.
|
||||
// It is expressed as `_time:[start, end]` in LogsQL.
|
||||
type filterTime struct {
|
||||
// mintimestamp is the minimum timestamp in nanoseconds to find
|
||||
minTimestamp int64
|
||||
@ -18,7 +22,95 @@ func (ft *filterTime) String() string {
|
||||
return "_time:" + ft.stringRepr
|
||||
}
|
||||
|
||||
func (ft *filterTime) apply(bs *blockSearch, bm *bitmap) {
|
||||
func (ft *filterTime) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.add("_time")
|
||||
}
|
||||
|
||||
func (ft *filterTime) applyToBlockResult(br *blockResult, bm *bitmap) {
|
||||
minTimestamp := ft.minTimestamp
|
||||
maxTimestamp := ft.maxTimestamp
|
||||
|
||||
if minTimestamp > maxTimestamp {
|
||||
bm.resetBits()
|
||||
return
|
||||
}
|
||||
|
||||
c := br.getColumnByName("_time")
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
if !ft.matchTimestampString(v) {
|
||||
bm.resetBits()
|
||||
}
|
||||
return
|
||||
}
|
||||
if c.isTime {
|
||||
timestamps := br.timestamps
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
timestamp := timestamps[idx]
|
||||
return ft.matchTimestampValue(timestamp)
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
values := c.getValues(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := values[idx]
|
||||
return ft.matchTimestampString(v)
|
||||
})
|
||||
case valueTypeDict:
|
||||
bb := bbPool.Get()
|
||||
for _, v := range c.dictValues {
|
||||
c := byte(0)
|
||||
if ft.matchTimestampString(v) {
|
||||
c = 1
|
||||
}
|
||||
bb.B = append(bb.B, c)
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
n := valuesEncoded[idx][0]
|
||||
return bb.B[n] == 1
|
||||
})
|
||||
bbPool.Put(bb)
|
||||
case valueTypeUint8:
|
||||
bm.resetBits()
|
||||
case valueTypeUint16:
|
||||
bm.resetBits()
|
||||
case valueTypeUint32:
|
||||
bm.resetBits()
|
||||
case valueTypeUint64:
|
||||
bm.resetBits()
|
||||
case valueTypeFloat64:
|
||||
bm.resetBits()
|
||||
case valueTypeIPv4:
|
||||
bm.resetBits()
|
||||
case valueTypeTimestampISO8601:
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(idx int) bool {
|
||||
v := valuesEncoded[idx]
|
||||
timestamp := unmarshalTimestampISO8601(v)
|
||||
return ft.matchTimestampValue(timestamp)
|
||||
})
|
||||
default:
|
||||
logger.Panicf("FATAL: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (ft *filterTime) matchTimestampString(v string) bool {
|
||||
timestamp, ok := tryParseTimestampRFC3339Nano(v)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return ft.matchTimestampValue(timestamp)
|
||||
}
|
||||
|
||||
func (ft *filterTime) matchTimestampValue(timestamp int64) bool {
|
||||
return timestamp >= ft.minTimestamp && timestamp <= ft.maxTimestamp
|
||||
}
|
||||
|
||||
func (ft *filterTime) applyToBlockSearch(bs *blockSearch, bm *bitmap) {
|
||||
minTimestamp := ft.minTimestamp
|
||||
maxTimestamp := ft.maxTimestamp
|
||||
|
||||
|
@ -268,7 +268,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
|
||||
}
|
||||
return ids
|
||||
case "=~":
|
||||
re := tf.getRegexp()
|
||||
re := tf.regexp
|
||||
if re.MatchString("") {
|
||||
// (field=~"|re") => (field="" or field=~"re")
|
||||
ids := is.getStreamIDsForEmptyTagValue(tenantID, tf.tagName)
|
||||
@ -280,7 +280,7 @@ func (is *indexSearch) getStreamIDsForTagFilter(tenantID TenantID, tf *streamTag
|
||||
}
|
||||
return is.getStreamIDsForTagRegexp(tenantID, tf.tagName, re)
|
||||
case "!~":
|
||||
re := tf.getRegexp()
|
||||
re := tf.regexp
|
||||
if re.MatchString("") {
|
||||
// (field!~"|re") => (field!="" and not field=~"re")
|
||||
ids := is.getStreamIDsForTagName(tenantID, tf.tagName)
|
||||
|
@ -50,7 +50,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
|
||||
|
||||
f := func(filterStream string, expectedStreamIDs []streamID) {
|
||||
t.Helper()
|
||||
sf := mustNewStreamFilter(filterStream)
|
||||
sf := mustNewTestStreamFilter(filterStream)
|
||||
if expectedStreamIDs == nil {
|
||||
expectedStreamIDs = []streamID{}
|
||||
}
|
||||
@ -68,7 +68,7 @@ func TestStorageSearchStreamIDs(t *testing.T) {
|
||||
AccountID: 1,
|
||||
ProjectID: 2,
|
||||
}
|
||||
sf := mustNewStreamFilter(`{job="job-0",instance="instance-0"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="job-0",instance="instance-0"}`)
|
||||
for i := 0; i < 3; i++ {
|
||||
streamIDs := idb.searchStreamIDs([]TenantID{tenantID}, sf)
|
||||
if len(streamIDs) > 0 {
|
||||
|
@ -1,4 +1,4 @@
|
||||
package logjson
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
@ -6,21 +6,20 @@ import (
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
||||
"github.com/valyala/fastjson"
|
||||
)
|
||||
|
||||
// Parser parses a single JSON log message into Fields.
|
||||
// JSONParser parses a single JSON log message into Fields.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/VictoriaLogs/keyConcepts.html#data-model
|
||||
//
|
||||
// Use GetParser() for obtaining the parser.
|
||||
type Parser struct {
|
||||
type JSONParser struct {
|
||||
// Fields contains the parsed JSON line after Parse() call
|
||||
//
|
||||
// The Fields are valid until the next call to ParseLogMessage()
|
||||
// or until the parser is returned to the pool with PutParser() call.
|
||||
Fields []logstorage.Field
|
||||
Fields []Field
|
||||
|
||||
// p is used for fast JSON parsing
|
||||
p fastjson.Parser
|
||||
@ -33,59 +32,79 @@ type Parser struct {
|
||||
prefixBuf []byte
|
||||
}
|
||||
|
||||
func (p *Parser) reset() {
|
||||
fields := p.Fields
|
||||
for i := range fields {
|
||||
lf := &fields[i]
|
||||
lf.Name = ""
|
||||
lf.Value = ""
|
||||
}
|
||||
p.Fields = fields[:0]
|
||||
func (p *JSONParser) reset() {
|
||||
p.resetNobuf()
|
||||
|
||||
p.buf = p.buf[:0]
|
||||
}
|
||||
|
||||
func (p *JSONParser) resetNobuf() {
|
||||
clear(p.Fields)
|
||||
p.Fields = p.Fields[:0]
|
||||
|
||||
p.prefixBuf = p.prefixBuf[:0]
|
||||
}
|
||||
|
||||
// GetParser returns Parser ready to parse JSON lines.
|
||||
// GetJSONParser returns JSONParser ready to parse JSON lines.
|
||||
//
|
||||
// Return the parser to the pool when it is no longer needed by calling PutParser().
|
||||
func GetParser() *Parser {
|
||||
// Return the parser to the pool when it is no longer needed by calling PutJSONParser().
|
||||
func GetJSONParser() *JSONParser {
|
||||
v := parserPool.Get()
|
||||
if v == nil {
|
||||
return &Parser{}
|
||||
return &JSONParser{}
|
||||
}
|
||||
return v.(*Parser)
|
||||
return v.(*JSONParser)
|
||||
}
|
||||
|
||||
// PutParser returns the parser to the pool.
|
||||
// PutJSONParser returns the parser to the pool.
|
||||
//
|
||||
// The parser cannot be used after returning to the pool.
|
||||
func PutParser(p *Parser) {
|
||||
func PutJSONParser(p *JSONParser) {
|
||||
p.reset()
|
||||
parserPool.Put(p)
|
||||
}
|
||||
|
||||
var parserPool sync.Pool
|
||||
|
||||
// ParseLogMessageNoResetBuf parses the given JSON log message msg into p.Fields.
|
||||
//
|
||||
// It adds the given prefix to all the parsed field names.
|
||||
//
|
||||
// The p.Fields remains valid until the next call to PutJSONParser().
|
||||
func (p *JSONParser) ParseLogMessageNoResetBuf(msg, prefix string) error {
|
||||
return p.parseLogMessage(msg, prefix, false)
|
||||
}
|
||||
|
||||
// ParseLogMessage parses the given JSON log message msg into p.Fields.
|
||||
//
|
||||
// The p.Fields remains valid until the next call to ParseLogMessage() or PutParser().
|
||||
func (p *Parser) ParseLogMessage(msg []byte) error {
|
||||
s := bytesutil.ToUnsafeString(msg)
|
||||
v, err := p.p.Parse(s)
|
||||
// It adds the given prefix to all the parsed field names.
|
||||
//
|
||||
// The p.Fields remains valid until the next call to ParseLogMessage() or PutJSONParser().
|
||||
func (p *JSONParser) ParseLogMessage(msg []byte, prefix string) error {
|
||||
msgStr := bytesutil.ToUnsafeString(msg)
|
||||
return p.parseLogMessage(msgStr, prefix, true)
|
||||
}
|
||||
|
||||
func (p *JSONParser) parseLogMessage(msg, prefix string, resetBuf bool) error {
|
||||
v, err := p.p.Parse(msg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cannot parse json: %w", err)
|
||||
}
|
||||
if t := v.Type(); t != fastjson.TypeObject {
|
||||
return fmt.Errorf("expecting json dictionary; got %s", t)
|
||||
}
|
||||
p.reset()
|
||||
if resetBuf {
|
||||
p.reset()
|
||||
} else {
|
||||
p.resetNobuf()
|
||||
}
|
||||
p.prefixBuf = append(p.prefixBuf[:0], prefix...)
|
||||
p.Fields, p.buf, p.prefixBuf = appendLogFields(p.Fields, p.buf, p.prefixBuf, v)
|
||||
return nil
|
||||
}
|
||||
|
||||
// RenameField renames field with the oldName to newName in p.Fields
|
||||
func (p *Parser) RenameField(oldName, newName string) {
|
||||
func (p *JSONParser) RenameField(oldName, newName string) {
|
||||
if oldName == "" {
|
||||
return
|
||||
}
|
||||
@ -99,7 +118,7 @@ func (p *Parser) RenameField(oldName, newName string) {
|
||||
}
|
||||
}
|
||||
|
||||
func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]logstorage.Field, []byte, []byte) {
|
||||
func appendLogFields(dst []Field, dstBuf, prefixBuf []byte, v *fastjson.Value) ([]Field, []byte, []byte) {
|
||||
o := v.GetObject()
|
||||
o.Visit(func(k []byte, v *fastjson.Value) {
|
||||
t := v.Type()
|
||||
@ -133,13 +152,13 @@ func appendLogFields(dst []logstorage.Field, dstBuf, prefixBuf []byte, v *fastjs
|
||||
return dst, dstBuf, prefixBuf
|
||||
}
|
||||
|
||||
func appendLogField(dst []logstorage.Field, dstBuf, prefixBuf, k, value []byte) ([]logstorage.Field, []byte) {
|
||||
func appendLogField(dst []Field, dstBuf, prefixBuf, k, value []byte) ([]Field, []byte) {
|
||||
dstBufLen := len(dstBuf)
|
||||
dstBuf = append(dstBuf, prefixBuf...)
|
||||
dstBuf = append(dstBuf, k...)
|
||||
name := dstBuf[dstBufLen:]
|
||||
|
||||
dst = append(dst, logstorage.Field{
|
||||
dst = append(dst, Field{
|
||||
Name: bytesutil.ToUnsafeString(name),
|
||||
Value: bytesutil.ToUnsafeString(value),
|
||||
})
|
97
lib/logstorage/json_parser_test.go
Normal file
97
lib/logstorage/json_parser_test.go
Normal file
@ -0,0 +1,97 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestJSONParserFailure(t *testing.T) {
|
||||
f := func(data string) {
|
||||
t.Helper()
|
||||
|
||||
p := GetJSONParser()
|
||||
err := p.ParseLogMessage([]byte(data), "")
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
PutJSONParser(p)
|
||||
}
|
||||
f("")
|
||||
f("{foo")
|
||||
f("[1,2,3]")
|
||||
f(`{"foo",}`)
|
||||
}
|
||||
|
||||
func TestJSONParserSuccess(t *testing.T) {
|
||||
f := func(data, prefix string, fieldsExpected []Field) {
|
||||
t.Helper()
|
||||
|
||||
p := GetJSONParser()
|
||||
err := p.ParseLogMessage([]byte(data), prefix)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
if !reflect.DeepEqual(p.Fields, fieldsExpected) {
|
||||
t.Fatalf("unexpected fields;\ngot\n%s\nwant\n%s", p.Fields, fieldsExpected)
|
||||
}
|
||||
PutJSONParser(p)
|
||||
}
|
||||
|
||||
f("{}", "", nil)
|
||||
f(`{"foo":"bar"}`, "", []Field{
|
||||
{
|
||||
Name: "foo",
|
||||
Value: "bar",
|
||||
},
|
||||
})
|
||||
f(`{"foo":"bar"}`, "prefix_", []Field{
|
||||
{
|
||||
Name: "prefix_foo",
|
||||
Value: "bar",
|
||||
},
|
||||
})
|
||||
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "", []Field{
|
||||
{
|
||||
Name: "foo.bar",
|
||||
Value: "baz",
|
||||
},
|
||||
{
|
||||
Name: "a",
|
||||
Value: "1",
|
||||
},
|
||||
{
|
||||
Name: "b",
|
||||
Value: "true",
|
||||
},
|
||||
{
|
||||
Name: "c",
|
||||
Value: "[1,2]",
|
||||
},
|
||||
{
|
||||
Name: "d",
|
||||
Value: "false",
|
||||
},
|
||||
})
|
||||
f(`{"foo":{"bar":"baz"},"a":1,"b":true,"c":[1,2],"d":false}`, "prefix_", []Field{
|
||||
{
|
||||
Name: "prefix_foo.bar",
|
||||
Value: "baz",
|
||||
},
|
||||
{
|
||||
Name: "prefix_a",
|
||||
Value: "1",
|
||||
},
|
||||
{
|
||||
Name: "prefix_b",
|
||||
Value: "true",
|
||||
},
|
||||
{
|
||||
Name: "prefix_c",
|
||||
Value: "[1,2]",
|
||||
},
|
||||
{
|
||||
Name: "prefix_d",
|
||||
Value: "false",
|
||||
},
|
||||
})
|
||||
}
|
@ -10,8 +10,8 @@ import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||
)
|
||||
|
||||
type lexer struct {
|
||||
@ -39,6 +39,20 @@ type lexer struct {
|
||||
currentTimestamp int64
|
||||
}
|
||||
|
||||
type lexerState struct {
|
||||
lex lexer
|
||||
}
|
||||
|
||||
func (lex *lexer) backupState() *lexerState {
|
||||
return &lexerState{
|
||||
lex: *lex,
|
||||
}
|
||||
}
|
||||
|
||||
func (lex *lexer) restoreState(ls *lexerState) {
|
||||
*lex = ls.lex
|
||||
}
|
||||
|
||||
// newLexer returns new lexer for the given s.
|
||||
//
|
||||
// The lex.token points to the first token in s.
|
||||
@ -206,10 +220,45 @@ func (q *Query) String() string {
|
||||
return s
|
||||
}
|
||||
|
||||
// AddCountByTimePipe adds '| stats by (_time:step offset off, field1, ..., fieldN) count() hits' to the end of q.
|
||||
func (q *Query) AddCountByTimePipe(step, off int64, fields []string) {
|
||||
{
|
||||
// add 'stats by (_time:step offset off, fields) count() hits'
|
||||
stepStr := string(marshalDuration(nil, step))
|
||||
offsetStr := string(marshalDuration(nil, off))
|
||||
byFieldsStr := "_time:" + stepStr + " offset " + offsetStr
|
||||
for _, f := range fields {
|
||||
byFieldsStr += ", " + quoteTokenIfNeeded(f)
|
||||
}
|
||||
s := fmt.Sprintf("stats by (%s) count() hits", byFieldsStr)
|
||||
lex := newLexer(s)
|
||||
ps, err := parsePipeStats(lex)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
|
||||
}
|
||||
q.pipes = append(q.pipes, ps)
|
||||
}
|
||||
|
||||
{
|
||||
// Add 'sort by (_time, fields)' in order to get consistent order of the results.
|
||||
sortFieldsStr := "_time"
|
||||
for _, f := range fields {
|
||||
sortFieldsStr += ", " + quoteTokenIfNeeded(f)
|
||||
}
|
||||
s := fmt.Sprintf("sort by (%s)", sortFieldsStr)
|
||||
lex := newLexer(s)
|
||||
ps, err := parsePipeSort(lex)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing %q: %s", s, err)
|
||||
}
|
||||
q.pipes = append(q.pipes, ps)
|
||||
}
|
||||
}
|
||||
|
||||
// AddTimeFilter adds global filter _time:[start ... end] to q.
|
||||
func (q *Query) AddTimeFilter(start, end int64) {
|
||||
startStr := marshalTimestampRFC3339Nano(nil, start)
|
||||
endStr := marshalTimestampRFC3339Nano(nil, end)
|
||||
startStr := marshalTimestampRFC3339NanoString(nil, start)
|
||||
endStr := marshalTimestampRFC3339NanoString(nil, end)
|
||||
ft := &filterTime{
|
||||
minTimestamp: start,
|
||||
maxTimestamp: end,
|
||||
@ -234,7 +283,7 @@ func (q *Query) AddTimeFilter(start, end int64) {
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
|
||||
func (q *Query) AddPipeLimit(n uint64) {
|
||||
q.pipes = append(q.pipes, &pipeLimit{
|
||||
n: n,
|
||||
limit: n,
|
||||
})
|
||||
}
|
||||
|
||||
@ -242,6 +291,68 @@ func (q *Query) AddPipeLimit(n uint64) {
|
||||
func (q *Query) Optimize() {
|
||||
q.pipes = optimizeSortOffsetPipes(q.pipes)
|
||||
q.pipes = optimizeSortLimitPipes(q.pipes)
|
||||
q.pipes = optimizeUniqLimitPipes(q.pipes)
|
||||
q.pipes = optimizeFilterPipes(q.pipes)
|
||||
|
||||
// Merge `q | filter ...` into q.
|
||||
if len(q.pipes) > 0 {
|
||||
pf, ok := q.pipes[0].(*pipeFilter)
|
||||
if ok {
|
||||
q.f = mergeFiltersAnd(q.f, pf.f)
|
||||
q.pipes = append(q.pipes[:0], q.pipes[1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize `q | field_names ...` by marking pipeFieldNames as first pipe.
|
||||
if len(q.pipes) > 0 {
|
||||
pf, ok := q.pipes[0].(*pipeFieldNames)
|
||||
if ok {
|
||||
pf.isFirstPipe = true
|
||||
}
|
||||
}
|
||||
|
||||
// Substitute '*' prefixFilter with filterNoop in order to avoid reading _msg data.
|
||||
q.f = removeStarFilters(q.f)
|
||||
|
||||
// Call Optimize for queries from 'in(query)' filters.
|
||||
optimizeFilterIn(q.f)
|
||||
for _, p := range q.pipes {
|
||||
switch t := p.(type) {
|
||||
case *pipeStats:
|
||||
for _, f := range t.funcs {
|
||||
if f.iff != nil {
|
||||
optimizeFilterIn(f.iff)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func removeStarFilters(f filter) filter {
|
||||
visitFunc := func(f filter) bool {
|
||||
fp, ok := f.(*filterPrefix)
|
||||
return ok && isMsgFieldName(fp.fieldName) && fp.prefix == ""
|
||||
}
|
||||
copyFunc := func(_ filter) (filter, error) {
|
||||
fn := &filterNoop{}
|
||||
return fn, nil
|
||||
}
|
||||
f, err := copyFilter(f, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
logger.Fatalf("BUG: unexpected error: %s", err)
|
||||
}
|
||||
return f
|
||||
}
|
||||
|
||||
func optimizeFilterIn(f filter) {
|
||||
visitFunc := func(f filter) bool {
|
||||
fi, ok := f.(*filterIn)
|
||||
if ok && fi.q != nil {
|
||||
fi.q.Optimize()
|
||||
}
|
||||
return false
|
||||
}
|
||||
_ = visitFilter(f, visitFunc)
|
||||
}
|
||||
|
||||
func optimizeSortOffsetPipes(pipes []pipe) []pipe {
|
||||
@ -259,7 +370,7 @@ func optimizeSortOffsetPipes(pipes []pipe) []pipe {
|
||||
continue
|
||||
}
|
||||
if ps.offset == 0 && ps.limit == 0 {
|
||||
ps.offset = po.n
|
||||
ps.offset = po.offset
|
||||
}
|
||||
pipes = append(pipes[:i], pipes[i+1:]...)
|
||||
}
|
||||
@ -280,14 +391,78 @@ func optimizeSortLimitPipes(pipes []pipe) []pipe {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if ps.limit == 0 || pl.n < ps.limit {
|
||||
ps.limit = pl.n
|
||||
if ps.limit == 0 || pl.limit < ps.limit {
|
||||
ps.limit = pl.limit
|
||||
}
|
||||
pipes = append(pipes[:i], pipes[i+1:]...)
|
||||
}
|
||||
return pipes
|
||||
}
|
||||
|
||||
func optimizeUniqLimitPipes(pipes []pipe) []pipe {
|
||||
// Merge 'uniq ... | limit ...' into 'uniq ... limit ...'
|
||||
i := 1
|
||||
for i < len(pipes) {
|
||||
pl, ok := pipes[i].(*pipeLimit)
|
||||
if !ok {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
pu, ok := pipes[i-1].(*pipeUniq)
|
||||
if !ok {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
if pu.limit == 0 || pl.limit < pu.limit {
|
||||
pu.limit = pl.limit
|
||||
}
|
||||
pipes = append(pipes[:i], pipes[i+1:]...)
|
||||
}
|
||||
return pipes
|
||||
}
|
||||
|
||||
func optimizeFilterPipes(pipes []pipe) []pipe {
|
||||
// Merge multiple `| filter ...` pipes into a single `filter ...` pipe
|
||||
i := 1
|
||||
for i < len(pipes) {
|
||||
pf1, ok := pipes[i-1].(*pipeFilter)
|
||||
if !ok {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
pf2, ok := pipes[i].(*pipeFilter)
|
||||
if !ok {
|
||||
i++
|
||||
continue
|
||||
}
|
||||
|
||||
pf1.f = mergeFiltersAnd(pf1.f, pf2.f)
|
||||
pipes = append(pipes[:i], pipes[i+1:]...)
|
||||
}
|
||||
return pipes
|
||||
}
|
||||
|
||||
func mergeFiltersAnd(f1, f2 filter) filter {
|
||||
fa1, ok := f1.(*filterAnd)
|
||||
if ok {
|
||||
fa1.filters = append(fa1.filters, f2)
|
||||
return fa1
|
||||
}
|
||||
|
||||
fa2, ok := f2.(*filterAnd)
|
||||
if ok {
|
||||
filters := make([]filter, len(fa2.filters)+1)
|
||||
filters[0] = f1
|
||||
copy(filters[1:], fa2.filters)
|
||||
fa2.filters = filters
|
||||
return fa2
|
||||
}
|
||||
|
||||
return &filterAnd{
|
||||
filters: []filter{f1, f2},
|
||||
}
|
||||
}
|
||||
|
||||
func (q *Query) getNeededColumns() ([]string, []string) {
|
||||
neededFields := newFieldsSet()
|
||||
neededFields.add("*")
|
||||
@ -304,7 +479,17 @@ func (q *Query) getNeededColumns() ([]string, []string) {
|
||||
// ParseQuery parses s.
|
||||
func ParseQuery(s string) (*Query, error) {
|
||||
lex := newLexer(s)
|
||||
q, err := parseQuery(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !lex.isEnd() {
|
||||
return nil, fmt.Errorf("unexpected unparsed tail after [%s]; context: [%s]; tail: [%s]", q, lex.context(), lex.s)
|
||||
}
|
||||
return q, nil
|
||||
}
|
||||
|
||||
func parseQuery(lex *lexer) (*Query, error) {
|
||||
f, err := parseFilter(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w; context: [%s]", err, lex.context())
|
||||
@ -319,10 +504,6 @@ func ParseQuery(s string) (*Query, error) {
|
||||
}
|
||||
q.pipes = pipes
|
||||
|
||||
if !lex.isEnd() {
|
||||
return nil, fmt.Errorf("unexpected unparsed tail; context: [%s]; tail: [%s]", lex.context(), lex.s)
|
||||
}
|
||||
|
||||
return q, nil
|
||||
}
|
||||
|
||||
@ -407,6 +588,10 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
|
||||
return nil, fmt.Errorf("missing whitespace before the search word %q", lex.prevToken)
|
||||
}
|
||||
return parseParensFilter(lex, fieldName)
|
||||
case lex.isKeyword(">"):
|
||||
return parseFilterGT(lex, fieldName)
|
||||
case lex.isKeyword("<"):
|
||||
return parseFilterLT(lex, fieldName)
|
||||
case lex.isKeyword("not", "!"):
|
||||
return parseFilterNot(lex, fieldName)
|
||||
case lex.isKeyword("exact"):
|
||||
@ -432,19 +617,27 @@ func parseGenericFilter(lex *lexer, fieldName string) (filter, error) {
|
||||
case lex.isKeyword(",", ")", "[", "]"):
|
||||
return nil, fmt.Errorf("unexpected token %q", lex.token)
|
||||
}
|
||||
phrase := getCompoundPhrase(lex, fieldName != "")
|
||||
phrase, err := getCompoundPhrase(lex, fieldName != "")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return parseFilterForPhrase(lex, phrase, fieldName)
|
||||
}
|
||||
|
||||
func getCompoundPhrase(lex *lexer, allowColon bool) string {
|
||||
func getCompoundPhrase(lex *lexer, allowColon bool) (string, error) {
|
||||
stopTokens := []string{"*", ",", "(", ")", "[", "]", "|", ""}
|
||||
if lex.isKeyword(stopTokens...) {
|
||||
return "", fmt.Errorf("compound phrase cannot start with '%s'", lex.token)
|
||||
}
|
||||
|
||||
phrase := lex.token
|
||||
rawPhrase := lex.rawToken
|
||||
lex.nextToken()
|
||||
suffix := getCompoundSuffix(lex, allowColon)
|
||||
if suffix == "" {
|
||||
return phrase
|
||||
return phrase, nil
|
||||
}
|
||||
return rawPhrase + suffix
|
||||
return rawPhrase + suffix, nil
|
||||
}
|
||||
|
||||
func getCompoundSuffix(lex *lexer, allowColon bool) string {
|
||||
@ -460,19 +653,24 @@ func getCompoundSuffix(lex *lexer, allowColon bool) string {
|
||||
return s
|
||||
}
|
||||
|
||||
func getCompoundToken(lex *lexer) string {
|
||||
func getCompoundToken(lex *lexer) (string, error) {
|
||||
stopTokens := []string{",", "(", ")", "[", "]", "|", ""}
|
||||
if lex.isKeyword(stopTokens...) {
|
||||
return "", fmt.Errorf("compound token cannot start with '%s'", lex.token)
|
||||
}
|
||||
|
||||
s := lex.token
|
||||
rawS := lex.rawToken
|
||||
lex.nextToken()
|
||||
suffix := ""
|
||||
for !lex.isSkippedSpace && !lex.isKeyword(",", "(", ")", "[", "]", "|", "") {
|
||||
for !lex.isSkippedSpace && !lex.isKeyword(stopTokens...) {
|
||||
s += lex.token
|
||||
lex.nextToken()
|
||||
}
|
||||
if suffix == "" {
|
||||
return s
|
||||
return s, nil
|
||||
}
|
||||
return rawS + suffix
|
||||
return rawS + suffix, nil
|
||||
}
|
||||
|
||||
func getCompoundFuncArg(lex *lexer) string {
|
||||
@ -483,7 +681,7 @@ func getCompoundFuncArg(lex *lexer) string {
|
||||
rawArg := lex.rawToken
|
||||
lex.nextToken()
|
||||
suffix := ""
|
||||
for !lex.isSkippedSpace && !lex.isKeyword("*", ",", ")", "") {
|
||||
for !lex.isSkippedSpace && !lex.isKeyword("*", ",", "(", ")", "|", "") {
|
||||
suffix += lex.rawToken
|
||||
lex.nextToken()
|
||||
}
|
||||
@ -704,13 +902,72 @@ func tryParseIPv4CIDR(s string) (uint32, uint32, bool) {
|
||||
}
|
||||
|
||||
func parseFilterIn(lex *lexer, fieldName string) (filter, error) {
|
||||
return parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
|
||||
f := &filterIn{
|
||||
if !lex.isKeyword("in") {
|
||||
return nil, fmt.Errorf("expecting 'in' keyword")
|
||||
}
|
||||
|
||||
// Try parsing in(arg1, ..., argN) at first
|
||||
lexState := lex.backupState()
|
||||
fi, err := parseFuncArgs(lex, fieldName, func(args []string) (filter, error) {
|
||||
fi := &filterIn{
|
||||
fieldName: fieldName,
|
||||
values: args,
|
||||
}
|
||||
return f, nil
|
||||
return fi, nil
|
||||
})
|
||||
if err == nil {
|
||||
return fi, nil
|
||||
}
|
||||
|
||||
// Parse in(query | fields someField) then
|
||||
lex.restoreState(lexState)
|
||||
lex.nextToken()
|
||||
if !lex.isKeyword("(") {
|
||||
return nil, fmt.Errorf("missing '(' after 'in'")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
q, err := parseQuery(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse query inside 'in(...)': %w", err)
|
||||
}
|
||||
|
||||
if !lex.isKeyword(")") {
|
||||
return nil, fmt.Errorf("missing ')' after 'in(%s)'", q)
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
qFieldName, err := getFieldNameFromPipes(q.pipes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot determine field name for values in 'in(%s)': %w", q, err)
|
||||
}
|
||||
fi = &filterIn{
|
||||
fieldName: fieldName,
|
||||
needExecuteQuery: true,
|
||||
q: q,
|
||||
qFieldName: qFieldName,
|
||||
}
|
||||
return fi, nil
|
||||
}
|
||||
|
||||
func getFieldNameFromPipes(pipes []pipe) (string, error) {
|
||||
if len(pipes) == 0 {
|
||||
return "", fmt.Errorf("missing 'fields' or 'uniq' pipes at the end of query")
|
||||
}
|
||||
switch t := pipes[len(pipes)-1].(type) {
|
||||
case *pipeFields:
|
||||
if t.containsStar || len(t.fields) != 1 {
|
||||
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
|
||||
}
|
||||
return t.fields[0], nil
|
||||
case *pipeUniq:
|
||||
if len(t.byFields) != 1 {
|
||||
return "", fmt.Errorf("'%s' pipe must contain only a single non-star field name", t)
|
||||
}
|
||||
return t.byFields[0], nil
|
||||
default:
|
||||
return "", fmt.Errorf("missing 'fields' or 'uniq' pipe at the end of query")
|
||||
}
|
||||
}
|
||||
|
||||
func parseFilterSequence(lex *lexer, fieldName string) (filter, error) {
|
||||
@ -755,6 +1012,70 @@ func parseFilterRegexp(lex *lexer, fieldName string) (filter, error) {
|
||||
})
|
||||
}
|
||||
|
||||
func parseFilterGT(lex *lexer, fieldName string) (filter, error) {
|
||||
if fieldName == "" {
|
||||
return nil, fmt.Errorf("'>' and '>=' must be prefixed with the field name")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
includeMinValue := false
|
||||
op := ">"
|
||||
if lex.isKeyword("=") {
|
||||
lex.nextToken()
|
||||
includeMinValue = true
|
||||
op = ">="
|
||||
}
|
||||
|
||||
minValue, fStr, err := parseFloat64(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
|
||||
}
|
||||
|
||||
if !includeMinValue {
|
||||
minValue = nextafter(minValue, inf)
|
||||
}
|
||||
fr := &filterRange{
|
||||
fieldName: fieldName,
|
||||
minValue: minValue,
|
||||
maxValue: inf,
|
||||
|
||||
stringRepr: op + fStr,
|
||||
}
|
||||
return fr, nil
|
||||
}
|
||||
|
||||
func parseFilterLT(lex *lexer, fieldName string) (filter, error) {
|
||||
if fieldName == "" {
|
||||
return nil, fmt.Errorf("'<' and '<=' must be prefixed with the field name")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
includeMaxValue := false
|
||||
op := "<"
|
||||
if lex.isKeyword("=") {
|
||||
lex.nextToken()
|
||||
includeMaxValue = true
|
||||
op = "<="
|
||||
}
|
||||
|
||||
maxValue, fStr, err := parseFloat64(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse number after '%s': %w", op, err)
|
||||
}
|
||||
|
||||
if !includeMaxValue {
|
||||
maxValue = nextafter(maxValue, -inf)
|
||||
}
|
||||
fr := &filterRange{
|
||||
fieldName: fieldName,
|
||||
minValue: -inf,
|
||||
maxValue: maxValue,
|
||||
|
||||
stringRepr: op + fStr,
|
||||
}
|
||||
return fr, nil
|
||||
}
|
||||
|
||||
func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
|
||||
funcName := lex.token
|
||||
lex.nextToken()
|
||||
@ -802,19 +1123,19 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
stringRepr := ""
|
||||
stringRepr := "range"
|
||||
if includeMinValue {
|
||||
stringRepr += "["
|
||||
} else {
|
||||
stringRepr += "("
|
||||
minValue = math.Nextafter(minValue, inf)
|
||||
minValue = nextafter(minValue, inf)
|
||||
}
|
||||
stringRepr += minValueStr + ", " + maxValueStr
|
||||
if includeMaxValue {
|
||||
stringRepr += "]"
|
||||
} else {
|
||||
stringRepr += ")"
|
||||
maxValue = math.Nextafter(maxValue, -inf)
|
||||
maxValue = nextafter(maxValue, -inf)
|
||||
}
|
||||
|
||||
fr := &filterRange{
|
||||
@ -828,7 +1149,10 @@ func parseFilterRange(lex *lexer, fieldName string) (filter, error) {
|
||||
}
|
||||
|
||||
func parseFloat64(lex *lexer) (float64, string, error) {
|
||||
s := getCompoundToken(lex)
|
||||
s, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return 0, "", fmt.Errorf("cannot parse float64: %w", err)
|
||||
}
|
||||
f, err := strconv.ParseFloat(s, 64)
|
||||
if err == nil {
|
||||
return f, s, nil
|
||||
@ -868,6 +1192,9 @@ func parseFuncArgs(lex *lexer, fieldName string, callback func(args []string) (f
|
||||
if lex.isKeyword(",") {
|
||||
return nil, fmt.Errorf("unexpected ',' - missing arg in %s()", funcName)
|
||||
}
|
||||
if lex.isKeyword("(") {
|
||||
return nil, fmt.Errorf("unexpected '(' - missing arg in %s()", funcName)
|
||||
}
|
||||
arg := getCompoundFuncArg(lex)
|
||||
args = append(args, arg)
|
||||
if lex.isKeyword(")") {
|
||||
@ -912,13 +1239,14 @@ func parseFilterTimeWithOffset(lex *lexer) (*filterTime, error) {
|
||||
if !lex.isKeyword("offset") {
|
||||
return ft, nil
|
||||
}
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing offset for _time filter %s", ft)
|
||||
lex.nextToken()
|
||||
s, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse offset in _time filter: %w", err)
|
||||
}
|
||||
s := getCompoundToken(lex)
|
||||
d, ok := tryParseDuration(s)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("cannot parse offset %q for _time filter %s: %w", s, ft, err)
|
||||
return nil, fmt.Errorf("cannot parse offset %q for _time filter %s", s, ft)
|
||||
}
|
||||
offset := int64(d)
|
||||
ft.minTimestamp -= offset
|
||||
@ -935,7 +1263,10 @@ func parseFilterTime(lex *lexer) (*filterTime, error) {
|
||||
case lex.isKeyword("("):
|
||||
startTimeInclude = false
|
||||
default:
|
||||
s := getCompoundToken(lex)
|
||||
s, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse _time filter: %w", err)
|
||||
}
|
||||
sLower := strings.ToLower(s)
|
||||
if sLower == "now" || startsWithYear(s) {
|
||||
// Parse '_time:YYYY-MM-DD', which transforms to '_time:[YYYY-MM-DD, YYYY-MM-DD+1)'
|
||||
@ -1076,113 +1407,21 @@ func stripTimezoneSuffix(s string) string {
|
||||
}
|
||||
|
||||
func parseFilterStream(lex *lexer) (*filterStream, error) {
|
||||
if !lex.isKeyword("{") {
|
||||
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
|
||||
}
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("incomplete _stream filter after '{'")
|
||||
}
|
||||
var filters []*andStreamFilter
|
||||
for {
|
||||
f, err := parseAndStreamFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filters = append(filters, f)
|
||||
switch {
|
||||
case lex.isKeyword("}"):
|
||||
lex.nextToken()
|
||||
fs := &filterStream{
|
||||
f: &StreamFilter{
|
||||
orFilters: filters,
|
||||
},
|
||||
}
|
||||
return fs, nil
|
||||
case lex.isKeyword("or"):
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
|
||||
}
|
||||
if lex.isKeyword("}") {
|
||||
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newStreamFilter(s string) (*StreamFilter, error) {
|
||||
lex := newLexer(s)
|
||||
fs, err := parseFilterStream(lex)
|
||||
sf, err := parseStreamFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return fs.f, nil
|
||||
}
|
||||
|
||||
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
|
||||
var filters []*streamTagFilter
|
||||
for {
|
||||
if lex.isKeyword("}") {
|
||||
asf := &andStreamFilter{
|
||||
tagFilters: filters,
|
||||
}
|
||||
return asf, nil
|
||||
}
|
||||
f, err := parseStreamTagFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filters = append(filters, f)
|
||||
switch {
|
||||
case lex.isKeyword("or", "}"):
|
||||
asf := &andStreamFilter{
|
||||
tagFilters: filters,
|
||||
}
|
||||
return asf, nil
|
||||
case lex.isKeyword(","):
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing stream filter after ','")
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
|
||||
}
|
||||
fs := &filterStream{
|
||||
f: sf,
|
||||
}
|
||||
}
|
||||
|
||||
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
|
||||
tagName := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
|
||||
}
|
||||
if !lex.isKeyword("=", "!=", "=~", "!~") {
|
||||
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
|
||||
}
|
||||
op := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
|
||||
}
|
||||
value := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
|
||||
}
|
||||
stf := &streamTagFilter{
|
||||
tagName: tagName,
|
||||
op: op,
|
||||
value: value,
|
||||
}
|
||||
if op == "=~" || op == "!~" {
|
||||
re, err := regexutil.NewPromRegex(value)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
|
||||
}
|
||||
stf.regexp = re
|
||||
}
|
||||
return stf, nil
|
||||
return fs, nil
|
||||
}
|
||||
|
||||
func parseTime(lex *lexer) (int64, string, error) {
|
||||
s := getCompoundToken(lex)
|
||||
s, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return 0, "", err
|
||||
}
|
||||
t, err := promutils.ParseTimeAt(s, float64(lex.currentTimestamp)/1e9)
|
||||
if err != nil {
|
||||
return 0, "", err
|
||||
@ -1312,3 +1551,10 @@ func parseInt(s string) (int64, error) {
|
||||
}
|
||||
return nn, nil
|
||||
}
|
||||
|
||||
// nextafter returns the next representable float64 value after f towards xInf.
//
// Unlike math.Nextafter, infinite f is returned as is, so open range bounds
// such as (-inf, x) or (x, +inf) keep their infinite side unchanged.
func nextafter(f, xInf float64) float64 {
	if !math.IsInf(f, 0) {
		f = math.Nextafter(f, xInf)
	}
	return f
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -34,51 +33,6 @@ func TestLexer(t *testing.T) {
|
||||
[]string{"_stream", ":", "{", "foo", "=", "bar", ",", "a", "=~", "baz", ",", "b", "!=", "cd", ",", "d,}a", "!~", "abc", "}"})
|
||||
}
|
||||
|
||||
func TestNewStreamFilterSuccess(t *testing.T) {
|
||||
f := func(s, resultExpected string) {
|
||||
t.Helper()
|
||||
sf, err := newStreamFilter(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
result := sf.String()
|
||||
if result != resultExpected {
|
||||
t.Fatalf("unexpected StreamFilter; got %s; want %s", result, resultExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("{}", "{}")
|
||||
f(`{foo="bar"}`, `{foo="bar"}`)
|
||||
f(`{ "foo" =~ "bar.+" , baz!="a" or x="y"}`, `{foo=~"bar.+",baz!="a" or x="y"}`)
|
||||
f(`{"a b"='c}"d' OR de="aaa"}`, `{"a b"="c}\"d" or de="aaa"}`)
|
||||
f(`{a="b", c="d" or x="y"}`, `{a="b",c="d" or x="y"}`)
|
||||
}
|
||||
|
||||
func TestNewStreamFilterFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
sf, err := newStreamFilter(s)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error")
|
||||
}
|
||||
if sf != nil {
|
||||
t.Fatalf("expecting nil sf; got %v", sf)
|
||||
}
|
||||
}
|
||||
|
||||
f("")
|
||||
f("}")
|
||||
f("{")
|
||||
f("{foo")
|
||||
f("{foo}")
|
||||
f("{'foo")
|
||||
f("{foo=")
|
||||
f("{foo or bar}")
|
||||
f("{foo=bar")
|
||||
f("{foo=bar baz}")
|
||||
f("{foo='bar' baz='x'}")
|
||||
}
|
||||
|
||||
func TestParseTimeDuration(t *testing.T) {
|
||||
f := func(s string, durationExpected time.Duration) {
|
||||
t.Helper()
|
||||
@ -323,6 +277,10 @@ func TestParseFilterIn(t *testing.T) {
|
||||
f(`:in("foo bar,baz")`, ``, []string{"foo bar,baz"})
|
||||
f(`ip:in(1.2.3.4, 5.6.7.8, 9.10.11.12)`, `ip`, []string{"1.2.3.4", "5.6.7.8", "9.10.11.12"})
|
||||
f(`foo-bar:in(foo,bar-baz.aa"bb","c,)d")`, `foo-bar`, []string{"foo", `bar-baz.aa"bb"`, "c,)d"})
|
||||
|
||||
// verify `in(query)` - it shouldn't set values
|
||||
f(`in(x|fields foo)`, ``, nil)
|
||||
f(`a:in(* | fields bar)`, `a`, nil)
|
||||
}
|
||||
|
||||
func TestParseFilterIPv4Range(t *testing.T) {
|
||||
@ -537,15 +495,25 @@ func TestParseRangeFilter(t *testing.T) {
|
||||
f(`range:range["-1.234e5", "-2e-5"]`, `range`, -1.234e5, -2e-5)
|
||||
|
||||
f(`_msg:range[1, 2]`, `_msg`, 1, 2)
|
||||
f(`:range(1, 2)`, ``, math.Nextafter(1, inf), math.Nextafter(2, -inf))
|
||||
f(`range[1, 2)`, ``, 1, math.Nextafter(2, -inf))
|
||||
f(`range("1", 2]`, ``, math.Nextafter(1, inf), 2)
|
||||
f(`:range(1, 2)`, ``, nextafter(1, inf), nextafter(2, -inf))
|
||||
f(`range[1, 2)`, ``, 1, nextafter(2, -inf))
|
||||
f(`range("1", 2]`, ``, nextafter(1, inf), 2)
|
||||
|
||||
f(`response_size:range[1KB, 10MiB]`, `response_size`, 1_000, 10*(1<<20))
|
||||
f(`response_size:range[1G, 10Ti]`, `response_size`, 1_000_000_000, 10*(1<<40))
|
||||
f(`response_size:range[10, inf]`, `response_size`, 10, inf)
|
||||
|
||||
f(`duration:range[100ns, 1y2w2.5m3s5ms]`, `duration`, 100, 1*nsecsPerYear+2*nsecsPerWeek+2.5*nsecsPerMinute+3*nsecsPerSecond+5*nsecsPerMillisecond)
|
||||
|
||||
f(`foo:>10.43`, `foo`, nextafter(10.43, inf), inf)
|
||||
f(`foo: > -10.43`, `foo`, nextafter(-10.43, inf), inf)
|
||||
f(`foo:>=10.43`, `foo`, 10.43, inf)
|
||||
f(`foo: >= -10.43`, `foo`, -10.43, inf)
|
||||
|
||||
f(`foo:<10.43`, `foo`, -inf, nextafter(10.43, -inf))
|
||||
f(`foo: < -10.43`, `foo`, -inf, nextafter(-10.43, -inf))
|
||||
f(`foo:<=10.43`, `foo`, -inf, 10.43)
|
||||
f(`foo: <= 10.43`, `foo`, -inf, 10.43)
|
||||
}
|
||||
|
||||
func TestParseQuerySuccess(t *testing.T) {
|
||||
@ -723,8 +691,8 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f("exact(foo*)", `exact(foo*)`)
|
||||
f("exact('foo bar),|baz')", `exact("foo bar),|baz")`)
|
||||
f("exact('foo bar),|baz'*)", `exact("foo bar),|baz"*)`)
|
||||
f(`exact(foo|b:ar)`, `exact("foo|b:ar")`)
|
||||
f(`foo:exact(foo|b:ar*)`, `foo:exact("foo|b:ar"*)`)
|
||||
f(`exact(foo/b:ar)`, `exact("foo/b:ar")`)
|
||||
f(`foo:exact(foo/b:ar*)`, `foo:exact("foo/b:ar"*)`)
|
||||
|
||||
// i filter
|
||||
f("i(foo)", `i(foo)`)
|
||||
@ -732,14 +700,21 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f("i(`foo`* )", `i(foo*)`)
|
||||
f("i(' foo ) bar')", `i(" foo ) bar")`)
|
||||
f("i('foo bar'*)", `i("foo bar"*)`)
|
||||
f(`foo:i(foo:bar-baz|aa+bb)`, `foo:i("foo:bar-baz|aa+bb")`)
|
||||
f(`foo:i(foo:bar-baz/aa+bb)`, `foo:i("foo:bar-baz/aa+bb")`)
|
||||
|
||||
// in filter
|
||||
// in filter with values
|
||||
f(`in()`, `in()`)
|
||||
f(`in(foo)`, `in(foo)`)
|
||||
f(`in(foo, bar)`, `in(foo,bar)`)
|
||||
f(`in("foo bar", baz)`, `in("foo bar",baz)`)
|
||||
f(`foo:in(foo-bar|baz)`, `foo:in("foo-bar|baz")`)
|
||||
f(`foo:in(foo-bar/baz)`, `foo:in("foo-bar/baz")`)
|
||||
|
||||
// in filter with query
|
||||
f(`in(err|fields x)`, `in(err | fields x)`)
|
||||
f(`ip:in(foo and user:in(admin, moderator)|fields ip)`, `ip:in(foo user:in(admin,moderator) | fields ip)`)
|
||||
f(`x:in(_time:5m y:in(*|fields z) | stats by (q) count() rows|fields q)`, `x:in(_time:5m y:in(* | fields z) | stats by (q) count(*) as rows | fields q)`)
|
||||
f(`in(bar:in(1,2,3) | uniq (x)) | stats count() rows`, `in(bar:in(1,2,3) | uniq by (x)) | stats count(*) as rows`)
|
||||
f(`in((1) | fields z) | stats count() rows`, `in(1 | fields z) | stats count(*) as rows`)
|
||||
|
||||
// ipv4_range filter
|
||||
f(`ipv4_range(1.2.3.4, "5.6.7.8")`, `ipv4_range(1.2.3.4, 5.6.7.8)`)
|
||||
@ -768,11 +743,18 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`range(0x1ff, inf)`, `range(0x1ff, inf)`)
|
||||
f(`range(-INF,+inF)`, `range(-INF, +inF)`)
|
||||
f(`range(1.5K, 22.5GiB)`, `range(1.5K, 22.5GiB)`)
|
||||
f(`foo:range(5,inf)`, `foo:range(5, inf)`)
|
||||
|
||||
// >, >=, < and <= filter
|
||||
f(`foo: > 10.5M`, `foo:>10.5M`)
|
||||
f(`foo: >= 10.5M`, `foo:>=10.5M`)
|
||||
f(`foo: < 10.5M`, `foo:<10.5M`)
|
||||
f(`foo: <= 10.5M`, `foo:<=10.5M`)
|
||||
|
||||
// re filter
|
||||
f("re('foo|ba(r.+)')", `re("foo|ba(r.+)")`)
|
||||
f("re(foo)", `re("foo")`)
|
||||
f(`foo:re(foo-bar|baz.)`, `foo:re("foo-bar|baz.")`)
|
||||
f(`foo:re(foo-bar/baz.)`, `foo:re("foo-bar/baz.")`)
|
||||
|
||||
// seq filter
|
||||
f(`seq()`, `seq()`)
|
||||
@ -829,6 +811,10 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
// multiple fields pipes
|
||||
f(`foo | fields bar | fields baz, abc`, `foo | fields bar | fields baz, abc`)
|
||||
|
||||
// field_names pipe
|
||||
f(`foo | field_names as x`, `foo | field_names as x`)
|
||||
f(`foo | field_names y`, `foo | field_names as y`)
|
||||
|
||||
// copy and cp pipe
|
||||
f(`* | copy foo as bar`, `* | copy foo as bar`)
|
||||
f(`* | cp foo bar`, `* | copy foo as bar`)
|
||||
@ -966,6 +952,16 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`* | stats by (_time:week) count() foo`, `* | stats by (_time:week) count(*) as foo`)
|
||||
f(`* | stats by (_time:month) count() foo`, `* | stats by (_time:month) count(*) as foo`)
|
||||
f(`* | stats by (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
|
||||
f(`* | stats (_time:year offset 6.5h) count() foo`, `* | stats by (_time:year offset 6.5h) count(*) as foo`)
|
||||
|
||||
// stats pipe with per-func filters
|
||||
f(`* | stats count() if (foo bar) rows`, `* | stats count(*) if (foo bar) as rows`)
|
||||
f(`* | stats by (_time:1d offset -2h, f2)
|
||||
count() if (is_admin:true or _msg:"foo bar"*) as foo,
|
||||
sum(duration) if (host:in('foo.com', 'bar.com') and path:/foobar) as bar`,
|
||||
`* | stats by (_time:1d offset -2h, f2) count(*) if (is_admin:true or "foo bar"*) as foo, sum(duration) if (host:in(foo.com,bar.com) path:"/foobar") as bar`)
|
||||
f(`* | stats count(x) if (error ip:in(_time:1d | fields ip)) rows`, `* | stats count(x) if (error ip:in(_time:1d | fields ip)) as rows`)
|
||||
f(`* | stats count() if () rows`, `* | stats count(*) if () as rows`)
|
||||
|
||||
// sort pipe
|
||||
f(`* | sort`, `* | sort`)
|
||||
@ -983,6 +979,7 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`* | sort by (foo desc, bar) desc limit 10`, `* | sort by (foo desc, bar) desc limit 10`)
|
||||
f(`* | sort by (foo desc, bar) desc OFFSET 30 limit 10`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
|
||||
f(`* | sort by (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
|
||||
f(`* | sort (foo desc, bar) desc limit 10 OFFSET 30`, `* | sort by (foo desc, bar) desc offset 30 limit 10`)
|
||||
|
||||
// uniq pipe
|
||||
f(`* | uniq`, `* | uniq`)
|
||||
@ -991,8 +988,32 @@ func TestParseQuerySuccess(t *testing.T) {
|
||||
f(`* | uniq by(foo,*,bar)`, `* | uniq`)
|
||||
f(`* | uniq by(f1,f2)`, `* | uniq by (f1, f2)`)
|
||||
f(`* | uniq by(f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
|
||||
f(`* | uniq (f1,f2) limit 10`, `* | uniq by (f1, f2) limit 10`)
|
||||
f(`* | uniq limit 10`, `* | uniq limit 10`)
|
||||
|
||||
// filter pipe
|
||||
f(`* | filter error ip:12.3.4.5 or warn`, `* | filter error ip:12.3.4.5 or warn`)
|
||||
f(`foo | stats by (host) count() logs | filter logs:>50 | sort by (logs desc) | limit 10`, `foo | stats by (host) count(*) as logs | filter logs:>50 | sort by (logs desc) | limit 10`)
|
||||
|
||||
// extract pipe
|
||||
f(`* | extract "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
|
||||
f(`* | extract from _msg "foo<bar>baz"`, `* | extract "foo<bar>baz"`)
|
||||
f(`* | extract from '' 'foo<bar>baz'`, `* | extract "foo<bar>baz"`)
|
||||
f("* | extract from x `foo<bar>baz`", `* | extract from x "foo<bar>baz"`)
|
||||
f("* | extract from x foo<bar>baz", `* | extract from x "foo<bar>baz"`)
|
||||
|
||||
// unpack_json pipe
|
||||
f(`* | unpack_json`, `* | unpack_json`)
|
||||
f(`* | unpack_json result_prefix y`, `* | unpack_json result_prefix y`)
|
||||
f(`* | unpack_json from x`, `* | unpack_json from x`)
|
||||
f(`* | unpack_json from x result_prefix y`, `* | unpack_json from x result_prefix y`)
|
||||
|
||||
// unpack_logfmt pipe
|
||||
f(`* | unpack_logfmt`, `* | unpack_logfmt`)
|
||||
f(`* | unpack_logfmt result_prefix y`, `* | unpack_logfmt result_prefix y`)
|
||||
f(`* | unpack_logfmt from x`, `* | unpack_logfmt from x`)
|
||||
f(`* | unpack_logfmt from x result_prefix y`, `* | unpack_logfmt from x result_prefix y`)
|
||||
|
||||
// multiple different pipes
|
||||
f(`* | fields foo, bar | limit 100 | stats by(foo,bar) count(baz) as qwert`, `* | fields foo, bar | limit 100 | stats by (foo, bar) count(baz) as qwert`)
|
||||
f(`* | skip 100 | head 20 | skip 10`, `* | offset 100 | limit 20 | offset 10`)
|
||||
@ -1130,6 +1151,10 @@ func TestParseQueryFailure(t *testing.T) {
|
||||
f(`in(foo, "bar baz"*, abc)`)
|
||||
f(`in(foo bar)`)
|
||||
f(`in(foo, bar`)
|
||||
f(`in(foo|bar)`)
|
||||
f(`in(|foo`)
|
||||
f(`in(x | limit 10)`)
|
||||
f(`in(x | fields a,b)`)
|
||||
|
||||
// invalid ipv4_range
|
||||
f(`ipv4_range(`)
|
||||
@ -1208,6 +1233,18 @@ func TestParseQueryFailure(t *testing.T) {
|
||||
f(`foo | fields bar,`)
|
||||
f(`foo | fields bar,,`)
|
||||
|
||||
// invalid field_names
|
||||
f(`foo | field_names`)
|
||||
f(`foo | field_names |`)
|
||||
f(`foo | field_names (`)
|
||||
f(`foo | field_names )`)
|
||||
f(`foo | field_names ,`)
|
||||
f(`foo | field_names ()`)
|
||||
f(`foo | field_names (x)`)
|
||||
f(`foo | field_names (x,y)`)
|
||||
f(`foo | field_names x y`)
|
||||
f(`foo | field_names x, y`)
|
||||
|
||||
// invalid copy and cp pipe
|
||||
f(`foo | copy`)
|
||||
f(`foo | cp`)
|
||||
@ -1359,6 +1396,39 @@ func TestParseQueryFailure(t *testing.T) {
|
||||
f(`foo | uniq by(a) bar`)
|
||||
f(`foo | uniq by(a) limit -10`)
|
||||
f(`foo | uniq by(a) limit foo`)
|
||||
|
||||
// invalid filter pipe
|
||||
f(`foo | filter`)
|
||||
f(`foo | filter | sort by (x)`)
|
||||
f(`foo | filter (`)
|
||||
f(`foo | filter )`)
|
||||
|
||||
// invalid extract pipe
|
||||
f(`foo | extract`)
|
||||
f(`foo | extract bar`)
|
||||
f(`foo | extract "xy"`)
|
||||
f(`foo | extract "<>"`)
|
||||
f(`foo | extract "foo<>foo"`)
|
||||
f(`foo | extract "foo<>foo<_>bar<*>asdf"`)
|
||||
f(`foo | extract from`)
|
||||
f(`foo | extract from x`)
|
||||
f(`foo | extract from x "abc"`)
|
||||
f(`foo | extract from x "<abc`)
|
||||
f(`foo | extract from x "<abc>" de`)
|
||||
|
||||
// invalid unpack_json pipe
|
||||
f(`foo | unpack_json bar`)
|
||||
f(`foo | unpack_json from`)
|
||||
f(`foo | unpack_json result_prefix`)
|
||||
f(`foo | unpack_json result_prefix x from y`)
|
||||
f(`foo | unpack_json from x result_prefix`)
|
||||
|
||||
// invalid unpack_logfmt pipe
|
||||
f(`foo | unpack_logfmt bar`)
|
||||
f(`foo | unpack_logfmt from`)
|
||||
f(`foo | unpack_logfmt result_prefix`)
|
||||
f(`foo | unpack_logfmt result_prefix x from y`)
|
||||
f(`foo | unpack_logfmt from x result_prefix`)
|
||||
}
|
||||
|
||||
func TestQueryGetNeededColumns(t *testing.T) {
|
||||
@ -1367,8 +1437,9 @@ func TestQueryGetNeededColumns(t *testing.T) {
|
||||
|
||||
q, err := ParseQuery(s)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse query %s: %s", s, err)
|
||||
t.Fatalf("cannot parse query [%s]: %s", s, err)
|
||||
}
|
||||
q.Optimize()
|
||||
|
||||
needed, unneeded := q.getNeededColumns()
|
||||
neededColumns := strings.Join(needed, ",")
|
||||
@ -1454,11 +1525,10 @@ func TestQueryGetNeededColumns(t *testing.T) {
|
||||
f(`* | sort by (f1) | sort by (f2,f3 desc) desc | fields f4 | rm f1,f2,f5`, `f1,f2,f3,f4`, ``)
|
||||
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2`, `f1,f2,f3,f4`, ``)
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, ``, ``)
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f5,f6`, `f1`, ``)
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields f1,f5`, `f1`, ``)
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r1`, `f1,f2`, ``)
|
||||
f(`* | stats by(f1) count(f2) r1, count(f3,f4) r2 | fields r2,r3`, `f1,f3,f4`, ``)
|
||||
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
|
||||
f(`* | stats count(f1) r1 | stats count() r1`, ``, ``)
|
||||
f(`* | stats count(f1) r1 | stats count() r2`, ``, ``)
|
||||
f(`* | stats count(f1) r1 | stats count(r1) r2`, `f1`, ``)
|
||||
@ -1470,12 +1540,60 @@ func TestQueryGetNeededColumns(t *testing.T) {
|
||||
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f2) r1, count(r1) r2 | fields r2`, `f1,f2,f3,f4`, ``)
|
||||
f(`* | stats by(f3,f4) count(f1,f2) r1 | stats count(f3) r1, count(r1) r2 | fields r1`, `f3,f4`, ``)
|
||||
|
||||
f(`_time:5m | stats by(_time:day) count() r1 | stats values(_time) r2`, `_time`, ``)
|
||||
f(`_time:1y | stats (_time:1w) count() r1 | stats count() r2`, `_time`, ``)
|
||||
|
||||
f(`* | uniq`, `*`, ``)
|
||||
f(`* | uniq by (f1,f2)`, `f1,f2`, ``)
|
||||
f(`* | uniq by (f1,f2) | fields f1,f3`, `f1,f2`, ``)
|
||||
f(`* | uniq by (f1,f2) | rm f1,f3`, `f1,f2`, ``)
|
||||
f(`* | uniq by (f1,f2) | fields f3`, `f1,f2`, ``)
|
||||
|
||||
f(`* | filter foo f1:bar`, `*`, ``)
|
||||
f(`* | filter foo f1:bar | fields f2`, `f2`, ``)
|
||||
f(`* | limit 10 | filter foo f1:bar | fields f2`, `_msg,f1,f2`, ``)
|
||||
f(`* | filter foo f1:bar | fields f1`, `f1`, ``)
|
||||
f(`* | filter foo f1:bar | rm f1`, `*`, `f1`)
|
||||
f(`* | limit 10 | filter foo f1:bar | rm f1`, `*`, ``)
|
||||
f(`* | filter foo f1:bar | rm f2`, `*`, `f2`)
|
||||
f(`* | limit 10 | filter foo f1:bar | rm f2`, `*`, `f2`)
|
||||
f(`* | fields x | filter foo f1:bar | rm f2`, `x`, ``)
|
||||
f(`* | fields x,f1 | filter foo f1:bar | rm f2`, `f1,x`, ``)
|
||||
f(`* | rm x,f1 | filter foo f1:bar`, `*`, `f1,x`)
|
||||
|
||||
f(`* | field_names as foo`, `*`, `_time`)
|
||||
f(`* | field_names foo | fields bar`, `*`, `_time`)
|
||||
f(`* | field_names foo | fields foo`, `*`, `_time`)
|
||||
f(`* | field_names foo | rm foo`, `*`, `_time`)
|
||||
f(`* | field_names foo | rm bar`, `*`, `_time`)
|
||||
f(`* | field_names foo | rm _time`, `*`, `_time`)
|
||||
f(`* | fields x,y | field_names as bar | fields baz`, `x,y`, ``)
|
||||
f(`* | rm x,y | field_names as bar | fields baz`, `*`, `x,y`)
|
||||
|
||||
f(`* | extract from s1 "<f1>x<f2>"`, `*`, `f1,f2`)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | fields foo`, `foo`, ``)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | fields foo,s1`, `foo,s1`, ``)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1`, `foo,s1`, ``)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | fields foo,f1,f2`, `foo,s1`, ``)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | rm foo`, `*`, `f1,f2,foo`)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | rm foo,s1`, `*`, `f1,f2,foo`)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1`, `*`, `f1,f2,foo`)
|
||||
f(`* | extract from s1 "<f1>x<f2>" | rm foo,f1,f2`, `*`, `f1,f2,foo,s1`)
|
||||
|
||||
f(`* | unpack_json`, `*`, ``)
|
||||
f(`* | unpack_json from s1`, `*`, ``)
|
||||
f(`* | unpack_json from s1 | fields f1`, `f1,s1`, ``)
|
||||
f(`* | unpack_json from s1 | fields s1,f1`, `f1,s1`, ``)
|
||||
f(`* | unpack_json from s1 | rm f1`, `*`, `f1`)
|
||||
f(`* | unpack_json from s1 | rm f1,s1`, `*`, `f1`)
|
||||
|
||||
f(`* | unpack_logfmt`, `*`, ``)
|
||||
f(`* | unpack_logfmt from s1`, `*`, ``)
|
||||
f(`* | unpack_logfmt from s1 | fields f1`, `f1,s1`, ``)
|
||||
f(`* | unpack_logfmt from s1 | fields s1,f1`, `f1,s1`, ``)
|
||||
f(`* | unpack_logfmt from s1 | rm f1`, `*`, `f1`)
|
||||
f(`* | unpack_logfmt from s1 | rm f1,s1`, `*`, `f1`)
|
||||
|
||||
f(`* | rm f1, f2`, `*`, `f1,f2`)
|
||||
f(`* | rm f1, f2 | mv f2 f3`, `*`, `f1,f2,f3`)
|
||||
f(`* | rm f1, f2 | cp f2 f3`, `*`, `f1,f2,f3`)
|
||||
|
@ -67,67 +67,103 @@ func parsePipes(lex *lexer) ([]pipe, error) {
|
||||
if !lex.isKeyword("|") {
|
||||
return nil, fmt.Errorf("expecting '|'; got %q", lex.token)
|
||||
}
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing token after '|'")
|
||||
}
|
||||
switch {
|
||||
case lex.isKeyword("stats"):
|
||||
ps, err := parsePipeStats(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, ps)
|
||||
case lex.isKeyword("sort"):
|
||||
ps, err := parsePipeSort(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, ps)
|
||||
case lex.isKeyword("uniq"):
|
||||
pu, err := parsePipeUniq(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pu)
|
||||
case lex.isKeyword("limit", "head"):
|
||||
pl, err := parsePipeLimit(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pl)
|
||||
case lex.isKeyword("offset", "skip"):
|
||||
ps, err := parsePipeOffset(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, ps)
|
||||
case lex.isKeyword("fields"):
|
||||
pf, err := parsePipeFields(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pf)
|
||||
case lex.isKeyword("copy", "cp"):
|
||||
pc, err := parsePipeCopy(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pc)
|
||||
case lex.isKeyword("rename", "mv"):
|
||||
pr, err := parsePipeRename(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pr)
|
||||
case lex.isKeyword("delete", "del", "rm"):
|
||||
pd, err := parsePipeDelete(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
|
||||
}
|
||||
pipes = append(pipes, pd)
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
|
||||
lex.nextToken()
|
||||
p, err := parsePipe(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pipes = append(pipes, p)
|
||||
}
|
||||
return pipes, nil
|
||||
}
|
||||
|
||||
func parsePipe(lex *lexer) (pipe, error) {
|
||||
switch {
|
||||
case lex.isKeyword("copy", "cp"):
|
||||
pc, err := parsePipeCopy(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'copy' pipe: %w", err)
|
||||
}
|
||||
return pc, nil
|
||||
case lex.isKeyword("delete", "del", "rm"):
|
||||
pd, err := parsePipeDelete(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'delete' pipe: %w", err)
|
||||
}
|
||||
return pd, nil
|
||||
case lex.isKeyword("extract"):
|
||||
pe, err := parsePipeExtract(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'extract' pipe: %w", err)
|
||||
}
|
||||
return pe, nil
|
||||
case lex.isKeyword("field_names"):
|
||||
pf, err := parsePipeFieldNames(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'field_names' pipe: %w", err)
|
||||
}
|
||||
return pf, nil
|
||||
case lex.isKeyword("fields"):
|
||||
pf, err := parsePipeFields(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'fields' pipe: %w", err)
|
||||
}
|
||||
return pf, nil
|
||||
case lex.isKeyword("filter"):
|
||||
pf, err := parsePipeFilter(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'filter' pipe: %w", err)
|
||||
}
|
||||
return pf, nil
|
||||
case lex.isKeyword("limit", "head"):
|
||||
pl, err := parsePipeLimit(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'limit' pipe: %w", err)
|
||||
}
|
||||
return pl, nil
|
||||
case lex.isKeyword("offset", "skip"):
|
||||
ps, err := parsePipeOffset(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'offset' pipe: %w", err)
|
||||
}
|
||||
return ps, nil
|
||||
case lex.isKeyword("rename", "mv"):
|
||||
pr, err := parsePipeRename(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'rename' pipe: %w", err)
|
||||
}
|
||||
return pr, nil
|
||||
case lex.isKeyword("sort"):
|
||||
ps, err := parsePipeSort(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'sort' pipe: %w", err)
|
||||
}
|
||||
return ps, nil
|
||||
case lex.isKeyword("stats"):
|
||||
ps, err := parsePipeStats(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'stats' pipe: %w", err)
|
||||
}
|
||||
return ps, nil
|
||||
case lex.isKeyword("uniq"):
|
||||
pu, err := parsePipeUniq(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'uniq' pipe: %w", err)
|
||||
}
|
||||
return pu, nil
|
||||
case lex.isKeyword("unpack_json"):
|
||||
pu, err := parsePipeUnpackJSON(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'unpack_json' pipe: %w", err)
|
||||
}
|
||||
return pu, nil
|
||||
case lex.isKeyword("unpack_logfmt"):
|
||||
pu, err := parsePipeUnpackLogfmt(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'unpack_logfmt' pipe: %w", err)
|
||||
}
|
||||
return pu, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected pipe %q", lex.token)
|
||||
}
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
}
|
||||
if neededFields.contains("*") {
|
||||
// update only unneeded fields
|
||||
unneededFields.addAll(pc.dstFields)
|
||||
unneededFields.addFields(pc.dstFields)
|
||||
for i, srcField := range pc.srcFields {
|
||||
if neededSrcFields[i] {
|
||||
unneededFields.remove(srcField)
|
||||
@ -48,7 +48,7 @@ func (pc *pipeCopy) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
}
|
||||
} else {
|
||||
// update only needed fields and reset unneeded fields
|
||||
neededFields.removeAll(pc.dstFields)
|
||||
neededFields.removeFields(pc.dstFields)
|
||||
for i, srcField := range pc.srcFields {
|
||||
if neededSrcFields[i] {
|
||||
neededFields.add(srcField)
|
||||
|
@ -6,20 +6,9 @@ import (
|
||||
)
|
||||
|
||||
func TestPipeCopyUpdateNeededFields(t *testing.T) {
|
||||
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeCopy(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
@ -53,6 +42,22 @@ func TestPipeCopyUpdateNeededFields(t *testing.T) {
|
||||
f("copy s1 d1, s2 d2", "s2,d1,f1,f2", "", "s1,s2,f1,f2", "")
|
||||
}
|
||||
|
||||
func expectPipeNeededFields(t *testing.T, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipe(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
@ -75,7 +80,7 @@ func assertNeededFields(t *testing.T, nfs, unfs fieldsSet, neededFieldsExpected,
|
||||
func newTestFieldsSet(fields string) fieldsSet {
|
||||
fs := newFieldsSet()
|
||||
if fields != "" {
|
||||
fs.addAll(strings.Split(fields, ","))
|
||||
fs.addFields(strings.Split(fields, ","))
|
||||
}
|
||||
return fs
|
||||
}
|
||||
|
@ -25,10 +25,10 @@ func (pd *pipeDelete) String() string {
|
||||
func (pd *pipeDelete) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
// update only unneeded fields
|
||||
unneededFields.addAll(pd.fields)
|
||||
unneededFields.addFields(pd.fields)
|
||||
} else {
|
||||
// update only needed fields
|
||||
neededFields.removeAll(pd.fields)
|
||||
neededFields.removeFields(pd.fields)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeDeleteUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeDelete(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
|
357
lib/logstorage/pipe_extract.go
Normal file
357
lib/logstorage/pipe_extract.go
Normal file
@ -0,0 +1,357 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"html"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// pipeExtract processes '| extract from <field> <pattern>' pipe.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#extract-pipe
|
||||
type pipeExtract struct {
|
||||
fromField string
|
||||
steps []extractFormatStep
|
||||
|
||||
pattern string
|
||||
}
|
||||
|
||||
func (pe *pipeExtract) String() string {
|
||||
s := "extract"
|
||||
if !isMsgFieldName(pe.fromField) {
|
||||
s += " from " + quoteTokenIfNeeded(pe.fromField)
|
||||
}
|
||||
s += " " + quoteTokenIfNeeded(pe.pattern)
|
||||
return s
|
||||
}
|
||||
|
||||
func (pe *pipeExtract) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
unneededFieldsOrig := unneededFields.clone()
|
||||
needFromField := false
|
||||
for _, step := range pe.steps {
|
||||
if step.field != "" {
|
||||
if !unneededFieldsOrig.contains(step.field) {
|
||||
needFromField = true
|
||||
}
|
||||
unneededFields.add(step.field)
|
||||
}
|
||||
}
|
||||
if needFromField {
|
||||
unneededFields.remove(pe.fromField)
|
||||
} else {
|
||||
unneededFields.add(pe.fromField)
|
||||
}
|
||||
} else {
|
||||
needFromField := false
|
||||
for _, step := range pe.steps {
|
||||
if step.field != "" && neededFields.contains(step.field) {
|
||||
needFromField = true
|
||||
neededFields.remove(step.field)
|
||||
}
|
||||
}
|
||||
if needFromField {
|
||||
neededFields.add(pe.fromField)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pe *pipeExtract) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
shards := make([]pipeExtractProcessorShard, workersCount)
|
||||
for i := range shards {
|
||||
ef := newExtractFormat(pe.steps)
|
||||
rcs := make([]resultColumn, len(ef.fields))
|
||||
for j := range rcs {
|
||||
rcs[j].name = ef.fields[j].name
|
||||
}
|
||||
shards[i] = pipeExtractProcessorShard{
|
||||
pipeExtractProcessorShardNopad: pipeExtractProcessorShardNopad{
|
||||
ef: ef,
|
||||
rcs: rcs,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pep := &pipeExtractProcessor{
|
||||
pe: pe,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: shards,
|
||||
}
|
||||
return pep
|
||||
}
|
||||
|
||||
type pipeExtractProcessor struct {
|
||||
pe *pipeExtract
|
||||
ppBase pipeProcessor
|
||||
|
||||
shards []pipeExtractProcessorShard
|
||||
}
|
||||
|
||||
type pipeExtractProcessorShard struct {
|
||||
pipeExtractProcessorShardNopad
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(pipeExtractProcessorShardNopad{})%128]byte
|
||||
}
|
||||
|
||||
type pipeExtractProcessorShardNopad struct {
|
||||
ef *extractFormat
|
||||
|
||||
rcs []resultColumn
|
||||
}
|
||||
|
||||
func (pep *pipeExtractProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
shard := &pep.shards[workerID]
|
||||
ef := shard.ef
|
||||
rcs := shard.rcs
|
||||
|
||||
c := br.getColumnByName(pep.pe.fromField)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
ef.apply(v)
|
||||
for i, f := range ef.fields {
|
||||
fieldValue := *f.value
|
||||
rc := &rcs[i]
|
||||
for range br.timestamps {
|
||||
rc.addValue(fieldValue)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
values := c.getValues(br)
|
||||
for i, v := range values {
|
||||
if i == 0 || values[i-1] != v {
|
||||
ef.apply(v)
|
||||
}
|
||||
for j, f := range ef.fields {
|
||||
rcs[j].addValue(*f.value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
br.addResultColumns(rcs)
|
||||
pep.ppBase.writeBlock(workerID, br)
|
||||
|
||||
for i := range rcs {
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
func (pep *pipeExtractProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePipeExtract(lex *lexer) (*pipeExtract, error) {
|
||||
if !lex.isKeyword("extract") {
|
||||
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "extract")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
fromField := "_msg"
|
||||
if lex.isKeyword("from") {
|
||||
lex.nextToken()
|
||||
f, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
|
||||
}
|
||||
fromField = f
|
||||
}
|
||||
|
||||
pattern, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot read 'pattern': %w", err)
|
||||
}
|
||||
steps, err := parseExtractFormatSteps(pattern)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'pattern' %q: %w", pattern, err)
|
||||
}
|
||||
|
||||
pe := &pipeExtract{
|
||||
fromField: fromField,
|
||||
steps: steps,
|
||||
pattern: pattern,
|
||||
}
|
||||
return pe, nil
|
||||
}
|
||||
|
||||
// extractFormat extracts field values from a string according to the given steps.
type extractFormat struct {
	// steps is the sequence of steps to perform during the extraction.
	steps []extractFormatStep

	// matches holds the matched substring per every step in steps.
	matches []string

	// fields refers to matches for the steps with non-empty field names.
	fields []extractField
}

// extractField is a named field, whose value points to an item in extractFormat.matches.
type extractField struct {
	name  string
	value *string
}

// extractFormatStep is a single extraction step: the prefix text
// followed by the field name. The field name is empty for anonymous placeholders.
type extractFormatStep struct {
	prefix string
	field  string
}
|
||||
|
||||
func newExtractFormat(steps []extractFormatStep) *extractFormat {
|
||||
if len(steps) == 0 {
|
||||
logger.Panicf("BUG: steps cannot be empty")
|
||||
}
|
||||
|
||||
matches := make([]string, len(steps))
|
||||
|
||||
var fields []extractField
|
||||
for i, step := range steps {
|
||||
if step.field != "" {
|
||||
fields = append(fields, extractField{
|
||||
name: step.field,
|
||||
value: &matches[i],
|
||||
})
|
||||
}
|
||||
}
|
||||
if len(fields) == 0 {
|
||||
logger.Panicf("BUG: fields cannot be empty")
|
||||
}
|
||||
|
||||
ef := &extractFormat{
|
||||
steps: steps,
|
||||
matches: matches,
|
||||
fields: fields,
|
||||
}
|
||||
return ef
|
||||
}
|
||||
|
||||
func (ef *extractFormat) apply(s string) {
|
||||
clear(ef.matches)
|
||||
|
||||
steps := ef.steps
|
||||
|
||||
if prefix := steps[0].prefix; prefix != "" {
|
||||
n := strings.Index(s, prefix)
|
||||
if n < 0 {
|
||||
// Mismatch
|
||||
return
|
||||
}
|
||||
s = s[n+len(prefix):]
|
||||
}
|
||||
|
||||
matches := ef.matches
|
||||
for i := range steps {
|
||||
nextPrefix := ""
|
||||
if i+1 < len(steps) {
|
||||
nextPrefix = steps[i+1].prefix
|
||||
}
|
||||
|
||||
us, nOffset := tryUnquoteString(s)
|
||||
if nOffset >= 0 {
|
||||
// Matched quoted string
|
||||
matches[i] = us
|
||||
s = s[nOffset:]
|
||||
if !strings.HasPrefix(s, nextPrefix) {
|
||||
// Mismatch
|
||||
return
|
||||
}
|
||||
s = s[len(nextPrefix):]
|
||||
} else {
|
||||
// Match unquoted string until the nextPrefix
|
||||
if nextPrefix == "" {
|
||||
matches[i] = s
|
||||
return
|
||||
}
|
||||
n := strings.Index(s, nextPrefix)
|
||||
if n < 0 {
|
||||
// Mismatch
|
||||
return
|
||||
}
|
||||
matches[i] = s[:n]
|
||||
s = s[n+len(nextPrefix):]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// tryUnquoteString tries to unquote the quoted string at the beginning of s.
//
// It returns the unquoted string and the length of the quoted prefix in s on success.
// It returns s and -1 if s doesn't start with a valid double-quoted or backquoted string.
func tryUnquoteString(s string) (string, int) {
	if s == "" {
		return s, -1
	}
	switch s[0] {
	case '"', '`':
		// Possibly a quoted string.
	default:
		return s, -1
	}

	quoted, err := strconv.QuotedPrefix(s)
	if err != nil {
		return s, -1
	}
	unquoted, err := strconv.Unquote(quoted)
	if err != nil {
		return s, -1
	}
	return unquoted, len(quoted)
}
|
||||
|
||||
func parseExtractFormatSteps(s string) ([]extractFormatStep, error) {
|
||||
var steps []extractFormatStep
|
||||
|
||||
hasNamedField := false
|
||||
|
||||
n := strings.IndexByte(s, '<')
|
||||
if n < 0 {
|
||||
return nil, fmt.Errorf("missing <...> fields")
|
||||
}
|
||||
prefix := s[:n]
|
||||
s = s[n+1:]
|
||||
for {
|
||||
n := strings.IndexByte(s, '>')
|
||||
if n < 0 {
|
||||
return nil, fmt.Errorf("missing '>' for <%s", s)
|
||||
}
|
||||
field := s[:n]
|
||||
s = s[n+1:]
|
||||
|
||||
if field == "_" || field == "*" {
|
||||
field = ""
|
||||
}
|
||||
steps = append(steps, extractFormatStep{
|
||||
prefix: prefix,
|
||||
field: field,
|
||||
})
|
||||
if !hasNamedField && field != "" {
|
||||
hasNamedField = true
|
||||
}
|
||||
if len(s) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
n = strings.IndexByte(s, '<')
|
||||
if n < 0 {
|
||||
steps = append(steps, extractFormatStep{
|
||||
prefix: s,
|
||||
})
|
||||
break
|
||||
}
|
||||
if n == 0 {
|
||||
return nil, fmt.Errorf("missing delimiter after <%s>", field)
|
||||
}
|
||||
prefix = s[:n]
|
||||
s = s[n+1:]
|
||||
}
|
||||
|
||||
if !hasNamedField {
|
||||
return nil, fmt.Errorf("missing named fields like <name>")
|
||||
}
|
||||
|
||||
for i := range steps {
|
||||
step := &steps[i]
|
||||
step.prefix = html.UnescapeString(step.prefix)
|
||||
}
|
||||
|
||||
return steps, nil
|
||||
}
|
213
lib/logstorage/pipe_extract_test.go
Normal file
213
lib/logstorage/pipe_extract_test.go
Normal file
@ -0,0 +1,213 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestExtractFormatApply(t *testing.T) {
|
||||
f := func(pattern, s string, resultsExpected []string) {
|
||||
t.Helper()
|
||||
|
||||
steps, err := parseExtractFormatSteps(pattern)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
ef := newExtractFormat(steps)
|
||||
ef.apply(s)
|
||||
|
||||
if len(ef.fields) != len(resultsExpected) {
|
||||
t.Fatalf("unexpected number of results; got %d; want %d", len(ef.fields), len(resultsExpected))
|
||||
}
|
||||
for i, f := range ef.fields {
|
||||
if v := *f.value; v != resultsExpected[i] {
|
||||
t.Fatalf("unexpected value for field %q; got %q; want %q", f.name, v, resultsExpected[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f("<foo>", "", []string{""})
|
||||
f("<foo>", "abc", []string{"abc"})
|
||||
f("<foo>bar", "", []string{""})
|
||||
f("<foo>bar", "bar", []string{""})
|
||||
f("<foo>bar", "bazbar", []string{"baz"})
|
||||
f("<foo>bar", "a bazbar xdsf", []string{"a baz"})
|
||||
f("<foo>bar<>", "a bazbar xdsf", []string{"a baz"})
|
||||
f("<foo>bar<>x", "a bazbar xdsf", []string{"a baz"})
|
||||
f("foo<bar>", "", []string{""})
|
||||
f("foo<bar>", "foo", []string{""})
|
||||
f("foo<bar>", "a foo xdf sdf", []string{" xdf sdf"})
|
||||
f("foo<bar>", "a foo foobar", []string{" foobar"})
|
||||
f("foo<bar>baz", "a foo foobar", []string{""})
|
||||
f("foo<bar>baz", "a foobaz bar", []string{""})
|
||||
f("foo<bar>baz", "a foo foobar baz", []string{" foobar "})
|
||||
f("foo<bar>baz", "a foo foobar bazabc", []string{" foobar "})
|
||||
|
||||
f("ip=<ip> <> path=<path> ", "x=a, ip=1.2.3.4 method=GET host='abc' path=/foo/bar some tail here", []string{"1.2.3.4", "/foo/bar"})
|
||||
|
||||
// escaped pattern
|
||||
f("ip=<<ip>>", "foo ip=<1.2.3.4> bar", []string{"1.2.3.4"})
|
||||
f("ip=<<ip>>", "foo ip=<foo&bar> bar", []string{"foo&bar"})
|
||||
|
||||
// quoted fields
|
||||
f(`"msg":<msg>,`, `{"foo":"bar","msg":"foo,b\"ar\n\t","baz":"x"}`, []string{`foo,b"ar` + "\n\t"})
|
||||
f(`foo=<bar>`, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
|
||||
f(`foo=<bar> `, "foo=`bar baz,abc` def", []string{"bar baz,abc"})
|
||||
f(`<foo>`, `"foo,\"bar"`, []string{`foo,"bar`})
|
||||
f(`<foo>,"bar`, `"foo,\"bar"`, []string{`foo,"bar`})
|
||||
}
|
||||
|
||||
func TestParseExtractFormatStepsSuccess(t *testing.T) {
|
||||
f := func(s string, stepsExpected []extractFormatStep) {
|
||||
t.Helper()
|
||||
|
||||
steps, err := parseExtractFormatSteps(s)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when parsing %q: %s", s, err)
|
||||
}
|
||||
if !reflect.DeepEqual(steps, stepsExpected) {
|
||||
t.Fatalf("unexpected steps for [%s]; got %v; want %v", s, steps, stepsExpected)
|
||||
}
|
||||
}
|
||||
|
||||
f("<foo>", []extractFormatStep{
|
||||
{
|
||||
field: "foo",
|
||||
},
|
||||
})
|
||||
f("<foo>bar", []extractFormatStep{
|
||||
{
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: "bar",
|
||||
},
|
||||
})
|
||||
f("<>bar<foo>", []extractFormatStep{
|
||||
{},
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "foo",
|
||||
},
|
||||
})
|
||||
f("bar<foo>", []extractFormatStep{
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "foo",
|
||||
},
|
||||
})
|
||||
f("bar<foo>abc", []extractFormatStep{
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: "abc",
|
||||
},
|
||||
})
|
||||
f("bar<foo>abc<_>", []extractFormatStep{
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: "abc",
|
||||
},
|
||||
})
|
||||
f("<foo>bar<baz>", []extractFormatStep{
|
||||
{
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "baz",
|
||||
},
|
||||
})
|
||||
f("bar<foo>baz", []extractFormatStep{
|
||||
{
|
||||
prefix: "bar",
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: "baz",
|
||||
},
|
||||
})
|
||||
f("<<foo>&gt;", []extractFormatStep{
|
||||
{
|
||||
prefix: "<",
|
||||
field: "foo",
|
||||
},
|
||||
{
|
||||
prefix: ">",
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func TestParseExtractFormatStepFailure(t *testing.T) {
|
||||
f := func(s string) {
|
||||
t.Helper()
|
||||
|
||||
_, err := parseExtractFormatSteps(s)
|
||||
if err == nil {
|
||||
t.Fatalf("expecting non-nil error when parsing %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
// empty string
|
||||
f("")
|
||||
|
||||
// zero fields
|
||||
f("foobar")
|
||||
|
||||
// Zero named fields
|
||||
f("<>")
|
||||
f("foo<>")
|
||||
f("<>foo")
|
||||
f("foo<_>bar<*>baz<>xxx")
|
||||
|
||||
// missing delimiter between fields
|
||||
f("<foo><bar>")
|
||||
f("<><bar>")
|
||||
f("<foo><>")
|
||||
f("bb<foo><><bar>aa")
|
||||
f("aa<foo><bar>")
|
||||
f("aa<foo><bar>bb")
|
||||
|
||||
// missing >
|
||||
f("<foo")
|
||||
f("foo<bar")
|
||||
}
|
||||
|
||||
func TestPipeExtractUpdateNeededFields(t *testing.T) {
|
||||
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f("extract from x '<foo>'", "*", "", "*", "foo")
|
||||
|
||||
// all the needed fields, unneeded fields do not intersect with fromField and output fields
|
||||
f("extract from x '<foo>'", "*", "f1,f2", "*", "f1,f2,foo")
|
||||
|
||||
// all the needed fields, unneeded fields intersect with fromField
|
||||
f("extract from x '<foo>'", "*", "f2,x", "*", "f2,foo")
|
||||
|
||||
// all the needed fields, unneeded fields intersect with output fields
|
||||
f("extract from x '<foo>x<bar>'", "*", "f2,foo", "*", "bar,f2,foo")
|
||||
|
||||
// all the needed fields, unneeded fields intersect with all the output fields
|
||||
f("extract from x '<foo>x<bar>'", "*", "f2,foo,bar", "*", "bar,f2,foo,x")
|
||||
|
||||
// needed fields do not intersect with fromField and output fields
|
||||
f("extract from x '<foo>x<bar>'", "f1,f2", "", "f1,f2", "")
|
||||
|
||||
// needed fields intersect with fromField
|
||||
f("extract from x '<foo>x<bar>'", "f2,x", "", "f2,x", "")
|
||||
|
||||
// needed fields intersect with output fields
|
||||
f("extract from x '<foo>x<bar>'", "f2,foo", "", "f2,x", "")
|
||||
|
||||
// needed fields intersect with fromField and output fields
|
||||
f("extract from x '<foo>x<bar>'", "f2,foo,x,y", "", "f2,x,y", "")
|
||||
}
|
80
lib/logstorage/pipe_extract_timing_test.go
Normal file
80
lib/logstorage/pipe_extract_timing_test.go
Normal file
@ -0,0 +1,80 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func BenchmarkExtractFormatApply(b *testing.B) {
|
||||
a := []string{
|
||||
`{"level":"error","ts":1716113701.63973,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498","foo":"bar"}`,
|
||||
`{"level":"error","ts":1716113370.2321634,"caller":"gcm/export.go:434","msg":"Failed to export metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).exportBuffer\n\tcloud/kubernetes/metrics/common/gcm/export.go:434\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:383\ngoogle3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).Flush\n\tcloud/kubernetes/metrics/common/gcm/export.go:365\ngoogle3/cloud/kubernetes/metrics/components/collector/adapter/adapter.(*adapter).Finalize\n\tcloud/kubernetes/metrics/components/collector/adapter/consume.go:131\ngoogle3/cloud/kubernetes/metrics/components/collector/prometheus/prometheus.(*parser).ParseText\n\tcloud/kubernetes/metrics/components/collector/prometheus/parse.go:158\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:103\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
|
||||
`{"level":"error","ts":1716113127.7496774,"caller":"collector/collector.go:105","msg":"Failed to process metrics","scrape_target":"http://localhost:8093/metrics","error":"failed to finalize exporting: \"2 errors occurred:\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\\n\\n\"","stacktrace":"google3/cloud/kubernetes/metrics/components/collector/collector.runScrapeLoop\n\tcloud/kubernetes/metrics/components/collector/collector.go:105\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Run\n\tcloud/kubernetes/metrics/components/collector/collector.go:81\ngoogle3/cloud/kubernetes/metrics/components/collector/collector.Start.func1\n\tcloud/kubernetes/metrics/components/collector/multi_target_collector.go:45","foo":"bar"}`,
|
||||
`{"level":"error","ts":1716113547.6429873,"caller":"gcm/export.go:498","msg":"Failed to export self-observability metrics to Cloud Monitoring","error":"rpc error: code = PermissionDenied desc = Permission monitoring.timeSeries.create denied (or the resource may not exist).","stacktrace":"google3/cloud/kubernetes/metrics/common/gcm/gcm.(*exporter).startSelfObservability\n\tcloud/kubernetes/metrics/common/gcm/export.go:498", "foo":"bar"}`,
|
||||
`{"level":"error","ts":1716113541.4445803,"caller":"periodicexporter/periodic_exporter.go:180","msg":"Failed to flush metrics to Cloud Monitoring","error":"1 error occurred:\n\t* failed to export 1 (out of 1) batches of metrics to Cloud Monitoring\n\n","stacktrace":"google3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).exportAll\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:180\ngoogle3/cloud/kubernetes/metrics/common/periodicexporter/periodicexporter.(*Exporter).periodicExporter\n\tcloud/kubernetes/metrics/common/periodicexporter/periodic_exporter.go:157","foo":"bar"}`,
|
||||
}
|
||||
|
||||
b.Run("single-small-field-at-start", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":"<level>"`, a)
|
||||
})
|
||||
b.Run("single-small-field-at-start-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":<level>`, a)
|
||||
})
|
||||
b.Run("single-small-field-at-end", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"foo":"<foo>"`, a)
|
||||
})
|
||||
b.Run("single-small-field-at-end-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"foo":<foo>`, a)
|
||||
})
|
||||
b.Run("single-medium-field", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"msg":"<message>"`, a)
|
||||
})
|
||||
b.Run("single-medium-field-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"msg":<message>`, a)
|
||||
})
|
||||
b.Run("single-large-field", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"stacktrace":"<stacktrace>"`, a)
|
||||
})
|
||||
b.Run("single-large-field-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"stacktrace":<stacktrace>`, a)
|
||||
})
|
||||
b.Run("two-fields", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":"<level>",<_>"msg":"<msg>"`, a)
|
||||
})
|
||||
b.Run("two-fields-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":<level>,<_>"msg":<msg>`, a)
|
||||
})
|
||||
b.Run("many-fields", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":"<level>","ts":"<ts>","caller":"<caller>","msg":"<msg>","error":"<error>"`, a)
|
||||
})
|
||||
b.Run("many-fields-unquote", func(b *testing.B) {
|
||||
benchmarkExtractFormatApply(b, `"level":<level>,"ts":<ts>,"caller":<caller>,"msg":<msg>,"error":<error>`, a)
|
||||
})
|
||||
}
|
||||
|
||||
func benchmarkExtractFormatApply(b *testing.B, pattern string, a []string) {
|
||||
steps, err := parseExtractFormatSteps(pattern)
|
||||
if err != nil {
|
||||
b.Fatalf("unexpected error: %s", err)
|
||||
}
|
||||
|
||||
n := 0
|
||||
for _, s := range a {
|
||||
n += len(s)
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
b.SetBytes(int64(n))
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
sink := 0
|
||||
ef := newExtractFormat(steps)
|
||||
for pb.Next() {
|
||||
for _, s := range a {
|
||||
ef.apply(s)
|
||||
for _, v := range ef.matches {
|
||||
sink += len(v)
|
||||
}
|
||||
}
|
||||
}
|
||||
GlobalSink.Add(uint64(sink))
|
||||
})
|
||||
}
|
167
lib/logstorage/pipe_field_names.go
Normal file
167
lib/logstorage/pipe_field_names.go
Normal file
@ -0,0 +1,167 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// pipeFieldNames implements the '| field_names' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#field-names-pipe
type pipeFieldNames struct {
	// resultName is the name of the output column with the field names.
	resultName string

	// isFirstPipe must be set to true when '| field_names' is the first pipe in the query.
	//
	// In this case the _time column isn't loaded; it is added to the results unconditionally instead.
	isFirstPipe bool
}
|
||||
|
||||
func (pf *pipeFieldNames) String() string {
|
||||
return "field_names as " + quoteTokenIfNeeded(pf.resultName)
|
||||
}
|
||||
|
||||
func (pf *pipeFieldNames) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
neededFields.add("*")
|
||||
unneededFields.reset()
|
||||
|
||||
if pf.isFirstPipe {
|
||||
unneededFields.add("_time")
|
||||
}
|
||||
}
|
||||
|
||||
func (pf *pipeFieldNames) newPipeProcessor(workersCount int, stopCh <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
shards := make([]pipeFieldNamesProcessorShard, workersCount)
|
||||
for i := range shards {
|
||||
shards[i] = pipeFieldNamesProcessorShard{
|
||||
pipeFieldNamesProcessorShardNopad: pipeFieldNamesProcessorShardNopad{
|
||||
m: make(map[string]struct{}),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pfp := &pipeFieldNamesProcessor{
|
||||
pf: pf,
|
||||
stopCh: stopCh,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: shards,
|
||||
}
|
||||
return pfp
|
||||
}
|
||||
|
||||
type pipeFieldNamesProcessor struct {
|
||||
pf *pipeFieldNames
|
||||
stopCh <-chan struct{}
|
||||
ppBase pipeProcessor
|
||||
|
||||
shards []pipeFieldNamesProcessorShard
|
||||
}
|
||||
|
||||
type pipeFieldNamesProcessorShard struct {
|
||||
pipeFieldNamesProcessorShardNopad
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(pipeFieldNamesProcessorShardNopad{})%128]byte
|
||||
}
|
||||
|
||||
type pipeFieldNamesProcessorShardNopad struct {
|
||||
// m holds unique field names.
|
||||
m map[string]struct{}
|
||||
}
|
||||
|
||||
func (pfp *pipeFieldNamesProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
shard := &pfp.shards[workerID]
|
||||
cs := br.getColumns()
|
||||
for _, c := range cs {
|
||||
if _, ok := shard.m[c.name]; !ok {
|
||||
nameCopy := strings.Clone(c.name)
|
||||
shard.m[nameCopy] = struct{}{}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pfp *pipeFieldNamesProcessor) flush() error {
|
||||
if needStop(pfp.stopCh) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// merge state across shards
|
||||
shards := pfp.shards
|
||||
m := shards[0].m
|
||||
shards = shards[1:]
|
||||
for i := range shards {
|
||||
for k := range shards[i].m {
|
||||
m[k] = struct{}{}
|
||||
}
|
||||
}
|
||||
if pfp.pf.isFirstPipe {
|
||||
m["_time"] = struct{}{}
|
||||
}
|
||||
|
||||
// write result
|
||||
wctx := &pipeFieldNamesWriteContext{
|
||||
pfp: pfp,
|
||||
}
|
||||
wctx.rcs[0].name = pfp.pf.resultName
|
||||
for k := range m {
|
||||
wctx.writeRow(k)
|
||||
}
|
||||
wctx.flush()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type pipeFieldNamesWriteContext struct {
|
||||
pfp *pipeFieldNamesProcessor
|
||||
rcs [1]resultColumn
|
||||
br blockResult
|
||||
|
||||
valuesLen int
|
||||
}
|
||||
|
||||
func (wctx *pipeFieldNamesWriteContext) writeRow(v string) {
|
||||
wctx.rcs[0].addValue(v)
|
||||
wctx.valuesLen += len(v)
|
||||
if wctx.valuesLen >= 1_000_000 {
|
||||
wctx.flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (wctx *pipeFieldNamesWriteContext) flush() {
|
||||
br := &wctx.br
|
||||
|
||||
wctx.valuesLen = 0
|
||||
|
||||
// Flush rcs to ppBase
|
||||
br.setResultColumns(wctx.rcs[:1])
|
||||
wctx.pfp.ppBase.writeBlock(0, br)
|
||||
br.reset()
|
||||
wctx.rcs[0].resetValues()
|
||||
}
|
||||
|
||||
func parsePipeFieldNames(lex *lexer) (*pipeFieldNames, error) {
|
||||
if !lex.isKeyword("field_names") {
|
||||
return nil, fmt.Errorf("expecting 'field_names'; got %q", lex.token)
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
if lex.isKeyword("as") {
|
||||
lex.nextToken()
|
||||
}
|
||||
resultName, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse result name for 'field_names': %w", err)
|
||||
}
|
||||
|
||||
pf := &pipeFieldNames{
|
||||
resultName: resultName,
|
||||
}
|
||||
return pf, nil
|
||||
}
|
27
lib/logstorage/pipe_field_names_test.go
Normal file
27
lib/logstorage/pipe_field_names_test.go
Normal file
@ -0,0 +1,27 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPipeFieldNamesUpdateNeededFields(t *testing.T) {
|
||||
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f("field_names as f1", "*", "", "*", "")
|
||||
|
||||
// all the needed fields, unneeded fields do not intersect with src
|
||||
f("field_names as f3", "*", "f1,f2", "*", "")
|
||||
|
||||
// all the needed fields, unneeded fields intersect with src
|
||||
f("field_names as f1", "*", "s1,f1,f2", "*", "")
|
||||
|
||||
// needed fields do not intersect with src
|
||||
f("field_names as f3", "f1,f2", "", "*", "")
|
||||
|
||||
// needed fields intersect with src
|
||||
f("field_names as f1", "s1,f1,f2", "", "*", "")
|
||||
}
|
@ -32,7 +32,7 @@ func (pf *pipeFields) updateNeededFields(neededFields, unneededFields fieldsSet)
|
||||
if neededFields.contains("*") {
|
||||
// subtract unneeded fields from pf.fields
|
||||
neededFields.reset()
|
||||
neededFields.addAll(pf.fields)
|
||||
neededFields.addFields(pf.fields)
|
||||
for _, f := range unneededFields.getAll() {
|
||||
neededFields.remove(f)
|
||||
}
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeFieldsUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeFields(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
|
108
lib/logstorage/pipe_filter.go
Normal file
108
lib/logstorage/pipe_filter.go
Normal file
@ -0,0 +1,108 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// pipeFilter processes '| filter ...' queries.
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#filter-pipe
|
||||
type pipeFilter struct {
|
||||
// f is a filter to apply to the written rows.
|
||||
f filter
|
||||
}
|
||||
|
||||
func (pf *pipeFilter) String() string {
|
||||
return "filter " + pf.f.String()
|
||||
}
|
||||
|
||||
func (pf *pipeFilter) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
fs := newFieldsSet()
|
||||
pf.f.updateNeededFields(fs)
|
||||
for f := range fs {
|
||||
unneededFields.remove(f)
|
||||
}
|
||||
} else {
|
||||
pf.f.updateNeededFields(neededFields)
|
||||
}
|
||||
}
|
||||
|
||||
func (pf *pipeFilter) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
shards := make([]pipeFilterProcessorShard, workersCount)
|
||||
|
||||
pfp := &pipeFilterProcessor{
|
||||
pf: pf,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: shards,
|
||||
}
|
||||
return pfp
|
||||
}
|
||||
|
||||
// pipeFilterProcessor applies pf to the written blocks and passes the matching rows to ppBase.
type pipeFilterProcessor struct {
	// pf is the pipe with the filter to apply.
	pf *pipeFilter

	// ppBase is the processor that receives the rows that passed the filter.
	ppBase pipeProcessor

	// shards holds per-worker state; it is indexed by workerID in writeBlock.
	shards []pipeFilterProcessorShard
}
|
||||
|
||||
// pipeFilterProcessorShard is the per-worker state of pipeFilterProcessor,
// padded to a multiple of 128 bytes.
type pipeFilterProcessorShard struct {
	pipeFilterProcessorShardNopad

	// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(pipeFilterProcessorShardNopad{})%128]byte
}
|
||||
|
||||
// pipeFilterProcessorShardNopad contains the shard fields without the trailing padding.
type pipeFilterProcessorShardNopad struct {
	// br receives the rows that passed the filter when only a part of the written block matches.
	br blockResult

	// bm marks the rows of the written block that match the filter.
	bm bitmap
}
|
||||
|
||||
func (pfp *pipeFilterProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
shard := &pfp.shards[workerID]
|
||||
|
||||
bm := &shard.bm
|
||||
bm.init(len(br.timestamps))
|
||||
bm.setBits()
|
||||
pfp.pf.f.applyToBlockResult(br, bm)
|
||||
if bm.areAllBitsSet() {
|
||||
// Fast path - the filter didn't filter out anything - send br to the base pipe as is.
|
||||
pfp.ppBase.writeBlock(workerID, br)
|
||||
return
|
||||
}
|
||||
if bm.isZero() {
|
||||
// Nothing to send
|
||||
return
|
||||
}
|
||||
|
||||
// Slow path - copy the remaining rows from br to shard.br before sending them to base pipe.
|
||||
shard.br.initFromFilterAllColumns(br, bm)
|
||||
pfp.ppBase.writeBlock(workerID, &shard.br)
|
||||
}
|
||||
|
||||
// flush does nothing and always returns nil.
//
// writeBlock passes the matching rows to ppBase right away, so there is nothing left to send here.
func (pfp *pipeFilterProcessor) flush() error {
	return nil
}
|
||||
|
||||
func parsePipeFilter(lex *lexer) (*pipeFilter, error) {
|
||||
if !lex.isKeyword("filter") {
|
||||
return nil, fmt.Errorf("expecting 'filter'; got %q", lex.token)
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
f, err := parseFilter(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'filter': %w", err)
|
||||
}
|
||||
|
||||
pf := &pipeFilter{
|
||||
f: f,
|
||||
}
|
||||
return pf, nil
|
||||
}
|
27
lib/logstorage/pipe_filter_test.go
Normal file
27
lib/logstorage/pipe_filter_test.go
Normal file
@ -0,0 +1,27 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPipeFilterUpdateNeededFields(t *testing.T) {
|
||||
f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f("filter foo f1:bar", "*", "", "*", "")
|
||||
|
||||
// all the needed fields, unneeded fields do not intersect with src
|
||||
f("filter foo f3:bar", "*", "f1,f2", "*", "f1,f2")
|
||||
|
||||
// all the needed fields, unneeded fields intersect with src
|
||||
f("filter foo f1:bar", "*", "s1,f1,f2", "*", "s1,f2")
|
||||
|
||||
// needed fields do not intersect with src
|
||||
f("filter foo f3:bar", "f1,f2", "", "_msg,f1,f2,f3", "")
|
||||
|
||||
// needed fields intersect with src
|
||||
f("filter foo f1:bar", "s1,f1,f2", "", "_msg,f1,f2,s1", "")
|
||||
}
|
@ -9,18 +9,18 @@ import (
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#limit-pipe
|
||||
type pipeLimit struct {
|
||||
n uint64
|
||||
limit uint64
|
||||
}
|
||||
|
||||
func (pl *pipeLimit) String() string {
|
||||
return fmt.Sprintf("limit %d", pl.n)
|
||||
return fmt.Sprintf("limit %d", pl.limit)
|
||||
}
|
||||
|
||||
func (pl *pipeLimit) updateNeededFields(_, _ fieldsSet) {
|
||||
}
|
||||
|
||||
func (pl *pipeLimit) newPipeProcessor(_ int, _ <-chan struct{}, cancel func(), ppBase pipeProcessor) pipeProcessor {
|
||||
if pl.n == 0 {
|
||||
if pl.limit == 0 {
|
||||
// Special case - notify the caller to stop writing data to the returned pipeLimitProcessor
|
||||
cancel()
|
||||
}
|
||||
@ -45,7 +45,7 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
}
|
||||
|
||||
rowsProcessed := plp.rowsProcessed.Add(uint64(len(br.timestamps)))
|
||||
if rowsProcessed <= plp.pl.n {
|
||||
if rowsProcessed <= plp.pl.limit {
|
||||
// Fast path - write all the rows to ppBase.
|
||||
plp.ppBase.writeBlock(workerID, br)
|
||||
return
|
||||
@ -53,13 +53,13 @@ func (plp *pipeLimitProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
|
||||
// Slow path - overflow. Write the remaining rows if needed.
|
||||
rowsProcessed -= uint64(len(br.timestamps))
|
||||
if rowsProcessed >= plp.pl.n {
|
||||
if rowsProcessed >= plp.pl.limit {
|
||||
// Nothing to write. There is no need in cancel() call, since it has been called by another goroutine.
|
||||
return
|
||||
}
|
||||
|
||||
// Write remaining rows.
|
||||
keepRows := plp.pl.n - rowsProcessed
|
||||
keepRows := plp.pl.limit - rowsProcessed
|
||||
br.truncateRows(int(keepRows))
|
||||
plp.ppBase.writeBlock(workerID, br)
|
||||
|
||||
@ -83,7 +83,7 @@ func parsePipeLimit(lex *lexer) (*pipeLimit, error) {
|
||||
}
|
||||
lex.nextToken()
|
||||
pl := &pipeLimit{
|
||||
n: n,
|
||||
limit: n,
|
||||
}
|
||||
return pl, nil
|
||||
}
|
||||
|
21
lib/logstorage/pipe_limit_test.go
Normal file
21
lib/logstorage/pipe_limit_test.go
Normal file
@ -0,0 +1,21 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPipeLimitUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f("limit 10", "*", "", "*", "")
|
||||
|
||||
// all the needed fields, plus unneeded fields
|
||||
f("limit 10", "*", "f1,f2", "*", "f1,f2")
|
||||
|
||||
// needed fields
|
||||
f("limit 10", "f1,f2", "", "f1,f2", "")
|
||||
}
|
@ -9,11 +9,11 @@ import (
|
||||
//
|
||||
// See https://docs.victoriametrics.com/victorialogs/logsql/#offset-pipe
|
||||
type pipeOffset struct {
|
||||
n uint64
|
||||
offset uint64
|
||||
}
|
||||
|
||||
func (po *pipeOffset) String() string {
|
||||
return fmt.Sprintf("offset %d", po.n)
|
||||
return fmt.Sprintf("offset %d", po.offset)
|
||||
}
|
||||
|
||||
func (po *pipeOffset) updateNeededFields(_, _ fieldsSet) {
|
||||
@ -39,17 +39,17 @@ func (pop *pipeOffsetProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
}
|
||||
|
||||
rowsProcessed := pop.rowsProcessed.Add(uint64(len(br.timestamps)))
|
||||
if rowsProcessed <= pop.po.n {
|
||||
if rowsProcessed <= pop.po.offset {
|
||||
return
|
||||
}
|
||||
|
||||
rowsProcessed -= uint64(len(br.timestamps))
|
||||
if rowsProcessed >= pop.po.n {
|
||||
if rowsProcessed >= pop.po.offset {
|
||||
pop.ppBase.writeBlock(workerID, br)
|
||||
return
|
||||
}
|
||||
|
||||
rowsSkip := pop.po.n - rowsProcessed
|
||||
rowsSkip := pop.po.offset - rowsProcessed
|
||||
br.skipRows(int(rowsSkip))
|
||||
pop.ppBase.writeBlock(workerID, br)
|
||||
}
|
||||
@ -70,7 +70,7 @@ func parsePipeOffset(lex *lexer) (*pipeOffset, error) {
|
||||
}
|
||||
lex.nextToken()
|
||||
po := &pipeOffset{
|
||||
n: n,
|
||||
offset: n,
|
||||
}
|
||||
return po, nil
|
||||
}
|
||||
|
21
lib/logstorage/pipe_offset_test.go
Normal file
21
lib/logstorage/pipe_offset_test.go
Normal file
@ -0,0 +1,21 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPipeOffsetUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
f("offset 10", "*", "", "*", "")
|
||||
|
||||
// all the needed fields, plus unneeded fields
|
||||
f("offset 10", "*", "f1,f2", "*", "f1,f2")
|
||||
|
||||
// needed fields
|
||||
f("offset 10", "f1,f2", "", "f1,f2", "")
|
||||
}
|
@ -40,7 +40,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
|
||||
}
|
||||
if neededFields.contains("*") {
|
||||
// update only unneeded fields
|
||||
unneededFields.addAll(pr.dstFields)
|
||||
unneededFields.addFields(pr.dstFields)
|
||||
for i, srcField := range pr.srcFields {
|
||||
if neededSrcFields[i] {
|
||||
unneededFields.remove(srcField)
|
||||
@ -50,7 +50,7 @@ func (pr *pipeRename) updateNeededFields(neededFields, unneededFields fieldsSet)
|
||||
}
|
||||
} else {
|
||||
// update only needed fields and reset unneeded fields
|
||||
neededFields.removeAll(pr.dstFields)
|
||||
neededFields.removeFields(pr.dstFields)
|
||||
for i, srcField := range pr.srcFields {
|
||||
if neededSrcFields[i] {
|
||||
neededFields.add(srcField)
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeRenameUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeRename(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
|
@ -11,7 +11,6 @@ import (
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/stringsutil"
|
||||
)
|
||||
@ -80,9 +79,12 @@ func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
|
||||
|
||||
shards := make([]pipeSortProcessorShard, workersCount)
|
||||
for i := range shards {
|
||||
shard := &shards[i]
|
||||
shard.ps = ps
|
||||
shard.stateSizeBudget = stateSizeBudgetChunk
|
||||
shards[i] = pipeSortProcessorShard{
|
||||
pipeSortProcessorShardNopad: pipeSortProcessorShardNopad{
|
||||
ps: ps,
|
||||
stateSizeBudget: stateSizeBudgetChunk,
|
||||
},
|
||||
}
|
||||
maxStateSize -= stateSizeBudgetChunk
|
||||
}
|
||||
|
||||
@ -202,12 +204,14 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||
|
||||
columnValues := shard.columnValues[:0]
|
||||
for _, c := range cs {
|
||||
columnValues = append(columnValues, c.getValues(br))
|
||||
values := c.getValues(br)
|
||||
columnValues = append(columnValues, values)
|
||||
}
|
||||
shard.columnValues = columnValues
|
||||
|
||||
// Generate byColumns
|
||||
var rc resultColumn
|
||||
valuesEncoded := make([]string, len(br.timestamps))
|
||||
shard.stateSizeBudget -= len(valuesEncoded) * int(unsafe.Sizeof(valuesEncoded[0]))
|
||||
|
||||
bb := bbPool.Get()
|
||||
for rowIdx := range br.timestamps {
|
||||
@ -219,7 +223,12 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||
bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v)
|
||||
bb.B = append(bb.B, ',')
|
||||
}
|
||||
rc.addValue(bytesutil.ToUnsafeString(bb.B))
|
||||
if rowIdx > 0 && valuesEncoded[rowIdx-1] == string(bb.B) {
|
||||
valuesEncoded[rowIdx] = valuesEncoded[rowIdx-1]
|
||||
} else {
|
||||
valuesEncoded[rowIdx] = string(bb.B)
|
||||
shard.stateSizeBudget -= len(bb.B)
|
||||
}
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
|
||||
@ -232,13 +241,13 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||
{
|
||||
c: &blockResultColumn{
|
||||
valueType: valueTypeString,
|
||||
encodedValues: rc.values,
|
||||
valuesEncoded: valuesEncoded,
|
||||
},
|
||||
i64Values: i64Values,
|
||||
f64Values: f64Values,
|
||||
},
|
||||
}
|
||||
shard.stateSizeBudget -= len(rc.buf) + int(unsafe.Sizeof(byColumns[0])+unsafe.Sizeof(*byColumns[0].c))
|
||||
shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]) + unsafe.Sizeof(*byColumns[0].c))
|
||||
|
||||
// Append br to shard.blocks.
|
||||
shard.blocks = append(shard.blocks, sortBlock{
|
||||
@ -260,8 +269,8 @@ func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
||||
continue
|
||||
}
|
||||
if c.isConst {
|
||||
bc.i64Values = shard.createInt64Values(c.encodedValues)
|
||||
bc.f64Values = shard.createFloat64Values(c.encodedValues)
|
||||
bc.i64Values = shard.createInt64Values(c.valuesEncoded)
|
||||
bc.f64Values = shard.createFloat64Values(c.valuesEncoded)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -512,14 +521,10 @@ func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
|
||||
|
||||
rcs = wctx.rcs[:0]
|
||||
for _, bf := range byFields {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: bf.name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, bf.name)
|
||||
}
|
||||
for _, c := range b.otherColumns {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: c.name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, c.name)
|
||||
}
|
||||
wctx.rcs = rcs
|
||||
}
|
||||
@ -558,7 +563,7 @@ func (wctx *pipeSortWriteContext) flush() {
|
||||
wctx.psp.ppBase.writeBlock(0, br)
|
||||
br.reset()
|
||||
for i := range rcs {
|
||||
rcs[i].resetKeepName()
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
@ -610,8 +615,8 @@ func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSort
|
||||
|
||||
if cA.c.isConst && cB.c.isConst {
|
||||
// Fast path - compare const values
|
||||
ccA := cA.c.encodedValues[0]
|
||||
ccB := cB.c.encodedValues[0]
|
||||
ccA := cA.c.valuesEncoded[0]
|
||||
ccB := cB.c.valuesEncoded[0]
|
||||
if ccA == ccB {
|
||||
continue
|
||||
}
|
||||
@ -689,8 +694,10 @@ func parsePipeSort(lex *lexer) (*pipeSort, error) {
|
||||
lex.nextToken()
|
||||
|
||||
var ps pipeSort
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
if lex.isKeyword("by", "(") {
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
}
|
||||
bfs, err := parseBySortFields(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeSortUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeSort(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
|
@ -20,23 +20,34 @@ type pipeStats struct {
|
||||
// byFields contains field names with optional buckets from 'by(...)' clause.
|
||||
byFields []*byStatsField
|
||||
|
||||
// resultNames contains names of output results generated by funcs.
|
||||
resultNames []string
|
||||
|
||||
// funcs contains stats functions to execute.
|
||||
funcs []statsFunc
|
||||
funcs []pipeStatsFunc
|
||||
}
|
||||
|
||||
// pipeStatsFunc is a single stats function from the 'stats' pipe
// together with its optional 'if (...)' filter and the name of its result.
type pipeStatsFunc struct {
	// f is stats function to execute
	f statsFunc

	// neededFieldsForFunc contains needed fields for f execution
	//
	// NOTE(review): parsePipeStats fills this list from iff.updateNeededFields only,
	// i.e. with the fields referenced by the 'if' filter - confirm that the fields
	// needed by f itself are covered too.
	neededFieldsForFunc []string

	// iff is an additional filter, which is applied to results before executing f on them
	//
	// It is nil when the function has no 'if (...)' clause.
	iff filter

	// resultName is the name of the output generated by f
	resultName string
}
|
||||
|
||||
type statsFunc interface {
|
||||
// String returns string representation of statsFunc
|
||||
String() string
|
||||
|
||||
// neededFields returns the needed fields for calculating the given stats
|
||||
neededFields() []string
|
||||
// updateNeededFields update neededFields with the fields needed for calculating the given stats
|
||||
updateNeededFields(neededFields fieldsSet)
|
||||
|
||||
// newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc.
|
||||
// newStatsProcessor must create new statsProcessor for calculating stats for the given statsFunc
|
||||
//
|
||||
// It also must return the size in bytes of the returned statsProcessor.
|
||||
// It also must return the size in bytes of the returned statsProcessor
|
||||
newStatsProcessor() (statsProcessor, int)
|
||||
}
|
||||
|
||||
@ -77,7 +88,12 @@ func (ps *pipeStats) String() string {
|
||||
}
|
||||
a := make([]string, len(ps.funcs))
|
||||
for i, f := range ps.funcs {
|
||||
a[i] = f.String() + " as " + quoteTokenIfNeeded(ps.resultNames[i])
|
||||
line := f.f.String()
|
||||
if f.iff != nil {
|
||||
line += " if (" + f.iff.String() + ")"
|
||||
}
|
||||
line += " as " + quoteTokenIfNeeded(f.resultName)
|
||||
a[i] = line
|
||||
}
|
||||
s += strings.Join(a, ", ")
|
||||
return s
|
||||
@ -87,22 +103,17 @@ func (ps *pipeStats) updateNeededFields(neededFields, unneededFields fieldsSet)
|
||||
neededFieldsOrig := neededFields.clone()
|
||||
neededFields.reset()
|
||||
|
||||
byFields := make([]string, len(ps.byFields))
|
||||
for i, bf := range ps.byFields {
|
||||
byFields[i] = bf.name
|
||||
// byFields are needed unconditionally, since the output number of rows depends on them.
|
||||
for _, bf := range ps.byFields {
|
||||
neededFields.add(bf.name)
|
||||
}
|
||||
|
||||
for _, f := range byFields {
|
||||
if neededFieldsOrig.contains(f) && !unneededFields.contains(f) {
|
||||
neededFields.addAll(byFields)
|
||||
}
|
||||
}
|
||||
|
||||
for i, resultName := range ps.resultNames {
|
||||
if neededFieldsOrig.contains(resultName) && !unneededFields.contains(resultName) {
|
||||
funcFields := ps.funcs[i].neededFields()
|
||||
neededFields.addAll(byFields)
|
||||
neededFields.addAll(funcFields)
|
||||
for _, f := range ps.funcs {
|
||||
if neededFieldsOrig.contains(f.resultName) && !unneededFields.contains(f.resultName) {
|
||||
f.f.updateNeededFields(neededFields)
|
||||
if f.iff != nil {
|
||||
f.iff.updateNeededFields(neededFields)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -115,11 +126,21 @@ func (ps *pipeStats) newPipeProcessor(workersCount int, stopCh <-chan struct{},
|
||||
maxStateSize := int64(float64(memory.Allowed()) * 0.3)
|
||||
|
||||
shards := make([]pipeStatsProcessorShard, workersCount)
|
||||
funcsLen := len(ps.funcs)
|
||||
for i := range shards {
|
||||
shard := &shards[i]
|
||||
shard.ps = ps
|
||||
shard.m = make(map[string]*pipeStatsGroup)
|
||||
shard.stateSizeBudget = stateSizeBudgetChunk
|
||||
shards[i] = pipeStatsProcessorShard{
|
||||
pipeStatsProcessorShardNopad: pipeStatsProcessorShardNopad{
|
||||
ps: ps,
|
||||
|
||||
m: make(map[string]*pipeStatsGroup),
|
||||
|
||||
bms: make([]bitmap, funcsLen),
|
||||
brs: make([]*blockResult, funcsLen),
|
||||
brsBuf: make([]blockResult, funcsLen),
|
||||
|
||||
stateSizeBudget: stateSizeBudgetChunk,
|
||||
},
|
||||
}
|
||||
maxStateSize -= stateSizeBudgetChunk
|
||||
}
|
||||
|
||||
@ -159,7 +180,13 @@ type pipeStatsProcessorShard struct {
|
||||
|
||||
type pipeStatsProcessorShardNopad struct {
|
||||
ps *pipeStats
|
||||
m map[string]*pipeStatsGroup
|
||||
|
||||
m map[string]*pipeStatsGroup
|
||||
|
||||
// bms, brs and brsBuf are used for applying per-func filters.
|
||||
bms []bitmap
|
||||
brs []*blockResult
|
||||
brsBuf []blockResult
|
||||
|
||||
columnValues [][]string
|
||||
keyBuf []byte
|
||||
@ -170,10 +197,14 @@ type pipeStatsProcessorShardNopad struct {
|
||||
func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
byFields := shard.ps.byFields
|
||||
|
||||
// Apply per-function filters
|
||||
brs := shard.applyPerFunctionFilters(br)
|
||||
|
||||
// Process stats for the defined functions
|
||||
if len(byFields) == 0 {
|
||||
// Fast path - pass all the rows to a single group with empty key.
|
||||
psg := shard.getPipeStatsGroup(nil)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
|
||||
return
|
||||
}
|
||||
if len(byFields) == 1 {
|
||||
@ -182,19 +213,19 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
c := br.getColumnByName(bf.name)
|
||||
if c.isConst {
|
||||
// Fast path for column with constant value.
|
||||
v := br.getBucketedValue(c.encodedValues[0], bf)
|
||||
v := br.getBucketedValue(c.valuesEncoded[0], bf)
|
||||
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(v))
|
||||
psg := shard.getPipeStatsGroup(shard.keyBuf)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
|
||||
return
|
||||
}
|
||||
|
||||
values := c.getBucketedValues(br, bf)
|
||||
values := c.getValuesBucketed(br, bf)
|
||||
if areConstValues(values) {
|
||||
// Fast path for column with constant values.
|
||||
shard.keyBuf = encoding.MarshalBytes(shard.keyBuf[:0], bytesutil.ToUnsafeBytes(values[0]))
|
||||
psg := shard.getPipeStatsGroup(shard.keyBuf)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
|
||||
return
|
||||
}
|
||||
|
||||
@ -206,7 +237,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
keyBuf = encoding.MarshalBytes(keyBuf[:0], bytesutil.ToUnsafeBytes(values[i]))
|
||||
psg = shard.getPipeStatsGroup(keyBuf)
|
||||
}
|
||||
shard.stateSizeBudget -= psg.updateStatsForRow(br, i)
|
||||
shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
|
||||
}
|
||||
shard.keyBuf = keyBuf
|
||||
return
|
||||
@ -216,7 +247,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
columnValues := shard.columnValues[:0]
|
||||
for _, bf := range byFields {
|
||||
c := br.getColumnByName(bf.name)
|
||||
values := c.getBucketedValues(br, bf)
|
||||
values := c.getValuesBucketed(br, bf)
|
||||
columnValues = append(columnValues, values)
|
||||
}
|
||||
shard.columnValues = columnValues
|
||||
@ -236,7 +267,7 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[0]))
|
||||
}
|
||||
psg := shard.getPipeStatsGroup(keyBuf)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(br)
|
||||
shard.stateSizeBudget -= psg.updateStatsForAllRows(brs)
|
||||
shard.keyBuf = keyBuf
|
||||
return
|
||||
}
|
||||
@ -261,11 +292,44 @@ func (shard *pipeStatsProcessorShard) writeBlock(br *blockResult) {
|
||||
}
|
||||
psg = shard.getPipeStatsGroup(keyBuf)
|
||||
}
|
||||
shard.stateSizeBudget -= psg.updateStatsForRow(br, i)
|
||||
shard.stateSizeBudget -= psg.updateStatsForRow(brs, i)
|
||||
}
|
||||
shard.keyBuf = keyBuf
|
||||
}
|
||||
|
||||
func (shard *pipeStatsProcessorShard) applyPerFunctionFilters(brSrc *blockResult) []*blockResult {
|
||||
funcs := shard.ps.funcs
|
||||
brs := shard.brs
|
||||
for i := range funcs {
|
||||
iff := funcs[i].iff
|
||||
if iff == nil {
|
||||
// Fast path - there are no per-function filters
|
||||
brs[i] = brSrc
|
||||
continue
|
||||
}
|
||||
|
||||
bm := &shard.bms[i]
|
||||
bm.init(len(brSrc.timestamps))
|
||||
bm.setBits()
|
||||
iff.applyToBlockResult(brSrc, bm)
|
||||
if bm.areAllBitsSet() {
|
||||
// Fast path - per-function filter doesn't filter out rows
|
||||
brs[i] = brSrc
|
||||
continue
|
||||
}
|
||||
|
||||
// Store the remaining rows for the needed per-func fields to brDst
|
||||
brDst := &shard.brsBuf[i]
|
||||
if bm.isZero() {
|
||||
brDst.reset()
|
||||
} else {
|
||||
brDst.initFromFilterNeededColumns(brSrc, bm, funcs[i].neededFieldsForFunc)
|
||||
}
|
||||
brs[i] = brDst
|
||||
}
|
||||
return brs
|
||||
}
|
||||
|
||||
func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGroup {
|
||||
psg := shard.m[string(key)]
|
||||
if psg != nil {
|
||||
@ -274,7 +338,7 @@ func (shard *pipeStatsProcessorShard) getPipeStatsGroup(key []byte) *pipeStatsGr
|
||||
|
||||
sfps := make([]statsProcessor, len(shard.ps.funcs))
|
||||
for i, f := range shard.ps.funcs {
|
||||
sfp, stateSize := f.newStatsProcessor()
|
||||
sfp, stateSize := f.f.newStatsProcessor()
|
||||
sfps[i] = sfp
|
||||
shard.stateSizeBudget -= stateSize
|
||||
}
|
||||
@ -291,18 +355,18 @@ type pipeStatsGroup struct {
|
||||
sfps []statsProcessor
|
||||
}
|
||||
|
||||
func (psg *pipeStatsGroup) updateStatsForAllRows(br *blockResult) int {
|
||||
func (psg *pipeStatsGroup) updateStatsForAllRows(brs []*blockResult) int {
|
||||
n := 0
|
||||
for _, sfp := range psg.sfps {
|
||||
n += sfp.updateStatsForAllRows(br)
|
||||
for i, sfp := range psg.sfps {
|
||||
n += sfp.updateStatsForAllRows(brs[i])
|
||||
}
|
||||
return n
|
||||
}
|
||||
|
||||
func (psg *pipeStatsGroup) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||
func (psg *pipeStatsGroup) updateStatsForRow(brs []*blockResult, rowIdx int) int {
|
||||
n := 0
|
||||
for _, sfp := range psg.sfps {
|
||||
n += sfp.updateStatsForRow(br, rowIdx)
|
||||
for i, sfp := range psg.sfps {
|
||||
n += sfp.updateStatsForRow(brs[i], rowIdx)
|
||||
}
|
||||
return n
|
||||
}
|
||||
@ -368,16 +432,12 @@ func (psp *pipeStatsProcessor) flush() error {
|
||||
m = shards[0].m
|
||||
}
|
||||
|
||||
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.resultNames))
|
||||
rcs := make([]resultColumn, 0, len(byFields)+len(psp.ps.funcs))
|
||||
for _, bf := range byFields {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: bf.name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, bf.name)
|
||||
}
|
||||
for _, resultName := range psp.ps.resultNames {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: resultName,
|
||||
})
|
||||
for _, f := range psp.ps.funcs {
|
||||
rcs = appendResultColumnWithName(rcs, f.resultName)
|
||||
}
|
||||
var br blockResult
|
||||
|
||||
@ -423,7 +483,7 @@ func (psp *pipeStatsProcessor) flush() error {
|
||||
psp.ppBase.writeBlock(0, &br)
|
||||
br.reset()
|
||||
for i := range rcs {
|
||||
rcs[i].resetKeepName()
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
valuesLen = 0
|
||||
}
|
||||
@ -443,8 +503,10 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
|
||||
lex.nextToken()
|
||||
|
||||
var ps pipeStats
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
if lex.isKeyword("by", "(") {
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
}
|
||||
bfs, err := parseByStatsFields(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
||||
@ -452,17 +514,36 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
|
||||
ps.byFields = bfs
|
||||
}
|
||||
|
||||
var resultNames []string
|
||||
var funcs []statsFunc
|
||||
var funcs []pipeStatsFunc
|
||||
for {
|
||||
sf, resultName, err := parseStatsFunc(lex)
|
||||
var f pipeStatsFunc
|
||||
sf, err := parseStatsFunc(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resultNames = append(resultNames, resultName)
|
||||
funcs = append(funcs, sf)
|
||||
f.f = sf
|
||||
|
||||
if lex.isKeyword("if") {
|
||||
iff, err := parseIfFilter(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'if' filter for %s: %w", sf, err)
|
||||
}
|
||||
f.iff = iff
|
||||
|
||||
neededFields := newFieldsSet()
|
||||
iff.updateNeededFields(neededFields)
|
||||
f.neededFieldsForFunc = neededFields.getAll()
|
||||
}
|
||||
|
||||
resultName, err := parseResultName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse result name for %s: %w", sf, err)
|
||||
}
|
||||
f.resultName = resultName
|
||||
|
||||
funcs = append(funcs, f)
|
||||
|
||||
if lex.isKeyword("|", ")", "") {
|
||||
ps.resultNames = resultNames
|
||||
ps.funcs = funcs
|
||||
return &ps, nil
|
||||
}
|
||||
@ -473,90 +554,107 @@ func parsePipeStats(lex *lexer) (*pipeStats, error) {
|
||||
}
|
||||
}
|
||||
|
||||
func parseStatsFunc(lex *lexer) (statsFunc, string, error) {
|
||||
var sf statsFunc
|
||||
func parseIfFilter(lex *lexer) (filter, error) {
|
||||
if !lex.isKeyword("if") {
|
||||
return nil, fmt.Errorf("unexpected keyword %q; expecting 'if'", lex.token)
|
||||
}
|
||||
lex.nextToken()
|
||||
if !lex.isKeyword("(") {
|
||||
return nil, fmt.Errorf("unexpected token %q after 'if'; expecting '('", lex.token)
|
||||
}
|
||||
lex.nextToken()
|
||||
if lex.isKeyword(")") {
|
||||
lex.nextToken()
|
||||
return &filterNoop{}, nil
|
||||
}
|
||||
f, err := parseFilter(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'if' filter: %w", err)
|
||||
}
|
||||
if !lex.isKeyword(")") {
|
||||
return nil, fmt.Errorf("unexpected token %q after 'if' filter; expecting ')'", lex.token)
|
||||
}
|
||||
lex.nextToken()
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func parseStatsFunc(lex *lexer) (statsFunc, error) {
|
||||
switch {
|
||||
case lex.isKeyword("count"):
|
||||
scs, err := parseStatsCount(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'count' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'count' func: %w", err)
|
||||
}
|
||||
sf = scs
|
||||
return scs, nil
|
||||
case lex.isKeyword("count_empty"):
|
||||
scs, err := parseStatsCountEmpty(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'count_empty' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'count_empty' func: %w", err)
|
||||
}
|
||||
sf = scs
|
||||
return scs, nil
|
||||
case lex.isKeyword("count_uniq"):
|
||||
sus, err := parseStatsCountUniq(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'count_uniq' func: %w", err)
|
||||
}
|
||||
sf = sus
|
||||
return sus, nil
|
||||
case lex.isKeyword("sum"):
|
||||
sss, err := parseStatsSum(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'sum' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'sum' func: %w", err)
|
||||
}
|
||||
sf = sss
|
||||
return sss, nil
|
||||
case lex.isKeyword("max"):
|
||||
sms, err := parseStatsMax(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'max' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'max' func: %w", err)
|
||||
}
|
||||
sf = sms
|
||||
return sms, nil
|
||||
case lex.isKeyword("min"):
|
||||
sms, err := parseStatsMin(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'min' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'min' func: %w", err)
|
||||
}
|
||||
sf = sms
|
||||
return sms, nil
|
||||
case lex.isKeyword("avg"):
|
||||
sas, err := parseStatsAvg(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'avg' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'avg' func: %w", err)
|
||||
}
|
||||
sf = sas
|
||||
return sas, nil
|
||||
case lex.isKeyword("uniq_values"):
|
||||
sus, err := parseStatsUniqValues(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'uniq_values' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'uniq_values' func: %w", err)
|
||||
}
|
||||
sf = sus
|
||||
return sus, nil
|
||||
case lex.isKeyword("values"):
|
||||
svs, err := parseStatsValues(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'values' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'values' func: %w", err)
|
||||
}
|
||||
sf = svs
|
||||
return svs, nil
|
||||
case lex.isKeyword("sum_len"):
|
||||
sss, err := parseStatsSumLen(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'sum_len' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'sum_len' func: %w", err)
|
||||
}
|
||||
sf = sss
|
||||
return sss, nil
|
||||
case lex.isKeyword("quantile"):
|
||||
sqs, err := parseStatsQuantile(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'quantile' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'quantile' func: %w", err)
|
||||
}
|
||||
sf = sqs
|
||||
return sqs, nil
|
||||
case lex.isKeyword("median"):
|
||||
sms, err := parseStatsMedian(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse 'median' func: %w", err)
|
||||
return nil, fmt.Errorf("cannot parse 'median' func: %w", err)
|
||||
}
|
||||
sf = sms
|
||||
return sms, nil
|
||||
default:
|
||||
return nil, "", fmt.Errorf("unknown stats func %q", lex.token)
|
||||
return nil, fmt.Errorf("unknown stats func %q", lex.token)
|
||||
}
|
||||
|
||||
resultName, err := parseResultName(lex)
|
||||
if err != nil {
|
||||
return nil, "", fmt.Errorf("cannot parse result name for %s: %w", sf, err)
|
||||
}
|
||||
return sf, resultName, nil
|
||||
}
|
||||
|
||||
func parseResultName(lex *lexer) (string, error) {
|
||||
@ -619,10 +717,11 @@ func parseByStatsFields(lex *lexer) ([]*byStatsField, error) {
|
||||
lex.nextToken()
|
||||
return bfs, nil
|
||||
}
|
||||
fieldName, err := parseFieldName(lex)
|
||||
fieldName, err := getCompoundPhrase(lex, false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse field name: %w", err)
|
||||
}
|
||||
fieldName = getCanonicalColumnName(fieldName)
|
||||
bf := &byStatsField{
|
||||
name: fieldName,
|
||||
}
|
||||
@ -796,10 +895,10 @@ func parseFieldNamesInParens(lex *lexer) ([]string, error) {
|
||||
}
|
||||
|
||||
func parseFieldName(lex *lexer) (string, error) {
|
||||
if lex.isKeyword(",", "(", ")", "[", "]", "|", ":", "") {
|
||||
return "", fmt.Errorf("unexpected token: %q", lex.token)
|
||||
fieldName, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("cannot parse field name: %w", err)
|
||||
}
|
||||
fieldName := getCompoundPhrase(lex, false)
|
||||
fieldName = getCanonicalColumnName(fieldName)
|
||||
return fieldName, nil
|
||||
}
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeStatsUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeStats(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when parsing %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
@ -44,7 +33,7 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
|
||||
f("stats count(f1,f2) r1, sum(f3,f4) r2", "*", "r1,r3", "f3,f4", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2", "b1,b2", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1", "b1,b2", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "*", "r1,r2,b1,b2", "b1,b2", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "*", "r1,r3", "b1,b2,f1,f3", "")
|
||||
|
||||
// needed fields do not intersect with stats fields
|
||||
@ -52,8 +41,8 @@ func TestPipeStatsUpdateNeededFields(t *testing.T) {
|
||||
f("stats count(*) r1", "r2", "", "", "")
|
||||
f("stats count(f1,f2) r1", "r2", "", "", "")
|
||||
f("stats count(f1,f2) r1, sum(f3,f4) r2", "r3", "", "", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1", "r2", "", "b1,b2", "")
|
||||
f("stats by (b1,b2) count(f1,f2) r1, count(f1,f3) r2", "r3", "", "b1,b2", "")
|
||||
|
||||
// needed fields intersect with stats fields
|
||||
f("stats count() r1", "r1,r2", "", "", "")
|
||||
|
@ -18,9 +18,12 @@ func newPipeTopkProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}
|
||||
|
||||
shards := make([]pipeTopkProcessorShard, workersCount)
|
||||
for i := range shards {
|
||||
shard := &shards[i]
|
||||
shard.ps = ps
|
||||
shard.stateSizeBudget = stateSizeBudgetChunk
|
||||
shards[i] = pipeTopkProcessorShard{
|
||||
pipeTopkProcessorShardNopad: pipeTopkProcessorShardNopad{
|
||||
ps: ps,
|
||||
stateSizeBudget: stateSizeBudgetChunk,
|
||||
},
|
||||
}
|
||||
maxStateSize -= stateSizeBudgetChunk
|
||||
}
|
||||
|
||||
@ -72,10 +75,11 @@ type pipeTopkProcessorShardNopad struct {
|
||||
tmpRow pipeTopkRow
|
||||
|
||||
// these are aux fields for determining whether the next row must be stored in rows.
|
||||
byColumnValues [][]string
|
||||
otherColumnValues []pipeTopkOtherColumn
|
||||
byColumns []string
|
||||
otherColumns []Field
|
||||
byColumnValues [][]string
|
||||
csOther []*blockResultColumn
|
||||
byColumns []string
|
||||
byColumnsIsTime []bool
|
||||
otherColumns []Field
|
||||
|
||||
// stateSizeBudget is the remaining budget for the whole state size for the shard.
|
||||
// The per-shard budget is provided in chunks from the parent pipeTopkProcessor.
|
||||
@ -83,13 +87,10 @@ type pipeTopkProcessorShardNopad struct {
|
||||
}
|
||||
|
||||
type pipeTopkRow struct {
|
||||
byColumns []string
|
||||
otherColumns []Field
|
||||
}
|
||||
|
||||
type pipeTopkOtherColumn struct {
|
||||
name string
|
||||
values []string
|
||||
byColumns []string
|
||||
byColumnsIsTime []bool
|
||||
otherColumns []Field
|
||||
timestamp int64
|
||||
}
|
||||
|
||||
func (r *pipeTopkRow) clone() *pipeTopkRow {
|
||||
@ -98,6 +99,8 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
|
||||
byColumnsCopy[i] = strings.Clone(r.byColumns[i])
|
||||
}
|
||||
|
||||
byColumnsIsTime := append([]bool{}, r.byColumnsIsTime...)
|
||||
|
||||
otherColumnsCopy := make([]Field, len(r.otherColumns))
|
||||
for i := range otherColumnsCopy {
|
||||
src := &r.otherColumns[i]
|
||||
@ -107,8 +110,10 @@ func (r *pipeTopkRow) clone() *pipeTopkRow {
|
||||
}
|
||||
|
||||
return &pipeTopkRow{
|
||||
byColumns: byColumnsCopy,
|
||||
otherColumns: otherColumnsCopy,
|
||||
byColumns: byColumnsCopy,
|
||||
byColumnsIsTime: byColumnsIsTime,
|
||||
otherColumns: otherColumnsCopy,
|
||||
timestamp: r.timestamp,
|
||||
}
|
||||
}
|
||||
|
||||
@ -120,6 +125,8 @@ func (r *pipeTopkRow) sizeBytes() int {
|
||||
}
|
||||
n += len(r.byColumns) * int(unsafe.Sizeof(r.byColumns[0]))
|
||||
|
||||
n += len(r.byColumnsIsTime) * int(unsafe.Sizeof(r.byColumnsIsTime[0]))
|
||||
|
||||
for _, f := range r.otherColumns {
|
||||
n += len(f.Name) + len(f.Value)
|
||||
}
|
||||
@ -167,14 +174,15 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
|
||||
|
||||
byColumnValues := shard.byColumnValues[:0]
|
||||
for _, c := range cs {
|
||||
byColumnValues = append(byColumnValues, c.getValues(br))
|
||||
values := c.getValues(br)
|
||||
byColumnValues = append(byColumnValues, values)
|
||||
}
|
||||
shard.byColumnValues = byColumnValues
|
||||
|
||||
byColumns := shard.byColumns[:0]
|
||||
otherColumns := shard.otherColumns[:0]
|
||||
byColumnsIsTime := shard.byColumnsIsTime[:0]
|
||||
bb := bbPool.Get()
|
||||
for rowIdx := range br.timestamps {
|
||||
for rowIdx, timestamp := range br.timestamps {
|
||||
byColumns = byColumns[:0]
|
||||
bb.B = bb.B[:0]
|
||||
for i, values := range byColumnValues {
|
||||
@ -183,31 +191,33 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
|
||||
bb.B = append(bb.B, ',')
|
||||
}
|
||||
byColumns = append(byColumns, bytesutil.ToUnsafeString(bb.B))
|
||||
byColumnsIsTime = append(byColumnsIsTime, false)
|
||||
|
||||
otherColumns = otherColumns[:0]
|
||||
for i, values := range byColumnValues {
|
||||
otherColumns = append(otherColumns, Field{
|
||||
Name: cs[i].name,
|
||||
Value: values[rowIdx],
|
||||
})
|
||||
}
|
||||
|
||||
shard.addRow(byColumns, otherColumns)
|
||||
shard.addRow(br, byColumns, byColumnsIsTime, cs, rowIdx, timestamp)
|
||||
}
|
||||
bbPool.Put(bb)
|
||||
shard.byColumns = byColumns
|
||||
shard.otherColumns = otherColumns
|
||||
shard.byColumnsIsTime = byColumnsIsTime
|
||||
} else {
|
||||
// Sort by byFields
|
||||
|
||||
byColumnValues := shard.byColumnValues[:0]
|
||||
byColumnsIsTime := shard.byColumnsIsTime[:0]
|
||||
for _, bf := range byFields {
|
||||
c := br.getColumnByName(bf.name)
|
||||
byColumnValues = append(byColumnValues, c.getValues(br))
|
||||
|
||||
byColumnsIsTime = append(byColumnsIsTime, c.isTime)
|
||||
|
||||
var values []string
|
||||
if !c.isTime {
|
||||
values = c.getValues(br)
|
||||
}
|
||||
byColumnValues = append(byColumnValues, values)
|
||||
}
|
||||
shard.byColumnValues = byColumnValues
|
||||
shard.byColumnsIsTime = byColumnsIsTime
|
||||
|
||||
otherColumnValues := shard.otherColumnValues[:0]
|
||||
csOther := shard.csOther[:0]
|
||||
for _, c := range cs {
|
||||
isByField := false
|
||||
for _, bf := range byFields {
|
||||
@ -217,42 +227,35 @@ func (shard *pipeTopkProcessorShard) writeBlock(br *blockResult) {
|
||||
}
|
||||
}
|
||||
if !isByField {
|
||||
otherColumnValues = append(otherColumnValues, pipeTopkOtherColumn{
|
||||
name: c.name,
|
||||
values: c.getValues(br),
|
||||
})
|
||||
csOther = append(csOther, c)
|
||||
}
|
||||
}
|
||||
shard.otherColumnValues = otherColumnValues
|
||||
shard.csOther = csOther
|
||||
|
||||
// add rows to shard
|
||||
byColumns := shard.byColumns[:0]
|
||||
otherColumns := shard.otherColumns[:0]
|
||||
for rowIdx := range br.timestamps {
|
||||
for rowIdx, timestamp := range br.timestamps {
|
||||
byColumns = byColumns[:0]
|
||||
for _, values := range byColumnValues {
|
||||
byColumns = append(byColumns, values[rowIdx])
|
||||
|
||||
for i, values := range byColumnValues {
|
||||
v := ""
|
||||
if !byColumnsIsTime[i] {
|
||||
v = values[rowIdx]
|
||||
}
|
||||
byColumns = append(byColumns, v)
|
||||
}
|
||||
|
||||
otherColumns = otherColumns[:0]
|
||||
for _, ocv := range otherColumnValues {
|
||||
otherColumns = append(otherColumns, Field{
|
||||
Name: ocv.name,
|
||||
Value: ocv.values[rowIdx],
|
||||
})
|
||||
}
|
||||
|
||||
shard.addRow(byColumns, otherColumns)
|
||||
shard.addRow(br, byColumns, byColumnsIsTime, csOther, rowIdx, timestamp)
|
||||
}
|
||||
shard.byColumns = byColumns
|
||||
shard.otherColumns = otherColumns
|
||||
}
|
||||
}
|
||||
|
||||
func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []Field) {
|
||||
func (shard *pipeTopkProcessorShard) addRow(br *blockResult, byColumns []string, byColumnsIsTime []bool, csOther []*blockResultColumn, rowIdx int, timestamp int64) {
|
||||
r := &shard.tmpRow
|
||||
r.byColumns = byColumns
|
||||
r.otherColumns = otherColumns
|
||||
r.byColumnsIsTime = byColumnsIsTime
|
||||
r.timestamp = timestamp
|
||||
|
||||
rows := shard.rows
|
||||
if len(rows) > 0 && !topkLess(shard.ps, r, rows[0]) {
|
||||
@ -261,9 +264,25 @@ func (shard *pipeTopkProcessorShard) addRow(byColumns []string, otherColumns []F
|
||||
}
|
||||
|
||||
// Slow path - add r to shard.rows.
|
||||
|
||||
// Populate r.otherColumns
|
||||
otherColumns := shard.otherColumns[:0]
|
||||
for _, c := range csOther {
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
otherColumns = append(otherColumns, Field{
|
||||
Name: c.name,
|
||||
Value: v,
|
||||
})
|
||||
}
|
||||
shard.otherColumns = otherColumns
|
||||
r.otherColumns = otherColumns
|
||||
|
||||
// Clone r, so it doesn't refer the original data.
|
||||
r = r.clone()
|
||||
shard.stateSizeBudget -= r.sizeBytes()
|
||||
if uint64(len(rows)) < shard.ps.limit {
|
||||
|
||||
// Push r to shard.rows.
|
||||
if uint64(len(rows)) < shard.ps.offset+shard.ps.limit {
|
||||
heap.Push(shard, r)
|
||||
shard.stateSizeBudget -= int(unsafe.Sizeof(r))
|
||||
} else {
|
||||
@ -443,21 +462,23 @@ func (wctx *pipeTopkWriteContext) writeNextRow(shard *pipeTopkProcessorShard) bo
|
||||
|
||||
rcs = wctx.rcs[:0]
|
||||
for _, bf := range byFields {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: bf.name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, bf.name)
|
||||
}
|
||||
for _, c := range r.otherColumns {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: c.Name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, c.Name)
|
||||
}
|
||||
wctx.rcs = rcs
|
||||
}
|
||||
|
||||
var tmpBuf []byte
|
||||
byColumns := r.byColumns
|
||||
byColumnsIsTime := r.byColumnsIsTime
|
||||
for i := range byFields {
|
||||
v := byColumns[i]
|
||||
if byColumnsIsTime[i] {
|
||||
tmpBuf = marshalTimestampRFC3339NanoString(tmpBuf[:0], r.timestamp)
|
||||
v = bytesutil.ToUnsafeString(tmpBuf)
|
||||
}
|
||||
rcs[i].addValue(v)
|
||||
wctx.valuesLen += len(v)
|
||||
}
|
||||
@ -490,7 +511,7 @@ func (wctx *pipeTopkWriteContext) flush() {
|
||||
wctx.ptp.ppBase.writeBlock(0, br)
|
||||
br.reset()
|
||||
for i := range rcs {
|
||||
rcs[i].resetKeepName()
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
@ -529,25 +550,79 @@ func topkLess(ps *pipeSort, a, b *pipeTopkRow) bool {
|
||||
byFields := ps.byFields
|
||||
|
||||
csA := a.byColumns
|
||||
csB := b.byColumns
|
||||
isTimeA := a.byColumnsIsTime
|
||||
|
||||
for k := range csA {
|
||||
csB := b.byColumns
|
||||
isTimeB := b.byColumnsIsTime
|
||||
|
||||
for i := range csA {
|
||||
isDesc := ps.isDesc
|
||||
if len(byFields) > 0 && byFields[k].isDesc {
|
||||
if len(byFields) > 0 && byFields[i].isDesc {
|
||||
isDesc = !isDesc
|
||||
}
|
||||
|
||||
vA := csA[k]
|
||||
vB := csB[k]
|
||||
if isTimeA[i] && isTimeB[i] {
|
||||
// Fast path - compare timestamps
|
||||
if a.timestamp == b.timestamp {
|
||||
continue
|
||||
}
|
||||
if isDesc {
|
||||
return b.timestamp < a.timestamp
|
||||
}
|
||||
return a.timestamp < b.timestamp
|
||||
}
|
||||
|
||||
vA := csA[i]
|
||||
vB := csB[i]
|
||||
|
||||
var bb *bytesutil.ByteBuffer
|
||||
|
||||
if isTimeA[i] || isTimeB[i] {
|
||||
bb = bbPool.Get()
|
||||
}
|
||||
if isTimeA[i] {
|
||||
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], a.timestamp)
|
||||
vA = bytesutil.ToUnsafeString(bb.B)
|
||||
} else if isTimeB[i] {
|
||||
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], a.timestamp)
|
||||
vB = bytesutil.ToUnsafeString(bb.B)
|
||||
}
|
||||
|
||||
if vA == vB {
|
||||
if bb != nil {
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if isDesc {
|
||||
return stringsutil.LessNatural(vB, vA)
|
||||
vA, vB = vB, vA
|
||||
}
|
||||
return stringsutil.LessNatural(vA, vB)
|
||||
ok := lessString(vA, vB)
|
||||
if bb != nil {
|
||||
bbPool.Put(bb)
|
||||
}
|
||||
return ok
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func lessString(a, b string) bool {
|
||||
if a == b {
|
||||
return false
|
||||
}
|
||||
|
||||
nA, okA := tryParseUint64(a)
|
||||
nB, okB := tryParseUint64(b)
|
||||
if okA && okB {
|
||||
return nA < nB
|
||||
}
|
||||
|
||||
fA, okA := tryParseFloat64(a)
|
||||
fB, okB := tryParseFloat64(b)
|
||||
if okA && okB {
|
||||
return fA < fB
|
||||
}
|
||||
|
||||
return stringsutil.LessNatural(a, b)
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package logstorage
|
||||
import (
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"unsafe"
|
||||
|
||||
@ -40,7 +41,7 @@ func (pu *pipeUniq) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if len(pu.byFields) == 0 {
|
||||
neededFields.add("*")
|
||||
} else {
|
||||
neededFields.addAll(pu.byFields)
|
||||
neededFields.addFields(pu.byFields)
|
||||
}
|
||||
}
|
||||
|
||||
@ -49,10 +50,13 @@ func (pu *pipeUniq) newPipeProcessor(workersCount int, stopCh <-chan struct{}, c
|
||||
|
||||
shards := make([]pipeUniqProcessorShard, workersCount)
|
||||
for i := range shards {
|
||||
shard := &shards[i]
|
||||
shard.pu = pu
|
||||
shard.m = make(map[string]struct{})
|
||||
shard.stateSizeBudget = stateSizeBudgetChunk
|
||||
shards[i] = pipeUniqProcessorShard{
|
||||
pipeUniqProcessorShardNopad: pipeUniqProcessorShardNopad{
|
||||
pu: pu,
|
||||
m: make(map[string]struct{}),
|
||||
stateSizeBudget: stateSizeBudgetChunk,
|
||||
},
|
||||
}
|
||||
maxStateSize -= stateSizeBudgetChunk
|
||||
}
|
||||
|
||||
@ -116,7 +120,6 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
m := shard.m
|
||||
byFields := shard.pu.byFields
|
||||
if len(byFields) == 0 {
|
||||
// Take into account all the columns in br.
|
||||
@ -129,20 +132,41 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
|
||||
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(c.name))
|
||||
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(v))
|
||||
}
|
||||
if _, ok := m[string(keyBuf)]; !ok {
|
||||
m[string(keyBuf)] = struct{}{}
|
||||
shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
|
||||
}
|
||||
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
|
||||
}
|
||||
shard.keyBuf = keyBuf
|
||||
return true
|
||||
}
|
||||
if len(byFields) == 1 {
|
||||
// Fast path for a single field.
|
||||
c := br.getColumnByName(byFields[0])
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
shard.updateState(v)
|
||||
return true
|
||||
}
|
||||
if c.valueType == valueTypeDict {
|
||||
for _, v := range c.dictValues {
|
||||
shard.updateState(v)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
values := c.getValues(br)
|
||||
for i, v := range values {
|
||||
if i == 0 || values[i-1] != values[i] {
|
||||
shard.updateState(v)
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Take into account only the selected columns.
|
||||
columnValues := shard.columnValues[:0]
|
||||
for _, f := range byFields {
|
||||
c := br.getColumnByName(f)
|
||||
columnValues = append(columnValues, c.getValues(br))
|
||||
values := c.getValues(br)
|
||||
columnValues = append(columnValues, values)
|
||||
}
|
||||
shard.columnValues = columnValues
|
||||
|
||||
@ -163,16 +187,21 @@ func (shard *pipeUniqProcessorShard) writeBlock(br *blockResult) bool {
|
||||
for _, values := range columnValues {
|
||||
keyBuf = encoding.MarshalBytes(keyBuf, bytesutil.ToUnsafeBytes(values[i]))
|
||||
}
|
||||
if _, ok := m[string(keyBuf)]; !ok {
|
||||
m[string(keyBuf)] = struct{}{}
|
||||
shard.stateSizeBudget -= len(keyBuf) + int(unsafe.Sizeof(""))
|
||||
}
|
||||
shard.updateState(bytesutil.ToUnsafeString(keyBuf))
|
||||
}
|
||||
shard.keyBuf = keyBuf
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func (shard *pipeUniqProcessorShard) updateState(v string) {
|
||||
if _, ok := shard.m[v]; !ok {
|
||||
vCopy := strings.Clone(v)
|
||||
shard.m[vCopy] = struct{}{}
|
||||
shard.stateSizeBudget -= len(vCopy) + int(unsafe.Sizeof(vCopy))
|
||||
}
|
||||
}
|
||||
|
||||
func (pup *pipeUniqProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
@ -253,6 +282,19 @@ func (pup *pipeUniqProcessor) flush() error {
|
||||
}
|
||||
wctx.writeRow(rowFields)
|
||||
}
|
||||
} else if len(byFields) == 1 {
|
||||
fieldName := byFields[0]
|
||||
for k := range m {
|
||||
if needStop(pup.stopCh) {
|
||||
return nil
|
||||
}
|
||||
|
||||
rowFields = append(rowFields[:0], Field{
|
||||
Name: fieldName,
|
||||
Value: k,
|
||||
})
|
||||
wctx.writeRow(rowFields)
|
||||
}
|
||||
} else {
|
||||
for k := range m {
|
||||
if needStop(pup.stopCh) {
|
||||
@ -317,9 +359,7 @@ func (wctx *pipeUniqWriteContext) writeRow(rowFields []Field) {
|
||||
|
||||
rcs = wctx.rcs[:0]
|
||||
for _, f := range rowFields {
|
||||
rcs = append(rcs, resultColumn{
|
||||
name: f.Name,
|
||||
})
|
||||
rcs = appendResultColumnWithName(rcs, f.Name)
|
||||
}
|
||||
wctx.rcs = rcs
|
||||
}
|
||||
@ -349,7 +389,7 @@ func (wctx *pipeUniqWriteContext) flush() {
|
||||
wctx.pup.ppBase.writeBlock(0, br)
|
||||
br.reset()
|
||||
for i := range rcs {
|
||||
rcs[i].resetKeepName()
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
@ -360,8 +400,10 @@ func parsePipeUniq(lex *lexer) (*pipeUniq, error) {
|
||||
lex.nextToken()
|
||||
|
||||
var pu pipeUniq
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
if lex.isKeyword("by", "(") {
|
||||
if lex.isKeyword("by") {
|
||||
lex.nextToken()
|
||||
}
|
||||
bfs, err := parseFieldNamesInParens(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
||||
|
@ -7,18 +7,7 @@ import (
|
||||
func TestPipeUniqUpdateNeededFields(t *testing.T) {
|
||||
f := func(s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
|
||||
t.Helper()
|
||||
|
||||
nfs := newTestFieldsSet(neededFields)
|
||||
unfs := newTestFieldsSet(unneededFields)
|
||||
|
||||
lex := newLexer(s)
|
||||
p, err := parsePipeUniq(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("cannot parse %s: %s", s, err)
|
||||
}
|
||||
p.updateNeededFields(nfs, unfs)
|
||||
|
||||
assertNeededFields(t, nfs, unfs, neededFieldsExpected, unneededFieldsExpected)
|
||||
expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
|
||||
}
|
||||
|
||||
// all the needed fields
|
||||
|
147
lib/logstorage/pipe_unpack_json.go
Normal file
147
lib/logstorage/pipe_unpack_json.go
Normal file
@ -0,0 +1,147 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// pipeUnpackJSON holds the parsed options of the '| unpack_json ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_json-pipe
type pipeUnpackJSON struct {
	// fromField is the name of the field the JSON object is read from.
	fromField string

	// resultPrefix is passed to the JSON parser together with every parsed value.
	resultPrefix string
}
|
||||
|
||||
func (pu *pipeUnpackJSON) String() string {
|
||||
s := "unpack_json"
|
||||
if !isMsgFieldName(pu.fromField) {
|
||||
s += " from " + quoteTokenIfNeeded(pu.fromField)
|
||||
}
|
||||
if pu.resultPrefix != "" {
|
||||
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (pu *pipeUnpackJSON) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
unneededFields.remove(pu.fromField)
|
||||
} else {
|
||||
neededFields.add(pu.fromField)
|
||||
}
|
||||
}
|
||||
|
||||
func (pu *pipeUnpackJSON) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
shards := make([]pipeUnpackJSONProcessorShard, workersCount)
|
||||
|
||||
pup := &pipeUnpackJSONProcessor{
|
||||
pu: pu,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: shards,
|
||||
}
|
||||
return pup
|
||||
}
|
||||
|
||||
type pipeUnpackJSONProcessor struct {
|
||||
pu *pipeUnpackJSON
|
||||
ppBase pipeProcessor
|
||||
|
||||
shards []pipeUnpackJSONProcessorShard
|
||||
}
|
||||
|
||||
type pipeUnpackJSONProcessorShard struct {
|
||||
pipeUnpackJSONProcessorShardNopad
|
||||
|
||||
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
||||
_ [128 - unsafe.Sizeof(pipeUnpackJSONProcessorShardNopad{})%128]byte
|
||||
}
|
||||
|
||||
type pipeUnpackJSONProcessorShardNopad struct {
|
||||
p JSONParser
|
||||
|
||||
wctx pipeUnpackWriteContext
|
||||
}
|
||||
|
||||
func (shard *pipeUnpackJSONProcessorShard) parseJSON(v, resultPrefix string) []Field {
|
||||
if len(v) == 0 || v[0] != '{' {
|
||||
// This isn't a JSON object
|
||||
return nil
|
||||
}
|
||||
if err := shard.p.ParseLogMessageNoResetBuf(v, resultPrefix); err != nil {
|
||||
// Cannot parse v
|
||||
return nil
|
||||
}
|
||||
return shard.p.Fields
|
||||
}
|
||||
|
||||
func (pup *pipeUnpackJSONProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
resultPrefix := pup.pu.resultPrefix
|
||||
shard := &pup.shards[workerID]
|
||||
wctx := &shard.wctx
|
||||
wctx.init(br, pup.ppBase)
|
||||
|
||||
c := br.getColumnByName(pup.pu.fromField)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
extraFields := shard.parseJSON(v, resultPrefix)
|
||||
for rowIdx := range br.timestamps {
|
||||
wctx.writeRow(rowIdx, extraFields)
|
||||
}
|
||||
} else {
|
||||
values := c.getValues(br)
|
||||
var extraFields []Field
|
||||
for i, v := range values {
|
||||
if i == 0 || values[i-1] != v {
|
||||
extraFields = shard.parseJSON(v, resultPrefix)
|
||||
}
|
||||
wctx.writeRow(i, extraFields)
|
||||
}
|
||||
}
|
||||
|
||||
wctx.flush()
|
||||
shard.p.reset()
|
||||
}
|
||||
|
||||
func (pup *pipeUnpackJSONProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePipeUnpackJSON(lex *lexer) (*pipeUnpackJSON, error) {
|
||||
if !lex.isKeyword("unpack_json") {
|
||||
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_json")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
fromField := "_msg"
|
||||
if lex.isKeyword("from") {
|
||||
lex.nextToken()
|
||||
f, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
|
||||
}
|
||||
fromField = f
|
||||
}
|
||||
|
||||
resultPrefix := ""
|
||||
if lex.isKeyword("result_prefix") {
|
||||
lex.nextToken()
|
||||
p, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
|
||||
}
|
||||
resultPrefix = p
|
||||
}
|
||||
|
||||
pu := &pipeUnpackJSON{
|
||||
fromField: fromField,
|
||||
resultPrefix: resultPrefix,
|
||||
}
|
||||
return pu, nil
|
||||
}
|
376
lib/logstorage/pipe_unpack_json_test.go
Normal file
376
lib/logstorage/pipe_unpack_json_test.go
Normal file
@ -0,0 +1,376 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestPipeUnpackJSON(t *testing.T) {
|
||||
f := func(pipeStr string, rows, rowsExpected [][]Field) {
|
||||
t.Helper()
|
||||
expectPipeResults(t, pipeStr, rows, rowsExpected)
|
||||
}
|
||||
|
||||
// single row, unpack from _msg
|
||||
f("unpack_json", [][]Field{
|
||||
{
|
||||
{"_msg", `{"foo":"bar"}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"_msg", `{"foo":"bar"}`},
|
||||
{"foo", "bar"},
|
||||
},
|
||||
})
|
||||
|
||||
// single row, unpack from _msg into _msg
|
||||
f("unpack_json", [][]Field{
|
||||
{
|
||||
{"_msg", `{"_msg":"bar"}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"_msg", "bar"},
|
||||
},
|
||||
})
|
||||
|
||||
// single row, unpack from missing field
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"_msg", `{"foo":"bar"}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"_msg", `{"foo":"bar"}`},
|
||||
},
|
||||
})
|
||||
|
||||
// single row, unpack from non-json field
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `foobar`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `foobar`},
|
||||
},
|
||||
})
|
||||
|
||||
// single row, unpack from non-dict json
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `["foobar"]`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `["foobar"]`},
|
||||
},
|
||||
})
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `1234`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `1234`},
|
||||
},
|
||||
})
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `"xxx"`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `"xxx"`},
|
||||
},
|
||||
})
|
||||
|
||||
// single row, unpack from named field
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `NaN`},
|
||||
{"foo", "bar"},
|
||||
{"baz", "xyz"},
|
||||
{"a", "123"},
|
||||
{"b", `["foo","bar"]`},
|
||||
{"y.z.a", "b"},
|
||||
},
|
||||
})
|
||||
|
||||
// multiple rows with distinct number of fields
|
||||
f("unpack_json from x", [][]Field{
|
||||
{
|
||||
{"x", `{"foo":"bar","baz":"xyz"}`},
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"z", `foobar`},
|
||||
{"x", `{"z":["bar",123]}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `{"foo":"bar","baz":"xyz"}`},
|
||||
{"y", "abc"},
|
||||
{"foo", "bar"},
|
||||
{"baz", "xyz"},
|
||||
},
|
||||
{
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"z", `["bar",123]`},
|
||||
{"x", `{"z":["bar",123]}`},
|
||||
},
|
||||
})
|
||||
|
||||
// multiple rows with distinct number of fields with result_prefix
|
||||
f("unpack_json from x result_prefix qwe_", [][]Field{
|
||||
{
|
||||
{"x", `{"foo":"bar","baz":"xyz"}`},
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"z", `foobar`},
|
||||
{"x", `{"z":["bar",123]}`},
|
||||
},
|
||||
}, [][]Field{
|
||||
{
|
||||
{"x", `{"foo":"bar","baz":"xyz"}`},
|
||||
{"y", "abc"},
|
||||
{"qwe_foo", "bar"},
|
||||
{"qwe_baz", "xyz"},
|
||||
},
|
||||
{
|
||||
{"y", `abc`},
|
||||
},
|
||||
{
|
||||
{"z", `foobar`},
|
||||
{"x", `{"z":["bar",123]}`},
|
||||
{"qwe_z", `["bar",123]`},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
|
||||
t.Helper()
|
||||
|
||||
lex := newLexer(pipeStr)
|
||||
p, err := parsePipe(lex)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
|
||||
}
|
||||
|
||||
workersCount := 5
|
||||
stopCh := make(chan struct{})
|
||||
cancel := func() {}
|
||||
ppTest := newTestPipeProcessor()
|
||||
pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)
|
||||
|
||||
brw := newTestBlockResultWriter(workersCount, pp)
|
||||
for _, row := range rows {
|
||||
brw.writeRow(row)
|
||||
}
|
||||
brw.flush()
|
||||
|
||||
ppTest.expectRows(t, rowsExpected)
|
||||
}
|
||||
|
||||
func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
|
||||
return &testBlockResultWriter{
|
||||
workersCount: workersCount,
|
||||
ppBase: ppBase,
|
||||
}
|
||||
}
|
||||
|
||||
type testBlockResultWriter struct {
|
||||
workersCount int
|
||||
ppBase pipeProcessor
|
||||
rcs []resultColumn
|
||||
br blockResult
|
||||
}
|
||||
|
||||
func (brw *testBlockResultWriter) writeRow(row []Field) {
|
||||
if !brw.areSameFields(row) {
|
||||
brw.flush()
|
||||
|
||||
brw.rcs = brw.rcs[:0]
|
||||
for _, field := range row {
|
||||
brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
|
||||
}
|
||||
}
|
||||
|
||||
for i, field := range row {
|
||||
brw.rcs[i].addValue(field.Value)
|
||||
}
|
||||
if rand.Intn(5) == 0 {
|
||||
brw.flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
|
||||
if len(brw.rcs) != len(row) {
|
||||
return false
|
||||
}
|
||||
for i, rc := range brw.rcs {
|
||||
if rc.name != row[i].Name {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (brw *testBlockResultWriter) flush() {
|
||||
brw.br.setResultColumns(brw.rcs)
|
||||
workerID := rand.Intn(brw.workersCount)
|
||||
brw.ppBase.writeBlock(uint(workerID), &brw.br)
|
||||
brw.br.reset()
|
||||
for i := range brw.rcs {
|
||||
brw.rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
func newTestPipeProcessor() *testPipeProcessor {
|
||||
return &testPipeProcessor{}
|
||||
}
|
||||
|
||||
type testPipeProcessor struct {
|
||||
resultRowsLock sync.Mutex
|
||||
resultRows [][]Field
|
||||
}
|
||||
|
||||
func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
|
||||
cs := br.getColumns()
|
||||
var columnValues [][]string
|
||||
for _, c := range cs {
|
||||
values := c.getValues(br)
|
||||
columnValues = append(columnValues, values)
|
||||
}
|
||||
|
||||
for i := range br.timestamps {
|
||||
row := make([]Field, len(columnValues))
|
||||
for j, values := range columnValues {
|
||||
r := &row[j]
|
||||
r.Name = strings.Clone(cs[j].name)
|
||||
r.Value = strings.Clone(values[i])
|
||||
}
|
||||
pp.resultRowsLock.Lock()
|
||||
pp.resultRows = append(pp.resultRows, row)
|
||||
pp.resultRowsLock.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
func (pp *testPipeProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
|
||||
t.Helper()
|
||||
|
||||
if len(pp.resultRows) != len(expectedRows) {
|
||||
t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
|
||||
len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
|
||||
}
|
||||
|
||||
sortTestRows(pp.resultRows)
|
||||
sortTestRows(expectedRows)
|
||||
|
||||
for i, resultRow := range pp.resultRows {
|
||||
expectedRow := expectedRows[i]
|
||||
if len(resultRow) != len(expectedRow) {
|
||||
t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
|
||||
i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
|
||||
}
|
||||
for j, resultField := range resultRow {
|
||||
expectedField := expectedRow[j]
|
||||
if resultField.Name != expectedField.Name {
|
||||
t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
|
||||
i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
|
||||
}
|
||||
if resultField.Value != expectedField.Value {
|
||||
t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
|
||||
resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func sortTestRows(rows [][]Field) {
|
||||
slices.SortFunc(rows, func(a, b []Field) int {
|
||||
reverse := -1
|
||||
if len(a) > len(b) {
|
||||
reverse = 1
|
||||
a, b = b, a
|
||||
}
|
||||
for i, fA := range a {
|
||||
fB := b[i]
|
||||
if fA.Name == fB.Name {
|
||||
if fA.Value == fB.Value {
|
||||
continue
|
||||
}
|
||||
if fA.Value < fB.Value {
|
||||
return reverse
|
||||
}
|
||||
return -reverse
|
||||
}
|
||||
if fA.Name < fB.Name {
|
||||
return reverse
|
||||
}
|
||||
return -reverse
|
||||
}
|
||||
if len(a) == len(b) {
|
||||
return 0
|
||||
}
|
||||
return reverse
|
||||
})
|
||||
}
|
||||
|
||||
func rowsToString(rows [][]Field) string {
|
||||
a := make([]string, len(rows))
|
||||
for i, row := range rows {
|
||||
a[i] = rowToString(row)
|
||||
}
|
||||
return strings.Join(a, "\n")
|
||||
}
|
||||
|
||||
func rowToString(row []Field) string {
|
||||
a := make([]string, len(row))
|
||||
for i, f := range row {
|
||||
a[i] = f.String()
|
||||
}
|
||||
return "{" + strings.Join(a, ",") + "}"
|
||||
}
|
||||
|
||||
// TestPipeUnpackJSONUpdateNeededFields checks how the `unpack_json` pipe adjusts
// the sets of needed and unneeded fields via expectPipeNeededFields.
//
// Every f() call passes: the pipe string, the input needed fields, the input unneeded fields,
// the expected needed fields and the expected unneeded fields.
func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
	f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
		t.Helper()
		expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
	}

	// all the needed fields
	f("unpack_json from x", "*", "", "*", "")

	// all the needed fields, unneeded fields do not intersect with src
	f("unpack_json from x", "*", "f1,f2", "*", "f1,f2")

	// all the needed fields, unneeded fields intersect with src
	f("unpack_json from x", "*", "f2,x", "*", "f2")

	// needed fields do not intersect with src
	f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")

	// needed fields intersect with src
	f("unpack_json from x", "f2,x", "", "f2,x", "")
}
|
289
lib/logstorage/pipe_unpack_logfmt.go
Normal file
289
lib/logstorage/pipe_unpack_logfmt.go
Normal file
@ -0,0 +1,289 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
)
|
||||
|
||||
// pipeUnpackLogfmt processes '| unpack_logfmt ...' pipe.
//
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_logfmt-pipe
type pipeUnpackLogfmt struct {
	// fromField is the name of the field whose value is parsed as logfmt.
	// The pipe parser sets it to "_msg" when the 'from' clause is omitted.
	fromField string

	// resultPrefix is prepended to the name of every unpacked field when non-empty.
	resultPrefix string
}
|
||||
|
||||
func (pu *pipeUnpackLogfmt) String() string {
|
||||
s := "unpack_logfmt"
|
||||
if !isMsgFieldName(pu.fromField) {
|
||||
s += " from " + quoteTokenIfNeeded(pu.fromField)
|
||||
}
|
||||
if pu.resultPrefix != "" {
|
||||
s += " result_prefix " + quoteTokenIfNeeded(pu.resultPrefix)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func (pu *pipeUnpackLogfmt) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
||||
if neededFields.contains("*") {
|
||||
unneededFields.remove(pu.fromField)
|
||||
} else {
|
||||
neededFields.add(pu.fromField)
|
||||
}
|
||||
}
|
||||
|
||||
func (pu *pipeUnpackLogfmt) newPipeProcessor(workersCount int, _ <-chan struct{}, _ func(), ppBase pipeProcessor) pipeProcessor {
|
||||
shards := make([]pipeUnpackLogfmtProcessorShard, workersCount)
|
||||
|
||||
pup := &pipeUnpackLogfmtProcessor{
|
||||
pu: pu,
|
||||
ppBase: ppBase,
|
||||
|
||||
shards: shards,
|
||||
}
|
||||
return pup
|
||||
}
|
||||
|
||||
// pipeUnpackLogfmtProcessor is the pipeProcessor created by pipeUnpackLogfmt.newPipeProcessor.
type pipeUnpackLogfmtProcessor struct {
	// pu holds the parsed pipe options (source field and result prefix).
	pu *pipeUnpackLogfmt

	// ppBase is the next processor; it receives the blocks with unpacked fields.
	ppBase pipeProcessor

	// shards holds per-worker state; writeBlock indexes it by workerID.
	shards []pipeUnpackLogfmtProcessorShard
}
|
||||
|
||||
// pipeUnpackLogfmtProcessorShard is the per-worker state of pipeUnpackLogfmtProcessor,
// padded up to a multiple of 128 bytes.
type pipeUnpackLogfmtProcessorShard struct {
	pipeUnpackLogfmtProcessorShardNopad

	// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0.
	_ [128 - unsafe.Sizeof(pipeUnpackLogfmtProcessorShardNopad{})%128]byte
}
|
||||
|
||||
// pipeUnpackLogfmtProcessorShardNopad contains the payload of a shard without the padding.
type pipeUnpackLogfmtProcessorShardNopad struct {
	// p is the logfmt parser used for the source field values of the shard.
	p logfmtParser

	// wctx accumulates the output rows and flushes them to the next processor.
	wctx pipeUnpackWriteContext
}
|
||||
|
||||
func (pup *pipeUnpackLogfmtProcessor) writeBlock(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
resultPrefix := pup.pu.resultPrefix
|
||||
shard := &pup.shards[workerID]
|
||||
wctx := &shard.wctx
|
||||
wctx.init(br, pup.ppBase)
|
||||
|
||||
c := br.getColumnByName(pup.pu.fromField)
|
||||
if c.isConst {
|
||||
v := c.valuesEncoded[0]
|
||||
extraFields := shard.p.parse(v, resultPrefix)
|
||||
for rowIdx := range br.timestamps {
|
||||
wctx.writeRow(rowIdx, extraFields)
|
||||
}
|
||||
} else {
|
||||
values := c.getValues(br)
|
||||
var extraFields []Field
|
||||
for i, v := range values {
|
||||
if i == 0 || values[i-1] != v {
|
||||
extraFields = shard.p.parse(v, resultPrefix)
|
||||
}
|
||||
wctx.writeRow(i, extraFields)
|
||||
}
|
||||
}
|
||||
|
||||
wctx.flush()
|
||||
shard.p.reset()
|
||||
}
|
||||
|
||||
func (pup *pipeUnpackLogfmtProcessor) flush() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func parsePipeUnpackLogfmt(lex *lexer) (*pipeUnpackLogfmt, error) {
|
||||
if !lex.isKeyword("unpack_logfmt") {
|
||||
return nil, fmt.Errorf("unexpected token: %q; want %q", lex.token, "unpack_logfmt")
|
||||
}
|
||||
lex.nextToken()
|
||||
|
||||
fromField := "_msg"
|
||||
if lex.isKeyword("from") {
|
||||
lex.nextToken()
|
||||
f, err := parseFieldName(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'from' field name: %w", err)
|
||||
}
|
||||
fromField = f
|
||||
}
|
||||
|
||||
resultPrefix := ""
|
||||
if lex.isKeyword("result_prefix") {
|
||||
lex.nextToken()
|
||||
p, err := getCompoundToken(lex)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot parse 'result_prefix': %w", err)
|
||||
}
|
||||
resultPrefix = p
|
||||
}
|
||||
|
||||
pu := &pipeUnpackLogfmt{
|
||||
fromField: fromField,
|
||||
resultPrefix: resultPrefix,
|
||||
}
|
||||
return pu, nil
|
||||
}
|
||||
|
||||
type pipeUnpackWriteContext struct {
|
||||
brSrc *blockResult
|
||||
csSrc []*blockResultColumn
|
||||
ppBase pipeProcessor
|
||||
|
||||
rcs []resultColumn
|
||||
br blockResult
|
||||
|
||||
valuesLen int
|
||||
}
|
||||
|
||||
func (wctx *pipeUnpackWriteContext) init(brSrc *blockResult, ppBase pipeProcessor) {
|
||||
wctx.brSrc = brSrc
|
||||
wctx.csSrc = brSrc.getColumns()
|
||||
wctx.ppBase = ppBase
|
||||
}
|
||||
|
||||
func (wctx *pipeUnpackWriteContext) writeRow(rowIdx int, extraFields []Field) {
|
||||
csSrc := wctx.csSrc
|
||||
rcs := wctx.rcs
|
||||
|
||||
areEqualColumns := len(rcs) == len(csSrc)+len(extraFields)
|
||||
if areEqualColumns {
|
||||
for i, f := range extraFields {
|
||||
if rcs[len(csSrc)+i].name != f.Name {
|
||||
areEqualColumns = false
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if !areEqualColumns {
|
||||
// send the current block to bbBase and construct a block with new set of columns
|
||||
wctx.flush()
|
||||
|
||||
rcs = wctx.rcs[:0]
|
||||
for _, c := range csSrc {
|
||||
rcs = appendResultColumnWithName(rcs, c.name)
|
||||
}
|
||||
for _, f := range extraFields {
|
||||
rcs = appendResultColumnWithName(rcs, f.Name)
|
||||
}
|
||||
wctx.rcs = rcs
|
||||
}
|
||||
|
||||
brSrc := wctx.brSrc
|
||||
for i, c := range csSrc {
|
||||
v := c.getValueAtRow(brSrc, rowIdx)
|
||||
rcs[i].addValue(v)
|
||||
wctx.valuesLen += len(v)
|
||||
}
|
||||
for i, f := range extraFields {
|
||||
v := f.Value
|
||||
rcs[len(csSrc)+i].addValue(v)
|
||||
wctx.valuesLen += len(v)
|
||||
}
|
||||
if wctx.valuesLen >= 1_000_000 {
|
||||
wctx.flush()
|
||||
}
|
||||
}
|
||||
|
||||
func (wctx *pipeUnpackWriteContext) flush() {
|
||||
rcs := wctx.rcs
|
||||
|
||||
wctx.valuesLen = 0
|
||||
|
||||
if len(rcs) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Flush rcs to ppBase
|
||||
br := &wctx.br
|
||||
br.setResultColumns(rcs)
|
||||
wctx.ppBase.writeBlock(0, br)
|
||||
br.reset()
|
||||
for i := range rcs {
|
||||
rcs[i].resetValues()
|
||||
}
|
||||
}
|
||||
|
||||
// logfmtParser parses logfmt-encoded strings (space-delimited name=value pairs) into Fields.
type logfmtParser struct {
	// Fields contains the fields parsed by the last parse() call.
	Fields []Field

	// buf holds the prefixed field names built by addField when a result prefix is used.
	buf []byte
}
|
||||
|
||||
func (p *logfmtParser) reset() {
|
||||
clear(p.Fields)
|
||||
p.Fields = p.Fields[:0]
|
||||
|
||||
p.buf = p.buf[:0]
|
||||
}
|
||||
|
||||
func (p *logfmtParser) parse(s, resultPrefix string) []Field {
|
||||
clear(p.Fields)
|
||||
p.Fields = p.Fields[:0]
|
||||
|
||||
for {
|
||||
// Search for field name
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
// field name couldn't be read
|
||||
return p.Fields
|
||||
}
|
||||
|
||||
name := strings.TrimSpace(s[:n])
|
||||
s = s[n+1:]
|
||||
if len(s) == 0 {
|
||||
p.addField(name, "", resultPrefix)
|
||||
return p.Fields
|
||||
}
|
||||
|
||||
// Search for field value
|
||||
value, nOffset := tryUnquoteString(s)
|
||||
if nOffset >= 0 {
|
||||
p.addField(name, value, resultPrefix)
|
||||
s = s[nOffset:]
|
||||
if len(s) == 0 {
|
||||
return p.Fields
|
||||
}
|
||||
if s[0] != ' ' {
|
||||
return p.Fields
|
||||
}
|
||||
s = s[1:]
|
||||
} else {
|
||||
n := strings.IndexByte(s, ' ')
|
||||
if n < 0 {
|
||||
p.addField(name, s, resultPrefix)
|
||||
return p.Fields
|
||||
}
|
||||
p.addField(name, s[:n], resultPrefix)
|
||||
s = s[n+1:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *logfmtParser) addField(name, value, resultPrefix string) {
|
||||
if resultPrefix != "" {
|
||||
buf := p.buf
|
||||
bufLen := len(buf)
|
||||
buf = append(buf, resultPrefix...)
|
||||
buf = append(buf, name...)
|
||||
p.buf = buf
|
||||
|
||||
name = bytesutil.ToUnsafeString(buf[bufLen:])
|
||||
}
|
||||
p.Fields = append(p.Fields, Field{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
}
|
175
lib/logstorage/pipe_unpack_logfmt_test.go
Normal file
175
lib/logstorage/pipe_unpack_logfmt_test.go
Normal file
@ -0,0 +1,175 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestPipeUnpackLogfmt verifies the rows generated by the `unpack_logfmt` pipe.
//
// Every f() call passes the pipe string, the input rows and the rows expected at the output;
// the comparison is performed by expectPipeResults.
func TestPipeUnpackLogfmt(t *testing.T) {
	f := func(pipeStr string, rows, rowsExpected [][]Field) {
		t.Helper()
		expectPipeResults(t, pipeStr, rows, rowsExpected)
	}

	// single row, unpack from _msg
	f("unpack_logfmt", [][]Field{
		{
			{"_msg", `foo=bar baz="x y=z" a=b`},
		},
	}, [][]Field{
		{
			{"_msg", `foo=bar baz="x y=z" a=b`},
			{"foo", "bar"},
			{"baz", "x y=z"},
			{"a", "b"},
		},
	})

	// single row, unpack from _msg into _msg
	f("unpack_logfmt", [][]Field{
		{
			{"_msg", `_msg=bar`},
		},
	}, [][]Field{
		{
			{"_msg", "bar"},
		},
	})

	// single row, unpack from missing field
	f("unpack_logfmt from x", [][]Field{
		{
			{"_msg", `foo=bar`},
		},
	}, [][]Field{
		{
			{"_msg", `foo=bar`},
		},
	})

	// single row, unpack from a field without name=value pairs
	f("unpack_logfmt from x", [][]Field{
		{
			{"x", `foobar`},
		},
	}, [][]Field{
		{
			{"x", `foobar`},
		},
	})

	// single row, unpack from non-logfmt
	f("unpack_logfmt from x", [][]Field{
		{
			{"x", `foobar`},
		},
	}, [][]Field{
		{
			{"x", `foobar`},
		},
	})

	// unpack empty value
	f("unpack_logfmt from x", [][]Field{
		{
			{"x", `foobar=`},
		},
	}, [][]Field{
		{
			{"x", `foobar=`},
			{"foobar", ""},
		},
	})
	f("unpack_logfmt from x", [][]Field{
		{
			{"x", `foo="" bar= baz=`},
		},
	}, [][]Field{
		{
			{"x", `foo="" bar= baz=`},
			{"foo", ""},
			{"bar", ""},
			{"baz", ""},
		},
	})

	// multiple rows with distinct number of fields
	f("unpack_logfmt from x", [][]Field{
		{
			{"x", `foo=bar baz=xyz`},
			{"y", `abc`},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `z=bar`},
		},
	}, [][]Field{
		{
			{"x", `foo=bar baz=xyz`},
			{"y", "abc"},
			{"foo", "bar"},
			{"baz", "xyz"},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `bar`},
			{"x", `z=bar`},
		},
	})

	// multiple rows with distinct number of fields, with result_prefix
	f("unpack_logfmt from x result_prefix qwe_", [][]Field{
		{
			{"x", `foo=bar baz=xyz`},
			{"y", `abc`},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `z=bar`},
		},
	}, [][]Field{
		{
			{"x", `foo=bar baz=xyz`},
			{"y", "abc"},
			{"qwe_foo", "bar"},
			{"qwe_baz", "xyz"},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `z=bar`},
			{"qwe_z", `bar`},
		},
	})
}
|
||||
|
||||
// TestPipeUnpackLogfmtUpdateNeededFields checks how the `unpack_logfmt` pipe adjusts
// the sets of needed and unneeded fields via expectPipeNeededFields.
//
// Every f() call passes: the pipe string, the input needed fields, the input unneeded fields,
// the expected needed fields and the expected unneeded fields.
func TestPipeUnpackLogfmtUpdateNeededFields(t *testing.T) {
	f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
		t.Helper()
		expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
	}

	// all the needed fields
	f("unpack_logfmt from x", "*", "", "*", "")

	// all the needed fields, unneeded fields do not intersect with src
	f("unpack_logfmt from x", "*", "f1,f2", "*", "f1,f2")

	// all the needed fields, unneeded fields intersect with src
	f("unpack_logfmt from x", "*", "f2,x", "*", "f2")

	// needed fields do not intersect with src
	f("unpack_logfmt from x", "f1,f2", "", "f1,f2,x", "")

	// needed fields intersect with src
	f("unpack_logfmt from x", "f2,x", "", "f2,x", "")
}
|
@ -1,7 +1,6 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"unsafe"
|
||||
@ -16,8 +15,8 @@ func (sa *statsAvg) String() string {
|
||||
return "avg(" + fieldNamesString(sa.fields) + ")"
|
||||
}
|
||||
|
||||
func (sa *statsAvg) neededFields() []string {
|
||||
return sa.fields
|
||||
func (sa *statsAvg) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sa.fields)
|
||||
}
|
||||
|
||||
func (sa *statsAvg) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -58,8 +57,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
|
||||
if sap.sa.containsStar {
|
||||
// Scan all the fields for the given row
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
sap.sum += f
|
||||
sap.count++
|
||||
}
|
||||
@ -68,8 +67,8 @@ func (sap *statsAvgProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
|
||||
// Scan only the given fields for the given row
|
||||
for _, field := range sap.sa.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
sap.sum += f
|
||||
sap.count++
|
||||
}
|
||||
|
@ -17,12 +17,12 @@ func (sc *statsCount) String() string {
|
||||
return "count(" + fieldNamesString(sc.fields) + ")"
|
||||
}
|
||||
|
||||
func (sc *statsCount) neededFields() []string {
|
||||
func (sc *statsCount) updateNeededFields(neededFields fieldsSet) {
|
||||
if sc.containsStar {
|
||||
// There is no need in fetching any columns for count(*) - the number of matching rows can be calculated as len(blockResult.timestamps)
|
||||
return nil
|
||||
return
|
||||
}
|
||||
return sc.fields
|
||||
neededFields.addFields(sc.fields)
|
||||
}
|
||||
|
||||
func (sc *statsCount) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -49,7 +49,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
// Fast path for count(single_column)
|
||||
c := br.getColumnByName(fields[0])
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] != "" {
|
||||
if c.valuesEncoded[0] != "" {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
}
|
||||
return 0
|
||||
@ -60,7 +60,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
}
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
for _, v := range c.encodedValues {
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
if v != "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
@ -72,7 +72,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
return 0
|
||||
}
|
||||
for _, v := range c.encodedValues {
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
if int(v[0]) != zeroDictIdx {
|
||||
scp.rowsCount++
|
||||
}
|
||||
@ -95,7 +95,7 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
for _, f := range fields {
|
||||
c := br.getColumnByName(f)
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] != "" {
|
||||
if c.valuesEncoded[0] != "" {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
return 0
|
||||
}
|
||||
@ -105,18 +105,21 @@ func (scp *statsCountProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
return 0
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
return c.encodedValues[i] == ""
|
||||
return valuesEncoded[i] == ""
|
||||
})
|
||||
case valueTypeDict:
|
||||
if !slices.Contains(c.dictValues, "") {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
return 0
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
dictIdx := c.encodedValues[i][0]
|
||||
dictIdx := valuesEncoded[i][0]
|
||||
return c.dictValues[dictIdx] == ""
|
||||
})
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
@ -144,7 +147,7 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
|
||||
// Fast path for count(single_column)
|
||||
c := br.getColumnByName(fields[0])
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] != "" {
|
||||
if c.valuesEncoded[0] != "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
@ -155,12 +158,14 @@ func (scp *statsCountProcessor) updateStatsForRow(br *blockResult, rowIdx int) i
|
||||
}
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
if v := c.encodedValues[rowIdx]; v != "" {
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
if v := valuesEncoded[rowIdx]; v != "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
case valueTypeDict:
|
||||
dictIdx := c.encodedValues[rowIdx][0]
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
dictIdx := valuesEncoded[rowIdx][0]
|
||||
if v := c.dictValues[dictIdx]; v != "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
|
@ -17,8 +17,8 @@ func (sc *statsCountEmpty) String() string {
|
||||
return "count_empty(" + fieldNamesString(sc.fields) + ")"
|
||||
}
|
||||
|
||||
func (sc *statsCountEmpty) neededFields() []string {
|
||||
return sc.fields
|
||||
func (sc *statsCountEmpty) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sc.fields)
|
||||
}
|
||||
|
||||
func (sc *statsCountEmpty) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -53,7 +53,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
||||
// Fast path for count_empty(single_column)
|
||||
c := br.getColumnByName(fields[0])
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] == "" {
|
||||
if c.valuesEncoded[0] == "" {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
}
|
||||
return 0
|
||||
@ -63,7 +63,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
||||
}
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
for _, v := range c.encodedValues {
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
if v == "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
@ -74,7 +74,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
||||
if zeroDictIdx < 0 {
|
||||
return 0
|
||||
}
|
||||
for _, v := range c.encodedValues {
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
if int(v[0]) == zeroDictIdx {
|
||||
scp.rowsCount++
|
||||
}
|
||||
@ -96,7 +96,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
||||
for _, f := range fields {
|
||||
c := br.getColumnByName(f)
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] == "" {
|
||||
if c.valuesEncoded[0] == "" {
|
||||
scp.rowsCount += uint64(len(br.timestamps))
|
||||
return 0
|
||||
}
|
||||
@ -107,15 +107,17 @@ func (scp *statsCountEmptyProcessor) updateStatsForAllRows(br *blockResult) int
|
||||
}
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
return c.encodedValues[i] == ""
|
||||
return valuesEncoded[i] == ""
|
||||
})
|
||||
case valueTypeDict:
|
||||
if !slices.Contains(c.dictValues, "") {
|
||||
return 0
|
||||
}
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
bm.forEachSetBit(func(i int) bool {
|
||||
dictIdx := c.encodedValues[i][0]
|
||||
dictIdx := valuesEncoded[i][0]
|
||||
return c.dictValues[dictIdx] == ""
|
||||
})
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64, valueTypeFloat64, valueTypeIPv4, valueTypeTimestampISO8601:
|
||||
@ -145,7 +147,7 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
|
||||
// Fast path for count_empty(single_column)
|
||||
c := br.getColumnByName(fields[0])
|
||||
if c.isConst {
|
||||
if c.encodedValues[0] == "" {
|
||||
if c.valuesEncoded[0] == "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
@ -155,12 +157,14 @@ func (scp *statsCountEmptyProcessor) updateStatsForRow(br *blockResult, rowIdx i
|
||||
}
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
if v := c.encodedValues[rowIdx]; v == "" {
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
if v := valuesEncoded[rowIdx]; v == "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
return 0
|
||||
case valueTypeDict:
|
||||
dictIdx := c.encodedValues[rowIdx][0]
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
dictIdx := valuesEncoded[rowIdx][0]
|
||||
if v := c.dictValues[dictIdx]; v == "" {
|
||||
scp.rowsCount++
|
||||
}
|
||||
|
@ -24,8 +24,8 @@ func (su *statsCountUniq) String() string {
|
||||
return s
|
||||
}
|
||||
|
||||
func (su *statsCountUniq) neededFields() []string {
|
||||
return su.fields
|
||||
func (su *statsCountUniq) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(su.fields)
|
||||
}
|
||||
|
||||
func (su *statsCountUniq) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -122,7 +122,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
}
|
||||
if c.isConst {
|
||||
// count unique const values
|
||||
v := c.encodedValues[0]
|
||||
v := c.valuesEncoded[0]
|
||||
if v == "" {
|
||||
// Do not count empty values
|
||||
return stateSizeIncrease
|
||||
@ -156,7 +156,7 @@ func (sup *statsCountUniqProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
return stateSizeIncrease
|
||||
}
|
||||
|
||||
// Count unique values across encodedValues
|
||||
// Count unique values across values
|
||||
values := c.getValues(br)
|
||||
keyBuf := sup.keyBuf[:0]
|
||||
for i, v := range values {
|
||||
@ -278,7 +278,7 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
|
||||
}
|
||||
if c.isConst {
|
||||
// count unique const values
|
||||
v := c.encodedValues[0]
|
||||
v := c.valuesEncoded[0]
|
||||
if v == "" {
|
||||
// Do not count empty values
|
||||
return stateSizeIncrease
|
||||
@ -295,7 +295,8 @@ func (sup *statsCountUniqProcessor) updateStatsForRow(br *blockResult, rowIdx in
|
||||
}
|
||||
if c.valueType == valueTypeDict {
|
||||
// count unique non-zero c.dictValues
|
||||
dictIdx := c.encodedValues[rowIdx][0]
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
dictIdx := valuesEncoded[rowIdx][0]
|
||||
v := c.dictValues[dictIdx]
|
||||
if v == "" {
|
||||
// Do not count empty values
|
||||
|
@ -3,8 +3,11 @@ package logstorage
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type statsMax struct {
|
||||
@ -16,14 +19,13 @@ func (sm *statsMax) String() string {
|
||||
return "max(" + fieldNamesString(sm.fields) + ")"
|
||||
}
|
||||
|
||||
func (sm *statsMax) neededFields() []string {
|
||||
return sm.fields
|
||||
func (sm *statsMax) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sm.fields)
|
||||
}
|
||||
|
||||
func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
|
||||
smp := &statsMaxProcessor{
|
||||
sm: sm,
|
||||
max: nan,
|
||||
sm: sm,
|
||||
}
|
||||
return smp, int(unsafe.Sizeof(*smp))
|
||||
}
|
||||
@ -31,62 +33,139 @@ func (sm *statsMax) newStatsProcessor() (statsProcessor, int) {
|
||||
type statsMaxProcessor struct {
|
||||
sm *statsMax
|
||||
|
||||
max float64
|
||||
max string
|
||||
hasMax bool
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
maxLen := len(smp.max)
|
||||
|
||||
if smp.sm.containsStar {
|
||||
// Find the maximum value across all the columns
|
||||
// Find the minimum value across all the columns
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getMaxValue()
|
||||
if f > smp.max || math.IsNaN(smp.max) {
|
||||
smp.max = f
|
||||
}
|
||||
smp.updateStateForColumn(br, c)
|
||||
}
|
||||
} else {
|
||||
// Find the maximum value across the requested columns
|
||||
// Find the minimum value across the requested columns
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getMaxValue()
|
||||
if f > smp.max || math.IsNaN(smp.max) {
|
||||
smp.max = f
|
||||
}
|
||||
smp.updateStateForColumn(br, c)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
|
||||
return len(smp.max) - maxLen
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||
maxLen := len(smp.max)
|
||||
|
||||
if smp.sm.containsStar {
|
||||
// Find the maximum value across all the fields for the given row
|
||||
// Find the minimum value across all the fields for the given row
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f > smp.max || math.IsNaN(smp.max) {
|
||||
smp.max = f
|
||||
}
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
} else {
|
||||
// Find the maximum value across the requested fields for the given row
|
||||
// Find the minimum value across the requested fields for the given row
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f > smp.max || math.IsNaN(smp.max) {
|
||||
smp.max = f
|
||||
}
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
|
||||
return maxLen - len(smp.max)
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) mergeState(sfp statsProcessor) {
|
||||
src := sfp.(*statsMaxProcessor)
|
||||
if src.max > smp.max {
|
||||
smp.max = src.max
|
||||
if src.hasMax {
|
||||
smp.updateStateString(src.max)
|
||||
}
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if c.isTime {
|
||||
// Special case for time column
|
||||
timestamps := br.timestamps
|
||||
maxTimestamp := timestamps[len(timestamps)-1]
|
||||
for _, timestamp := range timestamps[:len(timestamps)-1] {
|
||||
if timestamp > maxTimestamp {
|
||||
maxTimestamp = timestamp
|
||||
}
|
||||
}
|
||||
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], maxTimestamp)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return
|
||||
}
|
||||
if c.isConst {
|
||||
// Special case for const column
|
||||
v := c.valuesEncoded[0]
|
||||
smp.updateStateString(v)
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
case valueTypeDict:
|
||||
for _, v := range c.dictValues {
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalUint64String(bb.B[:0], c.maxValue)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.maxValue)
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalFloat64String(bb.B[:0], f)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeIPv4:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalIPv4String(bb.B[:0], uint32(c.maxValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeTimestampISO8601:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.maxValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) updateStateBytes(b []byte) {
|
||||
v := bytesutil.ToUnsafeString(b)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) updateStateString(v string) {
|
||||
if smp.hasMax && !lessString(smp.max, v) {
|
||||
return
|
||||
}
|
||||
smp.max = strings.Clone(v)
|
||||
smp.hasMax = true
|
||||
}
|
||||
|
||||
func (smp *statsMaxProcessor) finalizeStats() string {
|
||||
return strconv.FormatFloat(smp.max, 'f', -1, 64)
|
||||
if !smp.hasMax {
|
||||
return "NaN"
|
||||
}
|
||||
return smp.max
|
||||
}
|
||||
|
||||
func parseStatsMax(lex *lexer) (*statsMax, error) {
|
||||
|
@ -14,8 +14,8 @@ func (sm *statsMedian) String() string {
|
||||
return "median(" + fieldNamesString(sm.fields) + ")"
|
||||
}
|
||||
|
||||
func (sm *statsMedian) neededFields() []string {
|
||||
return sm.fields
|
||||
func (sm *statsMedian) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sm.fields)
|
||||
}
|
||||
|
||||
func (sm *statsMedian) newStatsProcessor() (statsProcessor, int) {
|
||||
|
@ -3,8 +3,11 @@ package logstorage
|
||||
import (
|
||||
"math"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unsafe"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type statsMin struct {
|
||||
@ -16,14 +19,13 @@ func (sm *statsMin) String() string {
|
||||
return "min(" + fieldNamesString(sm.fields) + ")"
|
||||
}
|
||||
|
||||
func (sm *statsMin) neededFields() []string {
|
||||
return sm.fields
|
||||
func (sm *statsMin) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sm.fields)
|
||||
}
|
||||
|
||||
func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
|
||||
smp := &statsMinProcessor{
|
||||
sm: sm,
|
||||
min: nan,
|
||||
sm: sm,
|
||||
}
|
||||
return smp, int(unsafe.Sizeof(*smp))
|
||||
}
|
||||
@ -31,62 +33,139 @@ func (sm *statsMin) newStatsProcessor() (statsProcessor, int) {
|
||||
type statsMinProcessor struct {
|
||||
sm *statsMin
|
||||
|
||||
min float64
|
||||
min string
|
||||
hasMin bool
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
minLen := len(smp.min)
|
||||
|
||||
if smp.sm.containsStar {
|
||||
// Find the minimum value across all the columns
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getMinValue()
|
||||
if f < smp.min || math.IsNaN(smp.min) {
|
||||
smp.min = f
|
||||
}
|
||||
smp.updateStateForColumn(br, c)
|
||||
}
|
||||
} else {
|
||||
// Find the minimum value across the requested columns
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getMinValue()
|
||||
if f < smp.min || math.IsNaN(smp.min) {
|
||||
smp.min = f
|
||||
}
|
||||
smp.updateStateForColumn(br, c)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
|
||||
return len(smp.min) - minLen
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStatsForRow(br *blockResult, rowIdx int) int {
|
||||
minLen := len(smp.min)
|
||||
|
||||
if smp.sm.containsStar {
|
||||
// Find the minimum value across all the fields for the given row
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f < smp.min || math.IsNaN(smp.min) {
|
||||
smp.min = f
|
||||
}
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
} else {
|
||||
// Find the minimum value across the requested fields for the given row
|
||||
for _, field := range smp.sm.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if f < smp.min || math.IsNaN(smp.min) {
|
||||
smp.min = f
|
||||
}
|
||||
v := c.getValueAtRow(br, rowIdx)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
|
||||
return minLen - len(smp.min)
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) mergeState(sfp statsProcessor) {
|
||||
src := sfp.(*statsMinProcessor)
|
||||
if src.min < smp.min {
|
||||
smp.min = src.min
|
||||
if src.hasMin {
|
||||
smp.updateStateString(src.min)
|
||||
}
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
if c.isTime {
|
||||
// Special case for time column
|
||||
timestamps := br.timestamps
|
||||
minTimestamp := timestamps[0]
|
||||
for _, timestamp := range timestamps[1:] {
|
||||
if timestamp < minTimestamp {
|
||||
minTimestamp = timestamp
|
||||
}
|
||||
}
|
||||
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalTimestampRFC3339NanoString(bb.B[:0], minTimestamp)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
|
||||
return
|
||||
}
|
||||
if c.isConst {
|
||||
// Special case for const column
|
||||
v := c.valuesEncoded[0]
|
||||
smp.updateStateString(v)
|
||||
return
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
case valueTypeDict:
|
||||
for _, v := range c.dictValues {
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
case valueTypeUint8, valueTypeUint16, valueTypeUint32, valueTypeUint64:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalUint64String(bb.B[:0], c.minValue)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeFloat64:
|
||||
f := math.Float64frombits(c.minValue)
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalFloat64String(bb.B[:0], f)
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeIPv4:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalIPv4String(bb.B[:0], uint32(c.minValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
case valueTypeTimestampISO8601:
|
||||
bb := bbPool.Get()
|
||||
bb.B = marshalTimestampISO8601String(bb.B[:0], int64(c.minValue))
|
||||
smp.updateStateBytes(bb.B)
|
||||
bbPool.Put(bb)
|
||||
default:
|
||||
logger.Panicf("BUG: unknown valueType=%d", c.valueType)
|
||||
}
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStateBytes(b []byte) {
|
||||
v := bytesutil.ToUnsafeString(b)
|
||||
smp.updateStateString(v)
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) updateStateString(v string) {
|
||||
if smp.hasMin && !lessString(v, smp.min) {
|
||||
return
|
||||
}
|
||||
smp.min = strings.Clone(v)
|
||||
smp.hasMin = true
|
||||
}
|
||||
|
||||
func (smp *statsMinProcessor) finalizeStats() string {
|
||||
return strconv.FormatFloat(smp.min, 'f', -1, 64)
|
||||
if !smp.hasMin {
|
||||
return "NaN"
|
||||
}
|
||||
return smp.min
|
||||
}
|
||||
|
||||
func parseStatsMin(lex *lexer) (*statsMin, error) {
|
||||
|
@ -8,6 +8,9 @@ import (
|
||||
"unsafe"
|
||||
|
||||
"github.com/valyala/fastrand"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
type statsQuantile struct {
|
||||
@ -21,8 +24,8 @@ func (sq *statsQuantile) String() string {
|
||||
return fmt.Sprintf("quantile(%g, %s)", sq.phi, fieldNamesString(sq.fields))
|
||||
}
|
||||
|
||||
func (sq *statsQuantile) neededFields() []string {
|
||||
return sq.fields
|
||||
func (sq *statsQuantile) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sq.fields)
|
||||
}
|
||||
|
||||
func (sq *statsQuantile) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -39,27 +42,16 @@ type statsQuantileProcessor struct {
|
||||
}
|
||||
|
||||
func (sqp *statsQuantileProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
h := &sqp.h
|
||||
stateSizeIncrease := 0
|
||||
|
||||
if sqp.sq.containsStar {
|
||||
for _, c := range br.getColumns() {
|
||||
for _, v := range c.getValues(br) {
|
||||
f, ok := tryParseFloat64(v)
|
||||
if ok {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
stateSizeIncrease += sqp.updateStateForColumn(br, c)
|
||||
}
|
||||
} else {
|
||||
for _, field := range sqp.sq.fields {
|
||||
c := br.getColumnByName(field)
|
||||
for _, v := range c.getValues(br) {
|
||||
f, ok := tryParseFloat64(v)
|
||||
if ok {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
stateSizeIncrease += sqp.updateStateForColumn(br, c)
|
||||
}
|
||||
}
|
||||
|
||||
@ -72,16 +64,16 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
|
||||
|
||||
if sqp.sq.containsStar {
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for _, field := range sqp.sq.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
@ -90,6 +82,85 @@ func (sqp *statsQuantileProcessor) updateStatsForRow(br *blockResult, rowIdx int
|
||||
return stateSizeIncrease
|
||||
}
|
||||
|
||||
func (sqp *statsQuantileProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) int {
|
||||
h := &sqp.h
|
||||
stateSizeIncrease := 0
|
||||
|
||||
if c.isConst {
|
||||
f, ok := tryParseFloat64(c.valuesEncoded[0])
|
||||
if ok {
|
||||
for range br.timestamps {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
return stateSizeIncrease
|
||||
}
|
||||
if c.isTime {
|
||||
return 0
|
||||
}
|
||||
|
||||
switch c.valueType {
|
||||
case valueTypeString:
|
||||
for _, v := range c.getValues(br) {
|
||||
f, ok := tryParseFloat64(v)
|
||||
if ok {
|
||||
stateSizeIncrease += h.update(f)
|
||||
}
|
||||
}
|
||||
case valueTypeDict:
|
||||
dictValues := c.dictValues
|
||||
a := encoding.GetFloat64s(len(dictValues))
|
||||
for i, v := range dictValues {
|
||||
f, ok := tryParseFloat64(v)
|
||||
if !ok {
|
||||
f = nan
|
||||
}
|
||||
a.A[i] = f
|
||||
}
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
idx := v[0]
|
||||
f := a.A[idx]
|
||||
if !math.IsNaN(f) {
|
||||
h.update(f)
|
||||
}
|
||||
}
|
||||
encoding.PutFloat64s(a)
|
||||
case valueTypeUint8:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
n := unmarshalUint8(v)
|
||||
h.update(float64(n))
|
||||
}
|
||||
case valueTypeUint16:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
n := unmarshalUint16(v)
|
||||
h.update(float64(n))
|
||||
}
|
||||
case valueTypeUint32:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
n := unmarshalUint32(v)
|
||||
h.update(float64(n))
|
||||
}
|
||||
case valueTypeUint64:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
n := unmarshalUint64(v)
|
||||
h.update(float64(n))
|
||||
}
|
||||
case valueTypeFloat64:
|
||||
for _, v := range c.getValuesEncoded(br) {
|
||||
f := unmarshalFloat64(v)
|
||||
if !math.IsNaN(f) {
|
||||
h.update(f)
|
||||
}
|
||||
}
|
||||
case valueTypeIPv4:
|
||||
case valueTypeTimestampISO8601:
|
||||
default:
|
||||
logger.Panicf("BUG: unexpected valueType=%d", c.valueType)
|
||||
}
|
||||
|
||||
return stateSizeIncrease
|
||||
}
|
||||
|
||||
func (sqp *statsQuantileProcessor) mergeState(sfp statsProcessor) {
|
||||
src := sfp.(*statsQuantileProcessor)
|
||||
sqp.h.mergeState(&src.h)
|
||||
|
@ -16,8 +16,8 @@ func (ss *statsSum) String() string {
|
||||
return "sum(" + fieldNamesString(ss.fields) + ")"
|
||||
}
|
||||
|
||||
func (ss *statsSum) neededFields() []string {
|
||||
return ss.fields
|
||||
func (ss *statsSum) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(ss.fields)
|
||||
}
|
||||
|
||||
func (ss *statsSum) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -38,27 +38,13 @@ func (ssp *statsSumProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
if ssp.ss.containsStar {
|
||||
// Sum all the columns
|
||||
for _, c := range br.getColumns() {
|
||||
f, count := c.sumValues(br)
|
||||
if count > 0 {
|
||||
if math.IsNaN(ssp.sum) {
|
||||
ssp.sum = f
|
||||
} else {
|
||||
ssp.sum += f
|
||||
}
|
||||
}
|
||||
ssp.updateStateForColumn(br, c)
|
||||
}
|
||||
} else {
|
||||
// Sum the requested columns
|
||||
for _, field := range ssp.ss.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f, count := c.sumValues(br)
|
||||
if count > 0 {
|
||||
if math.IsNaN(ssp.sum) {
|
||||
ssp.sum = f
|
||||
} else {
|
||||
ssp.sum += f
|
||||
}
|
||||
}
|
||||
ssp.updateStateForColumn(br, c)
|
||||
}
|
||||
}
|
||||
return 0
|
||||
@ -68,32 +54,39 @@ func (ssp *statsSumProcessor) updateStatsForRow(br *blockResult, rowIdx int) int
|
||||
if ssp.ss.containsStar {
|
||||
// Sum all the fields for the given row
|
||||
for _, c := range br.getColumns() {
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
if math.IsNaN(ssp.sum) {
|
||||
ssp.sum = f
|
||||
} else {
|
||||
ssp.sum += f
|
||||
}
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
ssp.updateState(f)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Sum only the given fields for the given row
|
||||
for _, field := range ssp.ss.fields {
|
||||
c := br.getColumnByName(field)
|
||||
f := c.getFloatValueAtRow(rowIdx)
|
||||
if !math.IsNaN(f) {
|
||||
if math.IsNaN(ssp.sum) {
|
||||
ssp.sum = f
|
||||
} else {
|
||||
ssp.sum += f
|
||||
}
|
||||
f, ok := c.getFloatValueAtRow(br, rowIdx)
|
||||
if ok {
|
||||
ssp.updateState(f)
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (ssp *statsSumProcessor) updateStateForColumn(br *blockResult, c *blockResultColumn) {
|
||||
f, count := c.sumValues(br)
|
||||
if count > 0 {
|
||||
ssp.updateState(f)
|
||||
}
|
||||
}
|
||||
|
||||
func (ssp *statsSumProcessor) updateState(f float64) {
|
||||
if math.IsNaN(ssp.sum) {
|
||||
ssp.sum = f
|
||||
} else {
|
||||
ssp.sum += f
|
||||
}
|
||||
}
|
||||
|
||||
func (ssp *statsSumProcessor) mergeState(sfp statsProcessor) {
|
||||
src := sfp.(*statsSumProcessor)
|
||||
ssp.sum += src.sum
|
||||
|
@ -15,8 +15,8 @@ func (ss *statsSumLen) String() string {
|
||||
return "sum_len(" + fieldNamesString(ss.fields) + ")"
|
||||
}
|
||||
|
||||
func (ss *statsSumLen) neededFields() []string {
|
||||
return ss.fields
|
||||
func (ss *statsSumLen) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(ss.fields)
|
||||
}
|
||||
|
||||
func (ss *statsSumLen) newStatsProcessor() (statsProcessor, int) {
|
||||
|
@ -24,8 +24,8 @@ func (su *statsUniqValues) String() string {
|
||||
return s
|
||||
}
|
||||
|
||||
func (su *statsUniqValues) neededFields() []string {
|
||||
return su.fields
|
||||
func (su *statsUniqValues) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(su.fields)
|
||||
}
|
||||
|
||||
func (su *statsUniqValues) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -68,7 +68,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForAllRowsColumn(c *blockResultC
|
||||
stateSizeIncrease := 0
|
||||
if c.isConst {
|
||||
// collect unique const values
|
||||
v := c.encodedValues[0]
|
||||
v := c.valuesEncoded[0]
|
||||
if v == "" {
|
||||
// skip empty values
|
||||
return stateSizeIncrease
|
||||
@ -141,7 +141,7 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
|
||||
stateSizeIncrease := 0
|
||||
if c.isConst {
|
||||
// collect unique const values
|
||||
v := c.encodedValues[0]
|
||||
v := c.valuesEncoded[0]
|
||||
if v == "" {
|
||||
// skip empty values
|
||||
return stateSizeIncrease
|
||||
@ -155,7 +155,8 @@ func (sup *statsUniqValuesProcessor) updateStatsForRowColumn(c *blockResultColum
|
||||
}
|
||||
if c.valueType == valueTypeDict {
|
||||
// collect unique non-zero c.dictValues
|
||||
dictIdx := c.encodedValues[rowIdx][0]
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
dictIdx := valuesEncoded[rowIdx][0]
|
||||
v := c.dictValues[dictIdx]
|
||||
if v == "" {
|
||||
// skip empty values
|
||||
|
@ -21,8 +21,8 @@ func (sv *statsValues) String() string {
|
||||
return s
|
||||
}
|
||||
|
||||
func (sv *statsValues) neededFields() []string {
|
||||
return sv.fields
|
||||
func (sv *statsValues) updateNeededFields(neededFields fieldsSet) {
|
||||
neededFields.addFields(sv.fields)
|
||||
}
|
||||
|
||||
func (sv *statsValues) newStatsProcessor() (statsProcessor, int) {
|
||||
@ -61,7 +61,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRows(br *blockResult) int {
|
||||
func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColumn, br *blockResult) int {
|
||||
stateSizeIncrease := 0
|
||||
if c.isConst {
|
||||
v := strings.Clone(c.encodedValues[0])
|
||||
v := strings.Clone(c.valuesEncoded[0])
|
||||
stateSizeIncrease += len(v)
|
||||
|
||||
values := svp.values
|
||||
@ -81,7 +81,7 @@ func (svp *statsValuesProcessor) updateStatsForAllRowsColumn(c *blockResultColum
|
||||
}
|
||||
|
||||
values := svp.values
|
||||
for _, encodedValue := range c.encodedValues {
|
||||
for _, encodedValue := range c.getValuesEncoded(br) {
|
||||
idx := encodedValue[0]
|
||||
values = append(values, dictValues[idx])
|
||||
}
|
||||
@ -128,7 +128,7 @@ func (svp *statsValuesProcessor) updateStatsForRow(br *blockResult, rowIdx int)
|
||||
func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, br *blockResult, rowIdx int) int {
|
||||
stateSizeIncrease := 0
|
||||
if c.isConst {
|
||||
v := strings.Clone(c.encodedValues[0])
|
||||
v := strings.Clone(c.valuesEncoded[0])
|
||||
stateSizeIncrease += len(v)
|
||||
|
||||
svp.values = append(svp.values, v)
|
||||
@ -138,7 +138,8 @@ func (svp *statsValuesProcessor) updateStatsForRowColumn(c *blockResultColumn, b
|
||||
}
|
||||
if c.valueType == valueTypeDict {
|
||||
// collect unique non-zero c.dictValues
|
||||
dictIdx := c.encodedValues[rowIdx][0]
|
||||
valuesEncoded := c.getValuesEncoded(br)
|
||||
dictIdx := valuesEncoded[rowIdx][0]
|
||||
v := strings.Clone(c.dictValues[dictIdx])
|
||||
stateSizeIncrease += len(v)
|
||||
|
||||
|
@ -2,12 +2,15 @@ package logstorage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
)
|
||||
|
||||
// genericSearchOptions contain options used for search.
|
||||
@ -60,8 +63,44 @@ type searchOptions struct {
|
||||
needAllColumns bool
|
||||
}
|
||||
|
||||
// WriteBlockFunc must write a block with the given timestamps and columns.
|
||||
//
|
||||
// WriteBlockFunc cannot hold references to timestamps and columns after returning.
|
||||
type WriteBlockFunc func(workerID uint, timestamps []int64, columns []BlockColumn)
|
||||
|
||||
// RunQuery runs the given q and calls writeBlock for results.
|
||||
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock func(workerID uint, timestamps []int64, columns []BlockColumn)) error {
|
||||
func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) error {
|
||||
qNew, err := s.initFilterInValues(ctx, tenantIDs, q)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
writeBlockResult := func(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
brs := getBlockRows()
|
||||
csDst := brs.cs
|
||||
|
||||
cs := br.getColumns()
|
||||
for _, c := range cs {
|
||||
values := c.getValues(br)
|
||||
csDst = append(csDst, BlockColumn{
|
||||
Name: c.name,
|
||||
Values: values,
|
||||
})
|
||||
}
|
||||
writeBlock(workerID, br.timestamps, csDst)
|
||||
|
||||
brs.cs = csDst
|
||||
putBlockRows(brs)
|
||||
}
|
||||
|
||||
return s.runQuery(ctx, tenantIDs, qNew, writeBlockResult)
|
||||
}
|
||||
|
||||
func (s *Storage) runQuery(ctx context.Context, tenantIDs []TenantID, q *Query, writeBlockResultFunc func(workerID uint, br *blockResult)) error {
|
||||
neededColumnNames, unneededColumnNames := q.getNeededColumns()
|
||||
so := &genericSearchOptions{
|
||||
tenantIDs: tenantIDs,
|
||||
@ -73,24 +112,8 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
|
||||
|
||||
workersCount := cgroup.AvailableCPUs()
|
||||
|
||||
pp := newDefaultPipeProcessor(func(workerID uint, br *blockResult) {
|
||||
brs := getBlockRows()
|
||||
csDst := brs.cs
|
||||
|
||||
for _, c := range br.getColumns() {
|
||||
values := c.getValues(br)
|
||||
csDst = append(csDst, BlockColumn{
|
||||
Name: c.name,
|
||||
Values: values,
|
||||
})
|
||||
}
|
||||
writeBlock(workerID, br.timestamps, csDst)
|
||||
|
||||
brs.cs = csDst
|
||||
putBlockRows(brs)
|
||||
})
|
||||
|
||||
ppMain := pp
|
||||
ppMain := newDefaultPipeProcessor(writeBlockResultFunc)
|
||||
pp := ppMain
|
||||
stopCh := ctx.Done()
|
||||
cancels := make([]func(), len(q.pipes))
|
||||
pps := make([]pipeProcessor, len(q.pipes))
|
||||
@ -121,6 +144,202 @@ func (s *Storage) RunQuery(ctx context.Context, tenantIDs []TenantID, q *Query,
|
||||
return errFlush
|
||||
}
|
||||
|
||||
// GetFieldNames returns field names from q results for the given tenantIDs.
|
||||
func (s *Storage) GetFieldNames(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
|
||||
// add `field_names ...` to the end of q.pipes
|
||||
pipes := append([]pipe{}, q.pipes...)
|
||||
|
||||
pipeStr := "field_names as names"
|
||||
lex := newLexer(pipeStr)
|
||||
pf, err := parsePipeFieldNames(lex)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing 'field_names' pipe: %s", err)
|
||||
}
|
||||
pf.isFirstPipe = len(pipes) == 0
|
||||
pipes = append(pipes, pf)
|
||||
|
||||
q = &Query{
|
||||
f: q.f,
|
||||
pipes: pipes,
|
||||
}
|
||||
|
||||
return s.runSingleColumnQuery(ctx, tenantIDs, q)
|
||||
}
|
||||
|
||||
// GetFieldValues returns unique values for the given fieldName returned by q for the given tenantIDs.
|
||||
//
|
||||
// If limit > 0, then up to limit unique values are returned. The values are returned in arbitrary order because of performance reasons.
|
||||
// The caller may sort the returned values if needed.
|
||||
func (s *Storage) GetFieldValues(ctx context.Context, tenantIDs []TenantID, q *Query, fieldName string, limit uint64) ([]string, error) {
|
||||
// add 'uniq fieldName' to the end of q.pipes
|
||||
if !endsWithPipeUniqSingleField(q.pipes, fieldName) {
|
||||
pipes := append([]pipe{}, q.pipes...)
|
||||
|
||||
pipeStr := fmt.Sprintf("uniq by (%s) limit %d", quoteTokenIfNeeded(fieldName), limit)
|
||||
lex := newLexer(pipeStr)
|
||||
pu, err := parsePipeUniq(lex)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error when parsing 'uniq' pipe: %s", err)
|
||||
}
|
||||
pipes = append(pipes, pu)
|
||||
|
||||
q = &Query{
|
||||
f: q.f,
|
||||
pipes: pipes,
|
||||
}
|
||||
}
|
||||
|
||||
return s.runSingleColumnQuery(ctx, tenantIDs, q)
|
||||
}
|
||||
|
||||
func endsWithPipeUniqSingleField(pipes []pipe, fieldName string) bool {
|
||||
if len(pipes) == 0 {
|
||||
return false
|
||||
}
|
||||
pu, ok := pipes[len(pipes)-1].(*pipeUniq)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return len(pu.byFields) == 1 && pu.byFields[0] == fieldName
|
||||
}
|
||||
|
||||
func (s *Storage) runSingleColumnQuery(ctx context.Context, tenantIDs []TenantID, q *Query) ([]string, error) {
|
||||
var values []string
|
||||
var valuesLock sync.Mutex
|
||||
writeBlockResult := func(workerID uint, br *blockResult) {
|
||||
if len(br.timestamps) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
cs := br.getColumns()
|
||||
if len(cs) != 1 {
|
||||
logger.Panicf("BUG: expecting only a single column; got %d columns", len(cs))
|
||||
}
|
||||
columnValues := cs[0].getValues(br)
|
||||
|
||||
columnValuesCopy := make([]string, len(columnValues))
|
||||
for i, v := range columnValues {
|
||||
columnValuesCopy[i] = strings.Clone(v)
|
||||
}
|
||||
|
||||
valuesLock.Lock()
|
||||
values = append(values, columnValuesCopy...)
|
||||
valuesLock.Unlock()
|
||||
}
|
||||
|
||||
err := s.runQuery(ctx, tenantIDs, q, writeBlockResult)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return values, nil
|
||||
}
|
||||
|
||||
func (s *Storage) initFilterInValues(ctx context.Context, tenantIDs []TenantID, q *Query) (*Query, error) {
|
||||
if !hasFilterInWithQueryForFilter(q.f) && !hasFilterInWithQueryForPipes(q.pipes) {
|
||||
return q, nil
|
||||
}
|
||||
|
||||
getFieldValues := func(q *Query, fieldName string) ([]string, error) {
|
||||
return s.GetFieldValues(ctx, tenantIDs, q, fieldName, 0)
|
||||
}
|
||||
cache := make(map[string][]string)
|
||||
fNew, err := initFilterInValuesForFilter(cache, q.f, getFieldValues)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
pipesNew, err := initFilterInValuesForPipes(cache, q.pipes, getFieldValues)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qNew := &Query{
|
||||
f: fNew,
|
||||
pipes: pipesNew,
|
||||
}
|
||||
return qNew, nil
|
||||
}
|
||||
|
||||
func hasFilterInWithQueryForFilter(f filter) bool {
|
||||
visitFunc := func(f filter) bool {
|
||||
fi, ok := f.(*filterIn)
|
||||
return ok && fi.needExecuteQuery
|
||||
}
|
||||
return visitFilter(f, visitFunc)
|
||||
}
|
||||
|
||||
func hasFilterInWithQueryForPipes(pipes []pipe) bool {
|
||||
for _, p := range pipes {
|
||||
ps, ok := p.(*pipeStats)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
for _, f := range ps.funcs {
|
||||
if f.iff != nil && hasFilterInWithQueryForFilter(f.iff) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type getFieldValuesFunc func(q *Query, fieldName string) ([]string, error)
|
||||
|
||||
func initFilterInValuesForFilter(cache map[string][]string, f filter, getFieldValuesFunc getFieldValuesFunc) (filter, error) {
|
||||
visitFunc := func(f filter) bool {
|
||||
fi, ok := f.(*filterIn)
|
||||
return ok && fi.needExecuteQuery
|
||||
}
|
||||
copyFunc := func(f filter) (filter, error) {
|
||||
fi := f.(*filterIn)
|
||||
|
||||
qStr := fi.q.String()
|
||||
values, ok := cache[qStr]
|
||||
if !ok {
|
||||
vs, err := getFieldValuesFunc(fi.q, fi.qFieldName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("cannot obtain unique values for %s: %w", fi, err)
|
||||
}
|
||||
cache[qStr] = vs
|
||||
values = vs
|
||||
}
|
||||
|
||||
fiNew := &filterIn{
|
||||
fieldName: fi.fieldName,
|
||||
q: fi.q,
|
||||
values: values,
|
||||
}
|
||||
return fiNew, nil
|
||||
}
|
||||
return copyFilter(f, visitFunc, copyFunc)
|
||||
}
|
||||
|
||||
func initFilterInValuesForPipes(cache map[string][]string, pipes []pipe, getFieldValuesFunc getFieldValuesFunc) ([]pipe, error) {
|
||||
pipesNew := make([]pipe, len(pipes))
|
||||
for i, p := range pipes {
|
||||
switch t := p.(type) {
|
||||
case *pipeStats:
|
||||
funcsNew := make([]pipeStatsFunc, len(t.funcs))
|
||||
for j, f := range t.funcs {
|
||||
if f.iff != nil {
|
||||
fNew, err := initFilterInValuesForFilter(cache, f.iff, getFieldValuesFunc)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
f.iff = fNew
|
||||
}
|
||||
funcsNew[j] = f
|
||||
}
|
||||
pipesNew[i] = &pipeStats{
|
||||
byFields: t.byFields,
|
||||
funcs: funcsNew,
|
||||
}
|
||||
default:
|
||||
pipesNew[i] = p
|
||||
}
|
||||
}
|
||||
return pipesNew, nil
|
||||
}
|
||||
|
||||
type blockRows struct {
|
||||
cs []BlockColumn
|
||||
}
|
||||
@ -169,7 +388,7 @@ type searchResultFunc func(workerID uint, br *blockResult)
|
||||
|
||||
// search searches for the matching rows according to so.
|
||||
//
|
||||
// It calls processBlockResult for each found matching block.
|
||||
// It calls processBlockResult for each matching block.
|
||||
func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-chan struct{}, processBlockResult searchResultFunc) {
|
||||
// Spin up workers
|
||||
var wgWorkers sync.WaitGroup
|
||||
@ -178,6 +397,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
|
||||
for i := 0; i < workersCount; i++ {
|
||||
go func(workerID uint) {
|
||||
bs := getBlockSearch()
|
||||
bm := getBitmap(0)
|
||||
for bswb := range workCh {
|
||||
bsws := bswb.bsws
|
||||
for i := range bsws {
|
||||
@ -188,7 +408,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
|
||||
continue
|
||||
}
|
||||
|
||||
bs.search(bsw)
|
||||
bs.search(bsw, bm)
|
||||
if len(bs.br.timestamps) > 0 {
|
||||
processBlockResult(workerID, &bs.br)
|
||||
}
|
||||
@ -198,22 +418,24 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
|
||||
putBlockSearchWorkBatch(bswb)
|
||||
}
|
||||
putBlockSearch(bs)
|
||||
putBitmap(bm)
|
||||
wgWorkers.Done()
|
||||
}(uint(i))
|
||||
}
|
||||
|
||||
// Obtain common time filter from so.filter
|
||||
ft, f := getCommonFilterTime(so.filter)
|
||||
// Obtain time range from so.filter
|
||||
f := so.filter
|
||||
minTimestamp, maxTimestamp := getFilterTimeRange(f)
|
||||
|
||||
// Select partitions according to the selected time range
|
||||
s.partitionsLock.Lock()
|
||||
ptws := s.partitions
|
||||
minDay := ft.minTimestamp / nsecPerDay
|
||||
minDay := minTimestamp / nsecPerDay
|
||||
n := sort.Search(len(ptws), func(i int) bool {
|
||||
return ptws[i].day >= minDay
|
||||
})
|
||||
ptws = ptws[n:]
|
||||
maxDay := ft.maxTimestamp / nsecPerDay
|
||||
maxDay := maxTimestamp / nsecPerDay
|
||||
n = sort.Search(len(ptws), func(i int) bool {
|
||||
return ptws[i].day > maxDay
|
||||
})
|
||||
@ -234,7 +456,7 @@ func (s *Storage) search(workersCount int, so *genericSearchOptions, stopCh <-ch
|
||||
partitionSearchConcurrencyLimitCh <- struct{}{}
|
||||
wgSearchers.Add(1)
|
||||
go func(idx int, pt *partition) {
|
||||
psfs[idx] = pt.search(ft, sf, f, so, workCh, stopCh)
|
||||
psfs[idx] = pt.search(minTimestamp, maxTimestamp, sf, f, so, workCh, stopCh)
|
||||
wgSearchers.Done()
|
||||
<-partitionSearchConcurrencyLimitCh
|
||||
}(i, ptw.pt)
|
||||
@ -263,7 +485,7 @@ var partitionSearchConcurrencyLimitCh = make(chan struct{}, cgroup.AvailableCPUs
|
||||
|
||||
type partitionSearchFinalizer func()
|
||||
|
||||
func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
|
||||
func (pt *partition) search(minTimestamp, maxTimestamp int64, sf *StreamFilter, f filter, so *genericSearchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
|
||||
if needStop(stopCh) {
|
||||
// Do not spend CPU time on search, since it is already stopped.
|
||||
return func() {}
|
||||
@ -281,8 +503,8 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
|
||||
soInternal := &searchOptions{
|
||||
tenantIDs: tenantIDs,
|
||||
streamIDs: streamIDs,
|
||||
minTimestamp: ft.minTimestamp,
|
||||
maxTimestamp: ft.maxTimestamp,
|
||||
minTimestamp: minTimestamp,
|
||||
maxTimestamp: maxTimestamp,
|
||||
filter: f,
|
||||
neededColumnNames: so.neededColumnNames,
|
||||
unneededColumnNames: so.unneededColumnNames,
|
||||
@ -292,60 +514,32 @@ func (pt *partition) search(ft *filterTime, sf *StreamFilter, f filter, so *gene
|
||||
}
|
||||
|
||||
func hasStreamFilters(f filter) bool {
|
||||
switch t := f.(type) {
|
||||
case *filterAnd:
|
||||
return hasStreamFiltersInList(t.filters)
|
||||
case *filterOr:
|
||||
return hasStreamFiltersInList(t.filters)
|
||||
case *filterNot:
|
||||
return hasStreamFilters(t.f)
|
||||
case *filterStream:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
visitFunc := func(f filter) bool {
|
||||
_, ok := f.(*filterStream)
|
||||
return ok
|
||||
}
|
||||
}
|
||||
|
||||
func hasStreamFiltersInList(filters []filter) bool {
|
||||
for _, f := range filters {
|
||||
if hasStreamFilters(f) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
return visitFilter(f, visitFunc)
|
||||
}
|
||||
|
||||
func initStreamFilters(tenantIDs []TenantID, idb *indexdb, f filter) filter {
|
||||
switch t := f.(type) {
|
||||
case *filterAnd:
|
||||
return &filterAnd{
|
||||
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
|
||||
}
|
||||
case *filterOr:
|
||||
return &filterOr{
|
||||
filters: initStreamFiltersList(tenantIDs, idb, t.filters),
|
||||
}
|
||||
case *filterNot:
|
||||
return &filterNot{
|
||||
f: initStreamFilters(tenantIDs, idb, t.f),
|
||||
}
|
||||
case *filterStream:
|
||||
return &filterStream{
|
||||
f: t.f,
|
||||
visitFunc := func(f filter) bool {
|
||||
_, ok := f.(*filterStream)
|
||||
return ok
|
||||
}
|
||||
copyFunc := func(f filter) (filter, error) {
|
||||
fs := f.(*filterStream)
|
||||
fsNew := &filterStream{
|
||||
f: fs.f,
|
||||
tenantIDs: tenantIDs,
|
||||
idb: idb,
|
||||
}
|
||||
default:
|
||||
return t
|
||||
return fsNew, nil
|
||||
}
|
||||
}
|
||||
|
||||
func initStreamFiltersList(tenantIDs []TenantID, idb *indexdb, filters []filter) []filter {
|
||||
result := make([]filter, len(filters))
|
||||
for i, f := range filters {
|
||||
result[i] = initStreamFilters(tenantIDs, idb, f)
|
||||
f, err := copyFilter(f, visitFunc, copyFunc)
|
||||
if err != nil {
|
||||
logger.Panicf("BUG: unexpected error: %s", err)
|
||||
}
|
||||
return result
|
||||
return f
|
||||
}
|
||||
|
||||
func (ddb *datadb) search(so *searchOptions, workCh chan<- *blockSearchWorkBatch, stopCh <-chan struct{}) partitionSearchFinalizer {
|
||||
@ -646,23 +840,25 @@ func getCommonStreamFilter(f filter) (*StreamFilter, filter) {
|
||||
return nil, f
|
||||
}
|
||||
|
||||
func getCommonFilterTime(f filter) (*filterTime, filter) {
|
||||
func getFilterTimeRange(f filter) (int64, int64) {
|
||||
switch t := f.(type) {
|
||||
case *filterAnd:
|
||||
minTimestamp := int64(math.MinInt64)
|
||||
maxTimestamp := int64(math.MaxInt64)
|
||||
for _, filter := range t.filters {
|
||||
ft, ok := filter.(*filterTime)
|
||||
if ok {
|
||||
// The ft must remain in t.filters order to properly filter out rows outside the selected time range
|
||||
return ft, f
|
||||
if ft.minTimestamp > minTimestamp {
|
||||
minTimestamp = ft.minTimestamp
|
||||
}
|
||||
if ft.maxTimestamp < maxTimestamp {
|
||||
maxTimestamp = ft.maxTimestamp
|
||||
}
|
||||
}
|
||||
}
|
||||
return minTimestamp, maxTimestamp
|
||||
case *filterTime:
|
||||
return t, f
|
||||
return t.minTimestamp, t.maxTimestamp
|
||||
}
|
||||
return allFilterTime, f
|
||||
}
|
||||
|
||||
var allFilterTime = &filterTime{
|
||||
minTimestamp: math.MinInt64,
|
||||
maxTimestamp: math.MaxInt64,
|
||||
return math.MinInt64, math.MaxInt64
|
||||
}
|
||||
|
@ -78,6 +78,14 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
}
|
||||
s.debugFlush()
|
||||
|
||||
mustRunQuery := func(tenantIDs []TenantID, q *Query, writeBlock WriteBlockFunc) {
|
||||
t.Helper()
|
||||
err := s.RunQuery(context.Background(), tenantIDs, q, writeBlock)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error returned from the query %s: %s", q, err)
|
||||
}
|
||||
}
|
||||
|
||||
// run tests on the storage data
|
||||
t.Run("missing-tenant", func(_ *testing.T) {
|
||||
q := mustParseQuery(`"log message"`)
|
||||
@ -89,7 +97,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
})
|
||||
t.Run("missing-message-text", func(_ *testing.T) {
|
||||
q := mustParseQuery(`foobar`)
|
||||
@ -101,7 +109,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
})
|
||||
t.Run("matching-tenant-id", func(t *testing.T) {
|
||||
q := mustParseQuery(`tenant.id:*`)
|
||||
@ -135,7 +143,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -149,7 +157,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
|
||||
mustRunQuery(allTenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -162,7 +170,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
|
||||
mustRunQuery(allTenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := tenantsCount * streamsPerTenant * blocksPerStream * rowsPerBlock
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -174,7 +182,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
writeBlock := func(_ uint, timestamps []int64, _ []BlockColumn) {
|
||||
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
|
||||
}
|
||||
checkErr(t, s.RunQuery(context.Background(), allTenantIDs, q, writeBlock))
|
||||
mustRunQuery(allTenantIDs, q, writeBlock)
|
||||
})
|
||||
t.Run("matching-stream-id", func(t *testing.T) {
|
||||
for i := 0; i < streamsPerTenant; i++ {
|
||||
@ -208,7 +216,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream * rowsPerBlock
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -227,7 +235,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream * 2
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -247,7 +255,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := streamsPerTenant * blocksPerStream
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -267,7 +275,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
rowsCountTotal.Add(uint32(len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
|
||||
expectedRowsCount := blocksPerStream
|
||||
if n := rowsCountTotal.Load(); n != uint32(expectedRowsCount) {
|
||||
@ -286,7 +294,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
})
|
||||
t.Run("missing-time-range", func(_ *testing.T) {
|
||||
minTimestamp := baseTimestamp + (rowsPerBlock+1)*1e9
|
||||
@ -300,7 +308,7 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
panic(fmt.Errorf("unexpected match for %d rows", len(timestamps)))
|
||||
}
|
||||
tenantIDs := []TenantID{tenantID}
|
||||
checkErr(t, s.RunQuery(context.Background(), tenantIDs, q, writeBlock))
|
||||
mustRunQuery(tenantIDs, q, writeBlock)
|
||||
})
|
||||
|
||||
// Close the storage and delete its data
|
||||
@ -308,13 +316,6 @@ func TestStorageRunQuery(t *testing.T) {
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
// checkErr fails the test immediately if err is non-nil.
//
// It is marked as a test helper, so the failure is attributed to the caller's line.
func checkErr(t *testing.T, err error) {
	t.Helper()
	if err == nil {
		return
	}
	t.Fatalf("unexpected err: %s", err)
}
|
||||
|
||||
func mustParseQuery(query string) *Query {
|
||||
q, err := ParseQuery(query)
|
||||
if err != nil {
|
||||
@ -470,9 +471,6 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
var rowsCountTotal atomic.Uint32
|
||||
processBlock := func(_ uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
rowsCountTotal.Add(uint32(len(br.timestamps)))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
@ -504,7 +502,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("stream-filter-mismatch", func(_ *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-.+:2345"}`)
|
||||
minTimestamp := baseTimestamp
|
||||
maxTimestamp := baseTimestamp + rowsPerBlock*1e9 + blocksPerStream
|
||||
f := getBaseFilter(minTimestamp, maxTimestamp, sf)
|
||||
@ -520,7 +518,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
})
|
||||
t.Run("matching-stream-id", func(t *testing.T) {
|
||||
for i := 0; i < streamsPerTenant; i++ {
|
||||
sf := mustNewStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
|
||||
sf := mustNewTestStreamFilter(fmt.Sprintf(`{job="foobar",instance="host-%d:234"}`, i))
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
@ -535,9 +533,6 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
var rowsCountTotal atomic.Uint32
|
||||
processBlock := func(_ uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
rowsCountTotal.Add(uint32(len(br.timestamps)))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
@ -549,7 +544,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-stream-ids", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
@ -564,9 +559,6 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
var rowsCountTotal atomic.Uint32
|
||||
processBlock := func(_ uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
rowsCountTotal.Add(uint32(len(br.timestamps)))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
@ -577,7 +569,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("matching-multiple-stream-ids-with-re-filter", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="foobar",instance=~"host-[^:]+:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
@ -601,9 +593,6 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
var rowsCountTotal atomic.Uint32
|
||||
processBlock := func(_ uint, br *blockResult) {
|
||||
if !br.streamID.tenantID.equal(&tenantID) {
|
||||
panic(fmt.Errorf("unexpected tenantID; got %s; want %s", &br.streamID.tenantID, &tenantID))
|
||||
}
|
||||
rowsCountTotal.Add(uint32(len(br.timestamps)))
|
||||
}
|
||||
s.search(workersCount, so, nil, processBlock)
|
||||
@ -614,7 +603,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-smaller-time-range", func(t *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
@ -639,7 +628,7 @@ func TestStorageSearch(t *testing.T) {
|
||||
}
|
||||
})
|
||||
t.Run("matching-stream-id-missing-time-range", func(_ *testing.T) {
|
||||
sf := mustNewStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
sf := mustNewTestStreamFilter(`{job="foobar",instance="host-1:234"}`)
|
||||
tenantID := TenantID{
|
||||
AccountID: 1,
|
||||
ProjectID: 11,
|
||||
@ -661,11 +650,3 @@ func TestStorageSearch(t *testing.T) {
|
||||
s.MustClose()
|
||||
fs.MustRemoveAll(path)
|
||||
}
|
||||
|
||||
func mustNewStreamFilter(s string) *StreamFilter {
|
||||
sf, err := newStreamFilter(s)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("unexpected error in newStreamFilter(%q): %w", s, err))
|
||||
}
|
||||
return sf
|
||||
}
|
||||
|
@ -1,11 +1,14 @@
|
||||
package logstorage
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/regexutil"
|
||||
)
|
||||
|
||||
@ -14,6 +17,29 @@ type StreamFilter struct {
|
||||
orFilters []*andStreamFilter
|
||||
}
|
||||
|
||||
func (sf *StreamFilter) matchStreamName(s string) bool {
|
||||
sn := getStreamName()
|
||||
defer putStreamName(sn)
|
||||
|
||||
if !sn.parse(s) {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, of := range sf.orFilters {
|
||||
matchAndFilters := true
|
||||
for _, tf := range of.tagFilters {
|
||||
if !sn.match(tf) {
|
||||
matchAndFilters = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if matchAndFilters {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (sf *StreamFilter) isEmpty() bool {
|
||||
for _, af := range sf.orFilters {
|
||||
if len(af.tagFilters) > 0 {
|
||||
@ -69,10 +95,199 @@ type streamTagFilter struct {
|
||||
regexp *regexutil.PromRegex
|
||||
}
|
||||
|
||||
func (tf *streamTagFilter) getRegexp() *regexutil.PromRegex {
|
||||
return tf.regexp
|
||||
}
|
||||
|
||||
func (tf *streamTagFilter) String() string {
|
||||
return quoteTokenIfNeeded(tf.tagName) + tf.op + strconv.Quote(tf.value)
|
||||
}
|
||||
|
||||
func parseStreamFilter(lex *lexer) (*StreamFilter, error) {
|
||||
if !lex.isKeyword("{") {
|
||||
return nil, fmt.Errorf("unexpected token %q instead of '{' in _stream filter", lex.token)
|
||||
}
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("incomplete _stream filter after '{'")
|
||||
}
|
||||
var filters []*andStreamFilter
|
||||
for {
|
||||
f, err := parseAndStreamFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filters = append(filters, f)
|
||||
switch {
|
||||
case lex.isKeyword("}"):
|
||||
lex.nextToken()
|
||||
sf := &StreamFilter{
|
||||
orFilters: filters,
|
||||
}
|
||||
return sf, nil
|
||||
case lex.isKeyword("or"):
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("incomplete _stream filter after 'or'")
|
||||
}
|
||||
if lex.isKeyword("}") {
|
||||
return nil, fmt.Errorf("unexpected '}' after 'or' in _stream filter")
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected token in _stream filter: %q; want '}' or 'or'", lex.token)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseAndStreamFilter(lex *lexer) (*andStreamFilter, error) {
|
||||
var filters []*streamTagFilter
|
||||
for {
|
||||
if lex.isKeyword("}") {
|
||||
asf := &andStreamFilter{
|
||||
tagFilters: filters,
|
||||
}
|
||||
return asf, nil
|
||||
}
|
||||
f, err := parseStreamTagFilter(lex)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
filters = append(filters, f)
|
||||
switch {
|
||||
case lex.isKeyword("or", "}"):
|
||||
asf := &andStreamFilter{
|
||||
tagFilters: filters,
|
||||
}
|
||||
return asf, nil
|
||||
case lex.isKeyword(","):
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing stream filter after ','")
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected token %q in _stream filter; want 'or', 'and', '}' or ','", lex.token)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func parseStreamTagFilter(lex *lexer) (*streamTagFilter, error) {
|
||||
tagName := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing operation in _stream filter for %q field", tagName)
|
||||
}
|
||||
if !lex.isKeyword("=", "!=", "=~", "!~") {
|
||||
return nil, fmt.Errorf("unsupported operation %q in _steam filter for %q field; supported operations: =, !=, =~, !~", lex.token, tagName)
|
||||
}
|
||||
op := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing _stream filter value for %q field", tagName)
|
||||
}
|
||||
value := lex.token
|
||||
if !lex.mustNextToken() {
|
||||
return nil, fmt.Errorf("missing token after %q%s%q filter", tagName, op, value)
|
||||
}
|
||||
stf := &streamTagFilter{
|
||||
tagName: tagName,
|
||||
op: op,
|
||||
value: value,
|
||||
}
|
||||
if op == "=~" || op == "!~" {
|
||||
re, err := regexutil.NewPromRegex(value)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid regexp %q for stream filter: %w", value, err)
|
||||
}
|
||||
stf.regexp = re
|
||||
}
|
||||
return stf, nil
|
||||
}
|
||||
|
||||
func getStreamName() *streamName {
|
||||
v := streamNamePool.Get()
|
||||
if v == nil {
|
||||
return &streamName{}
|
||||
}
|
||||
return v.(*streamName)
|
||||
}
|
||||
|
||||
func putStreamName(sn *streamName) {
|
||||
sn.reset()
|
||||
streamNamePool.Put(sn)
|
||||
}
|
||||
|
||||
var streamNamePool sync.Pool
|
||||
|
||||
type streamName struct {
|
||||
tags []Field
|
||||
}
|
||||
|
||||
func (sn *streamName) reset() {
|
||||
clear(sn.tags)
|
||||
sn.tags = sn.tags[:0]
|
||||
}
|
||||
|
||||
func (sn *streamName) parse(s string) bool {
|
||||
if len(s) < 2 || s[0] != '{' || s[len(s)-1] != '}' {
|
||||
return false
|
||||
}
|
||||
s = s[1 : len(s)-1]
|
||||
if len(s) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
for {
|
||||
// Parse tag name
|
||||
n := strings.IndexByte(s, '=')
|
||||
if n < 0 {
|
||||
// cannot find tag name
|
||||
return false
|
||||
}
|
||||
name := s[:n]
|
||||
s = s[n+1:]
|
||||
|
||||
// Parse tag value
|
||||
if len(s) == 0 || s[0] != '"' {
|
||||
return false
|
||||
}
|
||||
qPrefix, err := strconv.QuotedPrefix(s)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
s = s[len(qPrefix):]
|
||||
value, err := strconv.Unquote(qPrefix)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
sn.tags = append(sn.tags, Field{
|
||||
Name: name,
|
||||
Value: value,
|
||||
})
|
||||
|
||||
if len(s) == 0 {
|
||||
return true
|
||||
}
|
||||
if s[0] != ',' {
|
||||
return false
|
||||
}
|
||||
s = s[1:]
|
||||
}
|
||||
}
|
||||
|
||||
func (sn *streamName) match(tf *streamTagFilter) bool {
|
||||
v := sn.getTagValueByTagName(tf.tagName)
|
||||
switch tf.op {
|
||||
case "=":
|
||||
return v == tf.value
|
||||
case "!=":
|
||||
return v != tf.value
|
||||
case "=~":
|
||||
return tf.regexp.MatchString(v)
|
||||
case "!~":
|
||||
return !tf.regexp.MatchString(v)
|
||||
default:
|
||||
logger.Panicf("BUG: unexpected tagFilter operation: %q", tf.op)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func (sn *streamName) getTagValueByTagName(name string) string {
|
||||
for _, t := range sn.tags {
|
||||
if t.Name == name {
|
||||
return t.Value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user