2024-05-12 16:33:29 +02:00
|
|
|
package logstorage
|
|
|
|
|
|
|
|
import (
|
|
|
|
"container/heap"
|
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
"sort"
|
|
|
|
"strings"
|
|
|
|
"sync"
|
|
|
|
"sync/atomic"
|
|
|
|
"unsafe"
|
|
|
|
|
2024-07-05 01:17:03 +02:00
|
|
|
"github.com/valyala/quicktemplate"
|
|
|
|
|
2024-07-01 01:44:17 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
2024-05-12 16:33:29 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
|
|
|
|
)
|
|
|
|
|
|
|
|
// pipeSort processes '| sort ...' queries.
|
|
|
|
//
|
|
|
|
// See https://docs.victoriametrics.com/victorialogs/logsql/#sort-pipe
|
|
|
|
type pipeSort struct {
|
|
|
|
// byFields contains field names for sorting from 'by(...)' clause.
|
|
|
|
byFields []*bySortField
|
|
|
|
|
|
|
|
// whether to apply descending order
|
|
|
|
isDesc bool
|
2024-05-15 04:55:44 +02:00
|
|
|
|
|
|
|
// how many results to skip
|
|
|
|
offset uint64
|
|
|
|
|
|
|
|
// how many results to return
|
|
|
|
//
|
|
|
|
// if zero, then all the results are returned
|
|
|
|
limit uint64
|
2024-07-01 01:44:17 +02:00
|
|
|
|
|
|
|
// The name of the field to store the row rank.
|
2024-10-29 16:43:07 +01:00
|
|
|
rankFieldName string
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (ps *pipeSort) String() string {
|
|
|
|
s := "sort"
|
|
|
|
if len(ps.byFields) > 0 {
|
|
|
|
a := make([]string, len(ps.byFields))
|
|
|
|
for i, bf := range ps.byFields {
|
|
|
|
a[i] = bf.String()
|
|
|
|
}
|
|
|
|
s += " by (" + strings.Join(a, ", ") + ")"
|
|
|
|
}
|
|
|
|
if ps.isDesc {
|
|
|
|
s += " desc"
|
|
|
|
}
|
2024-05-15 04:55:44 +02:00
|
|
|
if ps.offset > 0 {
|
|
|
|
s += fmt.Sprintf(" offset %d", ps.offset)
|
|
|
|
}
|
|
|
|
if ps.limit > 0 {
|
|
|
|
s += fmt.Sprintf(" limit %d", ps.limit)
|
|
|
|
}
|
2024-10-29 16:43:07 +01:00
|
|
|
if ps.rankFieldName != "" {
|
|
|
|
s += rankFieldNameString(ps.rankFieldName)
|
2024-07-01 01:44:17 +02:00
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
2024-06-27 14:18:42 +02:00
|
|
|
func (ps *pipeSort) canLiveTail() bool {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
func (ps *pipeSort) updateNeededFields(neededFields, unneededFields fieldsSet) {
|
2024-05-30 16:19:23 +02:00
|
|
|
if neededFields.isEmpty() {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-10-29 16:43:07 +01:00
|
|
|
if ps.rankFieldName != "" {
|
|
|
|
neededFields.remove(ps.rankFieldName)
|
2024-07-01 01:44:17 +02:00
|
|
|
if neededFields.contains("*") {
|
2024-10-29 16:43:07 +01:00
|
|
|
unneededFields.add(ps.rankFieldName)
|
2024-07-01 01:44:17 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
if len(ps.byFields) == 0 {
|
|
|
|
neededFields.add("*")
|
|
|
|
unneededFields.reset()
|
|
|
|
} else {
|
|
|
|
for _, bf := range ps.byFields {
|
|
|
|
neededFields.add(bf.name)
|
|
|
|
unneededFields.remove(bf.name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-25 21:36:16 +02:00
|
|
|
func (ps *pipeSort) hasFilterInWithQuery() bool {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2024-05-26 02:01:32 +02:00
|
|
|
func (ps *pipeSort) initFilterInValues(_ map[string][]string, _ getFieldValuesFunc) (pipe, error) {
|
2024-05-25 21:36:16 +02:00
|
|
|
return ps, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (ps *pipeSort) newPipeProcessor(workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
|
2024-05-15 04:55:44 +02:00
|
|
|
if ps.limit > 0 {
|
2024-05-25 21:36:16 +02:00
|
|
|
return newPipeTopkProcessor(ps, workersCount, stopCh, cancel, ppNext)
|
2024-05-15 04:55:44 +02:00
|
|
|
}
|
2024-05-25 21:36:16 +02:00
|
|
|
return newPipeSortProcessor(ps, workersCount, stopCh, cancel, ppNext)
|
2024-05-15 04:55:44 +02:00
|
|
|
}
|
|
|
|
|
2024-05-25 21:36:16 +02:00
|
|
|
func newPipeSortProcessor(ps *pipeSort, workersCount int, stopCh <-chan struct{}, cancel func(), ppNext pipeProcessor) pipeProcessor {
|
2024-05-12 16:33:29 +02:00
|
|
|
maxStateSize := int64(float64(memory.Allowed()) * 0.2)
|
|
|
|
|
|
|
|
shards := make([]pipeSortProcessorShard, workersCount)
|
|
|
|
for i := range shards {
|
2024-05-20 04:08:30 +02:00
|
|
|
shards[i] = pipeSortProcessorShard{
|
|
|
|
pipeSortProcessorShardNopad: pipeSortProcessorShardNopad{
|
2024-09-29 10:16:14 +02:00
|
|
|
ps: ps,
|
2024-05-20 04:08:30 +02:00
|
|
|
},
|
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
psp := &pipeSortProcessor{
|
|
|
|
ps: ps,
|
|
|
|
stopCh: stopCh,
|
|
|
|
cancel: cancel,
|
2024-05-25 21:36:16 +02:00
|
|
|
ppNext: ppNext,
|
2024-05-12 16:33:29 +02:00
|
|
|
|
|
|
|
shards: shards,
|
|
|
|
|
|
|
|
maxStateSize: maxStateSize,
|
|
|
|
}
|
|
|
|
psp.stateSizeBudget.Store(maxStateSize)
|
|
|
|
|
|
|
|
return psp
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeSortProcessor struct {
|
|
|
|
ps *pipeSort
|
|
|
|
stopCh <-chan struct{}
|
|
|
|
cancel func()
|
2024-05-25 21:36:16 +02:00
|
|
|
ppNext pipeProcessor
|
2024-05-12 16:33:29 +02:00
|
|
|
|
|
|
|
shards []pipeSortProcessorShard
|
|
|
|
|
|
|
|
maxStateSize int64
|
|
|
|
stateSizeBudget atomic.Int64
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeSortProcessorShard struct {
|
|
|
|
pipeSortProcessorShardNopad
|
|
|
|
|
|
|
|
// The padding prevents false sharing on widespread platforms with 128 mod (cache line size) = 0 .
|
|
|
|
_ [128 - unsafe.Sizeof(pipeSortProcessorShardNopad{})%128]byte
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeSortProcessorShardNopad struct {
|
|
|
|
// ps points to the parent pipeSort.
|
|
|
|
ps *pipeSort
|
|
|
|
|
|
|
|
// blocks holds all the blocks with logs written to the shard.
|
|
|
|
blocks []sortBlock
|
|
|
|
|
|
|
|
// rowRefs holds references to all the rows stored in blocks.
|
|
|
|
//
|
|
|
|
// Sorting sorts rowRefs, while blocks remain unchanged. This should speed up sorting.
|
|
|
|
rowRefs []sortRowRef
|
|
|
|
|
|
|
|
// rowRefNext points to the next index at rowRefs during merge shards phase
|
|
|
|
rowRefNext int
|
|
|
|
|
|
|
|
// stateSizeBudget is the remaining budget for the whole state size for the shard.
|
|
|
|
// The per-shard budget is provided in chunks from the parent pipeSortProcessor.
|
|
|
|
stateSizeBudget int
|
2024-05-15 04:55:44 +02:00
|
|
|
|
|
|
|
// columnValues is used as temporary buffer at pipeSortProcessorShard.writeBlock
|
|
|
|
columnValues [][]string
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// sortBlock represents a block of logs for sorting.
|
|
|
|
type sortBlock struct {
|
|
|
|
// br is a result block to sort
|
|
|
|
br *blockResult
|
|
|
|
|
|
|
|
// byColumns refers block data for 'by(...)' columns
|
|
|
|
byColumns []sortBlockByColumn
|
|
|
|
|
|
|
|
// otherColumns refers block data for other than 'by(...)' columns
|
|
|
|
otherColumns []*blockResultColumn
|
|
|
|
}
|
|
|
|
|
|
|
|
// sortBlockByColumn represents data for a single column from 'sort by(...)' clause.
|
|
|
|
type sortBlockByColumn struct {
|
|
|
|
// c contains column data
|
|
|
|
c *blockResultColumn
|
|
|
|
|
|
|
|
// i64Values contains int64 numbers parsed from values
|
|
|
|
i64Values []int64
|
|
|
|
|
|
|
|
// f64Values contains float64 numbers parsed from values
|
|
|
|
f64Values []float64
|
|
|
|
}
|
|
|
|
|
|
|
|
// sortRowRef points to one log entry written to the `sort` pipe.
type sortRowRef struct {
	// blockIdx is the position of the block at pipeSortProcessorShard.blocks.
	blockIdx int

	// rowIdx is the position of the log entry inside that block.
	rowIdx int
}
|
|
|
|
|
|
|
|
func (c *sortBlockByColumn) getI64ValueAtRow(rowIdx int) int64 {
|
|
|
|
if c.c.isConst {
|
|
|
|
return c.i64Values[0]
|
|
|
|
}
|
|
|
|
return c.i64Values[rowIdx]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (c *sortBlockByColumn) getF64ValueAtRow(rowIdx int) float64 {
|
|
|
|
if c.c.isConst {
|
|
|
|
return c.f64Values[0]
|
|
|
|
}
|
|
|
|
return c.f64Values[rowIdx]
|
|
|
|
}
|
|
|
|
|
|
|
|
// writeBlock writes br to shard.
|
|
|
|
func (shard *pipeSortProcessorShard) writeBlock(br *blockResult) {
|
|
|
|
// clone br, so it could be owned by shard
|
|
|
|
br = br.clone()
|
|
|
|
cs := br.getColumns()
|
|
|
|
|
|
|
|
byFields := shard.ps.byFields
|
|
|
|
if len(byFields) == 0 {
|
|
|
|
// Sort by all the columns
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
columnValues := shard.columnValues[:0]
|
|
|
|
for _, c := range cs {
|
2024-05-20 04:08:30 +02:00
|
|
|
values := c.getValues(br)
|
|
|
|
columnValues = append(columnValues, values)
|
2024-05-15 04:55:44 +02:00
|
|
|
}
|
|
|
|
shard.columnValues = columnValues
|
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
// Generate byColumns
|
2024-09-25 16:16:53 +02:00
|
|
|
valuesEncoded := make([]string, br.rowsLen)
|
2024-05-20 04:08:30 +02:00
|
|
|
shard.stateSizeBudget -= len(valuesEncoded) * int(unsafe.Sizeof(valuesEncoded[0]))
|
2024-05-15 04:55:44 +02:00
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
bb := bbPool.Get()
|
2024-09-25 16:16:53 +02:00
|
|
|
for rowIdx := 0; rowIdx < br.rowsLen; rowIdx++ {
|
2024-05-15 04:55:44 +02:00
|
|
|
// Marshal all the columns per each row into a single string
|
2024-05-12 16:33:29 +02:00
|
|
|
// and sort rows by the resulting string.
|
|
|
|
bb.B = bb.B[:0]
|
2024-05-15 04:55:44 +02:00
|
|
|
for i, values := range columnValues {
|
|
|
|
v := values[rowIdx]
|
|
|
|
bb.B = marshalJSONKeyValue(bb.B, cs[i].name, v)
|
2024-05-12 16:33:29 +02:00
|
|
|
bb.B = append(bb.B, ',')
|
|
|
|
}
|
2024-05-20 04:08:30 +02:00
|
|
|
if rowIdx > 0 && valuesEncoded[rowIdx-1] == string(bb.B) {
|
|
|
|
valuesEncoded[rowIdx] = valuesEncoded[rowIdx-1]
|
|
|
|
} else {
|
|
|
|
valuesEncoded[rowIdx] = string(bb.B)
|
|
|
|
shard.stateSizeBudget -= len(bb.B)
|
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
bbPool.Put(bb)
|
|
|
|
|
2024-09-25 16:16:53 +02:00
|
|
|
i64Values := make([]int64, br.rowsLen)
|
|
|
|
f64Values := make([]float64, br.rowsLen)
|
2024-05-12 16:33:29 +02:00
|
|
|
for i := range f64Values {
|
|
|
|
f64Values[i] = nan
|
|
|
|
}
|
|
|
|
byColumns := []sortBlockByColumn{
|
|
|
|
{
|
|
|
|
c: &blockResultColumn{
|
|
|
|
valueType: valueTypeString,
|
2024-05-20 04:08:30 +02:00
|
|
|
valuesEncoded: valuesEncoded,
|
2024-05-12 16:33:29 +02:00
|
|
|
},
|
|
|
|
i64Values: i64Values,
|
|
|
|
f64Values: f64Values,
|
|
|
|
},
|
|
|
|
}
|
2024-05-20 04:08:30 +02:00
|
|
|
shard.stateSizeBudget -= int(unsafe.Sizeof(byColumns[0]) + unsafe.Sizeof(*byColumns[0].c))
|
2024-05-12 16:33:29 +02:00
|
|
|
|
|
|
|
// Append br to shard.blocks.
|
|
|
|
shard.blocks = append(shard.blocks, sortBlock{
|
|
|
|
br: br,
|
|
|
|
byColumns: byColumns,
|
|
|
|
otherColumns: cs,
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
// Collect values for columns from byFields.
|
|
|
|
byColumns := make([]sortBlockByColumn, len(byFields))
|
|
|
|
for i, bf := range byFields {
|
|
|
|
c := br.getColumnByName(bf.name)
|
|
|
|
bc := &byColumns[i]
|
|
|
|
bc.c = c
|
|
|
|
|
|
|
|
if c.isTime {
|
|
|
|
// Do not initialize bc.i64Values and bc.f64Values, since they aren't used.
|
|
|
|
// This saves some memory.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if c.isConst {
|
2024-05-20 04:08:30 +02:00
|
|
|
bc.i64Values = shard.createInt64Values(c.valuesEncoded)
|
|
|
|
bc.f64Values = shard.createFloat64Values(c.valuesEncoded)
|
2024-05-12 16:33:29 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// pre-populate values in order to track better br memory usage
|
|
|
|
values := c.getValues(br)
|
|
|
|
bc.i64Values = shard.createInt64Values(values)
|
|
|
|
bc.f64Values = shard.createFloat64Values(values)
|
|
|
|
}
|
|
|
|
shard.stateSizeBudget -= len(byColumns) * int(unsafe.Sizeof(byColumns[0]))
|
|
|
|
|
|
|
|
// Collect values for other columns.
|
|
|
|
otherColumns := make([]*blockResultColumn, 0, len(cs))
|
|
|
|
for _, c := range cs {
|
|
|
|
isByField := false
|
|
|
|
for _, bf := range byFields {
|
|
|
|
if bf.name == c.name {
|
|
|
|
isByField = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !isByField {
|
|
|
|
otherColumns = append(otherColumns, c)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
shard.stateSizeBudget -= len(otherColumns) * int(unsafe.Sizeof(otherColumns[0]))
|
|
|
|
|
|
|
|
// Append br to shard.blocks.
|
|
|
|
shard.blocks = append(shard.blocks, sortBlock{
|
|
|
|
br: br,
|
|
|
|
byColumns: byColumns,
|
|
|
|
otherColumns: otherColumns,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
shard.stateSizeBudget -= br.sizeBytes()
|
|
|
|
shard.stateSizeBudget -= int(unsafe.Sizeof(shard.blocks[0]))
|
|
|
|
|
|
|
|
// Add row references to rowRefs.
|
|
|
|
blockIdx := len(shard.blocks) - 1
|
|
|
|
rowRefs := shard.rowRefs
|
|
|
|
rowRefsLen := len(rowRefs)
|
2024-09-25 16:16:53 +02:00
|
|
|
for i := 0; i < br.rowsLen; i++ {
|
2024-05-12 16:33:29 +02:00
|
|
|
rowRefs = append(rowRefs, sortRowRef{
|
|
|
|
blockIdx: blockIdx,
|
|
|
|
rowIdx: i,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
shard.rowRefs = rowRefs
|
|
|
|
shard.stateSizeBudget -= (len(rowRefs) - rowRefsLen) * int(unsafe.Sizeof(rowRefs[0]))
|
|
|
|
}
|
|
|
|
|
|
|
|
func (shard *pipeSortProcessorShard) createInt64Values(values []string) []int64 {
|
|
|
|
a := make([]int64, len(values))
|
|
|
|
for i, v := range values {
|
|
|
|
i64, ok := tryParseInt64(v)
|
|
|
|
if ok {
|
|
|
|
a[i] = i64
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
u32, _ := tryParseIPv4(v)
|
|
|
|
a[i] = int64(u32)
|
|
|
|
// Do not try parsing timestamp and duration, since they may be negative.
|
|
|
|
// This breaks sorting.
|
|
|
|
}
|
|
|
|
|
|
|
|
shard.stateSizeBudget -= len(a) * int(unsafe.Sizeof(a[0]))
|
|
|
|
|
|
|
|
return a
|
|
|
|
}
|
|
|
|
|
|
|
|
func (shard *pipeSortProcessorShard) createFloat64Values(values []string) []float64 {
|
|
|
|
a := make([]float64, len(values))
|
|
|
|
for i, v := range values {
|
|
|
|
f, ok := tryParseFloat64(v)
|
|
|
|
if !ok {
|
|
|
|
f = nan
|
|
|
|
}
|
|
|
|
a[i] = f
|
|
|
|
}
|
|
|
|
|
|
|
|
shard.stateSizeBudget -= len(a) * int(unsafe.Sizeof(a[0]))
|
|
|
|
|
|
|
|
return a
|
|
|
|
}
|
|
|
|
|
2024-05-13 15:35:11 +02:00
|
|
|
func (shard *pipeSortProcessorShard) Len() int {
|
|
|
|
return len(shard.rowRefs)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
2024-05-13 15:35:11 +02:00
|
|
|
func (shard *pipeSortProcessorShard) Swap(i, j int) {
|
|
|
|
rowRefs := shard.rowRefs
|
2024-05-12 16:33:29 +02:00
|
|
|
rowRefs[i], rowRefs[j] = rowRefs[j], rowRefs[i]
|
|
|
|
}
|
|
|
|
|
2024-05-13 15:35:11 +02:00
|
|
|
func (shard *pipeSortProcessorShard) Less(i, j int) bool {
|
|
|
|
return sortBlockLess(shard, i, shard, j)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (psp *pipeSortProcessor) writeBlock(workerID uint, br *blockResult) {
|
2024-09-25 16:16:53 +02:00
|
|
|
if br.rowsLen == 0 {
|
2024-05-12 16:33:29 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
shard := &psp.shards[workerID]
|
|
|
|
|
|
|
|
for shard.stateSizeBudget < 0 {
|
|
|
|
// steal some budget for the state size from the global budget.
|
|
|
|
remaining := psp.stateSizeBudget.Add(-stateSizeBudgetChunk)
|
|
|
|
if remaining < 0 {
|
|
|
|
// The state size is too big. Stop processing data in order to avoid OOM crash.
|
|
|
|
if remaining+stateSizeBudgetChunk >= 0 {
|
|
|
|
// Notify worker goroutines to stop calling writeBlock() in order to save CPU time.
|
|
|
|
psp.cancel()
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
shard.stateSizeBudget += stateSizeBudgetChunk
|
|
|
|
}
|
|
|
|
|
|
|
|
shard.writeBlock(br)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (psp *pipeSortProcessor) flush() error {
|
|
|
|
if n := psp.stateSizeBudget.Load(); n <= 0 {
|
|
|
|
return fmt.Errorf("cannot calculate [%s], since it requires more than %dMB of memory", psp.ps.String(), psp.maxStateSize/(1<<20))
|
|
|
|
}
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if needStop(psp.stopCh) {
|
2024-05-12 16:33:29 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Sort every shard in parallel
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
shards := psp.shards
|
|
|
|
for i := range shards {
|
|
|
|
wg.Add(1)
|
|
|
|
go func(shard *pipeSortProcessorShard) {
|
|
|
|
// TODO: interrupt long sorting when psp.stopCh is closed.
|
|
|
|
sort.Sort(shard)
|
|
|
|
wg.Done()
|
|
|
|
}(&shards[i])
|
|
|
|
}
|
|
|
|
wg.Wait()
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if needStop(psp.stopCh) {
|
2024-05-12 16:33:29 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Merge sorted results across shards
|
|
|
|
sh := pipeSortProcessorShardsHeap(make([]*pipeSortProcessorShard, 0, len(shards)))
|
|
|
|
for i := range shards {
|
|
|
|
shard := &shards[i]
|
2024-05-15 04:55:44 +02:00
|
|
|
if len(shard.rowRefs) > 0 {
|
2024-05-12 16:33:29 +02:00
|
|
|
sh = append(sh, shard)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(sh) == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
heap.Init(&sh)
|
|
|
|
|
|
|
|
wctx := &pipeSortWriteContext{
|
|
|
|
psp: psp,
|
|
|
|
}
|
2024-05-15 04:55:44 +02:00
|
|
|
shardNextIdx := 0
|
2024-05-12 16:33:29 +02:00
|
|
|
|
|
|
|
for len(sh) > 1 {
|
|
|
|
shard := sh[0]
|
2024-05-15 04:55:44 +02:00
|
|
|
wctx.writeNextRow(shard)
|
2024-05-12 16:33:29 +02:00
|
|
|
|
|
|
|
if shard.rowRefNext >= len(shard.rowRefs) {
|
|
|
|
_ = heap.Pop(&sh)
|
2024-05-15 04:55:44 +02:00
|
|
|
shardNextIdx = 0
|
2024-05-12 16:33:29 +02:00
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if needStop(psp.stopCh) {
|
2024-05-12 16:33:29 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if shardNextIdx == 0 {
|
|
|
|
shardNextIdx = 1
|
|
|
|
if len(sh) > 2 && sh.Less(2, 1) {
|
|
|
|
shardNextIdx = 2
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if sh.Less(shardNextIdx, 0) {
|
2024-05-12 16:33:29 +02:00
|
|
|
heap.Fix(&sh, 0)
|
2024-05-15 04:55:44 +02:00
|
|
|
shardNextIdx = 0
|
2024-05-12 16:33:29 +02:00
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
if needStop(psp.stopCh) {
|
2024-05-12 16:33:29 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(sh) == 1 {
|
|
|
|
shard := sh[0]
|
|
|
|
for shard.rowRefNext < len(shard.rowRefs) {
|
2024-05-15 04:55:44 +02:00
|
|
|
wctx.writeNextRow(shard)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
wctx.flush()
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
type pipeSortWriteContext struct {
|
|
|
|
psp *pipeSortProcessor
|
|
|
|
rcs []resultColumn
|
|
|
|
br blockResult
|
|
|
|
|
2024-07-01 01:44:17 +02:00
|
|
|
// buf is a temporary buffer for non-flushed block.
|
|
|
|
buf []byte
|
|
|
|
|
2024-05-22 21:01:20 +02:00
|
|
|
// rowsWritten is the total number of rows passed to writeNextRow.
|
2024-05-15 04:55:44 +02:00
|
|
|
rowsWritten uint64
|
2024-05-22 21:01:20 +02:00
|
|
|
|
|
|
|
// rowsCount is the number of rows in the current block
|
|
|
|
rowsCount int
|
|
|
|
|
|
|
|
// valuesLen is the length of all the values in the current block
|
|
|
|
valuesLen int
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
func (wctx *pipeSortWriteContext) writeNextRow(shard *pipeSortProcessorShard) {
|
|
|
|
ps := shard.ps
|
2024-10-29 16:43:07 +01:00
|
|
|
rankFieldName := ps.rankFieldName
|
2024-07-01 01:44:17 +02:00
|
|
|
rankFields := 0
|
2024-10-29 16:43:07 +01:00
|
|
|
if rankFieldName != "" {
|
2024-07-01 01:44:17 +02:00
|
|
|
rankFields = 1
|
|
|
|
}
|
2024-05-15 04:55:44 +02:00
|
|
|
|
|
|
|
rowIdx := shard.rowRefNext
|
|
|
|
shard.rowRefNext++
|
|
|
|
|
|
|
|
wctx.rowsWritten++
|
|
|
|
if wctx.rowsWritten <= ps.offset {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
rr := shard.rowRefs[rowIdx]
|
|
|
|
b := &shard.blocks[rr.blockIdx]
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
byFields := ps.byFields
|
2024-05-12 16:33:29 +02:00
|
|
|
rcs := wctx.rcs
|
|
|
|
|
2024-07-01 01:44:17 +02:00
|
|
|
areEqualColumns := len(rcs) == rankFields+len(byFields)+len(b.otherColumns)
|
2024-05-12 16:33:29 +02:00
|
|
|
if areEqualColumns {
|
|
|
|
for i, c := range b.otherColumns {
|
2024-07-01 01:44:17 +02:00
|
|
|
if rcs[rankFields+len(byFields)+i].name != c.name {
|
2024-05-12 16:33:29 +02:00
|
|
|
areEqualColumns = false
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !areEqualColumns {
|
2024-05-25 21:36:16 +02:00
|
|
|
// send the current block to ppNext and construct a block with new set of columns
|
2024-05-12 16:33:29 +02:00
|
|
|
wctx.flush()
|
|
|
|
|
|
|
|
rcs = wctx.rcs[:0]
|
2024-10-29 16:43:07 +01:00
|
|
|
if rankFieldName != "" {
|
|
|
|
rcs = appendResultColumnWithName(rcs, rankFieldName)
|
2024-07-01 01:44:17 +02:00
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
for _, bf := range byFields {
|
2024-05-20 04:08:30 +02:00
|
|
|
rcs = appendResultColumnWithName(rcs, bf.name)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
for _, c := range b.otherColumns {
|
2024-05-20 04:08:30 +02:00
|
|
|
rcs = appendResultColumnWithName(rcs, c.name)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
wctx.rcs = rcs
|
|
|
|
}
|
|
|
|
|
2024-10-29 16:43:07 +01:00
|
|
|
if rankFieldName != "" {
|
2024-07-01 01:44:17 +02:00
|
|
|
bufLen := len(wctx.buf)
|
|
|
|
wctx.buf = marshalUint64String(wctx.buf, wctx.rowsWritten)
|
|
|
|
v := bytesutil.ToUnsafeString(wctx.buf[bufLen:])
|
|
|
|
rcs[0].addValue(v)
|
|
|
|
}
|
|
|
|
|
2024-05-12 16:33:29 +02:00
|
|
|
br := b.br
|
|
|
|
byColumns := b.byColumns
|
|
|
|
for i := range byFields {
|
|
|
|
v := byColumns[i].c.getValueAtRow(br, rr.rowIdx)
|
2024-07-01 01:44:17 +02:00
|
|
|
rcs[rankFields+i].addValue(v)
|
2024-05-12 16:33:29 +02:00
|
|
|
wctx.valuesLen += len(v)
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, c := range b.otherColumns {
|
|
|
|
v := c.getValueAtRow(br, rr.rowIdx)
|
2024-07-01 01:44:17 +02:00
|
|
|
rcs[rankFields+len(byFields)+i].addValue(v)
|
2024-05-12 16:33:29 +02:00
|
|
|
wctx.valuesLen += len(v)
|
|
|
|
}
|
|
|
|
|
2024-05-22 21:01:20 +02:00
|
|
|
wctx.rowsCount++
|
2024-05-12 16:33:29 +02:00
|
|
|
if wctx.valuesLen >= 1_000_000 {
|
|
|
|
wctx.flush()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (wctx *pipeSortWriteContext) flush() {
|
|
|
|
rcs := wctx.rcs
|
|
|
|
br := &wctx.br
|
|
|
|
|
|
|
|
wctx.valuesLen = 0
|
|
|
|
|
2024-05-25 21:36:16 +02:00
|
|
|
// Flush rcs to ppNext
|
2024-05-22 21:01:20 +02:00
|
|
|
br.setResultColumns(rcs, wctx.rowsCount)
|
|
|
|
wctx.rowsCount = 0
|
2024-05-25 21:36:16 +02:00
|
|
|
wctx.psp.ppNext.writeBlock(0, br)
|
2024-05-12 16:33:29 +02:00
|
|
|
br.reset()
|
|
|
|
for i := range rcs {
|
2024-05-20 04:08:30 +02:00
|
|
|
rcs[i].resetValues()
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
2024-07-01 01:44:17 +02:00
|
|
|
wctx.buf = wctx.buf[:0]
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// pipeSortProcessorShardsHeap is a heap of shards used for merging sorted shards.
//
// Shards are ordered by the row at their rowRefNext position; see Less.
type pipeSortProcessorShardsHeap []*pipeSortProcessorShard
|
|
|
|
|
|
|
|
func (sh *pipeSortProcessorShardsHeap) Len() int {
|
|
|
|
return len(*sh)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *pipeSortProcessorShardsHeap) Swap(i, j int) {
|
|
|
|
a := *sh
|
|
|
|
a[i], a[j] = a[j], a[i]
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *pipeSortProcessorShardsHeap) Less(i, j int) bool {
|
|
|
|
a := *sh
|
|
|
|
shardA := a[i]
|
|
|
|
shardB := a[j]
|
|
|
|
return sortBlockLess(shardA, shardA.rowRefNext, shardB, shardB.rowRefNext)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *pipeSortProcessorShardsHeap) Push(x any) {
|
|
|
|
shard := x.(*pipeSortProcessorShard)
|
|
|
|
*sh = append(*sh, shard)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (sh *pipeSortProcessorShardsHeap) Pop() any {
|
|
|
|
a := *sh
|
|
|
|
x := a[len(a)-1]
|
|
|
|
a[len(a)-1] = nil
|
|
|
|
*sh = a[:len(a)-1]
|
|
|
|
return x
|
|
|
|
}
|
|
|
|
|
|
|
|
func sortBlockLess(shardA *pipeSortProcessorShard, rowIdxA int, shardB *pipeSortProcessorShard, rowIdxB int) bool {
|
|
|
|
byFields := shardA.ps.byFields
|
|
|
|
|
|
|
|
rrA := shardA.rowRefs[rowIdxA]
|
|
|
|
rrB := shardB.rowRefs[rowIdxB]
|
|
|
|
bA := &shardA.blocks[rrA.blockIdx]
|
|
|
|
bB := &shardB.blocks[rrB.blockIdx]
|
|
|
|
for idx := range bA.byColumns {
|
|
|
|
cA := &bA.byColumns[idx]
|
|
|
|
cB := &bB.byColumns[idx]
|
|
|
|
isDesc := len(byFields) > 0 && byFields[idx].isDesc
|
|
|
|
if shardA.ps.isDesc {
|
|
|
|
isDesc = !isDesc
|
|
|
|
}
|
|
|
|
|
|
|
|
if cA.c.isConst && cB.c.isConst {
|
|
|
|
// Fast path - compare const values
|
2024-05-20 04:08:30 +02:00
|
|
|
ccA := cA.c.valuesEncoded[0]
|
|
|
|
ccB := cB.c.valuesEncoded[0]
|
2024-05-12 16:33:29 +02:00
|
|
|
if ccA == ccB {
|
|
|
|
continue
|
|
|
|
}
|
2024-05-14 03:05:03 +02:00
|
|
|
if isDesc {
|
|
|
|
return ccB < ccA
|
|
|
|
}
|
|
|
|
return ccA < ccB
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if cA.c.isTime && cB.c.isTime {
|
|
|
|
// Fast path - sort by _time
|
2024-09-25 16:16:53 +02:00
|
|
|
timestampsA := bA.br.getTimestamps()
|
|
|
|
timestampsB := bB.br.getTimestamps()
|
|
|
|
tA := timestampsA[rrA.rowIdx]
|
|
|
|
tB := timestampsB[rrB.rowIdx]
|
2024-05-12 16:33:29 +02:00
|
|
|
if tA == tB {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if isDesc {
|
|
|
|
return tB < tA
|
|
|
|
}
|
|
|
|
return tA < tB
|
|
|
|
}
|
|
|
|
if cA.c.isTime {
|
|
|
|
// treat timestamps as smaller than other values
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
if cB.c.isTime {
|
|
|
|
// treat timestamps as smaller than other values
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try sorting by int64 values at first
|
|
|
|
uA := cA.getI64ValueAtRow(rrA.rowIdx)
|
|
|
|
uB := cB.getI64ValueAtRow(rrB.rowIdx)
|
|
|
|
if uA != 0 && uB != 0 {
|
|
|
|
if uA == uB {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if isDesc {
|
|
|
|
return uB < uA
|
|
|
|
}
|
|
|
|
return uA < uB
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try sorting by float64 then
|
|
|
|
fA := cA.getF64ValueAtRow(rrA.rowIdx)
|
|
|
|
fB := cB.getF64ValueAtRow(rrB.rowIdx)
|
|
|
|
if !math.IsNaN(fA) && !math.IsNaN(fB) {
|
|
|
|
if fA == fB {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if isDesc {
|
|
|
|
return fB < fA
|
|
|
|
}
|
|
|
|
return fA < fB
|
|
|
|
}
|
|
|
|
|
|
|
|
// Fall back to string sorting
|
|
|
|
sA := cA.c.getValueAtRow(bA.br, rrA.rowIdx)
|
|
|
|
sB := cB.c.getValueAtRow(bB.br, rrB.rowIdx)
|
|
|
|
if sA == sB {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if isDesc {
|
2024-11-06 18:28:48 +01:00
|
|
|
return lessString(sB, sA)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
2024-11-06 18:28:48 +01:00
|
|
|
return lessString(sA, sB)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
func parsePipeSort(lex *lexer) (*pipeSort, error) {
|
2024-09-29 09:33:31 +02:00
|
|
|
if !lex.isKeyword("sort") && !lex.isKeyword("order") {
|
|
|
|
return nil, fmt.Errorf("expecting 'sort' or 'order'; got %q", lex.token)
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
lex.nextToken()
|
|
|
|
|
|
|
|
var ps pipeSort
|
2024-05-20 04:08:30 +02:00
|
|
|
if lex.isKeyword("by", "(") {
|
|
|
|
if lex.isKeyword("by") {
|
|
|
|
lex.nextToken()
|
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
bfs, err := parseBySortFields(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'by' clause: %w", err)
|
|
|
|
}
|
|
|
|
ps.byFields = bfs
|
|
|
|
}
|
|
|
|
|
2024-06-04 02:29:10 +02:00
|
|
|
switch {
|
|
|
|
case lex.isKeyword("desc"):
|
2024-05-12 16:33:29 +02:00
|
|
|
lex.nextToken()
|
|
|
|
ps.isDesc = true
|
2024-06-04 02:29:10 +02:00
|
|
|
case lex.isKeyword("asc"):
|
|
|
|
lex.nextToken()
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
2024-05-15 04:55:44 +02:00
|
|
|
for {
|
|
|
|
switch {
|
|
|
|
case lex.isKeyword("offset"):
|
|
|
|
lex.nextToken()
|
|
|
|
s := lex.token
|
|
|
|
n, ok := tryParseUint64(s)
|
|
|
|
lex.nextToken()
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'offset %s'", s)
|
|
|
|
}
|
|
|
|
if ps.offset > 0 {
|
|
|
|
return nil, fmt.Errorf("duplicate 'offset'; the previous one is %d; the new one is %s", ps.offset, s)
|
|
|
|
}
|
|
|
|
ps.offset = n
|
|
|
|
case lex.isKeyword("limit"):
|
|
|
|
lex.nextToken()
|
|
|
|
s := lex.token
|
|
|
|
n, ok := tryParseUint64(s)
|
|
|
|
lex.nextToken()
|
|
|
|
if !ok {
|
|
|
|
return nil, fmt.Errorf("cannot parse 'limit %s'", s)
|
|
|
|
}
|
|
|
|
if ps.limit > 0 {
|
|
|
|
return nil, fmt.Errorf("duplicate 'limit'; the previous one is %d; the new one is %s", ps.limit, s)
|
|
|
|
}
|
|
|
|
ps.limit = n
|
2024-07-01 01:44:17 +02:00
|
|
|
case lex.isKeyword("rank"):
|
2024-10-29 16:43:07 +01:00
|
|
|
rankFieldName, err := parseRankFieldName(lex)
|
2024-07-01 01:44:17 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot read rank field name: %s", err)
|
|
|
|
}
|
2024-10-29 16:43:07 +01:00
|
|
|
ps.rankFieldName = rankFieldName
|
2024-05-15 04:55:44 +02:00
|
|
|
default:
|
|
|
|
return &ps, nil
|
|
|
|
}
|
|
|
|
}
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// bySortField is a single item of the 'by (...)' part of the pipeSort.
type bySortField struct {
	// name is the name of the field to sort by.
	name string

	// isDesc is set when the field must be sorted in descending order.
	isDesc bool
}
|
|
|
|
|
|
|
|
func (bf *bySortField) String() string {
|
|
|
|
s := quoteTokenIfNeeded(bf.name)
|
|
|
|
if bf.isDesc {
|
|
|
|
s += " desc"
|
|
|
|
}
|
|
|
|
return s
|
|
|
|
}
|
|
|
|
|
|
|
|
func parseBySortFields(lex *lexer) ([]*bySortField, error) {
|
|
|
|
if !lex.isKeyword("(") {
|
|
|
|
return nil, fmt.Errorf("missing `(`")
|
|
|
|
}
|
|
|
|
var bfs []*bySortField
|
|
|
|
for {
|
|
|
|
lex.nextToken()
|
|
|
|
if lex.isKeyword(")") {
|
|
|
|
lex.nextToken()
|
|
|
|
return bfs, nil
|
|
|
|
}
|
|
|
|
fieldName, err := parseFieldName(lex)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cannot parse field name: %w", err)
|
|
|
|
}
|
|
|
|
bf := &bySortField{
|
|
|
|
name: fieldName,
|
|
|
|
}
|
2024-06-04 02:29:10 +02:00
|
|
|
switch {
|
|
|
|
case lex.isKeyword("desc"):
|
2024-05-12 16:33:29 +02:00
|
|
|
lex.nextToken()
|
|
|
|
bf.isDesc = true
|
2024-06-04 02:29:10 +02:00
|
|
|
case lex.isKeyword("asc"):
|
|
|
|
lex.nextToken()
|
2024-05-12 16:33:29 +02:00
|
|
|
}
|
|
|
|
bfs = append(bfs, bf)
|
|
|
|
switch {
|
|
|
|
case lex.isKeyword(")"):
|
|
|
|
lex.nextToken()
|
|
|
|
return bfs, nil
|
|
|
|
case lex.isKeyword(","):
|
|
|
|
default:
|
|
|
|
return nil, fmt.Errorf("unexpected token: %q; expecting ',' or ')'", lex.token)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func tryParseInt64(s string) (int64, bool) {
|
|
|
|
if len(s) == 0 {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
|
|
|
|
isMinus := s[0] == '-'
|
|
|
|
if isMinus {
|
|
|
|
s = s[1:]
|
|
|
|
}
|
|
|
|
u64, ok := tryParseUint64(s)
|
|
|
|
if !ok {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
if !isMinus {
|
|
|
|
if u64 > math.MaxInt64 {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
return int64(u64), true
|
|
|
|
}
|
|
|
|
if u64 > -math.MinInt64 {
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
return -int64(u64), true
|
|
|
|
}
|
2024-05-15 04:55:44 +02:00
|
|
|
|
|
|
|
func marshalJSONKeyValue(dst []byte, k, v string) []byte {
|
2024-07-05 01:17:03 +02:00
|
|
|
dst = quicktemplate.AppendJSONString(dst, k, true)
|
2024-05-15 04:55:44 +02:00
|
|
|
dst = append(dst, ':')
|
2024-07-05 01:17:03 +02:00
|
|
|
dst = quicktemplate.AppendJSONString(dst, v, true)
|
2024-05-15 04:55:44 +02:00
|
|
|
return dst
|
|
|
|
}
|