VictoriaMetrics/lib/logstorage/pipe_unpack_json_test.go

package logstorage

import (
	"math/rand"
	"slices"
	"strings"
	"sync"
	"testing"
)

func TestPipeUnpackJSON(t *testing.T) {
	f := func(pipeStr string, rows, rowsExpected [][]Field) {
		t.Helper()
		expectPipeResults(t, pipeStr, rows, rowsExpected)
	}

	// single row, unpack from _msg
	f("unpack_json", [][]Field{
		{
			{"_msg", `{"foo":"bar"}`},
		},
	}, [][]Field{
		{
			{"_msg", `{"foo":"bar"}`},
			{"foo", "bar"},
		},
	})

	// single row, unpack from _msg into _msg
	f("unpack_json", [][]Field{
		{
			{"_msg", `{"_msg":"bar"}`},
		},
	}, [][]Field{
		{
			{"_msg", "bar"},
		},
	})

	// single row, unpack from missing field
	f("unpack_json from x", [][]Field{
		{
			{"_msg", `{"foo":"bar"}`},
		},
	}, [][]Field{
		{
			{"_msg", `{"foo":"bar"}`},
		},
	})

	// single row, unpack from non-json field
	f("unpack_json from x", [][]Field{
		{
			{"x", `foobar`},
		},
	}, [][]Field{
		{
			{"x", `foobar`},
		},
	})

	// single row, unpack from non-dict json
	f("unpack_json from x", [][]Field{
		{
			{"x", `["foobar"]`},
		},
	}, [][]Field{
		{
			{"x", `["foobar"]`},
		},
	})
	f("unpack_json from x", [][]Field{
		{
			{"x", `1234`},
		},
	}, [][]Field{
		{
			{"x", `1234`},
		},
	})
	f("unpack_json from x", [][]Field{
		{
			{"x", `"xxx"`},
		},
	}, [][]Field{
		{
			{"x", `"xxx"`},
		},
	})

	// single row, unpack from named field
	f("unpack_json from x", [][]Field{
		{
			{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
		},
	}, [][]Field{
		{
			{"x", `NaN`},
			{"foo", "bar"},
			{"baz", "xyz"},
			{"a", "123"},
			{"b", `["foo","bar"]`},
			{"y.z.a", "b"},
		},
	})

	// multiple rows with distinct number of fields
	f("unpack_json from x", [][]Field{
		{
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", `abc`},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
		},
	}, [][]Field{
		{
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", "abc"},
			{"foo", "bar"},
			{"baz", "xyz"},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `["bar",123]`},
			{"x", `{"z":["bar",123]}`},
		},
	})

	// multiple rows with distinct number of fields with result_prefix
	f("unpack_json from x result_prefix qwe_", [][]Field{
		{
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", `abc`},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
		},
	}, [][]Field{
		{
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", "abc"},
			{"qwe_foo", "bar"},
			{"qwe_baz", "xyz"},
		},
		{
			{"y", `abc`},
		},
		{
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
			{"qwe_z", `["bar",123]`},
		},
	})
}

func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {
	t.Helper()

	lex := newLexer(pipeStr)
	p, err := parsePipe(lex)
	if err != nil {
		t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)
	}

	workersCount := 5
	stopCh := make(chan struct{})
	cancel := func() {}
	ppTest := newTestPipeProcessor()
	pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)

	brw := newTestBlockResultWriter(workersCount, pp)
	for _, row := range rows {
		brw.writeRow(row)
	}
	brw.flush()

	ppTest.expectRows(t, rowsExpected)
}

func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {
	return &testBlockResultWriter{
		workersCount: workersCount,
		ppBase:       ppBase,
	}
}

type testBlockResultWriter struct {
	workersCount int
	ppBase       pipeProcessor
	rcs          []resultColumn
	br           blockResult
}

func (brw *testBlockResultWriter) writeRow(row []Field) {
	if !brw.areSameFields(row) {
		brw.flush()

		brw.rcs = brw.rcs[:0]
		for _, field := range row {
			brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)
		}
	}

	for i, field := range row {
		brw.rcs[i].addValue(field.Value)
	}
	if rand.Intn(5) == 0 {
		brw.flush()
	}
}

func (brw *testBlockResultWriter) areSameFields(row []Field) bool {
	if len(brw.rcs) != len(row) {
		return false
	}
	for i, rc := range brw.rcs {
		if rc.name != row[i].Name {
			return false
		}
	}
	return true
}

func (brw *testBlockResultWriter) flush() {
	brw.br.setResultColumns(brw.rcs)
	workerID := rand.Intn(brw.workersCount)
	brw.ppBase.writeBlock(uint(workerID), &brw.br)
	brw.br.reset()
	for i := range brw.rcs {
		brw.rcs[i].resetValues()
	}
}

func newTestPipeProcessor() *testPipeProcessor {
	return &testPipeProcessor{}
}

type testPipeProcessor struct {
	resultRowsLock sync.Mutex
	resultRows     [][]Field
}

func (pp *testPipeProcessor) writeBlock(_ uint, br *blockResult) {
	cs := br.getColumns()
	var columnValues [][]string
	for _, c := range cs {
		values := c.getValues(br)
		columnValues = append(columnValues, values)
	}

	for i := range br.timestamps {
		row := make([]Field, len(columnValues))
		for j, values := range columnValues {
			r := &row[j]
			r.Name = strings.Clone(cs[j].name)
			r.Value = strings.Clone(values[i])
		}
		pp.resultRowsLock.Lock()
		pp.resultRows = append(pp.resultRows, row)
		pp.resultRowsLock.Unlock()
	}
}

func (pp *testPipeProcessor) flush() error {
	return nil
}

func (pp *testPipeProcessor) expectRows(t *testing.T, expectedRows [][]Field) {
	t.Helper()

	if len(pp.resultRows) != len(expectedRows) {
		t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",
			len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))
	}

	sortTestRows(pp.resultRows)
	sortTestRows(expectedRows)

	for i, resultRow := range pp.resultRows {
		expectedRow := expectedRows[i]
		if len(resultRow) != len(expectedRow) {
			t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",
				i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))
		}
		for j, resultField := range resultRow {
			expectedField := expectedRow[j]
			if resultField.Name != expectedField.Name {
				t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
					i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))
			}
			if resultField.Value != expectedField.Value {
				t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",
					resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))
			}
		}
	}
}

func sortTestRows(rows [][]Field) {
	slices.SortFunc(rows, func(a, b []Field) int {
		reverse := -1
		if len(a) > len(b) {
			reverse = 1
			a, b = b, a
		}
		for i, fA := range a {
			fB := b[i]
			if fA.Name == fB.Name {
				if fA.Value == fB.Value {
					continue
				}
				if fA.Value < fB.Value {
					return reverse
				}
				return -reverse
			}
			if fA.Name < fB.Name {
				return reverse
			}
			return -reverse
		}
		if len(a) == len(b) {
			return 0
		}
		return reverse
	})
}

func rowsToString(rows [][]Field) string {
	a := make([]string, len(rows))
	for i, row := range rows {
		a[i] = rowToString(row)
	}
	return strings.Join(a, "\n")
}

func rowToString(row []Field) string {
	a := make([]string, len(row))
	for i, f := range row {
		a[i] = f.String()
	}
	return "{" + strings.Join(a, ",") + "}"
}

func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {
	f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {
		t.Helper()
		expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)
	}

	// all the needed fields
	f("unpack_json from x", "*", "", "*", "")

	// all the needed fields, unneeded fields do not intersect with src
	f("unpack_json from x", "*", "f1,f2", "*", "f1,f2")

	// all the needed fields, unneeded fields intersect with src
	f("unpack_json from x", "*", "f2,x", "*", "f2")

	// needed fields do not intersect with src
	f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")

	// needed fields intersect with src
	f("unpack_json from x", "f2,x", "", "f2,x", "")
}
lib/logstorage: work-in-progress 2024-05-20 04:08:30 +02:00			`package logstorage`

			`import (`
			`"math/rand"`
			`"slices"`
			`"strings"`
			`"sync"`
			`"testing"`
			`)`

			`func TestPipeUnpackJSON(t *testing.T) {`
			`f := func(pipeStr string, rows, rowsExpected [][]Field) {`
			`t.Helper()`
			`expectPipeResults(t, pipeStr, rows, rowsExpected)`
			`}`

			`// single row, unpack from _msg`
			`f("unpack_json", [][]Field{`
			`{`
			{"_msg", `{"foo":"bar"}`},
			`},`
			`}, [][]Field{`
			`{`
			{"_msg", `{"foo":"bar"}`},
			`{"foo", "bar"},`
			`},`
			`})`

			`// single row, unpack from _msg into _msg`
			`f("unpack_json", [][]Field{`
			`{`
			{"_msg", `{"_msg":"bar"}`},
			`},`
			`}, [][]Field{`
			`{`
			`{"_msg", "bar"},`
			`},`
			`})`

			`// single row, unpack from missing field`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"_msg", `{"foo":"bar"}`},
			`},`
			`}, [][]Field{`
			`{`
			{"_msg", `{"foo":"bar"}`},
			`},`
			`})`

			`// single row, unpack from non-json field`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `foobar`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `foobar`},
			`},`
			`})`

			`// single row, unpack from non-dict json`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `["foobar"]`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `["foobar"]`},
			`},`
			`})`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `1234`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `1234`},
			`},`
			`})`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `"xxx"`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `"xxx"`},
			`},`
			`})`

			`// single row, unpack from named field`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `{"foo":"bar","baz":"xyz","a":123,"b":["foo","bar"],"x":NaN,"y":{"z":{"a":"b"}}}`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `NaN`},
			`{"foo", "bar"},`
			`{"baz", "xyz"},`
			`{"a", "123"},`
			{"b", `["foo","bar"]`},
			`{"y.z.a", "b"},`
			`},`
			`})`

			`// multiple rows with distinct number of fields`
			`f("unpack_json from x", [][]Field{`
			`{`
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", `abc`},
			`},`
			`{`
			{"y", `abc`},
			`},`
			`{`
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `{"foo":"bar","baz":"xyz"}`},
			`{"y", "abc"},`
			`{"foo", "bar"},`
			`{"baz", "xyz"},`
			`},`
			`{`
			{"y", `abc`},
			`},`
			`{`
			{"z", `["bar",123]`},
			{"x", `{"z":["bar",123]}`},
			`},`
			`})`

			`// multiple rows with distinct number of fields with result_prefix`
			`f("unpack_json from x result_prefix qwe_", [][]Field{`
			`{`
			{"x", `{"foo":"bar","baz":"xyz"}`},
			{"y", `abc`},
			`},`
			`{`
			{"y", `abc`},
			`},`
			`{`
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
			`},`
			`}, [][]Field{`
			`{`
			{"x", `{"foo":"bar","baz":"xyz"}`},
			`{"y", "abc"},`
			`{"qwe_foo", "bar"},`
			`{"qwe_baz", "xyz"},`
			`},`
			`{`
			{"y", `abc`},
			`},`
			`{`
			{"z", `foobar`},
			{"x", `{"z":["bar",123]}`},
			{"qwe_z", `["bar",123]`},
			`},`
			`})`
			`}`

			`func expectPipeResults(t *testing.T, pipeStr string, rows, rowsExpected [][]Field) {`
			`t.Helper()`

			`lex := newLexer(pipeStr)`
			`p, err := parsePipe(lex)`
			`if err != nil {`
			`t.Fatalf("unexpected error when parsing %q: %s", pipeStr, err)`
			`}`

			`workersCount := 5`
			`stopCh := make(chan struct{})`
			`cancel := func() {}`
			`ppTest := newTestPipeProcessor()`
			`pp := p.newPipeProcessor(workersCount, stopCh, cancel, ppTest)`

			`brw := newTestBlockResultWriter(workersCount, pp)`
			`for _, row := range rows {`
			`brw.writeRow(row)`
			`}`
			`brw.flush()`

			`ppTest.expectRows(t, rowsExpected)`
			`}`

			`func newTestBlockResultWriter(workersCount int, ppBase pipeProcessor) *testBlockResultWriter {`
			`return &testBlockResultWriter{`
			`workersCount: workersCount,`
			`ppBase: ppBase,`
			`}`
			`}`

			`type testBlockResultWriter struct {`
			`workersCount int`
			`ppBase pipeProcessor`
			`rcs []resultColumn`
			`br blockResult`
			`}`

			`func (brw *testBlockResultWriter) writeRow(row []Field) {`
			`if !brw.areSameFields(row) {`
			`brw.flush()`

			`brw.rcs = brw.rcs[:0]`
			`for _, field := range row {`
			`brw.rcs = appendResultColumnWithName(brw.rcs, field.Name)`
			`}`
			`}`

			`for i, field := range row {`
			`brw.rcs[i].addValue(field.Value)`
			`}`
			`if rand.Intn(5) == 0 {`
			`brw.flush()`
			`}`
			`}`

			`func (brw *testBlockResultWriter) areSameFields(row []Field) bool {`
			`if len(brw.rcs) != len(row) {`
			`return false`
			`}`
			`for i, rc := range brw.rcs {`
			`if rc.name != row[i].Name {`
			`return false`
			`}`
			`}`
			`return true`
			`}`

			`func (brw *testBlockResultWriter) flush() {`
			`brw.br.setResultColumns(brw.rcs)`
			`workerID := rand.Intn(brw.workersCount)`
			`brw.ppBase.writeBlock(uint(workerID), &brw.br)`
			`brw.br.reset()`
			`for i := range brw.rcs {`
			`brw.rcs[i].resetValues()`
			`}`
			`}`

			`func newTestPipeProcessor() *testPipeProcessor {`
			`return &testPipeProcessor{}`
			`}`

			`type testPipeProcessor struct {`
			`resultRowsLock sync.Mutex`
			`resultRows [][]Field`
			`}`

			`func (pp testPipeProcessor) writeBlock(_ uint, br blockResult) {`
			`cs := br.getColumns()`
			`var columnValues [][]string`
			`for _, c := range cs {`
			`values := c.getValues(br)`
			`columnValues = append(columnValues, values)`
			`}`

			`for i := range br.timestamps {`
			`row := make([]Field, len(columnValues))`
			`for j, values := range columnValues {`
			`r := &row[j]`
			`r.Name = strings.Clone(cs[j].name)`
			`r.Value = strings.Clone(values[i])`
			`}`
			`pp.resultRowsLock.Lock()`
			`pp.resultRows = append(pp.resultRows, row)`
			`pp.resultRowsLock.Unlock()`
			`}`
			`}`

			`func (pp *testPipeProcessor) flush() error {`
			`return nil`
			`}`

			`func (pp testPipeProcessor) expectRows(t testing.T, expectedRows [][]Field) {`
			`t.Helper()`

			`if len(pp.resultRows) != len(expectedRows) {`
			`t.Fatalf("unexpected number of rows; got %d; want %d\nrows got\n%s\nrows expected\n%s",`
			`len(pp.resultRows), len(expectedRows), rowsToString(pp.resultRows), rowsToString(expectedRows))`
			`}`

			`sortTestRows(pp.resultRows)`
			`sortTestRows(expectedRows)`

			`for i, resultRow := range pp.resultRows {`
			`expectedRow := expectedRows[i]`
			`if len(resultRow) != len(expectedRow) {`
			`t.Fatalf("unexpected number of fields at row #%d; got %d; want %d\nrow got\n%s\nrow expected\n%s",`
			`i, len(resultRow), len(expectedRow), rowToString(resultRow), rowToString(expectedRow))`
			`}`
			`for j, resultField := range resultRow {`
			`expectedField := expectedRow[j]`
			`if resultField.Name != expectedField.Name {`
			`t.Fatalf("unexpected field name at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",`
			`i, resultField.Name, expectedField.Name, rowToString(resultRow), rowToString(expectedRow))`
			`}`
			`if resultField.Value != expectedField.Value {`
			`t.Fatalf("unexpected value for field %q at row #%d; got %q; want %q\nrow got\n%s\nrow expected\n%s",`
			`resultField.Name, i, resultField.Value, expectedField.Value, rowToString(resultRow), rowToString(expectedRow))`
			`}`
			`}`
			`}`
			`}`

			`func sortTestRows(rows [][]Field) {`
			`slices.SortFunc(rows, func(a, b []Field) int {`
			`reverse := -1`
			`if len(a) > len(b) {`
			`reverse = 1`
			`a, b = b, a`
			`}`
			`for i, fA := range a {`
			`fB := b[i]`
			`if fA.Name == fB.Name {`
			`if fA.Value == fB.Value {`
			`continue`
			`}`
			`if fA.Value < fB.Value {`
			`return reverse`
			`}`
			`return -reverse`
			`}`
			`if fA.Name < fB.Name {`
			`return reverse`
			`}`
			`return -reverse`
			`}`
			`if len(a) == len(b) {`
			`return 0`
			`}`
			`return reverse`
			`})`
			`}`

			`func rowsToString(rows [][]Field) string {`
			`a := make([]string, len(rows))`
			`for i, row := range rows {`
			`a[i] = rowToString(row)`
			`}`
			`return strings.Join(a, "\n")`
			`}`

			`func rowToString(row []Field) string {`
			`a := make([]string, len(row))`
			`for i, f := range row {`
			`a[i] = f.String()`
			`}`
			`return "{" + strings.Join(a, ",") + "}"`
			`}`

			`func TestPipeUnpackJSONUpdateNeededFields(t *testing.T) {`
			`f := func(s string, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected string) {`
			`t.Helper()`
			`expectPipeNeededFields(t, s, neededFields, unneededFields, neededFieldsExpected, unneededFieldsExpected)`
			`}`

			`// all the needed fields`
			`f("unpack_json from x", "", "", "", "")`

			`// all the needed fields, unneeded fields do not intersect with src`
			`f("unpack_json from x", "", "f1,f2", "", "f1,f2")`

			`// all the needed fields, unneeded fields intersect with src`
			`f("unpack_json from x", "", "f2,x", "", "f2")`

			`// needed fields do not intersect with src`
			`f("unpack_json from x", "f1,f2", "", "f1,f2,x", "")`

			`// needed fields intersect with src`
			`f("unpack_json from x", "f2,x", "", "f2,x", "")`
			`}`