VictoriaMetrics/app/vmctl/influx/influx_test.go
Zhu Jiekun f16a58f14c
vmctl: fixed import duplicate data when query result contains multiple series (#7330)
### Describe Your Changes

Fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7301

When querying with condition like `WHERE a=1` (looking for series A),
InfluxDB can return data with the tag `a=1` (series A) and data with the
tag `a=1,b=1` (series B).

However, series B will be queried later, and its data should not be
combined into series A's data.

This PR filters out those series that are not identical to the original query
condition.

For table `example`:
```
// time                           host    region  value
// ----                           ----    ------  -----
// 2024-10-25T02:12:13.469720983Z serverA us_west 0.64
// 2024-10-25T02:12:21.832755213Z serverA us_west 0.75
// 2024-10-25T02:12:32.351876479Z serverA         0.88
// 2024-10-25T02:12:37.766320484Z serverA         0.95
```

The query for series A (`example_value{host="serverA"}`) and result will
be:
```SQL
SELECT * FROM example WHERE host = 'serverA'
```
```json
{
	"results": [{
		"statement_id": 0,
		"series": [{
			"name": "cpu",
			"columns": ["time", "host", "region", "value"],
			"values": [
				["2024-10-25T02:12:13.469720983Z", "serverA", "us_west", 0.64],
				["2024-10-25T02:12:21.832755213Z", "serverA", "us_west", 0.75],
				["2024-10-25T02:12:32.351876479Z", "serverA", null, 0.88],
				["2024-10-25T02:12:37.766320484Z", "serverA", null, 0.95]
			]
		}]
	}]
}
```

We need to discard `values[0]` and `values[1]` because the value of the
**unwanted** column `region` is not null.

As for series B (`example_value{host="serverA", region="us_west"}`), no
change is needed since the query already filters out unwanted rows.

### Note
This is a draft PR for verifying the fix.

### Checklist

The following checks are **mandatory**:

- [x] My change adheres to the [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: hagen1778 <roman@victoriametrics.com>
2024-11-06 13:53:49 +01:00

106 lines
2.4 KiB
Go

package influx
import "testing"
// TestFetchQuery checks the select statement produced by Series.fetchQuery
// for different combinations of label pairs, empty tags and time filters.
func TestFetchQuery(t *testing.T) {
	// check builds the query for series with the given time filter and
	// fails the test immediately if it differs from want.
	check := func(series *Series, filter, want string) {
		t.Helper()
		got := series.fetchQuery(filter)
		if got == want {
			return
		}
		t.Fatalf("unexpected result\ngot\n%s\nwant\n%s", got, want)
	}

	// single label pair, no time filter
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
		LabelPairs: []LabelPair{
			{Name: "foo", Value: "bar"},
		},
	}, "", `select "value" from "cpu" where "foo"::tag='bar'`)

	// multiple label pairs are joined with "and"
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
		LabelPairs: []LabelPair{
			{Name: "foo", Value: "bar"},
			{Name: "baz", Value: "qux"},
		},
	}, "", `select "value" from "cpu" where "foo"::tag='bar' and "baz"::tag='qux'`)

	// a single quote inside the label value is escaped; the time filter is appended last
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
		LabelPairs: []LabelPair{
			{Name: "foo", Value: "b'ar"},
		},
	}, "time >= now()", `select "value" from "cpu" where "foo"::tag='b\'ar' and time >= now()`)

	// backslashes and several single quotes inside label values are escaped
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
		LabelPairs: []LabelPair{
			{Name: "name", Value: `dev-mapper-centos\x2dswap.swap`},
			{Name: "state", Value: "dev-mapp'er-c'en'tos"},
		},
	}, "time >= now()", `select "value" from "cpu" where "name"::tag='dev-mapper-centos\\x2dswap.swap' and "state"::tag='dev-mapp\'er-c\'en\'tos' and time >= now()`)

	// no label pairs, only a time filter
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
	}, "time >= now()", `select "value" from "cpu" where time >= now()`)

	// neither label pairs nor a time filter: no where clause at all
	check(&Series{
		Measurement: "cpu",
		Field:       "value",
	}, "", `select "value" from "cpu"`)

	// empty tags are matched against the empty string
	check(&Series{
		Measurement: "cpu",
		Field:       "value1",
		EmptyTags:   []string{"e1", "e2", "e3"},
	}, "", `select "value1" from "cpu" where "e1"::tag='' and "e2"::tag='' and "e3"::tag=''`)
}
// TestTimeFilter checks the time condition produced by timeFilter
// for every combination of present and absent start/end bounds.
func TestTimeFilter(t *testing.T) {
	// check fails the test immediately if timeFilter(from, to) differs from want.
	check := func(from, to, want string) {
		t.Helper()
		if got := timeFilter(from, to); got != want {
			t.Fatalf("unexpected result\ngot\n%v\nwant\n%s", got, want)
		}
	}

	const (
		lower = "2020-01-01T20:07:00Z"
		upper = "2020-01-01T21:07:00Z"
	)

	// neither bound is set
	check("", "", "")

	// only the lower bound is set
	check(lower, "", "time >= '2020-01-01T20:07:00Z'")

	// only the upper bound is set
	check("", upper, "time <= '2020-01-01T21:07:00Z'")

	// both bounds are set
	check(lower, upper, "time >= '2020-01-01T20:07:00Z' and time <= '2020-01-01T21:07:00Z'")
}