From 7d7d7c03bca447c0e82ac02b3c65eb237bbda423 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 28 Sep 2024 21:17:10 +0200 Subject: [PATCH] app/vlinsert: accept unix timestamps in seconds in addition to milliseconds at the ElasticSearch bulk API Timestamps in seconds are sometimes used for data ingestion via the ElasticSearch bulk API --- app/vlinsert/elasticsearch/elasticsearch.go | 6 +++++- app/vlinsert/elasticsearch/elasticsearch_test.go | 11 +++++++---- docs/VictoriaLogs/CHANGELOG.md | 2 ++ docs/VictoriaLogs/data-ingestion/README.md | 10 +++++++++- 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/app/vlinsert/elasticsearch/elasticsearch.go b/app/vlinsert/elasticsearch/elasticsearch.go index 280ba007ed..6dafad9a29 100644 --- a/app/vlinsert/elasticsearch/elasticsearch.go +++ b/app/vlinsert/elasticsearch/elasticsearch.go @@ -251,11 +251,15 @@ func parseElasticsearchTimestamp(s string) (int64, error) { return 0, nil } if len(s) < len("YYYY-MM-DD") || s[len("YYYY")] != '-' { - // Try parsing timestamp in milliseconds + // Try parsing timestamp in seconds or milliseconds n, err := strconv.ParseInt(s, 10, 64) if err != nil { return 0, fmt.Errorf("cannot parse timestamp in milliseconds from %q: %w", s, err) } + if n < (1<<31) && n >= (-1<<31) { + // The timestamp is in seconds. 
Convert it to milliseconds + n *= 1e3 + } if n > int64(math.MaxInt64)/1e6 { return 0, fmt.Errorf("too big timestamp in milliseconds: %d; mustn't exceed %d", n, int64(math.MaxInt64)/1e6) } diff --git a/app/vlinsert/elasticsearch/elasticsearch_test.go b/app/vlinsert/elasticsearch/elasticsearch_test.go index ca88d2ccca..bfb1cd52e6 100644 --- a/app/vlinsert/elasticsearch/elasticsearch_test.go +++ b/app/vlinsert/elasticsearch/elasticsearch_test.go @@ -78,15 +78,18 @@ func TestReadBulkRequest_Success(t *testing.T) { {"create":{"_index":"filebeat-8.8.0"}} {"@timestamp":"2023-06-06 04:48:12.735+01:00","message":"baz"} {"index":{"_index":"filebeat-8.8.0"}} -{"message":"xyz","@timestamp":"2023-06-06T04:48:13.735Z","x":"y"} +{"message":"xyz","@timestamp":"1686026893735","x":"y"} +{"create":{"_index":"filebeat-8.8.0"}} +{"message":"qwe rty","@timestamp":"1686026893"} ` timeField := "@timestamp" msgField := "message" - rowsExpected := 3 - timestampsExpected := []int64{1686026891735000000, 1686023292735000000, 1686026893735000000} + rowsExpected := 4 + timestampsExpected := []int64{1686026891735000000, 1686023292735000000, 1686026893735000000, 1686026893000000000} resultExpected := `{"@timestamp":"","log.offset":"71770","log.file.path":"/var/log/auth.log","_msg":"foobar"} {"@timestamp":"","_msg":"baz"} -{"_msg":"xyz","@timestamp":"","x":"y"}` +{"_msg":"xyz","@timestamp":"","x":"y"} +{"_msg":"qwe rty","@timestamp":""}` f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected) } diff --git a/docs/VictoriaLogs/CHANGELOG.md b/docs/VictoriaLogs/CHANGELOG.md index 197bff6f80..6a58d563d9 100644 --- a/docs/VictoriaLogs/CHANGELOG.md +++ b/docs/VictoriaLogs/CHANGELOG.md @@ -15,6 +15,8 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta ## tip +* FEATURE: [ElasticSearch bulk API](https://docs.victoriametrics.com/victorialogs/data-ingestion/#elasticsearch-bulk-api): accept timestamps in seconds in the ingested logs. 
+ ## [v0.31.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.31.0-victorialogs) Released at 2024-09-27 diff --git a/docs/VictoriaLogs/data-ingestion/README.md b/docs/VictoriaLogs/data-ingestion/README.md index a548a195a8..5dacc7db92 100644 --- a/docs/VictoriaLogs/data-ingestion/README.md +++ b/docs/VictoriaLogs/data-ingestion/README.md @@ -46,7 +46,15 @@ It is possible to push thousands of log lines in a single request to this API. If the [timestamp field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#time-field) is set to `"0"`, then the current timestamp at VictoriaLogs side is used per each ingested log line. -Otherwise the timestamp field must be in the [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) or [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) format. +Otherwise the timestamp field must be in one of the following formats: + +- [ISO8601](https://en.wikipedia.org/wiki/ISO_8601) or [RFC3339](https://www.rfc-editor.org/rfc/rfc3339). + For example, `2023-06-20T15:32:10Z` or `2023-06-20 15:32:10.123456789+02:00`. + If timezone information is missing (for example, `2023-06-20 15:32:10`), + then the time is parsed in the local timezone of the host where VictoriaLogs runs. + +- Unix timestamp in seconds or in milliseconds. For example, `1686026893` (seconds) or `1686026893735` (milliseconds). + For example, `2023-06-20T15:32:10Z` or `2023-06-20 15:32:10.123456789+02:00`. If timezone information is missing (for example, `2023-06-20 15:32:10`), then the time is parsed in the local timezone of the host where VictoriaLogs runs.