app/vlinsert: accept logs with empty _msg field

In this case the _msg field is set to the value specified in the -defaultMsgValue command-line flag.

This should simplify first-time migration to VictoriaLogs from other systems.
This commit is contained in:
Aliaksandr Valialkin 2024-10-30 14:59:03 +01:00
parent 96466562b6
commit 16ee470da6
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
5 changed files with 66 additions and 24 deletions

View File

@ -1,6 +1,7 @@
package insertutils
import (
"flag"
"net/http"
"strings"
"sync"
@ -16,6 +17,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
// defaultMsgValue holds the value of the -defaultMsgValue command-line flag.
//
// AddRow appends a _msg field with this value to log entries, which have no non-empty _msg field.
// Setting the flag to an empty string disables this substitution.
defaultMsgValue = flag.String("defaultMsgValue", "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field",
"Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
)
// CommonParams contains common HTTP parameters used by log ingestion APIs.
//
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters
@ -140,6 +146,8 @@ type logMessageProcessor struct {
stopCh chan struct{}
lastFlushTime time.Time
tmpFields []logstorage.Field
cp *CommonParams
lr *logstorage.LogRows
}
@ -182,20 +190,15 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel
return
}
// _msg field must be non-empty according to VictoriaLogs data model.
// See https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field
msgExist := false
for i := range fields {
if fields[i].Name == "_msg" {
msgExist = len(fields[i].Value) > 0
break
}
}
if !msgExist {
rf := logstorage.RowFormatter(fields)
logger.Warnf("dropping log line without _msg field; %s", rf)
rowsDroppedTotalMsgNotValid.Inc()
return
if *defaultMsgValue != "" && !hasMsgField(fields) {
// The log entry doesn't contain mandatory _msg field. Add _msg field with default value then
// according to https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field .
lmp.tmpFields = append(lmp.tmpFields[:0], fields...)
lmp.tmpFields = append(lmp.tmpFields, logstorage.Field{
Name: "_msg",
Value: *defaultMsgValue,
})
fields = lmp.tmpFields
}
lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields)
@ -211,6 +214,15 @@ func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Fiel
}
}
// hasMsgField reports whether fields contains a non-empty _msg field.
//
// Only the first field named "_msg" is inspected: the result is true when its value
// is non-empty and false when it is empty. False is returned if there is no such field.
func hasMsgField(fields []logstorage.Field) bool {
	for i := range fields {
		field := &fields[i]
		if field.Name != "_msg" {
			continue
		}
		// The first _msg field decides the result; later duplicates are ignored.
		return field.Value != ""
	}
	return false
}
// flushLocked must be called under locked lmp.mu.
func (lmp *logMessageProcessor) flushLocked() {
lmp.lastFlushTime = time.Now()
@ -247,5 +259,4 @@ func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
// Counters for dropped log rows. All of them are exposed under the vl_rows_dropped_total
// metric name and differ only by the `reason` label value.
var (
rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`)
)

View File

@ -35,6 +35,18 @@ func TestProcessStreamInternal_Success(t *testing.T) {
{"_msg":"baz"}
{"_msg":"xyz","x":"y"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
// Non-existing msgField: none of the input entries contains the "foobar" field named by msgField,
// so the expected rows must consist solely of the fields present in the input.
data = `{"@timestamp":"2023-06-06T04:48:11.735Z","log":{"offset":71770,"file":{"path":"/var/log/auth.log"}},"message":"foobar"}
{"@timestamp":"2023-06-06T04:48:12.735+01:00","message":"baz"}
`
timeField = "@timestamp"
msgField = "foobar"
rowsExpected = 2
timestampsExpected = []int64{1686026891735000000, 1686023292735000000}
// The second input entry carries only the "message" field (besides the timestamp),
// so no extra fields may appear in the expected output for it.
resultExpected = `{"log.offset":"71770","log.file.path":"/var/log/auth.log","message":"foobar"}
{"message":"baz"}`
f(data, timeField, msgField, rowsExpected, timestampsExpected, resultExpected)
}
func TestProcessStreamInternal_Failure(t *testing.T) {

View File

@ -16,6 +16,7 @@ according to [these docs](https://docs.victoriametrics.com/victorialogs/quicksta
## tip
* FEATURE: allow specifying a list of log fields, which contain log message, via `_msg_field` query arg and via `VL-Msg-Field` HTTP request header. For example, `_msg_field=message,event.message` instructs obtaining [message field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field) from the first non-empty field out of the `message` and `event.message` fields. See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for details.
* FEATURE: accept logs without [`_msg` field](https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field). In this case the `_msg` field is automatically set to the value specified in the `-defaultMsgValue` command-line flag.
* BUGFIX: fix `runtime error: index out of range [0] with length 0` panic during low-rate data ingestion. The panic has been introduced in [v0.38.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v0.38.0-victorialogs). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7391).

View File

@ -260,8 +260,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
```
-blockcache.missesBeforeCaching int
The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
-cacheExpireDuration duration
Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
-defaultMsgValue string
Default value for _msg field if the ingested log entry doesn't contain it; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field (default "missing _msg field; see https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field")
-elasticsearch.version string
Elasticsearch version to report to client (default "8.9.0")
-enableTCP6
@ -275,6 +275,9 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
-flagsAuthKey value
Auth key for /flags endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
Flag value can be read from the given file when using -flagsAuthKey=file:///abs/path/to/file or -flagsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -flagsAuthKey=http://host/path or -flagsAuthKey=https://host/path
-forceMergeAuthKey value
authKey, which must be passed in query string to /internal/force_merge pages. It overrides -httpAuth.*
Flag value can be read from the given file when using -forceMergeAuthKey=file:///abs/path/to/file or -forceMergeAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -forceMergeAuthKey=http://host/path or -forceMergeAuthKey=https://host/path
-fs.disableMmap
Whether to use pread() instead of mmap() for reading data files. By default, mmap() is used for 64-bit arches and pread() is used for 32-bit arches, since they cannot read data files bigger than 2^32 bytes in memory. mmap() is usually faster for reading small data chunks than pread()
-futureRetention value
@ -326,6 +329,20 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen
-internStringMaxLen int
The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration (default 500)
-journald.ignoreFields array
Journal fields to ignore. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.
Supports an array of values separated by comma or specified via multiple flags.
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
-journald.includeEntryMetadata
Include journal entry fields, which start with double underscores.
-journald.streamFields array
Journal fields to be used as stream fields. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html.
Supports an array of values separated by comma or specified via multiple flags.
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
-journald.tenantID string
TenantID for logs ingested via the Journald endpoint. (default "0:0")
-journald.timeField string
Journal field to be used as time field. See the list of allowed fields at https://www.freedesktop.org/software/systemd/man/latest/systemd.journal-fields.html. (default "__REALTIME_TIMESTAMP")
-logIngestedRows
Whether to log all the ingested log entries; this can be useful for debugging of data ingestion; see https://docs.victoriametrics.com/victorialogs/data-ingestion/ ; see also -logNewStreams
-logNewStreams
@ -341,7 +358,7 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
-loggerLevel string
Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC (default "INFO")
-loggerMaxArgLen int
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 1000)
The maximum length of a single logged argument. Longer arguments are replaced with 'arg_start..arg_end', where 'arg_start' and 'arg_end' is prefix and suffix of the arg with the length not exceeding -loggerMaxArgLen / 2 (default 5000)
-loggerOutput string
Output for the logs. Supported values: stderr, stdout (default "stderr")
-loggerTimezone string
@ -361,10 +378,8 @@ Pass `-help` to VictoriaLogs in order to see the list of supported command-line
Auth key for /metrics endpoint. It must be passed via authKey query arg. It overrides -httpAuth.*
Flag value can be read from the given file when using -metricsAuthKey=file:///abs/path/to/file or -metricsAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -metricsAuthKey=http://host/path or -metricsAuthKey=https://host/path
-pprofAuthKey value
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides -httpAuth.*
Flag value can be read from the given file when using -pprofAuthKey=file:///abs/path/to/file or -pprofAuthKey=file://./relative/path/to/file . Flag value can be read from the given http/https url when using -pprofAuthKey=http://host/path or -pprofAuthKey=https://host/path
-prevCacheRemovalPercent float
Items in the previous caches are removed when the percent of requests it serves becomes lower than this value. Higher values reduce memory usage at the cost of higher CPU usage. See also -cacheExpireDuration (default 0.1)
-pushmetrics.disableCompression
Whether to disable request body compression when pushing metrics to every -pushmetrics.url
-pushmetrics.extraLabel array

View File

@ -127,11 +127,14 @@ log entry, which can be ingested into VictoriaLogs:
}
```
If the actual log message has other than `_msg` field name, then it is possible to specify the real log message field
via `_msg_field` query arg or via `VL-Msg-Field` HTTP header during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
If the actual log message is stored in a field other than `_msg`, then this field can be specified via `_msg_field` HTTP query arg or via `VL-Msg-Field` HTTP header
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/)
according to [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters).
For example, if log message is located in the `event.original` field, then specify `_msg_field=event.original` query arg
during [data ingestion](https://docs.victoriametrics.com/victorialogs/data-ingestion/).
See [these docs](https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters) for more details.
If the `_msg` field remains empty after an attempt to get it from `_msg_field`, then VictoriaLogs automatically sets it to the value specified
via `-defaultMsgValue` command-line flag.
### Time field