2023-06-22 04:39:22 +02:00
|
|
|
package insertutils
|
|
|
|
|
|
|
|
import (
|
|
|
|
"net/http"
|
2024-09-03 19:16:10 +02:00
|
|
|
"strings"
|
2024-07-02 01:28:02 +02:00
|
|
|
"sync"
|
|
|
|
"time"
|
2023-06-22 04:39:22 +02:00
|
|
|
|
2023-09-15 15:18:38 +02:00
|
|
|
"github.com/VictoriaMetrics/metrics"
|
|
|
|
|
2023-06-22 04:39:22 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
2024-07-02 01:28:02 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
|
2023-06-22 04:39:22 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
// CommonParams contains common HTTP parameters used by log ingestion APIs.
|
|
|
|
//
|
2024-05-25 00:30:58 +02:00
|
|
|
// See https://docs.victoriametrics.com/victorialogs/data-ingestion/#http-parameters
|
2023-06-22 04:39:22 +02:00
|
|
|
type CommonParams struct {
|
|
|
|
TenantID logstorage.TenantID
|
|
|
|
TimeField string
|
|
|
|
MsgField string
|
|
|
|
StreamFields []string
|
|
|
|
IgnoreFields []string
|
|
|
|
|
|
|
|
Debug bool
|
|
|
|
DebugRequestURI string
|
|
|
|
DebugRemoteAddr string
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetCommonParams returns CommonParams from r.
|
|
|
|
func GetCommonParams(r *http.Request) (*CommonParams, error) {
|
|
|
|
// Extract tenantID
|
|
|
|
tenantID, err := logstorage.GetTenantIDFromRequest(r)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2024-09-03 17:43:26 +02:00
|
|
|
// Extract time field name from _time_field query arg or header
|
2024-09-03 19:16:10 +02:00
|
|
|
timeField := "_time"
|
2023-06-22 04:39:22 +02:00
|
|
|
if tf := r.FormValue("_time_field"); tf != "" {
|
|
|
|
timeField = tf
|
2024-09-03 17:43:26 +02:00
|
|
|
} else if tf = r.Header.Get("VL-Time-Field"); tf != "" {
|
|
|
|
timeField = tf
|
2023-06-22 04:39:22 +02:00
|
|
|
}
|
|
|
|
|
2024-09-03 17:43:26 +02:00
|
|
|
// Extract message field name from _msg_field query arg or header
|
2024-09-03 19:16:10 +02:00
|
|
|
msgField := ""
|
2023-06-22 04:39:22 +02:00
|
|
|
if msgf := r.FormValue("_msg_field"); msgf != "" {
|
|
|
|
msgField = msgf
|
2024-09-03 17:43:26 +02:00
|
|
|
} else if msgf = r.Header.Get("VL-Msg-Field"); msgf != "" {
|
|
|
|
msgField = msgf
|
2023-06-22 04:39:22 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
streamFields := httputils.GetArray(r, "_stream_fields")
|
2024-09-03 17:43:26 +02:00
|
|
|
if len(streamFields) == 0 {
|
2024-09-03 19:16:10 +02:00
|
|
|
if sf := r.Header.Get("VL-Stream-Fields"); len(sf) > 0 {
|
|
|
|
streamFields = strings.Split(sf, ",")
|
2024-09-03 17:43:26 +02:00
|
|
|
}
|
|
|
|
}
|
2023-06-22 04:39:22 +02:00
|
|
|
ignoreFields := httputils.GetArray(r, "ignore_fields")
|
2024-09-03 17:43:26 +02:00
|
|
|
if len(ignoreFields) == 0 {
|
2024-09-03 19:16:10 +02:00
|
|
|
if f := r.Header.Get("VL-Ignore-Fields"); len(f) > 0 {
|
|
|
|
ignoreFields = strings.Split(f, ",")
|
2024-09-03 17:43:26 +02:00
|
|
|
}
|
|
|
|
}
|
2023-06-22 04:39:22 +02:00
|
|
|
|
|
|
|
debug := httputils.GetBool(r, "debug")
|
2024-09-03 19:16:10 +02:00
|
|
|
if !debug {
|
|
|
|
if dh := r.Header.Get("VL-Debug"); len(dh) > 0 {
|
|
|
|
hv := strings.ToLower(dh)
|
|
|
|
switch hv {
|
|
|
|
case "", "0", "f", "false", "no":
|
|
|
|
default:
|
|
|
|
debug = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-06-22 04:39:22 +02:00
|
|
|
debugRequestURI := ""
|
|
|
|
debugRemoteAddr := ""
|
|
|
|
if debug {
|
|
|
|
debugRequestURI = httpserver.GetRequestURI(r)
|
|
|
|
debugRemoteAddr = httpserver.GetQuotedRemoteAddr(r)
|
|
|
|
}
|
|
|
|
|
|
|
|
cp := &CommonParams{
|
|
|
|
TenantID: tenantID,
|
|
|
|
TimeField: timeField,
|
|
|
|
MsgField: msgField,
|
|
|
|
StreamFields: streamFields,
|
|
|
|
IgnoreFields: ignoreFields,
|
|
|
|
Debug: debug,
|
|
|
|
DebugRequestURI: debugRequestURI,
|
|
|
|
DebugRemoteAddr: debugRemoteAddr,
|
|
|
|
}
|
2024-09-03 17:43:26 +02:00
|
|
|
|
2023-06-22 04:39:22 +02:00
|
|
|
return cp, nil
|
|
|
|
}
|
|
|
|
|
2024-06-17 12:13:18 +02:00
|
|
|
// GetCommonParamsForSyslog returns common params needed for parsing syslog messages and storing them to the given tenantID.
|
|
|
|
func GetCommonParamsForSyslog(tenantID logstorage.TenantID) *CommonParams {
|
|
|
|
// See https://docs.victoriametrics.com/victorialogs/logsql/#unpack_syslog-pipe
|
|
|
|
cp := &CommonParams{
|
|
|
|
TenantID: tenantID,
|
|
|
|
TimeField: "timestamp",
|
|
|
|
MsgField: "message",
|
|
|
|
StreamFields: []string{
|
|
|
|
"hostname",
|
|
|
|
"app_name",
|
|
|
|
"proc_id",
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
return cp
|
|
|
|
}
|
|
|
|
|
2024-06-17 22:28:15 +02:00
|
|
|
// LogMessageProcessor is an interface for log message processors.
|
|
|
|
type LogMessageProcessor interface {
|
|
|
|
// AddRow must add row to the LogMessageProcessor with the given timestamp and the given fields.
|
|
|
|
//
|
|
|
|
// The LogMessageProcessor implementation cannot hold references to fields, since the caller can re-use them.
|
|
|
|
AddRow(timestamp int64, fields []logstorage.Field)
|
|
|
|
|
|
|
|
// MustClose() must flush all the remaining fields and free up resources occupied by LogMessageProcessor.
|
|
|
|
MustClose()
|
|
|
|
}
|
|
|
|
|
|
|
|
type logMessageProcessor struct {
|
2024-07-02 01:28:02 +02:00
|
|
|
mu sync.Mutex
|
|
|
|
wg sync.WaitGroup
|
|
|
|
stopCh chan struct{}
|
|
|
|
lastFlushTime time.Time
|
|
|
|
|
2024-06-17 22:28:15 +02:00
|
|
|
cp *CommonParams
|
|
|
|
lr *logstorage.LogRows
|
|
|
|
}
|
|
|
|
|
2024-07-02 01:28:02 +02:00
|
|
|
func (lmp *logMessageProcessor) initPeriodicFlush() {
|
|
|
|
lmp.lastFlushTime = time.Now()
|
|
|
|
|
|
|
|
lmp.wg.Add(1)
|
|
|
|
go func() {
|
|
|
|
defer lmp.wg.Done()
|
|
|
|
|
|
|
|
d := timeutil.AddJitterToDuration(time.Second)
|
|
|
|
ticker := time.NewTicker(d)
|
|
|
|
defer ticker.Stop()
|
|
|
|
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case <-lmp.stopCh:
|
|
|
|
return
|
|
|
|
case <-ticker.C:
|
|
|
|
lmp.mu.Lock()
|
|
|
|
if time.Since(lmp.lastFlushTime) >= d {
|
|
|
|
lmp.flushLocked()
|
|
|
|
}
|
|
|
|
lmp.mu.Unlock()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
}
|
|
|
|
|
2024-06-17 22:28:15 +02:00
|
|
|
// AddRow adds new log message to lmp with the given timestamp and fields.
|
|
|
|
func (lmp *logMessageProcessor) AddRow(timestamp int64, fields []logstorage.Field) {
|
2024-07-02 01:28:02 +02:00
|
|
|
lmp.mu.Lock()
|
|
|
|
defer lmp.mu.Unlock()
|
|
|
|
|
2024-06-17 22:28:15 +02:00
|
|
|
if len(fields) > *MaxFieldsPerLine {
|
|
|
|
rf := logstorage.RowFormatter(fields)
|
|
|
|
logger.Warnf("dropping log line with %d fields; it exceeds -insert.maxFieldsPerLine=%d; %s", len(fields), *MaxFieldsPerLine, rf)
|
|
|
|
rowsDroppedTotalTooManyFields.Inc()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-09-26 09:53:17 +02:00
|
|
|
// _msg field must be non-empty according to VictoriaLogs data model.
|
|
|
|
// See https://docs.victoriametrics.com/victorialogs/keyconcepts/#message-field
|
2024-09-26 09:35:28 +02:00
|
|
|
msgExist := false
|
|
|
|
for i := range fields {
|
2024-09-26 09:53:17 +02:00
|
|
|
if fields[i].Name == "_msg" {
|
2024-09-26 09:35:28 +02:00
|
|
|
msgExist = len(fields[i].Value) > 0
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !msgExist {
|
|
|
|
rf := logstorage.RowFormatter(fields)
|
|
|
|
logger.Warnf("dropping log line without _msg field; %s", rf)
|
|
|
|
rowsDroppedTotalMsgNotValid.Inc()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2024-06-17 22:28:15 +02:00
|
|
|
lmp.lr.MustAdd(lmp.cp.TenantID, timestamp, fields)
|
|
|
|
if lmp.cp.Debug {
|
|
|
|
s := lmp.lr.GetRowString(0)
|
|
|
|
lmp.lr.ResetKeepSettings()
|
2024-09-03 19:16:10 +02:00
|
|
|
logger.Infof("remoteAddr=%s; requestURI=%s; ignoring log entry because of `debug` arg: %s", lmp.cp.DebugRemoteAddr, lmp.cp.DebugRequestURI, s)
|
2024-06-17 22:28:15 +02:00
|
|
|
rowsDroppedTotalDebug.Inc()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if lmp.lr.NeedFlush() {
|
2024-07-02 01:28:02 +02:00
|
|
|
lmp.flushLocked()
|
2024-06-17 22:28:15 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-07-02 01:28:02 +02:00
|
|
|
// flushLocked must be called under locked lmp.mu.
|
|
|
|
func (lmp *logMessageProcessor) flushLocked() {
|
|
|
|
lmp.lastFlushTime = time.Now()
|
2024-06-17 22:28:15 +02:00
|
|
|
vlstorage.MustAddRows(lmp.lr)
|
|
|
|
lmp.lr.ResetKeepSettings()
|
|
|
|
}
|
|
|
|
|
|
|
|
// MustClose flushes the remaining data to the underlying storage and closes lmp.
|
|
|
|
func (lmp *logMessageProcessor) MustClose() {
|
2024-07-02 01:28:02 +02:00
|
|
|
close(lmp.stopCh)
|
|
|
|
lmp.wg.Wait()
|
|
|
|
|
|
|
|
lmp.flushLocked()
|
2024-06-17 22:28:15 +02:00
|
|
|
logstorage.PutLogRows(lmp.lr)
|
|
|
|
lmp.lr = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewLogMessageProcessor returns new LogMessageProcessor for the given cp.
|
2024-07-02 01:28:02 +02:00
|
|
|
//
|
|
|
|
// MustClose() must be called on the returned LogMessageProcessor when it is no longer needed.
|
2024-06-17 22:28:15 +02:00
|
|
|
func (cp *CommonParams) NewLogMessageProcessor() LogMessageProcessor {
|
|
|
|
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
2024-07-02 01:28:02 +02:00
|
|
|
lmp := &logMessageProcessor{
|
2024-06-17 22:28:15 +02:00
|
|
|
cp: cp,
|
|
|
|
lr: lr,
|
2024-07-02 01:28:02 +02:00
|
|
|
|
|
|
|
stopCh: make(chan struct{}),
|
2023-06-22 04:39:22 +02:00
|
|
|
}
|
2024-07-02 01:28:02 +02:00
|
|
|
lmp.initPeriodicFlush()
|
|
|
|
|
|
|
|
return lmp
|
2023-06-22 04:39:22 +02:00
|
|
|
}
|
|
|
|
|
2024-09-03 19:16:10 +02:00
|
|
|
var (
|
|
|
|
rowsDroppedTotalDebug = metrics.NewCounter(`vl_rows_dropped_total{reason="debug"}`)
|
|
|
|
rowsDroppedTotalTooManyFields = metrics.NewCounter(`vl_rows_dropped_total{reason="too_many_fields"}`)
|
2024-09-26 09:35:28 +02:00
|
|
|
rowsDroppedTotalMsgNotValid = metrics.NewCounter(`vl_rows_dropped_total{reason="msg_not_exist"}`)
|
2024-09-03 19:16:10 +02:00
|
|
|
)
|