mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-20 23:46:23 +01:00
f548adce0b
- Parse protobuf if Content-Type isn't set to `application/json` - this behavior is documented at https://grafana.com/docs/loki/latest/api/#push-log-entries-to-loki - Properly handle gzip'ped JSON requests. The `gzip` header must be read from `Content-Encoding` instead of `Content-Type` header - Properly flush all the parsed logs with the explicit call to vlstorage.MustAddRows() at the end of query handler - Check JSON field types more strictly. - Allow parsing Loki timestamp as floating-point number. Such a timestamp can be generated by some clients, which store timestamps in float64 instead of int64. - Optimize parsing of Loki labels in Prometheus text exposition format. - Simplify tests. - Remove lib/slicesutil, since there are no more users for it. - Update docs with missing info and fix various typos. For example, it should be enough to have `instance` and `job` labels as stream fields in most Loki setups. - Allow empty or missing timestamps in the ingested logs. The current timestamp at VictoriaLogs side is then used for the ingested logs. This simplifies debugging and testing of the provided HTTP-based data ingestion APIs. The remaining MAJOR issue, which needs to be addressed: victoria-logs binary size increased from 13MB to 22MB after adding support for Loki data ingestion protocol at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4482 . This is because of shitty protobuf dependencies. They must be replaced with another protobuf implementation similar to the one used at lib/prompb or lib/prompbmarshal .
150 lines
3.9 KiB
Go
150 lines
3.9 KiB
Go
package jsonline
|
|
|
|
import (
|
|
"bufio"
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlinsert/insertutils"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlstorage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logjson"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logstorage"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/writeconcurrencylimiter"
|
|
"github.com/VictoriaMetrics/metrics"
|
|
)
|
|
|
|
// RequestHandler processes jsonline insert requests
|
|
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
|
|
w.Header().Add("Content-Type", "application/json")
|
|
|
|
if r.Method != "POST" {
|
|
w.WriteHeader(http.StatusMethodNotAllowed)
|
|
return true
|
|
}
|
|
|
|
requestsTotal.Inc()
|
|
|
|
cp, err := insertutils.GetCommonParams(r)
|
|
if err != nil {
|
|
httpserver.Errorf(w, r, "%s", err)
|
|
return true
|
|
}
|
|
lr := logstorage.GetLogRows(cp.StreamFields, cp.IgnoreFields)
|
|
processLogMessage := cp.GetProcessLogMessageFunc(lr)
|
|
|
|
reader := r.Body
|
|
if r.Header.Get("Content-Encoding") == "gzip" {
|
|
zr, err := common.GetGzipReader(reader)
|
|
if err != nil {
|
|
logger.Errorf("cannot read gzipped _bulk request: %s", err)
|
|
return true
|
|
}
|
|
defer common.PutGzipReader(zr)
|
|
reader = zr
|
|
}
|
|
|
|
wcr := writeconcurrencylimiter.GetReader(reader)
|
|
defer writeconcurrencylimiter.PutReader(wcr)
|
|
|
|
lb := lineBufferPool.Get()
|
|
defer lineBufferPool.Put(lb)
|
|
|
|
lb.B = bytesutil.ResizeNoCopyNoOverallocate(lb.B, insertutils.MaxLineSizeBytes.IntN())
|
|
sc := bufio.NewScanner(wcr)
|
|
sc.Buffer(lb.B, len(lb.B))
|
|
|
|
n := 0
|
|
for {
|
|
ok, err := readLine(sc, cp.TimeField, cp.MsgField, processLogMessage)
|
|
wcr.DecConcurrency()
|
|
if err != nil {
|
|
logger.Errorf("cannot read line #%d in /jsonline request: %s", n, err)
|
|
break
|
|
}
|
|
if !ok {
|
|
break
|
|
}
|
|
n++
|
|
rowsIngestedTotal.Inc()
|
|
}
|
|
|
|
vlstorage.MustAddRows(lr)
|
|
logstorage.PutLogRows(lr)
|
|
|
|
return true
|
|
}
|
|
|
|
func readLine(sc *bufio.Scanner, timeField, msgField string, processLogMessage func(timestamp int64, fields []logstorage.Field)) (bool, error) {
|
|
var line []byte
|
|
for len(line) == 0 {
|
|
if !sc.Scan() {
|
|
if err := sc.Err(); err != nil {
|
|
if errors.Is(err, bufio.ErrTooLong) {
|
|
return false, fmt.Errorf(`cannot read json line, since its size exceeds -insert.maxLineSizeBytes=%d`, insertutils.MaxLineSizeBytes.IntN())
|
|
}
|
|
return false, err
|
|
}
|
|
return false, nil
|
|
}
|
|
line = sc.Bytes()
|
|
}
|
|
|
|
p := logjson.GetParser()
|
|
if err := p.ParseLogMessage(line); err != nil {
|
|
return false, fmt.Errorf("cannot parse json-encoded log entry: %w", err)
|
|
}
|
|
ts, err := extractTimestampFromFields(timeField, p.Fields)
|
|
if err != nil {
|
|
return false, fmt.Errorf("cannot parse timestamp: %w", err)
|
|
}
|
|
if ts == 0 {
|
|
ts = time.Now().UnixNano()
|
|
}
|
|
p.RenameField(msgField, "_msg")
|
|
processLogMessage(ts, p.Fields)
|
|
logjson.PutParser(p)
|
|
return true, nil
|
|
}
|
|
|
|
func extractTimestampFromFields(timeField string, fields []logstorage.Field) (int64, error) {
|
|
for i := range fields {
|
|
f := &fields[i]
|
|
if f.Name != timeField {
|
|
continue
|
|
}
|
|
timestamp, err := parseISO8601Timestamp(f.Value)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
f.Value = ""
|
|
return timestamp, nil
|
|
}
|
|
return 0, nil
|
|
}
|
|
|
|
// parseISO8601Timestamp converts s in time.RFC3339 layout into Unix nanoseconds.
//
// An empty string and "0" yield (0, nil); the caller is expected to replace
// such a zero result with the current timestamp.
// A string that cannot be parsed yields (0, err).
func parseISO8601Timestamp(s string) (int64, error) {
	switch s {
	case "", "0":
		// Placeholder values: signal "use the current time" to the caller.
		return 0, nil
	}

	parsed, parseErr := time.Parse(time.RFC3339, s)
	if parseErr == nil {
		return parsed.UnixNano(), nil
	}
	return 0, fmt.Errorf("cannot parse timestamp %q: %w", s, parseErr)
}
|
|
|
|
// lineBufferPool holds reusable byte buffers. RequestHandler takes one per request
// and passes it to bufio.Scanner.Buffer as the buffer for scanned lines.
var lineBufferPool bytesutil.ByteBufferPool
|
|
|
|
// Counters updated by RequestHandler.
var (
|
|
// requestsTotal is incremented once per POST request accepted by RequestHandler.
requestsTotal = metrics.NewCounter(`vl_http_requests_total{path="/insert/jsonline"}`)
|
|
// rowsIngestedTotal is incremented once per log line successfully processed by RequestHandler.
rowsIngestedTotal = metrics.NewCounter(`vl_rows_ingested_total{type="jsonline"}`)
|
|
)
|