VictoriaMetrics/lib/logger/logger.go

309 lines
7.8 KiB
Go
Raw Normal View History

2019-05-22 23:16:55 +02:00
package logger
import (
"errors"
"flag"
"fmt"
"io"
2019-05-22 23:16:55 +02:00
"log"
"os"
"runtime"
"strings"
"sync"
2019-05-22 23:16:55 +02:00
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/metrics"
2019-05-22 23:16:55 +02:00
)
var (
loggerLevel = flag.String("loggerLevel", "INFO", "Minimum level of errors to log. Possible values: INFO, WARN, ERROR, FATAL, PANIC")
loggerFormat = flag.String("loggerFormat", "default", "Format for logs. Possible values: default, json")
loggerOutput = flag.String("loggerOutput", "stderr", "Output for the logs. Supported values: stderr, stdout")
loggerTimezone = flag.String("loggerTimezone", "UTC", "Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. "+
"For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local")
disableTimestamps = flag.Bool("loggerDisableTimestamps", false, "Whether to disable writing timestamps in logs")
errorsPerSecondLimit = flag.Int("loggerErrorsPerSecondLimit", 0, "Per-second limit on the number of ERROR messages. If more than the given number of errors "+
"are emitted per second, then the remaining errors are suppressed. Zero value disables the rate limit")
warnsPerSecondLimit = flag.Int("loggerWarnsPerSecondLimit", 0, "Per-second limit on the number of WARN messages. If more than the given number of warns "+
"are emitted per second, then the remaining warns are suppressed. Zero value disables the rate limit")
)
2019-05-22 23:16:55 +02:00
// Init initializes the logger.
//
// Init must be called after flag.Parse()
//
// There is no need in calling Init from tests.
func Init() {
setLoggerOutput()
2019-05-22 23:16:55 +02:00
validateLoggerLevel()
validateLoggerFormat()
initTimezone()
go logLimiterCleaner()
2019-05-22 23:16:55 +02:00
logAllFlags()
2019-05-22 23:16:55 +02:00
}
func initTimezone() {
tz, err := time.LoadLocation(*loggerTimezone)
if err != nil {
log.Fatalf("cannot load timezone %q: %s", *loggerTimezone, err)
}
timezone = tz
}
var timezone = time.UTC
func setLoggerOutput() {
switch *loggerOutput {
case "stderr":
output = os.Stderr
case "stdout":
output = os.Stdout
default:
panic(fmt.Errorf("FATAL: unsupported `loggerOutput` value: %q; supported values are: stderr, stdout", *loggerOutput))
}
}
var output io.Writer = os.Stderr
2019-05-22 23:16:55 +02:00
func validateLoggerLevel() {
switch *loggerLevel {
case "INFO", "WARN", "ERROR", "FATAL", "PANIC":
2019-05-22 23:16:55 +02:00
default:
// We cannot use logger.Panicf here, since the logger isn't initialized yet.
panic(fmt.Errorf("FATAL: unsupported `-loggerLevel` value: %q; supported values are: INFO, WARN, ERROR, FATAL, PANIC", *loggerLevel))
2019-05-22 23:16:55 +02:00
}
}
func validateLoggerFormat() {
switch *loggerFormat {
case "default", "json":
default:
// We cannot use logger.Pancif here, since the logger isn't initialized yet.
panic(fmt.Errorf("FATAL: unsupported `-loggerFormat` value: %q; supported values are: default, json", *loggerFormat))
}
}
2019-05-22 23:16:55 +02:00
var stdErrorLogger = log.New(&logWriter{}, "", 0)
// StdErrorLogger returns standard error logger.
func StdErrorLogger() *log.Logger {
return stdErrorLogger
}
// Infof logs info message.
func Infof(format string, args ...interface{}) {
logLevel("INFO", format, args...)
}
// Warnf logs warn message.
func Warnf(format string, args ...interface{}) {
logLevel("WARN", format, args...)
}
2019-05-22 23:16:55 +02:00
// Errorf logs error message.
func Errorf(format string, args ...interface{}) {
logLevel("ERROR", format, args...)
}
// WarnfSkipframes logs warn message and skips the given number of frames for the caller.
func WarnfSkipframes(skipframes int, format string, args ...interface{}) {
logLevelSkipframes(skipframes, "WARN", format, args...)
}
// ErrorfSkipframes logs error message and skips the given number of frames for the caller.
func ErrorfSkipframes(skipframes int, format string, args ...interface{}) {
logLevelSkipframes(skipframes, "ERROR", format, args...)
}
2019-05-22 23:16:55 +02:00
// Fatalf logs fatal message and terminates the app.
func Fatalf(format string, args ...interface{}) {
logLevel("FATAL", format, args...)
}
// Panicf logs panic message and panics.
func Panicf(format string, args ...interface{}) {
logLevel("PANIC", format, args...)
}
func logLevel(level, format string, args ...interface{}) {
logLevelSkipframes(1, level, format, args...)
}
func logLevelSkipframes(skipframes int, level, format string, args ...interface{}) {
2019-05-22 23:16:55 +02:00
if shouldSkipLog(level) {
return
}
msg := fmt.Sprintf(format, args...)
logMessage(level, msg, 3+skipframes)
2019-05-22 23:16:55 +02:00
}
func logLimiterCleaner() {
2019-05-22 23:16:55 +02:00
for {
time.Sleep(time.Second)
logLimiter.reset()
2019-05-22 23:16:55 +02:00
}
}
var logLimiter = newLogLimit()
2019-05-22 23:16:55 +02:00
func newLogLimit() *logLimit {
return &logLimit{
m: make(map[string]uint64),
}
}
type logLimit struct {
mu sync.Mutex
m map[string]uint64
}
func (ll *logLimit) reset() {
ll.mu.Lock()
ll.m = make(map[string]uint64, len(ll.m))
ll.mu.Unlock()
}
// needSuppress checks if the number of calls for the given location exceeds the given limit.
//
// When the number of calls equals limit, log message prefix returned.
func (ll *logLimit) needSuppress(location string, limit uint64) (bool, string) {
// fast path
var msg string
if limit == 0 {
return false, msg
}
ll.mu.Lock()
defer ll.mu.Unlock()
if n, ok := ll.m[location]; ok {
if n >= limit {
switch n {
// report only once
case limit:
msg = fmt.Sprintf("suppressing log message with rate limit=%d: ", limit)
default:
return true, msg
}
}
ll.m[location] = n + 1
} else {
ll.m[location] = 1
}
return false, msg
}
2019-05-22 23:16:55 +02:00
type logWriter struct {
}
func (lw *logWriter) Write(p []byte) (int, error) {
logLevelSkipframes(2, "ERROR", "%s", p)
2019-05-22 23:16:55 +02:00
return len(p), nil
}
func logMessage(level, msg string, skipframes int) {
timestamp := ""
if !*disableTimestamps {
timestamp = time.Now().In(timezone).Format("2006-01-02T15:04:05.000Z0700")
}
2019-05-22 23:16:55 +02:00
levelLowercase := strings.ToLower(level)
_, file, line, ok := runtime.Caller(skipframes)
if !ok {
file = "???"
line = 0
}
if n := strings.Index(file, "/VictoriaMetrics/"); n >= 0 {
// Strip /VictoriaMetrics/ prefix
file = file[n+len("/VictoriaMetrics/"):]
}
location := fmt.Sprintf("%s:%d", file, line)
// rate limit ERROR and WARN log messages with given limit.
if level == "ERROR" || level == "WARN" {
limit := uint64(*errorsPerSecondLimit)
if level == "WARN" {
limit = uint64(*warnsPerSecondLimit)
}
ok, suppressMessage := logLimiter.needSuppress(location, limit)
if ok {
return
}
if len(suppressMessage) > 0 {
msg = suppressMessage + msg
}
}
2019-05-22 23:16:55 +02:00
for len(msg) > 0 && msg[len(msg)-1] == '\n' {
msg = msg[:len(msg)-1]
}
var logMsg string
switch *loggerFormat {
case "json":
if *disableTimestamps {
logMsg = fmt.Sprintf(`{"level":%q,"caller":%q,"msg":%q}`+"\n", levelLowercase, location, msg)
} else {
logMsg = fmt.Sprintf(`{"ts":%q,"level":%q,"caller":%q,"msg":%q}`+"\n", timestamp, levelLowercase, location, msg)
}
default:
if *disableTimestamps {
logMsg = fmt.Sprintf("%s\t%s\t%s\n", levelLowercase, location, msg)
} else {
logMsg = fmt.Sprintf("%s\t%s\t%s\t%s\n", timestamp, levelLowercase, location, msg)
}
}
2019-05-22 23:16:55 +02:00
// Serialize writes to log.
mu.Lock()
fmt.Fprint(output, logMsg)
mu.Unlock()
2019-05-22 23:16:55 +02:00
// Increment vm_log_messages_total
counterName := fmt.Sprintf(`vm_log_messages_total{app_version=%q, level=%q, location=%q}`, buildinfo.Version, levelLowercase, location)
metrics.GetOrCreateCounter(counterName).Inc()
2019-05-22 23:16:55 +02:00
switch level {
case "PANIC":
if *loggerFormat == "json" {
// Do not clutter `json` output with panic stack trace
os.Exit(-1)
}
2019-05-22 23:16:55 +02:00
panic(errors.New(msg))
case "FATAL":
os.Exit(-1)
}
}
var mu sync.Mutex
2019-05-22 23:16:55 +02:00
func shouldSkipLog(level string) bool {
switch *loggerLevel {
case "WARN":
switch level {
case "WARN", "ERROR", "FATAL", "PANIC":
return false
default:
return true
}
2019-05-22 23:16:55 +02:00
case "ERROR":
switch level {
case "ERROR", "FATAL", "PANIC":
return false
default:
return true
}
case "FATAL":
switch level {
case "FATAL", "PANIC":
return false
default:
return true
}
case "PANIC":
return level != "PANIC"
default:
return false
}
}