lib/httpserver: add jitter to connection timeouts in order to protect against the thundering herd problem

This commit is contained in:
Aliaksandr Valialkin 2020-09-08 19:54:41 +03:00
parent df6519c190
commit e5c8377212

View File

@ -23,6 +23,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
"github.com/klauspost/compress/gzip" "github.com/klauspost/compress/gzip"
"github.com/valyala/fastrand"
) )
var ( var (
@ -41,7 +42,7 @@ var (
"from /health page, so load balancers can route new requests to other servers") "from /health page, so load balancers can route new requests to other servers")
idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections") idleConnTimeout = flag.Duration("http.idleConnTimeout", time.Minute, "Timeout for incoming idle http connections")
connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, "Incoming http connections are closed after the configured timeout. This may help spreading incoming load "+ connTimeout = flag.Duration("http.connTimeout", 2*time.Minute, "Incoming http connections are closed after the configured timeout. This may help spreading incoming load "+
"among a cluster of services behind load balancer") "among a cluster of services behind load balancer. Note that the real timeout may be bigger by up to 10% as a protection from Thundering herd problem")
) )
var ( var (
@ -112,8 +113,13 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
ErrorLog: logger.StdErrorLogger(), ErrorLog: logger.StdErrorLogger(),
ConnContext: func(ctx context.Context, c net.Conn) context.Context { ConnContext: func(ctx context.Context, c net.Conn) context.Context {
startTime := fasttime.UnixTimestamp() timeoutSec := connTimeout.Seconds()
return context.WithValue(ctx, connStartTimeKey, &startTime) // Add a jitter for connection timeout in order to prevent Thundering herd problem
// when all the connections are established at the same time.
// See https://en.wikipedia.org/wiki/Thundering_herd_problem
jitterSec := fastrand.Uint32n(uint32(timeoutSec / 10))
deadline := fasttime.UnixTimestamp() + uint64(timeoutSec) + uint64(jitterSec)
return context.WithValue(ctx, connDeadlineTimeKey, &deadline)
}, },
} }
serversLock.Lock() serversLock.Lock()
@ -130,12 +136,12 @@ func serveWithListener(addr string, ln net.Listener, rh RequestHandler) {
func whetherToCloseConn(r *http.Request) bool { func whetherToCloseConn(r *http.Request) bool {
ctx := r.Context() ctx := r.Context()
v := ctx.Value(connStartTimeKey) v := ctx.Value(connDeadlineTimeKey)
st, ok := v.(*uint64) deadline, ok := v.(*uint64)
return ok && fasttime.UnixTimestamp()-*st > uint64(*connTimeout/time.Second) return ok && fasttime.UnixTimestamp() > *deadline
} }
var connStartTimeKey = interface{}("startTime") var connDeadlineTimeKey = interface{}("connDeadlineSecs")
// Stop stops the http server on the given addr, which has been started // Stop stops the http server on the given addr, which has been started
// via Serve func. // via Serve func.