mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-23 12:31:07 +01:00
app/vmauth: adds idleConnTimeout flag, retry trivial errors (#6388)
* adds idleConnTimeout flag, which must reduce probability of `broken pipe` and `connection reset` errors. * one-time retry trivial network requests for the same backend --------- Signed-off-by: hagen1778 <roman@victoriametrics.com> Co-authored-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
ff458af25e
commit
d44058bcd6
@ -37,6 +37,8 @@ var (
|
||||
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing")
|
||||
maxIdleConnsPerBackend = flag.Int("maxIdleConnsPerBackend", 100, "The maximum number of idle connections vmauth can open per each backend host. "+
|
||||
"See also -maxConcurrentRequests")
|
||||
idleConnTimeout = flag.Duration("idleConnTimeout", 50*time.Second, `Defines a duration for idle (keep-alive connections) to exist.
|
||||
Consider setting this value less than "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors.`)
|
||||
responseTimeout = flag.Duration("responseTimeout", 5*time.Minute, "The timeout for receiving a response from backend")
|
||||
maxConcurrentRequests = flag.Int("maxConcurrentRequests", 1000, "The maximum number of concurrent requests vmauth can process. Other requests are rejected with "+
|
||||
"'429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options")
|
||||
@ -199,10 +201,8 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
|
||||
isDefault = true
|
||||
}
|
||||
maxAttempts := up.getBackendsCount()
|
||||
if maxAttempts > 1 {
|
||||
r.Body = &readTrackingBody{
|
||||
r: r.Body,
|
||||
}
|
||||
r.Body = &readTrackingBody{
|
||||
r: r.Body,
|
||||
}
|
||||
for i := 0; i < maxAttempts; i++ {
|
||||
bu := up.getBackendURL()
|
||||
@ -243,8 +243,10 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
req.Host = targetURL.Host
|
||||
}
|
||||
updateHeadersByConfig(req.Header, hc.RequestHeaders)
|
||||
res, err := ui.rt.RoundTrip(req)
|
||||
var trivialRetries int
|
||||
rtb, rtbOK := req.Body.(*readTrackingBody)
|
||||
again:
|
||||
res, err := ui.rt.RoundTrip(req)
|
||||
if err != nil {
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
// Do not retry canceled or timed out requests
|
||||
@ -267,6 +269,12 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url
|
||||
ui.backendErrors.Inc()
|
||||
return true
|
||||
}
|
||||
// one time retry trivial network errors, such as proxy idle timeout misconfiguration
|
||||
// or socket close by OS
|
||||
if (netutil.IsTrivialNetworkError(err) || errors.Is(err, io.EOF)) && trivialRetries < 1 {
|
||||
trivialRetries++
|
||||
goto again
|
||||
}
|
||||
// Retry the request if its body wasn't read yet. This usually means that the backend isn't reachable.
|
||||
remoteAddr := httpserver.GetQuotedRemoteAddr(r)
|
||||
// NOTE: do not use httpserver.GetRequestURI
|
||||
@ -434,6 +442,7 @@ func newRoundTripper(caFileOpt, certFileOpt, keyFileOpt, serverNameOpt string, i
|
||||
tr.ResponseHeaderTimeout = *responseTimeout
|
||||
// Automatic compression must be disabled in order to fix https://github.com/VictoriaMetrics/VictoriaMetrics/issues/535
|
||||
tr.DisableCompression = true
|
||||
tr.IdleConnTimeout = *idleConnTimeout
|
||||
tr.MaxIdleConnsPerHost = *maxIdleConnsPerBackend
|
||||
if tr.MaxIdleConns != 0 && tr.MaxIdleConns < tr.MaxIdleConnsPerHost {
|
||||
tr.MaxIdleConns = tr.MaxIdleConnsPerHost
|
||||
|
@ -34,6 +34,8 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
||||
|
||||
* FEATURE: [alerts-vmagent](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmagent.yml): add new alerting rules `StreamAggrFlushTimeout` and `StreamAggrDedupFlushTimeout` to notify about issues during stream aggregation.
|
||||
* FEATURE: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683): add row `Streaming aggregation` with panels related to [streaming aggregation](https://docs.victoriametrics.com/stream-aggregation/) process.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add `idleConnTimeout` flag set to 50s by default. It should reduce the probability of `broken pipe` or `connection reset by peer` errors in vmauth logs.
|
||||
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): add auto request retry for trivial network errors, such as `broken pipe` and `connection reset` for requests to the configured backends.
|
||||
|
||||
* BUGFIX: all VictoriaMetrics components: prioritize `-configAuthKey` and `-reloadAuthKey` over `-httpAuth.*` settings. This change aligns behavior of mentioned flags with other auth flags like `-metricsAuthKey`, `-flagsAuthKey`, `-pprofAuthKey`. Check [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6329).
|
||||
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl/): add `--disable-progress-bar` global command-line flag. It can be used for disabling dynamic progress bar for all migration modes. `--vm-disable-progress-bar` command-line flag is deprecated and will be removed in the future releases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6367).
|
||||
|
@ -1206,6 +1206,8 @@ See the docs at https://docs.victoriametrics.com/vmauth/ .
|
||||
Whether to use proxy protocol for connections accepted at the corresponding -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing
|
||||
Supports array of values separated by comma or specified via multiple flags.
|
||||
Empty values are set to false.
|
||||
-idleConnTimeout duration
|
||||
Defines a duration for idle (keep-alive connections) to exist. Consider setting this value less than "-http.idleConnTimeout". It must prevent possible "write: broken pipe" and "read: connection reset by peer" errors. (default 50s)
|
||||
-internStringCacheExpireDuration duration
|
||||
The expiry duration for caches for interned strings. See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache (default 6m0s)
|
||||
-internStringDisableCache
|
||||
|
Loading…
Reference in New Issue
Block a user