lib/netutil: close connections in ConnPool if they are idle for more than 30 seconds

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2508
This commit is contained in:
Aliaksandr Valialkin 2022-05-02 15:01:50 +03:00
parent 11db05a4ff
commit 190c8b463c
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
2 changed files with 61 additions and 5 deletions

View File

@ -33,6 +33,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: add a handler for `/api/v1/status/buildinfo` endpoint, which is used by Grafana starting from v8.5.0. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2515).
* BUGFIX: export staleness markers as `null` values from [JSON export API](https://docs.victoriametrics.com/#how-to-export-data-in-json-line-format). Previously they were exported as `NaN` values. This could break the exported JSON parsing, since `NaN` values aren't supported by [JSON specification](https://www.json.org/).
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): close `vmselect->vmstorage` connections if they were idle for more than 30 seconds. Expose `vm_tcpdialer_conns_idle` metric at `http://vmselect:8481/metrics` with the number of idle connections to `vmstorage`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2508).
* BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): return non-zero exit code on error. This allows handling `vmctl` errors in shell scripts. Previously `vmctl` was returning 0 exit code on error. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2322).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly show `scrape_timeout` and `scrape_interval` options at `http://vmagent:8429/config` page. Previously these options weren't displayed even if they were set in `-promscrape.config`.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle joins on time series filtered by values. For example, `kube_pod_container_resource_requests{resource="cpu"} * on (namespace,pod) group_left() (kube_pod_status_phase{phase=~"Pending|Running"}==1)`. This query could result in `duplicate time series on the right side` error even if `==1` filter leaves only a single time series per `(namespace,pod)` labels. Now such a query is properly executed.

View File

@ -5,7 +5,9 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/handshake"
"github.com/VictoriaMetrics/metrics"
)
// ConnPool is a connection pool with ZSTD-compressed connections.
@ -17,7 +19,12 @@ type ConnPool struct {
handshakeFunc handshake.Func
compressionLevel int
conns []*handshake.BufferedConn
conns []connWithTimestamp
}
// connWithTimestamp is a pooled connection paired with the time
// it was returned to the pool.
type connWithTimestamp struct {
// bc is the pooled connection.
bc *handshake.BufferedConn
// lastActiveTime is the fasttime.UnixTimestamp() value recorded when bc
// was put into the pool. closeIdleConns compares it against a deadline
// 30 seconds in the past in order to detect idle connections.
lastActiveTime uint64
}
// NewConnPool creates a new connection pool for the given addr.
@ -26,13 +33,23 @@ type ConnPool struct {
// handshakeFunc is used for handshaking after the connection establishing.
// The compression is disabled if compressionLevel <= 0.
func NewConnPool(name, addr string, handshakeFunc handshake.Func, compressionLevel int) *ConnPool {
return &ConnPool{
cp := &ConnPool{
d: NewTCPDialer(name, addr),
name: name,
handshakeFunc: handshakeFunc,
compressionLevel: compressionLevel,
}
// Export the number of connections currently held in the pool
// as the vm_tcpdialer_conns_idle gauge. The callback takes cp.mu,
// since cp.conns is modified under this lock.
_ = metrics.NewGauge(fmt.Sprintf(`vm_tcpdialer_conns_idle{name=%q, addr=%q}`, name, addr), func() float64 {
cp.mu.Lock()
n := len(cp.conns)
cp.mu.Unlock()
return float64(n)
})
// Register the pool in the package-level connPools list, which is
// iterated by the goroutine started in init() for closing idle connections.
// NOTE(review): nothing visible here removes the pool from connPools,
// so the pool appears to stay registered for the process lifetime - confirm.
connPoolsMu.Lock()
connPools = append(connPools, cp)
connPoolsMu.Unlock()
return cp
}
// Addr returns the address where connections are established.
@ -45,8 +62,9 @@ func (cp *ConnPool) Get() (*handshake.BufferedConn, error) {
var bc *handshake.BufferedConn
cp.mu.Lock()
if len(cp.conns) > 0 {
bc = cp.conns[len(cp.conns)-1]
cp.conns[len(cp.conns)-1] = nil
c := cp.conns[len(cp.conns)-1]
bc = c.bc
c.bc = nil
cp.conns = cp.conns[:len(cp.conns)-1]
}
cp.mu.Unlock()
@ -78,6 +96,43 @@ func (cp *ConnPool) Put(bc *handshake.BufferedConn) {
return
}
cp.mu.Lock()
cp.conns = append(cp.conns, bc)
cp.conns = append(cp.conns, connWithTimestamp{
bc: bc,
lastActiveTime: fasttime.UnixTimestamp(),
})
cp.mu.Unlock()
}
// closeIdleConns closes pooled connections, which were idle for more than 30 seconds.
//
// Connections put into the pool within the last 30 seconds stay in the pool.
// Idle connections are removed from the pool under cp.mu and are closed
// after the lock is released, so slow Close() calls do not block
// concurrent Get() and Put() calls on the same pool.
func (cp *ConnPool) closeIdleConns() {
	// Connections with lastActiveTime at or before the deadline are considered idle.
	deadline := fasttime.UnixTimestamp() - 30

	var activeConns, idleConns []connWithTimestamp
	cp.mu.Lock()
	for _, c := range cp.conns {
		if c.lastActiveTime > deadline {
			activeConns = append(activeConns, c)
		} else {
			idleConns = append(idleConns, c)
		}
	}
	// Replace the pool contents with a fresh slice, so the old backing array
	// with references to the closed connections can be garbage-collected.
	cp.conns = activeConns
	cp.mu.Unlock()

	// Idle connections are no longer reachable via the pool at this point,
	// so they can be closed safely without holding cp.mu.
	for _, c := range idleConns {
		// The error is ignored on purpose: the connection is dropped anyway
		// and the caller has no means to react on a failed close.
		_ = c.bc.Close()
	}
}
// init starts a background goroutine, which periodically closes
// idle connections in all the registered connection pools.
func init() {
	// The pause between consecutive passes over the registered pools.
	const checkInterval = 17 * time.Second

	// closeIdleInAllPools walks over every registered pool under connPoolsMu.
	closeIdleInAllPools := func() {
		connPoolsMu.Lock()
		defer connPoolsMu.Unlock()
		for i := range connPools {
			connPools[i].closeIdleConns()
		}
	}

	go func() {
		for {
			time.Sleep(checkInterval)
			closeIdleInAllPools()
		}
	}()
}
var (
	// connPoolsMu protects connPools.
	connPoolsMu sync.Mutex

	// connPools contains all the pools created via NewConnPool.
	// It is iterated by the goroutine started in init() for closing idle connections.
	connPools []*ConnPool
)