VictoriaMetrics/lib/ingestserver/opentsdb/server.go

192 lines
5.7 KiB
Go
Raw Normal View History

2019-05-22 23:16:55 +02:00
package opentsdb
import (
"errors"
"io"
2019-05-22 23:16:55 +02:00
"net"
"net/http"
2019-05-22 23:16:55 +02:00
"strings"
"sync"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/ingestserver/opentsdbhttp"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/metrics"
)
var (
writeRequestsTCP = metrics.NewCounter(`vm_ingestserver_requests_total{type="opentsdb", name="write", net="tcp"}`)
writeErrorsTCP = metrics.NewCounter(`vm_ingestserver_request_errors_total{type="opentsdb", name="write", net="tcp"}`)
2019-05-22 23:16:55 +02:00
writeRequestsUDP = metrics.NewCounter(`vm_ingestserver_requests_total{type="opentsdb", name="write", net="udp"}`)
writeErrorsUDP = metrics.NewCounter(`vm_ingestserver_request_errors_total{type="opentsdb", name="write", net="udp"}`)
2019-05-22 23:16:55 +02:00
)
// Server is a server for collecting OpenTSDB TCP and UDP metrics.
//
// It simultaneously accepts Telnet put requests and HTTP put requests over TCP.
//
// Create it with MustStart and release it with MustStop.
type Server struct {
// addr is the address passed to MustStart. It is used for both the TCP
// and the UDP listener and is included in log messages.
addr string
// ls wraps the TCP listener and hands out the separate HTTP and telnet
// listeners used by MustStart. It is stopped in MustStop.
ls *listenerSwitch
// httpServer handles HTTP put requests arriving on the HTTP listener obtained from ls.
httpServer *opentsdbhttp.Server
// lnUDP is the UDP socket read by serveUDP. Closing it in MustStop
// makes the UDP readers exit.
lnUDP net.PacketConn
// wg tracks the goroutines started by MustStart (telnet, HTTP and UDP serving).
// MustStop waits on it.
wg sync.WaitGroup
// cm registers the currently open telnet TCP connections, so MustStop
// can close them via CloseAll.
cm ingestserver.ConnsMap
}
// MustStart starts OpenTSDB collector on the given addr.
//
// If useProxyProtocol is set to true, then the incoming connections are accepted via proxy protocol.
// See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
//
// MustStop must be called on the returned server when it is no longer needed.
func MustStart(addr string, useProxyProtocol bool, telnetInsertHandler func(r io.Reader) error, httpInsertHandler func(req *http.Request) error) *Server {
2019-05-22 23:16:55 +02:00
logger.Infof("starting TCP OpenTSDB collector at %q", addr)
lnTCP, err := netutil.NewTCPListener("opentsdb", addr, useProxyProtocol, nil)
2019-05-22 23:16:55 +02:00
if err != nil {
logger.Fatalf("cannot start TCP OpenTSDB collector at %q: %s", addr, err)
}
ls := newListenerSwitch(lnTCP)
lnHTTP := ls.newHTTPListener()
lnTelnet := ls.newTelnetListener()
httpServer := opentsdbhttp.MustServe(lnHTTP, httpInsertHandler)
2019-05-22 23:16:55 +02:00
logger.Infof("starting UDP OpenTSDB collector at %q", addr)
lnUDP, err := net.ListenPacket(netutil.GetUDPNetwork(), addr)
2019-05-22 23:16:55 +02:00
if err != nil {
logger.Fatalf("cannot start UDP OpenTSDB collector at %q: %s", addr, err)
}
s := &Server{
addr: addr,
ls: ls,
httpServer: httpServer,
lnUDP: lnUDP,
}
vmcluster: re-routing enhancement (#5293) * app/vmstorage: close vminsert connections gradually before stopping storage Implements graceful shutdown approach suggested here - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4922#issuecomment-1768146878 Test results for this can be found here - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4922#issuecomment-1790640274 Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * app/vmstorage: update graceful shutdown logic - close connections from vminsert in determenistic order - update flag description - lower default timeout to 25 seconds. 25 seconds value was chosen because the lowest default value used in default configuration deployments is 30s(default value in Kubernetes and ansible-playbooks). Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/cluster: add information about re-routing enhancement during restart Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/changelog: add entry for new command-line flag Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * {app/vmstorage,lib/ingestserver}: address review feedback Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/cluster: add note to update workload scheduler timeout Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * wip --------- Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-11-14 01:00:42 +01:00
s.cm.Init("opentsdb")
s.wg.Add(1)
2019-05-22 23:16:55 +02:00
go func() {
defer s.wg.Done()
s.serveTelnet(lnTelnet, telnetInsertHandler)
logger.Infof("stopped TCP telnet OpenTSDB server at %q", addr)
2019-05-22 23:16:55 +02:00
}()
s.wg.Add(1)
2019-05-22 23:16:55 +02:00
go func() {
defer s.wg.Done()
httpServer.Wait()
// Do not log when httpServer is stopped, since this is logged by the server itself.
2019-05-22 23:16:55 +02:00
}()
s.wg.Add(1)
go func() {
defer s.wg.Done()
s.serveUDP(telnetInsertHandler)
logger.Infof("stopped UDP OpenTSDB server at %q", addr)
}()
return s
2019-05-22 23:16:55 +02:00
}
// MustStop stops the server.
func (s *Server) MustStop() {
// Stop HTTP server. Do not emit log message, since it is emitted by the httpServer.
s.httpServer.MustStop()
logger.Infof("stopping TCP telnet OpenTSDB server at %q...", s.addr)
if err := s.ls.stop(); err != nil {
logger.Errorf("cannot stop TCP telnet OpenTSDB server: %s", err)
}
logger.Infof("stopping UDP OpenTSDB server at %q...", s.addr)
if err := s.lnUDP.Close(); err != nil {
logger.Errorf("cannot stop UDP OpenTSDB server: %s", err)
}
vmcluster: re-routing enhancement (#5293) * app/vmstorage: close vminsert connections gradually before stopping storage Implements graceful shutdown approach suggested here - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4922#issuecomment-1768146878 Test results for this can be found here - https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4922#issuecomment-1790640274 Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * app/vmstorage: update graceful shutdown logic - close connections from vminsert in determenistic order - update flag description - lower default timeout to 25 seconds. 25 seconds value was chosen because the lowest default value used in default configuration deployments is 30s(default value in Kubernetes and ansible-playbooks). Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/cluster: add information about re-routing enhancement during restart Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/changelog: add entry for new command-line flag Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * {app/vmstorage,lib/ingestserver}: address review feedback Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * docs/cluster: add note to update workload scheduler timeout Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> * wip --------- Signed-off-by: Zakhar Bessarab <z.bessarab@victoriametrics.com> Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2023-11-14 01:00:42 +01:00
s.cm.CloseAll(0)
s.wg.Wait()
logger.Infof("TCP and UDP OpenTSDB servers at %q have been stopped", s.addr)
}
func (s *Server) serveTelnet(ln net.Listener, insertHandler func(r io.Reader) error) {
var wg sync.WaitGroup
2019-05-22 23:16:55 +02:00
for {
c, err := ln.Accept()
if err != nil {
var ne net.Error
if errors.As(err, &ne) {
2019-05-22 23:16:55 +02:00
if ne.Temporary() {
logger.Errorf("opentsdb: temporary error when listening for TCP addr %q: %s", ln.Addr(), err)
2019-05-22 23:16:55 +02:00
time.Sleep(time.Second)
continue
}
if strings.Contains(err.Error(), "use of closed network connection") {
break
}
logger.Fatalf("unrecoverable error when accepting TCP OpenTSDB connections: %s", err)
}
logger.Fatalf("unexpected error when accepting TCP OpenTSDB connections: %s", err)
}
if !s.cm.Add(c) {
_ = c.Close()
break
}
wg.Add(1)
2019-05-22 23:16:55 +02:00
go func() {
defer func() {
s.cm.Delete(c)
_ = c.Close()
wg.Done()
}()
2019-05-22 23:16:55 +02:00
writeRequestsTCP.Inc()
if err := insertHandler(c); err != nil {
writeErrorsTCP.Inc()
logger.Errorf("error in TCP OpenTSDB conn %q<->%q: %s", c.LocalAddr(), c.RemoteAddr(), err)
}
}()
}
wg.Wait()
2019-05-22 23:16:55 +02:00
}
func (s *Server) serveUDP(insertHandler func(r io.Reader) error) {
gomaxprocs := cgroup.AvailableCPUs()
2019-05-22 23:16:55 +02:00
var wg sync.WaitGroup
for i := 0; i < gomaxprocs; i++ {
wg.Add(1)
go func() {
defer wg.Done()
var bb bytesutil.ByteBuffer
bb.B = bytesutil.ResizeNoCopyNoOverallocate(bb.B, 64*1024)
2019-05-22 23:16:55 +02:00
for {
bb.Reset()
bb.B = bb.B[:cap(bb.B)]
n, addr, err := s.lnUDP.ReadFrom(bb.B)
2019-05-22 23:16:55 +02:00
if err != nil {
writeErrorsUDP.Inc()
var ne net.Error
if errors.As(err, &ne) {
2019-05-22 23:16:55 +02:00
if ne.Temporary() {
logger.Errorf("opentsdb: temporary error when listening for UDP addr %q: %s", s.lnUDP.LocalAddr(), err)
2019-05-22 23:16:55 +02:00
time.Sleep(time.Second)
continue
}
if strings.Contains(err.Error(), "use of closed network connection") {
break
}
}
logger.Errorf("cannot read OpenTSDB UDP data: %s", err)
continue
}
bb.B = bb.B[:n]
writeRequestsUDP.Inc()
if err := insertHandler(bb.NewReader()); err != nil {
writeErrorsUDP.Inc()
logger.Errorf("error in UDP OpenTSDB conn %q<->%q: %s", s.lnUDP.LocalAddr(), addr, err)
2019-05-22 23:16:55 +02:00
continue
}
}
}()
}
wg.Wait()
}