2020-02-23 12:35:47 +01:00
package promscrape
import (
2020-11-01 22:12:13 +01:00
"context"
2020-02-23 12:35:47 +01:00
"flag"
"fmt"
2020-11-01 22:12:13 +01:00
"io"
"net/http"
2020-12-24 09:56:10 +01:00
"net/url"
2020-02-23 12:35:47 +01:00
"strings"
"time"
2024-04-03 10:01:43 +02:00
"github.com/VictoriaMetrics/metrics"
2020-11-26 17:08:39 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2020-08-16 16:05:52 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
2024-07-15 23:00:14 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/netutil"
2020-02-23 12:35:47 +01:00
)
var (
2021-11-03 21:26:56 +01:00
maxResponseHeadersSize = flagutil . NewBytes ( "promscrape.maxResponseHeadersSize" , 4096 , "The maximum size of http response headers from Prometheus scrape targets" )
disableCompression = flag . Bool ( "promscrape.disableCompression" , false , "Whether to disable sending 'Accept-Encoding: gzip' request headers to all the scrape targets. " +
2020-07-02 13:19:11 +02:00
"This may reduce CPU usage on scrape targets at the cost of higher network bandwidth utilization. " +
2023-05-10 09:50:41 +02:00
"It is possible to set 'disable_compression: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control" )
2020-07-02 13:19:11 +02:00
disableKeepAlive = flag . Bool ( "promscrape.disableKeepAlive" , false , "Whether to disable HTTP keep-alive connections when scraping all the targets. " +
"This may be useful when targets has no support for HTTP keep-alive connection. " +
2023-05-10 09:50:41 +02:00
"It is possible to set 'disable_keepalive: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control. " +
2020-07-02 13:19:11 +02:00
"Note that disabling HTTP keep-alive may increase load on both vmagent and scrape targets" )
2020-11-01 22:12:13 +01:00
streamParse = flag . Bool ( "promscrape.streamParse" , false , "Whether to enable stream parsing for metrics obtained from scrape targets. This may be useful " +
"for reducing memory usage when millions of metrics are exposed per each scrape target. " +
2023-05-10 09:50:41 +02:00
"It is possible to set 'stream_parse: true' individually per each 'scrape_config' section in '-promscrape.config' for fine-grained control" )
2020-02-23 12:35:47 +01:00
)
// client issues HTTP scrape requests to a single scrape URL.
//
// Instances are created by newClient.
type client struct {
	// c is the underlying HTTP client; its Timeout is also used
	// for calculating the per-request deadline in ReadData.
	c *http.Client

	// ctx is the parent context for every scrape request.
	ctx context.Context

	// scrapeURL is the URL requested on every scrape.
	//
	// scrapeTimeoutSecondsStr is the pre-formatted value
	// for the X-Prometheus-Scrape-Timeout-Seconds request header.
	scrapeURL, scrapeTimeoutSecondsStr string

	// setHeaders and setProxyHeaders are called on every scrape request
	// before it is sent; a non-nil error aborts the scrape.
	setHeaders, setProxyHeaders func(req *http.Request) error

	// maxScrapeSize is the limit, in bytes, on the response body size read by ReadData.
	maxScrapeSize int64
}
2023-10-17 11:58:19 +02:00
func newClient ( ctx context . Context , sw * ScrapeWork ) ( * client , error ) {
2024-04-03 23:46:40 +02:00
ac := sw . AuthConfig
2024-01-30 16:51:44 +01:00
setHeaders := func ( req * http . Request ) error {
return sw . AuthConfig . SetHeaders ( req , true )
}
2024-04-02 22:16:24 +02:00
setProxyHeaders := func ( _ * http . Request ) error {
2024-01-30 16:51:44 +01:00
return nil
}
2024-08-19 22:31:18 +02:00
dialFunc := netutil . NewStatDialFunc ( "vm_promscrape" )
2021-04-03 23:40:08 +02:00
proxyURL := sw . ProxyURL
2021-10-16 12:18:20 +02:00
var proxyURLFunc func ( * http . Request ) ( * url . URL , error )
2024-08-19 22:31:18 +02:00
if proxyURL != nil {
// case for direct http proxy connection.
// must be used for http based scrape targets
// since standard golang http.transport has special case for it
if strings . HasPrefix ( sw . ScrapeURL , "http://" ) {
if proxyURL . URL . Scheme == "https" {
ac = sw . ProxyAuthConfig
}
proxyURLFunc = http . ProxyURL ( proxyURL . URL )
setProxyHeaders = func ( req * http . Request ) error {
return proxyURL . SetHeaders ( sw . ProxyAuthConfig , req )
}
} else {
// HTTP-Connect or socks5 proxy tunnel
// it makes possible to use separate tls configurations
// for proxy and backend connections
proxyDial , err := proxyURL . NewDialFunc ( sw . ProxyAuthConfig )
if err != nil {
return nil , fmt . Errorf ( "cannot create dialer for proxy_url=%q connection: %w" , proxyURL , err )
}
dialFunc = netutil . NewStatDialFuncWithDial ( "vm_promscrape" , proxyDial )
}
2021-10-16 12:18:20 +02:00
}
2024-08-19 22:31:18 +02:00
2024-01-30 16:51:44 +01:00
hc := & http . Client {
2024-04-03 23:46:40 +02:00
Transport : ac . NewRoundTripper ( & http . Transport {
Proxy : proxyURLFunc ,
TLSHandshakeTimeout : 10 * time . Second ,
IdleConnTimeout : 2 * sw . ScrapeInterval ,
DisableCompression : * disableCompression || sw . DisableCompression ,
DisableKeepAlives : * disableKeepAlive || sw . DisableKeepAlive ,
2024-08-19 22:31:18 +02:00
DialContext : dialFunc ,
2024-04-03 23:46:40 +02:00
MaxIdleConnsPerHost : 100 ,
MaxResponseHeaderBytes : int64 ( maxResponseHeadersSize . N ) ,
} ) ,
Timeout : sw . ScrapeTimeout ,
2021-10-16 12:18:20 +02:00
}
if sw . DenyRedirects {
2024-04-02 22:16:24 +02:00
hc . CheckRedirect = func ( _ * http . Request , _ [ ] * http . Request ) error {
2021-10-16 12:18:20 +02:00
return http . ErrUseLastResponse
2021-04-02 18:56:38 +02:00
}
2020-11-01 22:12:13 +01:00
}
2023-06-05 16:31:58 +02:00
2023-10-25 23:19:33 +02:00
c := & client {
2024-01-30 16:51:44 +01:00
c : hc ,
2023-02-09 20:13:06 +01:00
ctx : ctx ,
2021-04-05 11:15:07 +02:00
scrapeURL : sw . ScrapeURL ,
scrapeTimeoutSecondsStr : fmt . Sprintf ( "%.3f" , sw . ScrapeTimeout . Seconds ( ) ) ,
2024-01-30 16:51:44 +01:00
setHeaders : setHeaders ,
setProxyHeaders : setProxyHeaders ,
2024-06-20 13:58:42 +02:00
maxScrapeSize : sw . MaxScrapeSize ,
2023-10-25 23:19:33 +02:00
}
return c , nil
2020-02-23 12:35:47 +01:00
}
2024-01-30 16:51:44 +01:00
func ( c * client ) ReadData ( dst * bytesutil . ByteBuffer ) error {
deadline := time . Now ( ) . Add ( c . c . Timeout )
2023-02-09 20:13:06 +01:00
ctx , cancel := context . WithDeadline ( c . ctx , deadline )
2023-02-23 03:58:44 +01:00
req , err := http . NewRequestWithContext ( ctx , http . MethodGet , c . scrapeURL , nil )
2020-11-01 22:12:13 +01:00
if err != nil {
cancel ( )
2024-01-30 16:51:44 +01:00
return fmt . Errorf ( "cannot create request for %q: %w" , c . scrapeURL , err )
2020-11-01 22:12:13 +01:00
}
// The following `Accept` header has been copied from Prometheus sources.
// See https://github.com/prometheus/prometheus/blob/f9d21f10ecd2a343a381044f131ea4e46381ce09/scrape/scrape.go#L532 .
// This is needed as a workaround for scraping stupid Java-based servers such as Spring Boot.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/608 for details.
// Do not bloat the `Accept` header with OpenMetrics shit, since it looks like dead standard now.
req . Header . Set ( "Accept" , "text/plain;version=0.0.4;q=1,*/*;q=0.1" )
2021-04-05 11:15:07 +02:00
// Set X-Prometheus-Scrape-Timeout-Seconds like Prometheus does, since it is used by some exporters such as PushProx.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1179#issuecomment-813117162
req . Header . Set ( "X-Prometheus-Scrape-Timeout-Seconds" , c . scrapeTimeoutSecondsStr )
2024-01-30 16:51:44 +01:00
req . Header . Set ( "User-Agent" , "vm_promscrape" )
2023-10-25 23:19:33 +02:00
if err := c . setHeaders ( req ) ; err != nil {
2023-10-17 11:58:19 +02:00
cancel ( )
2024-01-30 16:51:44 +01:00
return fmt . Errorf ( "failed to set request headers for %q: %w" , c . scrapeURL , err )
2023-10-17 11:58:19 +02:00
}
2023-10-25 23:19:33 +02:00
if err := c . setProxyHeaders ( req ) ; err != nil {
2023-10-17 11:58:19 +02:00
cancel ( )
2024-01-30 16:51:44 +01:00
return fmt . Errorf ( "failed to set proxy request headers for %q: %w" , c . scrapeURL , err )
2023-10-17 11:58:19 +02:00
}
2022-08-16 13:52:38 +02:00
scrapeRequests . Inc ( )
2024-01-30 16:51:44 +01:00
resp , err := c . c . Do ( req )
2020-11-01 22:12:13 +01:00
if err != nil {
cancel ( )
2024-01-30 16:51:44 +01:00
if ue , ok := err . ( * url . Error ) ; ok && ue . Timeout ( ) {
scrapesTimedout . Inc ( )
}
return fmt . Errorf ( "cannot perform request to %q: %w" , c . scrapeURL , err )
2020-11-01 22:12:13 +01:00
}
if resp . StatusCode != http . StatusOK {
metrics . GetOrCreateCounter ( fmt . Sprintf ( ` vm_promscrape_scrapes_total { status_code="%d"} ` , resp . StatusCode ) ) . Inc ( )
2022-08-21 23:13:44 +02:00
respBody , _ := io . ReadAll ( resp . Body )
2020-11-01 22:12:13 +01:00
_ = resp . Body . Close ( )
cancel ( )
2024-01-30 16:51:44 +01:00
return fmt . Errorf ( "unexpected status code returned when scraping %q: %d; expecting %d; response body: %q" ,
2020-11-01 22:12:13 +01:00
c . scrapeURL , resp . StatusCode , http . StatusOK , respBody )
}
scrapesOK . Inc ( )
2024-01-30 16:51:44 +01:00
// Read the data from resp.Body
r := & io . LimitedReader {
R : resp . Body ,
2024-06-20 13:58:42 +02:00
N : c . maxScrapeSize ,
2022-05-03 12:31:31 +02:00
}
2024-01-30 16:51:44 +01:00
_ , err = dst . ReadFrom ( r )
_ = resp . Body . Close ( )
cancel ( )
2020-02-23 12:35:47 +01:00
if err != nil {
2024-01-30 16:51:44 +01:00
if ue , ok := err . ( * url . Error ) ; ok && ue . Timeout ( ) {
2020-02-23 12:35:47 +01:00
scrapesTimedout . Inc ( )
2020-05-24 13:41:08 +02:00
}
2024-01-30 16:51:44 +01:00
return fmt . Errorf ( "cannot read data from %s: %w" , c . scrapeURL , err )
2020-02-23 12:35:47 +01:00
}
2024-06-20 13:58:42 +02:00
if int64 ( len ( dst . B ) ) >= c . maxScrapeSize {
2022-12-28 21:19:41 +01:00
maxScrapeSizeExceeded . Inc ( )
2024-07-16 12:24:14 +02:00
return fmt . Errorf ( "the response from %q exceeds -promscrape.maxScrapeSize or max_scrape_size in the scrape config (%d bytes). " +
2024-06-20 13:58:42 +02:00
"Possible solutions are: reduce the response size for the target, increase -promscrape.maxScrapeSize command-line flag, " +
2024-10-18 11:35:23 +02:00
"increase max_scrape_size value in scrape config for the given target" , c . scrapeURL , c . maxScrapeSize )
2022-12-28 21:19:41 +01:00
}
2024-01-30 16:51:44 +01:00
return nil
2020-02-23 12:35:47 +01:00
}
var (
2021-09-23 13:47:20 +02:00
maxScrapeSizeExceeded = metrics . NewCounter ( ` vm_promscrape_max_scrape_size_exceeded_errors_total ` )
scrapesTimedout = metrics . NewCounter ( ` vm_promscrape_scrapes_timed_out_total ` )
scrapesOK = metrics . NewCounter ( ` vm_promscrape_scrapes_total { status_code="200"} ` )
2022-08-16 13:52:38 +02:00
scrapeRequests = metrics . NewCounter ( ` vm_promscrape_scrape_requests_total ` )
2020-02-23 12:35:47 +01:00
)