From 7301aa678cd9728b77a9e1998492400dcfbe75d9 Mon Sep 17 00:00:00 2001 From: Nikolay Date: Wed, 13 Jul 2022 22:43:18 +0200 Subject: [PATCH] lib/promscrape: adds azure service discovery (#2743) * lib/promscrape: adds azure service discovery Adds azure service discovery mechanism implements authorization with oauth and msi lists virtual machines and virtual machines managed by scaleSet https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1364 * makes linter happy * Apply suggestions from code review Co-authored-by: Roman Khavronenko * wip Co-authored-by: Roman Khavronenko Co-authored-by: Aliaksandr Valialkin --- README.md | 3 + app/vmagent/README.md | 4 + docs/CHANGELOG.md | 1 + docs/README.md | 3 + docs/Single-server-VictoriaMetrics.md | 3 + docs/vmagent.md | 4 + lib/promscrape/config.go | 32 ++ lib/promscrape/discovery/azure/api.go | 273 +++++++++++++ lib/promscrape/discovery/azure/azure.go | 107 +++++ lib/promscrape/discovery/azure/azure_test.go | 49 +++ lib/promscrape/discovery/azure/machine.go | 204 ++++++++++ .../discovery/azure/machine_test.go | 376 ++++++++++++++++++ lib/promscrape/discovery/azure/nic.go | 80 ++++ lib/promscrape/scraper.go | 2 + 14 files changed, 1141 insertions(+) create mode 100644 lib/promscrape/discovery/azure/api.go create mode 100644 lib/promscrape/discovery/azure/azure.go create mode 100644 lib/promscrape/discovery/azure/azure_test.go create mode 100644 lib/promscrape/discovery/azure/machine.go create mode 100644 lib/promscrape/discovery/azure/machine_test.go create mode 100644 lib/promscrape/discovery/azure/nic.go diff --git a/README.md b/README.md index b7d68a63c..fe7efac2d 100644 --- a/README.md +++ b/README.md @@ -316,6 +316,7 @@ VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping t * [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) * 
[ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) +* [azure_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) * [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) * [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) @@ -2003,6 +2004,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings -precisionBits int The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64) + -promscrape.azureSDCheckInterval duration + Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details (default 1m0s) -promscrape.cluster.memberNum string The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. 
Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0") -promscrape.cluster.membersCount int diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 8b6296aaa..14a29fe31 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -166,6 +166,8 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh * if `zone` arg is missing then `vmagent` uses the zone for the instance where it runs; * if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project; * `zone` may contain a list of zones, i.e. `zone: [us-east1-a, us-east1-b]`. +* `azure_sd_configs` - is for scraping the targets registered in Azure Cloud. + See [azure_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) for details. * `consul_sd_configs` is for discovering and scraping targets registered in Consul. See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details. * `dns_sd_configs` is for discovering and scraping targets from DNS records (SRV, A and AAAA). See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details. * `openstack_sd_configs` is for discovering and scraping OpenStack targets. See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details. [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only. @@ -998,6 +1000,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms) -pprofAuthKey string Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. 
It overrides httpAuth.* settings + -promscrape.azureSDCheckInterval duration + Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details (default 1m0s) -promscrape.cluster.memberNum string The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0") -promscrape.cluster.membersCount int diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 9a312747b..9c557470f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f **Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) adds `/vmalert/` prefix to [web urls](https://docs.victoriametrics.com/vmalert.html#web) according to [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2825). This may affect `vmalert` instances with non-empty `-http.pathPrefix` command-line flag. After the update, configuring this flag is no longer needed. Here's [why](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2799#issuecomment-1171392005). **Update note 3:** this release introduces backwards-incompatible changes to communication protocol between `vmselect` and `vmstorage` nodes in cluster version of VictoriaMetrics because of added ability to query `vmselect` data from other `vmselect` nodes - see [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multi-level-cluster-setup), so read requests to `vmselect` will fail until the upgrade is complete. These errors will stop after all the `vmselect` and `vmstorage` nodes are updated to the new release. 
It is safe to downgrade to previous releases at any time. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add [azure_sd_configs](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) service discovery mechanism. It allows discovering Virtual Machines at [Azure Cloud](https://azure.microsoft.com/en-us/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1364). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): deprecate alert's status link `/api/v1///status` in favour of `api/v1/alert?group_id=&alert_id="`. The old alert's status link is still supported, but will be removed in future releases. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2825). * FEATURE: [cluster version of VictoriaMetrics](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): add support for querying lower-level `vmselect` nodes from upper-level `vmselect` nodes. This makes possible to build multi-level cluster setups for global querying view and HA purposes without the need to use [Promxy](https://github.com/jacksontj/promxy). See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multi-level-cluster-setup) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2778). * FEATURE: add `-search.setLookbackToStep` command-line flag, which enables InfluxDB-like gap filling during querying. See [these docs](https://docs.victoriametrics.com/guides/migrate-from-influx.html) for details. 
diff --git a/docs/README.md b/docs/README.md index b7d68a63c..fe7efac2d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -316,6 +316,7 @@ VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping t * [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) * [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) +* [azure_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) * [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) * [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) @@ -2003,6 +2004,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings -precisionBits int The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64) + -promscrape.azureSDCheckInterval duration + Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details (default 1m0s) -promscrape.cluster.memberNum string The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. 
Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0") -promscrape.cluster.membersCount int diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 4b87e3e35..9fa2c6352 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -320,6 +320,7 @@ VictoriaMetrics can be used as drop-in replacement for Prometheus for scraping t * [kubernetes_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#kubernetes_sd_config) * [ec2_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#ec2_sd_config) * [gce_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#gce_sd_config) +* [azure_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) * [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) * [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) * [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) @@ -2007,6 +2008,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings -precisionBits int The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64) + -promscrape.azureSDCheckInterval duration + Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. 
See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details (default 1m0s) -promscrape.cluster.memberNum string The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0") -promscrape.cluster.membersCount int diff --git a/docs/vmagent.md b/docs/vmagent.md index 362ce8779..344eb4a65 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -170,6 +170,8 @@ The following scrape types in [scrape_config](https://prometheus.io/docs/prometh * if `zone` arg is missing then `vmagent` uses the zone for the instance where it runs; * if `zone` arg equals to `"*"`, then `vmagent` discovers all the zones for the given project; * `zone` may contain a list of zones, i.e. `zone: [us-east1-a, us-east1-b]`. +* `azure_sd_configs` - is for scraping the targets registered in Azure Cloud. + See [azure_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config) for details. * `consul_sd_configs` is for discovering and scraping targets registered in Consul. See [consul_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#consul_sd_config) for details. * `dns_sd_configs` is for discovering and scraping targets from DNS records (SRV, A and AAAA). See [dns_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#dns_sd_config) for details. * `openstack_sd_configs` is for discovering and scraping OpenStack targets. See [openstack_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#openstack_sd_config) for details. [OpenStack identity API v3](https://docs.openstack.org/api-ref/identity/v3/) is supported only. @@ -1002,6 +1004,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . 
Trim timestamps for OpenTSDB HTTP data to this duration. Minimum practical duration is 1ms. Higher duration (i.e. 1s) may be used for reducing disk space usage for timestamp data (default 1ms) -pprofAuthKey string Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings + -promscrape.azureSDCheckInterval duration + Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details (default 1m0s) -promscrape.cluster.memberNum string The number of number in the cluster of scrapers. It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster. Can be specified as pod name of Kubernetes StatefulSet - pod-name-Num, where Num is a numeric part of pod name (default "0") -promscrape.cluster.membersCount int diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 102f95465..86c85b76b 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -21,6 +21,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/azure" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/digitalocean" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" @@ -228,6 +229,7 @@ type ScrapeConfig struct { MetricRelabelConfigs []promrelabel.RelabelConfig `yaml:"metric_relabel_configs,omitempty"` SampleLimit int `yaml:"sample_limit,omitempty"` + AzureSDConfigs []azure.SDConfig `yaml:"azure_sd_configs,omitempty"` ConsulSDConfigs []consul.SDConfig `yaml:"consul_sd_configs,omitempty"` 
DigitaloceanSDConfigs []digitalocean.SDConfig `yaml:"digitalocean_sd_configs,omitempty"` DNSSDConfigs []dns.SDConfig `yaml:"dns_sd_configs,omitempty"` @@ -273,6 +275,9 @@ func (sc *ScrapeConfig) mustStart(baseDir string) { } func (sc *ScrapeConfig) mustStop() { + for i := range sc.AzureSDConfigs { + sc.AzureSDConfigs[i].MustStop() + } for i := range sc.ConsulSDConfigs { sc.ConsulSDConfigs[i].MustStop() } @@ -450,6 +455,33 @@ func getSWSByJob(sws []*ScrapeWork) map[string][]*ScrapeWork { return m } +// getAzureSDScrapeWork returns `azure_sd_configs` ScrapeWork from cfg. +func (cfg *Config) getAzureSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork { + swsPrevByJob := getSWSByJob(prev) + dst := make([]*ScrapeWork, 0, len(prev)) + for _, sc := range cfg.ScrapeConfigs { + dstLen := len(dst) + ok := true + for j := range sc.AzureSDConfigs { + sdc := &sc.AzureSDConfigs[j] + var okLocal bool + dst, okLocal = appendSDScrapeWork(dst, sdc, cfg.baseDir, sc.swc, "azure_sd_config") + if ok { + ok = okLocal + } + } + if ok { + continue + } + swsPrev := swsPrevByJob[sc.swc.jobName] + if len(swsPrev) > 0 { + logger.Errorf("there were errors when discovering azure targets for job %q, so preserving the previous targets", sc.swc.jobName) + dst = append(dst[:dstLen], swsPrev...) + } + } + return dst +} + // getConsulSDScrapeWork returns `consul_sd_configs` ScrapeWork from cfg. 
func (cfg *Config) getConsulSDScrapeWork(prev []*ScrapeWork) []*ScrapeWork { swsPrevByJob := getSWSByJob(prev) diff --git a/lib/promscrape/discovery/azure/api.go b/lib/promscrape/discovery/azure/api.go new file mode 100644 index 000000000..d328706c8 --- /dev/null +++ b/lib/promscrape/discovery/azure/api.go @@ -0,0 +1,273 @@ +package azure + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "net/url" + "os" + "strconv" + "strings" + "sync" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" + "github.com/VictoriaMetrics/fasthttp" +) + +var configMap = discoveryutils.NewConfigMap() + +// cloudEnvironmentEndpoints contains the needed params extracted from https://github.com/Azure/go-autorest/blob/7dd32b67be4e6c9386b9ba7b1c44a51263f05270/autorest/azure/environments.go#L61 +type cloudEnvironmentEndpoints struct { + ActiveDirectoryEndpoint string `json:"activeDirectoryEndpoint"` + ResourceManagerEndpoint string `json:"resourceManagerEndpoint"` +} + +// well-known azure cloud endpoints +// See https://github.com/Azure/go-autorest/blob/7dd32b67be4e6c9386b9ba7b1c44a51263f05270/autorest/azure/environments.go#L34 +var cloudEnvironments = map[string]*cloudEnvironmentEndpoints{ + "AZURECHINACLOUD": { + ActiveDirectoryEndpoint: "https://login.chinacloudapi.cn", + ResourceManagerEndpoint: "https://management.chinacloudapi.cn", + }, + "AZUREGERMANCLOUD": { + ActiveDirectoryEndpoint: "https://login.microsoftonline.de", + ResourceManagerEndpoint: "https://management.microsoftazure.de", + }, + "AZURECLOUD": { + ActiveDirectoryEndpoint: "https://login.microsoftonline.com", + ResourceManagerEndpoint: "https://management.azure.com", + }, + "AZUREPUBLICCLOUD": { + ActiveDirectoryEndpoint: "https://login.microsoftonline.com", + ResourceManagerEndpoint: "https://management.azure.com", + }, + "AZUREUSGOVERNMENT": { + ActiveDirectoryEndpoint:
"https://login.microsoftonline.us", + ResourceManagerEndpoint: "https://management.usgovcloudapi.net", + }, + "AZUREUSGOVERNMENTCLOUD": { + ActiveDirectoryEndpoint: "https://login.microsoftonline.us", + ResourceManagerEndpoint: "https://management.usgovcloudapi.net", + }, +} + +// apiConfig contains config for API server. +type apiConfig struct { + c *discoveryutils.Client + port int + resourceGroup string + subscriptionID string + tenantID string + + refreshToken refreshTokenFunc + // tokenLock guards auth token and tokenExpireDeadline + tokenLock sync.Mutex + token string + tokenExpireDeadline time.Time +} + +type refreshTokenFunc func() (string, time.Duration, error) + +func getAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { + v, err := configMap.Get(sdc, func() (interface{}, error) { return newAPIConfig(sdc, baseDir) }) + if err != nil { + return nil, err + } + return v.(*apiConfig), nil +} + +func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { + if sdc.SubscriptionID == "" { + return nil, fmt.Errorf("missing `subscription_id` config option") + } + port := sdc.Port + if port == 0 { + port = 80 + } + + ac, err := sdc.HTTPClientConfig.NewConfig(baseDir) + if err != nil { + return nil, fmt.Errorf("cannot parse auth config: %w", err) + } + proxyAC, err := sdc.ProxyClientConfig.NewConfig(baseDir) + if err != nil { + return nil, fmt.Errorf("cannot parse proxy auth config: %w", err) + } + + environment := sdc.Environment + if environment == "" { + environment = "AZURECLOUD" + } + env, err := getCloudEnvByName(environment) + if err != nil { + return nil, fmt.Errorf("cannot read configs for `environment: %q`: %w", environment, err) + } + + refreshToken, err := getRefreshTokenFunc(sdc, ac, proxyAC, env) + if err != nil { + return nil, err + } + c, err := discoveryutils.NewClient(env.ResourceManagerEndpoint, ac, sdc.ProxyURL, proxyAC) + if err != nil { + return nil, fmt.Errorf("cannot create client for %q: %w", 
env.ResourceManagerEndpoint, err) + } + cfg := &apiConfig{ + c: c, + port: port, + resourceGroup: sdc.ResourceGroup, + subscriptionID: sdc.SubscriptionID, + tenantID: sdc.TenantID, + + refreshToken: refreshToken, + } + return cfg, nil +} + +func getCloudEnvByName(name string) (*cloudEnvironmentEndpoints, error) { + name = strings.ToUpper(name) + // Special case, azure cloud k8s cluster, read content from file. + // See https://github.com/Azure/go-autorest/blob/7dd32b67be4e6c9386b9ba7b1c44a51263f05270/autorest/azure/environments.go#L301 + if name == "AZURESTACKCLOUD" { + return readCloudEndpointsFromFile(os.Getenv("AZURE_ENVIRONMENT_FILEPATH")) + } + env := cloudEnvironments[name] + if env == nil { + var supportedEnvs []string + for envName := range cloudEnvironments { + supportedEnvs = append(supportedEnvs, envName) + } + return nil, fmt.Errorf("unsupported `environment: %q`; supported values: %s", name, strings.Join(supportedEnvs, ",")) + } + return env, nil +} + +func readCloudEndpointsFromFile(filePath string) (*cloudEnvironmentEndpoints, error) { + data, err := ioutil.ReadFile(filePath) + if err != nil { + return nil, fmt.Errorf("cannot file %q: %w", filePath, err) + } + var cee cloudEnvironmentEndpoints + if err := json.Unmarshal(data, &cee); err != nil { + return nil, fmt.Errorf("cannot parse cloud environment endpoints from file %q: %w", filePath, err) + } + return &cee, nil +} + +func getRefreshTokenFunc(sdc *SDConfig, ac, proxyAC *promauth.Config, env *cloudEnvironmentEndpoints) (refreshTokenFunc, error) { + var tokenEndpoint, tokenAPIPath string + var modifyRequest func(request *fasthttp.Request) + authenticationMethod := sdc.AuthenticationMethod + if authenticationMethod == "" { + authenticationMethod = "OAuth" + } + switch strings.ToLower(authenticationMethod) { + case "oauth": + if sdc.TenantID == "" { + return nil, fmt.Errorf("missing `tenant_id` config option for `authentication_method: Oauth`") + } + if sdc.ClientID == "" { + return nil, 
fmt.Errorf("missing `client_id` config option for `authentication_method: OAuth`") + } + if sdc.ClientSecret.String() == "" { + return nil, fmt.Errorf("missing `client_secrect` config option for `authentication_method: OAuth`") + } + q := url.Values{ + "grant_type": []string{"client_credentials"}, + "client_id": []string{sdc.ClientID}, + "client_secret": []string{sdc.ClientSecret.String()}, + "resource": []string{env.ResourceManagerEndpoint}, + } + authParams := q.Encode() + tokenAPIPath = "/" + sdc.TenantID + "/oauth2/token" + tokenEndpoint = env.ActiveDirectoryEndpoint + modifyRequest = func(request *fasthttp.Request) { + request.SetBodyString(authParams) + request.Header.SetMethod("POST") + } + case "managedidentity": + endpoint := "http://169.254.169.254/metadata/identity/oauth2/token" + if ep := os.Getenv("MSI_ENDPOINT"); ep != "" { + endpoint = ep + } + endpointURL, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("cannot parse MSI endpoint url %q: %w", endpoint, err) + } + q := endpointURL.Query() + + msiSecret := os.Getenv("MSI_SECRET") + clientIDParam := "client_id" + apiVersion := "2018-02-01" + if msiSecret != "" { + clientIDParam = "clientid" + apiVersion = "2017-09-01" + } + q.Set("api-version", apiVersion) + q.Set(clientIDParam, sdc.ClientID) + q.Set("resource", env.ResourceManagerEndpoint) + endpointURL.RawQuery = q.Encode() + tokenAPIPath = endpointURL.RequestURI() + tokenEndpoint = endpointURL.Scheme + "://" + endpointURL.Host + modifyRequest = func(request *fasthttp.Request) { + if msiSecret != "" { + request.Header.Set("secret", msiSecret) + } else { + request.Header.Set("Metadata", "true") + } + } + default: + return nil, fmt.Errorf("unsupported `authentication_method: %q` only `OAuth` and `ManagedIdentity` are supported", authenticationMethod) + } + + authClient, err := discoveryutils.NewClient(tokenEndpoint, ac, sdc.ProxyURL, proxyAC) + if err != nil { + return nil, fmt.Errorf("cannot build auth client: %w", err) + } + 
refreshToken := func() (string, time.Duration, error) { + data, err := authClient.GetAPIResponseWithReqParams(tokenAPIPath, modifyRequest) + if err != nil { + return "", 0, err + } + var tr tokenResponse + if err := json.Unmarshal(data, &tr); err != nil { + return "", 0, fmt.Errorf("cannot parse token auth response %q: %w", string(data), err) + } + expiresInSeconds, err := strconv.ParseInt(tr.ExpiresIn, 10, 64) + if err != nil { + return "", 0, fmt.Errorf("cannot parse expiresIn param in token auth %q: %w", tr.ExpiresIn, err) + } + return tr.AccessToken, time.Second * time.Duration(expiresInSeconds), nil + } + return refreshToken, nil +} + +// mustGetAuthToken returns auth token +// in case of error, logs error and return empty token +func (ac *apiConfig) mustGetAuthToken() string { + ac.tokenLock.Lock() + defer ac.tokenLock.Unlock() + + ct := time.Now() + if ac.tokenExpireDeadline.Sub(ct) > time.Second*30 { + return ac.token + } + token, expiresDuration, err := ac.refreshToken() + if err != nil { + logger.Errorf("cannot refresh azure auth token: %s", err) + return "" + } + ac.token = token + ac.tokenExpireDeadline = ct.Add(expiresDuration) + return ac.token +} + +// tokenResponse represent response from oauth2 azure token service +// +// https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/how-to-use-vm-token#get-a-token-using-go +type tokenResponse struct { + AccessToken string `json:"access_token"` + ExpiresIn string `json:"expires_in"` +} diff --git a/lib/promscrape/discovery/azure/azure.go b/lib/promscrape/discovery/azure/azure.go new file mode 100644 index 000000000..fb669676d --- /dev/null +++ b/lib/promscrape/discovery/azure/azure.go @@ -0,0 +1,107 @@ +package azure + +import ( + "flag" + "fmt" + "strings" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy" +) + +// 
SDCheckInterval is check interval for Azure service discovery. +var SDCheckInterval = flag.Duration("promscrape.azureSDCheckInterval", 60*time.Second, "Interval for checking for changes in Azure. "+ + "This works only if azure_sd_configs is configured in '-promscrape.config' file. "+ + "See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config for details") + +// SDConfig represents service discovery config for Azure. +// +// See https://prometheus.io/docs/prometheus/latest/configuration/configuration/#azure_sd_config +type SDConfig struct { + Environment string `yaml:"environment,omitempty"` + + // AuthenticationMethod can be either Oauth or ManagedIdentity. + // See https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview + AuthenticationMethod string `yaml:"authentication_method,omitempty"` + + SubscriptionID string `yaml:"subscription_id"` + TenantID string `yaml:"tenant_id,omitempty"` + ClientID string `yaml:"client_id,omitempty"` + ClientSecret *promauth.Secret `yaml:"client_secret,omitempty"` + ResourceGroup string `yaml:"resource_group,omitempty"` + + // RefreshInterval time.Duration `yaml:"refresh_interval"` + // refresh_interval is obtained from `-promscrape.azureSDCheckInterval` command-line option. + + Port int `yaml:"port"` + + HTTPClientConfig promauth.HTTPClientConfig `yaml:",inline"` + ProxyURL *proxy.URL `yaml:"proxy_url,omitempty"` + ProxyClientConfig promauth.ProxyClientConfig `yaml:",inline"` +} + +// GetLabels returns Azure labels according to sdc. +func (sdc *SDConfig) GetLabels(baseDir string) ([]map[string]string, error) { + ac, err := getAPIConfig(sdc, baseDir) + if err != nil { + return nil, fmt.Errorf("cannot get API config: %w", err) + } + vms, err := getVirtualMachines(ac) + if err != nil { + return nil, err + } + return appendMachineLabels(vms, ac.port, sdc), nil +} + +// MustStop stops further usage for sdc.
+func (sdc *SDConfig) MustStop() { + configMap.Delete(sdc) +} + +func appendMachineLabels(vms []virtualMachine, port int, sdc *SDConfig) []map[string]string { + ms := make([]map[string]string, 0, len(vms)) + for i := range vms { + vm := &vms[i] + for _, ips := range vm.ipAddresses { + if ips.privateIP == "" { + continue + } + addr := discoveryutils.JoinHostPort(ips.privateIP, port) + m := map[string]string{ + "__address__": addr, + "__meta_azure_subscription_id": sdc.SubscriptionID, + "__meta_azure_machine_id": vm.ID, + "__meta_azure_machine_name": vm.Name, + "__meta_azure_machine_location": vm.Location, + "__meta_azure_machine_private_ip": ips.privateIP, + } + if sdc.TenantID != "" { + m["__meta_azure_tenant_id"] = sdc.TenantID + } + // /subscriptions/SUBSCRIPTION_ID/resourceGroups/RESOURCE_GROUP/providers/PROVIDER/TYPE/NAME + idPath := strings.Split(vm.ID, "/") + if len(idPath) > 4 { + m["__meta_azure_machine_resource_group"] = idPath[4] + } + if vm.Properties.StorageProfile.OsDisk.OsType != "" { + m["__meta_azure_machine_os_type"] = vm.Properties.StorageProfile.OsDisk.OsType + } + if vm.Properties.OsProfile.ComputerName != "" { + m["__meta_azure_machine_computer_name"] = vm.Properties.OsProfile.ComputerName + } + if ips.publicIP != "" { + m["__meta_azure_machine_public_ip"] = ips.publicIP + } + if vm.scaleSet != "" { + m["__meta_azure_machine_scale_set"] = vm.scaleSet + } + for k, v := range vm.Tags { + m[discoveryutils.SanitizeLabelName("__meta_azure_machine_tag_"+k)] = v + } + ms = append(ms, m) + } + } + return ms +} diff --git a/lib/promscrape/discovery/azure/azure_test.go b/lib/promscrape/discovery/azure/azure_test.go new file mode 100644 index 000000000..2660e2cf2 --- /dev/null +++ b/lib/promscrape/discovery/azure/azure_test.go @@ -0,0 +1,49 @@ +package azure + +import ( + "reflect" + "testing" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func 
TestAppendMachineLabels(t *testing.T) { + f := func(name string, vms []virtualMachine, expectedLabels [][]prompbmarshal.Label) { + t.Run(name, func(t *testing.T) { + labelss := appendMachineLabels(vms, 80, &SDConfig{SubscriptionID: "some-id"}) + var sortedLabelss [][]prompbmarshal.Label + for _, labels := range labelss { + sortedLabelss = append(sortedLabelss, discoveryutils.GetSortedLabels(labels)) + } + if !reflect.DeepEqual(sortedLabelss, expectedLabels) { + t.Fatalf("unexpected labels:\ngot\n%v\nwant\n%v", sortedLabelss, expectedLabels) + } + }) + } + f("single vm", []virtualMachine{ + { + Name: "vm-1", + ID: "id-2", + Type: "Azure", + Location: "eu-west-1", + Properties: virtualMachineProperties{OsProfile: osProfile{ComputerName: "test-1"}, StorageProfile: storageProfile{OsDisk: osDisk{OsType: "Linux"}}}, + Tags: map[string]string{"key-1": "value-1"}, + ipAddresses: []vmIPAddress{ + {privateIP: "10.10.10.1"}, + }, + }, + }, [][]prompbmarshal.Label{ + discoveryutils.GetSortedLabels(map[string]string{ + "__address__": "10.10.10.1:80", + "__meta_azure_machine_id": "id-2", + "__meta_azure_subscription_id": "some-id", + "__meta_azure_machine_os_type": "Linux", + "__meta_azure_machine_name": "vm-1", + "__meta_azure_machine_computer_name": "test-1", + "__meta_azure_machine_location": "eu-west-1", + "__meta_azure_machine_private_ip": "10.10.10.1", + "__meta_azure_machine_tag_key_1": "value-1", + }), + }) +} diff --git a/lib/promscrape/discovery/azure/machine.go b/lib/promscrape/discovery/azure/machine.go new file mode 100644 index 000000000..b2fe24f81 --- /dev/null +++ b/lib/promscrape/discovery/azure/machine.go @@ -0,0 +1,204 @@ +package azure + +import ( + "encoding/json" + "fmt" + "sync" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup" + "github.com/VictoriaMetrics/fasthttp" +) + +// virtualMachine represents an Azure virtual machine (which can also be created by a VMSS) +type virtualMachine struct { + ID string `json:"id,omitempty"` + Name string 
`json:"name,omitempty"` + Type string `json:"type,omitempty"` + Location string `json:"location,omitempty"` + Properties virtualMachineProperties `json:"properties,omitempty"` + Tags map[string]string `json:"tags,omitempty"` + // enriched during service discovery + scaleSet string + ipAddresses []vmIPAddress +} + +type vmIPAddress struct { + publicIP string + privateIP string +} + +type virtualMachineProperties struct { + NetworkProfile networkProfile `json:"networkProfile,omitempty"` + OsProfile osProfile `json:"osProfile,omitempty"` + StorageProfile storageProfile `json:"storageProfile,omitempty"` +} + +type storageProfile struct { + OsDisk osDisk `json:"osDisk,omitempty"` +} + +type osDisk struct { + OsType string `json:"osType,omitempty"` +} + +type osProfile struct { + ComputerName string `json:"computerName,omitempty"` +} +type networkProfile struct { + // NetworkInterfaces - Specifies the list of resource Ids for the network interfaces associated with the virtual machine. + NetworkInterfaces []networkInterfaceReference `json:"networkInterfaces,omitempty"` +} + +type networkInterfaceReference struct { + ID string `json:"id,omitempty"` +} + +// listAPIResponse generic response from list api +type listAPIResponse struct { + NextLink string `json:"nextLink"` + Value []json.RawMessage `json:"value"` +} + +// visitAllAPIObjects iterates over list API with pagination and applies cb for each response object +func visitAllAPIObjects(ac *apiConfig, apiURL string, cb func(data json.RawMessage) error) error { + nextLink := apiURL + for nextLink != "" { + resp, err := ac.c.GetAPIResponseWithReqParams(nextLink, func(request *fasthttp.Request) { + request.Header.Set("Authorization", "Bearer "+ac.mustGetAuthToken()) + }) + if err != nil { + return fmt.Errorf("cannot execute azure api request at %s: %w", nextLink, err) + } + var lar listAPIResponse + if err := json.Unmarshal(resp, &lar); err != nil { + return fmt.Errorf("cannot parse azure api response %q obtained from %s: 
%w", resp, nextLink, err) + } + for i := range lar.Value { + if err := cb(lar.Value[i]); err != nil { + return err + } + } + nextLink = lar.NextLink + } + return nil +} + +// getVirtualMachines +func getVirtualMachines(ac *apiConfig) ([]virtualMachine, error) { + vms, err := listVMs(ac) + if err != nil { + return nil, fmt.Errorf("cannot list virtual machines: %w", err) + } + scaleSetRefs, err := listScaleSetRefs(ac) + if err != nil { + return nil, fmt.Errorf("cannot list scaleSets: %w", err) + } + ssvms, err := listScaleSetVMs(ac, scaleSetRefs) + if err != nil { + return nil, fmt.Errorf("cannot list virtual machines for scaleSets: %w", err) + } + vms = append(vms, ssvms...) + if err := enrichVirtualMachinesNetworkInterfaces(ac, vms); err != nil { + return nil, fmt.Errorf("cannot discover network interfaces for virtual machines: %w", err) + } + return vms, nil +} + +func enrichVirtualMachinesNetworkInterfaces(ac *apiConfig, vms []virtualMachine) error { + concurrency := cgroup.AvailableCPUs() * 10 + workCh := make(chan *virtualMachine, concurrency) + resultCh := make(chan error, concurrency) + var wg sync.WaitGroup + for i := 0; i < concurrency; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for vm := range workCh { + err := enrichVMNetworkInterfaces(ac, vm) + resultCh <- err + } + }() + } + wg.Add(1) + go func() { + defer wg.Done() + for i := range vms { + workCh <- &vms[i] + } + close(workCh) + }() + var firstErr error + for range vms { + err := <-resultCh + if err != nil && firstErr == nil { + firstErr = err + } + } + wg.Wait() + return firstErr +} + +// See https://docs.microsoft.com/en-us/rest/api/compute/virtual-machines/list-all +func listVMs(ac *apiConfig) ([]virtualMachine, error) { + // https://management.azure.com/subscriptions/{subscriptionId}/providers/Microsoft.Compute/virtualMachines?api-version=2022-03-01 + apiURL := "/subscriptions/" + ac.subscriptionID + if ac.resourceGroup != "" { + // special case filter by resourceGroup + // 
https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachines?api-version=2022-03-01 + apiURL += "/resourceGroups/" + ac.resourceGroup + } + apiURL += "/providers/Microsoft.Compute/virtualMachines?api-version=2022-03-01" + var vms []virtualMachine + err := visitAllAPIObjects(ac, apiURL, func(data json.RawMessage) error { + var vm virtualMachine + if err := json.Unmarshal(data, &vm); err != nil { + return fmt.Errorf("cannot parse virtualMachine list API response %q: %w", data, err) + } + vms = append(vms, vm) + return nil + }) + return vms, err +} + +type scaleSet struct { + Name string `json:"name"` + ID string `json:"id"` +} + +// See https://docs.microsoft.com/en-us/rest/api/compute/virtual-machine-scale-sets/list-all +func listScaleSetRefs(ac *apiConfig) ([]scaleSet, error) { + // https://management.azure.com/subscriptions/{subscriptionId}/providers/Microsoft.Compute/virtualMachineScaleSets?api-version=2022-03-01 + apiURL := "/subscriptions/" + ac.subscriptionID + "/providers/Microsoft.Compute/virtualMachineScaleSets?api-version=2022-03-01" + var sss []scaleSet + err := visitAllAPIObjects(ac, apiURL, func(data json.RawMessage) error { + var ss scaleSet + if err := json.Unmarshal(data, &ss); err != nil { + return fmt.Errorf("cannot parse scaleSet list API response %q: %w", data, err) + } + sss = append(sss, ss) + return nil + }) + return sss, err +} + +// See https://docs.microsoft.com/en-us/rest/api/compute/virtual-machine-scale-set-vms/list +func listScaleSetVMs(ac *apiConfig, sss []scaleSet) ([]virtualMachine, error) { + // https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachineScaleSets/{virtualMachineScaleSetName}/virtualMachines?api-version=2022-03-01 + var vms []virtualMachine + for _, ss := range sss { + apiURI := ss.ID + "/virtualMachines?api-version=2022-03-01" + err := visitAllAPIObjects(ac, 
apiURI, func(data json.RawMessage) error { + var vm virtualMachine + if err := json.Unmarshal(data, &vm); err != nil { + return fmt.Errorf("cannot parse virtualMachine list API response %q: %w", data, err) + } + vm.scaleSet = ss.Name + vms = append(vms, vm) + return nil + }) + if err != nil { + return nil, err + } + } + return vms, nil +} diff --git a/lib/promscrape/discovery/azure/machine_test.go b/lib/promscrape/discovery/azure/machine_test.go new file mode 100644 index 000000000..f2ba253ca --- /dev/null +++ b/lib/promscrape/discovery/azure/machine_test.go @@ -0,0 +1,376 @@ +package azure + +import ( + "fmt" + "net/http" + "net/http/httptest" + "reflect" + "strings" + "testing" + "time" + + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" +) + +func TestGetVirtualMachinesSuccess(t *testing.T) { + prettifyVMs := func(src []virtualMachine) string { + var sb strings.Builder + for idx, vm := range src { + fmt.Fprintf(&sb, `idx: %d, vm: Name: %q, ID: %q, Location: %q, Type: %q, ComputerName: %q, OsType: %q, scaleSet: %q`, + idx, vm.Name, vm.ID, vm.Location, vm.Type, vm.Properties.OsProfile.ComputerName, vm.Properties.StorageProfile.OsDisk.OsType, vm.scaleSet) + if vm.Tags != nil { + fmt.Fprint(&sb, " vmtags: ") + } + for tagK, tagV := range vm.Tags { + fmt.Fprintf(&sb, `%q: %q, `, tagK, tagV) + } + if len(vm.Properties.NetworkProfile.NetworkInterfaces) > 0 { + fmt.Fprint(&sb, " network ints: ") + } + for idx, nic := range vm.Properties.NetworkProfile.NetworkInterfaces { + fmt.Fprintf(&sb, " idx %d, ID: %q", idx, nic.ID) + } + if len(vm.ipAddresses) > 0 { + fmt.Fprint(&sb, " ip addresses: ") + } + for idx, ip := range vm.ipAddresses { + fmt.Fprintf(&sb, "idx: %d, PrivateIP: %q, PublicIP: %q", idx, ip.privateIP, ip.publicIP) + } + fmt.Fprintf(&sb, "\n") + } + return sb.String() + } + f := func(name string, expectedVMs []virtualMachine, apiResponses [4]string) { + t.Run(name, func(t *testing.T) { + testServer := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + switch { + // list vms response + case strings.Contains(r.URL.Path, "/providers/Microsoft.Compute/virtualMachines"): + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, apiResponses[0]) + // list scaleSets response + case strings.Contains(r.URL.RequestURI(), "/providers/Microsoft.Compute/virtualMachineScaleSets?api-version=2022-03-01"): + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, apiResponses[1]) + // list scalesets vms response + case strings.Contains(r.URL.Path, "/providers/Microsoft.Compute/virtualMachineScaleSets/{virtualMachineScaleSetName}/virtualMach"): + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, apiResponses[2]) + // nic response + case strings.Contains(r.URL.Path, "/networkInterfaces/"): + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, apiResponses[3]) + default: + w.WriteHeader(http.StatusNotFound) + fmt.Fprintf(w, "API path not found: %s", r.URL.Path) + } + })) + defer testServer.Close() + c, err := discoveryutils.NewClient(testServer.URL, nil, nil, nil) + if err != nil { + t.Fatalf("unexpected error at client create: %s", err) + } + ac := &apiConfig{ + c: c, + subscriptionID: "some-id", + refreshToken: func() (string, time.Duration, error) { + return "auth-token", 0, nil + }, + } + gotVMs, err := getVirtualMachines(ac) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if !reflect.DeepEqual(gotVMs, expectedVMs) { + t.Fatalf("unexpected test result\ngot:\n%s\nwant:\n%s", prettifyVMs(gotVMs), prettifyVMs(expectedVMs)) + } + }) + } + f("discover single vm", []virtualMachine{ + { + Name: "{virtualMachineName}", ID: "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachines/{virtualMachineName}", + Location: "eastus", Type: "Microsoft.Compute/virtualMachines", + Properties: virtualMachineProperties{ + NetworkProfile: networkProfile{NetworkInterfaces: []networkInterfaceReference{{ID: 
"/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}"}}}, + OsProfile: osProfile{ComputerName: "Test"}, + StorageProfile: storageProfile{OsDisk: osDisk{OsType: "Windows"}}, + }, + ipAddresses: []vmIPAddress{ + {publicIP: "20.30.40.50", privateIP: "172.20.2.4"}, + }, + Tags: map[string]string{}, + }, + }, [4]string{ + ` +{ + "value": [ + { "id": "/some-vm/id", + "properties": { + "vmId": "{vmId}", + "storageProfile": { + "imageReference": { + "publisher": "MicrosoftWindowsServer", + "offer": "WindowsServer", + "sku": "2012-R2-Datacenter", + "version": "4.127.20170406", + "exactVersion": "aaaaaaaaaaaaa", + "sharedGalleryImageId": "aaaaaaaaaaaaaaa", + "communityGalleryImageId": "aaaa", + "id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + }, + "osDisk": { + "osType": "Windows", + "name": "test", + "createOption": "FromImage", + "vhd": { + "uri": "https://{storageAccountName}.blob.core.windows.net/{containerName}/{vhdName}.vhd" + }, + "caching": "None", + "diskSizeGB": 127, + "encryptionSettings": { + "diskEncryptionKey": { + "secretUrl": "aaaaaaaaa", + "sourceVault": { + "id": "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/availabilitySets/{availabilitySetName}" + } + }, + "keyEncryptionKey": { + "keyUrl": "aaaaaaaaaaaaa", + "sourceVault": { + "id": "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/availabilitySets/{availabilitySetName}" + } + }, + "enabled": true + }, + "image": { + "uri": "https://{storageAccountName}.blob.core.windows.net/{containerName}/{vhdName}.vhd" + }, + "writeAcceleratorEnabled": true, + "diffDiskSettings": { + "option": "Local", + "placement": "CacheDisk" + }, + "managedDisk": { + "storageAccountType": "Standard_LRS", + "diskEncryptionSet": { + "id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaa" + }, + "securityProfile": { + "securityEncryptionType": "VMGuestStateOnly", + 
"diskEncryptionSet": { + "id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaa" + } + }, + "id": "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/disks/testingexcludedisk_OsDisk_1_74cdaedcea50483d9833c96adefa100f" + }, + "deleteOption": "Delete" + }, + "dataDisks": [] + }, + "osProfile": { + "computerName": "Test", + "adminUsername": "Foo12" + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}", + "properties": { + "primary": true, + "deleteOption": "Delete" + } + } + ], + "networkApiVersion": "2020-11-01" + } + }, + "type": "Microsoft.Compute/virtualMachines", + "location": "eastus", + "tags": {}, + "id": "/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachines/{virtualMachineName}", + "name": "{virtualMachineName}" + } + ], + "nextLink": "" +}`, + `{}`, + `{}`, + `{ + "name": "test-nic", + "properties": { + "ipConfigurations": [ + { + "name": "ipconfig1", + "properties": { + "privateIPAddress": "172.20.2.4", + "publicIPAddress": { + "properties": { + "ipAddress": "20.30.40.50" + } + }, + "primary": true + } + } + ], + "primary": true + }, + "type": "Microsoft.Network/networkInterfaces" +}`, + }) + + f("discover vm with scaleSet", []virtualMachine{ + { + Name: "{vmss-vm-name}", ID: "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0", + Location: "westus", Type: "Microsoft.Compute/virtualMachines", + Properties: virtualMachineProperties{ + NetworkProfile: networkProfile{NetworkInterfaces: []networkInterfaceReference{ + {ID: "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415"}, + {ID: 
"/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415"}, + }}, + OsProfile: osProfile{ComputerName: "test000000"}, + StorageProfile: storageProfile{OsDisk: osDisk{OsType: "Windows"}}, + }, + scaleSet: "{virtualMachineScaleSetName}", + ipAddresses: []vmIPAddress{ + {publicIP: "20.30.40.50", privateIP: "172.20.2.4"}, + {publicIP: "20.30.40.50", privateIP: "172.20.2.4"}, + }, + Tags: map[string]string{}, + }, + { + Name: "{vmss-vm-name}", ID: "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/15", + Location: "westp", Type: "Microsoft.Compute/virtualMachines", + Properties: virtualMachineProperties{ + NetworkProfile: networkProfile{NetworkInterfaces: []networkInterfaceReference{ + {ID: "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415"}, + }}, + OsProfile: osProfile{ComputerName: "test-15"}, + StorageProfile: storageProfile{OsDisk: osDisk{OsType: "Linux"}}, + }, + scaleSet: "{virtualMachineScaleSetName}", + ipAddresses: []vmIPAddress{ + {publicIP: "20.30.40.50", privateIP: "172.20.2.4"}, + }, + Tags: map[string]string{}, + }, + }, [4]string{ + `{}`, + `{ + "value": [ + { + "sku": { + "tier": "Standard", + "capacity": 3, + "name": "Standard_D1_v2" + }, + "location": "westus", + "properties": { }, + "id": "/subscriptions/{subscription-id}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachineScaleSets/{virtualMachineScaleSetName}", + "name": "{virtualMachineScaleSetName}", + "type": "Microsoft.Compute/virtualMachineScaleSets", + "tags": { + "key8425": "aaa" + } + } + ], + "nextLink": "" +}`, + ` +{ + "value": [ + { + "name": "{vmss-vm-name}", + "id": 
"/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0", + "type": "Microsoft.Compute/virtualMachines", + "location": "westus", + "tags": {}, + "properties": { + "storageProfile": { + "osDisk": { + "osType": "Windows" + } + }, + "osProfile": { + "computerName": "test000000" + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415", + "properties": { + "primary": true, + "deleteOption": "Delete" + } + }, + { + "id": "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415", + "properties": { + "primary": true, + "deleteOption": "Delete" + } + } + ] + }, + "licenseType": "aaaaaaaaaa", + "protectionPolicy": { + "protectFromScaleIn": true, + "protectFromScaleSetActions": true + } + } + }, + { + "name": "{vmss-vm-name}", + "id": "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/15", + "type": "Microsoft.Compute/virtualMachines", + "location": "westp", + "tags": {}, + "properties": { + "storageProfile": { + "osDisk": { + "osType": "Linux" + } + }, + "osProfile": { + "computerName": "test-15" + }, + "networkProfile": { + "networkInterfaces": [ + { + "id": "/subscriptions/{subscription-id}/resourceGroups/myResourceGroup/providers/Microsoft.Compute/virtualMachineScaleSets/{vmss-name}/virtualMachines/0/networkInterfaces/vmsstestnetconfig5415", + "properties": { + "primary": true, + "deleteOption": "Delete" + } + } + ] + }, + "licenseType": "aaaaaaaaaa" + } + } + + ], + "nextLink": "" +}`, + `{ + "name": "test-nic", + "properties": { + "ipConfigurations": [ + { + "name": 
"ipconfig1", + "properties": { + "privateIPAddress": "172.20.2.4", + "publicIPAddress": { + "properties": { + "ipAddress": "20.30.40.50" + } + }, + "primary": true + } + } + ], + "primary": true + }, + "type": "Microsoft.Network/networkInterfaces" +}`, + }) +} diff --git a/lib/promscrape/discovery/azure/nic.go b/lib/promscrape/discovery/azure/nic.go new file mode 100644 index 000000000..a93c57577 --- /dev/null +++ b/lib/promscrape/discovery/azure/nic.go @@ -0,0 +1,80 @@ +package azure + +import ( + "encoding/json" + "fmt" + + "github.com/VictoriaMetrics/fasthttp" +) + +// networkInterface a network interface in a resource group. +type networkInterface struct { + Properties networkProperties `json:"properties,omitempty"` +} + +type networkProperties struct { + // Primary - Gets whether this is a primary network interface on a virtual machine. + Primary bool `json:"primary,omitempty"` + IPConfigurations []ipConfiguration `json:"ipConfigurations,omitempty"` +} + +type ipConfiguration struct { + Properties ipProperties `json:"properties,omitempty"` +} + +type ipProperties struct { + PublicIPAddress publicIPAddress `json:"publicIPAddress,omitempty"` + PrivateIPAddress string `json:"privateIPAddress,omitempty"` +} + +type publicIPAddress struct { + Properties publicIPProperties `json:"properties,omitempty"` +} + +type publicIPProperties struct { + IPAddress string `json:"ipAddress,omitempty"` +} + +func enrichVMNetworkInterfaces(ac *apiConfig, vm *virtualMachine) error { + for _, nicRef := range vm.Properties.NetworkProfile.NetworkInterfaces { + isScaleSetVM := vm.scaleSet != "" + nic, err := getNIC(ac, nicRef.ID, isScaleSetVM) + if err != nil { + return err + } + // only primary interface is relevant for us + // mimic Prometheus logic + if nic.Properties.Primary { + for _, ipCfg := range nic.Properties.IPConfigurations { + vm.ipAddresses = append(vm.ipAddresses, vmIPAddress{ + publicIP: ipCfg.Properties.PublicIPAddress.Properties.IPAddress, + privateIP: 
ipCfg.Properties.PrivateIPAddress, + }) + } + } + } + return nil +} + +// See https://docs.microsoft.com/en-us/rest/api/virtualnetwork/network-interfaces/get +func getNIC(ac *apiConfig, id string, isScaleSetVM bool) (*networkInterface, error) { + // https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Network/networkInterfaces/{networkInterfaceName}?api-version=2021-08-01 + apiQueryParams := "api-version=2021-08-01&$expand=ipConfigurations/publicIPAddress" + // special case for VMs managed by ScaleSet + // it's not documented at API docs. + if isScaleSetVM { + apiQueryParams = "api-version=2021-03-01&$expand=ipConfigurations/publicIPAddress" + } + apiURL := id + "?" + apiQueryParams + resp, err := ac.c.GetAPIResponseWithReqParams(apiURL, func(request *fasthttp.Request) { + request.Header.Set("Authorization", "Bearer "+ac.mustGetAuthToken()) + }) + if err != nil { + return nil, fmt.Errorf("cannot execute api request at %s :%w", apiURL, err) + } + var nic networkInterface + if err := json.Unmarshal(resp, &nic); err != nil { + return nil, fmt.Errorf("cannot parse network-interface api response %q: %w", resp, err) + } + return &nic, nil +} diff --git a/lib/promscrape/scraper.go b/lib/promscrape/scraper.go index c7ca6b08c..d72edad5d 100644 --- a/lib/promscrape/scraper.go +++ b/lib/promscrape/scraper.go @@ -12,6 +12,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/azure" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/consul" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/digitalocean" "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discovery/dns" @@ -110,6 +111,7 @@ func runScraper(configFile string, pushData func(wr *prompbmarshal.WriteRequest) 
cfg.mustStart() scs := newScrapeConfigs(pushData, globalStopCh) + scs.add("azure_sd_configs", *azure.SDCheckInterval, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getAzureSDScrapeWork(swsPrev) }) scs.add("consul_sd_configs", *consul.SDCheckInterval, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getConsulSDScrapeWork(swsPrev) }) scs.add("digitalocean_sd_configs", *digitalocean.SDCheckInterval, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getDigitalOceanDScrapeWork(swsPrev) }) scs.add("dns_sd_configs", *dns.SDCheckInterval, func(cfg *Config, swsPrev []*ScrapeWork) []*ScrapeWork { return cfg.getDNSSDScrapeWork(swsPrev) })