lib/promscrape/discovery/yandexcloud: follow-up for 070abe5c71

- Obtain IAM token via GCE-like API instead of Amazon EC2 IMDSv2 API,
  since it looks like IMDSv2 API isn't supported by Yandex Cloud
  according to https://yandex.cloud/en/docs/security/standard/authentication#aws-token :

  > So far, Yandex Cloud does not support version 2, so it is strongly recommended
  > to technically disable getting a service account token via the Amazon EC2 metadata service.

- Try obtaining IAM token via GCE-like API first and then fall back to the deprecated Amazon EC2 IMDSv1.
  This should prevent auth errors for instances with disabled GCE-like auth API.
  This addresses @ITD27M01 concern at https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513#issuecomment-1867794884

- Make the description of the change at docs/CHANGELOG.md clearer and add a reference to the related issue.

P.S. This change wasn't tested in prod because I have no access to Yandex Cloud.
It is recommended that this change be tested by @ITD27M01 and @vmazgo, who filed
the issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6524
This commit is contained in:
Aliaksandr Valialkin 2024-07-16 17:49:55 +02:00
parent 6aed628f04
commit 4304950391
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
2 changed files with 68 additions and 48 deletions

View File

@ -41,7 +41,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): reduces CPU usage by reusing request body buffer. Allows to disable requests caching with `-maxRequestBodySizeToRetry=0`. See this [PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6533) for details.
* FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): add [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmauth.json) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml) for [vmauth](https://docs.victoriametrics.com/vmauth/) dashboard. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4313) for details.
* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): reduces CPU usage by reusing request body buffer. Allows to disable requests caching with `-maxRequestBodySizeToRetry=0`. See this [PR](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6533) for details.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): added `yandexcloud_sd` AWS API IMDSv2 support.
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): [`yandexcloud_sd_configs`](https://docs.victoriametrics.com/sd_configs/#yandexcloud_sd_configs): add support for obtaining IAM token in [GCE format](https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm) additionally to the [deprecated Amazon EC2 IMDSv1 format](https://yandex.cloud/en/docs/security/standard/authentication#aws-token). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): add `-graphite.sanitizeMetricName` cmd-line flag for sanitizing metrics ingested via [Graphite protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077).
* FEATURE: [streaming aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose the following metrics at `/metrics` page of [vmagent](https://docs.victoriametrics.com/vmagent/) and [single-node VictoriaMetrics](https://docs.victoriametrics.com/):
* `vm_streamaggr_matched_samples_total` - the number of input samples matched by the corresponding aggregation rule

View File

@ -7,7 +7,6 @@ import (
"io"
"net/http"
"net/url"
"strconv"
"sync"
"time"
@ -37,10 +36,6 @@ type apiConfig struct {
// credsLock protects the refresh of creds
credsLock sync.Mutex
creds *apiCredentials
// metadataCredsLock protects the refresh of metadataCreds
metadataCredsLock sync.Mutex
metadataCreds *apiCredentials
}
func getAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) {
@ -124,55 +119,80 @@ func getCreds(cfg *apiConfig) (*apiCredentials, error) {
}, nil
}
// getMetadataCreds gets Yandex Cloud IAM metadata token
//
// It sends a PUT request with the X-aws-ec2-metadata-token-ttl-seconds header
// to the /latest/api/token endpoint at 169.254.169.254 and returns the response
// body as the token together with a locally calculated expiration time.
//
// NOTE(review): the obtained credentials are returned without being stored
// in cfg.metadataCreds inside this function, so the cache check below can succeed
// only if cfg.metadataCreds is assigned somewhere else - confirm against callers.
func getMetadataCreds(cfg *apiConfig) (*apiCredentials, error) {
// The lock is held until the function returns, including the HTTP round trip.
cfg.metadataCredsLock.Lock()
defer cfg.metadataCredsLock.Unlock()
if cfg.metadataCreds != nil && time.Until(cfg.metadataCreds.Expiration) > 10*time.Second {
// Credentials aren't expired yet.
return cfg.metadataCreds, nil
}
endpoint := "http://169.254.169.254/latest/api/token"
req, err := http.NewRequest(http.MethodPut, endpoint, nil)
if err != nil {
return nil, fmt.Errorf("cannot create metadata token request: %w", err)
}
// ttl is the requested token lifetime in seconds; it is sent in the request header below.
ttl := 1800
// The expiration is calculated before the request is sent,
// so it is measured from a moment preceding the token issue.
expiration := time.Now().Add(time.Duration(ttl) * time.Second)
req.Header.Add("X-aws-ec2-metadata-token-ttl-seconds", strconv.Itoa(ttl))
resp, err := cfg.client.Do(req)
if err != nil {
return nil, fmt.Errorf("cannot perform metadata token request: %w", err)
}
// readResponseBody is defined outside this view; presumably it validates the response
// and releases its body - TODO confirm.
data, err := readResponseBody(resp, endpoint)
if err != nil {
return nil, fmt.Errorf("cannot read metadata creds from %s: %w", endpoint, err)
}
// The whole response body is used as the token value.
return &apiCredentials{
Token: string(data),
Expiration: expiration,
}, nil
}
// getInstanceCreds gets Yandex Cloud IAM token using instance Service Account
//
// See https://cloud.yandex.com/en-ru/docs/compute/operations/vm-connect/auth-inside-vm
func getInstanceCreds(cfg *apiConfig) (*apiCredentials, error) {
metadataCreds, err := getMetadataCreds(cfg)
if err != nil {
return nil, err
// Try obtaining GCE-like creds at first.
// See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm
creds, err := getGCEInstanceCreds(cfg)
if err == nil {
return creds, nil
}
endpoint := "http://169.254.169.254/latest/meta-data/iam/security-credentials/default"
errGCE := err
// Fall back to the deprecated IMDSv1 - see https://yandex.cloud/en/docs/security/standard/authentication#aws-token
//
// TODO: remove this when it is completely removed from Yandex Cloud.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513
// and https://yandex.cloud/en/docs/security/standard/authentication#aws-token
creds, err = getEC2IMDBSv1Creds(cfg)
if err == nil {
return creds, nil
}
// Return errGCE, since it is likely that IMDSv1 is disabled.
return nil, errGCE
}
// getGCEInstanceCreds gets Yandex Cloud IAM token using GCE API
//
// See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm
func getGCEInstanceCreds(cfg *apiConfig) (*apiCredentials, error) {
endpoint := "http://169.254.169.254/computeMetadata/v1/instance/service-accounts/default/token"
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
if err != nil {
return nil, fmt.Errorf("cannot create instance creds request: %w", err)
logger.Panicf("BUG: cannot create GCE token request for %s: %s", endpoint, err)
}
req.Header.Add("X-aws-ec2-metadata-token", metadataCreds.Token)
req.Header.Add("Metadata-Flavor", "Google")
resp, err := cfg.client.Do(req)
if err != nil {
return nil, fmt.Errorf("cannot read instance creds from %s: %w", endpoint, err)
return nil, fmt.Errorf("cannot obtain GCE token from %s: %w", endpoint, err)
}
data, err := readResponseBody(resp, endpoint)
if err != nil {
return nil, fmt.Errorf("cannot read GCE token from %s: %w", endpoint, err)
}
var ac gceAPICredentials
if err := json.Unmarshal(data, &ac); err != nil {
return nil, fmt.Errorf("cannot unmarshal GCE token from %s: %w; data=%s", endpoint, err, data)
}
if ac.TokenType != "Bearer" {
return nil, fmt.Errorf("unsupported GCE token type received from %s: %q; supported: %q", endpoint, ac.TokenType, "Bearer")
}
expiration := time.Now().Add(time.Duration(ac.ExpiresIn) * time.Second)
return &apiCredentials{
Token: ac.AccessToken,
Expiration: expiration,
}, nil
}
// gceAPICredentials is the JSON payload decoded from the response
// of the GCE-like metadata token endpoint.
//
// See https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm
type gceAPICredentials struct {
// AccessToken is the token value; it becomes apiCredentials.Token.
AccessToken string `json:"access_token"`
// ExpiresIn is the token lifetime; it is treated as a number of seconds
// counted from the moment the response is parsed.
ExpiresIn int `json:"expires_in"`
// TokenType is the type of the returned token; only "Bearer" is accepted.
TokenType string `json:"token_type"`
}
// getEC2IMDBSv1Creds gets Yandex Cloud IAM token using Amazon EC2 IMDBSv1
func getEC2IMDBSv1Creds(cfg *apiConfig) (*apiCredentials, error) {
endpoint := "http://169.254.169.254/latest/meta-data/iam/security-credentials/default"
resp, err := cfg.client.Get(endpoint)
if err != nil {
return nil, fmt.Errorf("cannot read Amazon EC2 IMDBSv1 token from %s: %w", endpoint, err)
}
data, err := readResponseBody(resp, endpoint)
if err != nil {
@ -181,7 +201,7 @@ func getInstanceCreds(cfg *apiConfig) (*apiCredentials, error) {
var ac apiCredentials
if err := json.Unmarshal(data, &ac); err != nil {
return nil, fmt.Errorf("cannot parse auth credentials response from %s: %w", endpoint, err)
return nil, fmt.Errorf("cannot parse Amazon EC2 IMDBSv1 token from %s: %w; data=%s", endpoint, err, data)
}
return &ac, nil
}
@ -198,7 +218,7 @@ func getIAMToken(cfg *apiConfig) (*iamToken, error) {
body := bytes.NewBuffer(passport)
resp, err := cfg.client.Post(iamURL, "application/json", body)
if err != nil {
logger.Panicf("BUG: cannot create request to yandex cloud iam api %q: %s", iamURL, err)
return nil, fmt.Errorf("cannot send request to yandex cloud iam api %q: %s", iamURL, err)
}
data, err := readResponseBody(resp, iamURL)
if err != nil {
@ -206,7 +226,7 @@ func getIAMToken(cfg *apiConfig) (*iamToken, error) {
}
var it iamToken
if err := json.Unmarshal(data, &it); err != nil {
return nil, fmt.Errorf("cannot parse iam token: %w; data: %s", err, data)
return nil, fmt.Errorf("cannot parse iam token from %s: %w; data: %s", iamURL, err, data)
}
return &it, nil
}