mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-23 12:31:07 +01:00
lib/cgroup: document the ability to detect cgroup v2 memory and cpu limits. This is a follow-up for b50024812e
This commit is contained in:
parent
d7be2753c0
commit
c0ec541559
@ -10,6 +10,7 @@ sort: 15
|
||||
* FEATURE: return `X-Server-Hostname` header in http responses of all the VictoriaMetrics components. This should simplify tracing the origin server behind a load balancer or behind auth proxy during troubleshooting.
|
||||
* FEATURE: vmselect: allow to use 2x more memory for query processing at `vmselect` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). This should allow processing heavy queries without the need to increase RAM size at `vmselect` nodes.
|
||||
* FEATURE: add ability to filter `/api/v1/status/tsdb` output with arbitrary [time series selectors](https://prometheus.io/docs/prometheus/latest/querying/basics/#time-series-selectors) passed via `match[]` query args. See [these docs](https://docs.victoriametrics.com/#tsdb-stats) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1168) for details.
|
||||
* FEATURE: automatically detect memory and cpu limits for VictoriaMetrics components running under [cgroup v2](https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html) environments such as [HashiCorp Nomad](https://www.nomadproject.io/). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1269).
|
||||
|
||||
* BUGFIX: vmagent: fix possible race when refreshing `role: endpoints` and `role: endpointslices` scrape targets in `kubernetes_sd_config`. Previously `pod` objects could be updated after the related `endpoints` object update. This could lead to missing scrape targets. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240).
|
||||
* BUGFIX: properly remove stale parts outside the configured retention if `-retentionPeriod` is smaller than one month. Previously stale parts could remain active for up to a month after they go outside the retention.
|
||||
|
@ -42,11 +42,10 @@ func updateGOMAXPROCSToCPUQuota() {
|
||||
}
|
||||
|
||||
func getCPUQuota() float64 {
|
||||
cpuQuota, err := getCPUStatGeneric()
|
||||
cpuQuota, err := getCPUQuotaGeneric()
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
if cpuQuota <= 0 {
|
||||
// The quota isn't set. This may be the case in multilevel containers.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/685#issuecomment-674423728
|
||||
@ -55,7 +54,7 @@ func getCPUQuota() float64 {
|
||||
return cpuQuota
|
||||
}
|
||||
|
||||
func getCPUStatGeneric() (float64, error) {
|
||||
func getCPUQuotaGeneric() (float64, error) {
|
||||
quotaUS, err := getCPUStat("cpu.cfs_quota_us")
|
||||
if err == nil {
|
||||
periodUS, err := getCPUStat("cpu.cfs_period_us")
|
||||
@ -63,7 +62,7 @@ func getCPUStatGeneric() (float64, error) {
|
||||
return float64(quotaUS) / float64(periodUS), nil
|
||||
}
|
||||
}
|
||||
return getCPUStatV2("/sys/fs/cgroup", "/proc/self/cgroup")
|
||||
return getCPUQuotaV2("/sys/fs/cgroup", "/proc/self/cgroup")
|
||||
}
|
||||
|
||||
func getCPUStat(statName string) (int64, error) {
|
||||
@ -83,31 +82,35 @@ func getOnlineCPUCount() float64 {
|
||||
return n
|
||||
}
|
||||
|
||||
func getCPUStatV2(sysPrefix, cgroupPath string) (float64, error) {
|
||||
func getCPUQuotaV2(sysPrefix, cgroupPath string) (float64, error) {
|
||||
data, err := getFileContents("cpu.max", sysPrefix, cgroupPath, "")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return parseCPUMax(data)
|
||||
data = strings.TrimSpace(data)
|
||||
n, err := parseCPUMax(data)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse cpu.max file contents: %w", err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu
|
||||
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#cpu
|
||||
func parseCPUMax(data string) (float64, error) {
|
||||
data = strings.TrimRight(data, "\r\n")
|
||||
bounds := strings.Split(data, " ")
|
||||
if len(bounds) != 2 {
|
||||
return 0, fmt.Errorf("unexpected count: %d, want quota and period, got: %s", len(bounds), data)
|
||||
return 0, fmt.Errorf("unexpected line format: want 'quota period'; got: %s", data)
|
||||
}
|
||||
if bounds[0] == "max" {
|
||||
return -1, nil
|
||||
}
|
||||
quota, err := strconv.ParseUint(bounds[0], 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("cannot parse quota: %w", err)
|
||||
}
|
||||
period, err := strconv.ParseUint(bounds[1], 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("cannot parse period: %w", err)
|
||||
}
|
||||
return float64(quota) / float64(period), nil
|
||||
}
|
||||
|
@ -23,15 +23,15 @@ func TestCountCPUs(t *testing.T) {
|
||||
f("0-6", 7)
|
||||
}
|
||||
|
||||
func TestGetCPUStatV2(t *testing.T) {
|
||||
func TestGetCPUQuotaV2(t *testing.T) {
|
||||
f := func(sysPrefix, cgroupPath string, expectedCPU float64) {
|
||||
t.Helper()
|
||||
got, err := getCPUStatV2(sysPrefix, cgroupPath)
|
||||
got, err := getCPUQuotaV2(sysPrefix, cgroupPath)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %s, sysPrefix: %s, cgroupPath: %s", err, sysPrefix, cgroupPath)
|
||||
}
|
||||
if got != expectedCPU {
|
||||
t.Fatalf("unexpected result from getCPUStatV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU)
|
||||
t.Fatalf("unexpected result from getCPUQuotaV2(%s, %s), got %f, want %f", sysPrefix, cgroupPath, got, expectedCPU)
|
||||
}
|
||||
}
|
||||
f("testdata/cgroup", "testdata/self/cgroupv2", 2)
|
||||
|
@ -16,17 +16,16 @@ func GetMemoryLimit() int64 {
|
||||
if err == nil {
|
||||
return n
|
||||
}
|
||||
// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
|
||||
n, err = getMemStatV2()
|
||||
n, err = getMemStatV2("memory.max")
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return n
|
||||
}
|
||||
|
||||
func getMemStatV2() (int64, error) {
|
||||
return getStatGeneric("memory.max", "/sys/fs/cgroup", "/proc/self/cgroup", "")
|
||||
func getMemStatV2(statName string) (int64, error) {
|
||||
// See https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files
|
||||
return getStatGeneric(statName, "/sys/fs/cgroup", "/proc/self/cgroup", "")
|
||||
}
|
||||
|
||||
func getMemStat(statName string) (int64, error) {
|
||||
|
@ -13,10 +13,10 @@ func getStatGeneric(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (i
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
data = strings.TrimRight(data, "\r\n")
|
||||
data = strings.TrimSpace(data)
|
||||
n, err := strconv.ParseInt(data, 10, 64)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
return 0, fmt.Errorf("cannot parse %q: %w", cgroupPath, err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
@ -33,7 +33,7 @@ func getFileContents(statName, sysfsPrefix, cgroupPath, cgroupGrepLine string) (
|
||||
}
|
||||
subPath, err := grepFirstMatch(string(cgroupData), cgroupGrepLine, 2, ":")
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", fmt.Errorf("cannot find cgroup path for %q in %q: %w", cgroupGrepLine, cgroupPath, err)
|
||||
}
|
||||
filepath = path.Join(sysfsPrefix, subPath, statName)
|
||||
data, err = ioutil.ReadFile(filepath)
|
||||
|
Loading…
Reference in New Issue
Block a user