mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-23 12:31:07 +01:00
lib/{storage,mergeset}: remove empty directories on startup. Such directories can be left after unclean shutdown on NFS storage
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142
This commit is contained in:
parent
1ab27582a3
commit
bbebdf9ba1
@ -10,6 +10,9 @@ sort: 14
|
|||||||
* FEATURE: add OpenTSDB migration option to vmctl. See more details [here](https://docs.victoriametrics.com/vmctl#migrating-data-from-opentsdb).
|
* FEATURE: add OpenTSDB migration option to vmctl. See more details [here](https://docs.victoriametrics.com/vmctl#migrating-data-from-opentsdb).
|
||||||
Thanks to @johnseekins!
|
Thanks to @johnseekins!
|
||||||
|
|
||||||
|
* BUGFIX: vmstorage: remove empty directories on startup. Such directories can be left after unclean shutdown on NFS storage. Previously such directories could lead to crashloop until manually removed. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142).
|
||||||
|
|
||||||
|
|
||||||
## [v1.58.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.58.0)
|
## [v1.58.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.58.0)
|
||||||
|
|
||||||
* FEATURE: vminsert and vmagent: add `-sortLabels` command-line flag for sorting metric labels before pushing them to `vmstorage`. This should reduce the size of `MetricName -> internal_series_id` cache (aka `vm_cache_size_bytes{type="storage/tsid"}`) when ingesting samples for the same time series with distinct order of labels. For example, `foo{k1="v1",k2="v2"}` and `foo{k2="v2",k1="v1"}` represent a single time series. Labels sorting is disabled by default, since the majority of established exporters preserve the order of labels for the exported metrics.
|
* FEATURE: vminsert and vmagent: add `-sortLabels` command-line flag for sorting metric labels before pushing them to `vmstorage`. This should reduce the size of `MetricName -> internal_series_id` cache (aka `vm_cache_size_bytes{type="storage/tsid"}`) when ingesting samples for the same time series with distinct order of labels. For example, `foo{k1="v1",k2="v2"}` and `foo{k2="v2",k1="v1"}` represent a single time series. Labels sorting is disabled by default, since the majority of established exporters preserve the order of labels for the exported metrics.
|
||||||
|
@ -81,7 +81,7 @@ func dirRemover() {
|
|||||||
time.Sleep(sleepTime)
|
time.Sleep(sleepTime)
|
||||||
if sleepTime < maxSleepTime {
|
if sleepTime < maxSleepTime {
|
||||||
sleepTime *= 2
|
sleepTime *= 2
|
||||||
} else if sleepTime > time.Second {
|
} else if sleepTime > 5*time.Second {
|
||||||
logger.Warnf("failed to remove directory %q due to NFS lock; retrying later in %.3f seconds", w.path, sleepTime.Seconds())
|
logger.Warnf("failed to remove directory %q due to NFS lock; retrying later in %.3f seconds", w.path, sleepTime.Seconds())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -121,11 +121,11 @@ func MustStopDirRemover() {
|
|||||||
dirRemoverWG.Wait()
|
dirRemoverWG.Wait()
|
||||||
close(doneCh)
|
close(doneCh)
|
||||||
}()
|
}()
|
||||||
const maxWaitTime = 5 * time.Second
|
const maxWaitTime = 10 * time.Second
|
||||||
select {
|
select {
|
||||||
case <-doneCh:
|
case <-doneCh:
|
||||||
return
|
return
|
||||||
case <-time.After(maxWaitTime):
|
case <-time.After(maxWaitTime):
|
||||||
logger.Panicf("FATAL: cannot stop dirRemover in %s", maxWaitTime)
|
logger.Errorf("cannot stop dirRemover in %s; the remaining empty NFS directories should be automatically removed on the next startup", maxWaitTime)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
18
lib/fs/fs.go
18
lib/fs/fs.go
@ -174,6 +174,24 @@ func mustSyncParentDirIfExists(path string) {
|
|||||||
MustSyncPath(parentDirPath)
|
MustSyncPath(parentDirPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsEmptyDir returns true if path points to empty directory.
|
||||||
|
func IsEmptyDir(path string) bool {
|
||||||
|
// See https://stackoverflow.com/a/30708914/274937
|
||||||
|
f, err := os.Open(path)
|
||||||
|
if err != nil {
|
||||||
|
logger.Panicf("FATAL: unexpected error when opening directory %q: %s", path, err)
|
||||||
|
}
|
||||||
|
_, err = f.Readdirnames(1)
|
||||||
|
MustClose(f)
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
logger.Panicf("FATAL: unexpected error when reading directory %q: %s", path, err)
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// MustRemoveAll removes path with all the contents.
|
// MustRemoveAll removes path with all the contents.
|
||||||
//
|
//
|
||||||
// It properly fsyncs the parent directory after path removal.
|
// It properly fsyncs the parent directory after path removal.
|
||||||
|
@ -1014,6 +1014,12 @@ func openParts(path string) ([]*partWrapper, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
partPath := path + "/" + fn
|
partPath := path + "/" + fn
|
||||||
|
if fs.IsEmptyDir(partPath) {
|
||||||
|
// Remove empty directory, which can be left after unclean shutdown on NFS.
|
||||||
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142
|
||||||
|
fs.MustRemoveAll(partPath)
|
||||||
|
continue
|
||||||
|
}
|
||||||
p, err := openFilePart(partPath)
|
p, err := openFilePart(partPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mustCloseParts(pws)
|
mustCloseParts(pws)
|
||||||
|
@ -1546,6 +1546,12 @@ func openParts(pathPrefix1, pathPrefix2, path string) ([]*partWrapper, error) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
partPath := path + "/" + fn
|
partPath := path + "/" + fn
|
||||||
|
if fs.IsEmptyDir(partPath) {
|
||||||
|
// Remove empty directory, which can be left after unclean shutdown on NFS.
|
||||||
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1142
|
||||||
|
fs.MustRemoveAll(partPath)
|
||||||
|
continue
|
||||||
|
}
|
||||||
startTime := time.Now()
|
startTime := time.Now()
|
||||||
p, err := openFilePart(partPath)
|
p, err := openFilePart(partPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user