diff --git a/app/vmbackup/README.md b/app/vmbackup/README.md index 185528ae4..9ad2dda6e 100644 --- a/app/vmbackup/README.md +++ b/app/vmbackup/README.md @@ -265,6 +265,16 @@ You have to add a custom url endpoint via flag: -customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com ``` +### Permanent deletion of objects in S3-compatible storages + +By default, when using S3 compatible storages, `vmbackup` and `vmbackupmanager` will use the basic delete operation, +which will delete current version of the object only. +In order to enforce removing all versions of an object when object is deleted, you need to use `-deleteAllObjectVersions` flag. +Using this flag will enforce listing all versions of an object and deleting them one by one. + +Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically. +Refer to the respective documentation for your object storage provider for more details. + ### Command-line flags Run `vmbackup -help` in order to see all the available options: @@ -282,6 +292,8 @@ Run `vmbackup -help` in order to see all the available options: See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html -customS3Endpoint string Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set + -deleteAllObjectVersions + Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages -dst string Where to put the backup on the remote storage. 
Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir -dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded diff --git a/app/vmbackupmanager/README.md b/app/vmbackupmanager/README.md index 1482297f1..0cf86d569 100644 --- a/app/vmbackupmanager/README.md +++ b/app/vmbackupmanager/README.md @@ -418,6 +418,8 @@ command-line flags: See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html -customS3Endpoint string Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set + -deleteAllObjectVersions + Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages -disableDaily Disable daily run. Default false -disableHourly diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index d7c27f233..4f8018f6f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -35,14 +35,15 @@ The sandbox cluster installation is running under the constant load generated by See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5049). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712). 
* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-filestream.disableFadvise` command-line flag, which can be used for disabling `fadvise` syscall during backup upload to the remote storage. By default `vmbackup` uses `fadvise` syscall in order to prevent from eviction of recently accessed data from the [OS page cache](https://en.wikipedia.org/wiki/Page_cache) when backing up large files. Sometimes the `fadvise` syscall may take significant amounts of CPU when the backup is performed with large value of `-concurrency` command-line flag on systems with big number of CPU cores. In this case it is better to manually disable `fadvise` syscall by passing `-filestream.disableFadvise` command-line flag to `vmbackup`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5120) for details. +* FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): add `-deleteAllObjectVersions` command-line flag, which can be used for forcing removal of all object versions in remote object storage. See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5121) issue and [these docs](https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages) for the details. * FEATURE: [Alerting rules for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts): account for `vmauth` component for alerts `ServiceDown` and `TooManyRestarts`. * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add support for functions, labels, values in autocomplete. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3006). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): retain specified time interval when executing a query from `Top Queries`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5097). 
* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): strip sensitive information such as auth headers or passwords from datasource, remote-read, remote-write or notifier URLs in log messages or UI. This behavior is by default and is controlled via `-datasource.showURL`, `-remoteRead.showURL`, `remoteWrite.showURL` or `-notifier.showURL` cmd-line flags. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5044). * BUGFIX: [vmselect](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance and memory usage during query processing on machines with big number of CPU cores. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5087) for details. ## [v1.94.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.94.0) Released at 2023-10-02 diff --git a/docs/vmbackup.md b/docs/vmbackup.md index 9489a3d32..de1b407d9 100644 --- a/docs/vmbackup.md +++ b/docs/vmbackup.md @@ -276,6 +276,16 @@ You have to add a custom url endpoint via flag: -customS3Endpoint=https://s3-fips.us-gov-west-1.amazonaws.com ``` +### Permanent deletion of objects in S3-compatible storages + +By default, when using S3 compatible storages, `vmbackup` and `vmbackupmanager` will use the basic delete operation, +which will delete current version of the object only. +In order to enforce removing all versions of an object when object is deleted, you need to use `-deleteAllObjectVersions` flag. +Using this flag will enforce listing all versions of an object and deleting them one by one. + +Alternatively, it is possible to use object storage lifecycle rules to remove non-current versions of objects automatically. +Refer to the respective documentation for your object storage provider for more details. 
+ ### Command-line flags Run `vmbackup -help` in order to see all the available options: @@ -293,6 +303,8 @@ Run `vmbackup -help` in order to see all the available options: See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html -customS3Endpoint string Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set + -deleteAllObjectVersions + Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages -dst string Where to put the backup on the remote storage. Example: gs://bucket/path/to/backup, s3://bucket/path/to/backup, azblob://container/path/to/backup or fs:///path/to/local/backup/dir -dst can point to the previous backup. In this case incremental backup is performed, i.e. only changed data is uploaded diff --git a/docs/vmbackupmanager.md b/docs/vmbackupmanager.md index 90ac90359..17b8197bd 100644 --- a/docs/vmbackupmanager.md +++ b/docs/vmbackupmanager.md @@ -429,6 +429,8 @@ command-line flags: See https://cloud.google.com/iam/docs/creating-managing-service-account-keys and https://docs.aws.amazon.com/general/latest/gr/aws-security-credentials.html -customS3Endpoint string Custom S3 endpoint for use with S3-compatible storages (e.g. MinIO). S3 is used if not set + -deleteAllObjectVersions + Whether to prune previous object versions when deleting an object. By default, when object storage has versioning enabled deleting the file removes only current version. This option forces removal of all previous versions. See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages -disableDaily Disable daily run. 
Default false -disableHourly diff --git a/lib/backup/azremote/azblob.go b/lib/backup/azremote/azblob.go index e8f069a6d..2e9b5bf07 100644 --- a/lib/backup/azremote/azblob.go +++ b/lib/backup/azremote/azblob.go @@ -143,12 +143,7 @@ func (fs *FS) ListParts() ([]common.Part, error) { // DeletePart deletes part p from fs. func (fs *FS) DeletePart(p common.Part) error { - bc := fs.clientForPart(p) - ctx := context.Background() - if _, err := bc.Delete(ctx, &blob.DeleteOptions{}); err != nil { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, bc.URL(), err) - } - return nil + return fs.delete(p.RemotePath(fs.Dir)) } // RemoveEmptyDirs recursively removes empty dirs in fs. @@ -278,14 +273,58 @@ func (fs *FS) DeleteFile(filePath string) error { } path := fs.Dir + filePath - bc := fs.clientForPath(path) - if err != nil { - return err + return fs.delete(path) +} + +func (fs *FS) delete(path string) error { + if *common.DeleteAllObjectVersions { + return fs.deleteObjectWithGenerations(path) } + return fs.deleteObject(path) +} + +func (fs *FS) deleteObjectWithGenerations(path string) error { + pager := fs.client.NewListBlobsFlatPager(&azblob.ListBlobsFlatOptions{ + Prefix: &path, + Include: azblob.ListBlobsInclude{ + Versions: true, + }, + }) + + ctx := context.Background() + for pager.More() { + resp, err := pager.NextPage(ctx) + if err != nil { + return fmt.Errorf("cannot list blobs at %s (remote path %q): %w", path, fs.Container, err) + } + + for _, v := range resp.Segment.BlobItems { + var c *blob.Client + // Either versioning is disabled or we are deleting the current version + if v.VersionID == nil || (v.VersionID != nil && v.IsCurrentVersion != nil && *v.IsCurrentVersion) { + c = fs.client.NewBlobClient(*v.Name) + } else { + c, err = fs.client.NewBlobClient(*v.Name).WithVersionID(*v.VersionID) + if err != nil { + return fmt.Errorf("cannot read blob at %q at %s: %w", path, fs.Container, err) + } + } + + if _, err := c.Delete(ctx, nil); err != 
nil { + return fmt.Errorf("cannot delete %q at %s: %w", path, fs.Container, err) + } + } + } + + return nil +} + +func (fs *FS) deleteObject(path string) error { + bc := fs.clientForPath(path) ctx := context.Background() if _, err := bc.Delete(ctx, nil); err != nil { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, bc.URL(), err) + return fmt.Errorf("cannot delete %q at %s: %w", bc.URL(), fs, err) } return nil } diff --git a/lib/backup/common/flags.go b/lib/backup/common/flags.go new file mode 100644 index 000000000..014288b61 --- /dev/null +++ b/lib/backup/common/flags.go @@ -0,0 +1,13 @@ +package common + +import ( + "flag" +) + +var ( + // DeleteAllObjectVersions is a flag for whether to prune previous object versions when deleting an object. + DeleteAllObjectVersions = flag.Bool("deleteAllObjectVersions", false, "Whether to prune previous object versions when deleting an object. "+ + "By default, when object storage has versioning enabled deleting the file removes only current version. "+ + "This option forces removal of all previous versions. "+ + "See: https://docs.victoriametrics.com/vmbackup.html#permanent-deletion-of-objects-in-s3-compatible-storages") +) diff --git a/lib/backup/gcsremote/gcs.go b/lib/backup/gcsremote/gcs.go index f1cd91359..ce7ece62a 100644 --- a/lib/backup/gcsremote/gcs.go +++ b/lib/backup/gcsremote/gcs.go @@ -2,6 +2,7 @@ package gcsremote import ( "context" + "errors" "fmt" "io" "strings" @@ -131,12 +132,8 @@ func (fs *FS) ListParts() ([]common.Part, error) { // DeletePart deletes part p from fs. func (fs *FS) DeletePart(p common.Part) error { - o := fs.object(p) - ctx := context.Background() - if err := o.Delete(ctx); err != nil { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, o.ObjectName(), err) - } - return nil + path := p.RemotePath(fs.Dir) + return fs.delete(path) } // RemoveEmptyDirs recursively removes empty dirs in fs. 
@@ -215,13 +212,52 @@ func (fs *FS) object(p common.Part) *storage.ObjectHandle { // The function does nothing if the filePath doesn't exists. func (fs *FS) DeleteFile(filePath string) error { path := fs.Dir + filePath + return fs.delete(path) +} + +func (fs *FS) delete(path string) error { + if *common.DeleteAllObjectVersions { + return fs.deleteObjectWithGenerations(path) + } + return fs.deleteObject(path) +} + +// deleteObjectWithGenerations deletes object at path and all its generations. +func (fs *FS) deleteObjectWithGenerations(path string) error { + it := fs.bkt.Objects(context.Background(), &storage.Query{ + Versions: true, + Prefix: path, + }) + ctx := context.Background() + for { + attrs, err := it.Next() + if errors.Is(err, iterator.Done) { + return nil + } + + if err != nil { + return fmt.Errorf("cannot read %q at %s: %w", path, fs, err) + } + + if err := fs.bkt.Object(path).Generation(attrs.Generation).Delete(ctx); err != nil { + if !errors.Is(err, storage.ErrObjectNotExist) { + return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err) + } + } + } +} + +// deleteObject deletes object at path. +// It does not specify a Generation, so it will delete the latest generation of the object. +func (fs *FS) deleteObject(path string) error { o := fs.bkt.Object(path) ctx := context.Background() if err := o.Delete(ctx); err != nil { - if err != storage.ErrObjectNotExist { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, o.ObjectName(), err) + if !errors.Is(err, storage.ErrObjectNotExist) { + return fmt.Errorf("cannot delete %q at %s: %w", o.ObjectName(), fs, err) } } + return nil } diff --git a/lib/backup/s3remote/s3.go b/lib/backup/s3remote/s3.go index 1bd958e95..45bc8cfc1 100644 --- a/lib/backup/s3remote/s3.go +++ b/lib/backup/s3remote/s3.go @@ -194,15 +194,7 @@ func (fs *FS) ListParts() ([]common.Part, error) { // DeletePart deletes part p from fs. 
func (fs *FS) DeletePart(p common.Part) error { path := fs.path(p) - input := &s3.DeleteObjectInput{ - Bucket: aws.String(fs.Bucket), - Key: aws.String(path), - } - _, err := fs.s3.DeleteObject(context.Background(), input) - if err != nil { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", p.Path, fs, path, err) - } - return nil + return fs.delete(path) } // RemoveEmptyDirs recursively removes empty dirs in fs. @@ -301,16 +293,53 @@ func (fs *FS) DeleteFile(filePath string) error { } path := fs.Dir + filePath + return fs.delete(path) +} + +func (fs *FS) delete(path string) error { + if *common.DeleteAllObjectVersions { + return fs.deleteObjectWithVersions(path) + } + return fs.deleteObject(path) +} + +// deleteObject deletes object at path. +// It does not specify a version ID, so it will delete the latest version of the object. +func (fs *FS) deleteObject(path string) error { input := &s3.DeleteObjectInput{ Bucket: aws.String(fs.Bucket), Key: aws.String(path), } if _, err := fs.s3.DeleteObject(context.Background(), input); err != nil { - return fmt.Errorf("cannot delete %q at %s (remote path %q): %w", filePath, fs, path, err) + return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err) } return nil } +// deleteObjectWithVersions deletes object at path and all its versions. 
+func (fs *FS) deleteObjectWithVersions(path string) error { + versions, err := fs.s3.ListObjectVersions(context.Background(), &s3.ListObjectVersionsInput{ + Bucket: aws.String(fs.Bucket), + Prefix: aws.String(path), + }) + if err != nil { + return fmt.Errorf("cannot list versions for %q at %s: %w", path, fs, err) + } + + for _, version := range versions.Versions { + input := &s3.DeleteObjectInput{ + Bucket: aws.String(fs.Bucket), + Key: version.Key, + VersionId: version.VersionId, + } + if _, err := fs.s3.DeleteObject(context.Background(), input); err != nil { + return fmt.Errorf("cannot delete %q at %s: %w", path, fs, err) + } + } + + return nil +} + // CreateFile creates filePath at fs and puts data into it. // // The file is overwritten if it already exists.