mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-11-23 12:31:07 +01:00
app/vmselect: de-duplicate data exported via /api/v1/export/csv
by default
Previously the exported data wasn't de-duplicated. Now it is de-duplicated by default, and it is possible to export the raw data without deduplication by passing reduce_mem_usage=1 query arg to /api/v1/export/csv. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1837
This commit is contained in:
parent
f30ed13155
commit
193331d522
@ -837,7 +837,7 @@ unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) val
|
|||||||
|
|
||||||
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
||||||
|
|
||||||
The [deduplication](#deduplication) isn't applied for the data exported in CSV. It is expected that the de-duplication is performed during data import.
|
[Deduplication](#deduplication) is applied to the data exported in CSV by default. It is possible to export raw data without de-duplication by passing the `reduce_mem_usage=1` query arg to `/api/v1/export/csv`.
|
||||||
|
|
||||||
|
|
||||||
### How to export data in native format
|
### How to export data in native format
|
||||||
|
@ -129,6 +129,7 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
reduceMemUsage := searchutils.GetBool(r, "reduce_mem_usage")
|
||||||
deadline := searchutils.GetDeadlineForExport(r, startTime)
|
deadline := searchutils.GetDeadlineForExport(r, startTime)
|
||||||
tagFilterss, err := getTagFilterssFromRequest(r)
|
tagFilterss, err := getTagFilterssFromRequest(r)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -140,7 +141,38 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
|||||||
defer bufferedwriter.Put(bw)
|
defer bufferedwriter.Put(bw)
|
||||||
|
|
||||||
resultsCh := make(chan *quicktemplate.ByteBuffer, cgroup.AvailableCPUs())
|
resultsCh := make(chan *quicktemplate.ByteBuffer, cgroup.AvailableCPUs())
|
||||||
doneCh := make(chan error)
|
writeCSVLine := func(xb *exportBlock) {
|
||||||
|
if len(xb.timestamps) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
bb := quicktemplate.AcquireByteBuffer()
|
||||||
|
WriteExportCSVLine(bb, xb, fieldNames)
|
||||||
|
resultsCh <- bb
|
||||||
|
}
|
||||||
|
doneCh := make(chan error, 1)
|
||||||
|
if !reduceMemUsage {
|
||||||
|
rss, err := netstorage.ProcessSearchQuery(sq, true, deadline)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
|
||||||
|
}
|
||||||
|
go func() {
|
||||||
|
err := rss.RunParallel(func(rs *netstorage.Result, workerID uint) error {
|
||||||
|
if err := bw.Error(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
xb := exportBlockPool.Get().(*exportBlock)
|
||||||
|
xb.mn = &rs.MetricName
|
||||||
|
xb.timestamps = rs.Timestamps
|
||||||
|
xb.values = rs.Values
|
||||||
|
writeCSVLine(xb)
|
||||||
|
xb.reset()
|
||||||
|
exportBlockPool.Put(xb)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
close(resultsCh)
|
||||||
|
doneCh <- err
|
||||||
|
}()
|
||||||
|
} else {
|
||||||
go func() {
|
go func() {
|
||||||
err := netstorage.ExportBlocks(sq, deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error {
|
err := netstorage.ExportBlocks(sq, deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange) error {
|
||||||
if err := bw.Error(); err != nil {
|
if err := bw.Error(); err != nil {
|
||||||
@ -152,11 +184,7 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
|||||||
xb := exportBlockPool.Get().(*exportBlock)
|
xb := exportBlockPool.Get().(*exportBlock)
|
||||||
xb.mn = mn
|
xb.mn = mn
|
||||||
xb.timestamps, xb.values = b.AppendRowsWithTimeRangeFilter(xb.timestamps[:0], xb.values[:0], tr)
|
xb.timestamps, xb.values = b.AppendRowsWithTimeRangeFilter(xb.timestamps[:0], xb.values[:0], tr)
|
||||||
if len(xb.timestamps) > 0 {
|
writeCSVLine(xb)
|
||||||
bb := quicktemplate.AcquireByteBuffer()
|
|
||||||
WriteExportCSVLine(bb, xb, fieldNames)
|
|
||||||
resultsCh <- bb
|
|
||||||
}
|
|
||||||
xb.reset()
|
xb.reset()
|
||||||
exportBlockPool.Put(xb)
|
exportBlockPool.Put(xb)
|
||||||
return nil
|
return nil
|
||||||
@ -164,6 +192,7 @@ func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Reques
|
|||||||
close(resultsCh)
|
close(resultsCh)
|
||||||
doneCh <- err
|
doneCh <- err
|
||||||
}()
|
}()
|
||||||
|
}
|
||||||
// Consume all the data from resultsCh.
|
// Consume all the data from resultsCh.
|
||||||
for bb := range resultsCh {
|
for bb := range resultsCh {
|
||||||
// Do not check for error in bw.Write, since this error is checked inside netstorage.ExportBlocks above.
|
// Do not check for error in bw.Write, since this error is checked inside netstorage.ExportBlocks above.
|
||||||
@ -360,7 +389,7 @@ func exportHandler(w http.ResponseWriter, matches []string, etfs [][]storage.Tag
|
|||||||
defer bufferedwriter.Put(bw)
|
defer bufferedwriter.Put(bw)
|
||||||
|
|
||||||
resultsCh := make(chan *quicktemplate.ByteBuffer, cgroup.AvailableCPUs())
|
resultsCh := make(chan *quicktemplate.ByteBuffer, cgroup.AvailableCPUs())
|
||||||
doneCh := make(chan error)
|
doneCh := make(chan error, 1)
|
||||||
if !reduceMemUsage {
|
if !reduceMemUsage {
|
||||||
rss, err := netstorage.ProcessSearchQuery(sq, true, deadline)
|
rss, err := netstorage.ProcessSearchQuery(sq, true, deadline)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -24,6 +24,7 @@ sort: 15
|
|||||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix navigation over query history with `Ctrl+up/down` and fix zoom relatively to the cursor position. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1936).
|
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix navigation over query history with `Ctrl+up/down` and fix zoom relatively to the cursor position. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1936).
|
||||||
* BUGFIX: deduplicate samples more thoroughly if [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled. Previously some duplicate samples may be left on disk for time series with high churn rate. This may result in bigger storage space requirements.
|
* BUGFIX: deduplicate samples more thoroughly if [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled. Previously some duplicate samples may be left on disk for time series with high churn rate. This may result in bigger storage space requirements.
|
||||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): follow up to 5 redirects when `follow_redirects: true` is set for a particular scrape config. Previously only a single redirect was performed in this case. It is expected these redirects are performed to the original hostname. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1945).
|
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): follow up to 5 redirects when `follow_redirects: true` is set for a particular scrape config. Previously only a single redirect was performed in this case. It is expected these redirects are performed to the original hostname. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1945).
|
||||||
|
* BUGFIX: de-duplicate data exported via [/api/v1/export/csv](https://docs.victoriametrics.com/#how-to-export-csv-data) by default if [deduplication](https://docs.victoriametrics.com/#deduplication) is enabled. The de-duplication can be disabled by passing `reduce_mem_usage=1` query arg to `/api/v1/export/csv`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1837).
|
||||||
|
|
||||||
|
|
||||||
## [v1.70.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.70.0)
|
## [v1.70.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.70.0)
|
||||||
|
@ -837,7 +837,7 @@ unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) val
|
|||||||
|
|
||||||
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
||||||
|
|
||||||
The [deduplication](#deduplication) isn't applied for the data exported in CSV. It is expected that the de-duplication is performed during data import.
|
[Deduplication](#deduplication) is applied to the data exported in CSV by default. It is possible to export raw data without de-duplication by passing the `reduce_mem_usage=1` query arg to `/api/v1/export/csv`.
|
||||||
|
|
||||||
|
|
||||||
### How to export data in native format
|
### How to export data in native format
|
||||||
|
@ -841,7 +841,7 @@ unix timestamp in seconds or [RFC3339](https://www.ietf.org/rfc/rfc3339.txt) val
|
|||||||
|
|
||||||
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
The exported CSV data can be imported to VictoriaMetrics via [/api/v1/import/csv](#how-to-import-csv-data).
|
||||||
|
|
||||||
The [deduplication](#deduplication) isn't applied for the data exported in CSV. It is expected that the de-duplication is performed during data import.
|
[Deduplication](#deduplication) is applied to the data exported in CSV by default. It is possible to export raw data without de-duplication by passing the `reduce_mem_usage=1` query arg to `/api/v1/export/csv`.
|
||||||
|
|
||||||
|
|
||||||
### How to export data in native format
|
### How to export data in native format
|
||||||
|
Loading…
Reference in New Issue
Block a user