From f5cb213ef954ea30effddc52cfb0d59e9cb86d0e Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@gmail.com>
Date: Wed, 9 Sep 2020 23:18:32 +0300
Subject: [PATCH] lib/storage: reuse timestamp blocks for adjacent metric
 blocks with identical timestamps

This should reduce disk space usage when scraping targets containing metrics with identical names
such as `node_cpu_seconds_total`, histograms, quantiles, etc.

Expose `vm_timestamps_blocks_merged_total` and `vm_timestamps_bytes_saved_total` metrics for monitoring
the effectiveness of timestamp blocks merging.
---
 app/vmstorage/main.go              |  7 ++++++
 lib/storage/block_stream_reader.go | 30 ++++++++++++++++++++-----
 lib/storage/block_stream_writer.go | 36 +++++++++++++++++++++++++-----
 lib/storage/storage.go             |  6 +++++
 4 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go
index 61369c025f..78f03a6127 100644
--- a/app/vmstorage/main.go
+++ b/app/vmstorage/main.go
@@ -389,6 +389,13 @@ func registerStorageMetrics(strg *storage.Storage) {
 		return float64(m().SlowMetricNameLoads)
 	})
 
+	metrics.NewGauge(`vm_timestamps_blocks_merged_total`, func() float64 {
+		return float64(m().TimestampsBlocksMerged)
+	})
+	metrics.NewGauge(`vm_timestamps_bytes_saved_total`, func() float64 {
+		return float64(m().TimestampsBytesSaved)
+	})
+
 	metrics.NewGauge(`vm_rows{type="storage/big"}`, func() float64 {
 		return float64(tm().BigRowsCount)
 	})
diff --git a/lib/storage/block_stream_reader.go b/lib/storage/block_stream_reader.go
index 9b5e206190..f4aa6fab23 100644
--- a/lib/storage/block_stream_reader.go
+++ b/lib/storage/block_stream_reader.go
@@ -51,6 +51,9 @@ type blockStreamReader struct {
 	valuesBlockOffset     uint64
 	indexBlockOffset      uint64
 
+	prevTimestampsBlockOffset uint64
+	prevTimestampsData        []byte
+
 	indexData           []byte
 	compressedIndexData []byte
 
@@ -87,6 +90,9 @@ func (bsr *blockStreamReader) reset() {
 	bsr.valuesBlockOffset = 0
 	bsr.indexBlockOffset = 0
 
+	bsr.prevTimestampsBlockOffset = 0
+	bsr.prevTimestampsData = bsr.prevTimestampsData[:0]
+
 	bsr.indexData = bsr.indexData[:0]
 	bsr.compressedIndexData = bsr.compressedIndexData[:0]
 
@@ -275,7 +281,13 @@ func (bsr *blockStreamReader) readBlock() error {
 		return fmt.Errorf("invalid MaxTimestamp at block header at offset %d; got %d; cannot be bigger than %d",
 			bsr.prevIndexBlockOffset(), bsr.Block.bh.MaxTimestamp, bsr.ph.MaxTimestamp)
 	}
-	if bsr.Block.bh.TimestampsBlockOffset != bsr.timestampsBlockOffset {
+	usePrevTimestamps := len(bsr.prevTimestampsData) > 0 && bsr.Block.bh.TimestampsBlockOffset == bsr.prevTimestampsBlockOffset
+	if usePrevTimestamps {
+		if int(bsr.Block.bh.TimestampsBlockSize) != len(bsr.prevTimestampsData) {
+			return fmt.Errorf("invalid TimestampsBlockSize at block header at offset %d; got %d; want %d",
+				bsr.prevIndexBlockOffset(), bsr.Block.bh.TimestampsBlockSize, len(bsr.prevTimestampsData))
+		}
+	} else if bsr.Block.bh.TimestampsBlockOffset != bsr.timestampsBlockOffset {
 		return fmt.Errorf("invalid TimestampsBlockOffset at block header at offset %d; got %d; want %d",
 			bsr.prevIndexBlockOffset(), bsr.Block.bh.TimestampsBlockOffset, bsr.timestampsBlockOffset)
 	}
@@ -285,9 +297,15 @@ func (bsr *blockStreamReader) readBlock() error {
 	}
 
 	// Read timestamps data.
-	bsr.Block.timestampsData = bytesutil.Resize(bsr.Block.timestampsData, int(bsr.Block.bh.TimestampsBlockSize))
-	if err := fs.ReadFullData(bsr.timestampsReader, bsr.Block.timestampsData); err != nil {
-		return fmt.Errorf("cannot read timestamps block at offset %d: %w", bsr.timestampsBlockOffset, err)
+	if usePrevTimestamps {
+		bsr.Block.timestampsData = append(bsr.Block.timestampsData[:0], bsr.prevTimestampsData...)
+	} else {
+		bsr.Block.timestampsData = bytesutil.Resize(bsr.Block.timestampsData, int(bsr.Block.bh.TimestampsBlockSize))
+		if err := fs.ReadFullData(bsr.timestampsReader, bsr.Block.timestampsData); err != nil {
+			return fmt.Errorf("cannot read timestamps block at offset %d: %w", bsr.timestampsBlockOffset, err)
+		}
+		bsr.prevTimestampsBlockOffset = bsr.timestampsBlockOffset
+		bsr.prevTimestampsData = append(bsr.prevTimestampsData[:0], bsr.Block.timestampsData...)
 	}
 
 	// Read values data.
@@ -297,7 +315,9 @@ func (bsr *blockStreamReader) readBlock() error {
 	}
 
 	// Update offsets.
-	bsr.timestampsBlockOffset += uint64(bsr.Block.bh.TimestampsBlockSize)
+	if !usePrevTimestamps {
+		bsr.timestampsBlockOffset += uint64(bsr.Block.bh.TimestampsBlockSize)
+	}
 	bsr.valuesBlockOffset += uint64(bsr.Block.bh.ValuesBlockSize)
 	bsr.indexBlockHeadersCount++
 
diff --git a/lib/storage/block_stream_writer.go b/lib/storage/block_stream_writer.go
index ce35c0b0b9..899facbf75 100644
--- a/lib/storage/block_stream_writer.go
+++ b/lib/storage/block_stream_writer.go
@@ -1,6 +1,7 @@
 package storage
 
 import (
+	"bytes"
 	"fmt"
 	"io"
 	"path/filepath"
@@ -38,6 +39,13 @@ type blockStreamWriter struct {
 
 	metaindexData           []byte
 	compressedMetaindexData []byte
+
+	// prevTimestamps* is used as an optimization for reducing disk space usage
+	// when serially written blocks have identical timestamps.
+	// This is usually the case when adjacent blocks contain metrics scraped from the same target,
+	// since such metrics have identical timestamps.
+	prevTimestampsData        []byte
+	prevTimestampsBlockOffset uint64
 }
 
 func (bsw *blockStreamWriter) assertWriteClosers() {
@@ -66,6 +74,9 @@ func (bsw *blockStreamWriter) reset() {
 
 	bsw.metaindexData = bsw.metaindexData[:0]
 	bsw.compressedMetaindexData = bsw.compressedMetaindexData[:0]
+
+	bsw.prevTimestampsData = bsw.prevTimestampsData[:0]
+	bsw.prevTimestampsBlockOffset = 0
 }
 
 // InitFromInmemoryPart initialzes bsw from inmemory part.
@@ -177,22 +188,35 @@ func (bsw *blockStreamWriter) WriteExternalBlock(b *Block, ph *partHeader, rowsM
 	atomic.AddUint64(rowsMerged, uint64(b.rowsCount()))
 	b.deduplicateSamplesDuringMerge()
 	headerData, timestampsData, valuesData := b.MarshalData(bsw.timestampsBlockOffset, bsw.valuesBlockOffset)
-
+	usePrevTimestamps := len(bsw.prevTimestampsData) > 0 && bytes.Equal(timestampsData, bsw.prevTimestampsData)
+	if usePrevTimestamps {
+		// The current timestamps block equals the previous timestamps block.
+		// Update headerData so it points to the previous timestamps block. This saves disk space.
+		headerData, timestampsData, valuesData = b.MarshalData(bsw.prevTimestampsBlockOffset, bsw.valuesBlockOffset)
+		atomic.AddUint64(&timestampsBlocksMerged, 1)
+		atomic.AddUint64(&timestampsBytesSaved, uint64(len(timestampsData)))
+	}
 	bsw.indexData = append(bsw.indexData, headerData...)
 	bsw.mr.RegisterBlockHeader(&b.bh)
 	if len(bsw.indexData) >= maxBlockSize {
 		bsw.flushIndexData()
 	}
-
-	fs.MustWriteData(bsw.timestampsWriter, timestampsData)
-	bsw.timestampsBlockOffset += uint64(len(timestampsData))
-
+	if !usePrevTimestamps {
+		bsw.prevTimestampsData = append(bsw.prevTimestampsData[:0], timestampsData...)
+		bsw.prevTimestampsBlockOffset = bsw.timestampsBlockOffset
+		fs.MustWriteData(bsw.timestampsWriter, timestampsData)
+		bsw.timestampsBlockOffset += uint64(len(timestampsData))
+	}
 	fs.MustWriteData(bsw.valuesWriter, valuesData)
 	bsw.valuesBlockOffset += uint64(len(valuesData))
-
 	updatePartHeader(b, ph)
 }
 
+var (
+	timestampsBlocksMerged uint64 // incremented atomically by WriteExternalBlock each time a block reuses the previous timestamps block
+	timestampsBytesSaved   uint64 // grows atomically by len(timestampsData) on every such reuse, i.e. bytes not written to the timestamps stream
+)
+
 func updatePartHeader(b *Block, ph *partHeader) {
 	ph.BlocksCount++
 	ph.RowsCount += uint64(b.bh.RowsCount)
diff --git a/lib/storage/storage.go b/lib/storage/storage.go
index d4c45fdca3..cc57f1acb1 100644
--- a/lib/storage/storage.go
+++ b/lib/storage/storage.go
@@ -355,6 +355,9 @@ type Metrics struct {
 	SlowPerDayIndexInserts uint64
 	SlowMetricNameLoads    uint64
 
+	TimestampsBlocksMerged uint64
+	TimestampsBytesSaved   uint64
+
 	TSIDCacheSize       uint64
 	TSIDCacheSizeBytes  uint64
 	TSIDCacheRequests   uint64
@@ -420,6 +423,9 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
 	m.SlowPerDayIndexInserts += atomic.LoadUint64(&s.slowPerDayIndexInserts)
 	m.SlowMetricNameLoads += atomic.LoadUint64(&s.slowMetricNameLoads)
 
+	m.TimestampsBlocksMerged = atomic.LoadUint64(&timestampsBlocksMerged)
+	m.TimestampsBytesSaved = atomic.LoadUint64(&timestampsBytesSaved)
+
 	var cs fastcache.Stats
 	s.tsidCache.UpdateStats(&cs)
 	m.TSIDCacheSize += cs.EntriesCount