From 4c56acbafa484a1b902342447fd36ab91cc23599 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 1 Apr 2020 15:46:58 +0300 Subject: [PATCH] lib/storage: remove duplicate data points on 7/8*minScrapeInterval interval instead of 1/2*minScrapeInterval This should reduce storage usage and should improve deduplication accuracy --- lib/storage/dedup.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/storage/dedup.go b/lib/storage/dedup.go index f65b2635d3..5acbdb653e 100644 --- a/lib/storage/dedup.go +++ b/lib/storage/dedup.go @@ -16,14 +16,14 @@ func SetMinScrapeIntervalForDeduplication(interval time.Duration) { var minScrapeInterval = time.Duration(0) func getMinDelta() int64 { - // Divide minScrapeInterval by 2 in order to preserve proper data points. + // Use 7/8 of minScrapeInterval in order to preserve proper data points. // For instance, if minScrapeInterval=10, the following time series: // 10 15 19 25 30 34 41 - // Would be unexpectedly converted to: + // Would be unexpectedly converted to the following if using 100% of minScrapeInterval: // 10 25 41 - // When dividing minScrapeInterval by 2, it will be converted to the expected: + // When using 7/8 of minScrapeInterval, it will be converted to the expected: // 10 19 30 41 - return minScrapeInterval.Milliseconds() / 2 + return (minScrapeInterval.Milliseconds() / 8) * 7 } // DeduplicateSamples removes samples from src* if they are closer to each other than minScrapeInterval.