From 7e99bbb9678318c78bcfad79f6e84710d398ee2d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 25 Feb 2022 13:21:02 +0200 Subject: [PATCH] lib/storage: document why job-like and instance-like labels must be stored at mn.Tags[0] and mn.Tags[1] Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2244 --- lib/storage/index_db.go | 5 +++++ lib/storage/metric_name.go | 6 ++++++ 2 files changed, 11 insertions(+) diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 1aa76cf71..77e1c2782 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -608,6 +608,11 @@ func (db *indexDB) getOrCreateTSID(dst *TSID, metricName []byte, mn *MetricName) func generateTSID(dst *TSID, mn *MetricName) { dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup) + // Assume that the job-like tag is put at mn.Tags[0], while the instance-like tag is put at mn.Tags[1]. + // This assumption is true because mn.Tags must be sorted with mn.sortTags() before calling generateTSID() function. + // This allows grouping data blocks for the same (job, instance) close to each other on disk. + // This reduces disk seeks and disk read IO when data blocks are read from disk for the same job and/or instance. + // For example, data blocks for time series matching `process_resident_memory_bytes{job="vmstorage"}` are physically adjacent on disk. if len(mn.Tags) > 0 { dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value)) } diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go index 0d8d73ed2..6984e6c4f 100644 --- a/lib/storage/metric_name.go +++ b/lib/storage/metric_name.go @@ -608,6 +608,12 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) { // sortTags sorts tags in mn to canonical form needed for storing in the index. // +// sortTags tries to move the job-like tag to mn.Tags[0] and the instance-like tag to mn.Tags[1]. +// See commonTagKeys list for job-like and instance-like tags. 
+// This guarantees that indexdb entries for the same (job, instance) are located +// close to each other on disk. This reduces disk seeks and disk read IO when metrics +// for a particular job and/or instance are read from the disk. +// // The function also de-duplicates tags with identical keys in mn. The last tag value // for duplicate tags wins. //