From 7e99bbb9678318c78bcfad79f6e84710d398ee2d Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Fri, 25 Feb 2022 13:21:02 +0200
Subject: [PATCH] lib/storage: document why job-like and instance-like labels
 must be stored at mn.Tags[0] and mn.Tags[1]

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2244
---
 lib/storage/index_db.go    | 5 +++++
 lib/storage/metric_name.go | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go
index 1aa76cf71..77e1c2782 100644
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -608,6 +608,11 @@ func (db *indexDB) getOrCreateTSID(dst *TSID, metricName []byte, mn *MetricName)
 
 func generateTSID(dst *TSID, mn *MetricName) {
 	dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup)
+	// Assume that the job-like tag is put at mn.Tags[0], while the instance-like tag is put at mn.Tags[1].
+	// This assumption is true because mn.Tags must be sorted with mn.sortTags() before calling generateTSID() function.
+	// This allows grouping data blocks for the same (job, instance) close to each other on disk.
+	// This reduces disk seeks and disk read IO when data blocks are read from disk for the same job and/or instance.
+	// For example, data blocks for time series matching `process_resident_memory_bytes{job="vmstorage"}` are physically adjacent on disk.
 	if len(mn.Tags) > 0 {
 		dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value))
 	}
diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go
index 0d8d73ed2..6984e6c4f 100644
--- a/lib/storage/metric_name.go
+++ b/lib/storage/metric_name.go
@@ -608,6 +608,12 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) {
 
 // sortTags sorts tags in mn to canonical form needed for storing in the index.
 //
+// sortTags tries to move the job-like tag to mn.Tags[0] and the instance-like tag to mn.Tags[1].
+// See commonTagKeys list for job-like and instance-like tags.
+// This guarantees that indexdb entries for the same (job, instance) are located
+// close to each other on disk. This reduces disk seeks and disk read IO when metrics
+// for a particular job and/or instance are read from the disk.
+//
 // The function also de-duplicates tags with identical keys in mn. The last tag value
 // for duplicate tags wins.
 //