lib/storage: document why job-like and instance-like labels must be stored at mn.Tags[0] and mn.Tags[1]

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2244
This commit is contained in:
Aliaksandr Valialkin 2022-02-25 13:21:02 +02:00
parent 8bf3fb917a
commit 7e99bbb967
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
2 changed files with 11 additions and 0 deletions

View File

@ -608,6 +608,11 @@ func (db *indexDB) getOrCreateTSID(dst *TSID, metricName []byte, mn *MetricName)
func generateTSID(dst *TSID, mn *MetricName) {
dst.MetricGroupID = xxhash.Sum64(mn.MetricGroup)
// Assume that the job-like tag is put at mn.Tags[0], while the instance-like tag is put at mn.Tags[1].
// This assumption is true because mn.Tags must be sorted with mn.sortTags() before calling generateTSID() function.
// This allows grouping data blocks for the same (job, instance) close to each other on disk.
// This reduces disk seeks and disk read IO when data blocks are read from disk for the same job and/or instance.
// For example, data blocks for time series matching `process_resident_memory_bytes{job="vmstorage"}` are physically adjacent on disk.
if len(mn.Tags) > 0 {
dst.JobID = uint32(xxhash.Sum64(mn.Tags[0].Value))
}

View File

@ -608,6 +608,12 @@ func unmarshalBytesFast(src []byte) ([]byte, []byte, error) {
// sortTags sorts tags in mn to canonical form needed for storing in the index.
//
// sortTags tries to move the job-like tag to mn.Tags[0] and the instance-like tag to mn.Tags[1].
// See commonTagKeys list for job-like and instance-like tags.
// This guarantees that indexdb entries for the same (job, instance) are located
// close to each other on disk. This reduces disk seeks and disk read IO when metrics
// for a particular job and/or instance are read from the disk.
//
// The function also de-duplicates tags with identical keys in mn. The last tag value
// for duplicate tags wins.
//