VictoriaMetrics/app/vmctl/opentsdb/parser.go

package opentsdb

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
)

var (
	allowedFirstChar = regexp.MustCompile("^[a-zA-Z]")
)

func convertDuration(duration string) (time.Duration, error) {
	/*
		Golang's time library doesn't support many different
		string formats (year, month, week, day) because they
		aren't consistent ranges. But Java's library _does_.
		Consequently, we'll need to handle all the custom
		time ranges, and, to make the internal API call consistent,
		we'll need to allow for durations that Go supports, too.

		The nice thing is all the "broken" time ranges are > 1 hour,
		so we can just make assumptions to convert them to a range in hours.
		They aren't *good* assumptions, but they're reasonable
		for this function.
	*/
	var actualDuration time.Duration
	var err error
	var timeValue int
	if strings.HasSuffix(duration, "y") {
		timeValue, err = strconv.Atoi(strings.Trim(duration, "y"))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
		timeValue = timeValue * 365 * 24
		actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
	} else if strings.HasSuffix(duration, "w") {
		timeValue, err = strconv.Atoi(strings.Trim(duration, "w"))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
		timeValue = timeValue * 7 * 24
		actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
	} else if strings.HasSuffix(duration, "d") {
		timeValue, err = strconv.Atoi(strings.Trim(duration, "d"))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
		timeValue = timeValue * 24
		actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
	} else if strings.HasSuffix(duration, "h") || strings.HasSuffix(duration, "m") || strings.HasSuffix(duration, "s") || strings.HasSuffix(duration, "ms") {
		actualDuration, err = time.ParseDuration(duration)
		if err != nil {
			return 0, fmt.Errorf("invalid time range: %q", duration)
		}
	} else {
		return 0, fmt.Errorf("invalid time duration string: %q", duration)
	}
	return actualDuration, nil
}

// Convert an incoming retention "string" into the component parts
func convertRetention(retention string, offset int64, msecTime bool) (Retention, error) {
	/*
		A retention string coming in looks like
		sum-1m-avg:1h:30d
		So we:
		1. split on the :
		2. split on the - in slice 0
		3. create the time ranges we actually need
	*/
	chunks := strings.Split(retention, ":")
	if len(chunks) != 3 {
		return Retention{}, fmt.Errorf("invalid retention string: %q", retention)
	}
	queryLengthDuration, err := convertDuration(chunks[2])
	if err != nil {
		return Retention{}, fmt.Errorf("invalid ttl (second order) duration string: %q: %s", chunks[2], err)
	}
	// set ttl in milliseconds, unless we aren't using millisecond time in OpenTSDB...then use seconds
	queryLength := queryLengthDuration.Milliseconds()
	if !msecTime {
		queryLength = queryLength / 1000
	}
	queryRange := queryLength
	// bump by the offset so we don't look at empty ranges any time offset > ttl
	queryLength += offset

	// first/second order aggregations for queries defined in chunk 0...
	aggregates := strings.Split(chunks[0], "-")
	if len(aggregates) != 3 {
		return Retention{}, fmt.Errorf("invalid aggregation string: %q", chunks[0])
	}

	aggTimeDuration, err := convertDuration(aggregates[1])
	if err != nil {
		return Retention{}, fmt.Errorf("invalid aggregation time duration string: %q: %s", aggregates[1], err)
	}
	aggTime := aggTimeDuration.Milliseconds()
	if !msecTime {
		aggTime = aggTime / 1000
	}

	rowLengthDuration, err := convertDuration(chunks[1])
	if err != nil {
		return Retention{}, fmt.Errorf("invalid row length (first order) duration string: %q: %s", chunks[1], err)
	}
	// set length of each row in milliseconds, unless we aren't using millisecond time in OpenTSDB...then use seconds
	rowLength := rowLengthDuration.Milliseconds()
	if !msecTime {
		rowLength = rowLength / 1000
	}

	var querySize int64
	/*
		The idea here is to ensure each individual query sent to OpenTSDB is *at least*
		large enough to ensure no single query requests essentially 0 data.
	*/
	if rowLength > aggTime {
		/*
			We'll look at 2x the row size for each query we perform
			This is a strange function, but the logic works like this:
			1. we discover the "number" of ranges we should split the time range into
			   This is found with queryRange / (rowLength * 4)...kind of a percentage query
			2. we discover the actual size of each "chunk"
			   This is second division step
		*/
		querySize = int64(queryRange / (queryRange / (rowLength * 4)))
	} else {
		/*
			Unless the aggTime (how long a range of data we're requesting per individual point)
			is greater than the row size. Then we'll need to use that to determine
			how big each individual query should be
		*/
		querySize = int64(queryRange / (queryRange / (aggTime * 4)))
	}

	var timeChunks []TimeRange
	var i int64
	for i = offset; i <= queryLength; i = i + querySize {
		timeChunks = append(timeChunks, TimeRange{Start: i + querySize, End: i})
	}

	ret := Retention{FirstOrder: aggregates[0],
		SecondOrder: aggregates[2],
		AggTime:     aggregates[1],
		QueryRanges: timeChunks}
	return ret, nil
}

// This ensures any incoming data from OpenTSDB matches the Prometheus data model
// https://prometheus.io/docs/concepts/data_model
func modifyData(msg Metric, normalize bool) (Metric, error) {
	finalMsg := Metric{
		Metric: "", Tags: make(map[string]string),
		Timestamps: msg.Timestamps, Values: msg.Values,
	}
	// if the metric name has invalid characters, the data model says to drop it
	if !allowedFirstChar.MatchString(msg.Metric) {
		return Metric{}, fmt.Errorf("%s has a bad first character", msg.Metric)
	}
	name := msg.Metric
	// if normalization requested, lowercase the name
	if normalize {
		name = strings.ToLower(name)
	}
	/*
		replace bad characters in metric name with _ per the data model
	*/
	finalMsg.Metric = promrelabel.SanitizeMetricName(name)
	// replace bad characters in tag keys with _ per the data model
	for key, value := range msg.Tags {
		// if normalization requested, lowercase the key and value
		if normalize {
			key = strings.ToLower(key)
			value = strings.ToLower(value)
		}
		/*
			replace all explicitly bad characters with _
		*/
		key = promrelabel.SanitizeLabelName(key)
		// tags that start with __ are considered custom stats for internal prometheus stuff, we should drop them
		if !strings.HasPrefix(key, "__") {
			finalMsg.Tags[key] = value
		}
	}
	return finalMsg, nil
}
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`package opentsdb`

			`import (`
			`"fmt"`
			`"regexp"`
			`"strconv"`
			`"strings"`
			`"time"`
lib/promrelabel: add SanitizeName() function for sanitizing Prometheus metric names and label names Optimize this function by using results cache for input strings. Use this function all over the code. This is a follow-up for fcffdba9dcbc6a696d328cc8f4268349d54218e3 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113 2022-09-28 08:59:36 +02:00
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"`
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`)`

			`var (`
			`allowedFirstChar = regexp.MustCompile("^[a-zA-Z]")`
			`)`

			`func convertDuration(duration string) (time.Duration, error) {`
			`/*`
			`Golang's time library doesn't support many different`
			`string formats (year, month, week, day) because they`
			`aren't consistent ranges. But Java's library _does_.`
			`Consequently, we'll need to handle all the custom`
			`time ranges, and, to make the internal API call consistent,`
			`we'll need to allow for durations that Go supports, too.`

			`The nice thing is all the "broken" time ranges are > 1 hour,`
			`so we can just make assumptions to convert them to a range in hours.`
			`They aren't good assumptions, but they're reasonable`
			`for this function.`
			`*/`
			`var actualDuration time.Duration`
			`var err error`
			`var timeValue int`
			`if strings.HasSuffix(duration, "y") {`
			`timeValue, err = strconv.Atoi(strings.Trim(duration, "y"))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`timeValue = timeValue * 365 * 24`
			`actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`} else if strings.HasSuffix(duration, "w") {`
			`timeValue, err = strconv.Atoi(strings.Trim(duration, "w"))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`timeValue = timeValue * 7 * 24`
			`actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`} else if strings.HasSuffix(duration, "d") {`
			`timeValue, err = strconv.Atoi(strings.Trim(duration, "d"))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`timeValue = timeValue * 24`
			`actualDuration, err = time.ParseDuration(fmt.Sprintf("%vh", timeValue))`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`} else if strings.HasSuffix(duration, "h") \|\| strings.HasSuffix(duration, "m") \|\| strings.HasSuffix(duration, "s") \|\| strings.HasSuffix(duration, "ms") {`
			`actualDuration, err = time.ParseDuration(duration)`
			`if err != nil {`
			`return 0, fmt.Errorf("invalid time range: %q", duration)`
			`}`
			`} else {`
			`return 0, fmt.Errorf("invalid time duration string: %q", duration)`
			`}`
			`return actualDuration, nil`
			`}`

			`// Convert an incoming retention "string" into the component parts`
			`func convertRetention(retention string, offset int64, msecTime bool) (Retention, error) {`
			`/*`
			`A retention string coming in looks like`
			`sum-1m-avg:1h:30d`
			`So we:`
			`1. split on the :`
			`2. split on the - in slice 0`
			`3. create the time ranges we actually need`
			`*/`
			`chunks := strings.Split(retention, ":")`
			`if len(chunks) != 3 {`
			`return Retention{}, fmt.Errorf("invalid retention string: %q", retention)`
			`}`
Address some edge cases in OpenTSDB importer and speed it up (#2019) * Simplify queries to OpenTSDB (and make them properly appear in OpenTSDB query stats) and also tweak defaults a bit * Convert seconds to milliseconds before writing to VictoriaMetrics and increase subquery size Signed-off-by: John Seekins <jseekins@datto.com> 2022-01-04 07:51:23 +01:00			`queryLengthDuration, err := convertDuration(chunks[2])`
			`if err != nil {`
			`return Retention{}, fmt.Errorf("invalid ttl (second order) duration string: %q: %s", chunks[2], err)`
			`}`
			`// set ttl in milliseconds, unless we aren't using millisecond time in OpenTSDB...then use seconds`
			`queryLength := queryLengthDuration.Milliseconds()`
			`if !msecTime {`
			`queryLength = queryLength / 1000`
			`}`
			`queryRange := queryLength`
			`// bump by the offset so we don't look at empty ranges any time offset > ttl`
			`queryLength += offset`

			`// first/second order aggregations for queries defined in chunk 0...`
			`aggregates := strings.Split(chunks[0], "-")`
			`if len(aggregates) != 3 {`
			`return Retention{}, fmt.Errorf("invalid aggregation string: %q", chunks[0])`
			`}`

			`aggTimeDuration, err := convertDuration(aggregates[1])`
			`if err != nil {`
			`return Retention{}, fmt.Errorf("invalid aggregation time duration string: %q: %s", aggregates[1], err)`
			`}`
			`aggTime := aggTimeDuration.Milliseconds()`
			`if !msecTime {`
			`aggTime = aggTime / 1000`
			`}`

OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`rowLengthDuration, err := convertDuration(chunks[1])`
			`if err != nil {`
			`return Retention{}, fmt.Errorf("invalid row length (first order) duration string: %q: %s", chunks[1], err)`
			`}`
			`// set length of each row in milliseconds, unless we aren't using millisecond time in OpenTSDB...then use seconds`
			`rowLength := rowLengthDuration.Milliseconds()`
			`if !msecTime {`
			`rowLength = rowLength / 1000`
			`}`
Address some edge cases in OpenTSDB importer and speed it up (#2019) * Simplify queries to OpenTSDB (and make them properly appear in OpenTSDB query stats) and also tweak defaults a bit * Convert seconds to milliseconds before writing to VictoriaMetrics and increase subquery size Signed-off-by: John Seekins <jseekins@datto.com> 2022-01-04 07:51:23 +01:00
			`var querySize int64`
			`/*`
			`The idea here is to ensure each individual query sent to OpenTSDB is at least`
			`large enough to ensure no single query requests essentially 0 data.`
			`*/`
			`if rowLength > aggTime {`
			`/*`
			`We'll look at 2x the row size for each query we perform`
			`This is a strange function, but the logic works like this:`
			`1. we discover the "number" of ranges we should split the time range into`
			`This is found with queryRange / (rowLength * 4)...kind of a percentage query`
			`2. we discover the actual size of each "chunk"`
			`This is second division step`
			`*/`
			`querySize = int64(queryRange / (queryRange / (rowLength * 4)))`
			`} else {`
			`/*`
			`Unless the aggTime (how long a range of data we're requesting per individual point)`
			`is greater than the row size. Then we'll need to use that to determine`
			`how big each individual query should be`
			`*/`
			`querySize = int64(queryRange / (queryRange / (aggTime * 4)))`
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`}`
Simplify queries to OpenTSDB for migration (#1809) * Simplify queries to OpenTSDB (and make them properly appear in OpenTSDB query stats) and also tweak defaults a bit Signed-off-by: John Seekins <jseekins@datto.com> * remove extraneous printlns Signed-off-by: John Seekins <jseekins@datto.com> * remove empty line Signed-off-by: John Seekins <jseekins@datto.com> * fix bug in offset calcuation and closer to working with simpler queries Signed-off-by: John Seekins <jseekins@datto.com> * fix boolean eval Signed-off-by: John Seekins <jseekins@datto.com> * fix casting and check for multiple series Signed-off-by: John Seekins <jseekins@datto.com> 2021-11-18 18:18:15 +01:00
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`var timeChunks []TimeRange`
			`var i int64`
Address some edge cases in OpenTSDB importer and speed it up (#2019) * Simplify queries to OpenTSDB (and make them properly appear in OpenTSDB query stats) and also tweak defaults a bit * Convert seconds to milliseconds before writing to VictoriaMetrics and increase subquery size Signed-off-by: John Seekins <jseekins@datto.com> 2022-01-04 07:51:23 +01:00			`for i = offset; i <= queryLength; i = i + querySize {`
			`timeChunks = append(timeChunks, TimeRange{Start: i + querySize, End: i})`
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`}`

			`ret := Retention{FirstOrder: aggregates[0],`
			`SecondOrder: aggregates[2],`
			`AggTime: aggregates[1],`
			`QueryRanges: timeChunks}`
			`return ret, nil`
			`}`

			`// This ensures any incoming data from OpenTSDB matches the Prometheus data model`
			`// https://prometheus.io/docs/concepts/data_model`
			`func modifyData(msg Metric, normalize bool) (Metric, error) {`
			`finalMsg := Metric{`
			`Metric: "", Tags: make(map[string]string),`
			`Timestamps: msg.Timestamps, Values: msg.Values,`
			`}`
			`// if the metric name has invalid characters, the data model says to drop it`
			`if !allowedFirstChar.MatchString(msg.Metric) {`
			`return Metric{}, fmt.Errorf("%s has a bad first character", msg.Metric)`
			`}`
			`name := msg.Metric`
			`// if normalization requested, lowercase the name`
			`if normalize {`
			`name = strings.ToLower(name)`
			`}`
			`/*`
			`replace bad characters in metric name with _ per the data model`
			`*/`
lib/promrelabel: properly replace `:` char with `_` in metric names when -usePromCompatibleNaming command-line flag is set This addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113#issuecomment-1275077071 comment from @johnseekins 2023-08-14 16:14:40 +02:00			`finalMsg.Metric = promrelabel.SanitizeMetricName(name)`
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`// replace bad characters in tag keys with _ per the data model`
			`for key, value := range msg.Tags {`
			`// if normalization requested, lowercase the key and value`
			`if normalize {`
			`key = strings.ToLower(key)`
			`value = strings.ToLower(value)`
			`}`
			`/*`
			`replace all explicitly bad characters with _`
			`*/`
lib/promrelabel: properly replace `:` char with `_` in metric names when -usePromCompatibleNaming command-line flag is set This addresses https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113#issuecomment-1275077071 comment from @johnseekins 2023-08-14 16:14:40 +02:00			`key = promrelabel.SanitizeLabelName(key)`
OpenTSDB migration to VictoriaMetrics (#1089) 2021-04-08 21:58:06 +02:00			`// tags that start with __ are considered custom stats for internal prometheus stuff, we should drop them`
			`if !strings.HasPrefix(key, "__") {`
			`finalMsg.Tags[key] = value`
			`}`
			`}`
			`return finalMsg, nil`
			`}`