VictoriaMetrics/lib/metricsql/lexer.go
2019-12-25 22:03:59 +02:00

493 lines
8.6 KiB
Go

package metricsql
import (
"fmt"
"strconv"
"strings"
)
// lexer splits an input string into tokens, one token per Next call.
//
// Init must be called before the first Next. Prev steps back to the token
// that was current before the latest Next.
type lexer struct {
// Token contains the currently parsed token.
// An empty token means EOF.
Token string
// prevTokens holds the tokens that were current before Token, oldest first.
// Next appends to it and Prev pops from its end.
prevTokens []string
// nextTokens holds tokens pushed back by Prev.
// Next pops from its end before scanning any more input.
nextTokens []string
// sOrig is the string passed to Init.
sOrig string
// sTail is the part of the input that has not been scanned yet.
sTail string
// err is the first scanning error; once set, Next keeps returning it.
err error
}
// Context returns the current token followed by the not-yet-scanned input.
func (lex *lexer) Context() string {
	current, rest := lex.Token, lex.sTail
	return fmt.Sprintf("%s%s", current, rest)
}
// Init resets lex so that it scans s from its beginning.
//
// The current token, both token stacks and any stored error are cleared.
func (lex *lexer) Init(s string) {
	// Every field not named here is reset to its zero value.
	*lex = lexer{
		sOrig: s,
		sTail: s,
	}
}
// Next advances lex to the next token and stores it in lex.Token.
//
// Tokens pushed back by Prev are replayed before new input is scanned.
// The first scanning error is remembered and returned by every later call;
// lex.Token is left unchanged when an error is returned.
func (lex *lexer) Next() error {
	if lex.err != nil {
		return lex.err
	}
	lex.prevTokens = append(lex.prevTokens, lex.Token)

	if pending := len(lex.nextTokens); pending > 0 {
		// Replay the most recently pushed-back token.
		lex.Token, lex.nextTokens = lex.nextTokens[pending-1], lex.nextTokens[:pending-1]
		return nil
	}

	token, err := lex.next()
	// lex.err is nil at this point, so this stores either nil or the new error.
	lex.err = err
	if err == nil {
		lex.Token = token
	}
	return err
}
// next scans a single token from the start of lex.sTail.
//
// Leading whitespace and '#' comments (which run to the end of the line)
// are skipped. An empty token with a nil error means the end of input.
// On success lex.sTail is moved past the returned token.
func (lex *lexer) next() (string, error) {
	var s string
	for {
		// Drop leading whitespace and remember the trimmed tail.
		s = lex.sTail
		pos := 0
		for pos < len(s) && isSpaceChar(s[pos]) {
			pos++
		}
		s = s[pos:]
		lex.sTail = s
		if len(s) == 0 {
			return "", nil
		}
		if s[0] != '#' {
			break
		}
		// Skip comment till the end of string
		comment := s[1:]
		eol := strings.IndexByte(comment, '\n')
		if eol < 0 {
			return "", nil
		}
		lex.sTail = comment[eol+1:]
	}

	var token string
	switch s[0] {
	case '{', '}', '[', ']', '(', ')', ',':
		token = s[:1]
	default:
		switch {
		case isIdentPrefix(s):
			token = scanIdent(s)
		case isStringPrefix(s):
			str, err := scanString(s)
			if err != nil {
				return "", err
			}
			token = str
		default:
			// Operators and durations are tried in this order; the first
			// scanner that reports a positive length wins.
			n := scanBinaryOpPrefix(s)
			if n <= 0 {
				n = scanTagFilterOpPrefix(s)
			}
			if n <= 0 {
				n = scanDuration(s, false)
			}
			switch {
			case n > 0:
				token = s[:n]
			case isPositiveNumberPrefix(s):
				num, err := scanPositiveNumber(s)
				if err != nil {
					return "", err
				}
				token = num
			default:
				return "", fmt.Errorf("cannot recognize %q", s)
			}
		}
	}

	lex.sTail = s[len(token):]
	return token, nil
}
// scanString returns the quoted string literal at the start of s,
// including both quote characters.
//
// The first byte of s is used as the quote character. A quote preceded by an
// odd number of backslashes is treated as escaped and does not end the
// literal. An error is returned if s is shorter than two bytes or has no
// closing quote.
func scanString(s string) (string, error) {
	if len(s) < 2 {
		return "", fmt.Errorf("cannot find end of string in %q", s)
	}
	quote := s[0]
	i := 1
	for {
		n := strings.IndexByte(s[i:], quote)
		if n < 0 {
			return "", fmt.Errorf("cannot find closing quote %c for the string %q", quote, s)
		}
		i += n
		// Count the backslashes immediately preceding the candidate quote.
		bs := 0
		for bs < i && s[i-bs-1] == '\\' {
			bs++
		}
		if bs%2 == 0 {
			// An even number of backslashes means the quote is not escaped.
			return s[:i+1], nil
		}
		i++
	}
}
// scanPositiveNumber returns the unsigned decimal number at the start of s.
//
// The number has an optional integer part, an optional non-empty fractional
// part after '.', and an optional exponent after 'e' or 'E' with an optional
// sign. An error is returned for an empty s, an empty fractional part or an
// empty exponent.
func scanPositiveNumber(s string) (string, error) {
	// digitsEnd returns the index just past the run of digits starting at pos.
	digitsEnd := func(pos int) int {
		for pos < len(s) && isDecimalChar(s[pos]) {
			pos++
		}
		return pos
	}

	// Integer part. It may be empty if a fractional part follows.
	end := digitsEnd(0)
	if end == len(s) {
		if end == 0 {
			return "", fmt.Errorf("number cannot be empty")
		}
		return s, nil
	}

	if s[end] == '.' {
		// Fractional part. It cannot be empty.
		fracStart := end + 1
		end = digitsEnd(fracStart)
		if end == fracStart {
			return "", fmt.Errorf("missing fractional part in %q", s)
		}
		if end == len(s) {
			return s, nil
		}
	}

	if c := s[end]; c != 'e' && c != 'E' {
		return s[:end], nil
	}

	// Exponent part: optional sign followed by at least one digit.
	expStart := end + 1
	if expStart == len(s) {
		return "", fmt.Errorf("missing exponent part in %q", s)
	}
	if sign := s[expStart]; sign == '-' || sign == '+' {
		expStart++
	}
	end = digitsEnd(expStart)
	if end == expStart {
		return "", fmt.Errorf("missing exponent part in %q", s)
	}
	return s[:end], nil
}
// scanIdent returns the identifier at the start of s.
//
// A backslash escapes the byte that follows it, so that byte is accepted
// without being checked. It panics if no identifier byte is found; callers
// must check isIdentPrefix first.
func scanIdent(s string) string {
	end := 0
scan:
	for end < len(s) {
		switch c := s[end]; {
		case isIdentChar(c):
			end++
		case c == '\\':
			// Do not verify the next char, since it is escaped.
			end += 2
			if end > len(s) {
				// Trailing backslash with nothing after it.
				end = len(s)
			}
		default:
			break scan
		}
	}
	if end == 0 {
		panic("BUG: scanIdent couldn't find a single ident char; make sure isIdentPrefix called before scanIdent")
	}
	return s[:end]
}
// unescapeIdent removes backslash escapes from s.
//
// `\xHH` with two hex digits becomes the byte 0xHH. Any other `\c` becomes
// c itself. A trailing lone backslash is dropped. s is returned unchanged
// when it contains no backslash.
func unescapeIdent(s string) string {
	if strings.IndexByte(s, '\\') < 0 {
		return s
	}
	out := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' {
			out = append(out, c)
			continue
		}
		// Step over the backslash to the escaped byte.
		i++
		if i == len(s) {
			break
		}
		if s[i] == 'x' && len(s)-i >= 3 {
			hi, lo := fromHex(s[i+1]), fromHex(s[i+2])
			if hi >= 0 && lo >= 0 {
				out = append(out, byte((hi<<4)|lo))
				i += 2
				continue
			}
		}
		// Not a valid hex escape: keep the escaped byte as is.
		out = append(out, s[i])
	}
	return string(out)
}
// fromHex returns the value of the hex digit ch,
// or -1 if ch is not a hex digit. Both letter cases are accepted.
func fromHex(ch byte) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= ch && ch <= 'f':
		return int(ch-'a') + 10
	case 'A' <= ch && ch <= 'F':
		return int(ch-'A') + 10
	default:
		return -1
	}
}
// toHex returns the lowercase hex digit for n.
//
// Callers in this file pass values in the range 0..15.
func toHex(n byte) byte {
	if n >= 10 {
		return n - 10 + 'a'
	}
	return n + '0'
}
func appendEscapedIdent(dst []byte, s string) []byte {
for i := 0; i < len(s); i++ {
ch := s[i]
if isIdentChar(ch) {
if i == 0 && !isFirstIdentChar(ch) {
// hex-encode the first char
dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
} else {
dst = append(dst, ch)
}
} else if ch >= 0x20 && ch < 0x7f {
// Leave ASCII printable chars as is
dst = append(dst, '\\', ch)
} else {
// hex-encode non-printable chars
dst = append(dst, '\\', 'x', toHex(ch>>4), toHex(ch&0xf))
}
}
return dst
}
// Prev steps back one token: the current token is pushed onto nextTokens
// and the most recent entry of prevTokens becomes the current token.
//
// It panics if prevTokens is empty.
func (lex *lexer) Prev() {
	lex.nextTokens = append(lex.nextTokens, lex.Token)
	last := len(lex.prevTokens) - 1
	lex.Token, lex.prevTokens = lex.prevTokens[last], lex.prevTokens[:last]
}
// isEOF reports whether the token s is empty, which marks the end of input.
func isEOF(s string) bool {
	return s == ""
}
// scanTagFilterOpPrefix returns the length of the tag filter operator
// ("=~", "!~", "!=" or "=") at the start of s, or -1 if there is none.
func scanTagFilterOpPrefix(s string) int {
	if len(s) == 0 {
		return -1
	}
	switch s[0] {
	case '=':
		if len(s) > 1 && s[1] == '~' {
			return 2
		}
		return 1
	case '!':
		if len(s) > 1 && (s[1] == '~' || s[1] == '=') {
			return 2
		}
	}
	return -1
}
// isInfOrNaN reports whether s is "inf" or "nan", ignoring letter case.
func isInfOrNaN(s string) bool {
	if len(s) != 3 {
		return false
	}
	switch strings.ToLower(s) {
	case "inf", "nan":
		return true
	default:
		return false
	}
}
// isOffset reports whether s is the word "offset", ignoring letter case.
func isOffset(s string) bool {
	return strings.ToLower(s) == "offset"
}
// isStringPrefix reports whether s starts with one of the string quote
// characters: double quote, single quote or backtick.
//
// See https://prometheus.io/docs/prometheus/latest/querying/basics/#string-literals
func isStringPrefix(s string) bool {
	if s == "" {
		return false
	}
	first := s[0]
	return first == '"' || first == '\'' || first == '`'
}
// isPositiveNumberPrefix reports whether s starts like an unsigned number:
// either with a decimal digit, or with '.' followed by a decimal digit.
func isPositiveNumberPrefix(s string) bool {
	switch {
	case len(s) == 0:
		return false
	case isDecimalChar(s[0]):
		return true
	case s[0] == '.' && len(s) >= 2:
		// Check for .234 numbers
		return isDecimalChar(s[1])
	default:
		return false
	}
}
// isPositiveDuration reports whether the whole of s is a duration
// without a leading minus sign.
func isPositiveDuration(s string) bool {
	return scanDuration(s, false) == len(s)
}
// PositiveDurationValue returns the duration in milliseconds for the given s
// and the given step.
//
// It returns an error if s cannot be parsed or if the duration is negative.
func PositiveDurationValue(s string, step int64) (int64, error) {
	switch d, err := DurationValue(s, step); {
	case err != nil:
		return 0, err
	case d < 0:
		return 0, fmt.Errorf("duration cannot be negative; got %q", s)
	default:
		return d, nil
	}
}
// DurationValue returns the duration in milliseconds for the given s
// and the given step.
//
// s is a decimal number followed by a one-letter unit: s, m, h, d, w, y,
// or i. The i unit means multiples of step, where step is in milliseconds.
//
// The returned duration value can be negative.
func DurationValue(s string, step int64) (int64, error) {
	if scanDuration(s, true) != len(s) {
		return 0, fmt.Errorf("cannot parse duration %q", s)
	}
	last := len(s) - 1
	value, err := strconv.ParseFloat(s[:last], 64)
	if err != nil {
		return 0, fmt.Errorf("cannot parse duration %q: %s", s, err)
	}

	// Number of seconds in one unit of the given suffix.
	const (
		minute = 60.0
		hour   = 60 * minute
		day    = 24 * hour
	)
	var secondsPerUnit float64
	switch s[last] {
	case 's':
		secondsPerUnit = 1
	case 'm':
		secondsPerUnit = minute
	case 'h':
		secondsPerUnit = hour
	case 'd':
		secondsPerUnit = day
	case 'w':
		secondsPerUnit = 7 * day
	case 'y':
		secondsPerUnit = 365 * day
	case 'i':
		secondsPerUnit = float64(step) / 1e3
	default:
		return 0, fmt.Errorf("invalid duration suffix in %q", s)
	}
	return int64(secondsPerUnit * value * 1e3), nil
}
// scanDuration returns the length of the duration at the start of s,
// or -1 if s does not start with a duration.
//
// A duration is one or more decimal digits, optionally followed by '.' and
// one or more decimal digits, followed by a one-letter unit: s, m, h, d, w,
// y or i. A leading '-' is accepted only when canBeNegative is true.
func scanDuration(s string, canBeNegative bool) int {
	if len(s) == 0 {
		return -1
	}
	i := 0
	if s[0] == '-' && canBeNegative {
		i++
	}

	// Integer part: at least one digit is required. The check is made
	// against the start of the digits rather than 0, so that a bare sign
	// such as "-s" is rejected.
	intStart := i
	for i < len(s) && isDecimalChar(s[i]) {
		i++
	}
	if i == intStart || i == len(s) {
		return -1
	}

	if s[i] == '.' {
		// Fractional part: at least one digit is required, so "1.s" is
		// rejected in the same way scanPositiveNumber rejects "1.".
		i++
		fracStart := i
		for i < len(s) && isDecimalChar(s[i]) {
			i++
		}
		if i == fracStart || i == len(s) {
			return -1
		}
	}

	switch s[i] {
	case 's', 'm', 'h', 'd', 'w', 'y', 'i':
		return i + 1
	default:
		return -1
	}
}
// isDecimalChar reports whether ch is an ASCII decimal digit.
func isDecimalChar(ch byte) bool {
	return '0' <= ch && ch <= '9'
}
// isIdentPrefix reports whether s starts like an identifier: with a byte
// that may begin an identifier, or with a backslash.
func isIdentPrefix(s string) bool {
	if s == "" {
		return false
	}
	// A leading backslash is assumed to escape the next char.
	return s[0] == '\\' || isFirstIdentChar(s[0])
}
// isFirstIdentChar reports whether ch may start an identifier:
// an ASCII letter, '_' or ':'.
func isFirstIdentChar(ch byte) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	case ch == '_', ch == ':':
		return true
	default:
		return false
	}
}
// isIdentChar reports whether ch may appear inside an identifier: any byte
// that may start one, a decimal digit, or '.'.
func isIdentChar(ch byte) bool {
	return isFirstIdentChar(ch) || isDecimalChar(ch) || ch == '.'
}
// isSpaceChar reports whether ch is ASCII whitespace:
// space, '\t', '\n', '\v', '\f' or '\r'.
func isSpaceChar(ch byte) bool {
	// '\t', '\n', '\v', '\f' and '\r' are the consecutive bytes 0x09..0x0d.
	return ch == ' ' || ('\t' <= ch && ch <= '\r')
}