mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-22 00:00:39 +01:00
113 lines
2.8 KiB
Go
113 lines
2.8 KiB
Go
|
package matchr
|
||
|
|
||
|
// DamerauLevenshtein computes the Damerau-Levenshtein distance between two
// strings. The returned value - distance - is the number of insertions,
// deletions, substitutions, and transpositions it takes to transform one
// string (s1) into another (s2). Each step in the transformation "costs"
// one distance point. It is similar to the Optimal String Alignment
// algorithm, but is more complex because it allows multiple edits on
// substrings.
//
// Both strings are compared rune by rune, so a multi-byte character counts
// as a single position. If either string is empty, the result is the number
// of runes in the other one.
//
// This implementation is based off of the one found on Wikipedia at
// http://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance#Distance_with_adjacent_transpositions
// as well as KevinStern's Java implementation found at
// https://github.com/KevinStern/software-and-algorithms.
func DamerauLevenshtein(s1 string, s2 string) (distance int) {
	// index by code point, not byte
	r1 := []rune(s1)
	r2 := []rune(s2)

	// if one string is blank, we need insertions
	// for all characters in the other one
	if len(r1) == 0 {
		return len(r2)
	}
	if len(r2) == 0 {
		return len(r1)
	}

	// Upper bound on any distance; used as the cost of a transposition that
	// is not available for the current cell.
	inf := len(r1) + len(r2)

	// matrix[i][j] is the distance between r1[:i+1] and r2[:j+1]; the
	// bottom-right cell is therefore the final answer.
	matrix := make([][]int, len(r1))
	for i := range matrix {
		matrix[i] = make([]int, len(r2))
	}

	// lastSeenRow maps a rune to the highest index of r1, among the rows
	// already processed, at which that rune occurs.
	lastSeenRow := make(map[rune]int)
	lastSeenRow[r1[0]] = 0

	if r1[0] != r2[0] {
		matrix[0][0] = 1
	}

	// First column: r1[:i+1] against the single rune r2[0]. Either delete
	// r1[i] on top of the previous cell, or delete the i leading runes and
	// match/substitute r1[i].
	for i := 1; i < len(r1); i++ {
		best := i + 1
		if r1[i] == r2[0] {
			best = i
		}
		if deleteDist := matrix[i-1][0] + 1; deleteDist < best {
			best = deleteDist
		}
		matrix[i][0] = best
	}

	// First row: the single rune r1[0] against r2[:j+1]. Either insert r2[j]
	// on top of the previous cell, or insert the j leading runes and
	// match/substitute r2[j].
	for j := 1; j < len(r2); j++ {
		best := j + 1
		if r1[0] == r2[j] {
			best = j
		}
		if insertDist := matrix[0][j-1] + 1; insertDist < best {
			best = insertDist
		}
		matrix[0][j] = best
	}

	for i := 1; i < len(r1); i++ {
		// lastMatchCol is the highest column already visited in this row
		// whose rune equals r1[i], or -1 if there is none yet.
		lastMatchCol := -1
		if r1[i] == r2[0] {
			lastMatchCol = 0
		}

		for j := 1; j < len(r2); j++ {
			// Capture both swap coordinates before lastMatchCol is updated
			// for the current column.
			iSwap, seen := lastSeenRow[r2[j]]
			jSwap := lastMatchCol

			// deletion
			best := matrix[i-1][j] + 1

			// insertion
			if insertDist := matrix[i][j-1] + 1; insertDist < best {
				best = insertDist
			}

			// match or substitution
			matchDist := matrix[i-1][j-1]
			if r1[i] == r2[j] {
				lastMatchCol = j
			} else {
				matchDist++
			}
			if matchDist < best {
				best = matchDist
			}

			// Transposition: only possible when r2[j] occurred in an earlier
			// row of r1 and r1[i] occurred in an earlier column of r2.
			swapDist := inf
			if seen && jSwap != -1 {
				preSwapCost := 0
				if iSwap != 0 || jSwap != 0 {
					// Cost of the cell diagonally before the swapped pair,
					// with each index clamped so it stays inside the matrix.
					pi, pj := iSwap-1, jSwap-1
					if pi < 0 {
						pi = 0
					}
					if pj < 0 {
						pj = 0
					}
					preSwapCost = matrix[pi][pj]
				}
				// One edit for every rune lying between the swapped pair in
				// either string, plus one for the transposition itself.
				swapDist = preSwapCost + (i - iSwap - 1) + (j - jSwap - 1) + 1
			}
			if swapDist < best {
				best = swapDist
			}

			matrix[i][j] = best
		}

		lastSeenRow[r1[i]] = i
	}

	return matrix[len(r1)-1][len(r2)-1]
}
|