mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-22 00:00:39 +01:00
57 lines
1.2 KiB
Go
57 lines
1.2 KiB
Go
|
package matchr
|
||
|
|
||
|
// OSA computes the Optimal String Alignment distance between two
|
||
|
// strings. The returned value - distance - is the number of insertions,
|
||
|
// deletions, substitutions, and transpositions it takes to transform one
|
||
|
// string (s1) into another (s2). Each step in the transformation "costs"
|
||
|
// one distance point. It is similar to Damerau-Levenshtein, but is simpler
|
||
|
// because it does not allow multiple edits on any substring.
|
||
|
func OSA(s1 string, s2 string) (distance int) {
|
||
|
// index by code point, not byte
|
||
|
r1 := []rune(s1)
|
||
|
r2 := []rune(s2)
|
||
|
|
||
|
rows := len(r1) + 1
|
||
|
cols := len(r2) + 1
|
||
|
|
||
|
var i, j, d1, d2, d3, d_now, cost int
|
||
|
|
||
|
dist := make([]int, rows*cols)
|
||
|
|
||
|
for i = 0; i < rows; i++ {
|
||
|
dist[i*cols] = i
|
||
|
}
|
||
|
|
||
|
for j = 0; j < cols; j++ {
|
||
|
dist[j] = j
|
||
|
}
|
||
|
|
||
|
for i = 1; i < rows; i++ {
|
||
|
for j = 1; j < cols; j++ {
|
||
|
if r1[i-1] == r2[j-1] {
|
||
|
cost = 0
|
||
|
} else {
|
||
|
cost = 1
|
||
|
}
|
||
|
|
||
|
d1 = dist[((i-1)*cols)+j] + 1
|
||
|
d2 = dist[(i*cols)+(j-1)] + 1
|
||
|
d3 = dist[((i-1)*cols)+(j-1)] + cost
|
||
|
|
||
|
d_now = min(d1, min(d2, d3))
|
||
|
|
||
|
if i > 2 && j > 2 && r1[i-1] == r2[j-2] &&
|
||
|
r1[i-2] == r2[j-1] {
|
||
|
d1 = dist[((i-2)*cols)+(j-2)] + cost
|
||
|
d_now = min(d_now, d1)
|
||
|
}
|
||
|
|
||
|
dist[(i*cols)+j] = d_now
|
||
|
}
|
||
|
}
|
||
|
|
||
|
distance = dist[(cols*rows)-1]
|
||
|
|
||
|
return
|
||
|
}
|