mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-22 00:00:39 +01:00
73 lines
1.4 KiB
Go
73 lines
1.4 KiB
Go
package matchr
|
|
|
|
import "strings"
|
|
|
|
// Soundex computes the Soundex phonetic representation of the input string. It
// attempts to encode homophones with the same characters. More information can
// be found at http://en.wikipedia.org/wiki/Soundex.
//
// The result consists of the first character of s1 (uppercased) followed by
// three digits; it is padded with '0' when fewer than three digits are
// produced. An empty input yields an empty string. Characters that have no
// code other than 'H' and 'W' (vowels, 'Y', digits, punctuation, ...) emit
// nothing but do separate repeated codes, so the same digit may appear again
// after them.
func Soundex(s1 string) string {
	// soundexLen is the length of a complete code: one leading character
	// followed by three digits.
	const soundexLen = 4

	// The code table below is defined on uppercase letters only.
	input := []rune(strings.ToUpper(s1))
	if len(input) == 0 {
		return ""
	}

	// enc is held as runes so that its length counts characters, not bytes:
	// a multi-byte leading character must not eat into the three-digit budget.
	enc := make([]rune, 1, soundexLen)
	enc[0] = input[0]

	// prev is the code of the previous significant character; 0 means that
	// the character had no code.
	var prev rune

	for i, r := range input {
		var code rune
		switch r {
		case 'B', 'F', 'P', 'V':
			code = '1'
		case 'C', 'G', 'J', 'K', 'Q', 'S', 'X', 'Z':
			code = '2'
		case 'D', 'T':
			code = '3'
		case 'L':
			code = '4'
		case 'M', 'N':
			code = '5'
		case 'R':
			code = '6'
		case 'H', 'W':
			// 'H' and 'W' are transparent: prev is left untouched, so equally
			// coded letters on either side of them collapse into one digit.
			continue
		}

		// The first character is never emitted as a digit, but its code is
		// still remembered in prev to suppress an immediately repeated code.
		if i > 0 && code != 0 && code != prev {
			enc = append(enc, code)

			// we're done when we reach four encoded characters
			if len(enc) == soundexLen {
				break
			}
		}

		prev = code
	}

	// if we've fallen short of four "real" encoded characters, the result
	// gets padded with zeros
	for len(enc) < soundexLen {
		enc = append(enc, '0')
	}

	return string(enc)
}
|