2022-05-25 20:49:12 +02:00
package smetrics
import (
"strings"
)
// Soundex returns the Soundex encoding of s, a phonetic code based on how
// words sound in English. Strings that sound similar should map to the same
// code.
//
// For non-empty input the result is always 4 bytes long: the first byte of s
// (upper-cased when it is an ASCII lowercase letter, otherwise copied as is)
// followed by three digits. Only ASCII letters after the first byte are
// considered; every other byte is skipped. When fewer than three digits can
// be derived, the code is right-padded with '0'.
//
// An empty s yields the empty string.
func Soundex(s string) string {
	// Indexing s[0] below would panic on empty input, so bail out early.
	if len(s) == 0 {
		return ""
	}

	var b strings.Builder
	b.Grow(4)

	// p holds the previously seen letter (upper-cased); it starts as the
	// leading byte, which is emitted verbatim as the first byte of the code.
	p := s[0]
	if p <= 'z' && p >= 'a' {
		p -= 32 // convert to uppercase
	}
	b.WriteByte(p)

	n := 0 // number of digits written so far
	for i := 1; i < len(s); i++ {
		c := s[i]
		if c <= 'z' && c >= 'a' {
			c -= 32 // convert to uppercase
		} else if c < 'A' || c > 'Z' {
			// Not an ASCII letter: skip it without touching p, so it does
			// not separate two identical letters.
			continue
		}

		// Collapse runs of the same letter. The comparison is made on the
		// letters themselves, not on their digit codes.
		if c == p {
			continue
		}
		p = c

		switch c {
		case 'B', 'P', 'F', 'V':
			c = '1'
		case 'C', 'S', 'K', 'G', 'J', 'Q', 'X', 'Z':
			c = '2'
		case 'D', 'T':
			c = '3'
		case 'L':
			c = '4'
		case 'M', 'N':
			c = '5'
		case 'R':
			c = '6'
		default:
			// Remaining letters (vowels, H, W, Y) carry no code.
			continue
		}

		b.WriteByte(c)
		n++
		if n == 3 {
			break
		}
	}

	// Pad with zeros so the code always has three digits.
	for i := n; i < 3; i++ {
		b.WriteByte('0')
	}

	return b.String()
}