Friday, 17 April 2015

String Sound Matching (Soundex) in VB.NET

''' <summary>
''' Phonetic string matching based on the American Soundex algorithm.
''' Two words that sound alike in English (for example "Robert" and "Rupert")
''' produce the same four-character code.
''' </summary>
Public Class SoundexComparison

    ''' <summary>
    ''' Computes the four-character Soundex code of a word.
    ''' </summary>
    ''' <param name="word">The word to encode. Case is ignored.</param>
    ''' <returns>
    ''' The first character of the word (upper-cased) followed by up to three
    ''' digits, right-padded with "0" to exactly four characters.
    ''' Returns an empty string when <paramref name="word"/> is Nothing or empty.
    ''' </returns>
    Public Shared Function GetSoundexCode(ByVal word As String) As String
        ' There is no first character to keep for Nothing / "", so return an
        ' empty code instead of failing inside Substring.
        If String.IsNullOrEmpty(word) Then
            Return String.Empty
        End If

        ' Use the invariant culture so that the result does not depend on the
        ' current thread culture (e.g. Turkish "i" -> dotted capital I, which
        ' would not match any of the Case literals in Transform).
        word = word.ToUpperInvariant()

        ' Keep the first character of the word; it is never replaced by a digit.
        Dim code As String = word.Substring(0, 1)

        ' Digit of the most recent sound. It starts as the digit of the first
        ' character so that a following letter with the same sound is dropped
        ' (e.g. the "F" in "Pfister").
        Dim previousDigit As String = Transform(code)

        For i As Integer = 1 To word.Length - 1
            ' Only four characters are returned; stop once we have them.
            If code.Length = 4 Then
                Exit For
            End If

            Dim letter As String = word.Substring(i, 1)

            ' H and W are transparent: they are not encoded and they do NOT
            ' separate sounds, so letters with the same digit on either side
            ' of them still count as one sound (e.g. "Ashcraft" -> A261).
            If letter = "H" OrElse letter = "W" Then
                Continue For
            End If

            Dim digit As String = Transform(letter)

            ' Append only real digits that differ from the previous sound.
            If digit.Length > 0 AndAlso digit <> previousDigit Then
                code &= digit
            End If

            ' Characters without a digit (vowels, Y, anything else) reset the
            ' previous sound, so the same digit may be emitted again after them
            ' (e.g. "Tymczak" -> T522).
            previousDigit = digit
        Next

        ' A Soundex code must be exactly 4 characters long; pad short codes
        ' with zeroes. The loop above never produces more than 4 characters.
        Return code.PadRight(4, "0"c)
    End Function

    ''' <summary>
    ''' Maps a single upper-case letter to its Soundex digit.
    ''' </summary>
    ''' <param name="character">A one-character string holding an upper-case letter.</param>
    ''' <returns>
    ''' "1" to "6" for the consonant groups listed below, or an empty string
    ''' for every other input (vowels, H, W, Y, non-letters).
    ''' </returns>
    Public Shared Function Transform(ByVal character As String) As String
        Select Case character
            Case "B", "F", "P", "V"
                Return "1"
            Case "C", "G", "J", "K", "Q", "S", "X", "Z"
                Return "2"
            Case "D", "T"
                Return "3"
            Case "L"
                Return "4"
            Case "M", "N"
                Return "5"
            Case "R"
                Return "6"
            Case Else
                ' All other characters carry no Soundex digit.
                Return String.Empty
        End Select
    End Function

End Class

No comments:

Post a Comment