Fuzzy Logic Matching - Excel code in VBA that might apply to Access VBA (1 Viewer)

Rx_

Nothing In Moderation
Local time
Today, 09:06
Joined
Oct 22, 2009
Messages
2,803
There is a lot in common with Excel VBA and Access VBA. Some of the Excel samples are hard-coded to the data in Excel. However, code can be modified for Access. The Code for Fuzzy matching. Was researching something for a solution and sharing some results.

This one is designed for two columns of numbers (as strings)
Code:
[FONT=Courier New]Sub FuzzyMatch()[/FONT]
[FONT=Courier New]Dim L, L1, L2, M, SC, T, R As Integer[/FONT]
[FONT=Courier New]Dim Fstr, Sstr As String[/FONT]
[FONT=Courier New]For R = 1 To Range("A65536").End(xlUp).Row[/FONT] [FONT=Courier][SIZE=2]L = 0: M = 0: SC = 1[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Fstr = UCase(Cells(R, 1).Value)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Sstr = UCase(Cells(R, 2).Value)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]L1 = Len(Fstr)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]L2 = Len(Sstr)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Do While L < L1[/FONT][/SIZE][FONT=Courier][SIZE=2]L = L + 1[/SIZE][/FONT]
[SIZE=2][FONT=Courier]For T = SC To L1[/FONT][/SIZE]
[SIZE=2][FONT=Courier]If Mid$(Sstr, L, 1) <> Mid$(Fstr, T, 1) Then GoTo RS[/FONT][/SIZE]
[SIZE=2][FONT=Courier]M = M + 1[/FONT][/SIZE]
[SIZE=2][FONT=Courier]SC = T[/FONT][/SIZE]
[SIZE=2][FONT=Courier]T = L1 + 1[/FONT][/SIZE][FONT=Courier][SIZE=2]RS:[/SIZE][/FONT][FONT=Courier][SIZE=2]Next T[/SIZE][/FONT][FONT=Courier][SIZE=2]Loop[/SIZE][/FONT][FONT=Courier][SIZE=2]Cells(R, 3).Value = M / L1[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Next R[/FONT][/SIZE]
[SIZE=2][FONT=Courier]End Sub[/FONT][/SIZE]

This is more of a VBA example passing strings into a formula. The example given was PercentageMatch =Fuzzy("I B M","The IBM Corporation")
Code:
[FONT=Courier][SIZE=2]Dim TopMatch As Integer[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Dim strCompare As String[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Function Fuzzy(strIn1 As String, strIn2 As String) As Single[/FONT][/SIZE][FONT=Courier][SIZE=2]Dim L1 As Integer[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Dim In1Mask(1 To 24) As Long 'strIn1 is 24 characters max[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Dim iCh As Integer[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Dim N As Long[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Dim strTry As String[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Dim strTest As String[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]TopMatch = 0[/FONT][/SIZE]
[SIZE=2][FONT=Courier]L1 = Len(strIn1)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]strTest = UCase(strIn1)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]strCompare = UCase(strIn2)[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]For iCh = 1 To L1[/FONT][/SIZE][FONT=Courier][SIZE=2]In1Mask(iCh) = 2 ^ iCh[/SIZE][/FONT][FONT=Courier][SIZE=2]Next iCh[/SIZE][/FONT]
 
[SIZE=2][FONT=Courier]'Loop thru all ordered combinations of characters in strIn1[/FONT][/SIZE]
[SIZE=2][FONT=Courier]For N = 2 ^ (L1 + 1) - 1 To 1 Step -1[/FONT][/SIZE][FONT=Courier][SIZE=2]strTry = ""[/SIZE][/FONT]
[SIZE=2][FONT=Courier]For iCh = 1 To L1[/FONT][/SIZE][FONT=Courier][SIZE=2]If In1Mask(iCh) And N Then[/SIZE][/FONT][FONT=Courier][SIZE=2]strTry = strTry & Mid(strTest, iCh, 1)[/SIZE][/FONT][FONT=Courier][SIZE=2]End If[/SIZE][/FONT][FONT=Courier][SIZE=2]Next iCh[/SIZE][/FONT]
[SIZE=2][FONT=Courier]If Len(strTry) > TopMatch Then TestString strTry[/FONT][/SIZE][FONT=Courier][SIZE=2]Next N[/SIZE][/FONT]
 
[SIZE=2][FONT=Courier]Fuzzy = TopMatch / CSng(L1)[/FONT][/SIZE][FONT=Courier][SIZE=2]End Function[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Sub TestString(strIn As String)[/FONT][/SIZE]
[FONT=Courier][SIZE=2]Dim L As Integer[/SIZE][/FONT]
[SIZE=2][FONT=Courier]Dim strTry As String[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Dim iCh As Integer[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]L = Len(strIn)[/FONT][/SIZE]
[SIZE=2][FONT=Courier]If L <= TopMatch Then Exit Sub[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]strTry = "*"[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]For iCh = 1 To L[/FONT][/SIZE]
[SIZE=2][FONT=Courier]strTry = strTry & Mid(strIn, iCh, 1) & "*"[/FONT][/SIZE]
[SIZE=2][FONT=Courier]Next iCh[/FONT][/SIZE]
 
[SIZE=2][FONT=Courier]If strCompare Like strTry Then[/FONT][/SIZE]
[SIZE=2][FONT=Courier]If L > TopMatch Then TopMatch = L[/FONT][/SIZE]
[SIZE=2][FONT=Courier]End If[/FONT][/SIZE]
[SIZE=2][FONT=Courier]End Sub[/FONT][/SIZE]

Note; I reviewed the code above and edited same and tested it with Access 2010
Here is the revised/formatted vba (separate module)

Code:
[COLOR="Sienna"]Option Compare Database
Option Explicit

Dim TopMatch As Integer
Dim strCompare As String

Function Fuzzy(strIn1 As String, strIn2 As String) As Single

    Dim L1 As Integer
    Dim In1Mask(1 To 24) As Long    'strIn1 is 24 characters max
    Dim iCh As Integer
    Dim N As Long
    Dim strTry As String
    Dim strTest As String

    TopMatch = 0
    L1 = Len(strIn1)
    strTest = UCase(strIn1)
    strCompare = UCase(strIn2)

    For iCh = 1 To L1
        In1Mask(iCh) = 2 ^ iCh
    Next iCh

    'Loop thru all ordered combinations of characters in strIn1
    For N = 2 ^ (L1 + 1) - 1 To 1 Step -1
        strTry = ""
        For iCh = 1 To L1
            If In1Mask(iCh) And N Then
                strTry = strTry & Mid(strTest, iCh, 1)
            End If
        Next iCh
        If Len(strTry) > TopMatch Then TestString strTry
    Next N

    Fuzzy = TopMatch / CSng(L1)
End Function
Sub TestString(strIn As String)
    Dim L As Integer
    Dim strTry As String
    Dim iCh As Integer

    L = Len(strIn)
    If L <= TopMatch Then Exit Sub

    strTry = "*"

    For iCh = 1 To L
        strTry = strTry & Mid(strIn, iCh, 1) & "*"
    Next iCh

    If strCompare Like strTry Then
        If L > TopMatch Then TopMatch = L
    End If
End Sub[/COLOR]

This one allows the search to operate on a word by word basis with garbage-characters removed and with extra words discarded (optional).

Code:
' Compare two phrases and return a similarity value (between 0 and 100).
'
' Arguments:
'
' 1. Phrase1        String; any text string
' 2. Phrase2        String; any text string
' 3. StripVowels    Optional to strip all vowels from the phrases
' 4. DiscardExtra   Optional to discard any unmatched words
'
 
    'local variables
    Dim lsWord1() As String
    Dim lsWord2() As String
    Dim ldMatch() As Double
    Dim ldCur As Double
    Dim ldMax As Double
    Dim liCnt1 As Integer
    Dim liCnt2 As Integer
    Dim liCnt3 As Integer
    Dim lbMatched() As Boolean
    Dim lsNew As String
    Dim lsChr As String
    Dim lsKeep As String
 
    'set default value as failure
    FuzzyMatchByWord = 0
 
    'create list of characters to keep
    lsKeep = "BCDFGHJKLMNPQRSTVWXYZ0123456789 "
    If Not lbStripVowels Then
        lsKeep = lsKeep & "AEIOU"
    End If
 
    'clean up phrases by stripping undesired characters
    'phrase1
    lsPhrase1 = Trim$(UCase$(lsPhrase1))
    lsNew = ""
    For liCnt1 = 1 To Len(lsPhrase1)
        lsChr = Mid$(lsPhrase1, liCnt1, 1)
        If InStr(lsKeep, lsChr) <> 0 Then
            lsNew = lsNew & lsChr
        End If
    Next
    lsPhrase1 = lsNew
    lsPhrase1 = Replace(lsPhrase1, "  ", " ")
    lsWord1 = Split(lsPhrase1, " ")
    If UBound(lsWord1) = -1 Then
        Exit Function
    End If
    ReDim ldMatch(UBound(lsWord1))
    'phrase2
    lsPhrase2 = Trim$(UCase$(lsPhrase2))
    lsNew = ""
    For liCnt1 = 1 To Len(lsPhrase2)
        lsChr = Mid$(lsPhrase2, liCnt1, 1)
        If InStr(lsKeep, lsChr) <> 0 Then
            lsNew = lsNew & lsChr
        End If
    Next
    lsPhrase2 = lsNew
    lsPhrase2 = Replace(lsPhrase2, "  ", " ")
    lsWord2 = Split(lsPhrase2, " ")
    If UBound(lsWord2) = -1 Then
        Exit Function
    End If
    ReDim lbMatched(UBound(lsWord2))
 
    'exit if empty
    If Trim$(lsPhrase1) = "" Or Trim$(lsPhrase2) = "" Then
        Exit Function
    End If
 
    'compare words in each phrase
    For liCnt1 = 0 To UBound(lsWord1)
        ldMax = 0
        For liCnt2 = 0 To UBound(lsWord2)
            If Not lbMatched(liCnt2) Then
                ldCur = FuzzyMatch(lsWord1(liCnt1), lsWord2(liCnt2))
                If ldCur > ldMax Then
                    liCnt3 = liCnt2
                    ldMax = ldCur
                End If
            End If
        Next
        lbMatched(liCnt3) = True
        ldMatch(liCnt1) = ldMax
    Next
 
    'discard extra words
    ldMax = 0
    For liCnt1 = 0 To UBound(ldMatch)
        ldMax = ldMax + ldMatch(liCnt1)
    Next
    If lbDiscardExtra Then
        liCnt2 = 0
        For liCnt1 = 0 To UBound(lbMatched)
            If lbMatched(liCnt1) Then
                liCnt2 = liCnt2 + 1
            End If
        Next
    Else
        liCnt2 = UBound(lsWord2) + 1
    End If
 
    'return overall similarity
    FuzzyMatchByWord = 100 * (ldMax / liCnt2)
 
End Function
 
Function FuzzyMatch(Fstr As String, Sstr As String) As Double
 
'
' Code sourced from: http://www.mrexcel.com/pc07.shtml
' Credited to: Ed Acosta
' Modified: Joe Stanton
'
 
    Dim L, L1, L2, M, SC, T, R As Integer
 
    L = 0
    M = 0
    SC = 1
 
    L1 = Len(Fstr)
    L2 = Len(Sstr)
 
    Do While L < L1
        L = L + 1
        For T = SC To L1
            If Mid$(Sstr, L, 1) = Mid$(Fstr, T, 1) Then
                M = M + 1
                SC = T
                T = L1 + 1
            End If
        Next T
    Loop
 
    If L1 = 0 Then
        FuzzyMatch = 0
    Else
        FuzzyMatch = M / L1
    End If
 
End Function
 
Last edited by a moderator:

Users who are viewing this thread

Top Bottom