0 people like it.

String percentual similarity using Levenshtein Distance

String percentual similarity using Levenshtein Distance, as described in https://en.wikipedia.org/wiki/Levenshtein_distance.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
(*
* https://en.wikipedia.org/wiki/Levenshtein_distance
*)

let levenshteinDistance source target =
    let sourceLen = String.length source
    let targetLen = String.length target
    let distance = Array2D.zeroCreate<int> (sourceLen + 1) (targetLen + 1)
    Array2D.iteri (fun i _ _ -> distance.[i,0] <- i) distance |> ignore
    Array2D.iteri (fun _ j _ -> distance.[0,j] <- j) distance |> ignore
    for i in 1..sourceLen do
         for j in 1..targetLen do
            let cost = if target.[j - 1] = source.[i - 1] then 0
                       else 1
            distance.[i,j] <- min (min (distance.[i - 1, j] + 1) (distance.[i, j - 1] + 1))
                                  distance.[i - 1, j - 1] + cost
    distance.[sourceLen, targetLen]

let similarity source target =
    let dist = levenshteinDistance source target
    (1.0 - (float dist / float (max source.Length target.Length))) * 100.0

let reportSimilarity str1 str2 =
    sprintf "'%s' is similar to '%s' for %.2f%%" str1 str2 (similarity str1 str2)

let tests =
    sprintf "%s\n%s\n%s\n" 
                (reportSimilarity "test" "test")
                (reportSimilarity "first" "second")
                (reportSimilarity "another" "this")
val levenshteinDistance : source:string -> target:string -> int

Full name: Script.levenshteinDistance
val source : string
val target : string
val sourceLen : int
module String

from Microsoft.FSharp.Core
val length : str:string -> int

Full name: Microsoft.FSharp.Core.String.length
val targetLen : int
val distance : int [,]
module Array2D

from Microsoft.FSharp.Collections
val zeroCreate : length1:int -> length2:int -> 'T [,]

Full name: Microsoft.FSharp.Collections.Array2D.zeroCreate
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val iteri : action:(int -> int -> 'T -> unit) -> array:'T [,] -> unit

Full name: Microsoft.FSharp.Collections.Array2D.iteri
val i : int
val ignore : value:'T -> unit

Full name: Microsoft.FSharp.Core.Operators.ignore
val j : int
val i : int32
val j : int32
val cost : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.min
val similarity : source:string -> target:string -> float

Full name: Script.similarity
val dist : int
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val max : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.max
property System.String.Length: int
val reportSimilarity : str1:string -> str2:string -> string

Full name: Script.reportSimilarity
val str1 : string
val str2 : string
val sprintf : format:Printf.StringFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val tests : string

Full name: Script.tests
Raw view Test code New version

More information

Link:http://fssnip.net/sp
Posted:8 years ago
Author:Giacomo Stelluti Scala
Tags: algorithms