0 people like it.
Like the snippet!
String percentual similarity using Levenshtein Distance
String percentual similarity using Levenshtein Distance, as described in https://en.wikipedia.org/wiki/Levenshtein_distance.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
|
(*
* https://en.wikipedia.org/wiki/Levenshtein_distance
*)
let levenshteinDistance source target =
let sourceLen = String.length source
let targetLen = String.length target
let distance = Array2D.zeroCreate<int> (sourceLen + 1) (targetLen + 1)
Array2D.iteri (fun i _ _ -> distance.[i,0] <- i) distance |> ignore
Array2D.iteri (fun _ j _ -> distance.[0,j] <- j) distance |> ignore
for i in 1..sourceLen do
for j in 1..targetLen do
let cost = if target.[j - 1] = source.[i - 1] then 0
else 1
distance.[i,j] <- min (min (distance.[i - 1, j] + 1) (distance.[i, j - 1] + 1))
distance.[i - 1, j - 1] + cost
distance.[sourceLen, targetLen]
let similarity source target =
let dist = levenshteinDistance source target
(1.0 - (float dist / float (max source.Length target.Length))) * 100.0
let reportSimilarity str1 str2 =
sprintf "'%s' is similar to '%s' for %.2f%%" str1 str2 (similarity str1 str2)
let tests =
sprintf "%s\n%s\n%s\n"
(reportSimilarity "test" "test")
(reportSimilarity "first" "second")
(reportSimilarity "another" "this")
|
val levenshteinDistance : source:string -> target:string -> int
Full name: Script.levenshteinDistance
val source : string
val target : string
val sourceLen : int
module String
from Microsoft.FSharp.Core
val length : str:string -> int
Full name: Microsoft.FSharp.Core.String.length
val targetLen : int
val distance : int [,]
module Array2D
from Microsoft.FSharp.Collections
val zeroCreate : length1:int -> length2:int -> 'T [,]
Full name: Microsoft.FSharp.Collections.Array2D.zeroCreate
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
val iteri : action:(int -> int -> 'T -> unit) -> array:'T [,] -> unit
Full name: Microsoft.FSharp.Collections.Array2D.iteri
val i : int
val ignore : value:'T -> unit
Full name: Microsoft.FSharp.Core.Operators.ignore
val j : int
val i : int32
val j : int32
val cost : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)
Full name: Microsoft.FSharp.Core.Operators.min
val similarity : source:string -> target:string -> float
Full name: Script.similarity
val dist : int
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = System.Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
val max : e1:'T -> e2:'T -> 'T (requires comparison)
Full name: Microsoft.FSharp.Core.Operators.max
property System.String.Length: int
val reportSimilarity : str1:string -> str2:string -> string
Full name: Script.reportSimilarity
val str1 : string
val str2 : string
val sprintf : format:Printf.StringFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val tests : string
Full name: Script.tests
More information