29 people like it.
Like the snippet!
Jaro-Winkler in F#
Jaro-Winkler is a fast and effective name matching algorithm.
For more info see "A Comparison of String Distance Metrics for Name-Matching Tasks" http://www.isi.edu/info-agents/workshops/ijcai03/papers/Cohen-p.pdf
or the Wikipedia article
http://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
|
let windowAt str offset radius =
let startAt = max 0 (offset - radius)
let endAt = min (offset + radius) (String.length str - 1)
[ for i in startAt .. endAt -> str.[i] ]
let jaro s1 s2 =
let matchRadius =
let s1_l, s2_l = String.length s1, String.length s2 in
(min s1_l s2_l) / 2 +
(min s1_l s2_l) % 2
let commonChars chars1 chars2 =
[
for i = 0 to String.length chars1 - 1 do
let matchChar = chars1.[i]
let windowChars = windowAt chars2 i matchRadius
if windowChars |> List.exists (fun c -> c = matchChar) then
yield matchChar
]
let common1 = commonChars s1 s2
let common2 = commonChars s2 s1
let transpositions =
List.zip common1 common2
|> List.fold (fun s (c1,c2) -> if c1 <> c2 then s + 1.0 else s) 0.0
|> (fun x -> x / 2.0)
let s1length = float (String.length s1)
let s2length = float (String.length s2)
let c1length = float (List.length common1)
let c2length = float (List.length common2)
((c1length / s1length) +
(c2length / s2length) +
((c1length - transpositions) / c1length))
/ 3.0
let jaroWinkler s1 s2 =
let jaroScore = jaro s1 s2
let p = 0.1
let maxLength = (min s1.Length s2.Length) - 1
let rec calcL i acc =
if i > maxLength || s1.[i] <> s2.[i] then acc
else calcL (i + 1) (acc + 1.0)
let l = max (calcL 0 0.0) 4.0
jaroScore + (l * p * (1.0 - jaroScore))
|
val windowAt : str:string -> offset:int -> radius:int -> char list
Full name: Script.windowAt
val str : string
val offset : int
val radius : int
val startAt : int
val max : e1:'T -> e2:'T -> 'T (requires comparison)
Full name: Microsoft.FSharp.Core.Operators.max
val endAt : int
val min : e1:'T -> e2:'T -> 'T (requires comparison)
Full name: Microsoft.FSharp.Core.Operators.min
module String
from Microsoft.FSharp.Core
val length : str:string -> int
Full name: Microsoft.FSharp.Core.String.length
val i : int
val jaro : s1:string -> s2:string -> float
Full name: Script.jaro
val s1 : string
val s2 : string
val matchRadius : int
val s1_l : int
val s2_l : int
val commonChars : (string -> string -> char list)
val chars1 : string
val chars2 : string
val matchChar : char
val windowChars : char list
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val exists : predicate:('T -> bool) -> list:'T list -> bool
Full name: Microsoft.FSharp.Collections.List.exists
val c : char
val common1 : char list
val common2 : char list
val transpositions : float
val zip : list1:'T1 list -> list2:'T2 list -> ('T1 * 'T2) list
Full name: Microsoft.FSharp.Collections.List.zip
val fold : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State
Full name: Microsoft.FSharp.Collections.List.fold
val s : float
val c1 : char
val c2 : char
val x : float
val s1length : float
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = System.Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
val s2length : float
val c1length : float
val length : list:'T list -> int
Full name: Microsoft.FSharp.Collections.List.length
val c2length : float
val jaroWinkler : s1:string -> s2:string -> float
Full name: Script.jaroWinkler
val jaroScore : float
val p : float
val maxLength : int
property System.String.Length: int
val calcL : (int -> float -> float)
val acc : float
val l : float
More information