3 people like it.
Like the snippet!
EditDistance
Computes the Minimum Edit Distance or the Levenshtein Distance between two words
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
|
type EditType = Deletion | Insertion | Substitution
type DistanceType = MinimumEditDistance | LevenshteinDistance
let getEditDistance distanceType (X:string) (Y:string) =
let m = X.Length
let n = Y.Length
let d = Array2D.init (m + 1) (n + 1) (fun i j -> if j = 0 then i elif i = 0 then j else 0)
let ptr = Array2D.init (m + 1) (n + 1) (fun i j -> if j = 0 then Deletion elif i = 0 then Insertion else Substitution)
let penalizationForSubstitution =
match distanceType with
| MinimumEditDistance -> 1
| LevenshteinDistance -> 2
for i in 1..m do
for j in 1..n do
let a, b = Seq.minBy fst [d.[i-1, j] + 1, Deletion
d.[i, j-1] + 1, Insertion
d.[i-1, j-1] + (if X.[i-1] <> Y.[j-1] then penalizationForSubstitution else 0), Substitution]
d.[i, j] <- a
ptr.[i, j] <- b
let alignment =
(m, n)
|> Seq.unfold (fun (i, j) ->
if i = 0 && j = 0 then
None
else
match ptr.[i, j] with
| Deletion -> Some((X.[i-1], '*'), (i-1, j))
| Insertion -> Some(('*', Y.[j-1]), (i, j-1))
| Substitution -> Some((X.[i-1], Y.[j-1]), (i-1, j-1)))
|> Array.ofSeq
|> Array.rev
d.[m, n], alignment
let printAlignment alignment =
let toString (chars : char array) = new string(chars)
alignment |> Array.map fst |> toString |> printfn "%s"
alignment |> Array.map snd |> toString |> printfn "%s"
let distanceM, alignmentM = getEditDistance MinimumEditDistance "intention" "execution"
printfn "Minimum Edit Distance: %d" distanceM
printAlignment alignmentM
let distanceL, alignmentL = getEditDistance LevenshteinDistance "intention" "execution"
printfn "Levenshtein Distance: %d" distanceL
printAlignment alignmentL
|
union case EditType.Deletion: EditType
union case EditType.Insertion: EditType
union case EditType.Substitution: EditType
type DistanceType =
| MinimumEditDistance
| LevenshteinDistance
Full name: Script.DistanceType
union case DistanceType.MinimumEditDistance: DistanceType
union case DistanceType.LevenshteinDistance: DistanceType
val getEditDistance : distanceType:DistanceType -> X:string -> Y:string -> int * (char * char) []
Full name: Script.getEditDistance
val distanceType : DistanceType
val X : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val Y : string
val m : int
property System.String.Length: int
val n : int
val d : int [,]
module Array2D
from Microsoft.FSharp.Collections
val init : length1:int -> length2:int -> initializer:(int -> int -> 'T) -> 'T [,]
Full name: Microsoft.FSharp.Collections.Array2D.init
val i : int
val j : int
val ptr : EditType [,]
val penalizationForSubstitution : int
val i : int32
val j : int32
val a : int
val b : EditType
module Seq
from Microsoft.FSharp.Collections
val minBy : projection:('T -> 'U) -> source:seq<'T> -> 'T (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.minBy
val fst : tuple:('T1 * 'T2) -> 'T1
Full name: Microsoft.FSharp.Core.Operators.fst
val alignment : (char * char) []
val unfold : generator:('State -> ('T * 'State) option) -> state:'State -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.unfold
union case Option.None: Option<'T>
union case Option.Some: Value: 'T -> Option<'T>
module Array
from Microsoft.FSharp.Collections
val ofSeq : source:seq<'T> -> 'T []
Full name: Microsoft.FSharp.Collections.Array.ofSeq
val rev : array:'T [] -> 'T []
Full name: Microsoft.FSharp.Collections.Array.rev
val printAlignment : alignment:(char * char) [] -> unit
Full name: Script.printAlignment
val toString : (char array -> string)
val chars : char array
Multiple items
val char : value:'T -> char (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.char
--------------------
type char = System.Char
Full name: Microsoft.FSharp.Core.char
type 'T array = 'T []
Full name: Microsoft.FSharp.Core.array<_>
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val snd : tuple:('T1 * 'T2) -> 'T2
Full name: Microsoft.FSharp.Core.Operators.snd
val distanceM : int
Full name: Script.distanceM
val alignmentM : (char * char) []
Full name: Script.alignmentM
val distanceL : int
Full name: Script.distanceL
val alignmentL : (char * char) []
Full name: Script.alignmentL
More information