9 people like it.

Levenshtein distance

Computes Levenshtein (min edit) distance between two strings http://en.wikipedia.org/wiki/Levenstein_Distance

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
let levenshtein word1 word2 =
    let preprocess = fun (str : string) -> str.ToLower().ToCharArray()
    let chars1, chars2 = preprocess word1, preprocess word2
    let m, n = chars1.Length, chars2.Length
    let table : int[,] = Array2D.zeroCreate (m + 1) (n + 1)
    for i in 0..m do
        for j in 0..n do
            match i, j with
            | i, 0 -> table.[i, j] <- i
            | 0, j -> table.[i, j] <- j
            | _, _ ->
                let delete = table.[i-1, j] + 1
                let insert = table.[i, j-1] + 1
                //cost of substitution is 2
                let substitute = 
                    if chars1.[i - 1] = chars2.[j - 1] 
                        then table.[i-1, j-1] //same character
                        else table.[i-1, j-1] + 2
                table.[i, j] <- List.min [delete; insert; substitute]
    table.[m, n], table //return tuple of the table and distance

//test
levenshtein "intention" "execution" //|> ignore
val levenshtein : word1:string -> word2:string -> int * int [,]

Full name: Script.levenshtein
val word1 : string
val word2 : string
val preprocess : (string -> char [])
val str : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
System.String.ToLower() : string
System.String.ToLower(culture: System.Globalization.CultureInfo) : string
val chars1 : char []
val chars2 : char []
val m : int
val n : int
property System.Array.Length: int
val table : int [,]
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
module Array2D

from Microsoft.FSharp.Collections
val zeroCreate : length1:int -> length2:int -> 'T [,]

Full name: Microsoft.FSharp.Collections.Array2D.zeroCreate
val i : int32
val j : int32
val delete : int
val insert : int
val substitute : int
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val min : list:'T list -> 'T (requires comparison)

Full name: Microsoft.FSharp.Collections.List.min
Raw view Test code New version

More information

Link:http://fssnip.net/bj
Posted:12 years ago
Author:Lakret
Tags: algorithms , text processing