6 people like it.

Norvig's Spelling Corrector

A line-by-line translation of Norvig's original Python code. An attempt to view F# as a "typed" Python.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
// Norvig's Spelling Corrector: http://norvig.com/spell-correct.html
open System.IO open System.Text.RegularExpressions

let edits1 (word : string) = 
    let splits = [for i in 0 .. word.Length do yield (word.[0..i-1], word.[i..])]
    let deletes = [for a, b in splits do if b <> "" then yield a + b.[1..]]
    let transposes = [for a, b in splits do if b.Length > 1 then yield a + string b.[1] + string b.[0] + b.[2..]]
    let replaces = [for a, b in splits do for c in 'a'..'z' do if b <> "" then yield a + string c + b.[1..]]
    let inserts = [for a, b in splits do for c in 'a'..'z' do yield a + string c + b]
    deletes @ transposes @ replaces @ inserts |> Set.ofList

let NWORDS = 
    File.ReadAllText "big.txt" |> (Regex "[a-zA-Z]+").Matches |> Seq.cast 
    |> Seq.map (fun (m:Match) -> m.Value.ToLower()) |> Seq.countBy id |> Map.ofSeq

let known_edits2 word = [for e1 in edits1(word) do for e2 in edits1(e1) do if Map.containsKey e2 NWORDS then yield e2] |> Set.ofList
let known words = [for w in words do if Map.containsKey w NWORDS then yield w] |> Set.ofList

let (<||>) (first : Lazy<_>) (second : Lazy<_>) : Lazy<_> = lazy(if Set.isEmpty first.Value then second.Value else first.Value)
let correct word = 
    (lazy known([word]) <||> lazy known(edits1(word)) <||> lazy known_edits2(word) <||> lazy Set.singleton word).Value 
    |> Seq.sortBy (fun w -> -NWORDS.[w]) |> Seq.head

// Example
correct "speling"
namespace System
namespace System.IO
namespace System.Text
namespace System.Text.RegularExpressions
val edits1 : word:string -> Set<string>

Full name: Script.edits1
val word : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val splits : (string * string) list
val i : int
property System.String.Length: int
val deletes : string list
val a : string
val b : string
val transposes : string list
val replaces : string list
val c : char
val inserts : string list
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
val NWORDS : Map<string,int>

Full name: Script.NWORDS
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
File.ReadAllText(path: string) : string
File.ReadAllText(path: string, encoding: System.Text.Encoding) : string
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
module Seq

from Microsoft.FSharp.Collections
val cast : source:System.Collections.IEnumerable -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.cast
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val m : Match
type Match =
  inherit Group
  member Groups : GroupCollection
  member NextMatch : unit -> Match
  member Result : replacement:string -> string
  static member Empty : Match
  static member Synchronized : inner:Match -> Match

Full name: System.Text.RegularExpressions.Match
property Capture.Value: string
System.String.ToLower() : string
System.String.ToLower(culture: System.Globalization.CultureInfo) : string
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.countBy
val id : x:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.id
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  override Equals : obj -> bool
  member Remove : key:'Key -> Map<'Key,'Value>
  ...

Full name: Microsoft.FSharp.Collections.Map<_,_>

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val ofSeq : elements:seq<'Key * 'T> -> Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.ofSeq
val known_edits2 : word:string -> Set<string>

Full name: Script.known_edits2
val e1 : string
val e2 : string
val containsKey : key:'Key -> table:Map<'Key,'T> -> bool (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.containsKey
val known : words:seq<string> -> Set<string>

Full name: Script.known
val words : seq<string>
val w : string
val first : Lazy<Set<'a>> (requires comparison)
Multiple items
active recognizer Lazy: Lazy<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.( |Lazy| )

--------------------
type Lazy<'T> = System.Lazy<'T>

Full name: Microsoft.FSharp.Control.Lazy<_>
val second : Lazy<Set<'a>> (requires comparison)
val isEmpty : set:Set<'T> -> bool (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.isEmpty
property System.Lazy.Value: Set<'a>
val correct : word:string -> string

Full name: Script.correct
val singleton : value:'T -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.singleton
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val head : source:seq<'T> -> 'T

Full name: Microsoft.FSharp.Collections.Seq.head

More information

Link:http://fssnip.net/6j
Posted:5 years ago
Author:Nick Palladinos
Tags: spelling corrector , python