0 people like it.
Like the snippet!
Norvig's Spelling Corrector
A line-by-line translation of Norvig's original Python code. An attempt to view F# as a "typed" Python.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
|
// Norvig's Spelling Corrector: http://norvig.com/spell-correct.html
open System.IO open System.Text.RegularExpressions
let edits1 (word : string) =
let splits = [for i in 0 .. word.Length do yield (word.[0..i-1], word.[i..])]
let deletes = [for a, b in splits do if b <> "" then yield a + b.[1..]]
let transposes = [for a, b in splits do if b.Length > 1 then yield a + string b.[1] + string b.[0] + b.[2..]]
let replaces = [for a, b in splits do for c in 'a'..'z' do if b <> "" then yield a + string c + b.[1..]]
let inserts = [for a, b in splits do for c in 'a'..'z' do yield a + string c + b]
deletes @ transposes @ replaces @ inserts |> Set.ofList
let NWORDS =
File.ReadAllText "big.txt" |> (Regex "[a-zA-Z]+").Matches |> Seq.cast
|> Seq.map (fun (m:Match) -> m.Value.ToLower()) |> Seq.countBy id |> Map.ofSeq
let known_edits2 word = [for e1 in edits1(word) do for e2 in edits1(e1) do if Map.containsKey e2 NWORDS then yield e2] |> Set.ofList
let known words = [for w in words do if Map.containsKey w NWORDS then yield w] |> Set.ofList
let (<||>) (first : Lazy<_>) (second : Lazy<_>) : Lazy<_> = lazy(if Set.isEmpty first.Value then second.Value else first.Value)
let correct word =
(lazy known([word]) <||> lazy known(edits1(word)) <||> lazy known_edits2(word) <||> lazy Set.singleton word).Value
|> Seq.sortBy (fun w -> -NWORDS.[w]) |> Seq.head
// Example
correct "speling"
|
namespace System
namespace System.IO
namespace System.Text
namespace System.Text.RegularExpressions
val edits1 : word:string -> Set<string>
Full name: Script.edits1
val word : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val splits : (string * string) list
val i : int
property System.String.Length: int
val deletes : string list
val a : string
val b : string
val transposes : string list
val replaces : string list
val c : char
val inserts : string list
Multiple items
module Set
from Microsoft.FSharp.Collections
--------------------
type Set<'T (requires comparison)> =
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
member IsProperSupersetOf : otherSet:Set<'T> -> bool
...
Full name: Microsoft.FSharp.Collections.Set<_>
--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.ofList
val NWORDS : Map<string,int>
Full name: Script.NWORDS
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
File.ReadAllText(path: string) : string
File.ReadAllText(path: string, encoding: System.Text.Encoding) : string
Multiple items
type Regex =
new : pattern:string -> Regex + 1 overload
member GetGroupNames : unit -> string[]
member GetGroupNumbers : unit -> int[]
member GroupNameFromNumber : i:int -> string
member GroupNumberFromName : name:string -> int
member IsMatch : input:string -> bool + 1 overload
member Match : input:string -> Match + 2 overloads
member Matches : input:string -> MatchCollection + 1 overload
member Options : RegexOptions
member Replace : input:string * replacement:string -> string + 5 overloads
...
Full name: System.Text.RegularExpressions.Regex
--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
module Seq
from Microsoft.FSharp.Collections
val cast : source:System.Collections.IEnumerable -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.cast
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val m : Match
type Match =
inherit Group
member Groups : GroupCollection
member NextMatch : unit -> Match
member Result : replacement:string -> string
static member Empty : Match
static member Synchronized : inner:Match -> Match
Full name: System.Text.RegularExpressions.Match
property Capture.Value: string
System.String.ToLower() : string
System.String.ToLower(culture: System.Globalization.CultureInfo) : string
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.countBy
val id : x:'T -> 'T
Full name: Microsoft.FSharp.Core.Operators.id
Multiple items
module Map
from Microsoft.FSharp.Collections
--------------------
type Map<'Key,'Value (requires comparison)> =
interface IEnumerable
interface IComparable
interface IEnumerable<KeyValuePair<'Key,'Value>>
interface ICollection<KeyValuePair<'Key,'Value>>
interface IDictionary<'Key,'Value>
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
member Add : key:'Key * value:'Value -> Map<'Key,'Value>
member ContainsKey : key:'Key -> bool
override Equals : obj -> bool
member Remove : key:'Key -> Map<'Key,'Value>
...
Full name: Microsoft.FSharp.Collections.Map<_,_>
--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val ofSeq : elements:seq<'Key * 'T> -> Map<'Key,'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Map.ofSeq
val known_edits2 : word:string -> Set<string>
Full name: Script.known_edits2
val e1 : string
val e2 : string
val containsKey : key:'Key -> table:Map<'Key,'T> -> bool (requires comparison)
Full name: Microsoft.FSharp.Collections.Map.containsKey
val known : words:seq<string> -> Set<string>
Full name: Script.known
val words : seq<string>
val w : string
val first : Lazy<Set<'a>> (requires comparison)
Multiple items
active recognizer Lazy: Lazy<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.( |Lazy| )
--------------------
type Lazy<'T> = System.Lazy<'T>
Full name: Microsoft.FSharp.Control.Lazy<_>
val second : Lazy<Set<'a>> (requires comparison)
val isEmpty : set:Set<'T> -> bool (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.isEmpty
property System.Lazy.Value: Set<'a>
val correct : word:string -> string
Full name: Script.correct
val singleton : value:'T -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.singleton
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val head : source:seq<'T> -> 'T
Full name: Microsoft.FSharp.Collections.Seq.head
More information