25 people like it.
Like the snippet!
Anagrams
Let's use the fundamental theorem of arithmetic to determine if two words are anagrams of each other. How? The theorem states (roughly) that each number can be written as a product of prime numbers in only one unique way. For instance 42 = 7 * 2 * 3 = 3 * 7 * 2.
Now what will happen if you associate a letter with a unique prime number? You can see that "team" [71*11*2*41] = "meat" [41*11*2*71]. Oh, the possibilities. Note that in the code below big integers are used since the product of many primes will overflow a 32- or even a 64-bit integer.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
|
// rather crude, but only a handful of prime numbers are needed here
let isPrime = function
| n when n < 2 -> false
| 2 -> true
| n -> let upper = int (sqrt (float n))
[2..upper] |> List.forall (fun d -> n%d <> 0)
let primes = {0 .. System.Int32.MaxValue} |> Seq.filter isPrime
let createLookup alphabet =
// you can give characters you don't care about a weight of 1
let ignoredChars = ['\'';' ';'.';'!';':';';';'?']
let nchars = alphabet |> Seq.length
Seq.zip alphabet (primes |> Seq.take nchars)
|> Seq.append (ignoredChars |> Seq.map (fun c -> (c,1)))
|> dict
let lookup = ['a'..'z'] |> createLookup
// or if you want to discriminate between upper and lower case
// let lookup = ['A'..'Z'] @ ['a'..'z'] |> createLookup
let computeHash (s:string) =
s |> Seq.fold (fun acc c -> acc * (bigint (lookup.[c]))) 1I
let isAnagram word1 word2 =
(computeHash word1) = (computeHash word2)
// --- example (FSI) ---
//
//> isAnagram "team" "meat";;
//Real: 00:00:00.000, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = true
//> isAnagram "team" "meet";;
//Real: 00:00:00.000, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = false
//> isAnagram "crazy dudes" "dude's crazy";;
//Real: 00:00:00.001, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = true
// --- another example: let's find a word with the most anagrams ---
// source: http://sourceforge.net/projects/wordlist/files/ [continued]
// Ispell-EnWL/3.1.20/ispell-enwl-3.1.20.zip/download
let wordPath = @"c:\temp\words\english.0"
let readWords path =
System.IO.File.ReadAllLines(path)
|> Array.map (fun word -> word.ToLowerInvariant())
let mostAnagrams() =
readWords wordPath
|> Seq.groupBy computeHash
|> Seq.sortBy (fun (_, anagrams) -> -(Seq.length anagrams))
|> Seq.head
|> snd
|> Seq.toList
let longestWordWithTwoAnagrams() =
readWords wordPath
//cellars and cellar's are the same for this purpose
|> Seq.distinctBy (fun word -> word.Replace("\'",""))
|> Seq.sortBy (fun word -> -(word.Length))
|> Seq.groupBy computeHash
|> Seq.filter (fun (_,anagrams) -> Seq.length anagrams = 3)
|> Seq.head
|> snd
|> Seq.toList
// FSI
//> mostAnagrams();;
//Real: 00:00:00.537, CPU: 00:00:00.531, GC gen0: 9, gen1: 9, gen2: 0
//val it : string list =
// ["asper"; "pares"; "parse"; "pears"; "rapes"; "reaps"; "spare"; "spear"]
//> longestWordWithTwoAnagrams();;
//Real: 00:00:00.525, CPU: 00:00:00.531, GC gen0: 7, gen1: 7, gen2: 0
//val it : string list = ["reduction's"; "discounter"; "introduces"]
|
val isPrime : _arg1:int -> bool
Full name: Script.isPrime
val n : int
val upper : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
val sqrt : value:'T -> 'U (requires member Sqrt)
Full name: Microsoft.FSharp.Core.Operators.sqrt
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = System.Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val forall : predicate:('T -> bool) -> list:'T list -> bool
Full name: Microsoft.FSharp.Collections.List.forall
val d : int
val primes : seq<int>
Full name: Script.primes
namespace System
type Int32 =
struct
member CompareTo : value:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 3 overloads
static val MaxValue : int
static val MinValue : int
static member Parse : s:string -> int + 3 overloads
static member TryParse : s:string * result:int -> bool + 1 overload
end
Full name: System.Int32
field int.MaxValue = 2147483647
module Seq
from Microsoft.FSharp.Collections
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
val createLookup : alphabet:seq<char> -> System.Collections.Generic.IDictionary<char,int>
Full name: Script.createLookup
val alphabet : seq<char>
val ignoredChars : char list
val nchars : int
val length : source:seq<'T> -> int
Full name: Microsoft.FSharp.Collections.Seq.length
val zip : source1:seq<'T1> -> source2:seq<'T2> -> seq<'T1 * 'T2>
Full name: Microsoft.FSharp.Collections.Seq.zip
val take : count:int -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.take
val append : source1:seq<'T> -> source2:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.append
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val c : char
val dict : keyValuePairs:seq<'Key * 'Value> -> System.Collections.Generic.IDictionary<'Key,'Value> (requires equality)
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val lookup : System.Collections.Generic.IDictionary<char,int>
Full name: Script.lookup
val computeHash : s:string -> System.Numerics.BigInteger
Full name: Script.computeHash
val s : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State
Full name: Microsoft.FSharp.Collections.Seq.fold
val acc : System.Numerics.BigInteger
type bigint = System.Numerics.BigInteger
Full name: Microsoft.FSharp.Core.bigint
val isAnagram : word1:string -> word2:string -> bool
Full name: Script.isAnagram
val word1 : string
val word2 : string
val wordPath : string
Full name: Script.wordPath
val readWords : path:string -> string []
Full name: Script.readWords
val path : string
namespace System.IO
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
System.IO.File.ReadAllLines(path: string) : string []
System.IO.File.ReadAllLines(path: string, encoding: System.Text.Encoding) : string []
module Array
from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
val word : string
System.String.ToLowerInvariant() : string
val mostAnagrams : unit -> string list
Full name: Script.mostAnagrams
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.groupBy
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val anagrams : seq<string>
val head : source:seq<'T> -> 'T
Full name: Microsoft.FSharp.Collections.Seq.head
val snd : tuple:('T1 * 'T2) -> 'T2
Full name: Microsoft.FSharp.Core.Operators.snd
val toList : source:seq<'T> -> 'T list
Full name: Microsoft.FSharp.Collections.Seq.toList
val longestWordWithTwoAnagrams : unit -> string list
Full name: Script.longestWordWithTwoAnagrams
val distinctBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.distinctBy
System.String.Replace(oldValue: string, newValue: string) : string
System.String.Replace(oldChar: char, newChar: char) : string
property System.String.Length: int
More information