25 people like it.

Anagrams

Let's use the fundamental theorem of arithmetic to determine if two words are anagrams of each other. How? The theorem states (roughly) that each number can be written as a product of prime numbers in only one unique way. For instance 42 = 7 * 2 * 3 = 3 * 7 * 2. Now what will happen if you associate a letter with a unique prime number? You can see that "team" [71*11*2*41] = "meat" [41*11*2*71]. Oh, the possibilities. Note that in the code below big integers are used since the product of many primes will overflow a 32- or even a 64-bit integer.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
// rather crude, but only a handful of prime numbers are needed here
let isPrime = function
    | n when n < 2 -> false
    | 2 -> true
    | n -> let upper = int (sqrt (float n))
           [2..upper] |> List.forall (fun d -> n%d <> 0)

let primes = {0 .. System.Int32.MaxValue} |> Seq.filter isPrime

let createLookup alphabet = 
    // you can give characters you don't care about a weight of 1
    let ignoredChars = ['\'';' ';'.';'!';':';';';'?']
    
    let nchars = alphabet |> Seq.length
    Seq.zip alphabet (primes |> Seq.take nchars) 
    |> Seq.append (ignoredChars |> Seq.map (fun c -> (c,1)))
    |> dict

let lookup = ['a'..'z'] |> createLookup
// or if you want to discriminate between upper and lower case
// let lookup = ['A'..'Z'] @ ['a'..'z'] |> createLookup

let computeHash (s:string) =
    s |> Seq.fold (fun acc c -> acc * (bigint (lookup.[c]))) 1I

let isAnagram word1 word2 = 
    (computeHash word1) = (computeHash word2)

// --- example (FSI) ---
//
//> isAnagram "team" "meat";;
//Real: 00:00:00.000, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = true
//> isAnagram "team" "meet";;
//Real: 00:00:00.000, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = false  
//> isAnagram "crazy dudes" "dude's crazy";;
//Real: 00:00:00.001, CPU: 00:00:00.000, GC gen0: 0, gen1: 0, gen2: 0
//val it : bool = true

// --- another example: let's find a word with the most anagrams ---
// source: http://sourceforge.net/projects/wordlist/files/ [continued] 
//                Ispell-EnWL/3.1.20/ispell-enwl-3.1.20.zip/download
let wordPath = @"c:\temp\words\english.0" 

let readWords path = 
    System.IO.File.ReadAllLines(path)
    |> Array.map (fun word -> word.ToLowerInvariant())
    
let mostAnagrams() = 
    readWords wordPath
    |> Seq.groupBy computeHash
    |> Seq.sortBy (fun (_, anagrams) -> -(Seq.length anagrams))
    |> Seq.head
    |> snd
    |> Seq.toList

let longestWordWithTwoAnagrams() =
    readWords wordPath    
    //cellars and cellar's are the same for this purpose
    |> Seq.distinctBy (fun word -> word.Replace("\'","")) 
    |> Seq.sortBy (fun word -> -(word.Length))
    |> Seq.groupBy computeHash
    |> Seq.filter (fun (_,anagrams) -> Seq.length anagrams = 3)
    |> Seq.head 
    |> snd
    |> Seq.toList

// FSI
//> mostAnagrams();;
//Real: 00:00:00.537, CPU: 00:00:00.531, GC gen0: 9, gen1: 9, gen2: 0
//val it : string list =
//  ["asper"; "pares"; "parse"; "pears"; "rapes"; "reaps"; "spare"; "spear"]
//> longestWordWithTwoAnagrams();;
//Real: 00:00:00.525, CPU: 00:00:00.531, GC gen0: 7, gen1: 7, gen2: 0
//val it : string list = ["reduction's"; "discounter"; "introduces"]
val isPrime : _arg1:int -> bool

Full name: Script.isPrime
val n : int
val upper : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val forall : predicate:('T -> bool) -> list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.forall
val d : int
val primes : seq<int>

Full name: Script.primes
namespace System
type Int32 =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MaxValue : int
    static val MinValue : int
    static member Parse : s:string -> int + 3 overloads
    static member TryParse : s:string * result:int -> bool + 1 overload
  end

Full name: System.Int32
field int.MaxValue = 2147483647
module Seq

from Microsoft.FSharp.Collections
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val createLookup : alphabet:seq<char> -> System.Collections.Generic.IDictionary<char,int>

Full name: Script.createLookup
val alphabet : seq<char>
val ignoredChars : char list
val nchars : int
val length : source:seq<'T> -> int

Full name: Microsoft.FSharp.Collections.Seq.length
val zip : source1:seq<'T1> -> source2:seq<'T2> -> seq<'T1 * 'T2>

Full name: Microsoft.FSharp.Collections.Seq.zip
val take : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.take
val append : source1:seq<'T> -> source2:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.append
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val c : char
val dict : keyValuePairs:seq<'Key * 'Value> -> System.Collections.Generic.IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val lookup : System.Collections.Generic.IDictionary<char,int>

Full name: Script.lookup
val computeHash : s:string -> System.Numerics.BigInteger

Full name: Script.computeHash
val s : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State

Full name: Microsoft.FSharp.Collections.Seq.fold
val acc : System.Numerics.BigInteger
type bigint = System.Numerics.BigInteger

Full name: Microsoft.FSharp.Core.bigint
val isAnagram : word1:string -> word2:string -> bool

Full name: Script.isAnagram
val word1 : string
val word2 : string
val wordPath : string

Full name: Script.wordPath
val readWords : path:string -> string []

Full name: Script.readWords
val path : string
namespace System.IO
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
System.IO.File.ReadAllLines(path: string) : string []
System.IO.File.ReadAllLines(path: string, encoding: System.Text.Encoding) : string []
module Array

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val word : string
System.String.ToLowerInvariant() : string
val mostAnagrams : unit -> string list

Full name: Script.mostAnagrams
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val anagrams : seq<string>
val head : source:seq<'T> -> 'T

Full name: Microsoft.FSharp.Collections.Seq.head
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val toList : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.Seq.toList
val longestWordWithTwoAnagrams : unit -> string list

Full name: Script.longestWordWithTwoAnagrams
val distinctBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.distinctBy
System.String.Replace(oldValue: string, newValue: string) : string
System.String.Replace(oldChar: char, newChar: char) : string
property System.String.Length: int

More information

Link:http://fssnip.net/24
Posted:13 years ago
Author:Arjen Kopinga
Tags: algorithms , anagram