3 people like it.

Bloom filter implementation in F#

http://codekata.pragprog.com/2007/01/kata_five_bloom.html

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
open System.Security.Cryptography
open System.Text
open System.IO

let (bitMap:byte[]) = Array.zeroCreate 20000000  
    
let convertToNumber byte1 byte2 byte3 = 
    int (int byte1 + ((int byte2) <<< 8) + ((int byte3) <<< 16)) 

let createHash (word : string) = 
    let ts = MD5.Create()
    let hash:byte[] = System.Text.Encoding.ASCII.GetBytes(word)
                     |> ts.ComputeHash 
    let a = convertToNumber hash.[0] hash.[1] hash.[8]
    let b = convertToNumber hash.[2] hash.[3] hash.[9]
    let c = convertToNumber hash.[4] hash.[5] hash.[10]
    let d = convertToNumber hash.[6] hash.[7] hash.[11]
    (int a, int b, int c, int d)

let setBit (pos) = 
    bitMap.[pos/8] <- byte (bitMap.[pos/8] ||| (1uy <<< (pos % 8)))

let getBit (pos) = 
    byte ((bitMap.[pos/8] &&& (1uy <<< (pos % 8))) >>> (pos % 8))
    
let addWord (word : string) = 
    createHash word
    |> (fun (a, b, c, d) -> setBit a; setBit b; setBit c; setBit d)

let isWordInDictionary (word : string) = 
    let (p, q, r, s) =
        createHash word
        |> (fun (a, b, c, d) -> (getBit a, getBit b, getBit c, getBit d))   
    (p &&& q &&& r &&& s = 1uy)

let readDictionary = 
   File.ReadLines @"wordlist.txt"
    |> Seq.iter(fun w -> addWord w )

isWordInDictionary "Zulu"
namespace System
namespace System.Security
namespace System.Security.Cryptography
namespace System.Text
namespace System.IO
val bitMap : byte []

Full name: Script.bitMap
Multiple items
val byte : value:'T -> byte (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.byte

--------------------
type byte = System.Byte

Full name: Microsoft.FSharp.Core.byte
module Array

from Microsoft.FSharp.Collections
val zeroCreate : count:int -> 'T []

Full name: Microsoft.FSharp.Collections.Array.zeroCreate
val convertToNumber : byte1:byte -> byte2:byte -> byte3:byte -> int

Full name: Script.convertToNumber
val byte1 : byte
val byte2 : byte
val byte3 : byte
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val createHash : word:string -> int * int * int * int

Full name: Script.createHash
val word : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val ts : MD5
type MD5 =
  inherit HashAlgorithm
  static member Create : unit -> MD5 + 1 overload

Full name: System.Security.Cryptography.MD5
MD5.Create() : MD5
MD5.Create(algName: string) : MD5
val hash : byte []
type Encoding =
  member BodyName : string
  member Clone : unit -> obj
  member CodePage : int
  member DecoderFallback : DecoderFallback with get, set
  member EncoderFallback : EncoderFallback with get, set
  member EncodingName : string
  member Equals : value:obj -> bool
  member GetByteCount : chars:char[] -> int + 3 overloads
  member GetBytes : chars:char[] -> byte[] + 5 overloads
  member GetCharCount : bytes:byte[] -> int + 2 overloads
  ...

Full name: System.Text.Encoding
property Encoding.ASCII: Encoding
Encoding.GetBytes(s: string) : byte []
Encoding.GetBytes(chars: char []) : byte []
Encoding.GetBytes(chars: char [], index: int, count: int) : byte []
Encoding.GetBytes(chars: nativeptr<char>, charCount: int, bytes: nativeptr<byte>, byteCount: int) : int
Encoding.GetBytes(s: string, charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
Encoding.GetBytes(chars: char [], charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
HashAlgorithm.ComputeHash(buffer: byte []) : byte []
HashAlgorithm.ComputeHash(inputStream: Stream) : byte []
HashAlgorithm.ComputeHash(buffer: byte [], offset: int, count: int) : byte []
val a : int
val b : int
val c : int
val d : int
val setBit : pos:int -> unit

Full name: Script.setBit
val pos : int
val getBit : pos:int -> byte

Full name: Script.getBit
val addWord : word:string -> unit

Full name: Script.addWord
val isWordInDictionary : word:string -> bool

Full name: Script.isWordInDictionary
val p : byte
val q : byte
val r : byte
val s : byte
val readDictionary : unit

Full name: Script.readDictionary
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
File.ReadLines(path: string) : System.Collections.Generic.IEnumerable<string>
File.ReadLines(path: string, encoding: Encoding) : System.Collections.Generic.IEnumerable<string>
module Seq

from Microsoft.FSharp.Collections
val iter : action:('T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iter
val w : string
Raw view Test code New version

More information

Link:http://fssnip.net/hl
Posted:11 years ago
Author:Suzanna
Tags: bloom , fsharp , f#