4 people like it.

MurmurHash3

An attempt to implement murmurhash version 3 in F# Original source code: https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp Original author: Austin Appleby Wikpedia link: https://en.wikipedia.org/wiki/MurmurHash

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
module murmur =
    let multiply (x : uint32) (r : uint32) =
        ((x &&& 0xffffu) * r) + ((((x >>> 16) * r) &&& 0xffffu) <<< 16)
    let rotl (x : uint32) (r : int) =
        (x <<< r) ||| (x >>> (32 - r))
    let fmix (h : uint32) =
        h
        |> fun x -> x ^^^ (x >>> 16)
        |> fun x -> multiply x 0x85ebca6bu
        |> fun x -> x ^^^ (x >>> 13)
        |> fun x -> multiply x 0xc2b2ae35u
        |> fun x -> x ^^^ (x >>> 16)
    let bitSum (b : byte[]) =
        b
        |> Array.reduce (|||)
    let tailReducer b c1 c2 =
        b
        |> fun x -> multiply (uint32 x) c1
        |> fun x -> rotl x 15
        |> fun x -> multiply x c2

    let murmurhash3 (convertString : string) (seed : uint32) =
        let byteArray = System.Text.Encoding.UTF8.GetBytes(convertString)
        let c1 = 0xcc9e2d51u
        let c2 = 0x1b873593u
        let mutable h1 = seed
        let len = byteArray.Length
        let remainder = len % 4
        if len-remainder-1 > 0 then do
            let headArray = byteArray.[..(len-remainder-1)]
            for i = 0 to ((len-remainder)/4)-1 do
                let miniArray = headArray.[(i*4)..(i*4)+3]
                miniArray.[1] <- miniArray.[1] <<< 8
                miniArray.[2] <- miniArray.[2] <<< 16
                miniArray.[3] <- miniArray.[3] <<< 24
                let helper =
                    bitSum miniArray
                    |> fun x -> multiply (uint32 x) c1
                    |> fun x -> rotl x 15
                    |> fun x -> multiply x c2
                    |> fun x -> multiply h1 x
                    |> fun x -> rotl x 13
                    |> fun x -> (multiply x 5u) + 0xe6546b64u
                    |> fun x -> x ^^^ (uint32 len)
                    |> fmix
                h1 <- helper
        let tailArray = byteArray.[(len-remainder)..]
        let k1 = 
            match tailArray.Length with
            | 3 -> 
                    tailArray.[1] <- tailArray.[1] <<< 8
                    tailArray.[2] <- tailArray.[2] <<< 16
                    tailReducer (bitSum tailArray) c1 c2
            | 2 -> 
                    tailArray.[1] <- tailArray.[1] <<< 8
                    tailReducer (bitSum tailArray) c1 c2
            | 1 -> 
                    tailReducer tailArray.[0] c1 c2
            | _ ->  0u
        h1 <- h1 ^^^ k1
        h1 <- h1 ^^^ (uint32 len)
        h1
val multiply : x:uint32 -> r:uint32 -> uint32

Full name: Script.murmur.multiply
val x : uint32
Multiple items
val uint32 : value:'T -> uint32 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.uint32

--------------------
type uint32 = System.UInt32

Full name: Microsoft.FSharp.Core.uint32
val r : uint32
val rotl : x:uint32 -> r:int -> uint32

Full name: Script.murmur.rotl
val r : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val fmix : h:uint32 -> uint32

Full name: Script.murmur.fmix
val h : uint32
val bitSum : b:byte [] -> byte

Full name: Script.murmur.bitSum
val b : byte []
Multiple items
val byte : value:'T -> byte (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.byte

--------------------
type byte = System.Byte

Full name: Microsoft.FSharp.Core.byte
module Array

from Microsoft.FSharp.Collections
val reduce : reduction:('T -> 'T -> 'T) -> array:'T [] -> 'T

Full name: Microsoft.FSharp.Collections.Array.reduce
val tailReducer : b:byte -> c1:uint32 -> c2:uint32 -> uint32

Full name: Script.murmur.tailReducer
val b : byte
val c1 : uint32
val c2 : uint32
val x : byte
val murmurhash3 : convertString:string -> seed:uint32 -> uint32

Full name: Script.murmur.murmurhash3
val convertString : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val seed : uint32
val byteArray : byte []
namespace System
namespace System.Text
type Encoding =
  member BodyName : string
  member Clone : unit -> obj
  member CodePage : int
  member DecoderFallback : DecoderFallback with get, set
  member EncoderFallback : EncoderFallback with get, set
  member EncodingName : string
  member Equals : value:obj -> bool
  member GetByteCount : chars:char[] -> int + 3 overloads
  member GetBytes : chars:char[] -> byte[] + 5 overloads
  member GetCharCount : bytes:byte[] -> int + 2 overloads
  ...

Full name: System.Text.Encoding
property System.Text.Encoding.UTF8: System.Text.Encoding
System.Text.Encoding.GetBytes(s: string) : byte []
System.Text.Encoding.GetBytes(chars: char []) : byte []
System.Text.Encoding.GetBytes(chars: char [], index: int, count: int) : byte []
System.Text.Encoding.GetBytes(chars: nativeptr<char>, charCount: int, bytes: nativeptr<byte>, byteCount: int) : int
System.Text.Encoding.GetBytes(s: string, charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
System.Text.Encoding.GetBytes(chars: char [], charIndex: int, charCount: int, bytes: byte [], byteIndex: int) : int
val mutable h1 : uint32
val len : int
property System.Array.Length: int
val remainder : int
val headArray : byte []
val i : int
val miniArray : byte []
val helper : uint32
val tailArray : byte []
val k1 : uint32

More information

Link:http://fssnip.net/7RH
Posted:7 years ago
Author:MÃ¥rten Lindblad
Tags: algorithm , hashing