2 people like it.

Huffman coding est Cool

Huffman coding in F#. frenchCode courtesy of M.Odersky.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
open System.Collections.Generic

type CodeTree<'T> = Leaf of 'T * int | Fork of CodeTree<'T> * CodeTree<'T> * 'T list * int

let chars  = function | Leaf(c,_) -> [c] | Fork(_,_,l,_) -> l
let weight = function | Leaf(c,w) ->  w  | Fork(_,_,_,w) -> w
let mergeCT ct1 ct2 = Fork(ct1, ct2, chars ct1 @ chars ct2, weight ct1 + weight ct2)
let foldCodeTree fNode (fLeaf:'T -> 'a) (x:'T CodeTree) = 
         let rec f x = match x with | Leaf(e,w) -> fLeaf(e) | Fork(l,r,cs,w) -> fNode(f l)(f r) 
         f x 
  
module List =
    let span f xs = 
        let l,r = xs |> List.fold(fun (l,r: _ list) e -> if f e then e::l, r.Tail else l,r ) ([],xs)
        l |> List.rev, r

module Huffman = 
   let buildFreq (sample : 'T list) = 
       if sample.Length = 0 then []
       else let sample = sample |> List.sort
            let rec count (sample: _ list) = [ if sample.Length <> 0 then 
                                                let left, right = sample |> List.span ((=) sample.Head)
                                                yield sample.Head,left.Length
                                                yield! count right  ]
            count sample |> List.sortBy snd        

   let buildFromFreq(distrib:('T*int) list) : CodeTree<'T> =
       let rec loop = function 
       | [] ->  failwith "not possible"
       | [r] -> r
       | f::s::xs ->    let newCT = mergeCT f s
                        let left, right = List.span (fun e -> weight e < weight newCT) xs
                        loop(left @ (newCT:: right))
       loop (distrib  |> List.sortBy(snd) |> List.map(Leaf))

   let decode (codeTree:_ CodeTree) (bits : int list) =
       let o = bits |> List.fold(fun ((Fork(l,r,_,_)), decMsg) b ->  let branch = if b = 0 then l else r
                                                                     match branch with | Leaf(c,_) -> (codeTree, c::decMsg) 
                                                                                       | _         -> (branch  , decMsg))
                                     (codeTree, [])
       snd o |> List.rev

   let makeEncoderDic (tree: 'T CodeTree) = 
      foldCodeTree (fun (lMap:Map<'T,int list>) rMap -> lMap |> Map.fold(fun m k v -> m.Add(k, 0::v)) (rMap |> Map.fold(fun m k v -> m.Add(k, 1::v)) Map.empty)) 
                   (fun c -> Map.empty.Add(c,[])) 
                   tree
   
   let encode (dic:Map<'T,int list>) (text : 'T list) = text |> List.fold(fun s e -> s@dic.[e]) ([])


   let secret = [0;0;1;1;1;0;1;0;1;1;1;0;0;1;1;0;1;0;0;1;1;0;1;0;1;1;0;0;1;1;1;1;1;0;1;0;1;1;0;0;0;0;1;0;1;1;1;0;0;1;0;0;1;0;0;0;1;0;0;0;1;0;1]
   let frenchCode = Fork(Fork(Fork(Leaf('s',121895),Fork(Leaf('d',56269),Fork(Fork(Fork(Leaf('x',5928),Leaf('j',8351),['x';'j'],14279),Leaf('f',16351),['x';'j';'f'],30630),Fork(Fork(Fork(Fork(Leaf('z',2093),Fork(Leaf('k',745),Leaf('w',1747),['k';'w'],2492),['z';'k';'w'],4585),Leaf('y',4725),['z';'k';'w';'y'],9310),Leaf('h',11298),['z';'k';'w';'y';'h'],20608),Leaf('q',20889),['z';'k';'w';'y';'h';'q'],41497),['x';'j';'f';'z';'k';'w';'y';'h';'q'],72127),['d';'x';'j';'f';'z';'k';'w';'y';'h';'q'],128396),['s';'d';'x';'j';'f';'z';'k';'w';'y';'h';'q'],250291),Fork(Fork(Leaf('o',82762),Leaf('l',83668),['o';'l'],166430),Fork(Fork(Leaf('m',45521),Leaf('p',46335),['m';'p'],91856),Leaf('u',96785),['m';'p';'u'],188641),['o';'l';'m';'p';'u'],355071),['s';'d';'x';'j';'f';'z';'k';'w';'y';'h';'q';'o';'l';'m';'p';'u'],605362),Fork(Fork(Fork(Leaf('r',100500),Fork(Leaf('c',50003),Fork(Leaf('v',24975),Fork(Leaf('g',13288),Leaf('b',13822),['g';'b'],27110),['v';'g';'b'],52085),['c';'v';'g';'b'],102088),['r';'c';'v';'g';'b'],202588),Fork(Leaf('n',108812),Leaf('t',111103),['n';'t'],219915),['r';'c';'v';'g';'b';'n';'t'],422503),Fork(Leaf('e',225947),Fork(Leaf('i',115465),Leaf('a',117110),['i';'a'],232575),['e';'i';'a'],458522),['r';'c';'v';'g';'b';'n';'t';'e';'i';'a'],881025),['s';'d';'x';'j';'f';'z';'k';'w';'y';'h';'q';'o';'l';'m';'p';'u';'r';'c';'v';'g';'b';'n';'t';'e';'i';'a'],1486387)

   let r = decode frenchCode secret

   let r' = decode frenchCode (encode (makeEncoderDic frenchCode) r)
namespace System
namespace System.Collections
namespace System.Collections.Generic
type CodeTree<'T> =
  | Leaf of 'T * int
  | Fork of CodeTree<'T> * CodeTree<'T> * 'T list * int

Full name: Script.CodeTree<_>
union case CodeTree.Leaf: 'T * int -> CodeTree<'T>
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
union case CodeTree.Fork: CodeTree<'T> * CodeTree<'T> * 'T list * int -> CodeTree<'T>
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
val chars : _arg1:CodeTree<'a> -> 'a list

Full name: Script.chars
val c : 'a
val l : 'a list
val weight : _arg1:CodeTree<'a> -> int

Full name: Script.weight
val w : int
val mergeCT : ct1:CodeTree<'a> -> ct2:CodeTree<'a> -> CodeTree<'a>

Full name: Script.mergeCT
val ct1 : CodeTree<'a>
val ct2 : CodeTree<'a>
val foldCodeTree : fNode:('a -> 'a -> 'a) -> fLeaf:('T -> 'a) -> x:CodeTree<'T> -> 'a

Full name: Script.foldCodeTree
val fNode : ('a -> 'a -> 'a)
val fLeaf : ('T -> 'a)
val x : CodeTree<'T>
val f : (CodeTree<'T> -> 'a)
val e : 'T
val l : CodeTree<'T>
val r : CodeTree<'T>
val cs : 'T list
Multiple items
type List<'T> =
  new : unit -> List<'T> + 2 overloads
  member Add : item:'T -> unit
  member AddRange : collection:IEnumerable<'T> -> unit
  member AsReadOnly : unit -> ReadOnlyCollection<'T>
  member BinarySearch : item:'T -> int + 2 overloads
  member Capacity : int with get, set
  member Clear : unit -> unit
  member Contains : item:'T -> bool
  member ConvertAll<'TOutput> : converter:Converter<'T, 'TOutput> -> List<'TOutput>
  member CopyTo : array:'T[] -> unit + 2 overloads
  ...
  nested type Enumerator

Full name: System.Collections.Generic.List<_>

--------------------
List() : unit
List(capacity: int) : unit
List(collection: IEnumerable<'T>) : unit
val span : f:('a -> bool) -> xs:'a list -> 'a list * 'a list

Full name: Script.List.span
val f : ('a -> bool)
val xs : 'a list
val r : 'a list
val fold : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State

Full name: Microsoft.FSharp.Collections.List.fold
val e : 'a
property List.Tail: 'a list
val rev : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.rev
module Huffman

from Script
val buildFreq : sample:'T list -> ('T * int) list (requires comparison)

Full name: Script.Huffman.buildFreq
val sample : 'T list (requires comparison)
property List.Length: int
val sort : list:'T list -> 'T list (requires comparison)

Full name: Microsoft.FSharp.Collections.List.sort
val count : ('a list -> ('a * int) list) (requires equality)
val sample : 'a list (requires equality)
val left : 'a list (requires equality)
val right : 'a list (requires equality)
property List.Head: 'a
val sortBy : projection:('T -> 'Key) -> list:'T list -> 'T list (requires comparison)

Full name: Microsoft.FSharp.Collections.List.sortBy
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val buildFromFreq : distrib:('T * int) list -> CodeTree<'T>

Full name: Script.Huffman.buildFromFreq
val distrib : ('T * int) list
val loop : (CodeTree<'a> list -> CodeTree<'a>)
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val r : CodeTree<'a>
val f : CodeTree<'a>
val s : CodeTree<'a>
val xs : CodeTree<'a> list
val newCT : CodeTree<'a>
val left : CodeTree<'a> list
val right : CodeTree<'a> list
val e : CodeTree<'a>
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val decode : codeTree:CodeTree<'a> -> bits:int list -> 'a list

Full name: Script.Huffman.decode
val codeTree : CodeTree<'a>
val bits : int list
val o : CodeTree<'a> * 'a list
val l : CodeTree<'a>
val decMsg : 'a list
val b : int
val branch : CodeTree<'a>
val makeEncoderDic : tree:CodeTree<'T> -> Map<'T,int list> (requires comparison)

Full name: Script.Huffman.makeEncoderDic
val tree : CodeTree<'T> (requires comparison)
val lMap : Map<'T,int list> (requires comparison)
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  override Equals : obj -> bool
  member Remove : key:'Key -> Map<'Key,'Value>
  ...

Full name: Microsoft.FSharp.Collections.Map<_,_>

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val rMap : Map<'T,int list> (requires comparison)
val fold : folder:('State -> 'Key -> 'T -> 'State) -> state:'State -> table:Map<'Key,'T> -> 'State (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.fold
val m : Map<'T,int list> (requires comparison)
val k : 'T (requires comparison)
val v : int list
member Map.Add : key:'Key * value:'Value -> Map<'Key,'Value>
val empty<'Key,'T (requires comparison)> : Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.empty
val c : 'T (requires comparison)
val encode : dic:Map<'T,int list> -> text:'T list -> int list (requires comparison)

Full name: Script.Huffman.encode
val dic : Map<'T,int list> (requires comparison)
val text : 'T list (requires comparison)
val s : int list
val e : 'T (requires comparison)
val secret : int list

Full name: Script.Huffman.secret
val frenchCode : CodeTree<char>

Full name: Script.Huffman.frenchCode
val r : char list

Full name: Script.Huffman.r
val r' : char list

Full name: Script.Huffman.r'
Raw view Test code New version

More information

Link:http://fssnip.net/kJ
Posted:10 years ago
Author:nicolas
Tags: huffman , entropy