0 people like it.

Frequent Pattern Growth algorithm

This is a commonly used algorithm for 'market basket' type analysis. It finds frequent itemsets from a series of transactions. Frequent itemsets are the item combinations that are frequently purchased together. See usage comment for example

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
module FPGrowth
//An implementation of frequent pattern growth algorithm to find frequent itemsets
//(generates only frequent itemsets; does not mine association rules)

open System.Collections.Generic

type Node<'i> = 
  {
    Item:'i
    mutable Support: int; 
    Children:Dictionary<'i,Node<'i>>
  }

type Root<'i> = {Heads:Dictionary<'i,Node<'i>>}
type TableEntry<'i>= {Item:'i; Frequency:int}
type Tree<'i> = {Root:Root<'i>}
type Path<'i> = {Items:'i list; Support:int}

let private yourself x = x

//scan transactions and find frequency of each distinct item
//sort results in descending order of frequency
let initTable (transactions:seq<'id*Set<'item>>) =
  let freq = transactions |> Seq.collect snd |> Seq.countBy yourself
  let table = 
    freq 
    |> Seq.map (fun (i,c) -> {Item=i; Frequency=c})
    |> Seq.sortBy (fun te -> -te.Frequency)
    |> Seq.toArray
  table

//sort items in each *transaction* by descending order of (global) item frequency (calculated in initTable)
let sortTransactions (transactions:seq<'id*Set<'item>>)  (table:TableEntry<'item>[]) =
  let tblIdx = table |> Seq.map (fun te -> te.Item, te ) |> dict
  let sorted = 
    transactions 
    |> Seq.map (fun (id,s)->
      id,
      s 
      |> Seq.filter (fun item -> tblIdx.ContainsKey item) 
      |> Seq.sortByDescending (fun item -> tblIdx.[item].Frequency) 
      |> Seq.toArray) 
    |> Seq.toArray
  sorted

//insert item (should be the first item of each transaction) into the root of the tree
let insertRoot (r:Root<_>) item =
    let node = 
      match r.Heads.TryGetValue item with
      | true,n -> n
      | false,_ -> 
        let n = {Item=item; Support=0; Children=Dictionary()}
        r.Heads.Add(item,n)
        n
    node.Support <- node.Support + 1     //count how times this item shows up as the first element of sorted transactions
    node
  
//insert a transaction item into the tree
let insertNode (r:Node<_>) item =
    let node = 
      match r.Children.TryGetValue item with
      | true,n -> n
      | false,_ -> 
        let n = {Item=item; Support=0; Children=Dictionary()}
        r.Children.Add(item,n)
        n
    node.Support <- node.Support + 1  //count how times this item shows in association with other items along the path to the root
    node

//recursively insert items of a (sorted) transaction into the tree
let rec insert acc tree (items:array<_>) i =
  if i >= items.Length then 
    acc
  else
    let item = items.[i]
    let n = 
      if i = 0 then 
        insertRoot tree.Root item
      else
        let prev = List.head acc
        insertNode prev item
    insert (n::acc) tree items (i+1)
      
let makeTree (transactions:seq<'id*Set<'item>>) =
  let table = initTable transactions
  let sorted = sortTransactions transactions table
  let tree = {Root={Heads=Dictionary()}}
  let _ = (tree,sorted) ||> Array.fold (fun tree (_,sortedTxns) -> 
    let _ = insert [] tree sortedTxns 0
    tree
  )
  tree

let rec mineNode acc (node:Node<_>) =
  if node.Children.Count = 0 then     //leaf
    seq{yield {Items=acc; Support=node.Support}}
  else
    let acc = node.Item::acc //combine path items
    node.Children |> Seq.collect (fun kv->mineNode acc kv.Value) //PSeq seems to be much slower for test data
  
//http://www.fssnip.net/2z/title/All-combinations-of-list-elements
let allCombinations lst =
    let rec comb accLst elemLst =
        match elemLst with
        | h::t ->
            let next = [h]::List.map (fun el -> h::el) accLst @ accLst
            comb next t
        | _ -> accLst
    comb [] lst

let mineTree support (tree:Tree<_>) =
  let paths = tree.Root.Heads |> Seq.collect (fun kv->mineNode [] kv.Value) //find support for each path from root to leaf
  paths |> Seq.collect (fun {Items=is;Support=c} ->
    let combos = allCombinations is |> List.map set         //explode all possible combinations of items in the path
    combos |> List.map (fun combo -> combo,c))              //associate them with support from this path
  |> Seq.filter (fun (combo,_) -> Set.isEmpty combo |> not) //filter out empty sets
  |> Seq.groupBy fst                                        //group by each combination of items generated earlier
  |> Seq.map (fun (k,vs)->k,vs|>Seq.sumBy snd)              //sum up individual supports for each group into total support
  |> Seq.filter (fun (_,s)-> s >= support)                  //filter out combinations that fall below the specified support threshold

(* usage  

#load "FPGrowth.fs"
open FPGrowth
let transactions = //each character in the strings below is a transaction item
  [|"abcefo"; "acg"; "ei"; "acdeg"; "acegl"; "ej"; "abcefp"; "acd"; "acegm"; "acegn"|]
  |> Array.mapi (fun i s-> i,s |> Seq.toList |> set)

let support = 3
let tree = makeTree transactions //make a compressed representation of the transactions
let frequentItems = mineTree support tree |> Seq.toList

>
val frequentItems : (Set<char> * int) list =
  [(set ['a'], 8); (set ['a'; 'c'], 8); (set ['a'; 'c'; 'e'], 6);
   (set ['a'; 'e'], 6); (set ['c'], 8); (set ['c'; 'e'], 6); (set ['e'], 8);
   (set ['a'; 'c'; 'e'; 'g'], 4); (set ['a'; 'c'; 'g'], 4);
   (set ['a'; 'e'; 'g'], 4); (set ['a'; 'g'], 4); (set ['c'; 'e'; 'g'], 4);
   (set ['c'; 'g'], 4); (set ['e'; 'g'], 4); (set ['g'], 4)]

*)
module FPGrowth
namespace System
namespace System.Collections
namespace System.Collections.Generic
type Node<'i> =
  {Item: 'i;
   mutable Support: int;
   Children: Dictionary<'i,Node<'i>>;}

Full name: FPGrowth.Node<_>
Node.Item: 'i
Node.Support: int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
Node.Children: Dictionary<'i,Node<'i>>
Multiple items
type Dictionary<'TKey,'TValue> =
  new : unit -> Dictionary<'TKey, 'TValue> + 5 overloads
  member Add : key:'TKey * value:'TValue -> unit
  member Clear : unit -> unit
  member Comparer : IEqualityComparer<'TKey>
  member ContainsKey : key:'TKey -> bool
  member ContainsValue : value:'TValue -> bool
  member Count : int
  member GetEnumerator : unit -> Enumerator<'TKey, 'TValue>
  member GetObjectData : info:SerializationInfo * context:StreamingContext -> unit
  member Item : 'TKey -> 'TValue with get, set
  ...
  nested type Enumerator
  nested type KeyCollection
  nested type ValueCollection

Full name: System.Collections.Generic.Dictionary<_,_>

--------------------
Dictionary() : unit
Dictionary(capacity: int) : unit
Dictionary(comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>) : unit
Dictionary(capacity: int, comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>, comparer: IEqualityComparer<'TKey>) : unit
type Root<'i> =
  {Heads: Dictionary<'i,Node<'i>>;}

Full name: FPGrowth.Root<_>
Root.Heads: Dictionary<'i,Node<'i>>
type TableEntry<'i> =
  {Item: 'i;
   Frequency: int;}

Full name: FPGrowth.TableEntry<_>
TableEntry.Item: 'i
TableEntry.Frequency: int
type Tree<'i> =
  {Root: Root<'i>;}

Full name: FPGrowth.Tree<_>
Multiple items
Tree.Root: Root<'i>

--------------------
type Root<'i> =
  {Heads: Dictionary<'i,Node<'i>>;}

Full name: FPGrowth.Root<_>
type Path<'i> =
  {Items: 'i list;
   Support: int;}

Full name: FPGrowth.Path<_>
Path.Items: 'i list
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
Path.Support: int
val private yourself : x:'a -> 'a

Full name: FPGrowth.yourself
val x : 'a
val initTable : transactions:seq<'id * Set<'item>> -> TableEntry<'item> [] (requires comparison)

Full name: FPGrowth.initTable
val transactions : seq<'id * Set<'item>> (requires comparison)
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val id : x:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.id
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val freq : seq<'item * int> (requires comparison)
module Seq

from Microsoft.FSharp.Collections
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.collect
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.countBy
val table : TableEntry<'item> [] (requires comparison)
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val i : 'item (requires comparison)
val c : int
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val te : TableEntry<'item> (requires comparison)
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val sortTransactions : transactions:seq<'id * Set<'item>> -> table:TableEntry<'item> [] -> ('id * 'item []) [] (requires comparison)

Full name: FPGrowth.sortTransactions
val tblIdx : IDictionary<'item,TableEntry<'item>> (requires comparison)
TableEntry.Item: 'item
val dict : keyValuePairs:seq<'Key * 'Value> -> IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val sorted : ('id * 'item []) [] (requires comparison)
val id : 'id
val s : Set<'item> (requires comparison)
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val item : 'item (requires comparison)
IDictionary.ContainsKey(key: 'item) : bool
val sortByDescending : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortByDescending
val insertRoot : r:Root<'a> -> item:'a -> Node<'a> (requires equality)

Full name: FPGrowth.insertRoot
val r : Root<'a> (requires equality)
val item : 'a (requires equality)
val node : Node<'a> (requires equality)
Root.Heads: Dictionary<'a,Node<'a>>
Dictionary.TryGetValue(key: 'a, value: byref<Node<'a>>) : bool
val n : Node<'a> (requires equality)
Dictionary.Add(key: 'a, value: Node<'a>) : unit
val insertNode : r:Node<'a> -> item:'a -> Node<'a> (requires equality)

Full name: FPGrowth.insertNode
val r : Node<'a> (requires equality)
Node.Children: Dictionary<'a,Node<'a>>
val insert : acc:Node<'a> list -> tree:Tree<'a> -> items:'a array -> i:int -> Node<'a> list (requires equality)

Full name: FPGrowth.insert
val acc : Node<'a> list (requires equality)
val tree : Tree<'a> (requires equality)
val items : 'a array (requires equality)
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
val i : int
property System.Array.Length: int
Tree.Root: Root<'a>
val prev : Node<'a> (requires equality)
Multiple items
type List<'T> =
  new : unit -> List<'T> + 2 overloads
  member Add : item:'T -> unit
  member AddRange : collection:IEnumerable<'T> -> unit
  member AsReadOnly : unit -> ReadOnlyCollection<'T>
  member BinarySearch : item:'T -> int + 2 overloads
  member Capacity : int with get, set
  member Clear : unit -> unit
  member Contains : item:'T -> bool
  member ConvertAll<'TOutput> : converter:Converter<'T, 'TOutput> -> List<'TOutput>
  member CopyTo : array:'T[] -> unit + 2 overloads
  ...
  nested type Enumerator

Full name: System.Collections.Generic.List<_>

--------------------
List() : unit
List(capacity: int) : unit
List(collection: IEnumerable<'T>) : unit
val head : list:'T list -> 'T

Full name: Microsoft.FSharp.Collections.List.head
val makeTree : transactions:seq<'id * Set<'item>> -> Tree<'item> (requires comparison)

Full name: FPGrowth.makeTree
val tree : Tree<'item> (requires comparison)
module Array

from Microsoft.FSharp.Collections
val fold : folder:('State -> 'T -> 'State) -> state:'State -> array:'T [] -> 'State

Full name: Microsoft.FSharp.Collections.Array.fold
val sortedTxns : 'item [] (requires comparison)
val mineNode : acc:'a list -> node:Node<'a> -> seq<Path<'a>>

Full name: FPGrowth.mineNode
val acc : 'a list
val node : Node<'a>
property Dictionary.Count: int
Node.Item: 'a
val kv : KeyValuePair<'a,Node<'a>>
property KeyValuePair.Value: Node<'a>
val allCombinations : lst:'a list -> 'a list list

Full name: FPGrowth.allCombinations
val lst : 'a list
val comb : ('b list list -> 'b list -> 'b list list)
val accLst : 'b list list
val elemLst : 'b list
val h : 'b
val t : 'b list
val next : 'b list list
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val el : 'b list
val mineTree : support:int -> tree:Tree<'a> -> seq<Set<'a> * int> (requires comparison)

Full name: FPGrowth.mineTree
val support : int
val tree : Tree<'a> (requires comparison)
val paths : seq<Path<'a>> (requires comparison)
val kv : KeyValuePair<'a,Node<'a>> (requires comparison)
val is : 'a list (requires comparison)
val combos : Set<'a> list (requires comparison)
val set : elements:seq<'T> -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.set
val combo : Set<'a> (requires comparison)
val isEmpty : set:Set<'T> -> bool (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.isEmpty
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val k : Set<'a> (requires comparison)
val vs : seq<Set<'a> * int> (requires comparison)
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sumBy
val s : int
Raw view New version

More information

Link:http://fssnip.net/7V4
Posted:4 months ago
Author:Faisal Waris
Tags: data mining; , frequent itemset