0 people like it.

# Frequent Pattern Growth algorithm

This is a commonly used algorithm for 'market basket' type analysis. It finds frequent itemsets from a series of transactions. Frequent itemsets are the item combinations that are frequently purchased together. See usage comment for example

 ``` 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: ``` ``````module FPGrowth //An implementation of frequent pattern growth algorithm to find frequent itemsets //(generates only frequent itemsets; does not mine association rules) open System.Collections.Generic type Node<'i> = { Item:'i mutable Support: int; Children:Dictionary<'i,Node<'i>> } type Root<'i> = {Heads:Dictionary<'i,Node<'i>>} type TableEntry<'i>= {Item:'i; Frequency:int} type Tree<'i> = {Root:Root<'i>} type Path<'i> = {Items:'i list; Support:int} let private yourself x = x //scan transactions and find frequency of each distinct item //sort results in descending order of frequency let initTable (transactions:seq<'id*Set<'item>>) = let freq = transactions |> Seq.collect snd |> Seq.countBy yourself let table = freq |> Seq.map (fun (i,c) -> {Item=i; Frequency=c}) |> Seq.sortBy (fun te -> -te.Frequency) |> Seq.toArray table //sort items in each *transaction* by descending order of (global) item frequency (calculated in initTable) let sortTransactions (transactions:seq<'id*Set<'item>>) (table:TableEntry<'item>[]) = let tblIdx = table |> Seq.map (fun te -> te.Item, te ) |> dict let sorted = transactions |> Seq.map (fun (id,s)-> id, s |> Seq.filter (fun item -> tblIdx.ContainsKey item) |> Seq.sortByDescending (fun item -> tblIdx.[item].Frequency) |> Seq.toArray) |> Seq.toArray sorted //insert item (should be the first item of each transaction) into the root of the tree let insertRoot (r:Root<_>) item = let node = match r.Heads.TryGetValue item with | true,n -> n | false,_ -> let n = {Item=item; Support=0; Children=Dictionary()} r.Heads.Add(item,n) n node.Support <- node.Support + 1 //count how times this item shows up as the first element of sorted transactions node //insert a transaction item into the tree let insertNode (r:Node<_>) item = let node = match r.Children.TryGetValue item with | true,n -> n | false,_ -> let n = {Item=item; Support=0; Children=Dictionary()} r.Children.Add(item,n) n node.Support <- node.Support + 1 //count how times this item shows in association with other items along the path to the root node //recursively insert items of a (sorted) transaction into the tree let rec insert acc tree (items:array<_>) i = if i >= items.Length then acc else let item = items.[i] let n = if i = 0 then insertRoot tree.Root item else let prev = List.head acc insertNode prev item insert (n::acc) tree items (i+1) let makeTree (transactions:seq<'id*Set<'item>>) = let table = initTable transactions let sorted = sortTransactions transactions table let tree = {Root={Heads=Dictionary()}} let _ = (tree,sorted) ||> Array.fold (fun tree (_,sortedTxns) -> let _ = insert [] tree sortedTxns 0 tree ) tree let rec mineNode acc (node:Node<_>) = if node.Children.Count = 0 then //leaf seq{yield {Items=acc; Support=node.Support}} else let acc = node.Item::acc //combine path items node.Children |> Seq.collect (fun kv->mineNode acc kv.Value) //PSeq seems to be much slower for test data //http://www.fssnip.net/2z/title/All-combinations-of-list-elements let allCombinations lst = let rec comb accLst elemLst = match elemLst with | h::t -> let next = [h]::List.map (fun el -> h::el) accLst @ accLst comb next t | _ -> accLst comb [] lst let mineTree support (tree:Tree<_>) = let paths = tree.Root.Heads |> Seq.collect (fun kv->mineNode [] kv.Value) //find support for each path from root to leaf paths |> Seq.collect (fun {Items=is;Support=c} -> let combos = allCombinations is |> List.map set //explode all possible combinations of items in the path combos |> List.map (fun combo -> combo,c)) //associate them with support from this path |> Seq.filter (fun (combo,_) -> Set.isEmpty combo |> not) //filter out empty sets |> Seq.groupBy fst //group by each combination of items generated earlier |> Seq.map (fun (k,vs)->k,vs|>Seq.sumBy snd) //sum up individual supports for each group into total support |> Seq.filter (fun (_,s)-> s >= support) //filter out combinations that fall below the specified support threshold (* usage #load "FPGrowth.fs" open FPGrowth let transactions = //each character in the strings below is a transaction item [|"abcefo"; "acg"; "ei"; "acdeg"; "acegl"; "ej"; "abcefp"; "acd"; "acegm"; "acegn"|] |> Array.mapi (fun i s-> i,s |> Seq.toList |> set) let support = 3 let tree = makeTree transactions //make a compressed representation of the transactions let frequentItems = mineTree support tree |> Seq.toList > val frequentItems : (Set * int) list = [(set ['a'], 8); (set ['a'; 'c'], 8); (set ['a'; 'c'; 'e'], 6); (set ['a'; 'e'], 6); (set ['c'], 8); (set ['c'; 'e'], 6); (set ['e'], 8); (set ['a'; 'c'; 'e'; 'g'], 4); (set ['a'; 'c'; 'g'], 4); (set ['a'; 'e'; 'g'], 4); (set ['a'; 'g'], 4); (set ['c'; 'e'; 'g'], 4); (set ['c'; 'g'], 4); (set ['e'; 'g'], 4); (set ['g'], 4)] *) ``````
module FPGrowth
namespace System
namespace System.Collections
namespace System.Collections.Generic
type Node<'i> =
{Item: 'i;
mutable Support: int;
Children: Dictionary<'i,Node<'i>>;}

Full name: FPGrowth.Node<_>
Node.Item: 'i
Node.Support: int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
Node.Children: Dictionary<'i,Node<'i>>
Multiple items
type Dictionary<'TKey,'TValue> =
new : unit -> Dictionary<'TKey, 'TValue> + 5 overloads
member Add : key:'TKey * value:'TValue -> unit
member Clear : unit -> unit
member Comparer : IEqualityComparer<'TKey>
member ContainsKey : key:'TKey -> bool
member ContainsValue : value:'TValue -> bool
member Count : int
member GetEnumerator : unit -> Enumerator<'TKey, 'TValue>
member GetObjectData : info:SerializationInfo * context:StreamingContext -> unit
member Item : 'TKey -> 'TValue with get, set
...
nested type Enumerator
nested type KeyCollection
nested type ValueCollection

Full name: System.Collections.Generic.Dictionary<_,_>

--------------------
Dictionary() : unit
Dictionary(capacity: int) : unit
Dictionary(comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>) : unit
Dictionary(capacity: int, comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>, comparer: IEqualityComparer<'TKey>) : unit
type Root<'i> =

Full name: FPGrowth.Root<_>
type TableEntry<'i> =
{Item: 'i;
Frequency: int;}

Full name: FPGrowth.TableEntry<_>
TableEntry.Item: 'i
TableEntry.Frequency: int
type Tree<'i> =
{Root: Root<'i>;}

Full name: FPGrowth.Tree<_>
Multiple items
Tree.Root: Root<'i>

--------------------
type Root<'i> =

Full name: FPGrowth.Root<_>
type Path<'i> =
{Items: 'i list;
Support: int;}

Full name: FPGrowth.Path<_>
Path.Items: 'i list
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
Path.Support: int
val private yourself : x:'a -> 'a

Full name: FPGrowth.yourself
val x : 'a
val initTable : transactions:seq<'id * Set<'item>> -> TableEntry<'item> [] (requires comparison)

Full name: FPGrowth.initTable
val transactions : seq<'id * Set<'item>> (requires comparison)
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val id : x:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.id
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
member IsProperSupersetOf : otherSet:Set<'T> -> bool
...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val freq : seq<'item * int> (requires comparison)
module Seq

from Microsoft.FSharp.Collections
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.collect
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.countBy
val table : TableEntry<'item> [] (requires comparison)
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val i : 'item (requires comparison)
val c : int
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val te : TableEntry<'item> (requires comparison)
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val sortTransactions : transactions:seq<'id * Set<'item>> -> table:TableEntry<'item> [] -> ('id * 'item []) [] (requires comparison)

Full name: FPGrowth.sortTransactions
val tblIdx : IDictionary<'item,TableEntry<'item>> (requires comparison)
TableEntry.Item: 'item
val dict : keyValuePairs:seq<'Key * 'Value> -> IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val sorted : ('id * 'item []) [] (requires comparison)
val id : 'id
val s : Set<'item> (requires comparison)
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val item : 'item (requires comparison)
IDictionary.ContainsKey(key: 'item) : bool
val sortByDescending : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortByDescending
val insertRoot : r:Root<'a> -> item:'a -> Node<'a> (requires equality)

Full name: FPGrowth.insertRoot
val r : Root<'a> (requires equality)
val item : 'a (requires equality)
val node : Node<'a> (requires equality)
Dictionary.TryGetValue(key: 'a, value: byref<Node<'a>>) : bool
val n : Node<'a> (requires equality)
Dictionary.Add(key: 'a, value: Node<'a>) : unit
val insertNode : r:Node<'a> -> item:'a -> Node<'a> (requires equality)

Full name: FPGrowth.insertNode
val r : Node<'a> (requires equality)
Node.Children: Dictionary<'a,Node<'a>>
val insert : acc:Node<'a> list -> tree:Tree<'a> -> items:'a array -> i:int -> Node<'a> list (requires equality)

Full name: FPGrowth.insert
val acc : Node<'a> list (requires equality)
val tree : Tree<'a> (requires equality)
val items : 'a array (requires equality)
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
val i : int
property System.Array.Length: int
Tree.Root: Root<'a>
val prev : Node<'a> (requires equality)
Multiple items
type List<'T> =
new : unit -> List<'T> + 2 overloads
member Add : item:'T -> unit
member AddRange : collection:IEnumerable<'T> -> unit
member BinarySearch : item:'T -> int + 2 overloads
member Capacity : int with get, set
member Clear : unit -> unit
member Contains : item:'T -> bool
member ConvertAll<'TOutput> : converter:Converter<'T, 'TOutput> -> List<'TOutput>
member CopyTo : array:'T[] -> unit + 2 overloads
...
nested type Enumerator

Full name: System.Collections.Generic.List<_>

--------------------
List() : unit
List(capacity: int) : unit
List(collection: IEnumerable<'T>) : unit
val head : list:'T list -> 'T

val makeTree : transactions:seq<'id * Set<'item>> -> Tree<'item> (requires comparison)

Full name: FPGrowth.makeTree
val tree : Tree<'item> (requires comparison)
module Array

from Microsoft.FSharp.Collections
val fold : folder:('State -> 'T -> 'State) -> state:'State -> array:'T [] -> 'State

Full name: Microsoft.FSharp.Collections.Array.fold
val sortedTxns : 'item [] (requires comparison)
val mineNode : acc:'a list -> node:Node<'a> -> seq<Path<'a>>

Full name: FPGrowth.mineNode
val acc : 'a list
val node : Node<'a>
property Dictionary.Count: int
Node.Item: 'a
val kv : KeyValuePair<'a,Node<'a>>
property KeyValuePair.Value: Node<'a>
val allCombinations : lst:'a list -> 'a list list

Full name: FPGrowth.allCombinations
val lst : 'a list
val comb : ('b list list -> 'b list -> 'b list list)
val accLst : 'b list list
val elemLst : 'b list
val h : 'b
val t : 'b list
val next : 'b list list
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val el : 'b list
val mineTree : support:int -> tree:Tree<'a> -> seq<Set<'a> * int> (requires comparison)

Full name: FPGrowth.mineTree
val support : int
val tree : Tree<'a> (requires comparison)
val paths : seq<Path<'a>> (requires comparison)
val kv : KeyValuePair<'a,Node<'a>> (requires comparison)
val is : 'a list (requires comparison)
val combos : Set<'a> list (requires comparison)
val set : elements:seq<'T> -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.set
val combo : Set<'a> (requires comparison)
val isEmpty : set:Set<'T> -> bool (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.isEmpty
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val k : Set<'a> (requires comparison)
val vs : seq<Set<'a> * int> (requires comparison)
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sumBy
val s : int