0 people like it.
Like the snippet!
Frequent Pattern Growth algorithm
This is a commonly used algorithm for 'market basket' type analysis. It finds frequent itemsets from a series of transactions. Frequent itemsets are the item combinations that are frequently purchased together. See usage comment for example
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
|
module FPGrowth
//An implementation of frequent pattern growth algorithm to find frequent itemsets
//(generates only frequent itemsets; does not mine association rules)
open System.Collections.Generic
type Node<'i> =
{
Item:'i
mutable Support: int;
Children:Dictionary<'i,Node<'i>>
}
type Root<'i> = {Heads:Dictionary<'i,Node<'i>>}
type TableEntry<'i>= {Item:'i; Frequency:int}
type Tree<'i> = {Root:Root<'i>}
type Path<'i> = {Items:'i list; Support:int}
let private yourself x = x
//scan transactions and find frequency of each distinct item
//sort results in descending order of frequency
let initTable (transactions:seq<'id*Set<'item>>) =
let freq = transactions |> Seq.collect snd |> Seq.countBy yourself
let table =
freq
|> Seq.map (fun (i,c) -> {Item=i; Frequency=c})
|> Seq.sortBy (fun te -> -te.Frequency)
|> Seq.toArray
table
//sort items in each *transaction* by descending order of (global) item frequency (calculated in initTable)
let sortTransactions (transactions:seq<'id*Set<'item>>) (table:TableEntry<'item>[]) =
let tblIdx = table |> Seq.map (fun te -> te.Item, te ) |> dict
let sorted =
transactions
|> Seq.map (fun (id,s)->
id,
s
|> Seq.filter (fun item -> tblIdx.ContainsKey item)
|> Seq.sortByDescending (fun item -> tblIdx.[item].Frequency)
|> Seq.toArray)
|> Seq.toArray
sorted
//insert item (should be the first item of each transaction) into the root of the tree
let insertRoot (r:Root<_>) item =
let node =
match r.Heads.TryGetValue item with
| true,n -> n
| false,_ ->
let n = {Item=item; Support=0; Children=Dictionary()}
r.Heads.Add(item,n)
n
node.Support <- node.Support + 1 //count how times this item shows up as the first element of sorted transactions
node
//insert a transaction item into the tree
let insertNode (r:Node<_>) item =
let node =
match r.Children.TryGetValue item with
| true,n -> n
| false,_ ->
let n = {Item=item; Support=0; Children=Dictionary()}
r.Children.Add(item,n)
n
node.Support <- node.Support + 1 //count how times this item shows in association with other items along the path to the root
node
//recursively insert items of a (sorted) transaction into the tree
let rec insert acc tree (items:array<_>) i =
if i >= items.Length then
acc
else
let item = items.[i]
let n =
if i = 0 then
insertRoot tree.Root item
else
let prev = List.head acc
insertNode prev item
insert (n::acc) tree items (i+1)
let makeTree (transactions:seq<'id*Set<'item>>) =
let table = initTable transactions
let sorted = sortTransactions transactions table
let tree = {Root={Heads=Dictionary()}}
let _ = (tree,sorted) ||> Array.fold (fun tree (_,sortedTxns) ->
let _ = insert [] tree sortedTxns 0
tree
)
tree
let rec mineNode acc (node:Node<_>) =
if node.Children.Count = 0 then //leaf
seq{yield {Items=acc; Support=node.Support}}
else
let acc = node.Item::acc //combine path items
node.Children |> Seq.collect (fun kv->mineNode acc kv.Value) //PSeq seems to be much slower for test data
//http://www.fssnip.net/2z/title/All-combinations-of-list-elements
let allCombinations lst =
let rec comb accLst elemLst =
match elemLst with
| h::t ->
let next = [h]::List.map (fun el -> h::el) accLst @ accLst
comb next t
| _ -> accLst
comb [] lst
let mineTree support (tree:Tree<_>) =
let paths = tree.Root.Heads |> Seq.collect (fun kv->mineNode [] kv.Value) //find support for each path from root to leaf
paths |> Seq.collect (fun {Items=is;Support=c} ->
let combos = allCombinations is |> List.map set //explode all possible combinations of items in the path
combos |> List.map (fun combo -> combo,c)) //associate them with support from this path
|> Seq.filter (fun (combo,_) -> Set.isEmpty combo |> not) //filter out empty sets
|> Seq.groupBy fst //group by each combination of items generated earlier
|> Seq.map (fun (k,vs)->k,vs|>Seq.sumBy snd) //sum up individual supports for each group into total support
|> Seq.filter (fun (_,s)-> s >= support) //filter out combinations that fall below the specified support threshold
(* usage
#load "FPGrowth.fs"
open FPGrowth
let transactions = //each character in the strings below is a transaction item
[|"abcefo"; "acg"; "ei"; "acdeg"; "acegl"; "ej"; "abcefp"; "acd"; "acegm"; "acegn"|]
|> Array.mapi (fun i s-> i,s |> Seq.toList |> set)
let support = 3
let tree = makeTree transactions //make a compressed representation of the transactions
let frequentItems = mineTree support tree |> Seq.toList
>
val frequentItems : (Set<char> * int) list =
[(set ['a'], 8); (set ['a'; 'c'], 8); (set ['a'; 'c'; 'e'], 6);
(set ['a'; 'e'], 6); (set ['c'], 8); (set ['c'; 'e'], 6); (set ['e'], 8);
(set ['a'; 'c'; 'e'; 'g'], 4); (set ['a'; 'c'; 'g'], 4);
(set ['a'; 'e'; 'g'], 4); (set ['a'; 'g'], 4); (set ['c'; 'e'; 'g'], 4);
(set ['c'; 'g'], 4); (set ['e'; 'g'], 4); (set ['g'], 4)]
*)
|
module FPGrowth
namespace System
namespace System.Collections
namespace System.Collections.Generic
type Node<'i> =
{Item: 'i;
mutable Support: int;
Children: Dictionary<'i,Node<'i>>;}
Full name: FPGrowth.Node<_>
Node.Item: 'i
Node.Support: int
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
Node.Children: Dictionary<'i,Node<'i>>
Multiple items
type Dictionary<'TKey,'TValue> =
new : unit -> Dictionary<'TKey, 'TValue> + 5 overloads
member Add : key:'TKey * value:'TValue -> unit
member Clear : unit -> unit
member Comparer : IEqualityComparer<'TKey>
member ContainsKey : key:'TKey -> bool
member ContainsValue : value:'TValue -> bool
member Count : int
member GetEnumerator : unit -> Enumerator<'TKey, 'TValue>
member GetObjectData : info:SerializationInfo * context:StreamingContext -> unit
member Item : 'TKey -> 'TValue with get, set
...
nested type Enumerator
nested type KeyCollection
nested type ValueCollection
Full name: System.Collections.Generic.Dictionary<_,_>
--------------------
Dictionary() : unit
Dictionary(capacity: int) : unit
Dictionary(comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>) : unit
Dictionary(capacity: int, comparer: IEqualityComparer<'TKey>) : unit
Dictionary(dictionary: IDictionary<'TKey,'TValue>, comparer: IEqualityComparer<'TKey>) : unit
type Root<'i> =
{Heads: Dictionary<'i,Node<'i>>;}
Full name: FPGrowth.Root<_>
Root.Heads: Dictionary<'i,Node<'i>>
type TableEntry<'i> =
{Item: 'i;
Frequency: int;}
Full name: FPGrowth.TableEntry<_>
TableEntry.Item: 'i
TableEntry.Frequency: int
type Tree<'i> =
{Root: Root<'i>;}
Full name: FPGrowth.Tree<_>
Multiple items
Tree.Root: Root<'i>
--------------------
type Root<'i> =
{Heads: Dictionary<'i,Node<'i>>;}
Full name: FPGrowth.Root<_>
type Path<'i> =
{Items: 'i list;
Support: int;}
Full name: FPGrowth.Path<_>
Path.Items: 'i list
type 'T list = List<'T>
Full name: Microsoft.FSharp.Collections.list<_>
Path.Support: int
val private yourself : x:'a -> 'a
Full name: FPGrowth.yourself
val x : 'a
val initTable : transactions:seq<'id * Set<'item>> -> TableEntry<'item> [] (requires comparison)
Full name: FPGrowth.initTable
val transactions : seq<'id * Set<'item>> (requires comparison)
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val id : x:'T -> 'T
Full name: Microsoft.FSharp.Core.Operators.id
Multiple items
module Set
from Microsoft.FSharp.Collections
--------------------
type Set<'T (requires comparison)> =
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
member IsProperSupersetOf : otherSet:Set<'T> -> bool
...
Full name: Microsoft.FSharp.Collections.Set<_>
--------------------
new : elements:seq<'T> -> Set<'T>
val freq : seq<'item * int> (requires comparison)
module Seq
from Microsoft.FSharp.Collections
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.collect
val snd : tuple:('T1 * 'T2) -> 'T2
Full name: Microsoft.FSharp.Core.Operators.snd
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.countBy
val table : TableEntry<'item> [] (requires comparison)
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val i : 'item (requires comparison)
val c : int
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val te : TableEntry<'item> (requires comparison)
val toArray : source:seq<'T> -> 'T []
Full name: Microsoft.FSharp.Collections.Seq.toArray
val sortTransactions : transactions:seq<'id * Set<'item>> -> table:TableEntry<'item> [] -> ('id * 'item []) [] (requires comparison)
Full name: FPGrowth.sortTransactions
val tblIdx : IDictionary<'item,TableEntry<'item>> (requires comparison)
TableEntry.Item: 'item
val dict : keyValuePairs:seq<'Key * 'Value> -> IDictionary<'Key,'Value> (requires equality)
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val sorted : ('id * 'item []) [] (requires comparison)
val id : 'id
val s : Set<'item> (requires comparison)
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
val item : 'item (requires comparison)
IDictionary.ContainsKey(key: 'item) : bool
val sortByDescending : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortByDescending
val insertRoot : r:Root<'a> -> item:'a -> Node<'a> (requires equality)
Full name: FPGrowth.insertRoot
val r : Root<'a> (requires equality)
val item : 'a (requires equality)
val node : Node<'a> (requires equality)
Root.Heads: Dictionary<'a,Node<'a>>
Dictionary.TryGetValue(key: 'a, value: byref<Node<'a>>) : bool
val n : Node<'a> (requires equality)
Dictionary.Add(key: 'a, value: Node<'a>) : unit
val insertNode : r:Node<'a> -> item:'a -> Node<'a> (requires equality)
Full name: FPGrowth.insertNode
val r : Node<'a> (requires equality)
Node.Children: Dictionary<'a,Node<'a>>
val insert : acc:Node<'a> list -> tree:Tree<'a> -> items:'a array -> i:int -> Node<'a> list (requires equality)
Full name: FPGrowth.insert
val acc : Node<'a> list (requires equality)
val tree : Tree<'a> (requires equality)
val items : 'a array (requires equality)
type 'T array = 'T []
Full name: Microsoft.FSharp.Core.array<_>
val i : int
property System.Array.Length: int
Tree.Root: Root<'a>
val prev : Node<'a> (requires equality)
Multiple items
type List<'T> =
new : unit -> List<'T> + 2 overloads
member Add : item:'T -> unit
member AddRange : collection:IEnumerable<'T> -> unit
member AsReadOnly : unit -> ReadOnlyCollection<'T>
member BinarySearch : item:'T -> int + 2 overloads
member Capacity : int with get, set
member Clear : unit -> unit
member Contains : item:'T -> bool
member ConvertAll<'TOutput> : converter:Converter<'T, 'TOutput> -> List<'TOutput>
member CopyTo : array:'T[] -> unit + 2 overloads
...
nested type Enumerator
Full name: System.Collections.Generic.List<_>
--------------------
List() : unit
List(capacity: int) : unit
List(collection: IEnumerable<'T>) : unit
val head : list:'T list -> 'T
Full name: Microsoft.FSharp.Collections.List.head
val makeTree : transactions:seq<'id * Set<'item>> -> Tree<'item> (requires comparison)
Full name: FPGrowth.makeTree
val tree : Tree<'item> (requires comparison)
module Array
from Microsoft.FSharp.Collections
val fold : folder:('State -> 'T -> 'State) -> state:'State -> array:'T [] -> 'State
Full name: Microsoft.FSharp.Collections.Array.fold
val sortedTxns : 'item [] (requires comparison)
val mineNode : acc:'a list -> node:Node<'a> -> seq<Path<'a>>
Full name: FPGrowth.mineNode
val acc : 'a list
val node : Node<'a>
property Dictionary.Count: int
Node.Item: 'a
val kv : KeyValuePair<'a,Node<'a>>
property KeyValuePair.Value: Node<'a>
val allCombinations : lst:'a list -> 'a list list
Full name: FPGrowth.allCombinations
val lst : 'a list
val comb : ('b list list -> 'b list -> 'b list list)
val accLst : 'b list list
val elemLst : 'b list
val h : 'b
val t : 'b list
val next : 'b list list
val map : mapping:('T -> 'U) -> list:'T list -> 'U list
Full name: Microsoft.FSharp.Collections.List.map
val el : 'b list
val mineTree : support:int -> tree:Tree<'a> -> seq<Set<'a> * int> (requires comparison)
Full name: FPGrowth.mineTree
val support : int
val tree : Tree<'a> (requires comparison)
val paths : seq<Path<'a>> (requires comparison)
val kv : KeyValuePair<'a,Node<'a>> (requires comparison)
val is : 'a list (requires comparison)
val combos : Set<'a> list (requires comparison)
val set : elements:seq<'T> -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.set
val combo : Set<'a> (requires comparison)
val isEmpty : set:Set<'T> -> bool (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.isEmpty
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.groupBy
val fst : tuple:('T1 * 'T2) -> 'T1
Full name: Microsoft.FSharp.Core.Operators.fst
val k : Set<'a> (requires comparison)
val vs : seq<Set<'a> * int> (requires comparison)
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.sumBy
val s : int
More information