1 people like it.
Like the snippet!
Simple KMeans clustering in 2D
I needed a crude k-means (http://en.wikipedia.org/wiki/K-means_clustering) clustering method for a one off test of something, and decided to try to do in F# for learning purposes
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
|
open System
// First attempt at a KMeans clustering.
// Doesn't have an epsilon diff check which would allow early exit from loop.
let KMeans2D maxIterations numClusters (data:(float*float) list) =
let rec getMin points cur =
let oldMinX,oldMinY = cur
match points with
| [] -> cur
| (minX,minY) :: tail when minX<oldMinX && minY < oldMinY -> getMin tail (minX,minY)
| (minX,minY) :: tail when minX>oldMinX && minY < oldMinY -> getMin tail (oldMinX,minY)
| (minX,minY) :: tail when minX<oldMinX && minY > oldMinY -> getMin tail (minX,oldMinY)
| (minX,minY) :: tail when minX>oldMinX && minY > oldMinY -> getMin tail (oldMinX,oldMinY)
| (minX,minY) :: tail -> getMin tail (oldMinX,oldMinY)
let rec getMax points cur =
let oldmaxX,oldmaxY = cur
match points with
| [] -> cur
| (maxX,maxY) :: tail when maxX>oldmaxX && maxY > oldmaxY -> getMax tail (maxX,maxY)
| (maxX,maxY) :: tail when maxX<oldmaxX && maxY > oldmaxY -> getMax tail (oldmaxX,maxY)
| (maxX,maxY) :: tail when maxX>oldmaxX && maxY < oldmaxY -> getMax tail (maxX,oldmaxY)
| (maxX,maxY) :: tail when maxX<oldmaxX && maxY < oldmaxY -> getMax tail (oldmaxX,oldmaxY)
| (maxX,maxY) :: tail -> getMax tail (oldmaxX,oldmaxY)
let minX,minY = getMin data (Double.MaxValue,Double.MaxValue)
let maxX,maxY = getMax data (Double.MinValue,Double.MinValue)
let partition means points =
let distance mean pt = let meanX,meanY=mean
let ptX,ptY=pt
(mean,Math.Sqrt((meanX-ptX)**2.0+(meanY-ptY)**2.0),(ptX,ptY))
let minDistance means pt = let (mean,len,pt) = means
|> List.map (fun mean -> (distance mean pt))
|> List.minBy (fun (mean,len,pt)->len)
(mean,pt)
let parts = List.map (fun pt-> minDistance means pt ) points
let groups = Seq.ofList parts |> Seq.groupBy (fun (mean,point)->mean)
Seq.map (fun (key,value)->(key,List.ofSeq (Seq.map (fun (x,y)->y) value))) groups|>List.ofSeq
let recalcMeans partitions =
let getClusterMean (cluster:(float*float)list) =
(List.averageBy (fun (ptX,ptY)->ptX) cluster,List.averageBy (fun (ptX,ptY)->ptY) cluster)
List.map (fun (oldMean,cluster) -> getClusterMean cluster) partitions
let getInitialClusters() =
let rnd = new System.Random()
let randMeans = List.init numClusters (fun _ -> (minX+((maxX-minX)*rnd.NextDouble()),minY+((maxY-minY)*rnd.NextDouble())) )
partition randMeans data
let rec impl iteration clusters =
if List.isEmpty clusters then []
else
match iteration with
| 0 -> clusters
| _ -> let means = recalcMeans clusters
let newClusters = partition means data
impl (iteration-1) newClusters
impl maxIterations (getInitialClusters())
|
namespace System
val KMeans2D : maxIterations:int -> numClusters:int -> data:(float * float) list -> ((float * float) * (float * float) list) list
Full name: Script.KMeans2D
val maxIterations : int
val numClusters : int
val data : (float * float) list
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
type 'T list = List<'T>
Full name: Microsoft.FSharp.Collections.list<_>
val getMin : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val points : ('a * 'b) list (requires comparison and comparison)
val cur : 'a * 'b (requires comparison and comparison)
val oldMinX : 'a (requires comparison)
val oldMinY : 'b (requires comparison)
val minX : 'a (requires comparison)
val minY : 'b (requires comparison)
val tail : ('a * 'b) list (requires comparison and comparison)
val getMax : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val oldmaxX : 'a (requires comparison)
val oldmaxY : 'b (requires comparison)
val maxX : 'a (requires comparison)
val maxY : 'b (requires comparison)
val minX : float
val minY : float
type Double =
struct
member CompareTo : value:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 3 overloads
static val MinValue : float
static val MaxValue : float
static val Epsilon : float
static val NegativeInfinity : float
static val PositiveInfinity : float
...
end
Full name: System.Double
field float.MaxValue = 1.79769313486e+308
val maxX : float
val maxY : float
field float.MinValue = -1.79769313486e+308
val partition : ((float * float) list -> (float * float) list -> ((float * float) * (float * float) list) list)
val means : (float * float) list
val points : (float * float) list
val distance : (float * float -> float * float -> (float * float) * float * (float * float))
val mean : float * float
val pt : float * float
val meanX : float
val meanY : float
val ptX : float
val ptY : float
type Math =
static val PI : float
static val E : float
static member Abs : value:sbyte -> sbyte + 6 overloads
static member Acos : d:float -> float
static member Asin : d:float -> float
static member Atan : d:float -> float
static member Atan2 : y:float * x:float -> float
static member BigMul : a:int * b:int -> int64
static member Ceiling : d:decimal -> decimal + 1 overload
static member Cos : d:float -> float
...
Full name: System.Math
Math.Sqrt(d: float) : float
val minDistance : ((float * float) list -> float * float -> (float * float) * (float * float))
val len : float
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val map : mapping:('T -> 'U) -> list:'T list -> 'U list
Full name: Microsoft.FSharp.Collections.List.map
val minBy : projection:('T -> 'U) -> list:'T list -> 'T (requires comparison)
Full name: Microsoft.FSharp.Collections.List.minBy
val parts : ((float * float) * (float * float)) list
val groups : seq<(float * float) * seq<(float * float) * (float * float)>>
module Seq
from Microsoft.FSharp.Collections
val ofList : source:'T list -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.ofList
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.groupBy
val point : float * float
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val key : float * float
val value : seq<(float * float) * (float * float)>
val ofSeq : source:seq<'T> -> 'T list
Full name: Microsoft.FSharp.Collections.List.ofSeq
val x : float * float
val y : float * float
val recalcMeans : (('a * (float * float) list) list -> (float * float) list)
val partitions : ('a * (float * float) list) list
val getClusterMean : ((float * float) list -> float * float)
val cluster : (float * float) list
val averageBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)
Full name: Microsoft.FSharp.Collections.List.averageBy
val oldMean : 'a
val getInitialClusters : (unit -> ((float * float) * (float * float) list) list)
val rnd : Random
Multiple items
type Random =
new : unit -> Random + 1 overload
member Next : unit -> int + 2 overloads
member NextBytes : buffer:byte[] -> unit
member NextDouble : unit -> float
Full name: System.Random
--------------------
Random() : unit
Random(Seed: int) : unit
val randMeans : (float * float) list
val init : length:int -> initializer:(int -> 'T) -> 'T list
Full name: Microsoft.FSharp.Collections.List.init
Random.NextDouble() : float
val impl : (int -> ((float * float) * (float * float) list) list -> ((float * float) * (float * float) list) list)
val iteration : int
val clusters : ((float * float) * (float * float) list) list
val isEmpty : list:'T list -> bool
Full name: Microsoft.FSharp.Collections.List.isEmpty
val newClusters : ((float * float) * (float * float) list) list
More information