1 people like it.

# Simple KMeans clustering in 2D

I needed a crude k-means (http://en.wikipedia.org/wiki/K-means_clustering) clustering method for a one off test of something, and decided to try to do in F# for learning purposes

 ``` 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: ``` ``````open System // First attempt at a KMeans clustering. // Doesn't have an epsilon diff check which would allow early exit from loop. let KMeans2D maxIterations numClusters (data:(float*float) list) = let rec getMin points cur = let oldMinX,oldMinY = cur match points with | [] -> cur | (minX,minY) :: tail when minX getMin tail (minX,minY) | (minX,minY) :: tail when minX>oldMinX && minY < oldMinY -> getMin tail (oldMinX,minY) | (minX,minY) :: tail when minX oldMinY -> getMin tail (minX,oldMinY) | (minX,minY) :: tail when minX>oldMinX && minY > oldMinY -> getMin tail (oldMinX,oldMinY) | (minX,minY) :: tail -> getMin tail (oldMinX,oldMinY) let rec getMax points cur = let oldmaxX,oldmaxY = cur match points with | [] -> cur | (maxX,maxY) :: tail when maxX>oldmaxX && maxY > oldmaxY -> getMax tail (maxX,maxY) | (maxX,maxY) :: tail when maxX oldmaxY -> getMax tail (oldmaxX,maxY) | (maxX,maxY) :: tail when maxX>oldmaxX && maxY < oldmaxY -> getMax tail (maxX,oldmaxY) | (maxX,maxY) :: tail when maxX getMax tail (oldmaxX,oldmaxY) | (maxX,maxY) :: tail -> getMax tail (oldmaxX,oldmaxY) let minX,minY = getMin data (Double.MaxValue,Double.MaxValue) let maxX,maxY = getMax data (Double.MinValue,Double.MinValue) let partition means points = let distance mean pt = let meanX,meanY=mean let ptX,ptY=pt (mean,Math.Sqrt((meanX-ptX)**2.0+(meanY-ptY)**2.0),(ptX,ptY)) let minDistance means pt = let (mean,len,pt) = means |> List.map (fun mean -> (distance mean pt)) |> List.minBy (fun (mean,len,pt)->len) (mean,pt) let parts = List.map (fun pt-> minDistance means pt ) points let groups = Seq.ofList parts |> Seq.groupBy (fun (mean,point)->mean) Seq.map (fun (key,value)->(key,List.ofSeq (Seq.map (fun (x,y)->y) value))) groups|>List.ofSeq let recalcMeans partitions = let getClusterMean (cluster:(float*float)list) = (List.averageBy (fun (ptX,ptY)->ptX) cluster,List.averageBy (fun (ptX,ptY)->ptY) cluster) List.map (fun (oldMean,cluster) -> getClusterMean cluster) partitions let getInitialClusters() = let rnd = new System.Random() let randMeans = List.init numClusters (fun _ -> (minX+((maxX-minX)*rnd.NextDouble()),minY+((maxY-minY)*rnd.NextDouble())) ) partition randMeans data let rec impl iteration clusters = if List.isEmpty clusters then [] else match iteration with | 0 -> clusters | _ -> let means = recalcMeans clusters let newClusters = partition means data impl (iteration-1) newClusters impl maxIterations (getInitialClusters()) ``````
namespace System
val KMeans2D : maxIterations:int -> numClusters:int -> data:(float * float) list -> ((float * float) * (float * float) list) list

Full name: Script.KMeans2D
val maxIterations : int
val numClusters : int
val data : (float * float) list
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
val getMin : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val points : ('a * 'b) list (requires comparison and comparison)
val cur : 'a * 'b (requires comparison and comparison)
val oldMinX : 'a (requires comparison)
val oldMinY : 'b (requires comparison)
val minX : 'a (requires comparison)
val minY : 'b (requires comparison)
val tail : ('a * 'b) list (requires comparison and comparison)
val getMax : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val oldmaxX : 'a (requires comparison)
val oldmaxY : 'b (requires comparison)
val maxX : 'a (requires comparison)
val maxY : 'b (requires comparison)
val minX : float
val minY : float
type Double =
struct
member CompareTo : value:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 3 overloads
static val MinValue : float
static val MaxValue : float
static val Epsilon : float
static val NegativeInfinity : float
static val PositiveInfinity : float
...
end

Full name: System.Double
field float.MaxValue = 1.79769313486e+308
val maxX : float
val maxY : float
field float.MinValue = -1.79769313486e+308
val partition : ((float * float) list -> (float * float) list -> ((float * float) * (float * float) list) list)
val means : (float * float) list
val points : (float * float) list
val distance : (float * float -> float * float -> (float * float) * float * (float * float))
val mean : float * float
val pt : float * float
val meanX : float
val meanY : float
val ptX : float
val ptY : float
type Math =
static val PI : float
static val E : float
static member Abs : value:sbyte -> sbyte + 6 overloads
static member Acos : d:float -> float
static member Asin : d:float -> float
static member Atan : d:float -> float
static member Atan2 : y:float * x:float -> float
static member BigMul : a:int * b:int -> int64
static member Ceiling : d:decimal -> decimal + 1 overload
static member Cos : d:float -> float
...

Full name: System.Math
Math.Sqrt(d: float) : float
val minDistance : ((float * float) list -> float * float -> (float * float) * (float * float))
val len : float
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val minBy : projection:('T -> 'U) -> list:'T list -> 'T (requires comparison)

Full name: Microsoft.FSharp.Collections.List.minBy
val parts : ((float * float) * (float * float)) list
val groups : seq<(float * float) * seq<(float * float) * (float * float)>>
module Seq

from Microsoft.FSharp.Collections
val ofList : source:'T list -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.ofList
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val point : float * float
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val key : float * float
val value : seq<(float * float) * (float * float)>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val x : float * float
val y : float * float
val recalcMeans : (('a * (float * float) list) list -> (float * float) list)
val partitions : ('a * (float * float) list) list
val getClusterMean : ((float * float) list -> float * float)
val cluster : (float * float) list
val averageBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)

Full name: Microsoft.FSharp.Collections.List.averageBy
val oldMean : 'a
val getInitialClusters : (unit -> ((float * float) * (float * float) list) list)
val rnd : Random
Multiple items
type Random =
new : unit -> Random + 1 overload
member Next : unit -> int + 2 overloads
member NextBytes : buffer:byte[] -> unit
member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val randMeans : (float * float) list
val init : length:int -> initializer:(int -> 'T) -> 'T list

Full name: Microsoft.FSharp.Collections.List.init
Random.NextDouble() : float
val impl : (int -> ((float * float) * (float * float) list) list -> ((float * float) * (float * float) list) list)
val iteration : int
val clusters : ((float * float) * (float * float) list) list
val isEmpty : list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.isEmpty
val newClusters : ((float * float) * (float * float) list) list