1 people like it.

Simple KMeans clustering in 2D

I needed a crude k-means (http://en.wikipedia.org/wiki/K-means_clustering) clustering method for a one off test of something, and decided to try to do in F# for learning purposes

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
open System

// First attempt at a KMeans clustering.
// Doesn't have an epsilon diff check which would allow early exit from loop.
let KMeans2D maxIterations numClusters (data:(float*float) list) = 
    let rec getMin points cur = 
        let oldMinX,oldMinY = cur
        match points with
        | [] -> cur
        | (minX,minY) :: tail when minX<oldMinX && minY < oldMinY -> getMin tail (minX,minY)
        | (minX,minY) :: tail when minX>oldMinX && minY < oldMinY -> getMin tail (oldMinX,minY)
        | (minX,minY) :: tail when minX<oldMinX && minY > oldMinY -> getMin tail (minX,oldMinY)
        | (minX,minY) :: tail when minX>oldMinX && minY > oldMinY -> getMin tail (oldMinX,oldMinY)
        | (minX,minY) :: tail -> getMin tail (oldMinX,oldMinY)
    let rec getMax points cur = 
        let oldmaxX,oldmaxY = cur
        match points with
        | [] -> cur
        | (maxX,maxY) :: tail when maxX>oldmaxX && maxY > oldmaxY -> getMax tail (maxX,maxY)
        | (maxX,maxY) :: tail when maxX<oldmaxX && maxY > oldmaxY -> getMax tail (oldmaxX,maxY)
        | (maxX,maxY) :: tail when maxX>oldmaxX && maxY < oldmaxY -> getMax tail (maxX,oldmaxY)
        | (maxX,maxY) :: tail when maxX<oldmaxX && maxY < oldmaxY -> getMax tail (oldmaxX,oldmaxY)
        | (maxX,maxY) :: tail -> getMax tail (oldmaxX,oldmaxY)
            
    let minX,minY = getMin data (Double.MaxValue,Double.MaxValue)
    let maxX,maxY = getMax data (Double.MinValue,Double.MinValue)

    let partition means points = 
        let distance mean pt = let meanX,meanY=mean
                               let ptX,ptY=pt
                               (mean,Math.Sqrt((meanX-ptX)**2.0+(meanY-ptY)**2.0),(ptX,ptY))
        let minDistance means pt = let (mean,len,pt) = means 
                                                       |> List.map (fun mean -> (distance mean pt))
                                                       |> List.minBy (fun (mean,len,pt)->len)
                                   (mean,pt)
        let parts = List.map (fun pt-> minDistance means pt ) points
        let groups = Seq.ofList parts |> Seq.groupBy (fun (mean,point)->mean)
        Seq.map (fun (key,value)->(key,List.ofSeq (Seq.map (fun (x,y)->y) value))) groups|>List.ofSeq

    let recalcMeans partitions = 
        let getClusterMean (cluster:(float*float)list) = 
            (List.averageBy (fun (ptX,ptY)->ptX) cluster,List.averageBy (fun (ptX,ptY)->ptY) cluster)
        List.map (fun (oldMean,cluster) -> getClusterMean cluster) partitions

    let getInitialClusters() = 
        let rnd = new System.Random()
        let randMeans = List.init numClusters (fun _ -> (minX+((maxX-minX)*rnd.NextDouble()),minY+((maxY-minY)*rnd.NextDouble())) ) 
        partition randMeans data
                                        
    let rec impl iteration clusters = 
        if List.isEmpty clusters then []
        else
            match iteration with
            | 0 -> clusters
            | _ -> let means = recalcMeans clusters
                   let newClusters = partition means data
                   impl (iteration-1) newClusters

    impl maxIterations (getInitialClusters())
namespace System
val KMeans2D : maxIterations:int -> numClusters:int -> data:(float * float) list -> ((float * float) * (float * float) list) list

Full name: Script.KMeans2D
val maxIterations : int
val numClusters : int
val data : (float * float) list
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
val getMin : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val points : ('a * 'b) list (requires comparison and comparison)
val cur : 'a * 'b (requires comparison and comparison)
val oldMinX : 'a (requires comparison)
val oldMinY : 'b (requires comparison)
val minX : 'a (requires comparison)
val minY : 'b (requires comparison)
val tail : ('a * 'b) list (requires comparison and comparison)
val getMax : (('a * 'b) list -> 'a * 'b -> 'a * 'b) (requires comparison and comparison)
val oldmaxX : 'a (requires comparison)
val oldmaxY : 'b (requires comparison)
val maxX : 'a (requires comparison)
val maxY : 'b (requires comparison)
val minX : float
val minY : float
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end

Full name: System.Double
field float.MaxValue = 1.79769313486e+308
val maxX : float
val maxY : float
field float.MinValue = -1.79769313486e+308
val partition : ((float * float) list -> (float * float) list -> ((float * float) * (float * float) list) list)
val means : (float * float) list
val points : (float * float) list
val distance : (float * float -> float * float -> (float * float) * float * (float * float))
val mean : float * float
val pt : float * float
val meanX : float
val meanY : float
val ptX : float
val ptY : float
type Math =
  static val PI : float
  static val E : float
  static member Abs : value:sbyte -> sbyte + 6 overloads
  static member Acos : d:float -> float
  static member Asin : d:float -> float
  static member Atan : d:float -> float
  static member Atan2 : y:float * x:float -> float
  static member BigMul : a:int * b:int -> int64
  static member Ceiling : d:decimal -> decimal + 1 overload
  static member Cos : d:float -> float
  ...

Full name: System.Math
Math.Sqrt(d: float) : float
val minDistance : ((float * float) list -> float * float -> (float * float) * (float * float))
val len : float
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val minBy : projection:('T -> 'U) -> list:'T list -> 'T (requires comparison)

Full name: Microsoft.FSharp.Collections.List.minBy
val parts : ((float * float) * (float * float)) list
val groups : seq<(float * float) * seq<(float * float) * (float * float)>>
module Seq

from Microsoft.FSharp.Collections
val ofList : source:'T list -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.ofList
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val point : float * float
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val key : float * float
val value : seq<(float * float) * (float * float)>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val x : float * float
val y : float * float
val recalcMeans : (('a * (float * float) list) list -> (float * float) list)
val partitions : ('a * (float * float) list) list
val getClusterMean : ((float * float) list -> float * float)
val cluster : (float * float) list
val averageBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)

Full name: Microsoft.FSharp.Collections.List.averageBy
val oldMean : 'a
val getInitialClusters : (unit -> ((float * float) * (float * float) list) list)
val rnd : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val randMeans : (float * float) list
val init : length:int -> initializer:(int -> 'T) -> 'T list

Full name: Microsoft.FSharp.Collections.List.init
Random.NextDouble() : float
val impl : (int -> ((float * float) * (float * float) list) list -> ((float * float) * (float * float) list) list)
val iteration : int
val clusters : ((float * float) * (float * float) list) list
val isEmpty : list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.isEmpty
val newClusters : ((float * float) * (float * float) list) list
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/lG
Posted:10 years ago
Author:@BrockSamsonUK
Tags: mathematics