0 people like it.

Principal Components Analysis

An efficient and elegant way to perform Principal Component Analysis (PCA) in idiomatic F#.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
open MathNet
open MathNet.Numerics
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.LinearAlgebra.Double
open MathNet.Numerics.Statistics

/// Builds the covariance matrix of the columns of `M`.
///
/// Entry (i, j) of the result is `Statistics.Covariance` applied to columns
/// i and j of `M`, so the result is square and symmetric, with one row and
/// one column per column of `M`. The diagonal therefore holds the covariance
/// of each column with itself.
let covarianceMatrix (M : Matrix<float>) =
    let cols = M.ColumnCount
    let C = DenseMatrix.Create(cols, cols, 0.0)

    // Pull every column out once up front instead of re-extracting the same
    // columns on every iteration of the nested loop.
    let columns = Array.init cols (fun c -> M.Column c)

    for c1 in 0 .. (cols - 1) do
        // Covariance is symmetric, so only the upper triangle (diagonal
        // included) is computed; each value is mirrored into the lower one.
        for c2 in c1 .. (cols - 1) do
            let cov = Statistics.Covariance (columns.[c1], columns.[c2])
            C.[c1, c2] <- cov
            C.[c2, c1] <- cov

    C

/// Standardizes the observations feature by feature (z-score).
///
/// For each feature index `i` in `0 .. dim - 1` the mean and the population
/// standard deviation (square root of the mean squared deviation) are computed
/// over all observations, and every value is replaced by
/// `(value - mean) / stdDev`.
///
/// - `dim`: number of features; every row is expected to hold exactly `dim`
///   values (a longer row makes the per-feature lookup fail).
/// - `observations`: one row per observation.
///
/// Returns a new jagged array of the same shape; the input is not modified.
/// A feature with zero spread is mapped to 0.0 instead of NaN, and an empty
/// input yields an empty result.
let normalize dim (observations : float[][]) =
    if Array.isEmpty observations then
        // Seq.averageBy throws on an empty sequence; nothing to normalize.
        [||]
    else
        let averages =
            Array.init dim (fun i ->
                observations |> Seq.averageBy (fun x -> x.[i]))

        let stdDevs =
            Array.init dim (fun i ->
                let avg = averages.[i]
                // The square root must be applied to the MEAN of the squared
                // deviations. Taking sqrt of each squared deviation first
                // yields the mean absolute deviation, not the standard
                // deviation.
                observations
                |> Seq.averageBy (fun x -> pown (x.[i] - avg) 2)
                |> sqrt)

        observations
        |> Array.map (fun row ->
            row
            |> Array.mapi (fun i x ->
                // A zero standard deviation means every value equals the
                // mean; emit 0.0 rather than dividing 0.0 by 0.0.
                if stdDevs.[i] = 0.0 then 0.0
                else (x - averages.[i]) / stdDevs.[i]))

/// Runs a principal component analysis on `observations` (one row per
/// observation, one column per feature).
///
/// The covariance matrix of the features is built with `covarianceMatrix` and
/// eigen-decomposed once. Returns `((eigenValues, eigenVectors), projector)`:
/// - `eigenValues` / `eigenVectors`: taken directly from the decomposition.
///   NOTE(review): their ordering is whatever `Evd()` produces — presumably
///   ascending for a symmetric matrix; confirm against the Math.NET docs.
/// - `projector`: maps a single observation to its coordinates in the
///   eigenvector basis by multiplying it with the transposed eigenvector
///   matrix, returning a plain `float[]`.
///
/// The projector does not centre or scale its input; callers that need this
/// must do so before projecting.
let pca (observations : float[][]) =
    let covariance =
        observations
        |> Matrix.Build.DenseOfRowArrays
        |> covarianceMatrix

    // Decompose once and read both results from the same factorization
    // instead of running the eigen-decomposition twice.
    let evd = covariance.Evd()
    let eigenValues = evd.EigenValues
    let eigenVectors = evd.EigenVectors

    // The projection matrix is the same for every observation, so the
    // transpose is taken a single time here rather than on each call.
    let projection = eigenVectors.Transpose ()

    let projector (obs : float[]) =
        let obsVector = obs |> Vector.Build.DenseOfArray
        let projected : Vector<float> = projection * obsVector
        // ToArray must be invoked; referencing it without `()` would hand
        // back a `unit -> float[]` function instead of the projected values.
        projected.ToArray ()

    (eigenValues, eigenVectors), projector

/// Runs `pca` on `observations` and prints one line per eigenvalue, walking
/// the eigenvalues in the reverse of the order in which `pca` returns them.
/// Each line shows the eigenvalue's share of the summed eigenvalue magnitudes
/// and the running cumulative share, both as percentages.
/// Only the eigenvalues are used; the eigenvectors and projector are ignored.
let pcaWithStats (observations : float[][]) =
    let (spectrum, _), _ = pca observations
    let total = spectrum |> Seq.sumBy (fun ev -> ev.Magnitude)

    // Magnitudes in reversed order, i.e. the order in which they are reported.
    let ordered =
        spectrum
        |> Seq.map (fun ev -> ev.Magnitude)
        |> Seq.toArray
        |> Array.rev

    let mutable running = 0.
    for idx in 0 .. ordered.Length - 1 do
        let share = 100. * ordered.[idx] / total
        running <- running + share
        printfn "Feature %2i: %.2f%% (%.2f%%)" idx share running
val covarianceMatrix : M:'a -> 'b

Full name: Script.covarianceMatrix
val M : 'a
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val cols : int
val C : 'b
val c1 : int32
val c2 : int32
val cov : obj
val normalize : dim:int -> observations:float [] [] -> float [] []

Full name: Script.normalize
val dim : int
val observations : float [] []
val averages : float []
module Array

from Microsoft.FSharp.Collections
val init : count:int -> initializer:(int -> 'T) -> 'T []

Full name: Microsoft.FSharp.Collections.Array.init
val i : int
module Seq

from Microsoft.FSharp.Collections
val averageBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.averageBy
val x : float []
val stdDevs : float []
val avg : float
val pown : x:'T -> n:int -> 'T (requires member get_One and member ( * ) and member ( / ))

Full name: Microsoft.FSharp.Core.Operators.pown
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val row : float []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.mapi
val x : float
val pca : observations:float [] [] -> ('a * 'b) * (float [] -> 'c)

Full name: Script.pca
val factorization : obj
val eigenValues : 'a
val eigenVectors : 'b
val VectorToArray : ('d -> 'e)
val v : 'd
val projector : (float [] -> 'd)
val obs : float []
val obsVector : obj
val pcaWithStats : observations:float [] [] -> unit

Full name: Script.pcaWithStats
val eValues : seq<obj>
val eVectors : obj
val projector : (float [] -> obj)
val total : float
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sumBy
val x : obj
val toList : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.Seq.toList
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member GetSlice : startIndex:int option * endIndex:int option -> 'T list
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val rev : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.rev
val scan : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State list

Full name: Microsoft.FSharp.Collections.List.scan
val percent : float
val cumul : float
val value : obj
val tail : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.tail
val iteri : action:(int -> 'T -> unit) -> list:'T list -> unit

Full name: Microsoft.FSharp.Collections.List.iteri
val p : float
val c : float
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Raw view Test code New version

More information

Link:http://fssnip.net/7WB
Posted:4 years ago
Author:Dr. Martin Lockstrom
Tags: #pca , #principalcomponents