0 people like it.

# Principal Components Analysis

An efficient and elegant way of performing Principal Component Analysis (PCA) in idiomatic F#, built on Math.NET Numerics.

 ```fsharp
open MathNet
open MathNet.Numerics
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.LinearAlgebra.Double
open MathNet.Numerics.Statistics

/// Builds the covariance matrix of the columns of `M`.
/// Entry (i, j) is `Statistics.Covariance` of column i and column j, so the
/// result is a symmetric `cols x cols` matrix.
let covarianceMatrix (M : Matrix<float>) =
    let cols = M.ColumnCount
    // Extract each column once instead of re-copying it for every pair.
    let columns = Array.init cols (fun c -> M.Column c)
    let C = DenseMatrix.Create(cols, cols, 0.0)
    for c1 in 0 .. (cols - 1) do
        // Only the upper triangle (diagonal included) is computed;
        // the lower triangle is filled in by symmetry.
        for c2 in c1 .. (cols - 1) do
            let cov = Statistics.Covariance (columns.[c1], columns.[c2])
            C.[c1, c2] <- cov
            C.[c2, c1] <- cov
    C

/// Standardizes the first `dim` features of every observation:
/// each value has its feature mean subtracted and is divided by the
/// feature's standard deviation (population form, i.e. divided by N).
/// A feature with zero spread is only centred, so it yields 0.0 rather than NaN.
/// `Seq.averageBy` raises on an empty `observations` array.
let normalize dim (observations : float[][]) =
    let averages =
        Array.init dim (fun i ->
            observations |> Seq.averageBy (fun x -> x.[i]))
    let stdDevs =
        Array.init dim (fun i ->
            let avg = averages.[i]
            // sqrt must be applied to the MEAN of the squared deviations;
            // taking it per element would give the mean absolute deviation.
            observations
            |> Seq.averageBy (fun x -> pown (x.[i] - avg) 2)
            |> sqrt)
    observations
    |> Array.map (fun row ->
        row
        |> Array.mapi (fun i x ->
            let centered = x - averages.[i]
            if stdDevs.[i] = 0.0 then centered else centered / stdDevs.[i]))

/// Runs a principal component analysis on `observations` (one row per
/// observation, one column per feature). The covariance matrix is computed
/// from the observations exactly as given; call `normalize` first if the
/// features should be standardized.
/// Returns `(eigenValues, eigenVectors), projector`, where `projector` maps an
/// observation onto the eigenvector basis and returns the coordinates as an array.
let pca (observations : float[][]) =
    let covariance =
        Matrix<float>.Build.DenseOfRowArrays(observations)
        |> covarianceMatrix
    // Decompose once and reuse the result for both values and vectors.
    let evd = covariance.Evd()
    let eigenValues = evd.EigenValues
    let eigenVectors = evd.EigenVectors
    // The projection matrix is the same for every observation.
    let projection = eigenVectors.Transpose ()
    let projector (obs : float[]) =
        let obsVector = Vector<float>.Build.DenseOfArray(obs)
        (projection * obsVector).ToArray()
    (eigenValues, eigenVectors), projector

/// Runs `pca` and prints, for each component, the share of the total
/// eigenvalue magnitude it carries and the running cumulative share.
/// The eigenvalues are listed in reverse of the order the decomposition
/// returns them (presumably ascending for a symmetric matrix, so the
/// largest component is printed first - confirm against Math.NET docs).
let pcaWithStats (observations : float[][]) =
    let (eValues, _), _ = pca observations
    let magnitudes =
        eValues
        |> Seq.map (fun x -> x.Magnitude)
        |> Seq.toList
    let total = List.sum magnitudes
    magnitudes
    |> List.rev
    |> List.scan (fun (_, cumul) magnitude ->
        let percent = 100. * magnitude / total
        (percent, cumul + percent)) (0., 0.)
    |> List.tail
    |> List.iteri (fun i (p, c) -> printfn "Feature %2i: %.2f%% (%.2f%%)" i p c)
 ```
val covarianceMatrix : M:'a -> 'b

Full name: Script.covarianceMatrix
val M : 'a
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val cols : int
val C : 'b
val c1 : int32
val c2 : int32
val cov : obj
val normalize : dim:int -> observations:float [] [] -> float [] []

Full name: Script.normalize
val dim : int
val observations : float [] []
val averages : float []
module Array

from Microsoft.FSharp.Collections
val init : count:int -> initializer:(int -> 'T) -> 'T []

Full name: Microsoft.FSharp.Collections.Array.init
val i : int
module Seq

from Microsoft.FSharp.Collections
val averageBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.averageBy
val x : float []
val stdDevs : float []
val avg : float
val pown : x:'T -> n:int -> 'T (requires member get_One and member ( * ) and member ( / ))

Full name: Microsoft.FSharp.Core.Operators.pown
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val row : float []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.mapi
val x : float
val pca : observations:float [] [] -> ('a * 'b) * (float [] -> 'c)

Full name: Script.pca
val factorization : obj
val eigenValues : 'a
val eigenVectors : 'b
val VectorToArray : ('d -> 'e)
val v : 'd
val projector : (float [] -> 'd)
val obs : float []
val obsVector : obj
val pcaWithStats : observations:float [] [] -> unit

Full name: Script.pcaWithStats
val eValues : seq<obj>
val eVectors : obj
val projector : (float [] -> obj)
val total : float
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sumBy
val x : obj
val toList : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.Seq.toList
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member GetSlice : startIndex:int option * endIndex:int option -> 'T list
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val rev : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.rev
val scan : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State list

Full name: Microsoft.FSharp.Collections.List.scan
val percent : float
val cumul : float
val value : obj
val tail : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.tail
val iteri : action:(int -> 'T -> unit) -> list:'T list -> unit

Full name: Microsoft.FSharp.Collections.List.iteri
val p : float
val c : float
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn