0 people like it.
Like the snippet!
Principal Component Analysis
An efficient and elegant way to perform Principal Component Analysis (PCA) in idiomatic F#.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
|
open MathNet
open MathNet.Numerics
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.LinearAlgebra.Double
open MathNet.Numerics.Statistics
/// Builds the covariance matrix of the columns of `M`.
///
/// Each column of `M` is treated as one variable and each row as one
/// observation. The result is a square, symmetric `cols x cols` matrix whose
/// diagonal holds `Statistics.Variance` of each column and whose off-diagonal
/// entry (i, j) holds `Statistics.Covariance` of columns i and j.
let covarianceMatrix (M : Matrix<float>) =
    let cols = M.ColumnCount
    let C = DenseMatrix.Create(cols, cols, 0.0)
    for c1 in 0 .. (cols - 1) do
        // Fetch the column once per outer iteration instead of once per pair.
        let column1 = M.Column c1
        C.[c1, c1] <- Statistics.Variance column1
        // The matrix is symmetric, so only the pairs above the diagonal are
        // computed; each value is mirrored into the lower triangle.
        for c2 in (c1 + 1) .. (cols - 1) do
            let cov = Statistics.Covariance (column1, M.Column c2)
            C.[c1, c2] <- cov
            C.[c2, c1] <- cov
    C
/// Standardizes a data set feature by feature (z-score).
///
/// `dim` is the number of features; every row of `observations` is indexed
/// from 0 to `dim - 1`, so rows are expected to hold exactly `dim` values.
/// Each value is replaced by `(x - mean) / stdDev`, where the mean and the
/// population standard deviation (dividing by the number of observations)
/// are computed over that feature's column.
///
/// A feature with zero standard deviation (a constant column) is mapped to
/// 0.0 rather than producing NaN from a 0/0 division.
///
/// `Seq.averageBy` raises `ArgumentException` when `observations` is empty
/// and `dim` is greater than zero.
let normalize dim (observations : float[][]) =
    let averages =
        Array.init dim (fun i ->
            observations |> Seq.averageBy (fun x -> x.[i]))
    let stdDevs =
        Array.init dim (fun i ->
            let avg = averages.[i]
            // The square root must be taken of the *mean* squared deviation.
            // Taking it inside the lambda yields |x - avg| per observation,
            // i.e. the mean absolute deviation instead of the std deviation.
            observations
            |> Seq.averageBy (fun x -> pown (x.[i] - avg) 2)
            |> sqrt)
    observations
    |> Array.map (fun row ->
        row
        |> Array.mapi (fun i x ->
            let spread = stdDevs.[i]
            if spread = 0.0 then 0.0
            else (x - averages.[i]) / spread))
/// Runs a principal component analysis on `observations`.
///
/// Each inner array is one observation (a row); the covariance matrix of the
/// resulting columns is built with `covarianceMatrix` and then
/// eigen-decomposed.
///
/// Returns `(eigenValues, eigenVectors), projector` where:
///   - `eigenValues` and `eigenVectors` come from the eigenvalue
///     decomposition of the covariance matrix;
///   - `projector` maps one observation (`float[]`) to its coordinates in the
///     eigenvector basis, returned as a `float[]`.
///
/// NOTE(review): observations are projected as given; no centering is done
/// here, so callers presumably pass data already processed by `normalize`.
let pca (observations : float[][]) =
    let covariance =
        observations
        |> Matrix.Build.DenseOfRowArrays
        |> covarianceMatrix
    // Decompose once and read both results from the same decomposition.
    let evd = covariance.Evd()
    let eigenValues = evd.EigenValues
    let eigenVectors = evd.EigenVectors
    // The projection basis does not depend on the observation, so the
    // transpose is computed once rather than on every projector call.
    let basis = eigenVectors.Transpose ()
    let projector (obs : float[]) =
        let obsVector = obs |> Vector.Build.DenseOfArray
        let projected : Vector<float> = basis * obsVector
        // ToArray must be invoked; a bare `ToArray` yields a function value
        // (unit -> float[]) instead of the projected coordinates.
        projected.ToArray()
    (eigenValues, eigenVectors), projector
/// Prints, for each eigenvalue returned by `pca`, the share of the total
/// eigenvalue magnitude it represents and the running cumulative share.
///
/// Eigenvalues are listed in the reverse of the order `pca` returns them,
/// one line each, numbered from 0. Nothing is returned.
let pcaWithStats (observations : float[][]) =
    let (spectrum, _), _ = pca observations
    // Summed in the order pca returned the eigenvalues.
    let total = spectrum |> Seq.sumBy (fun ev -> ev.Magnitude)
    let reversed = spectrum |> Seq.toList |> List.rev
    let mutable cumulative = 0.
    let mutable index = 0
    for ev in reversed do
        let share = 100. * ev.Magnitude / total
        cumulative <- cumulative + share
        printfn "Feature %2i: %.2f%% (%.2f%%)" index share cumulative
        index <- index + 1
|
val covarianceMatrix : M:'a -> 'b
Full name: Script.covarianceMatrix
val M : 'a
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = System.Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
val cols : int
val C : 'b
val c1 : int32
val c2 : int32
val cov : obj
val normalize : dim:int -> observations:float [] [] -> float [] []
Full name: Script.normalize
val dim : int
val observations : float [] []
val averages : float []
module Array
from Microsoft.FSharp.Collections
val init : count:int -> initializer:(int -> 'T) -> 'T []
Full name: Microsoft.FSharp.Collections.Array.init
val i : int
module Seq
from Microsoft.FSharp.Collections
val averageBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.averageBy
val x : float []
val stdDevs : float []
val avg : float
val pown : x:'T -> n:int -> 'T (requires member get_One and member ( * ) and member ( / ))
Full name: Microsoft.FSharp.Core.Operators.pown
val sqrt : value:'T -> 'U (requires member Sqrt)
Full name: Microsoft.FSharp.Core.Operators.sqrt
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
val row : float []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.mapi
val x : float
val pca : observations:float [] [] -> ('a * 'b) * (float [] -> 'c)
Full name: Script.pca
val factorization : obj
val eigenValues : 'a
val eigenVectors : 'b
val VectorToArray : ('d -> 'e)
val v : 'd
val projector : (float [] -> 'd)
val obs : float []
val obsVector : obj
val pcaWithStats : observations:float [] [] -> unit
Full name: Script.pcaWithStats
val eValues : seq<obj>
val eVectors : obj
val projector : (float [] -> obj)
val total : float
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.sumBy
val x : obj
val toList : source:seq<'T> -> 'T list
Full name: Microsoft.FSharp.Collections.Seq.toList
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member GetSlice : startIndex:int option * endIndex:int option -> 'T list
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val rev : list:'T list -> 'T list
Full name: Microsoft.FSharp.Collections.List.rev
val scan : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State list
Full name: Microsoft.FSharp.Collections.List.scan
val percent : float
val cumul : float
val value : obj
val tail : list:'T list -> 'T list
Full name: Microsoft.FSharp.Collections.List.tail
val iteri : action:(int -> 'T -> unit) -> list:'T list -> unit
Full name: Microsoft.FSharp.Collections.List.iteri
val p : float
val c : float
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
More information