5 people like it.

Statistical functions

Some basic statistics functions in F#, including erfc, erfcinv, normcdf, normpdf, norminv, additiveCorrection, multiplicativeCorrection, a Box-Mueller RandomSampler and a unitized type for a Gaussian distribution. Based on Ralf Herbrich's samples at http://blogs.technet.com/b/apg/archive/2008/04/05/trueskill-through-time.aspx

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
142: 
143: 
144: 
145: 
146: 
147: 
148: 
149: 
150: 
151: 
152: 
153: 
154: 
155: 
156: 
157: 
158: 
159: 
160: 
161: 
162: 
163: 
164: 
165: 
166: 
167: 
168: 
169: 
170: 
171: 
172: 
173: 
174: 
175: 
176: 
177: 
178: 
179: 
180: 
181: 
182: 
183: 
184: 
185: 
186: 
187: 
188: 
189: 
190: 
191: 
192: 
193: 
194: 
195: 
196: 
197: 
198: 
199: 
200: 
201: 
202: 
203: 
204: 
205: 
206: 
207: 
208: 
209: 
210: 
211: 
212: 
213: 
214: 
215: 
216: 
/// Some basic statistics functions in F#, including erfc, erfcinv, normcdf, normpdf, 
/// norminv, additiveCorrection, multiplicativeCorrection, a Box-Mueller RandomSampler
/// and a unitized type for a Gaussian distribution. 
///
/// Based on Ralf Herbrich's samples at http://blogs.technet.com/b/apg/archive/2008/04/05/trueskill-through-time.aspx

module Gaussians = 

    open System
    
    /// Compute the square of a unitized number
    let sqr (x:float<'u>) = x * x 

    /// Computes the complementary error function. This function is defined 
    /// by 2/sqrt(pi) * integral from x to infinity of exp (-t^2) dt
    let erfc x =
        if (Double.IsNegativeInfinity x) then 2.0
        elif (Double.IsPositiveInfinity x) then 0.0
        else
            let z = abs x
            let t = 1.0 / (1.0 + 0.5 * z) 
            let res = t * exp (-z * z - 1.26551223 + t * (1.00002368 + t * (0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277))))))))) 
            if (x >= 0.0) then res else 2.0 - res

    /// Computes the inverse of the complementary error function
    let erfcinv y = 
        if (y < 0.0 || y > 2.0) then
            failwith "Inverse complementary function not defined outside [0,2]."
        elif y = 0.0 then Double.PositiveInfinity
        elif y = 2.0 then Double.NegativeInfinity
        else 
            let x = 
                if (y >= 0.0485 && y <= 1.9515) then
                    let q = y - 1.0 
                    let r = q * q 
                    (((((0.01370600482778535*r - 0.3051415712357203)*r + 1.524304069216834)*r - 3.057303267970988)*r + 2.710410832036097)*r - 0.8862269264526915) * q /
                    (((((-0.05319931523264068*r + 0.6311946752267222)*r - 2.432796560310728)*r + 4.175081992982483)*r - 3.320170388221430)*r + 1.0)
                else if (y < 0.0485) then
                    let q = sqrt (-2.0 * log (y / 2.0)) 
                    (((((0.005504751339936943*q + 0.2279687217114118)*q + 1.697592457770869)*q + 1.802933168781950)*q + -3.093354679843504)*q - 2.077595676404383) / 
                    ((((0.007784695709041462*q + 0.3224671290700398)*q + 2.445134137142996)*q + 3.754408661907416)*q + 1.0)
                else if (y > 1.9515) then
                    let q = sqrt (-2.0 * log (1.0 - y / 2.0)) 
                    (-(((((0.005504751339936943*q + 0.2279687217114118)*q + 1.697592457770869)*q + 1.802933168781950)*q + -3.093354679843504)*q - 2.077595676404383) / 
                     ((((0.007784695709041462*q + 0.3224671290700398)*q + 2.445134137142996)*q + 3.754408661907416)*q + 1.0))
                else 0.0
            let u = (erfc x - y) / (-2.0 / sqrt Math.PI * exp (-x * x)) 
            x - u / (1.0 + x * u)

    /// Computes the cummulative Gaussian distribution at a specified point of interest
    let normcdf t = 
        let sqrt2 = 1.4142135623730951 
        (erfc (-t / sqrt2)) / 2.0

    /// Computes the Gaussian density at a specified point of interest
    let normpdf (t:float) = 
        let invsqrt2pi = 0.398942280401433
        invsqrt2pi * exp (- (t * t / 2.0))
        
    /// Computes the inverse of the cummulative Gaussian distribution (quantile function) at a specified point of interest
    let norminv p = 
        let sqrt2 = 1.4142135623730951 
        (-sqrt2 * erfcinv (2.0 * p))

    /// Computes the additive correction (v) of a single-sided truncated Gaussian with unit variance
    let additiveCorrection t = 
        match normcdf t with
        | denom when denom < 2.222758749e-162   -> -t
        | denom                                 -> (normpdf t) / denom                             

    /// Computes the multiplicative correction (w) of a single-sided truncated Gaussian with unit variance
    let multiplicativeCorrection t =
        match normcdf t with
        | denom when denom < 2.222758749e-162   -> if (t < 0.0) then 1.0 else 0.0
        | denom                                 -> let vt = additiveCorrection t in vt * (vt + t)

    /// Computes the additive correction of a double-sided truncated Gaussian with unit variance
    let additiveCorrection0 t epsilon = 
        let v = abs t
        match normcdf (epsilon - v) - normcdf (-epsilon - v) with
        | denom when denom < 2.222758749e-162   -> if t < 0.0 then -t-epsilon else -t+epsilon
        | denom                                 -> let num = normpdf (-epsilon-v) - normpdf (epsilon-v) in if t < 0.0 then -num/denom else num/denom

    /// Computes the multiplicative correction of a double-sided truncated Gaussian with unit variance
    let multiplicativeCorrection0 t epsilon =
        let v = abs t
        match normcdf (epsilon - v) - normcdf (-epsilon - v) with
        | denom when denom < 2.222758749e-162   -> 1.0
        | denom                                 -> let vt = additiveCorrection0 v epsilon in vt*vt + ((epsilon-v) * normpdf (epsilon-v) - (-epsilon-v) * normpdf (-epsilon-v))/denom


    /// Computes a random sampler using the Box-Mueller formula
    type RandomSampler(seed:int) =
        /// The internal state of the sampler
        let sampler = System.Random (seed)
        let mutable buffered = false
        let mutable buffer = 0.0
        // Generate a new pair of standard Gaussian distributed variables using the Box-Mueller algorithm.
        let rec nextSample () = 
            let u = sampler.NextDouble () 
            let v = sampler.NextDouble () 
            if (u = 0.0 || v = 0.0) then
                nextSample ()
            else
                let x = sqrt (-2.0 * log (u)) 
                (x * sin (2.0 * Math.PI * v), x * cos (2.0 * Math.PI * v)) 

        /// Generate a new normal sample distributed according to the standard Gaussian distribution
        member __.Sample () =

            if buffered then
                buffered <- not buffered
                buffer
            else
                let (x,y) = nextSample () 
                buffered <- not buffered
                buffer <- y
                x    

    let globalSampler = RandomSampler(42)


    /// A unitized Gaussian distribution based on float numbers (struct type for memory efficency) 
    /// in exponential parameterisation. 
    [<Struct>]
    type Gaussian<[<Measure>] 'u>(precisionMean:float<1/'u>,precision:float<1/'u^2>) = 
        static member FromMeanAndVariance(mean:float<'u>, variance:float<'u^2>) = 
            Gaussian<'u>(mean/variance, 1.0 / variance)
        static member FromMeanAndDeviation(mean:float<'u>, standardDeviation:float<'u>) = 
            let sigma = standardDeviation*standardDeviation
            Gaussian<'u>.FromMeanAndVariance(mean, sigma)
        /// Precision times the mean of the Gaussian
        member __.PrecisionMean  = precisionMean
        /// Precision of the Gaussian
        member __.Precision = precision
        /// Mean of the Gaussian
        member this.Mu = precisionMean / precision
        /// Mean of the Gaussian
        member this.Mean = this.Mu
        /// Variance of the Gaussian
        member this.Variance = 1.0 / precision
        /// Standard deviation of the Gaussian
        member this.StandardDeviation = sqrt this.Variance
        /// Standard deviation of the Gaussian
        member this.Sigma = this.StandardDeviation

        /// Multiplies two Gaussians  
        static member (*) (a:Gaussian<'u>,b:Gaussian<'u>) = 
            Gaussian<'u> (a.PrecisionMean + b.PrecisionMean, a.Precision + b.Precision)
        /// Divides two Gaussians
        static member (/) (a:Gaussian<'u>,b:Gaussian<'u>) =
            Gaussian<'u> (a.PrecisionMean - b.PrecisionMean, a.Precision - b.Precision)
        /// Computes the absolute difference between two Gaussians
        static member AbsoluteDifference (a:Gaussian<'u>) (b:Gaussian<'u>) = 
            max (abs (a.PrecisionMean - b.PrecisionMean)) (sqrt (abs (a.Precision - b.Precision)))
            //max (abs (a.PrecisionMean - b.PrecisionMean)) (abs (a.Precision - b.Precision))
        /// Computes the absolute difference between two Gaussians
        static member (-) (a:Gaussian<'u>,b:Gaussian<'u>) = Gaussian<'u>.AbsoluteDifference a b
        /// Used for string serialisation
        override this.ToString () = (string this.Mu) + ";" + (string this.Variance) 
        /// Generate a sample of this Gaussian using the global sampler
        member this.Sample() = this.Mean + this.Sigma * globalSampler.Sample()
        /// Computes the log-normalisation factor when two normalised Gaussians gets multiplied
        static member LogProductNormalisation (a:Gaussian<'u>,b:Gaussian<'u>) =
            if a.Precision = 0.0<_> then 
                0.0
            elif b.Precision = 0.0<_> then
                0.0
            else
                let varSum = a.Variance + b.Variance
                let muDiff = a.Mean - b.Mean
                -0.91893853320467267 - log(float varSum)/2.0 - muDiff*muDiff/(2.0 * varSum)
        /// Computes the log-normalisation factor when two normalised Gaussians gets divided
        static member LogRatioNormalisation (a:Gaussian<'u>,b:Gaussian<'u>) =
            if a.Precision = 0.0<_> then 
                0.0
            elif b.Precision = 0.0<_> then
                0.0
            else
                let v2 = b.Variance
                let varDiff = v2 - a.Variance
                let muDiff = a.Mean - b.Mean
                if varDiff = 0.0<_> then
                    0.0
                else
                    log(float v2) + 0.91893853320467267 - log(float varDiff)/2.0 + muDiff*muDiff/(2.0 * varDiff)

#if INTERACTIVE
module Tests = 
    open Gaussians
        
    for x in [ 0.0; 0.000001; 0.1; 1.0; 10.0 ] do 
        printfn "erfc %g = %g" x (erfc x)

        printfn "erfcinv (erfc %g) - %g" x (abs (Gaussians.erfcinv (Gaussians.erfc x) - x))


    //#endif

    let x = RandomSampler 10
    let samples1 = [ for i in 0 .. 10000 -> x.Sample() ]
    // the mean is approximately 1.0:
    let mean1 = samples1 |> Seq.average
    // the variance is approximately 1.0:
    let stddev1 = samples1 |> Seq.averageBy (fun x -> x*x) |> sqrt

    // A Gaussian of scores in a test centered on 50.0, standard deviation of 10.0
    [<Measure>] type score 
    let g = Gaussian<score>.FromMeanAndDeviation (mean=50.0<score>, standardDeviation=10.0<score>) 
    let scoreA = g.Sample()
    let scoreB = g.Sample()
    let samples2 = [ for i in 0 .. 10000 -> g.Sample() ] 
    let mean  = samples2 |> Seq.average
    let variance2 = samples2 |> Seq.averageBy (fun x -> sqr(mean - x))  
    let stddev2 = sqrt variance2
#endif
namespace System
val sqr : x:float<'u> -> float<'u ^ 2>

Full name: Script.Gaussians.sqr


 Compute the square of a unitized number
val x : float<'u>
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val erfc : x:float -> float

Full name: Script.Gaussians.erfc


 Computes the complementary error function. This function is defined
 by 2/sqrt(pi) * integral from x to infinity of exp (-t^2) dt
val x : float
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end

Full name: System.Double
Double.IsNegativeInfinity(d: float) : bool
Double.IsPositiveInfinity(d: float) : bool
val z : float
val abs : value:'T -> 'T (requires member Abs)

Full name: Microsoft.FSharp.Core.Operators.abs
val t : float
val res : float
val exp : value:'T -> 'T (requires member Exp)

Full name: Microsoft.FSharp.Core.Operators.exp
val erfcinv : y:float -> float

Full name: Script.Gaussians.erfcinv


 Computes the inverse of the complementary error function
val y : float
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
field float.PositiveInfinity = Infinity
field float.NegativeInfinity = -Infinity
val q : float
val r : float
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
val log : value:'T -> 'T (requires member Log)

Full name: Microsoft.FSharp.Core.Operators.log
val u : float
type Math =
  static val PI : float
  static val E : float
  static member Abs : value:sbyte -> sbyte + 6 overloads
  static member Acos : d:float -> float
  static member Asin : d:float -> float
  static member Atan : d:float -> float
  static member Atan2 : y:float * x:float -> float
  static member BigMul : a:int * b:int -> int64
  static member Ceiling : d:decimal -> decimal + 1 overload
  static member Cos : d:float -> float
  ...

Full name: System.Math
field Math.PI = 3.14159265359
val normcdf : t:float -> float

Full name: Script.Gaussians.normcdf


 Computes the cummulative Gaussian distribution at a specified point of interest
val sqrt2 : float
val normpdf : t:float -> float

Full name: Script.Gaussians.normpdf


 Computes the Gaussian density at a specified point of interest
val invsqrt2pi : float
val norminv : p:float -> float

Full name: Script.Gaussians.norminv


 Computes the inverse of the cummulative Gaussian distribution (quantile function) at a specified point of interest
val p : float
val additiveCorrection : t:float -> float

Full name: Script.Gaussians.additiveCorrection


 Computes the additive correction (v) of a single-sided truncated Gaussian with unit variance
val denom : float
val multiplicativeCorrection : t:float -> float

Full name: Script.Gaussians.multiplicativeCorrection


 Computes the multiplicative correction (w) of a single-sided truncated Gaussian with unit variance
val vt : float
val additiveCorrection0 : t:float -> epsilon:float -> float

Full name: Script.Gaussians.additiveCorrection0


 Computes the additive correction of a double-sided truncated Gaussian with unit variance
val epsilon : float
val v : float
val num : float
val multiplicativeCorrection0 : t:float -> epsilon:float -> float

Full name: Script.Gaussians.multiplicativeCorrection0


 Computes the multiplicative correction of a double-sided truncated Gaussian with unit variance
Multiple items
type RandomSampler =
  new : seed:int -> RandomSampler
  member Sample : unit -> float

Full name: Script.Gaussians.RandomSampler


 Computes a random sampler using the Box-Mueller formula


--------------------
new : seed:int -> RandomSampler
val seed : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val sampler : Random


 The internal state of the sampler
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val mutable buffered : bool
val mutable buffer : float
val nextSample : (unit -> float * float)
Random.NextDouble() : float
val sin : value:'T -> 'T (requires member Sin)

Full name: Microsoft.FSharp.Core.Operators.sin
val cos : value:'T -> 'T (requires member Cos)

Full name: Microsoft.FSharp.Core.Operators.cos
member RandomSampler.Sample : unit -> float

Full name: Script.Gaussians.RandomSampler.Sample


 Generate a new normal sample distributed according to the standard Gaussian distribution
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
val globalSampler : RandomSampler

Full name: Script.Gaussians.globalSampler
Multiple items
type StructAttribute =
  inherit Attribute
  new : unit -> StructAttribute

Full name: Microsoft.FSharp.Core.StructAttribute

--------------------
new : unit -> StructAttribute
Multiple items
type Gaussian<'u> =
  struct
    new : precisionMean:float</'u> * precision:float</'u ^ 2> -> Gaussian<'u>
    member Sample : unit -> float<'u>
    override ToString : unit -> string
    member Mean : float<'u>
    member Mu : float<'u>
    member Precision : float</'u ^ 2>
    member PrecisionMean : float</'u>
    member Sigma : float<'u>
    member StandardDeviation : float<'u>
    member Variance : float<'u ^ 2>
    ...
  end

Full name: Script.Gaussians.Gaussian<_>


 A unitized Gaussian distribution based on float numbers (struct type for memory efficency)
 in exponential parameterisation.


--------------------
Gaussian()
new : precisionMean:float</'u> * precision:float</'u ^ 2> -> Gaussian<'u>
Multiple items
type MeasureAttribute =
  inherit Attribute
  new : unit -> MeasureAttribute

Full name: Microsoft.FSharp.Core.MeasureAttribute

--------------------
new : unit -> MeasureAttribute
val precisionMean : float</'u>
val precision : float</'u ^ 2>
static member Gaussian.FromMeanAndVariance : mean:float<'u> * variance:float<'u ^ 2> -> Gaussian<'u>

Full name: Script.Gaussians.Gaussian.FromMeanAndVariance
val mean : float<'u>
val variance : float<'u ^ 2>
static member Gaussian.FromMeanAndDeviation : mean:float<'u> * standardDeviation:float<'u> -> Gaussian<'u>

Full name: Script.Gaussians.Gaussian.FromMeanAndDeviation
val standardDeviation : float<'u>
val sigma : float<'u ^ 2>
member Gaussian.PrecisionMean : float</'u>

Full name: Script.Gaussians.Gaussian.PrecisionMean


 Precision times the mean of the Gaussian
val __ : byref<Gaussian<'u>>
member Gaussian.Precision : float</'u ^ 2>

Full name: Script.Gaussians.Gaussian.Precision


 Precision of the Gaussian
val this : byref<Gaussian<'u>>
member Gaussian.Mu : float<'u>

Full name: Script.Gaussians.Gaussian.Mu


 Mean of the Gaussian
member Gaussian.Mean : float<'u>

Full name: Script.Gaussians.Gaussian.Mean


 Mean of the Gaussian
property Gaussian.Mu: float<'u>


 Mean of the Gaussian
member Gaussian.Variance : float<'u ^ 2>

Full name: Script.Gaussians.Gaussian.Variance


 Variance of the Gaussian
member Gaussian.StandardDeviation : float<'u>

Full name: Script.Gaussians.Gaussian.StandardDeviation


 Standard deviation of the Gaussian
member Gaussian.Sigma : float<'u>

Full name: Script.Gaussians.Gaussian.Sigma


 Standard deviation of the Gaussian
val a : Gaussian<'u>
val b : Gaussian<'u>
property Gaussian.PrecisionMean: float</'u>


 Precision times the mean of the Gaussian
property Gaussian.Precision: float</'u ^ 2>


 Precision of the Gaussian
static member Gaussian.AbsoluteDifference : a:Gaussian<'u> -> b:Gaussian<'u> -> float</'u>

Full name: Script.Gaussians.Gaussian.AbsoluteDifference


 Computes the absolute difference between two Gaussians
val max : e1:'T -> e2:'T -> 'T (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.max
override Gaussian.ToString : unit -> string

Full name: Script.Gaussians.Gaussian.ToString


 Used for string serialisation
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
member Gaussian.Sample : unit -> float<'u>

Full name: Script.Gaussians.Gaussian.Sample


 Generate a sample of this Gaussian using the global sampler
member RandomSampler.Sample : unit -> float


 Generate a new normal sample distributed according to the standard Gaussian distribution
static member Gaussian.LogProductNormalisation : a:Gaussian<'u> * b:Gaussian<'u> -> float

Full name: Script.Gaussians.Gaussian.LogProductNormalisation


 Computes the log-normalisation factor when two normalised Gaussians gets multiplied
val varSum : float<'u ^ 2>
property Gaussian.Variance: float<'u ^ 2>


 Variance of the Gaussian
val muDiff : float<'u>
property Gaussian.Mean: float<'u>


 Mean of the Gaussian
static member Gaussian.LogRatioNormalisation : a:Gaussian<'u> * b:Gaussian<'u> -> float

Full name: Script.Gaussians.Gaussian.LogRatioNormalisation


 Computes the log-normalisation factor when two normalised Gaussians gets divided
val v2 : float<'u ^ 2>
val varDiff : float<'u ^ 2>
Raw view Test code New version

More information

Link:http://fssnip.net/bD
Posted:12 years ago
Author:Robert Herman
Tags: statistics , gaussian , normal , distributions erfc , erfcinv , sampling