## Statistical functions

Some basic statistics functions in F#, including erfc, erfcinv, normcdf, normpdf, norminv, additiveCorrection, multiplicativeCorrection, a Box-Muller RandomSampler and a unit-of-measure-aware type for a Gaussian distribution. Based on Ralf Herbrich's samples at http://blogs.technet.com/b/apg/archive/2008/04/05/trueskill-through-time.aspx

Tools:
```  1:
2: /// Some basic statistics functions in F#, including erfc, erfcinv, normcdf, normpdf,
3: /// norminv, additiveCorrection, multiplicativeCorrection, a Box-Muller RandomSampler
4: /// and a unitized type for a Gaussian distribution.
5: ///
6: /// Based on Ralf Herbrich's samples at http://blogs.technet.com/b/apg/archive/2008/04/05/trueskill-through-time.aspx
7:
8: module Gaussians =
9:
10:     open System
11:
12:     /// Squares a float that carries a unit of measure; the result carries the squared unit
13:     let sqr (x : float<'u>) : float<'u^2> = x * x
14:
15:     /// Computes the complementary error function erfc(x). This function is defined
16:     /// by 2/sqrt(pi) * integral from x to infinity of exp (-t^2) dt. Infinite arguments return the exact limits (2.0 and 0.0); all other inputs go through a closed-form approximation.
17:     let erfc x =
18:         if (Double.IsNegativeInfinity x) then 2.0 // limit of erfc at -infinity
19:         elif (Double.IsPositiveInfinity x) then 0.0 // limit of erfc at +infinity
20:         else
21:             let z = abs x // work with |x|; the sign of x is handled on the last line
22:             let t = 1.0 / (1.0 + 0.5 * z)
23:             let res = t * exp (-z * z - 1.26551223 + t * (1.00002368 + t * (0.37409196 + t * (0.09678418 + t * (-0.18628806 + t * (0.27886807 + t * (-1.13520398 + t * (1.48851587 + t * (-0.82215223 + t * 0.17087277))))))))) // the exponent is -z^2 plus a degree-9 polynomial in t, evaluated in nested (Horner) form
24:             if (x >= 0.0) then res else 2.0 - res // for negative x use erfc(x) = 2 - erfc(|x|)
25:
26:     /// Computes the inverse of the complementary error function: for y in [0,2] returns x with erfc x close to y. Raises an exception (via failwith) when y lies outside [0,2]; y = 0.0 maps to +infinity and y = 2.0 to -infinity.
27:     let erfcinv y =
28:         if (y < 0.0 || y > 2.0) then
29:             failwith "Inverse complementary function not defined outside [0,2]."
30:         elif y = 0.0 then Double.PositiveInfinity
31:         elif y = 2.0 then Double.NegativeInfinity
32:         else
33:             let x = // initial estimate from a rational approximation selected by the region y falls in
34:                 if (y >= 0.0485 && y <= 1.9515) then // central region: rational function in q = y - 1
35:                     let q = y - 1.0
36:                     let r = q * q
37:                     (((((0.01370600482778535*r - 0.3051415712357203)*r + 1.524304069216834)*r - 3.057303267970988)*r + 2.710410832036097)*r - 0.8862269264526915) * q /
38:                     (((((-0.05319931523264068*r + 0.6311946752267222)*r - 2.432796560310728)*r + 4.175081992982483)*r - 3.320170388221430)*r + 1.0)
39:                 else if (y < 0.0485) then // lower tail: rational function in q = sqrt(-2 * log(y / 2))
40:                     let q = sqrt (-2.0 * log (y / 2.0))
41:                     (((((0.005504751339936943*q + 0.2279687217114118)*q + 1.697592457770869)*q + 1.802933168781950)*q + -3.093354679843504)*q - 2.077595676404383) /
42:                     ((((0.007784695709041462*q + 0.3224671290700398)*q + 2.445134137142996)*q + 3.754408661907416)*q + 1.0)
43:                 else if (y > 1.9515) then // upper tail: same rational function with q built from 1 - y/2, then negated
44:                     let q = sqrt (-2.0 * log (1.0 - y / 2.0))
45:                     (-(((((0.005504751339936943*q + 0.2279687217114118)*q + 1.697592457770869)*q + 1.802933168781950)*q + -3.093354679843504)*q - 2.077595676404383) /
46:                      ((((0.007784695709041462*q + 0.3224671290700398)*q + 2.445134137142996)*q + 3.754408661907416)*q + 1.0))
47:                 else 0.0 // reached only when y is NaN, because every comparison above is then false
48:             let u = (erfc x - y) / (-2.0 / sqrt Math.PI * exp (-x * x)) // residual erfc(x) - y divided by -2/sqrt(pi) * exp(-x^2), the derivative of erfc at x
49:             x - u / (1.0 + x * u) // one correction step applied to the initial estimate
50:
51:     /// Cumulative distribution function of the standard Gaussian evaluated at t, expressed through erfc
52:     let normcdf t =
53:         let root2 = 1.4142135623730951
54:         (-t / root2 |> erfc) / 2.0
55:
56:     /// Density of the standard Gaussian evaluated at t
57:     let normpdf (t : float) : float =
58:         let norm = 0.398942280401433 // approximately 1 / sqrt(2 * pi)
59:         norm * exp (-(t * t / 2.0))
60:
61:     /// Quantile function (inverse of the cumulative Gaussian distribution) at probability p, computed as -sqrt(2) * erfcinv(2p); erfcinv raises when 2p falls outside [0,2]
62:     let norminv p =
63:         let root2 = 1.4142135623730951
64:         -root2 * erfcinv (2.0 * p)
65:
66:     /// Computes the additive correction (v) of a single-sided truncated Gaussian with unit variance: normpdf t / normcdf t, falling back to -t when normcdf t is too small to divide by
67:     let additiveCorrection t =
68:         match normcdf t with
69:         | denom when denom < 2.222758749e-162   -> -t // denominator below the threshold: return -t instead of dividing
70:         | denom                                 -> (normpdf t) / denom
71:
72:     /// Multiplicative correction (w) of a single-sided truncated Gaussian with unit variance: v * (v + t) with v = additiveCorrection t, or a 1.0 / 0.0 step on the sign of t when normcdf t is below the threshold
73:     let multiplicativeCorrection t =
74:         if normcdf t < 2.222758749e-162 then (if t < 0.0 then 1.0 else 0.0)
75:         else
76:             let v = additiveCorrection t in v * (v + t)
77:
78:     /// Additive correction of a double-sided truncated Gaussian with unit variance, for a truncation half-width epsilon; the result takes its sign from t
79:     let additiveCorrection0 t epsilon =
80:         let absT = abs t
81:         let mass = normcdf (epsilon - absT) - normcdf (-epsilon - absT)
82:         if mass < 2.222758749e-162 then (if t < 0.0 then -t - epsilon else -t + epsilon)
83:         else (let diff = normpdf (-epsilon - absT) - normpdf (epsilon - absT) in if t < 0.0 then -diff / mass else diff / mass)
84:
85:     /// Multiplicative correction of a double-sided truncated Gaussian with unit variance, for a truncation half-width epsilon; returns 1.0 when the probability mass between the bounds is below the threshold
86:     let multiplicativeCorrection0 t epsilon =
87:         let absT = abs t
88:         let mass = normcdf (epsilon - absT) - normcdf (-epsilon - absT)
89:         if mass < 2.222758749e-162 then 1.0
90:         else (let v0 = additiveCorrection0 absT epsilon in v0 * v0 + ((epsilon - absT) * normpdf (epsilon - absT) - (-epsilon - absT) * normpdf (-epsilon - absT)) / mass)
91:
92:
93:     /// Sampler of standard Gaussian variates built on the Box-Muller formula and a System.Random seeded with `seed`
94:     type RandomSampler(seed:int) =
95:         /// Underlying uniform random number generator
96:         let rng = System.Random (seed)
97:         let mutable hasSpare = false
98:         let mutable spare = 0.0
99:         // Draw two uniforms and turn them into a pair of standard Gaussian variates; redraw if either uniform is exactly zero.
100:         let rec nextPair () =
101:             let u1 = rng.NextDouble ()
102:             let u2 = rng.NextDouble ()
103:             if u1 <> 0.0 && u2 <> 0.0 then
104:                 let radius = sqrt (-2.0 * log u1)
105:                 let angle = 2.0 * Math.PI * u2
106:                 (radius * sin angle, radius * cos angle)
107:             else nextPair ()
108:
109:         /// Returns the next standard Gaussian sample; each generated pair serves two consecutive calls
110:         member __.Sample () =
111:
112:             if hasSpare then
113:                 hasSpare <- false
114:                 spare
115:             else
116:                 let (first, second) = nextPair ()
117:                 hasSpare <- true
118:                 spare <- second
119:                 first
120:     /// Module-level sampler constructed with the fixed seed 42; Gaussian.Sample() draws from it
121:     let globalSampler = RandomSampler(42)
122:
123:
124:     /// A Gaussian distribution over floats carrying unit of measure 'u (struct type for memory efficiency),
125:     /// stored in exponential parameterisation: precisionMean (mean / variance) and precision (1 / variance).
126:     [<Struct>]
127:     type Gaussian<[<Measure>] 'u>(precisionMean:float<1/'u>,precision:float<1/'u^2>) =
128:         static member FromMeanAndVariance(mean:float<'u>, variance:float<'u^2>) = // converts (mean, variance) into the stored (precisionMean, precision) pair
129:             Gaussian<'u>(mean/variance, 1.0 / variance)
130:         static member FromMeanAndDeviation(mean:float<'u>, standardDeviation:float<'u>) = // same conversion, starting from a standard deviation
131:             let sigma = standardDeviation*standardDeviation // NOTE: despite its name, this local holds the variance (deviation squared)
132:             Gaussian<'u>.FromMeanAndVariance(mean, sigma)
133:         /// Precision times the mean of the Gaussian (first stored parameter)
134:         member __.PrecisionMean  = precisionMean
135:         /// Precision of the Gaussian (second stored parameter; the inverse of the variance)
136:         member __.Precision = precision
137:         /// Mean of the Gaussian, recovered as precisionMean / precision
138:         member this.Mu = precisionMean / precision
139:         /// Mean of the Gaussian (alias of Mu)
140:         member this.Mean = this.Mu
141:         /// Variance of the Gaussian, recovered as 1 / precision
142:         member this.Variance = 1.0 / precision
143:         /// Standard deviation of the Gaussian (square root of Variance)
144:         member this.StandardDeviation = sqrt this.Variance
145:         /// Standard deviation of the Gaussian (alias of StandardDeviation)
146:         member this.Sigma = this.StandardDeviation
147:
148:         /// Multiplies two Gaussians: in this parameterisation both stored parameters are added
149:         static member (*) (a:Gaussian<'u>,b:Gaussian<'u>) =
150:             Gaussian<'u> (a.PrecisionMean + b.PrecisionMean, a.Precision + b.Precision)
151:         /// Divides two Gaussians: both stored parameters of b are subtracted from those of a
152:         static member (/) (a:Gaussian<'u>,b:Gaussian<'u>) =
153:             Gaussian<'u> (a.PrecisionMean - b.PrecisionMean, a.Precision - b.Precision)
154:         /// Computes the absolute difference between two Gaussians as a single number: the larger of |difference in PrecisionMean| and sqrt |difference in Precision|
155:         static member AbsoluteDifference (a:Gaussian<'u>) (b:Gaussian<'u>) =
156:             max (abs (a.PrecisionMean - b.PrecisionMean)) (sqrt (abs (a.Precision - b.Precision)))
157:             //max (abs (a.PrecisionMean - b.PrecisionMean)) (abs (a.Precision - b.Precision))
158:         /// Operator form of AbsoluteDifference; note that the result is a number, not a Gaussian
159:         static member (-) (a:Gaussian<'u>,b:Gaussian<'u>) = Gaussian<'u>.AbsoluteDifference a b
160:         /// Used for string serialisation: the mean and the variance separated by ";"
161:         override this.ToString () = (string this.Mu) + ";" + (string this.Variance)
162:         /// Generate a sample of this Gaussian using the global sampler: Mean + Sigma * (standard Gaussian draw)
163:         member this.Sample() = this.Mean + this.Sigma * globalSampler.Sample()
164:         /// Computes the log-normalisation factor when two normalised Gaussians get multiplied; returns 0.0 when either precision is zero
165:         static member LogProductNormalisation (a:Gaussian<'u>,b:Gaussian<'u>) =
166:             if a.Precision = 0.0<_> then
167:                 0.0
168:             elif b.Precision = 0.0<_> then
169:                 0.0
170:             else
171:                 let varSum = a.Variance + b.Variance
172:                 let muDiff = a.Mean - b.Mean
173:                 -0.91893853320467267 - log(float varSum)/2.0 - muDiff*muDiff/(2.0 * varSum) // 0.91893853320467267 is log(sqrt(2*pi)); `float` strips the unit before taking the log
174:         /// Computes the log-normalisation factor when two normalised Gaussians get divided; returns 0.0 when either precision is zero or both variances are equal
175:         static member LogRatioNormalisation (a:Gaussian<'u>,b:Gaussian<'u>) =
176:             if a.Precision = 0.0<_> then
177:                 0.0
178:             elif b.Precision = 0.0<_> then
179:                 0.0
180:             else
181:                 let v2 = b.Variance
182:                 let varDiff = v2 - a.Variance
183:                 let muDiff = a.Mean - b.Mean
184:                 if varDiff = 0.0<_> then
185:                     0.0
186:                 else
187:                     log(float v2) + 0.91893853320467267 - log(float varDiff)/2.0 + muDiff*muDiff/(2.0 * varDiff) // NOTE(review): log(float varDiff) is NaN when a.Variance exceeds b.Variance - confirm callers guarantee varDiff > 0
188:
189: #if INTERACTIVE
190: module Tests =
191:     open Gaussians
192:
193:     [ 0.0; 0.000001; 0.1; 1.0; 10.0 ] |> List.iter (fun x ->
194:         printfn "erfc %g = %g" x (erfc x)
195:
196:         printfn "erfcinv (erfc %g) - %g" x (abs (Gaussians.erfcinv (Gaussians.erfc x) - x)))
197:
198:
199:     //#endif
200:
201:     let x = RandomSampler 10
202:     let samples1 = List.init 10001 (fun _ -> x.Sample())
203:     // sample mean of the standard Gaussian draws; expected to be close to 0.0:
204:     let mean1 = Seq.average samples1
205:     // root mean square of the draws (standard deviation about zero); expected to be close to 1.0:
206:     let stddev1 = sqrt (Seq.averageBy (fun (s : float) -> s * s) samples1)
207:
208:     // A Gaussian of test scores centred on 50.0 with a standard deviation of 10.0
209:     [<Measure>] type score
210:     let g = Gaussian<score>.FromMeanAndDeviation (mean=50.0<score>, standardDeviation=10.0<score>)
211:     let scoreA = g.Sample()
212:     let scoreB = g.Sample()
213:     let samples2 = List.init 10001 (fun _ -> g.Sample())
214:     let mean  = Seq.average samples2
215:     let variance2 = samples2 |> Seq.averageBy (fun s -> sqr (mean - s))
216:     let stddev2 = sqrt variance2
217: #endif
218: ```
