2 people like it.

Kernel Density estimation and visualization with MathNet.Numerics and FsPlotly

Sample to estimate and visualize kernel densities of multiple distributions for visual comparison. Here the distributions of NY taxi fares are being compared by payment type (e.g. cash, credit card, etc.)

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
open FSharp.Data
open MathNet.Numerics
open FSharp.Plotly

(*
Sample based on NY Taxi data 

https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
*)


type Trip = CsvProvider< @"C:\s\AutoMLDemo\taxi-fare-test.csv" >

let trips = Trip.GetSample()

let trows = trips.Rows |> Seq.toArray

let fareByPaymentType =
    trows
    |> Array.groupBy(fun x->x.Payment_type)
    |> Array.map (fun (p,xs)->p, xs |> Array.map (fun x->float x.Fare_amount))

let histograms() =
    fareByPaymentType
    |> Array.map (fun (v,fs)->
        Chart.Histogram fs
        |> Chart.withTitle v
        |> Chart.Show
    )

open MathNet.Numerics.Statistics

let densityByPaymentType() =
    let dsByV = 
        fareByPaymentType 
        |> Array.map(fun (v,fares) ->
            let frs = fares //|> Seq.sample (0.5) |> Seq.toArray
            let sfrs = Array.sort frs
            let xs = [|for i in 0.0 .. 0.1 .. 100.0 -> i|]
            let ds = xs |> Array.map (fun x -> KernelDensity.EstimateGaussian(x,1.0,sfrs))
            v,xs,ds)

    let area  xs = Chart.Area(xs, Opacity=0.1)
    let colors = [|"blue"; "red"|]
                                                        
    dsByV
    |> Array.mapi (fun i (v,xs,ds) -> 
        Array.zip xs ds
        |> area 
        |> Chart.withTraceName v)
    |> Chart.Combine
    |> Chart.withTitle "Fare Density by Payment Type"
    |> Chart.Show
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
namespace MathNet
namespace MathNet.Numerics
namespace FSharp.Plotly
type Trip = obj

Full name: Script.Trip
type CsvProvider

Full name: FSharp.Data.CsvProvider


<summary>Typed representation of a CSV file.</summary>
       <param name='Sample'>Location of a CSV sample file or a string containing a sample CSV document.</param>
       <param name='Separators'>Column delimiter(s). Defaults to `,`.</param>
       <param name='InferRows'>Number of rows to use for inference. Defaults to `1000`. If this is zero, all rows are used.</param>
       <param name='Schema'>Optional column types, in a comma separated list. Valid types are `int`, `int64`, `bool`, `float`, `decimal`, `date`, `guid`, `string`, `int?`, `int64?`, `bool?`, `float?`, `decimal?`, `date?`, `guid?`, `int option`, `int64 option`, `bool option`, `float option`, `decimal option`, `date option`, `guid option` and `string option`.
       You can also specify a unit and the name of the column like this: `Name (type&lt;unit&gt;)`, or you can override only the name. If you don't want to specify all the columns, you can reference the columns by name like this: `ColumnName=type`.</param>
       <param name='HasHeaders'>Whether the sample contains the names of the columns as its first line.</param>
       <param name='IgnoreErrors'>Whether to ignore rows that have the wrong number of columns or which can't be parsed using the inferred or specified schema. Otherwise an exception is thrown when these rows are encountered.</param>
       <param name='SkipRows'>SKips the first n rows of the CSV file.</param>
       <param name='AssumeMissingValues'>When set to true, the type provider will assume all columns can have missing values, even if in the provided sample all values are present. Defaults to false.</param>
       <param name='PreferOptionals'>When set to true, inference will prefer to use the option type instead of nullable types, `double.NaN` or `""` for missing values. Defaults to false.</param>
       <param name='Quote'>The quotation mark (for surrounding values containing the delimiter). Defaults to `"`.</param>
       <param name='MissingValues'>The set of strings recogized as missing values. Defaults to `NaN,NA,N/A,#N/A,:,-,TBA,TBD`.</param>
       <param name='CacheRows'>Whether the rows should be caches so they can be iterated multiple times. Defaults to true. Disable for large datasets.</param>
       <param name='Culture'>The culture used for parsing numbers and dates. Defaults to the invariant culture.</param>
       <param name='Encoding'>The encoding used to read the sample. You can specify either the character set name or the codepage number. Defaults to UTF8 for files, and to ISO-8859-1 the for HTTP requests, unless `charset` is specified in the `Content-Type` response header.</param>
       <param name='ResolutionFolder'>A directory that is used when resolving relative file references (at design time and in hosted execution).</param>
       <param name='EmbeddedResource'>When specified, the type provider first attempts to load the sample from the specified resource
          (e.g. 'MyCompany.MyAssembly, resource_name.csv'). This is useful when exposing types generated by the type provider.</param>
val trips : obj

Full name: Script.trips
val trows : obj []

Full name: Script.trows
Multiple items
module Seq

from FSharp.Plotly

--------------------
module Seq

from Microsoft.FSharp.Collections
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val fareByPaymentType : (string * float []) []

Full name: Script.fareByPaymentType
module Array

from Microsoft.FSharp.Collections
val groupBy : projection:('T -> 'Key) -> array:'T [] -> ('Key * 'T []) [] (requires equality)

Full name: Microsoft.FSharp.Collections.Array.groupBy
val x : obj
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val p : string
val xs : obj []
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = System.Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val histograms : unit -> unit []

Full name: Script.histograms
val v : string
val fs : float []
type Chart =
  static member Area : xy:seq<#IConvertible * #IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a4 -> GenericChart
  static member Area : x:seq<#IConvertible> * y:seq<#IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Dash:DrawingStyle * ?Width:'a4 -> GenericChart
  static member Bar : keysvalues:seq<#IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member Bar : keys:seq<#IConvertible> * values:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  static member BoxPlot : xy:seq<'a0 * 'a1> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Fillcolor:'a3 * ?Opacity:float * ?Whiskerwidth:'a4 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a5 * ?Pointpos:'a6 * ?Orientation:Orientation -> GenericChart
  static member BoxPlot : ?x:'a0 * ?y:'a1 * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Fillcolor:'a3 * ?Opacity:float * ?Whiskerwidth:'a4 * ?Boxpoints:Boxpoints * ?Boxmean:BoxMean * ?Jitter:'a5 * ?Pointpos:'a6 * ?Orientation:Orientation -> GenericChart
  static member Bubble : xysizes:seq<#IConvertible * #IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a3 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font -> GenericChart
  static member Bubble : x:seq<#IConvertible> * y:seq<#IConvertible> * sizes:seq<#IConvertible> * ?Name:string * ?Showlegend:bool * ?MarkerSymbol:Symbol * ?Color:'a3 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font -> GenericChart
  static member ChoroplethMap : locations:seq<string> * z:seq<#IConvertible> * ?Text:seq<#IConvertible> * ?Locationmode:LocationFormat * ?Autocolorscale:bool * ?Colorscale:Colorscale * ?Colorbar:'a2 * ?Marker:Marker * ?Zmin:'a3 * ?Zmax:'a4 -> GenericChart
  static member Column : keysvalues:seq<#IConvertible * #IConvertible> * ?Name:string * ?Showlegend:bool * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#IConvertible> * ?TextPosition:TextPosition * ?TextFont:Font * ?Marker:Marker -> GenericChart
  ...

Full name: FSharp.Plotly.Chart
static member Chart.Histogram : data:seq<#System.IConvertible> * ?Orientation:StyleParam.Orientation * ?Name:string * ?Showlegend:bool * ?Opacity:float * ?Color:'a1 * ?HistNorm:StyleParam.HistNorm * ?HistFunc:StyleParam.HistNorm * ?nBinsx:int * ?nBinsy:int * ?Xbins:Bins * ?Ybins:Bins * ?xError:'a2 * ?yError:'a3 -> GenericChart.GenericChart
static member Chart.withTitle : title:string * ?Titlefont:Font -> (GenericChart.GenericChart -> GenericChart.GenericChart)
static member Chart.Show : ch:GenericChart.GenericChart -> unit
namespace MathNet.Numerics.Statistics
val densityByPaymentType : unit -> unit

Full name: Script.densityByPaymentType
val dsByV : (string * float [] * float []) []
val fares : float []
val frs : float []
val sfrs : float []
val sort : array:'T [] -> 'T [] (requires comparison)

Full name: Microsoft.FSharp.Collections.Array.sort
val xs : float []
val i : float
val ds : float []
val x : float
type KernelDensity =
  static member EpanechnikovKernel : x:float -> float
  static member Estimate : x:float * bandwidth:float * samples:IList<float> * kernel:Func<float, float> -> float
  static member EstimateEpanechnikov : x:float * bandwidth:float * samples:IList<float> -> float
  static member EstimateGaussian : x:float * bandwidth:float * samples:IList<float> -> float
  static member EstimateTriangular : x:float * bandwidth:float * samples:IList<float> -> float
  static member EstimateUniform : x:float * bandwidth:float * samples:IList<float> -> float
  static member GaussianKernel : x:float -> float
  static member TriangularKernel : x:float -> float
  static member UniformKernel : x:float -> float

Full name: MathNet.Numerics.Statistics.KernelDensity
KernelDensity.EstimateGaussian(x: float, bandwidth: float, samples: System.Collections.Generic.IList<float>) : float
val area : (seq<#System.IConvertible * #System.IConvertible> -> GenericChart.GenericChart)
val xs : seq<#System.IConvertible * #System.IConvertible>
static member Chart.Area : xy:seq<#System.IConvertible * #System.IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#System.IConvertible> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font * ?Dash:StyleParam.DrawingStyle * ?Width:'a4 -> GenericChart.GenericChart
static member Chart.Area : x:seq<#System.IConvertible> * y:seq<#System.IConvertible> * ?Name:string * ?ShowMarkers:bool * ?Showlegend:bool * ?MarkerSymbol:StyleParam.Symbol * ?Color:'a2 * ?Opacity:float * ?Labels:seq<#System.IConvertible> * ?TextPosition:StyleParam.TextPosition * ?TextFont:Font * ?Dash:StyleParam.DrawingStyle * ?Width:'a4 -> GenericChart.GenericChart
val colors : string []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.mapi
val i : int
val zip : array1:'T1 [] -> array2:'T2 [] -> ('T1 * 'T2) []

Full name: Microsoft.FSharp.Collections.Array.zip
static member Chart.withTraceName : ?Name:string * ?Showlegend:bool * ?Legendgroup:string * ?Visible:StyleParam.Visible -> (GenericChart.GenericChart -> GenericChart.GenericChart)
static member Chart.Combine : gCharts:seq<GenericChart.GenericChart> -> GenericChart.GenericChart
Raw view New version

More information

Link:http://fssnip.net/7X9
Posted:10 months ago
Author:Faisal Waris
Tags: data science , visualization , kernel density