open FSharp.Data
open MathNet.Numerics
open FSharp.Plotly

(*
Sample analysis based on the NYC taxi (TLC) trip record data:

https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page
*)


/// Typed view over the taxi fare CSV, generated by the FSharp.Data CSV type provider
/// from the sample file below. Code in this file reads the `Payment_type` and
/// `Fare_amount` columns of its rows.
// NOTE(review): the path is absolute and machine-specific; it has to point at a
// copy of the CSV on the machine where this script is compiled — confirm/adjust.
type Trip = CsvProvider< @"C:\s\AutoMLDemo\taxi-fare-test.csv" >

/// The sample data set obtained through the `Trip` provided type.
let trips = Trip.GetSample()

/// Every row of `trips`, copied into an array once so that the analyses
/// below can traverse the data repeatedly.
let trows = Array.ofSeq trips.Rows

/// Fare amounts bucketed by payment type: an array of
/// (payment type, fares of all trips with that payment type as floats).
let fareByPaymentType =
    [| for (paymentType, rows) in trows |> Array.groupBy (fun row -> row.Payment_type) ->
         // Convert each fare to float so the numeric/plotting code can consume it directly.
         paymentType, [| for row in rows -> float row.Fare_amount |] |]

/// Builds and shows one fare histogram per payment type, titled with the
/// payment type. Returns one unit value per chart shown.
let histograms() =
    [| for (paymentType, fares) in fareByPaymentType ->
         Chart.Histogram fares
         |> Chart.withTitle paymentType
         |> Chart.Show |]

open MathNet.Numerics.Statistics

/// Shows a single combined chart with one semi-transparent area trace per
/// payment type. Each trace is a Gaussian kernel density estimate of that
/// payment type's fare amounts, evaluated on a fixed grid of fare values.
let densityByPaymentType() =
    // Evaluation grid 0.0, 0.1, ..., 100.0. It does not depend on the payment
    // type, so it is built once instead of once per group.
    let grid = [|for i in 0.0 .. 0.1 .. 100.0 -> i|]

    // Bandwidth passed to the Gaussian kernel estimator.
    let bandwidth = 1.0

    // Area chart over (x, density) pairs; low opacity keeps overlapping traces visible.
    let area xs = Chart.Area(xs, Opacity=0.1)

    fareByPaymentType
    |> Array.map (fun (paymentType, fares) ->
        // Sorting is kept from the original implementation so results stay identical.
        // NOTE(review): EstimateGaussian does not appear to need ordered samples — confirm
        // before dropping the sort.
        let sortedFares = Array.sort fares
        let densities =
            grid
            |> Array.map (fun x -> KernelDensity.EstimateGaussian(x, bandwidth, sortedFares))
        Array.zip grid densities
        |> area
        |> Chart.withTraceName paymentType)
    |> Chart.Combine
    |> Chart.withTitle "Fare Density by Payment Type"
    |> Chart.Show