(*

Install-Package FSharp.Data
Install-Package Accord
Install-Package Accord.MachineLearning
Install-Package Accord.Math
Install-Package Accord.Statistics

*)
#if INTERACTIVE
#I @"./packages"
#r @"FSharp.Data.2.3.3/lib/net40/FSharp.Data.dll"
#r @"Accord.3.7.0/lib/net45/Accord.dll"
#r @"Accord.MachineLearning.3.7.0/lib/net45/Accord.MachineLearning.dll"
#r @"Accord.Math.3.7.0/lib/net45/Accord.Math.Core.dll"
#r @"Accord.Math.3.7.0/lib/net45/Accord.Math.dll"
#r @"Accord.Statistics.3.7.0/lib/net45/Accord.Statistics.dll"
#time
#else
module DecisionTree
#endif


open System
open FSharp.Data
open Accord.MachineLearning.DecisionTrees.Learning

/// Location of the UCI red wine quality data set.
/// Declared once so that the schema sample used by the type provider and the
/// data loaded at runtime cannot drift apart.
[<Literal>]
let WineDataUrl = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

/// Typed view over the wine CSV: ';' is the column separator and up to
/// 2000 rows are inspected when inferring the column types.
type Wines = CsvProvider<WineDataUrl, ";", InferRows=2000>

// inputs: one feature vector (float[]) per CSV row, in the column order listed below.
// output: the Quality column of the same row, used as the class label.
let inputs, output =
    let rows = Wines.Load(WineDataUrl).Rows
    rows
    |> Seq.map (fun row ->
        let features =
            [|
                row.Alcohol
                row.Chlorides
                row.``Citric acid``
                row.Density
                row.``Fixed acidity``
                row.``Free sulfur dioxide``
                row.Sulphates
                row.PH
                row.``Residual sugar``
                row.``Total sulfur dioxide``
                row.``Volatile acidity``
            |]
            |> Array.map Convert.ToDouble
        features, row.Quality)
    |> Seq.toArray
    // Split the array of (features, label) pairs into two parallel arrays.
    |> Array.unzip

// The learning algorithm used to build the decision tree (C4.5).
// Other algorithms are available as well; for an overview see
// http://scikit-learn.org/stable/_static/ml_map.png
let learner = C45Learning()

/// Cancellation source for the background training task below.
let source = new System.Threading.CancellationTokenSource()

/// Teach the model on a background thread. This may take some time.
/// The result is exposed as an Async so callers can either compose it
/// or block on it with Async.RunSynchronously.
let modelTask =
    // Task.Run only consults its token before the delegate starts running.
    // Handing the same token to the learner lets source.Cancel() also be
    // observed by a Learn call that is already in progress.
    learner.Token <- source.Token
    System.Threading.Tasks.Task.Run(
        (fun () -> learner.Learn(inputs, output)),
        source.Token)
    |> Async.AwaitTask

// When running in the background, you can request cancellation of the task:
//source.Cancel()

// For now, simply block the current thread until training has finished
// and take the trained model from the task:
let model = Async.RunSynchronously modelTask

// Run the trained model over the very same rows it was trained on.
// This is only a smoke test and says little about real accuracy:
// you should split your sample data into two sets, using one to train
// the model and the other to test the accuracy of its predictions.
let predicted = model.Decide inputs

// Actual vs predicted quality for the first 10 rows:
// [0..9] |> List.iter (fun i -> printfn "Actual %d vs %d" output.[i] predicted.[i])

// The classification error
// let err = Accord.Math.Optimization.Losses.ZeroOneLoss(output).Loss(predicted)