1 people like it.

Wine-quality decision-tree using Accord.Net

Wine-quality decision-tree using machine-learning tool Accord.Net with F#

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
(*

Install-Package FSharp.Data
Install-Package Accord
Install-Package Accord.MachineLearning
Install-Package Accord.Math
Install-Package Accord.Statistics

*)
#if INTERACTIVE
#I @"./packages"
#r @"FSharp.Data.2.3.3/lib/net40/FSharp.Data.dll"
#r @"Accord.3.7.0/lib/net45/Accord.dll"
#r @"Accord.MachineLearning.3.7.0/lib/net45/Accord.MachineLearning.dll"
#r @"Accord.Math.3.7.0/lib/net45/Accord.Math.Core.dll"
#r @"Accord.Math.3.7.0/lib/net45/Accord.Math.dll"
#r @"Accord.Statistics.3.7.0/lib/net45/Accord.Statistics.dll"
#time
#else
module DecisionTree
#endif

open System
open FSharp.Data
open Accord.MachineLearning.DecisionTrees.Learning

/// Location of the UCI red-wine quality data set (a semicolon-separated CSV file).
/// Declared once as a literal so that the sample used by the type provider at
/// compile time and the data loaded at run time always refer to the same file.
[<Literal>]
let WineDataUrl = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

/// Typed view over the wine CSV; column types are inferred from up to 2000 sample rows.
type Wines = CsvProvider<WineDataUrl, ";", InferRows=2000>

/// `inputs`: one float feature vector per CSV row (eleven measurements, in the order listed below).
/// `output`: the integer quality score of the corresponding row, used as the class label.
let inputs, output =
    Wines.Load(WineDataUrl).Rows
    |> Seq.map (fun row ->
        let features =
            [|
                row.Alcohol
                row.Chlorides
                row.``Citric acid``
                row.Density
                row.``Fixed acidity``
                row.``Free sulfur dioxide``
                row.Sulphates
                row.PH
                row.``Residual sugar``
                row.``Total sulfur dioxide``
                row.``Volatile acidity``
            |]
            // The provider exposes these columns as decimal; the learner expects float.
            |> Array.map float
        features, row.Quality)
    |> Seq.toArray
    // Split the (features, label) pairs into two parallel arrays.
    |> Array.unzip

/// The learning algorithm used to build the decision tree (C4.5).
/// There are multiple algorithms available; for an overview of how to pick one see
/// http://scikit-learn.org/stable/_static/ml_map.png
let learner =
    // No CancellationTokenSource is created here: a source local to this binding
    // cannot be reached by any caller and was never handed to the learner or the
    // training task, so it only produced an undisposed, unused object.
    C45Learning()

/// Trains the model on a background task and exposes the result as an Async.
/// The task is started as soon as this binding is evaluated; training may take some time.
let modelTask =
    let training =
        System.Threading.Tasks.Task.Run(fun () -> learner.Learn(inputs, output))
    Async.AwaitTask training

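// When running in the background you could cancel the task, provided a
// CancellationTokenSource is kept in scope here and its Token is passed to Task.Run:
// source.Cancel()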

// For now, simply wait for the training to finish (i.e. run it as non-async)
// and take the resulting model:
let model = Async.RunSynchronously modelTask

// Predict on the very same rows the model was trained on. This says little about
// real accuracy, of course: you should split your sample data into two sets and
// use one to train the model and the other to test the accuracy of predictions.
let predicted = model.Decide(inputs)

// Actual vs predicted, the first 10 items (array indices start at 0):
// [0..9] |> List.iter (fun i -> printfn "Actual %d vs %d" output.[i] predicted.[i])

// The classification error
//let err = Accord.Math.Optimization.Losses.ZeroOneLoss(output).Loss(predicted)
namespace System
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
namespace Accord
namespace Accord.MachineLearning
namespace Accord.MachineLearning.DecisionTrees
namespace Accord.MachineLearning.DecisionTrees.Learning
type Wines = CsvProvider<...>

Full name: Script.Wines
type CsvProvider

Full name: FSharp.Data.CsvProvider


<summary>Typed representation of a CSV file.</summary>
       <param name='Sample'>Location of a CSV sample file or a string containing a sample CSV document.</param>
       <param name='Separators'>Column delimiter(s). Defaults to `,`.</param>
       <param name='InferRows'>Number of rows to use for inference. Defaults to `1000`. If this is zero, all rows are used.</param>
       <param name='Schema'>Optional column types, in a comma separated list. Valid types are `int`, `int64`, `bool`, `float`, `decimal`, `date`, `guid`, `string`, `int?`, `int64?`, `bool?`, `float?`, `decimal?`, `date?`, `guid?`, `int option`, `int64 option`, `bool option`, `float option`, `decimal option`, `date option`, `guid option` and `string option`.
       You can also specify a unit and the name of the column like this: `Name (type&lt;unit&gt;)`, or you can override only the name. If you don't want to specify all the columns, you can reference the columns by name like this: `ColumnName=type`.</param>
       <param name='HasHeaders'>Whether the sample contains the names of the columns as its first line.</param>
       <param name='IgnoreErrors'>Whether to ignore rows that have the wrong number of columns or which can't be parsed using the inferred or specified schema. Otherwise an exception is thrown when these rows are encountered.</param>
       <param name='SkipRows'>Skips the first n rows of the CSV file.</param>
       <param name='AssumeMissingValues'>When set to true, the type provider will assume all columns can have missing values, even if in the provided sample all values are present. Defaults to false.</param>
       <param name='PreferOptionals'>When set to true, inference will prefer to use the option type instead of nullable types, `double.NaN` or `""` for missing values. Defaults to false.</param>
       <param name='Quote'>The quotation mark (for surrounding values containing the delimiter). Defaults to `"`.</param>
       <param name='MissingValues'>The set of strings recognized as missing values. Defaults to `NaN,NA,N/A,#N/A,:,-,TBA,TBD`.</param>
       <param name='CacheRows'>Whether the rows should be cached so they can be iterated multiple times. Defaults to true. Disable for large datasets.</param>
       <param name='Culture'>The culture used for parsing numbers and dates. Defaults to the invariant culture.</param>
       <param name='Encoding'>The encoding used to read the sample. You can specify either the character set name or the codepage number. Defaults to UTF8 for files, and to ISO-8859-1 for HTTP requests, unless `charset` is specified in the `Content-Type` response header.</param>
       <param name='ResolutionFolder'>A directory that is used when resolving relative file references (at design time and in hosted execution).</param>
       <param name='EmbeddedResource'>When specified, the type provider first attempts to load the sample from the specified resource
          (e.g. 'MyCompany.MyAssembly, resource_name.csv'). This is useful when exposing types generated by the type provider.</param>
val inputs : float [] []

Full name: Script.inputs
val output : int []

Full name: Script.output
CsvProvider<...>.Load(uri: string) : CsvProvider<...>


Loads CSV from the specified uri

CsvProvider<...>.Load(reader: IO.TextReader) : CsvProvider<...>


Loads CSV from the specified reader

CsvProvider<...>.Load(stream: IO.Stream) : CsvProvider<...>


Loads CSV from the specified stream
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val row : CsvProvider<...>.Row
property CsvProvider<...>.Row.Alcohol: decimal
property CsvProvider<...>.Row.Chlorides: decimal
property CsvProvider<...>.Row.Density: decimal
property CsvProvider<...>.Row.Sulphates: decimal
property CsvProvider<...>.Row.PH: decimal
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
type Convert =
  static val DBNull : obj
  static member ChangeType : value:obj * typeCode:TypeCode -> obj + 3 overloads
  static member FromBase64CharArray : inArray:char[] * offset:int * length:int -> byte[]
  static member FromBase64String : s:string -> byte[]
  static member GetTypeCode : value:obj -> TypeCode
  static member IsDBNull : value:obj -> bool
  static member ToBase64CharArray : inArray:byte[] * offsetIn:int * length:int * outArray:char[] * offsetOut:int -> int + 1 overload
  static member ToBase64String : inArray:byte[] -> string + 3 overloads
  static member ToBoolean : value:obj -> bool + 17 overloads
  static member ToByte : value:obj -> byte + 18 overloads
  ...

Full name: System.Convert
Convert.ToDouble(value: DateTime) : float
   (+0 other overloads)
Convert.ToDouble(value: bool) : float
   (+0 other overloads)
Convert.ToDouble(value: string) : float
   (+0 other overloads)
Convert.ToDouble(value: decimal) : float
   (+0 other overloads)
Convert.ToDouble(value: float) : float
   (+0 other overloads)
Convert.ToDouble(value: float32) : float
   (+0 other overloads)
Convert.ToDouble(value: uint64) : float
   (+0 other overloads)
Convert.ToDouble(value: int64) : float
   (+0 other overloads)
Convert.ToDouble(value: uint32) : float
   (+0 other overloads)
Convert.ToDouble(value: int) : float
   (+0 other overloads)
property CsvProvider<...>.Row.Quality: int
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val unzip : array:('T1 * 'T2) [] -> 'T1 [] * 'T2 []

Full name: Microsoft.FSharp.Collections.Array.unzip
val learner : C45Learning

Full name: Script.learner
val cancellationToken : Threading.CancellationToken
val source : Threading.CancellationTokenSource
val s : Threading.CancellationTokenSource
namespace System.Threading
Multiple items
type CancellationTokenSource =
  new : unit -> CancellationTokenSource
  member Cancel : unit -> unit + 1 overload
  member Dispose : unit -> unit
  member IsCancellationRequested : bool
  member Token : CancellationToken
  static member CreateLinkedTokenSource : [<ParamArray>] tokens:CancellationToken[] -> CancellationTokenSource + 1 overload

Full name: System.Threading.CancellationTokenSource

--------------------
Threading.CancellationTokenSource() : unit
property Threading.CancellationTokenSource.Token: Threading.CancellationToken
Multiple items
type C45Learning =
  inherit DecisionTreeLearningBase
  new : unit -> C45Learning + 2 overloads
  member ComputeError : inputs:float[][] * outputs:int[] -> float
  member Learn : x:float[][] * y:int[] * ?weights:float[] -> DecisionTree + 2 overloads
  member MaxVariables : int with get, set
  member Run : inputs:float[][] * outputs:int[] -> float
  member SplitStep : int with get, set

Full name: Accord.MachineLearning.DecisionTrees.Learning.C45Learning

--------------------
C45Learning() : unit
C45Learning(attributes: Accord.MachineLearning.DecisionTrees.DecisionVariable []) : unit
C45Learning(tree: Accord.MachineLearning.DecisionTrees.DecisionTree) : unit
val modelTask : Async<obj>

Full name: Script.modelTask


 There are multiple algorithms available.
 http://scikit-learn.org/stable/_static/ml_map.png
 Teach the model in background thread. This may take some time.
namespace System.Threading.Tasks
Multiple items
type Task<'TResult> =
  inherit Task
  new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
  member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 9 overloads
  member Result : 'TResult with get, set
  static member Factory : TaskFactory<'TResult>

Full name: System.Threading.Tasks.Task<_>

--------------------
type Task =
  new : action:Action -> Task + 7 overloads
  member AsyncState : obj
  member ContinueWith : continuationAction:Action<Task> -> Task + 9 overloads
  member CreationOptions : TaskCreationOptions
  member Dispose : unit -> unit
  member Exception : AggregateException
  member Id : int
  member IsCanceled : bool
  member IsCompleted : bool
  member IsFaulted : bool
  ...

Full name: System.Threading.Tasks.Task

--------------------
Threading.Tasks.Task(function: Func<'TResult>) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<'TResult>, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit

--------------------
Threading.Tasks.Task(action: Action) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
C45Learning.Learn(x: int [] [], y: int [], ?weights: float []) : Accord.MachineLearning.DecisionTrees.DecisionTree
C45Learning.Learn(x: Nullable<int> [] [], y: int [], ?weights: float []) : Accord.MachineLearning.DecisionTrees.DecisionTree
C45Learning.Learn(x: float [] [], y: int [], ?weights: float []) : Accord.MachineLearning.DecisionTrees.DecisionTree
Multiple items
type Async
static member AsBeginEnd : computation:('Arg -> Async<'T>) -> ('Arg * AsyncCallback * obj -> IAsyncResult) * (IAsyncResult -> 'T) * (IAsyncResult -> unit)
static member AwaitEvent : event:IEvent<'Del,'T> * ?cancelAction:(unit -> unit) -> Async<'T> (requires delegate and 'Del :> Delegate)
static member AwaitIAsyncResult : iar:IAsyncResult * ?millisecondsTimeout:int -> Async<bool>
static member AwaitTask : task:Task -> Async<unit>
static member AwaitTask : task:Task<'T> -> Async<'T>
static member AwaitWaitHandle : waitHandle:WaitHandle * ?millisecondsTimeout:int -> Async<bool>
static member CancelDefaultToken : unit -> unit
static member Catch : computation:Async<'T> -> Async<Choice<'T,exn>>
static member FromBeginEnd : beginAction:(AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg:'Arg1 * beginAction:('Arg1 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * beginAction:('Arg1 * 'Arg2 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * arg3:'Arg3 * beginAction:('Arg1 * 'Arg2 * 'Arg3 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromContinuations : callback:(('T -> unit) * (exn -> unit) * (OperationCanceledException -> unit) -> unit) -> Async<'T>
static member Ignore : computation:Async<'T> -> Async<unit>
static member OnCancel : interruption:(unit -> unit) -> Async<IDisposable>
static member Parallel : computations:seq<Async<'T>> -> Async<'T []>
static member RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:CancellationToken -> 'T
static member Sleep : millisecondsDueTime:int -> Async<unit>
static member Start : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions * ?cancellationToken:CancellationToken -> Task<'T>
static member StartChild : computation:Async<'T> * ?millisecondsTimeout:int -> Async<Async<'T>>
static member StartChildAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions -> Async<Task<'T>>
static member StartImmediate : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartWithContinuations : computation:Async<'T> * continuation:('T -> unit) * exceptionContinuation:(exn -> unit) * cancellationContinuation:(OperationCanceledException -> unit) * ?cancellationToken:CancellationToken -> unit
static member SwitchToContext : syncContext:SynchronizationContext -> Async<unit>
static member SwitchToNewThread : unit -> Async<unit>
static member SwitchToThreadPool : unit -> Async<unit>
static member TryCancelled : computation:Async<'T> * compensation:(OperationCanceledException -> unit) -> Async<'T>
static member CancellationToken : Async<CancellationToken>
static member DefaultCancellationToken : CancellationToken

Full name: Microsoft.FSharp.Control.Async

--------------------
type Async<'T>

Full name: Microsoft.FSharp.Control.Async<_>
static member Async.AwaitTask : task:Threading.Tasks.Task -> Async<unit>
static member Async.AwaitTask : task:Threading.Tasks.Task<'T> -> Async<'T>
val model : obj

Full name: Script.model
static member Async.RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:Threading.CancellationToken -> 'T
val predicted : obj

Full name: Script.predicted
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/7Tz
Posted:7 years ago
Author:Tuomas Hietanen
Tags: machine learning