8 people like it.
Like the snippet!
Using the Accord.NET machine learning framework from F#.
This example uses the same data and methods as the Accord.NET LogisticRegression documentation:
http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_LogisticRegression.htm
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
|
#I @"./packages"
#r @"FSharp.Data.2.3.2/lib/net40/FSharp.Data.dll"
#r @"Accord.3.4.0/lib/net45/Accord.dll"
#r @"Accord.MachineLearning.3.4.0/lib/net45/Accord.MachineLearning.dll"
#r @"Accord.Math.3.4.0/lib/net45/Accord.Math.Core.dll"
#r @"Accord.Math.3.4.0/lib/net45/Accord.Math.dll"
#r @"Accord.Statistics.3.4.0/lib/net45/Accord.Statistics.dll"
open System
open FSharp.Data
// We have some sample data for which we already know the results,
// and we use it to teach the machine:
(* sample.csv data content:
Age,Smokes,Had cancer
55,0,false
28,0,false
65,1,false
46,0,true
86,1,true
56,1,true
85,0,false
33,0,false
21,1,false
42,1,true
*)
#time
open Accord.Statistics.Models.Regression
open Accord.Statistics.Models.Regression.Fitting
// Typed view over the sample CSV; up to 2000 rows are used for schema inference.
type People = CsvProvider<"sample.csv", ",", InferRows = 2000>

// Load the sample file and split it into two parallel arrays:
//   inputs - one [| age; smokes |] feature vector (as floats) per row
//   output - the known "Had cancer" value of the same row
let inputs, output =
    let rows = People.Load(@"sample.csv").Rows
    [| for row in rows ->
         [| float row.Age; Convert.ToDouble(row.Smokes) |], row.``Had cancer`` |]
    |> Array.unzip
// One row of the sample CSV.
type Observation = People.Row

/// A named feature: a display name paired with a function that extracts
/// the feature's integer value from an observation.
// Note: no StructuredFormatDisplay attribute here. Feature is only an
// abbreviation for a tuple, so it cannot carry the `AsString` member that
// the attribute's format string would refer to.
type Feature = string * (Observation -> int)

// The features fed to the model, in the same order as the columns of `inputs`.
let features: Feature[] =
    [| "Age", (fun obs -> obs.Age)
       "Smokes", (fun obs -> obs.Smokes |> Convert.ToInt32) |]
// Cancellation source for the training run. It is bound at script level
// (rather than hidden inside `learner`) so that `source.Cancel()` can
// actually be called while the model is being trained.
let source = new System.Threading.CancellationTokenSource()

// The learning algorithm used to fit the logistic regression model.
let learner =
    // There are multiple algorithms available. For example:
    IterativeReweightedLeastSquares<LogisticRegression>(
        Tolerance = 1e-4,
        Iterations = 1000,
        Regularization = 0.0,
        Token = source.Token)

// Another one would be:
// let alg = LogisticRegression(NumberOfInputs = (features |> Seq.length))
// LogisticGradientDescent(alg,
//     Tolerance = 0.001,
//     Iterations = 100000,
//     Token = source.Token)
/// Trains the model on a background thread. This may take some time.
let modelTask =
    // Task.Run starts the training immediately; the Async only awaits its result.
    let training =
        System.Threading.Tasks.Task.Run(fun () -> learner.Learn(inputs, output))
    Async.AwaitTask training

// While the training runs in the background it can be cancelled by calling
// Cancel() on the CancellationTokenSource whose token was given to the learner:
//source.Cancel()

// For now, simply block until the training has finished:
let model = Async.RunSynchronously modelTask
// Print the odds ratio of every feature, smallest first.
// Coefficient index 0 of the model is the intercept, so the feature at
// position idx is read from index idx + 1.
features
|> Seq.mapi (fun idx (name, _) -> (name, model.GetOddsRatio(idx + 1)))
|> Seq.sortBy snd
|> Seq.iter (printfn "%A")
// Output:
//("Age", 1.020859703)
//("Smokes", 5.858474898)
// The estimated linear regression formula, rendered as text:
let formula =
    let linear = model.Linear
    linear.ToString()
// Output:
// "y(x0, x1) = 0.0206451183100222*x0 + 1.76788931343272*x1 + -2.45774643623285"

// Classify the training items themselves. This only demonstrates the call:
// for a meaningful accuracy estimate you should split the sample data into
// two sets, train the model on one and test its predictions on the other.
let items =
    let decisions = model.Decide(inputs)
    decisions |> Array.map (fun decided -> Convert.ToDouble(decided))
// [|0.0; 0.0; 1.0; 0.0; 1.0; 1.0; 0.0; 0.0; 0.0; 1.0|]
|
namespace System
Multiple items
namespace FSharp
--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data
--------------------
namespace Microsoft.FSharp.Data
namespace Accord
namespace Accord.Statistics
namespace Accord.Statistics.Models
namespace Accord.Statistics.Models.Regression
namespace Accord.Statistics.Models.Regression.Fitting
type People = CsvProvider<...>
Full name: Script.People
type CsvProvider
Full name: FSharp.Data.CsvProvider
<summary>Typed representation of a CSV file.</summary>
<param name='Sample'>Location of a CSV sample file or a string containing a sample CSV document.</param>
<param name='Separators'>Column delimiter(s). Defaults to `,`.</param>
<param name='InferRows'>Number of rows to use for inference. Defaults to `1000`. If this is zero, all rows are used.</param>
<param name='Schema'>Optional column types, in a comma separated list. Valid types are `int`, `int64`, `bool`, `float`, `decimal`, `date`, `guid`, `string`, `int?`, `int64?`, `bool?`, `float?`, `decimal?`, `date?`, `guid?`, `int option`, `int64 option`, `bool option`, `float option`, `decimal option`, `date option`, `guid option` and `string option`.
You can also specify a unit and the name of the column like this: `Name (type<unit>)`, or you can override only the name. If you don't want to specify all the columns, you can reference the columns by name like this: `ColumnName=type`.</param>
<param name='HasHeaders'>Whether the sample contains the names of the columns as its first line.</param>
<param name='IgnoreErrors'>Whether to ignore rows that have the wrong number of columns or which can't be parsed using the inferred or specified schema. Otherwise an exception is thrown when these rows are encountered.</param>
<param name='SkipRows'>Skips the first n rows of the CSV file.</param>
<param name='AssumeMissingValues'>When set to true, the type provider will assume all columns can have missing values, even if in the provided sample all values are present. Defaults to false.</param>
<param name='PreferOptionals'>When set to true, inference will prefer to use the option type instead of nullable types, `double.NaN` or `""` for missing values. Defaults to false.</param>
<param name='Quote'>The quotation mark (for surrounding values containing the delimiter). Defaults to `"`.</param>
<param name='MissingValues'>The set of strings recognized as missing values. Defaults to `NaN,NA,N/A,#N/A,:,-,TBA,TBD`.</param>
<param name='CacheRows'>Whether the rows should be cached so they can be iterated multiple times. Defaults to true. Disable for large datasets.</param>
<param name='Culture'>The culture used for parsing numbers and dates. Defaults to the invariant culture.</param>
<param name='Encoding'>The encoding used to read the sample. You can specify either the character set name or the codepage number. Defaults to UTF8 for files, and to ISO-8859-1 for HTTP requests, unless `charset` is specified in the `Content-Type` response header.</param>
<param name='ResolutionFolder'>A directory that is used when resolving relative file references (at design time and in hosted execution).</param>
<param name='EmbeddedResource'>When specified, the type provider first attempts to load the sample from the specified resource
(e.g. 'MyCompany.MyAssembly, resource_name.csv'). This is useful when exposing types generated by the type provider.</param>
val inputs : float [] []
Full name: Script.inputs
val output : obj []
Full name: Script.output
CsvProvider<...>.Load(uri: string) : CsvProvider<...>
Loads CSV from the specified uri
CsvProvider<...>.Load(reader: IO.TextReader) : CsvProvider<...>
Loads CSV from the specified reader
CsvProvider<...>.Load(stream: IO.Stream) : CsvProvider<...>
Loads CSV from the specified stream
module Seq
from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val row : CsvProvider<...>.Row
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
type Convert =
static val DBNull : obj
static member ChangeType : value:obj * typeCode:TypeCode -> obj + 3 overloads
static member FromBase64CharArray : inArray:char[] * offset:int * length:int -> byte[]
static member FromBase64String : s:string -> byte[]
static member GetTypeCode : value:obj -> TypeCode
static member IsDBNull : value:obj -> bool
static member ToBase64CharArray : inArray:byte[] * offsetIn:int * length:int * outArray:char[] * offsetOut:int -> int + 1 overload
static member ToBase64String : inArray:byte[] -> string + 3 overloads
static member ToBoolean : value:obj -> bool + 17 overloads
static member ToByte : value:obj -> byte + 18 overloads
...
Full name: System.Convert
Convert.ToDouble(value: DateTime) : float
(+0 other overloads)
Convert.ToDouble(value: bool) : float
(+0 other overloads)
Convert.ToDouble(value: string) : float
(+0 other overloads)
Convert.ToDouble(value: decimal) : float
(+0 other overloads)
Convert.ToDouble(value: float) : float
(+0 other overloads)
Convert.ToDouble(value: float32) : float
(+0 other overloads)
Convert.ToDouble(value: uint64) : float
(+0 other overloads)
Convert.ToDouble(value: int64) : float
(+0 other overloads)
Convert.ToDouble(value: uint32) : float
(+0 other overloads)
Convert.ToDouble(value: int) : float
(+0 other overloads)
val toArray : source:seq<'T> -> 'T []
Full name: Microsoft.FSharp.Collections.Seq.toArray
type Array =
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...
Full name: System.Array
val unzip : array:('T1 * 'T2) [] -> 'T1 [] * 'T2 []
Full name: Microsoft.FSharp.Collections.Array.unzip
type Observation = CsvProvider<...>.Row
Full name: Script.Observation
type Row =
inherit Tuple<string>
new : sampleCsv: string -> Row
member Item1 : string
member ``Sample.csv`` : string
member ``System.ITuple.Size`` : int
Full name: FSharp.Data.CsvProvider,Sample="sample.csv",Separators=",",InferRows="2000".Row
Multiple items
type StructuredFormatDisplayAttribute =
inherit Attribute
new : value:string -> StructuredFormatDisplayAttribute
member Value : string
Full name: Microsoft.FSharp.Core.StructuredFormatDisplayAttribute
--------------------
new : value:string -> StructuredFormatDisplayAttribute
type Feature = string * (Observation -> int)
Full name: Script.Feature
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
val features : Feature []
Full name: Script.features
val obs : Observation
Convert.ToInt32(value: DateTime) : int
(+0 other overloads)
Convert.ToInt32(value: string) : int
(+0 other overloads)
Convert.ToInt32(value: decimal) : int
(+0 other overloads)
Convert.ToInt32(value: float) : int
(+0 other overloads)
Convert.ToInt32(value: float32) : int
(+0 other overloads)
Convert.ToInt32(value: uint64) : int
(+0 other overloads)
Convert.ToInt32(value: int64) : int
(+0 other overloads)
Convert.ToInt32(value: int) : int
(+0 other overloads)
Convert.ToInt32(value: uint32) : int
(+0 other overloads)
Convert.ToInt32(value: uint16) : int
(+0 other overloads)
val learner : IterativeReweightedLeastSquares<LogisticRegression>
Full name: Script.learner
val cancellationToken : Threading.CancellationToken
val source : Threading.CancellationTokenSource
val s : Threading.CancellationTokenSource
namespace System.Threading
Multiple items
type CancellationTokenSource =
new : unit -> CancellationTokenSource
member Cancel : unit -> unit + 1 overload
member Dispose : unit -> unit
member IsCancellationRequested : bool
member Token : CancellationToken
static member CreateLinkedTokenSource : [<ParamArray>] tokens:CancellationToken[] -> CancellationTokenSource + 1 overload
Full name: System.Threading.CancellationTokenSource
--------------------
Threading.CancellationTokenSource() : unit
property Threading.CancellationTokenSource.Token: Threading.CancellationToken
Multiple items
type IterativeReweightedLeastSquares =
inherit IterativeReweightedLeastSquares<GeneralizedLinearRegression>
new : regression:LogisticRegression -> IterativeReweightedLeastSquares + 1 overload
member ComputeError : inputs:float[][] * outputs:float[] -> float
member Run : inputs:float[][] * outputs:int[] -> float + 6 overloads
Full name: Accord.Statistics.Models.Regression.Fitting.IterativeReweightedLeastSquares
--------------------
type IterativeReweightedLeastSquares<'TModel (requires default constructor and 'TModel :> GeneralizedLinearRegression)> =
new : unit -> IterativeReweightedLeastSquares<'TModel>
member ComputeStandardErrors : bool with get, set
member GetInformationMatrix : unit -> float[][]
member Gradient : float[]
member Hessian : float[][]
member Iterations : int with get, set
member Learn : x:float[][] * y:int[] * ?weights:float[] -> 'TModel + 2 overloads
member Model : 'TModel with get, set
member Parameters : int
member Previous : float[]
...
Full name: Accord.Statistics.Models.Regression.Fitting.IterativeReweightedLeastSquares<_>
--------------------
IterativeReweightedLeastSquares(regression: LogisticRegression) : unit
IterativeReweightedLeastSquares(regression: GeneralizedLinearRegression) : unit
--------------------
IterativeReweightedLeastSquares() : unit
Multiple items
type LogisticRegression =
inherit GeneralizedLinearRegression
new : unit -> LogisticRegression + 2 overloads
member GetConfidenceInterval : index:int -> DoubleRange
member GetOddsRatio : index:int -> float
static member FromWeights : weights:float[] -> LogisticRegression + 1 overload
Full name: Accord.Statistics.Models.Regression.LogisticRegression
--------------------
LogisticRegression() : unit
val modelTask : Async<obj>
Full name: Script.modelTask
There are multiple algorithms available.
For example:
Teach the model in background thread. This may take some time.
namespace System.Threading.Tasks
Multiple items
type Task<'TResult> =
inherit Task
new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 9 overloads
member Result : 'TResult with get, set
static member Factory : TaskFactory<'TResult>
Full name: System.Threading.Tasks.Task<_>
--------------------
type Task =
new : action:Action -> Task + 7 overloads
member AsyncState : obj
member ContinueWith : continuationAction:Action<Task> -> Task + 9 overloads
member CreationOptions : TaskCreationOptions
member Dispose : unit -> unit
member Exception : AggregateException
member Id : int
member IsCanceled : bool
member IsCompleted : bool
member IsFaulted : bool
...
Full name: System.Threading.Tasks.Task
--------------------
Threading.Tasks.Task(function: Func<'TResult>) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<'TResult>, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj) : unit
Threading.Tasks.Task(function: Func<'TResult>, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(function: Func<obj,'TResult>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
--------------------
Threading.Tasks.Task(action: Action) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj) : unit
Threading.Tasks.Task(action: Action, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
Threading.Tasks.Task(action: Action<obj>, state: obj, cancellationToken: Threading.CancellationToken, creationOptions: Threading.Tasks.TaskCreationOptions) : unit
IterativeReweightedLeastSquares.Learn(x: float [] [], y: float [], ?weights: float []) : LogisticRegression
IterativeReweightedLeastSquares.Learn(x: float [] [], y: bool [], ?weights: float []) : LogisticRegression
IterativeReweightedLeastSquares.Learn(x: float [] [], y: int [], ?weights: float []) : LogisticRegression
Multiple items
type Async
static member AsBeginEnd : computation:('Arg -> Async<'T>) -> ('Arg * AsyncCallback * obj -> IAsyncResult) * (IAsyncResult -> 'T) * (IAsyncResult -> unit)
static member AwaitEvent : event:IEvent<'Del,'T> * ?cancelAction:(unit -> unit) -> Async<'T> (requires delegate and 'Del :> Delegate)
static member AwaitIAsyncResult : iar:IAsyncResult * ?millisecondsTimeout:int -> Async<bool>
static member AwaitTask : task:Task -> Async<unit>
static member AwaitTask : task:Task<'T> -> Async<'T>
static member AwaitWaitHandle : waitHandle:WaitHandle * ?millisecondsTimeout:int -> Async<bool>
static member CancelDefaultToken : unit -> unit
static member Catch : computation:Async<'T> -> Async<Choice<'T,exn>>
static member FromBeginEnd : beginAction:(AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg:'Arg1 * beginAction:('Arg1 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * beginAction:('Arg1 * 'Arg2 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromBeginEnd : arg1:'Arg1 * arg2:'Arg2 * arg3:'Arg3 * beginAction:('Arg1 * 'Arg2 * 'Arg3 * AsyncCallback * obj -> IAsyncResult) * endAction:(IAsyncResult -> 'T) * ?cancelAction:(unit -> unit) -> Async<'T>
static member FromContinuations : callback:(('T -> unit) * (exn -> unit) * (OperationCanceledException -> unit) -> unit) -> Async<'T>
static member Ignore : computation:Async<'T> -> Async<unit>
static member OnCancel : interruption:(unit -> unit) -> Async<IDisposable>
static member Parallel : computations:seq<Async<'T>> -> Async<'T []>
static member RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:CancellationToken -> 'T
static member Sleep : millisecondsDueTime:int -> Async<unit>
static member Start : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions * ?cancellationToken:CancellationToken -> Task<'T>
static member StartChild : computation:Async<'T> * ?millisecondsTimeout:int -> Async<Async<'T>>
static member StartChildAsTask : computation:Async<'T> * ?taskCreationOptions:TaskCreationOptions -> Async<Task<'T>>
static member StartImmediate : computation:Async<unit> * ?cancellationToken:CancellationToken -> unit
static member StartWithContinuations : computation:Async<'T> * continuation:('T -> unit) * exceptionContinuation:(exn -> unit) * cancellationContinuation:(OperationCanceledException -> unit) * ?cancellationToken:CancellationToken -> unit
static member SwitchToContext : syncContext:SynchronizationContext -> Async<unit>
static member SwitchToNewThread : unit -> Async<unit>
static member SwitchToThreadPool : unit -> Async<unit>
static member TryCancelled : computation:Async<'T> * compensation:(OperationCanceledException -> unit) -> Async<'T>
static member CancellationToken : Async<CancellationToken>
static member DefaultCancellationToken : CancellationToken
Full name: Microsoft.FSharp.Control.Async
--------------------
type Async<'T>
Full name: Microsoft.FSharp.Control.Async<_>
static member Async.AwaitTask : task:Threading.Tasks.Task -> Async<unit>
static member Async.AwaitTask : task:Threading.Tasks.Task<'T> -> Async<'T>
val model : obj
Full name: Script.model
static member Async.RunSynchronously : computation:Async<'T> * ?timeout:int * ?cancellationToken:Threading.CancellationToken -> 'T
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.mapi
val idx : int
val f : Feature
val name : string
val fst : tuple:('T1 * 'T2) -> 'T1
Full name: Microsoft.FSharp.Core.Operators.fst
val odds : IComparable
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val snd : tuple:('T1 * 'T2) -> 'T2
Full name: Microsoft.FSharp.Core.Operators.snd
val iter : action:('T -> unit) -> source:seq<'T> -> unit
Full name: Microsoft.FSharp.Collections.Seq.iter
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val formula : obj
Full name: Script.formula
namespace Accord.Statistics.Models.Regression.Linear
val items : obj []
Full name: Script.items
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
More information