2 people like it.
Like the snippet!
Sentiment Classification and Cross-Validation with ML.Net - Sample - Using Anonymous Records
ML.Net sentiment classification and cross-validation example using gradient boosted trees. Needs to be compiled in a .NET Core F# project. Uses F# 4.6 anonymous records, which work well with the ML.Net static API.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
|
module SampleModelP
open System
open Microsoft.ML
open Microsoft.ML.StaticPipe
open Microsoft.ML.Data.IO
open System.IO
(*
Contrast two snippets of code to highlight the usefulness of
F# anonymous records.
Equivalent code, based on F# 4.5 syntax, is here: http://fssnip.net/7VS
The marker '//***' is used to explain anonymous record usage
in comments below
*)
module Train =
    (*
    Data:
    download test / train datasets from here:
    https://github.com/dotnet/machinelearning/blob/master/test/data/wikipedia-detox-250-line-data.tsv
    *)

    //*** change this for your run
    // Path of the tab-separated, header-bearing text file that trainModel loads.
    let dataPath = @"C:\s\repodata\hpto\train.txt"

    // Train the model using the supplied hyperparameters and print its
    // performance results.
    //
    // hp      - anonymous record with the FastTree settings: number of trees,
    //           number of leaves and learning rate.
    // returns - anonymous record with:
    //             Metric : AUC averaged over the cross-validation folds
    //             Model  : the (not yet fitted) estimator pipeline
    //             Data   : the data read from dataPath
    let trainModel (hp:{|Trees:int; Leaves:int; LearningRate:float |}) =   //*** anonymous rec as function args
        // NOTE(review): 10 is presumably the random seed - confirm against the
        // MLContext constructor of the ML.Net version in use.
        let ctx = MLContext(Nullable 10)

        // Column 0 is loaded as the boolean label, column 1 as the text.
        let reader =
            TextLoaderStatic.CreateReader(
                ctx,
                (fun (c:TextLoaderStatic.Context) ->
                    {|                              //***
                        Label = c.LoadBool(0)       //create anonymous record
                        Text  = c.LoadText(1)       //with labeled fields
                    |}),
                separator = '\t',
                hasHeader = true)

        let trainData = reader.Read(dataPath)

        let pipeline =
            (reader :> SchemaBearing<_>).MakeNewEstimator()   //*** upcasting required here as inference does not work
                .Append(fun a_rec ->
                    let features = a_rec.Text.FeaturizeText() //*** get field of a-rec by name
                    let score =                               //    (no need to deconstruct tuple)
                        ctx.BinaryClassification.Trainers.FastTree(
                            a_rec.Label,                      //*** same here
                            features,
                            numTrees = hp.Trees,
                            numLeaves = hp.Leaves,
                            learningRate = hp.LearningRate,
                            minDatapointsInLeaves = 20)
                    {| a_rec with                             //*** extend a_rec to include
                        Features = features                   //    features and score values
                        Score = score
                    |})

        //perform 5-fold crossvalidation and print results to gauge model performance
        let metrics =
            ctx.BinaryClassification.CrossValidate(trainData, pipeline, (fun x -> x.Label), numFolds = 5)

        // Each fold yields a struct triple; only its first element (the metrics)
        // is needed to compute the mean AUC.
        let meanAuc =
            metrics
            |> Seq.map (fun struct (foldMetrics, _, _) -> foldMetrics.Auc)
            |> Seq.average

        printfn "trees=%d, leaves=%d, lr=%f -> %f" hp.Trees hp.Leaves hp.LearningRate meanAuc

        {| Metric = meanAuc; Model = pipeline; Data = trainData |}   //*** return anonymous record with info needed for prediction

    // Generate predictions from the model: trains with a fixed set of
    // hyperparameters, fits the pipeline, scores the training data, prints the
    // schema of the scored data and saves selected columns to a text file.
    let pred() =
        let hp = {|Trees=50; Leaves=50; LearningRate=0.1 |}   //*** model hyperparameters

        //play with hyperparameters to get better performance results
        //try various values for #trees #leaves and learning rate
        //alternatively try a hyperparameter optimization framework
        //e.g. https://github.com/fwaris/hpopt
        let modelInfo = trainModel hp

        //fit model to data using the chosen hyperparameters
        let mdl = modelInfo.Model.Fit(modelInfo.Data)

        //generate predictions using same data for now
        //in reality you will likely generate predictions
        //a record at a time with new data as
        //it becomes available
        let predictions = mdl.Transform(modelInfo.Data)

        //code below saves predictions to a text file
        let ctx = MLContext(Nullable 10)
        let txa = TextSaver.Arguments()
        txa.OutputHeader <- true
        txa.OutputSchema <- true
        let tx = TextSaver(ctx, txa)

        // 'use' disposes the file stream when pred returns.
        use fn = File.Create(@"C:\repodata\hpopt\t1.txt")

        //*** *very useful*
        // field names from anonymous records
        // are preserved in the schema for the data
        let s = predictions.AsDynamic.Schema

        //*** print field names - these come from
        //    anonymous records used before
        for c in 0 .. s.Count-1 do
            printfn "%A - %A" (s.[c].Name) (s.[c].Type)
        //actual field names printed by code:
        //"Label" - Bool
        //"Text" - Text
        //"Features" - Vec<R4, 9141>
        //"PredictedLabel" - Bool
        //"Score" - R4
        //"Probability" - R4
        //"Score.Item1" - R4
        //"Score.Item2" - R4
        //"Score.Item3" - Bool

        //save selected fields from the schema to text file
        //Note: Here (for now) you have to switch to dynamic DataView
        //and use indices for the columns you want output
        // (going by the listing above, 0/3/4 would be Label, PredictedLabel
        //  and Score - verify against the printed schema)
        tx.SaveData(fn, predictions.AsDynamic, 0, 3, 4)
|
module SampleModelP
namespace System
namespace Microsoft
Multiple items
namespace System.Data
--------------------
namespace Microsoft.FSharp.Data
namespace System.IO
module Train
from SampleModelP
val dataPath : string
Full name: SampleModelP.Train.dataPath
val trainModel : 'a -> 'b -> 'c
Full name: SampleModelP.Train.trainModel
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
Multiple items
type Nullable =
static member Compare<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> int
static member Equals<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> bool
static member GetUnderlyingType : nullableType:Type -> Type
Full name: System.Nullable
--------------------
type Nullable<'T (requires default constructor and value type and 'T :> ValueType)> =
struct
new : value:'T -> Nullable<'T>
member Equals : other:obj -> bool
member GetHashCode : unit -> int
member GetValueOrDefault : unit -> 'T + 1 overload
member HasValue : bool
member ToString : unit -> string
member Value : 'T
end
Full name: System.Nullable<_>
--------------------
Nullable()
Nullable(value: 'T) : unit
namespace System.Text
val trainData : obj
Full name: SampleModelP.Train.trainData
val pipeline : obj
Full name: SampleModelP.Train.pipeline
val metrics : seq<obj>
Full name: SampleModelP.Train.metrics
val m : float
Full name: SampleModelP.Train.m
module Seq
from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val average : source:seq<'T> -> 'T (requires member ( + ) and member DivideByInt and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.average
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
File.Create(path: string) : FileStream
File.Create(path: string, bufferSize: int) : FileStream
File.Create(path: string, bufferSize: int, options: FileOptions) : FileStream
File.Create(path: string, bufferSize: int, options: FileOptions, fileSecurity: Security.AccessControl.FileSecurity) : FileStream
type Type =
inherit MemberInfo
member Assembly : Assembly
member AssemblyQualifiedName : string
member Attributes : TypeAttributes
member BaseType : Type
member ContainsGenericParameters : bool
member DeclaringMethod : MethodBase
member DeclaringType : Type
member Equals : o:obj -> bool + 1 overload
member FindInterfaces : filter:TypeFilter * filterCriteria:obj -> Type[]
member FindMembers : memberType:MemberTypes * bindingAttr:BindingFlags * filter:MemberFilter * filterCriteria:obj -> MemberInfo[]
...
Full name: System.Type
More information