2 people like it.
Like the snippet!
Fast(er) Set creation
Set creation can be quite slow for large sets (> 15000ish string items). If input sequence to create the set is sorted then some optimizations can be applied. For even larger unordered sets (> 30000ish string items) it can be faster doing an up front sort on the data, and then using the Set creation method as described.
1) Set.union is very fast when the greatest element in one of the sets is less than the smallest element in the other; basically becoming an O(1) operation. And Set.add is faster for smaller sets than larger sets, given O(log2 n) of the add operation. So when we have ordered data, makings lots of smaller sets from the stream and union-ing them together can provide a performance boost.
2) On top of the method described in (1), because all the sets are immutable inputs and outputs, then they can be partitioned off onto Tasks to perform the set creation in parallel.
If you are using Newtonsoft's Json.net, then provided is a JsonConverter that can be added to the serializer to use this for Set creation like:
serializer.Converters.Add Newtonsoft.fastFSharpSetConverter
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
|
module Set =
open System.Threading.Tasks
[<Literal>]
let private SUB_SET_SIZE = 150
let ofSortedSeq (sortedItems:seq<_>) =
let whenAll a b f =
let waitingTask = Task.WhenAll [|a; b|]
waitingTask.ContinueWith (fun _ -> f ())
let rec partialUnion = function
| [_:Task<_>] :: _ as original -> original
| [[a;b]] -> [] :: [whenAll a b (fun () -> Set.union a.Result b.Result)] :: []
| [a;b]::[]::tl -> [] :: [whenAll a b (fun () -> Set.union a.Result b.Result)] :: tl
| [a;b]::[c]::tl -> [] :: (partialUnion ([whenAll a b (fun () -> Set.union a.Result b.Result); c] :: tl))
| _ -> failwith "Unexpected state"
let rec combinePartialUnion (combined:Task<_>) = function
| [] -> combined.Result
| [] :: tl -> combinePartialUnion combined tl
| (a:Task<_> :: tl1) :: tl2 -> combinePartialUnion (whenAll combined a (fun () -> Set.union combined.Result a.Result)) (tl1 :: tl2)
let enumerator =
sortedItems.GetEnumerator ()
let rec addNext current counter results =
let next =
enumerator.Current :: current
let createSubSet () =
Task.Run (fun () -> Set.ofSeq next)
match enumerator.MoveNext (), results with
| false, _ -> combinePartialUnion (createSubSet ()) results
| true, _ when counter < SUB_SET_SIZE -> addNext next (counter+1) results
| true, []::tl -> addNext [] 0 ([createSubSet ()]::tl)
| true, [a]::tl -> addNext [] 0 (partialUnion ([a;createSubSet ()]::tl))
| _ -> failwith "Unexpected state"
if enumerator.MoveNext ()
then addNext [] 0 ([[]])
else Set.empty
let ofSeqViaSort (items:seq<_>) =
items
|> Seq.sort
|> fun sorted -> ofSortedSeq sorted
module Newtonsoft =
open System.Reflection
open Newtonsoft.Json
type private fastFSharpSetHelper<'f when 'f : comparison>() =
static member readJson (reader:JsonReader, serializer:JsonSerializer) =
serializer.Deserialize<ResizeArray<'f>> (reader)
|> Set.ofSortedSeq
let fastFSharpSetConverter = {
new JsonConverter() with
override __.CanConvert t = t.IsGenericType && t.GetGenericTypeDefinition() = typedefof<Set<_>>
override __.CanWrite = false
override __.WriteJson (_, _, _) = raise <| System.NotImplementedException "CanWrite = false"
override __.ReadJson (reader, t, _, serializer) =
typedefof<fastFSharpSetHelper<_>>.MakeGenericType (t.GetGenericArguments())
|> fun converter -> converter.GetMethod ("readJson", BindingFlags.Static ||| BindingFlags.NonPublic)
|> fun readJson -> readJson.Invoke (null, [| reader; serializer |]) }
|
Multiple items
module Set
from Microsoft.FSharp.Collections
--------------------
type Set<'T (requires comparison)> =
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
member IsProperSupersetOf : otherSet:Set<'T> -> bool
...
Full name: Microsoft.FSharp.Collections.Set<_>
--------------------
new : elements:seq<'T> -> Set<'T>
namespace System
namespace System.Threading
namespace System.Threading.Tasks
Multiple items
type LiteralAttribute =
inherit Attribute
new : unit -> LiteralAttribute
Full name: Microsoft.FSharp.Core.LiteralAttribute
--------------------
new : unit -> LiteralAttribute
val private SUB_SET_SIZE : int
Full name: Script.Set.SUB_SET_SIZE
val ofSortedSeq : sortedItems:seq<'a> -> Set<'b> (requires comparison)
Full name: Script.Set.ofSortedSeq
val sortedItems : seq<'a>
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = System.Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val whenAll : ('c -> 'd -> 'e -> 'f)
val a : 'c
val b : 'd
val f : 'e
val waitingTask : obj
Multiple items
type Task =
new : action:Action -> Task + 7 overloads
member AsyncState : obj
member ContinueWith : continuationAction:Action<Task> -> Task + 9 overloads
member CreationOptions : TaskCreationOptions
member Dispose : unit -> unit
member Exception : AggregateException
member Id : int
member IsCanceled : bool
member IsCompleted : bool
member IsFaulted : bool
...
Full name: System.Threading.Tasks.Task
--------------------
type Task<'TResult> =
inherit Task
new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 9 overloads
member Result : 'TResult with get, set
static member Factory : TaskFactory<'TResult>
Full name: System.Threading.Tasks.Task<_>
--------------------
Task(action: System.Action) : unit
Task(action: System.Action, cancellationToken: System.Threading.CancellationToken) : unit
Task(action: System.Action, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj) : unit
Task(action: System.Action, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj, cancellationToken: System.Threading.CancellationToken) : unit
Task(action: System.Action<obj>, state: obj, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
--------------------
Task(function: System.Func<'TResult>) : unit
Task(function: System.Func<'TResult>, cancellationToken: System.Threading.CancellationToken) : unit
Task(function: System.Func<'TResult>, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj) : unit
Task(function: System.Func<'TResult>, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj, cancellationToken: System.Threading.CancellationToken) : unit
Task(function: System.Func<obj,'TResult>, state: obj, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
val partialUnion : (Task<Set<'c>> list list -> Task<Set<'c>> list list) (requires comparison)
val original : Task<Set<'c>> list list (requires comparison)
val a : Task<Set<'c>> (requires comparison)
val b : Task<Set<'c>> (requires comparison)
val union : set1:Set<'T> -> set2:Set<'T> -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.union
property Task.Result: Set<'c>
val tl : Task<Set<'c>> list list (requires comparison)
val c : Task<Set<'c>> (requires comparison)
val failwith : message:string -> 'T
Full name: Microsoft.FSharp.Core.Operators.failwith
val combinePartialUnion : (Task<Set<'c>> -> Task<Set<'c>> list list -> Set<'c>) (requires comparison)
val combined : Task<Set<'c>> (requires comparison)
val tl1 : Task<Set<'c>> list (requires comparison)
val tl2 : Task<Set<'c>> list list (requires comparison)
val enumerator : System.Collections.Generic.IEnumerator<'a>
System.Collections.Generic.IEnumerable.GetEnumerator() : System.Collections.Generic.IEnumerator<'a>
val addNext : ('a list -> int -> Task<Set<'c>> list list -> Set<'c>) (requires comparison)
val current : 'a list
val counter : int
val results : Task<Set<'c>> list list (requires comparison)
val next : 'a list
property System.Collections.Generic.IEnumerator.Current: 'a
val createSubSet : (unit -> 'd)
val ofSeq : elements:seq<'T> -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.ofSeq
System.Collections.IEnumerator.MoveNext() : bool
val empty<'T (requires comparison)> : Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Set.empty
val ofSeqViaSort : items:seq<'a> -> Set<'b> (requires comparison and comparison)
Full name: Script.Set.ofSeqViaSort
val items : seq<'a> (requires comparison)
module Seq
from Microsoft.FSharp.Collections
val sort : source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sort
val sorted : seq<'a> (requires comparison)
Multiple items
module Newtonsoft
from Script
--------------------
namespace Newtonsoft
namespace System.Reflection
namespace Newtonsoft
namespace Newtonsoft.Json
Multiple items
type private fastFSharpSetHelper<'f (requires comparison)> =
new : unit -> fastFSharpSetHelper<'f>
static member readJson : reader:JsonReader * serializer:JsonSerializer -> Set<'a> (requires comparison)
Full name: Script.Newtonsoft.fastFSharpSetHelper<_>
--------------------
private new : unit -> fastFSharpSetHelper<'f>
static member private fastFSharpSetHelper.readJson : reader:JsonReader * serializer:JsonSerializer -> Set<'a> (requires comparison)
Full name: Script.Newtonsoft.fastFSharpSetHelper`1.readJson
val reader : JsonReader
type JsonReader =
member Close : unit -> unit
member CloseInput : bool with get, set
member Culture : CultureInfo with get, set
member DateFormatString : string with get, set
member DateParseHandling : DateParseHandling with get, set
member DateTimeZoneHandling : DateTimeZoneHandling with get, set
member Depth : int
member FloatParseHandling : FloatParseHandling with get, set
member MaxDepth : Nullable<int> with get, set
member Path : string
...
Full name: Newtonsoft.Json.JsonReader
val serializer : JsonSerializer
Multiple items
type JsonSerializer =
new : unit -> JsonSerializer
member Binder : SerializationBinder with get, set
member CheckAdditionalContent : bool with get, set
member ConstructorHandling : ConstructorHandling with get, set
member Context : StreamingContext with get, set
member ContractResolver : IContractResolver with get, set
member Converters : JsonConverterCollection
member Culture : CultureInfo with get, set
member DateFormatHandling : DateFormatHandling with get, set
member DateFormatString : string with get, set
...
Full name: Newtonsoft.Json.JsonSerializer
--------------------
JsonSerializer() : unit
JsonSerializer.Deserialize<'T>(reader: JsonReader) : 'T
JsonSerializer.Deserialize(reader: JsonReader) : obj
JsonSerializer.Deserialize(reader: JsonReader, objectType: System.Type) : obj
JsonSerializer.Deserialize(reader: System.IO.TextReader, objectType: System.Type) : obj
type ResizeArray<'T> = System.Collections.Generic.List<'T>
Full name: Microsoft.FSharp.Collections.ResizeArray<_>
Multiple items
module Set
from Script
--------------------
module Set
from Microsoft.FSharp.Collections
--------------------
type Set<'T (requires comparison)> =
interface IComparable
interface IEnumerable
interface IEnumerable<'T>
interface ICollection<'T>
new : elements:seq<'T> -> Set<'T>
member Add : value:'T -> Set<'T>
member Contains : value:'T -> bool
override Equals : obj -> bool
member IsProperSubsetOf : otherSet:Set<'T> -> bool
member IsProperSupersetOf : otherSet:Set<'T> -> bool
...
Full name: Microsoft.FSharp.Collections.Set<_>
--------------------
new : elements:seq<'T> -> Set<'T>
val fastFSharpSetConverter : JsonConverter
Full name: Script.Newtonsoft.fastFSharpSetConverter
Multiple items
type JsonConverter =
member CanConvert : objectType:Type -> bool
member CanRead : bool
member CanWrite : bool
member GetSchema : unit -> JsonSchema
member ReadJson : reader:JsonReader * objectType:Type * existingValue:obj * serializer:JsonSerializer -> obj
member WriteJson : writer:JsonWriter * value:obj * serializer:JsonSerializer -> unit
Full name: Newtonsoft.Json.JsonConverter
--------------------
type JsonConverterAttribute =
inherit Attribute
new : converterType:Type -> JsonConverterAttribute + 1 overload
member ConverterParameters : obj[] with get, set
member ConverterType : Type
Full name: Newtonsoft.Json.JsonConverterAttribute
--------------------
JsonConverter() : unit
--------------------
JsonConverterAttribute(converterType: System.Type) : unit
JsonConverterAttribute(converterType: System.Type, [<System.ParamArray>] converterParameters: obj []) : unit
val t : System.Type
property System.Type.IsGenericType: bool
System.Type.GetGenericTypeDefinition() : System.Type
val typedefof<'T> : System.Type
Full name: Microsoft.FSharp.Core.Operators.typedefof
val __ : JsonConverter
property JsonConverter.CanWrite: bool
JsonConverter.WriteJson(writer: JsonWriter, value: obj, serializer: JsonSerializer) : unit
val raise : exn:System.Exception -> 'T
Full name: Microsoft.FSharp.Core.Operators.raise
Multiple items
type NotImplementedException =
inherit SystemException
new : unit -> NotImplementedException + 2 overloads
Full name: System.NotImplementedException
--------------------
System.NotImplementedException() : unit
System.NotImplementedException(message: string) : unit
System.NotImplementedException(message: string, inner: exn) : unit
JsonConverter.ReadJson(reader: JsonReader, objectType: System.Type, existingValue: obj, serializer: JsonSerializer) : obj
System.Type.GetGenericArguments() : System.Type []
val converter : System.Type
System.Type.GetMethod(name: string) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags) : MethodInfo
System.Type.GetMethod(name: string, types: System.Type []) : MethodInfo
System.Type.GetMethod(name: string, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags, binder: Binder, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags, binder: Binder, callConvention: CallingConventions, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
type BindingFlags =
| Default = 0
| IgnoreCase = 1
| DeclaredOnly = 2
| Instance = 4
| Static = 8
| Public = 16
| NonPublic = 32
| FlattenHierarchy = 64
| InvokeMethod = 256
| CreateInstance = 512
...
Full name: System.Reflection.BindingFlags
field BindingFlags.Static = 8
field BindingFlags.NonPublic = 32
val readJson : MethodInfo
MethodBase.Invoke(obj: obj, parameters: obj []) : obj
MethodBase.Invoke(obj: obj, invokeAttr: BindingFlags, binder: Binder, parameters: obj [], culture: System.Globalization.CultureInfo) : obj
More information