2 people like it.

Fast(er) Set creation

Set creation can be quite slow for large sets (> 15000ish string items). If input sequence to create the set is sorted then some optimizations can be applied. For even larger unordered sets (> 30000ish string items) it can be faster doing an up front sort on the data, and then using the Set creation method as described. 1) Set.union is very fast when the greatest element in one of the sets is less than the smallest element in the other; basically becoming an O(1) operation. And Set.add is faster for smaller sets than larger sets, given O(log2 n) of the add operation. So when we have ordered data, makings lots of smaller sets from the stream and union-ing them together can provide a performance boost. 2) On top of the method described in (1), because all the sets are immutable inputs and outputs, then they can be partitioned off onto Tasks to perform the set creation in parallel. If you are using Newtonsoft's Json.net, then provided is a JsonConverter that can be added to the serializer to use this for Set creation like: serializer.Converters.Add Newtonsoft.fastFSharpSetConverter

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
module Set =
    open System.Threading.Tasks

    [<Literal>]
    let private SUB_SET_SIZE = 150

    let ofSortedSeq (sortedItems:seq<_>) =
        let whenAll a b f =
            let waitingTask = Task.WhenAll [|a; b|]
            waitingTask.ContinueWith (fun _ -> f ())

        let rec partialUnion = function
        | [_:Task<_>] :: _ as original -> original
        | [[a;b]]        -> [] :: [whenAll a b (fun () -> Set.union a.Result b.Result)] :: []
        | [a;b]::[]::tl  -> [] :: [whenAll a b (fun () -> Set.union a.Result b.Result)] :: tl
        | [a;b]::[c]::tl -> [] :: (partialUnion ([whenAll a b (fun () -> Set.union a.Result b.Result); c] :: tl))
        | _              -> failwith "Unexpected state"

        let rec combinePartialUnion (combined:Task<_>) = function
        | [] -> combined.Result
        | [] :: tl -> combinePartialUnion combined tl
        | (a:Task<_> :: tl1) :: tl2 -> combinePartialUnion (whenAll combined a (fun () -> Set.union combined.Result a.Result)) (tl1 :: tl2)

        let enumerator =
            sortedItems.GetEnumerator ()

        let rec addNext current counter results =
            let next =
                enumerator.Current :: current

            let createSubSet () =
                Task.Run (fun () -> Set.ofSeq next)

            match enumerator.MoveNext (), results with
            | false, _                            -> combinePartialUnion (createSubSet ()) results
            | true, _ when counter < SUB_SET_SIZE -> addNext next (counter+1) results                      
            | true, []::tl                        -> addNext []   0           ([createSubSet ()]::tl)                 
            | true, [a]::tl                       -> addNext []   0           (partialUnion ([a;createSubSet ()]::tl))
            | _                                   -> failwith "Unexpected state"

        if enumerator.MoveNext () 
            then addNext [] 0 ([[]])
            else Set.empty

    let ofSeqViaSort (items:seq<_>) =
        items
        |> Seq.sort
        |> fun sorted -> ofSortedSeq sorted


module Newtonsoft =
    open System.Reflection
    open Newtonsoft.Json

    type private fastFSharpSetHelper<'f when 'f : comparison>() =
        static member readJson (reader:JsonReader, serializer:JsonSerializer) =
            serializer.Deserialize<ResizeArray<'f>> (reader)
            |> Set.ofSortedSeq

    let fastFSharpSetConverter = {
        new JsonConverter() with
            override __.CanConvert t                        = t.IsGenericType && t.GetGenericTypeDefinition() = typedefof<Set<_>>
            override __.CanWrite                            = false
            override __.WriteJson (_, _, _)                 = raise <| System.NotImplementedException "CanWrite = false"
            override __.ReadJson (reader, t, _, serializer) =
                typedefof<fastFSharpSetHelper<_>>.MakeGenericType (t.GetGenericArguments())
                |> fun converter -> converter.GetMethod ("readJson", BindingFlags.Static ||| BindingFlags.NonPublic)
                |> fun readJson -> readJson.Invoke (null, [| reader; serializer |]) }
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
namespace System
namespace System.Threading
namespace System.Threading.Tasks
Multiple items
type LiteralAttribute =
  inherit Attribute
  new : unit -> LiteralAttribute

Full name: Microsoft.FSharp.Core.LiteralAttribute

--------------------
new : unit -> LiteralAttribute
val private SUB_SET_SIZE : int

Full name: Script.Set.SUB_SET_SIZE
val ofSortedSeq : sortedItems:seq<'a> -> Set<'b> (requires comparison)

Full name: Script.Set.ofSortedSeq
val sortedItems : seq<'a>
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = System.Collections.Generic.IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val whenAll : ('c -> 'd -> 'e -> 'f)
val a : 'c
val b : 'd
val f : 'e
val waitingTask : obj
Multiple items
type Task =
  new : action:Action -> Task + 7 overloads
  member AsyncState : obj
  member ContinueWith : continuationAction:Action<Task> -> Task + 9 overloads
  member CreationOptions : TaskCreationOptions
  member Dispose : unit -> unit
  member Exception : AggregateException
  member Id : int
  member IsCanceled : bool
  member IsCompleted : bool
  member IsFaulted : bool
  ...

Full name: System.Threading.Tasks.Task

--------------------
type Task<'TResult> =
  inherit Task
  new : function:Func<'TResult> -> Task<'TResult> + 7 overloads
  member ContinueWith : continuationAction:Action<Task<'TResult>> -> Task + 9 overloads
  member Result : 'TResult with get, set
  static member Factory : TaskFactory<'TResult>

Full name: System.Threading.Tasks.Task<_>

--------------------
Task(action: System.Action) : unit
Task(action: System.Action, cancellationToken: System.Threading.CancellationToken) : unit
Task(action: System.Action, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj) : unit
Task(action: System.Action, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj, cancellationToken: System.Threading.CancellationToken) : unit
Task(action: System.Action<obj>, state: obj, creationOptions: TaskCreationOptions) : unit
Task(action: System.Action<obj>, state: obj, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit

--------------------
Task(function: System.Func<'TResult>) : unit
Task(function: System.Func<'TResult>, cancellationToken: System.Threading.CancellationToken) : unit
Task(function: System.Func<'TResult>, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj) : unit
Task(function: System.Func<'TResult>, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj, cancellationToken: System.Threading.CancellationToken) : unit
Task(function: System.Func<obj,'TResult>, state: obj, creationOptions: TaskCreationOptions) : unit
Task(function: System.Func<obj,'TResult>, state: obj, cancellationToken: System.Threading.CancellationToken, creationOptions: TaskCreationOptions) : unit
val partialUnion : (Task<Set<'c>> list list -> Task<Set<'c>> list list) (requires comparison)
val original : Task<Set<'c>> list list (requires comparison)
val a : Task<Set<'c>> (requires comparison)
val b : Task<Set<'c>> (requires comparison)
val union : set1:Set<'T> -> set2:Set<'T> -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.union
property Task.Result: Set<'c>
val tl : Task<Set<'c>> list list (requires comparison)
val c : Task<Set<'c>> (requires comparison)
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val combinePartialUnion : (Task<Set<'c>> -> Task<Set<'c>> list list -> Set<'c>) (requires comparison)
val combined : Task<Set<'c>> (requires comparison)
val tl1 : Task<Set<'c>> list (requires comparison)
val tl2 : Task<Set<'c>> list list (requires comparison)
val enumerator : System.Collections.Generic.IEnumerator<'a>
System.Collections.Generic.IEnumerable.GetEnumerator() : System.Collections.Generic.IEnumerator<'a>
val addNext : ('a list -> int -> Task<Set<'c>> list list -> Set<'c>) (requires comparison)
val current : 'a list
val counter : int
val results : Task<Set<'c>> list list (requires comparison)
val next : 'a list
property System.Collections.Generic.IEnumerator.Current: 'a
val createSubSet : (unit -> 'd)
val ofSeq : elements:seq<'T> -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofSeq
System.Collections.IEnumerator.MoveNext() : bool
val empty<'T (requires comparison)> : Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.empty
val ofSeqViaSort : items:seq<'a> -> Set<'b> (requires comparison and comparison)

Full name: Script.Set.ofSeqViaSort
val items : seq<'a> (requires comparison)
module Seq

from Microsoft.FSharp.Collections
val sort : source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sort
val sorted : seq<'a> (requires comparison)
Multiple items
module Newtonsoft

from Script

--------------------
namespace Newtonsoft
namespace System.Reflection
namespace Newtonsoft
namespace Newtonsoft.Json
Multiple items
type private fastFSharpSetHelper<'f (requires comparison)> =
  new : unit -> fastFSharpSetHelper<'f>
  static member readJson : reader:JsonReader * serializer:JsonSerializer -> Set<'a> (requires comparison)

Full name: Script.Newtonsoft.fastFSharpSetHelper<_>

--------------------
private new : unit -> fastFSharpSetHelper<'f>
static member private fastFSharpSetHelper.readJson : reader:JsonReader * serializer:JsonSerializer -> Set<'a> (requires comparison)

Full name: Script.Newtonsoft.fastFSharpSetHelper`1.readJson
val reader : JsonReader
type JsonReader =
  member Close : unit -> unit
  member CloseInput : bool with get, set
  member Culture : CultureInfo with get, set
  member DateFormatString : string with get, set
  member DateParseHandling : DateParseHandling with get, set
  member DateTimeZoneHandling : DateTimeZoneHandling with get, set
  member Depth : int
  member FloatParseHandling : FloatParseHandling with get, set
  member MaxDepth : Nullable<int> with get, set
  member Path : string
  ...

Full name: Newtonsoft.Json.JsonReader
val serializer : JsonSerializer
Multiple items
type JsonSerializer =
  new : unit -> JsonSerializer
  member Binder : SerializationBinder with get, set
  member CheckAdditionalContent : bool with get, set
  member ConstructorHandling : ConstructorHandling with get, set
  member Context : StreamingContext with get, set
  member ContractResolver : IContractResolver with get, set
  member Converters : JsonConverterCollection
  member Culture : CultureInfo with get, set
  member DateFormatHandling : DateFormatHandling with get, set
  member DateFormatString : string with get, set
  ...

Full name: Newtonsoft.Json.JsonSerializer

--------------------
JsonSerializer() : unit
JsonSerializer.Deserialize<'T>(reader: JsonReader) : 'T
JsonSerializer.Deserialize(reader: JsonReader) : obj
JsonSerializer.Deserialize(reader: JsonReader, objectType: System.Type) : obj
JsonSerializer.Deserialize(reader: System.IO.TextReader, objectType: System.Type) : obj
type ResizeArray<'T> = System.Collections.Generic.List<'T>

Full name: Microsoft.FSharp.Collections.ResizeArray<_>
Multiple items
module Set

from Script

--------------------
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val fastFSharpSetConverter : JsonConverter

Full name: Script.Newtonsoft.fastFSharpSetConverter
Multiple items
type JsonConverter =
  member CanConvert : objectType:Type -> bool
  member CanRead : bool
  member CanWrite : bool
  member GetSchema : unit -> JsonSchema
  member ReadJson : reader:JsonReader * objectType:Type * existingValue:obj * serializer:JsonSerializer -> obj
  member WriteJson : writer:JsonWriter * value:obj * serializer:JsonSerializer -> unit

Full name: Newtonsoft.Json.JsonConverter

--------------------
type JsonConverterAttribute =
  inherit Attribute
  new : converterType:Type -> JsonConverterAttribute + 1 overload
  member ConverterParameters : obj[] with get, set
  member ConverterType : Type

Full name: Newtonsoft.Json.JsonConverterAttribute

--------------------
JsonConverter() : unit

--------------------
JsonConverterAttribute(converterType: System.Type) : unit
JsonConverterAttribute(converterType: System.Type, [<System.ParamArray>] converterParameters: obj []) : unit
val t : System.Type
property System.Type.IsGenericType: bool
System.Type.GetGenericTypeDefinition() : System.Type
val typedefof<'T> : System.Type

Full name: Microsoft.FSharp.Core.Operators.typedefof
val __ : JsonConverter
property JsonConverter.CanWrite: bool
JsonConverter.WriteJson(writer: JsonWriter, value: obj, serializer: JsonSerializer) : unit
val raise : exn:System.Exception -> 'T

Full name: Microsoft.FSharp.Core.Operators.raise
Multiple items
type NotImplementedException =
  inherit SystemException
  new : unit -> NotImplementedException + 2 overloads

Full name: System.NotImplementedException

--------------------
System.NotImplementedException() : unit
System.NotImplementedException(message: string) : unit
System.NotImplementedException(message: string, inner: exn) : unit
JsonConverter.ReadJson(reader: JsonReader, objectType: System.Type, existingValue: obj, serializer: JsonSerializer) : obj
System.Type.GetGenericArguments() : System.Type []
val converter : System.Type
System.Type.GetMethod(name: string) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags) : MethodInfo
System.Type.GetMethod(name: string, types: System.Type []) : MethodInfo
System.Type.GetMethod(name: string, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags, binder: Binder, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
System.Type.GetMethod(name: string, bindingAttr: BindingFlags, binder: Binder, callConvention: CallingConventions, types: System.Type [], modifiers: ParameterModifier []) : MethodInfo
type BindingFlags =
  | Default = 0
  | IgnoreCase = 1
  | DeclaredOnly = 2
  | Instance = 4
  | Static = 8
  | Public = 16
  | NonPublic = 32
  | FlattenHierarchy = 64
  | InvokeMethod = 256
  | CreateInstance = 512
  ...

Full name: System.Reflection.BindingFlags
field BindingFlags.Static = 8
field BindingFlags.NonPublic = 32
val readJson : MethodInfo
MethodBase.Invoke(obj: obj, parameters: obj []) : obj
MethodBase.Invoke(obj: obj, invokeAttr: BindingFlags, binder: Binder, parameters: obj [], culture: System.Globalization.CultureInfo) : obj
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/rs
Posted:9 years ago
Author:manofstick
Tags: set , performance , speed , newtonsoft json , json