3 people like it.

Grouping Data Into Buckets Using F#

Sample code snippet that groups a list of numbers into labelled buckets, where each bucket is described by a filter string such as "<21" or "21-40".

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
open System.Text.RegularExpressions

/// Partial active pattern: matches `input` against the regular expression
/// `pattern`. On success it yields the values of the capture groups as a
/// string list, leaving out group 0 (the whole match); otherwise it does not
/// match.
let (|Regex|_|) pattern input =
    let result = Regex.Match(input, pattern)
    match result.Success with
    | false -> None
    | true ->
        result.Groups
        |> Seq.cast<Group>
        |> Seq.map (fun group -> group.Value)
        |> List.ofSeq
        |> List.tail   // drop group 0, which is the entire matched text
        |> Some

/// Sample input: the numbers that get distributed into buckets.
let scores = [ 9; 10; 1; 34; 45; 26; 78; 100; 93 ]

/// A bucket definition: a display label paired with the test that decides
/// whether a number belongs to the bucket.
type BucketRule =
    { /// Text used to identify the bucket in the output.
      Label : string
      /// Returns true for numbers that belong in the bucket.
      Rule : int -> bool }

/// The kinds of numeric test a filter string can describe. The bound
/// semantics noted on each case are those applied by createPredicate.
type Filter = 
    /// Numbers strictly greater than the given bound.
    | GreaterThan of int 
    /// Numbers between the first and second bound, both ends inclusive.
    | Between of int * int 
    /// Numbers strictly less than the given bound.
    | LessThan of int

/// Parses a textual filter into a Filter case.
///
/// Recognised forms (digits only, no surrounding whitespace):
///   ">N"   gives GreaterThan N
///   "N-M"  gives Between (N, M)
///   "<N"   gives LessThan N
///
/// Raises an exception whose message starts with "Invalid Filter" and
/// includes the offending text when the input matches none of the forms, or
/// when a captured number cannot be parsed as a 32-bit int.
let createFilter filterString =
    // Single failure path so every rejected input reports what was rejected.
    let invalid () = failwithf "Invalid Filter: %s" filterString
    // The patterns only guarantee "one or more digits"; a long digit run would
    // make a plain `int` conversion throw OverflowException, so parse
    // defensively and report it as an invalid filter instead.
    let toInt (digits : string) =
        match System.Int32.TryParse digits with
        | true, value -> value
        | _ -> invalid ()
    match filterString with    
    | Regex "^>(\d+)$" [min] -> GreaterThan(toInt min)
    | Regex "^(\d+)-(\d+)$" [min;max] -> Between(toInt min, toInt max)
    | Regex "^<(\d+)$" [max] -> LessThan(toInt max)
    | _ -> invalid ()
    
/// Turns a Filter into the int -> bool test it stands for:
/// GreaterThan and LessThan are strict comparisons, Between includes both
/// of its bounds.
let createPredicate filter =
    match filter with
    | GreaterThan lower -> (fun candidate -> candidate > lower)
    | Between (lower, upper) -> (fun candidate -> lower <= candidate && candidate <= upper)
    | LessThan upper -> (fun candidate -> candidate < upper)
    
/// Builds the membership test for a filter string by parsing it with
/// createFilter and converting the result with createPredicate.
let createRule filterString =
    filterString
    |> createFilter
    |> createPredicate
    
/// Creates a BucketRule from a filter string: the string itself becomes the
/// label and createRule supplies the membership test.
let createBucketRule filterString =
    let predicate = createRule filterString
    { Label = filterString
      Rule = predicate }

/// Sample bucket definitions, written as filter strings.
/// "<N" and ">N" are strict bounds while "N-M" includes both ends, so the last
/// bucket must start at ">60" to pick up 61; with ">61" the value 61 would
/// fall into no bucket at all.
let bucketRules = ["<21";"21-40";"41-60";">60"] 

/// Produces one bucket as a (label, members) pair, where members are the
/// entries of `numbers` accepted by the rule, in their original order.
let createBucket numbers bucketRule =
    let { Label = label; Rule = accepts } = bucketRule
    label, List.filter accepts numbers
        
/// Groups `numbers` into one bucket per filter string. The result has one
/// (label, members) pair for each entry of `filterStrings`, in the same order.
let bucketify numbers filterStrings =
    // Build every rule first, then fill the buckets.
    let rules = List.map createBucketRule filterStrings
    [ for rule in rules -> createBucket numbers rule ]
    
/// The sample scores grouped according to the sample bucket rules.
let buckets = bucketRules |> bucketify scores
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
val pattern : string
val input : string
val m : Match
Regex.Match(input: string, pattern: string) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions) : Match
property Group.Success: bool
union case Option.Some: Value: 'T -> Option<'T>
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val tail : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.tail
val g : Group
property Match.Groups: GroupCollection
property Capture.Value: string
union case Option.None: Option<'T>
val scores : int list

Full name: Script.scores
type BucketRule =
  {Label: string;
   Rule: int -> bool;}

Full name: Script.BucketRule
BucketRule.Label: string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
BucketRule.Rule: int -> bool
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
type bool = System.Boolean

Full name: Microsoft.FSharp.Core.bool
type Filter =
  | GreaterThan of int
  | Between of int * int
  | LessThan of int

Full name: Script.Filter
union case Filter.GreaterThan: int -> Filter
union case Filter.Between: int * int -> Filter
union case Filter.LessThan: int -> Filter
val createFilter : filterString:string -> Filter

Full name: Script.createFilter
val filterString : string
Multiple items
active recognizer Regex: string -> string -> string list option

Full name: Script.( |Regex|_| )

--------------------
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
val min : string
val max : string
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val createPredicate : _arg1:Filter -> (int -> bool)

Full name: Script.createPredicate
val min : int
val n : int
val max : int
val createRule : (string -> int -> bool)

Full name: Script.createRule
val createBucketRule : filterString:string -> BucketRule

Full name: Script.createBucketRule
val bucketRules : string list

Full name: Script.bucketRules
val createBucket : numbers:int list -> bucketRule:BucketRule -> string * int list

Full name: Script.createBucket
val numbers : int list
val bucketRule : BucketRule
val bucketContent : int list
val filter : predicate:('T -> bool) -> list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val bucketify : numbers:int list -> filterStrings:string list -> (string * int list) list

Full name: Script.bucketify
val filterStrings : string list
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val buckets : (string * int list) list

Full name: Script.buckets
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/qZ
Posted:8 years ago
Author:Tamizhvendan
Tags: active patterns , pattern matching , pipeline operator