21 people like it.

sscanf - parsing with format strings

A more complete version of sscanf, as proposed on Stack Overflow by kvb: http://stackoverflow.com/questions/2415705/read-from-console-in-f — this revision fixes a bug in the previous version so that it works with Visual Studio 2013.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
open System
open System.Text
open System.Text.RegularExpressions
open Microsoft.FSharp.Reflection


/// Validates `x` with the predicate `f`.
/// Returns `x` unchanged when `f x` is true; otherwise raises (via failwithf)
/// with a "format failure" message that quotes the offending text.
let check f x =
    match f x with
    | true -> x
    | false -> failwithf "format failure \"%s\"" x


/// Converts the text `x` to a decimal using the invariant culture, so the
/// result does not depend on the current thread's culture settings.
let parseDecimal (x: string) : decimal =
    let invariant = System.Globalization.CultureInfo.InvariantCulture
    Decimal.Parse(x, invariant)


/// Maps every supported format character to the function that converts the
/// text captured for that placeholder into a boxed value.
let parsers =
  // %x / %X accept hexadecimal digits only: decimal digits plus the letters
  // a-f in the stated case. Char.IsLower / Char.IsUpper on their own are false
  // for decimal digits and would reject inputs such as "1f" or "2A".
  let lowerHex = check (String.forall (fun c -> Uri.IsHexDigit c && not (Char.IsUpper c)))
  let upperHex = check (String.forall (fun c -> Uri.IsHexDigit c && not (Char.IsLower c)))
  dict [
    'b', Boolean.Parse >> box
    'd', int >> box
    'i', int >> box
    's', box
    // parsed as uint32 first, then converted to int
    'u', uint32 >> int >> box
    // the "0x" / "0o" prefixes hand radix handling over to the `int` conversion
    'x', lowerHex >> ((+) "0x") >> int >> box
    'X', upperHex >> ((+) "0x") >> int >> box
    'o', ((+) "0o") >> int >> box
    'e', float >> box // no check for correct format for floats
    'E', float >> box
    'f', float >> box
    'F', float >> box
    'g', float >> box
    'G', float >> box
    'M', parseDecimal >> box
    'c', char >> box
  ]


/// Every supported placeholder in its "%c" spelling, i.e. [|"%b"; "%d"; ...|],
/// one entry per key of `parsers`.
let separators =
   [| for key in parsers.Keys -> sprintf "%%%c" key |]


/// Extracts the formatter characters from a format string given as a char
/// list, in order of appearance; for example "(%s,%d)" -> ['s'; 'd'].
/// "%%" is skipped as an escaped percent sign. A '%' followed by a character
/// that is not a key of `parsers` raises "Unknown formatter".
let getFormatters xs =
   // Walk the characters once, collecting formatters in reverse order.
   let rec collect found remaining =
      match remaining with
      | [] -> List.rev found
      | '%'::'%'::rest -> collect found rest
      | '%'::c::rest ->
         if parsers.ContainsKey c then collect (c::found) rest
         else failwithf "Unknown formatter %%%c" c
      | _::rest -> collect found rest
   collect [] xs


/// Parses `s` according to the printf-style format `pf` and returns the
/// extracted values as the format's tuple type 't (a single value when the
/// format has exactly one placeholder).
///
/// Every placeholder is matched lazily ("(.*?)") between the literal parts of
/// the format, and the captured text is converted by the corresponding entry
/// of `parsers`. "%%" in the format stands for one literal '%' in the input.
///
/// Raises (failwithf) when the format contains an unknown placeholder or when
/// `s` does not match the format. Exceptions thrown by the individual parsers
/// propagate unchanged.
let sscanf (pf:PrintfFormat<_,_,_,_,'t>) s : 't =
  // Translate the ORIGINAL format string in one pass. Collapsing "%%" to "%"
  // before looking for placeholders would turn an escaped literal such as
  // "%%d" into the placeholder "%d" and desynchronise groups and formatters.
  let rec translate (pattern: StringBuilder) found chars =
    match chars with
    | '%'::'%'::rest -> translate (pattern.Append("%")) found rest
    | '%'::c::rest ->
        if parsers.ContainsKey c then translate (pattern.Append("(.*?)")) (c::found) rest
        else failwithf "Unknown formatter %%%c" c
    | c::rest -> translate (pattern.Append(Regex.Escape(string c))) found rest
    | [] -> pattern.Append("$").ToString(), List.rev found
  let pattern, formatters = translate (StringBuilder("^")) [] (List.ofSeq pf.Value)

  let m = Regex(pattern).Match(s)
  // Without this check a non-matching input would yield zero captures and
  // surface later as an unrelated tuple-construction error.
  if not m.Success then
    failwithf "input \"%s\" does not match format \"%s\"" s pf.Value

  // Group 0 is the whole match; placeholder i is captured in group i + 1.
  let matches =
    formatters
    |> List.mapi (fun i f -> m.Groups.[i + 1].Value |> parsers.[f])
    |> List.toArray

  match matches with
  | [||] -> Unchecked.defaultof<'t> // no placeholders: 't is expected to be unit
  | [| single |] -> single :?> 't
  | _ -> FSharpValue.MakeTuple(matches, typeof<'t>) :?> 't

// Smoke tests: each binding parses a sample input and destructures the result.

// expected: a = "hello", b = 4.53M
let a, b = sscanf "(%%%s,%M)" "(%hello, 4.53)"

// expected: x = "test", y = "this", z = "string"
let x, y, z = sscanf "%s-%s-%s" "test-this-string"

// expected: c = false, d = 42, e = -31, f = 13, g = 255, h = 255, i = 34
let c, d, e, f, g, h, i = sscanf "%b-%d-%i,%u,%x,%X,%o" "false-42--31,13,ff,FF,42"

// expected: j = 1.0, k = 2.1, l = 3.4, m = 0.3, n = 43.2e32, o = 0.0, p = 'f'
let j, k, l, m, n, o, p = sscanf "%f %F %g %G %e %E %c" "1 2.1 3.4 .3 43.2e32 0 f"
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Reflection
val check : f:(string -> bool) -> x:string -> string

Full name: Script.check
val f : (string -> bool)
val x : string
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
val parseDecimal : x:string -> decimal

Full name: Script.parseDecimal
Multiple items
type Decimal =
  struct
    new : value:int -> decimal + 7 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : value:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val Zero : decimal
    static val One : decimal
    static val MinusOne : decimal
    static val MaxValue : decimal
    ...
  end

Full name: System.Decimal

--------------------
Decimal()
Decimal(value: int) : unit
Decimal(value: uint32) : unit
Decimal(value: int64) : unit
Decimal(value: uint64) : unit
Decimal(value: float32) : unit
Decimal(value: float) : unit
Decimal(bits: int []) : unit
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : unit
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: Globalization.NumberStyles) : decimal
Decimal.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : decimal
namespace System.Globalization
Multiple items
type CultureInfo =
  new : name:string -> CultureInfo + 3 overloads
  member Calendar : Calendar
  member ClearCachedData : unit -> unit
  member Clone : unit -> obj
  member CompareInfo : CompareInfo
  member CultureTypes : CultureTypes
  member DateTimeFormat : DateTimeFormatInfo with get, set
  member DisplayName : string
  member EnglishName : string
  member Equals : value:obj -> bool
  ...

Full name: System.Globalization.CultureInfo

--------------------
Globalization.CultureInfo(name: string) : unit
Globalization.CultureInfo(culture: int) : unit
Globalization.CultureInfo(name: string, useUserOverride: bool) : unit
Globalization.CultureInfo(culture: int, useUserOverride: bool) : unit
property Globalization.CultureInfo.InvariantCulture: Globalization.CultureInfo
val parsers : Collections.Generic.IDictionary<char,(string -> obj)>

Full name: Script.parsers
val dict : keyValuePairs:seq<'Key * 'Value> -> Collections.Generic.IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
type Boolean =
  struct
    member CompareTo : obj:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val TrueString : string
    static val FalseString : string
    static member Parse : value:string -> bool
    static member TryParse : value:string * result:bool -> bool
  end

Full name: System.Boolean
Boolean.Parse(value: string) : bool
val box : value:'T -> obj

Full name: Microsoft.FSharp.Core.Operators.box
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
Multiple items
val uint32 : value:'T -> uint32 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.uint32

--------------------
type uint32 = UInt32

Full name: Microsoft.FSharp.Core.uint32
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Encoding) : unit
val forall : predicate:(char -> bool) -> str:string -> bool

Full name: Microsoft.FSharp.Core.String.forall
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
Char.IsUpper(c: char) : bool
Char.IsUpper(s: string, index: int) : bool
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
val separators : string []

Full name: Script.separators
property Collections.Generic.IDictionary.Keys: Collections.Generic.ICollection<char>
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val c : char
val sprintf : format:Printf.StringFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val getFormatters : xs:char list -> char list

Full name: Script.getFormatters
val xs : char list
val xr : char list
val x : char
Collections.Generic.IDictionary.ContainsKey(key: char) : bool
val sscanf : pf:PrintfFormat<'a,'b,'c,'d,'t> -> s:string -> 't

Full name: Script.sscanf
val pf : PrintfFormat<'a,'b,'c,'d,'t>
Multiple items
type PrintfFormat<'Printer,'State,'Residue,'Result> =
  new : value:string -> PrintfFormat<'Printer,'State,'Residue,'Result>
  member Value : string

Full name: Microsoft.FSharp.Core.PrintfFormat<_,_,_,_>

--------------------
type PrintfFormat<'Printer,'State,'Residue,'Result,'Tuple> =
  inherit PrintfFormat<'Printer,'State,'Residue,'Result>
  new : value:string -> PrintfFormat<'Printer,'State,'Residue,'Result,'Tuple>

Full name: Microsoft.FSharp.Core.PrintfFormat<_,_,_,_,_>

--------------------
new : value:string -> PrintfFormat<'Printer,'State,'Residue,'Result>

--------------------
new : value:string -> PrintfFormat<'Printer,'State,'Residue,'Result,'Tuple>
val s : string
val formatStr : string
property PrintfFormat.Value: string
String.Replace(oldValue: string, newValue: string) : string
String.Replace(oldChar: char, newChar: char) : string
val constants : string []
String.Split([<ParamArray>] separator: char []) : string []
String.Split(separator: string [], options: StringSplitOptions) : string []
String.Split(separator: char [], options: StringSplitOptions) : string []
String.Split(separator: char [], count: int) : string []
String.Split(separator: string [], count: int, options: StringSplitOptions) : string []
String.Split(separator: char [], count: int, options: StringSplitOptions) : string []
type StringSplitOptions =
  | None = 0
  | RemoveEmptyEntries = 1

Full name: System.StringSplitOptions
field StringSplitOptions.None = 0
val regex : Regex
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
String.Join(separator: string, values: Collections.Generic.IEnumerable<string>) : string
String.Join<'T>(separator: string, values: Collections.Generic.IEnumerable<'T>) : string
String.Join(separator: string, [<ParamArray>] values: obj []) : string
String.Join(separator: string, [<ParamArray>] value: string []) : string
String.Join(separator: string, value: string [], startIndex: int, count: int) : string
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
Regex.Escape(str: string) : string
val formatters : char list
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
val toList : array:'T [] -> 'T list

Full name: Microsoft.FSharp.Collections.Array.toList
val groups : seq<Group>
Regex.Match(input: string) : Match
Regex.Match(input: string, startat: int) : Match
Regex.Match(input: string, beginning: int, length: int) : Match
val cast : source:Collections.IEnumerable -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.cast
type Group =
  inherit Capture
  member Captures : CaptureCollection
  member Success : bool
  static member Synchronized : inner:Group -> Group

Full name: System.Text.RegularExpressions.Group
val skip : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.skip
val matches : obj []
val map2 : mapping:('T1 -> 'T2 -> 'U) -> source1:seq<'T1> -> source2:seq<'T2> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map2
val g : Group
val f : char
property Capture.Value: string
property Array.Length: int
type FSharpValue =
  static member GetExceptionFields : exn:obj * ?bindingFlags:BindingFlags -> obj []
  static member GetRecordField : record:obj * info:PropertyInfo -> obj
  static member GetRecordFields : record:obj * ?bindingFlags:BindingFlags -> obj []
  static member GetTupleField : tuple:obj * index:int -> obj
  static member GetTupleFields : tuple:obj -> obj []
  static member GetUnionFields : value:obj * unionType:Type * ?bindingFlags:BindingFlags -> UnionCaseInfo * obj []
  static member MakeFunction : functionType:Type * implementation:(obj -> obj) -> obj
  static member MakeRecord : recordType:Type * values:obj [] * ?bindingFlags:BindingFlags -> obj
  static member MakeTuple : tupleElements:obj [] * tupleType:Type -> obj
  static member MakeUnion : unionCase:UnionCaseInfo * args:obj [] * ?bindingFlags:BindingFlags -> obj
  ...

Full name: Microsoft.FSharp.Reflection.FSharpValue
static member FSharpValue.MakeTuple : tupleElements:obj [] * tupleType:Type -> obj
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
val a : string

Full name: Script.a
val b : decimal

Full name: Script.b
val x : string

Full name: Script.x
val y : string

Full name: Script.y
val z : string

Full name: Script.z
val c : bool

Full name: Script.c
val d : int

Full name: Script.d
val e : int

Full name: Script.e
val f : int

Full name: Script.f
val g : int

Full name: Script.g
val h : int

Full name: Script.h
val i : int

Full name: Script.i
val j : float

Full name: Script.j
val k : float

Full name: Script.k
val l : float

Full name: Script.l
val m : float

Full name: Script.m
val n : float

Full name: Script.n
val o : float

Full name: Script.o
val p : char

Full name: Script.p

More information

Link:http://fssnip.net/4I
Posted:12 years ago
Author:Wolfgang Meyer
Tags: sscanf , scanf , parsing , printf