4 people like it.

Csv Type Provider Schema Parser

Parses the schema string format used by the CSV Type Provider from the FSharp.Data library, because sometimes you need the metadata.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
80: 
81: 
82: 
83: 
84: 
85: 
86: 
87: 
88: 
89: 
90: 
91: 
92: 
93: 
94: 
95: 
96: 
97: 
98: 
99: 
module CsvSchema
open System

module TypeDetermination =
    /// How a schema type name wraps its base type:
    /// `None` for a plain name, `Nullable` for a `?` suffix, `Option` for an ` option` suffix.
    type TypeWrapper = None | Nullable | Option

    // Reference:
    //https://github.com/fsharp/FSharp.Data/blob/master/src/Csv/CsvInference.fs
    /// Base names that are registered in all three forms: plain, `?` and ` option`.
    let private valueTypes =
        [ "int",     typeof<int>
          "int64",   typeof<int64>
          "bool",    typeof<bool>
          "float",   typeof<float>
          "decimal", typeof<decimal>
          "date",    typeof<DateTime>
          "guid",    typeof<Guid> ]

    /// Lookup table from schema type name to (base type, wrapper).
    let private nameToType =
        seq {
            for (name, ty) in valueTypes do
                yield (name,             (ty, TypeWrapper.None))
                yield (name + "?",       (ty, TypeWrapper.Nullable))
                yield (name + " option", (ty, TypeWrapper.Option))
            // string is registered only in its plain and ` option` forms.
            yield ("string",        (typeof<String>, TypeWrapper.None))
            yield ("string option", (typeof<string>, TypeWrapper.Option))
        }
        |> dict

    /// Returns the (base type, wrapper) pair registered for the schema type name `s`.
    /// The name is used as-is to index the table, so a name that is not registered
    /// makes the dictionary lookup fail.
    let determinType s = nameToType.[s]

module Parsing =

    /// Reads the longest run of characters satisfying `f`, starting at index `i` of `s`.
    /// Returns the run together with the state `(index just past the run, s)`.
    /// When `i` is at or beyond the end of `s` the run is empty and the index is unchanged.
    let take i f (s:string) =
        let mutable stop = i
        while stop < s.Length && f s.[stop] do
            stop <- stop + 1
        // Only slice when something was consumed, so a start index past the end stays valid.
        let run = if stop > i then s.Substring(i, stop - i) else ""
        run,(stop,s)

    /// True when `c` is a letter or a digit, as decided by `Char.IsLetterOrDigit`.
    let isLetterOrDigit (c:char) = Char.IsLetterOrDigit c
    /// True when `c` is whitespace, as decided by `Char.IsWhiteSpace`.
    let isWhitespace (c:char) = Char.IsWhiteSpace c
    /// True for every character except the double quote.
    let isNotQuote (c:char) = not (c = '"')

    /// Matches when the character at index `i` of `s` equals `c`;
    /// yields the state advanced past that character. Fails at end of input.
    let (|Char|_|) c (i,s:string)  =
        match i < s.Length && s.[i] = c with
        | true  -> Some(i+1,s)
        | false -> None

    // Single-character recognizers: each consumes exactly one specific character.
    /// Matches a ',' at the current position.
    let (|Comma|_|) state        = (|Char|_|) ',' state
    /// Matches a '?' at the current position.
    let (|QuestionMark|_|) state = (|Char|_|) '?' state
    /// Matches a '(' at the current position.
    let (|LP|_|) state           = (|Char|_|) '(' state
    /// Matches a ')' at the current position.
    let (|RP|_|) state           = (|Char|_|) ')' state
    /// Matches a '"' at the current position.
    let (|Quote|_|) state        = (|Char|_|) '"' state

    /// Always matches: skips any whitespace and yields the state after it.
    let (|WS|) (i,s) = snd (take i isWhitespace s)
    /// Always matches: yields the text up to (not including) the next '"' or end of input,
    /// together with the state positioned there.
    let (|TillEndQuote|) (i,s) = take i isNotQuote s
    /// Matches when the index is at or past the end of the string; yields that index.
    let (|Eof|_|) (i,s:string) = if i < s.Length then None else Some(i)

    /// Matches a non-empty run of letters/digits at the current position;
    /// yields the run and the state after it. Fails at end of input.
    let (|Token|_|) (i, s:string) =
        if i < s.Length && isLetterOrDigit s.[i]
        then Some (take i isLetterOrDigit s)
        else None

    /// Matches the literal keyword `option` as a token; yields the state after it.
    let (|Option|_|) input =
        match input with
        | Token (word, rest) when word = "option" -> Some rest
        | _ -> None

    /// Parses a type specification at the current position and yields
    /// `((base type, wrapper), state after the specification)`.
    /// Recognised forms, after optional leading whitespace:
    ///   `name?`       -> looked up as "name?"
    ///   `name option` -> looked up as "name option"
    ///   `name`        -> looked up as "name"
    /// Fails with "expected datatype" when no token is present. A name that is not
    /// registered fails inside `TypeDetermination.determinType`.
    let (|DataType|) = function
        | WS (Token (n, WS (QuestionMark rest)))     -> TypeDetermination.determinType (n+"?"), rest
        | WS (Token (n, WS (Option rest)))           -> TypeDetermination.determinType (n+" option"), rest
        // A bare name is looked up unchanged. It was previously looked up as
        // `n + " option"`, which reported every plain type as optional.
        | WS (Token (n, rest))                       -> TypeDetermination.determinType n, rest
        | _ -> failwith "expected datatype"

    /// Matches a column name after optional leading whitespace: either a
    /// double-quoted string (quotes stripped) or a bare letter/digit token.
    /// Yields the name and the state after it.
    let (|Name|_|) (WS start) =
        match start with
        | Quote (TillEndQuote (text, Quote rest)) -> Some (text, rest)
        | Token found                             -> Some found
        | _                                       -> None

    /// Parses the remaining input into a list of `(column name, (type, wrapper))`
    /// entries. `acc` holds the entries parsed so far in reverse order; the result
    /// is in source order.
    /// A column is a name optionally followed by a parenthesised type; a column
    /// without a type is recorded as a plain `string`. Commas and whitespace
    /// between columns are skipped.
    /// Fails with "Unable to parse at ..." when no rule can consume input.
    let rec (|Schema|) acc input =
        match input with
        | Eof _ -> acc |> List.rev
        // Whitespace is allowed before the closing parenthesis, e.g. `(int )`.
        | Name (name, WS (LP ( WS (DataType (dt, WS (RP rest)))))) -> (|Schema|) ((name,dt)::acc) rest
        | Name (name, rest) -> (|Schema|) ((name,(typeof<string>,TypeDetermination.TypeWrapper.None))::acc) rest
        | Comma rest -> (|Schema|) acc rest
        // WS always matches, so require it to consume something. Without this
        // guard, unparseable input recursed forever at the same position and the
        // error branch below was unreachable.
        | WS rest when fst rest > fst input -> (|Schema|) acc rest
        | x -> failwithf "Unable to parse at %A" x

/// Parses a CSV type provider schema string into a list of
/// `(column name, (type, wrapper))` entries, starting at index 0 with no columns.
let parseSchema s =
    let start = (0, s)
    start |> Parsing.(|Schema|) []

(*
[<Literal>]
let scma = """
"f1 field" (int),"f2" (int),"f3","f4" (int),"f5" (date),"f6" (int?),"f7","f8","f9"(int?),"f10", f11 (float option)
"""
parseSchema scma
*)
module CsvSchema
namespace System
type TypeWrapper =
  | None
  | Nullable
  | Option

Full name: CsvSchema.TypeDetermination.TypeWrapper
union case TypeWrapper.None: TypeWrapper
Multiple items
union case TypeWrapper.Nullable: TypeWrapper

--------------------
type Nullable =
  static member Compare<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> int
  static member Equals<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> bool
  static member GetUnderlyingType : nullableType:Type -> Type

Full name: System.Nullable

--------------------
type Nullable<'T (requires default constructor and value type and 'T :> ValueType)> =
  struct
    new : value:'T -> Nullable<'T>
    member Equals : other:obj -> bool
    member GetHashCode : unit -> int
    member GetValueOrDefault : unit -> 'T + 1 overload
    member HasValue : bool
    member ToString : unit -> string
    member Value : 'T
  end

Full name: System.Nullable<_>

--------------------
Nullable()
Nullable(value: 'T) : unit
Multiple items
union case TypeWrapper.Option: TypeWrapper

--------------------
module Option

from Microsoft.FSharp.Core
val private nameToType : Collections.Generic.IDictionary<string,(Type * TypeWrapper)>

Full name: CsvSchema.TypeDetermination.nameToType
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
Multiple items
val int64 : value:'T -> int64 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int64

--------------------
type int64 = Int64

Full name: Microsoft.FSharp.Core.int64

--------------------
type int64<'Measure> = int64

Full name: Microsoft.FSharp.Core.int64<_>
type bool = Boolean

Full name: Microsoft.FSharp.Core.bool
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
Multiple items
val decimal : value:'T -> decimal (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.decimal

--------------------
type decimal = Decimal

Full name: Microsoft.FSharp.Core.decimal

--------------------
type decimal<'Measure> = decimal

Full name: Microsoft.FSharp.Core.decimal<_>
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
Multiple items
type Guid =
  struct
    new : b:byte[] -> Guid + 4 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : o:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member ToByteArray : unit -> byte[]
    member ToString : unit -> string + 2 overloads
    static val Empty : Guid
    static member NewGuid : unit -> Guid
    static member Parse : input:string -> Guid
    static member ParseExact : input:string * format:string -> Guid
    ...
  end

Full name: System.Guid

--------------------
Guid()
Guid(b: byte []) : unit
Guid(g: string) : unit
Guid(a: int, b: int16, c: int16, d: byte []) : unit
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
union case TypeWrapper.Nullable: TypeWrapper
union case TypeWrapper.Option: TypeWrapper
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val dict : keyValuePairs:seq<'Key * 'Value> -> Collections.Generic.IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val determinType : s:string -> Type * TypeWrapper

Full name: CsvSchema.TypeDetermination.determinType
val s : string
val take : i:int -> f:(char -> bool) -> s:string -> String * (int * string)

Full name: CsvSchema.Parsing.take
val i : int
val f : (char -> bool)
val loop : (int -> char list -> String * (int * string))
val acc : char list
property String.Length: int
val c : char
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val rev : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.rev
val toArray : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.List.toArray
val isLetterOrDigit : arg00:char -> bool

Full name: CsvSchema.Parsing.isLetterOrDigit
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsLetterOrDigit(c: char) : bool
Char.IsLetterOrDigit(s: string, index: int) : bool
val isWhitespace : arg00:char -> bool

Full name: CsvSchema.Parsing.isWhitespace
Char.IsWhiteSpace(c: char) : bool
Char.IsWhiteSpace(s: string, index: int) : bool
val isNotQuote : c:char -> bool

Full name: CsvSchema.Parsing.isNotQuote
union case Option.Some: Value: 'T -> Option<'T>
union case Option.None: Option<'T>
Multiple items
active recognizer Char: char -> int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Char|_| )

--------------------
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
active recognizer Eof: int * string -> int option

Full name: CsvSchema.Parsing.( |Eof|_| )
module Option

from Microsoft.FSharp.Core
active recognizer Token: int * string -> (String * (int * string)) option

Full name: CsvSchema.Parsing.( |Token|_| )
val rest : int * string
active recognizer WS: int * string -> int * string

Full name: CsvSchema.Parsing.( |WS| )
val n : String
active recognizer QuestionMark: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |QuestionMark|_| )
module TypeDetermination

from CsvSchema
val determinType : s:string -> Type * TypeDetermination.TypeWrapper

Full name: CsvSchema.TypeDetermination.determinType
Multiple items
active recognizer Option: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Option|_| )

--------------------
module Option

from Microsoft.FSharp.Core
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
active recognizer Quote: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Quote|_| )
active recognizer TillEndQuote: int * string -> String * (int * string)

Full name: CsvSchema.Parsing.( |TillEndQuote| )
val acc : (String * (Type * TypeDetermination.TypeWrapper)) list
active recognizer Name: int * string -> (String * (int * string)) option

Full name: CsvSchema.Parsing.( |Name|_| )
val name : String
active recognizer LP: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |LP|_| )
active recognizer DataType: int * string -> (Type * TypeDetermination.TypeWrapper) * (int * string)

Full name: CsvSchema.Parsing.( |DataType| )
val dt : Type * TypeDetermination.TypeWrapper
active recognizer RP: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |RP|_| )
active recognizer Schema: (String * (Type * TypeDetermination.TypeWrapper)) list -> int * string -> (String * (Type * TypeDetermination.TypeWrapper)) list

Full name: CsvSchema.Parsing.( |Schema| )
union case TypeDetermination.TypeWrapper.None: TypeDetermination.TypeWrapper
active recognizer Comma: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Comma|_| )
val x : int * string
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
val parseSchema : s:string -> (String * (Type * TypeDetermination.TypeWrapper)) list

Full name: CsvSchema.parseSchema
module Parsing

from CsvSchema
Raw view New version

More information

Link:http://fssnip.net/te
Posted:1 years ago
Author:Faisal Waris
Tags: csv , typeprovider , parser