5 people like it.

Csv Type Provider Schema Parser

Parses the schema string format used by the CSV Type Provider from the FSharp.Data library, because sometimes you need the metadata. Supports column names that contain underscores.

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
module CsvSchema
open System

module TypeDetermination =
        /// How a column's underlying .NET type is wrapped: not at all, as Nullable, or as option.
        type TypeWrapper = None | Nullable | Option

        //https://github.com/fsharp/FSharp.Data/blob/master/src/Csv/CsvInference.fs
        /// Base schema type names paired with the .NET type each one denotes.
        let private baseTypes =
          [ "int",     typeof<int>
            "int64",   typeof<int64>
            "bool",    typeof<bool>
            "float",   typeof<float>
            "decimal", typeof<decimal>
            "date",    typeof<DateTime>
            "guid",    typeof<Guid>
            "string",  typeof<string> ]

        /// Lookup table from a schema type name ("int", "int?", "int option", ...)
        /// to the underlying .NET type and the wrapper the name implies.
        let private nameToType =
          [ for name, t in baseTypes do
              yield name, (t, TypeWrapper.None)
            for name, t in baseTypes do
              // The table carries no "string?" entry; every other base type has a '?' form.
              if name <> "string" then
                yield name + "?", (t, TypeWrapper.Nullable)
            for name, t in baseTypes do
              yield name + " option", (t, TypeWrapper.Option) ]
          |> dict

        /// Resolves a schema type name to its (.NET type, wrapper) pair.
        /// The lookup is exact: names are case-sensitive and must match a table key.
        /// Raises KeyNotFoundException, naming the offending type, when `s` is not in the table.
        let determinType s =
          match nameToType.TryGetValue(s) with
          | true, found -> found
          | _ ->
              raise (System.Collections.Generic.KeyNotFoundException(
                       sprintf "Unknown CSV schema type name '%s'" s))

module Parsing =

    /// Reads the longest run of characters satisfying `f` from `s`, starting at index `i`.
    /// Returns the run together with the cursor (index just past the run, paired with `s`).
    let take i f (s:string) =
        // Index of the first character at or after `pos` that `f` rejects, or the end of `s`.
        let rec firstRejected pos =
            if pos < s.Length && f (s.[pos]) then firstRejected (pos + 1) else pos
        let stop = firstRejected i
        // An empty run is produced directly so a start index past the end never reaches Substring.
        let run = if stop = i then String.Empty else s.Substring(i, stop - i)
        run, (stop, s)

    /// True for characters allowed in an unquoted token: letters, digits and '_'.
    let isTokenChar c = c = '_' || Char.IsLetterOrDigit c
    /// True for whitespace characters, as classified by Char.IsWhiteSpace.
    let isWhitespace (c: char) = Char.IsWhiteSpace c
    /// True for every character except the double quote.
    let isNotQuote c = c <> '"'

    /// Matches when the character at cursor index `i` of `s` equals `c`;
    /// yields the cursor advanced past that character.
    let (|Char|_|) c (i,s:string)  =
        if i < s.Length && s.[i] = c then Some (i + 1, s) else None

    // Single-character punctuation recognizers, each built on (|Char|_|).
    let (|Comma|_|)         cursor = (|Char|_|) ',' cursor
    let (|QuestionMark|_|)  cursor = (|Char|_|) '?' cursor
    let (|LP|_|)            cursor = (|Char|_|) '(' cursor
    let (|RP|_|)            cursor = (|Char|_|) ')' cursor
    let (|Quote|_|)         cursor = (|Char|_|) '"' cursor

    /// Skips any whitespace at the cursor; always matches and yields the advanced cursor.
    let (|WS|) (i,s) =
        let _, afterSpaces = take i isWhitespace s
        afterSpaces
    /// Reads characters up to (not including) the next double quote or the end of input;
    /// always matches and yields the text read together with the cursor after it.
    let (|TillEndQuote|) (i,s) = take i isNotQuote s
    /// Matches when the cursor index is at or beyond the end of the string; yields the index.
    let (|Eof|_|) (i,s:string) = if i < s.Length then None else Some i

    /// Matches a non-empty run of token characters at the cursor;
    /// yields the token text and the cursor after it. Fails at end of input
    /// or when the character at the cursor is not a token character.
    let (|Token|_|) (i, s: string) =
        if i < s.Length && isTokenChar (s.[i]) then Some (take i isTokenChar s)
        else None

    /// Matches the literal token "option" at the cursor; yields the cursor after it.
    let (|Option|_|) cursor =
        match cursor with
        | Token ("option", rest) -> Some rest
        | _ -> None

    /// Parses a column type specification at the cursor and yields the resolved
    /// (type, wrapper) pair together with the cursor after the specification.
    /// Accepted forms, each optionally preceded by whitespace:
    ///   name ?        -> looked up as "name?"
    ///   name option   -> looked up as "name option"
    ///   name          -> looked up as "name" (the plain, unwrapped type)
    /// Fails with "expected datatype" when no token is present at the cursor;
    /// any exception raised by TypeDetermination.determinType for an unknown
    /// name propagates to the caller.
    let (|DataType|) = function
        | WS (Token (n, WS (QuestionMark rest)))     -> TypeDetermination.determinType (n+"?"), rest
        | WS (Token (n, WS (Option rest)))           -> TypeDetermination.determinType (n+" option"), rest
        // A bare type name must resolve to the plain entry, not to its " option" variant.
        | WS (Token (n, rest))                       -> TypeDetermination.determinType n, rest
        | _ -> failwith "expected datatype"

    /// Matches a column name after optional leading whitespace. The name is either
    /// double-quoted text (everything between the quotes) or a bare token.
    /// Yields the name and the cursor after it; a quoted name with no closing
    /// quote does not match.
    let (|Name|_|) (WS afterSpace) =
        match afterSpace with
        | Quote (TillEndQuote (quoted, Quote tail)) -> Some (quoted, tail)
        | Token (bare, tail)                        -> Some (bare, tail)
        | _                                         -> None

    /// Parses the remainder of a schema string from the given cursor, accumulating
    /// (name, (type, wrapper)) entries in `acc` (most recent first) and returning
    /// them in order of appearance once the end of input is reached.
    /// A column is a name optionally followed by a parenthesised type; a column
    /// without a type is recorded as (typeof<string>, TypeWrapper.None).
    /// Commas and whitespace between columns are skipped.
    /// Fails with "Unable to parse at ..." when nothing at the cursor can be consumed.
    let rec (|Schema|) acc = function
        | Eof _ -> acc |> List.rev
        // Whitespace is tolerated on both sides of the type, e.g. "( int )".
        | Name (name, WS (LP ( WS (DataType (dt, WS (RP rest)))))) -> (|Schema|) ((name,dt)::acc) rest
        | Name (name, rest) -> (|Schema|) ((name,(typeof<string>,TypeDetermination.TypeWrapper.None))::acc) rest
        | Comma rest -> (|Schema|) acc rest
        // WS always matches, so only recurse when it actually consumed input;
        // otherwise the parser would loop forever on the same position.
        | (i, _) & WS ((j, _) as rest) when j > i -> (|Schema|) acc rest
        | x -> failwithf "Unable to parse at %A" x

/// Parses a schema string into a list of (column name, (type, wrapper)) entries,
/// in the order the columns appear in the string.
let parseSchema s =
    let start = (0, s)
    Parsing.(|Schema|) [] start

(*
[<Literal>]
let scma = """
"f1 field" (int),"f2" (int),"f3","f4" (int),"f5" (date),"f6" (int?),"f7","f8","f9"(int?),"f10", f11 (float option)
"""
parseSchema scma

*)
module CsvSchema
namespace System
type TypeWrapper =
  | None
  | Nullable
  | Option

Full name: CsvSchema.TypeDetermination.TypeWrapper
union case TypeWrapper.None: TypeWrapper
Multiple items
union case TypeWrapper.Nullable: TypeWrapper

--------------------
type Nullable =
  static member Compare<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> int
  static member Equals<'T> : n1:Nullable<'T> * n2:Nullable<'T> -> bool
  static member GetUnderlyingType : nullableType:Type -> Type

Full name: System.Nullable

--------------------
type Nullable<'T (requires default constructor and value type and 'T :> ValueType)> =
  struct
    new : value:'T -> Nullable<'T>
    member Equals : other:obj -> bool
    member GetHashCode : unit -> int
    member GetValueOrDefault : unit -> 'T + 1 overload
    member HasValue : bool
    member ToString : unit -> string
    member Value : 'T
  end

Full name: System.Nullable<_>

--------------------
Nullable()
Nullable(value: 'T) : unit
Multiple items
union case TypeWrapper.Option: TypeWrapper

--------------------
module Option

from Microsoft.FSharp.Core
val private nameToType : Collections.Generic.IDictionary<string,(Type * TypeWrapper)>

Full name: CsvSchema.TypeDetermination.nameToType
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
Multiple items
val int64 : value:'T -> int64 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int64

--------------------
type int64 = Int64

Full name: Microsoft.FSharp.Core.int64

--------------------
type int64<'Measure> = int64

Full name: Microsoft.FSharp.Core.int64<_>
type bool = Boolean

Full name: Microsoft.FSharp.Core.bool
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
Multiple items
val decimal : value:'T -> decimal (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.decimal

--------------------
type decimal = Decimal

Full name: Microsoft.FSharp.Core.decimal

--------------------
type decimal<'Measure> = decimal

Full name: Microsoft.FSharp.Core.decimal<_>
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
Multiple items
type Guid =
  struct
    new : b:byte[] -> Guid + 4 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : o:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member ToByteArray : unit -> byte[]
    member ToString : unit -> string + 2 overloads
    static val Empty : Guid
    static member NewGuid : unit -> Guid
    static member Parse : input:string -> Guid
    static member ParseExact : input:string * format:string -> Guid
    ...
  end

Full name: System.Guid

--------------------
Guid()
Guid(b: byte []) : unit
Guid(g: string) : unit
Guid(a: int, b: int16, c: int16, d: byte []) : unit
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
union case TypeWrapper.Nullable: TypeWrapper
union case TypeWrapper.Option: TypeWrapper
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val dict : keyValuePairs:seq<'Key * 'Value> -> Collections.Generic.IDictionary<'Key,'Value> (requires equality)

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.dict
val determinType : s:string -> Type * TypeWrapper

Full name: CsvSchema.TypeDetermination.determinType
val s : string
val take : i:int -> f:(char -> bool) -> s:string -> String * (int * string)

Full name: CsvSchema.Parsing.take
val i : int
val f : (char -> bool)
val loop : (int -> char list -> String * (int * string))
val acc : char list
property String.Length: int
val c : char
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member GetSlice : startIndex:int option * endIndex:int option -> 'T list
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val rev : list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.rev
val toArray : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.List.toArray
val isTokenChar : c:char -> bool

Full name: CsvSchema.Parsing.isTokenChar
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsLetterOrDigit(c: char) : bool
Char.IsLetterOrDigit(s: string, index: int) : bool
val isWhitespace : arg00:char -> bool

Full name: CsvSchema.Parsing.isWhitespace
Char.IsWhiteSpace(c: char) : bool
Char.IsWhiteSpace(s: string, index: int) : bool
val isNotQuote : c:char -> bool

Full name: CsvSchema.Parsing.isNotQuote
union case Option.Some: Value: 'T -> Option<'T>
union case Option.None: Option<'T>
Multiple items
active recognizer Char: char -> int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Char|_| )

--------------------
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
active recognizer Eof: int * string -> int option

Full name: CsvSchema.Parsing.( |Eof|_| )
module Option

from Microsoft.FSharp.Core
active recognizer Token: int * string -> (String * (int * string)) option

Full name: CsvSchema.Parsing.( |Token|_| )
val rest : int * string
active recognizer WS: int * string -> int * string

Full name: CsvSchema.Parsing.( |WS| )
val n : String
active recognizer QuestionMark: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |QuestionMark|_| )
module TypeDetermination

from CsvSchema
val determinType : s:string -> Type * TypeDetermination.TypeWrapper

Full name: CsvSchema.TypeDetermination.determinType
Multiple items
active recognizer Option: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Option|_| )

--------------------
module Option

from Microsoft.FSharp.Core
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
active recognizer Quote: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Quote|_| )
active recognizer TillEndQuote: int * string -> String * (int * string)

Full name: CsvSchema.Parsing.( |TillEndQuote| )
val acc : (String * (Type * TypeDetermination.TypeWrapper)) list
active recognizer Name: int * string -> (String * (int * string)) option

Full name: CsvSchema.Parsing.( |Name|_| )
val name : String
active recognizer LP: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |LP|_| )
active recognizer DataType: int * string -> (Type * TypeDetermination.TypeWrapper) * (int * string)

Full name: CsvSchema.Parsing.( |DataType| )
val dt : Type * TypeDetermination.TypeWrapper
active recognizer RP: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |RP|_| )
active recognizer Schema: (String * (Type * TypeDetermination.TypeWrapper)) list -> int * string -> (String * (Type * TypeDetermination.TypeWrapper)) list

Full name: CsvSchema.Parsing.( |Schema| )
union case TypeDetermination.TypeWrapper.None: TypeDetermination.TypeWrapper
active recognizer Comma: int * string -> (int * string) option

Full name: CsvSchema.Parsing.( |Comma|_| )
val x : int * string
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
val parseSchema : s:string -> (String * (Type * TypeDetermination.TypeWrapper)) list

Full name: CsvSchema.parseSchema
module Parsing

from CsvSchema

More information

Link:http://fssnip.net/te
Posted:5 years ago
Author:Faisal Waris
Tags: csv , typeprovider , parser