module CsvSchema

open System

/// Resolves the textual type names accepted in a schema string
/// (e.g. "int", "int?", "int option") to a .NET type plus a wrapper kind.
module TypeDetermination =

    /// How the base type of a column is wrapped to represent missing values.
    type TypeWrapper =
        | None
        | Nullable
        | Option

    // Table adapted from
    // https://github.com/fsharp/FSharp.Data/blob/master/src/Csv/CsvInference.fs
    let private nameToType =
        [ "int",            (typeof<int>,      TypeWrapper.None)
          "int64",          (typeof<int64>,    TypeWrapper.None)
          "bool",           (typeof<bool>,     TypeWrapper.None)
          "float",          (typeof<float>,    TypeWrapper.None)
          "decimal",        (typeof<decimal>,  TypeWrapper.None)
          "date",           (typeof<DateTime>, TypeWrapper.None)
          "guid",           (typeof<Guid>,     TypeWrapper.None)
          "string",         (typeof<string>,   TypeWrapper.None)
          "int?",           (typeof<int>,      TypeWrapper.Nullable)
          "int64?",         (typeof<int64>,    TypeWrapper.Nullable)
          "bool?",          (typeof<bool>,     TypeWrapper.Nullable)
          "float?",         (typeof<float>,    TypeWrapper.Nullable)
          "decimal?",       (typeof<decimal>,  TypeWrapper.Nullable)
          "date?",          (typeof<DateTime>, TypeWrapper.Nullable)
          "guid?",          (typeof<Guid>,     TypeWrapper.Nullable)
          "int option",     (typeof<int>,      TypeWrapper.Option)
          "int64 option",   (typeof<int64>,    TypeWrapper.Option)
          "bool option",    (typeof<bool>,     TypeWrapper.Option)
          "float option",   (typeof<float>,    TypeWrapper.Option)
          "decimal option", (typeof<decimal>,  TypeWrapper.Option)
          "date option",    (typeof<DateTime>, TypeWrapper.Option)
          "guid option",    (typeof<Guid>,     TypeWrapper.Option)
          "string option",  (typeof<string>,   TypeWrapper.Option) ]
        |> dict

    /// Looks up the (type, wrapper) pair registered for the type name `s`.
    /// Raises KeyNotFoundException (naming the offending text) when `s` is
    /// not one of the names in the table above.
    let determinType s =
        match nameToType.TryGetValue s with
        | true, found -> found
        | _ ->
            raise (
                System.Collections.Generic.KeyNotFoundException(
                    sprintf "Unknown schema type name '%s'" s
                )
            )

/// Hand-written scanner/parser built from active patterns.
/// Parser state is a pair (index, input): the position of the next unread
/// character and the complete input string.
module Parsing =

    /// Consumes the longest run of characters satisfying `f`, starting at
    /// index `i` of `s`. Returns the consumed text together with the
    /// remaining state (index just past the run, `s`).
    let take i f (s: string) =
        let rec scan j =
            if j < s.Length && f s.[j] then scan (j + 1) else j

        let stop = scan i
        // `stop = i` also covers a start index at or past the end of input.
        let text = if stop > i then s.Substring(i, stop - i) else ""
        text, (stop, s)

    /// Characters allowed in an unquoted name or type token.
    let isTokenChar c = Char.IsLetterOrDigit(c) || c = '_'

    let isWhitespace (c: char) = Char.IsWhiteSpace c

    let isNotQuote c = c <> '"'

    /// Matches when the next character is `c`; yields the state after it.
    let (|Char|_|) c (i, s: string) =
        if i < s.Length && s.[i] = c then Some(i + 1, s) else None

    let (|Comma|_|) = (|Char|_|) ','
    let (|QuestionMark|_|) = (|Char|_|) '?'
    let (|LP|_|) = (|Char|_|) '('
    let (|RP|_|) = (|Char|_|) ')'
    let (|Quote|_|) = (|Char|_|) '"'

    /// Total pattern: skips zero or more whitespace characters.
    let (|WS|) (i, s) = s |> take i isWhitespace |> snd

    /// Matches only when at least one whitespace character was skipped.
    /// Used where the parser must make progress to avoid looping.
    let (|SomeWS|_|) (i, s) =
        match take i isWhitespace s with
        | _, ((j, _) as rest) when j > i -> Some rest
        | _ -> None

    /// Total pattern: reads everything up to (not including) the next
    /// double quote, or to the end of input if there is none.
    let (|TillEndQuote|) (i, s) = s |> take i isNotQuote

    /// Matches when the whole input has been consumed.
    let (|Eof|_|) (i, s: string) =
        if i >= s.Length then Some i else None

    /// Matches a non-empty run of token characters at the current position.
    let (|Token|_|) (i, s: string) =
        if i < s.Length && isTokenChar s.[i] then
            Some(take i isTokenChar s)
        else
            None

    /// Matches the literal keyword `option`.
    let (|Option|_|) =
        function
        | Token("option", rest) -> Some rest
        | _ -> None

    /// Total pattern: parses `name?`, `name option` or plain `name` and
    /// resolves it through TypeDetermination.determinType.
    /// Fails with "expected datatype" when no token is present; propagates
    /// KeyNotFoundException for an unknown type name.
    let (|DataType|) =
        function
        | WS(Token(n, WS(QuestionMark rest))) -> TypeDetermination.determinType (n + "?"), rest
        | WS(Token(n, WS(Option rest))) -> TypeDetermination.determinType (n + " option"), rest
        // A bare type name is looked up as written; it must not be treated
        // as if an "option" suffix had been supplied.
        | WS(Token(n, rest)) -> TypeDetermination.determinType n, rest
        | _ -> failwith "expected datatype"

    /// Matches a column name: either a double-quoted string (which may
    /// contain any character except a quote) or a bare token.
    let (|Name|_|) =
        function
        | WS(Quote(TillEndQuote(n, Quote rest))) -> Some(n, rest)
        | WS(Token(n, rest)) -> Some(n, rest)
        | _ -> None

    /// Parses the remaining input into a list of (name, (type, wrapper))
    /// pairs, accumulating results in `acc` (in reverse order).
    /// Columns without a parenthesised type default to string / None.
    /// Fails with "Unable to parse at ..." when the input cannot be consumed.
    let rec (|Schema|) acc =
        function
        | Eof _ -> List.rev acc
        // Whitespace is tolerated before the closing parenthesis.
        | Name(name, WS(LP(DataType(dt, WS(RP rest))))) -> (|Schema|) ((name, dt) :: acc) rest
        | Name(name, rest) ->
            (|Schema|) ((name, (typeof<string>, TypeDetermination.TypeWrapper.None)) :: acc) rest
        | Comma rest -> (|Schema|) acc rest
        // Only recurse when whitespace was really consumed; a zero-width
        // match here would recurse forever on unparseable input.
        | SomeWS rest -> (|Schema|) acc rest
        | x -> failwithf "Unable to parse at %A" x

/// Parses a schema string such as
/// `"f1 field" (int), f2 (date?), f3, f4 (float option)`
/// into a list of (column name, (type, wrapper)) pairs, in input order.
let parseSchema s = Parsing.(|Schema|) [] (0, s)

(*
[<Literal>]
let scma = """ "f1 field" (int),"f2" (int),"f3","f4" (int),"f5" (date),"f6" (int?),"f7","f8","f9"(int?),"f10", f11 (float option) """
parseSchema scma
*)