4 people like it.
Like the snippet!
CSV reader
I've modified the CSV sample from Expert F# to fit my needs. I don't want to be forced to use the CSV schema as dictated by the column order of the file. Therefore I've made three major modifications.
1. remove the permutation
2. added a new column name option to the ColumnAttribute
3. added a name to csv index mapping
So basically you now have 3 options.
1. Don't annotate your record at all and use it as a POCO. The order of the record fields is mapped directly to the order of the columns in the CSV. UPDATE: I don't recommend this any more. At the time of writing this snippet I wasn't aware that field order isn't guaranteed by the reflection mechanism.
2. Use the index option of the ColumnAttribute. Same as before.
3. Use the name option. This is what I was looking for. I have to deal with tons of CSV files that have more columns than I'm interested in. Have a look at the sample usage below.
I've moved the type conversion out of the CsvReader class so that it can easily be extended with custom type conversions (e.g. for combined column values - denormalized data).
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
|
module Csv
open System
open System.IO
open System.Reflection
open Microsoft.FSharp.Reflection
/// Attribute that tells CsvReader which csv column a record field is read from.
/// A column can be addressed by its 0-based position (Index) or by its header
/// text (Name). Both are optional; an unset Index reads as -1 and an unset
/// Name reads as the empty string.
type ColumnAttribute(index:int option, name:string option) =
    inherit Attribute()

    // Backing stores; None means "never set".
    let mutable columnIndex = index
    let mutable columnName = name

    /// Creates an attribute with neither Index nor Name set.
    new () = ColumnAttribute(None, None)

    /// 0-based column position, or -1 when no position was given.
    member x.Index
        with get() = defaultArg columnIndex (-1)
        and set value = columnIndex <- Some value

    /// Header text of the column, or "" when no name was given.
    member x.Name
        with get() = defaultArg columnName ""
        and set value = columnName <- Some value
/// Reads delimiter-separated text into F# records of type 'a.
///
/// typeConverter is called with the type of each record field and must return
/// the function that turns the trimmed text of a cell into a boxed value of
/// that type.
///
/// The csv column of a record field is resolved in this order:
///   1. ColumnAttribute.Index when it is >= 0,
///   2. ColumnAttribute.Name looked up in the header map,
///   3. the position of the field as returned by FSharpType.GetRecordFields.
type CsvReader<'a>(typeConverter:Type -> (string -> obj)) =
    let recordType = typeof<'a>
    let fields = FSharpType.GetRecordFields(recordType)
    let objectBuilder = FSharpValue.PreComputeRecordConstructor(recordType)

    // Splits a line on the delimiter and trims every cell.
    // Plain String.Split: quoted cells containing the delimiter are not handled.
    let split (delim:char) (line:string) =
        line.Split([|delim|]) |> Array.map (fun s -> s.Trim())

    // Resolves the 0-based csv column a record field is read from.
    // Fails when the attribute names a column that the header does not contain.
    let resolveColumnIndex (header:Map<string,int>) (fieldIndex:int) (field:PropertyInfo) =
        match field.GetCustomAttributes(typeof<ColumnAttribute>, false) with
        | [| (:? ColumnAttribute as col) |] ->
            if col.Index >= 0 then
                col.Index
            elif col.Name <> String.Empty then
                match Map.tryFind col.Name header with
                | Some i -> i
                | None ->
                    failwithf "column '%s' (record field '%s') was not found in the csv header"
                        col.Name field.Name
            else
                fieldIndex
        | _ -> fieldIndex

    // Builds one (field name, csv column, converter) entry per record field,
    // in the order expected by the record constructor.
    let buildSchema (header:Map<string,int>) =
        fields |> Array.mapi (fun fieldIndex field ->
            let deserialize = typeConverter field.PropertyType
            let columnIndex = resolveColumnIndex header fieldIndex field
            (field.Name, columnIndex, deserialize))

    // Converts one line into a record using an already computed schema.
    let recordFromSchema (schema:(string * int * (string -> obj))[]) (delim:char) (line:string) =
        let cells = split delim line
        let values =
            schema |> Array.map (fun (fieldName, columnIndex, deserialize) ->
                // Keep the original exception type for short lines, but say which
                // field and column were affected.
                if columnIndex >= cells.Length then
                    raise (IndexOutOfRangeException(
                            sprintf "line has %d column(s) but field '%s' needs column %d: '%s'"
                                cells.Length fieldName columnIndex line))
                let cellText = cells.[columnIndex]
                // Wrap converter failures so the message names the offending cell
                // and field; the original exception is kept as InnerException.
                try deserialize cellText
                with e ->
                    raise (Exception(
                            sprintf "error converting '%s' to field '%s'" cellText fieldName, e)))
        unbox<'a> (objectBuilder values)

    /// Converts a single line into a record.
    /// header maps column names to 0-based column positions (may be empty when
    /// no field is addressed by name); delim is the cell separator.
    /// Raises when a named column is missing from header, when the line has too
    /// few columns, or when a cell cannot be converted.
    member x.CreateRecord(header:Map<string,int>, delim, line) =
        recordFromSchema (buildSchema header) delim line

    /// Lazily reads file and yields one record per non-empty line.
    /// When firstLineHasHeader is true the first line is consumed as the header
    /// and used to resolve name-based columns; otherwise an empty header is used.
    /// The file is opened when enumeration starts and closed when it ends.
    member x.ReadFile(file, separator:char, firstLineHasHeader:bool) =
        seq {
            use textReader = File.OpenText(file)
            // The header is local to each enumeration, so a header from a
            // previously read file can never leak into this one.
            let header =
                if firstLineHasHeader then
                    match textReader.ReadLine() with
                    | null -> Map.empty // empty file: there is no header line
                    | headerLine ->
                        headerLine
                        |> split separator
                        // Number the cells BEFORE dropping blank names, otherwise a
                        // blank header cell would shift all following positions.
                        |> Array.mapi (fun i name -> (name, i))
                        |> Array.filter (fun (name, _) -> not (String.IsNullOrWhiteSpace name))
                        |> Map.ofArray
                else
                    Map.empty
            // Computed once per file on the first data line, not once per line.
            let schema = lazy (buildSchema header)
            while not textReader.EndOfStream do
                let line = textReader.ReadLine()
                if not (String.IsNullOrEmpty line) then
                    yield recordFromSchema schema.Value separator line
        }
//Examples:
/// Example of name-based mapping: every field is bound to a column by its
/// header text, so the csv file must start with a header line.
type Substance =
    {
        [<Column(Name="subst id")>]
        Id : int
        [<Column(Name="name")>]
        Name : string
        [<Column(Name="sequence")>]
        Sequence : string
    }
/// Example of an un-annotated record: without Column attributes each field is
/// read from the csv column at the field's own position, so the declaration
/// order below has to match the column order of the file.
type Probe =
    {
        Name : string
        Mismatches : int
        Feature : string
        HitLocation : string
        Strain : string
    }
/// Example of index-based mapping: each field names the 0-based csv column it
/// is read from, independent of the order in which the fields are declared.
type ProbeAlt =
    {
        [<Column(Index=4)>]
        Strain : string
        [<Column(Index=0)>]
        Name : string
    }
//read the csv
/// Returns the parser that turns cell text into a boxed value of the given
/// type. Supports float, int, string and bool; any other type fails with
/// "Unknown type ...".
let typeConverter (_type:System.Type) : string -> obj =
    // Lookup table from supported field type to its parser.
    let parsers : (System.Type * (string -> obj)) list =
        [ typeof<float>,  (fun (s:string) -> box (System.Double.Parse s))
          typeof<int>,    (fun (s:string) -> box (System.Int32.Parse s))
          typeof<string>, (fun (s:string) -> box s)
          typeof<bool>,   (fun (s:string) -> box (System.Boolean.Parse s)) ]
    match parsers |> List.tryFind (fun (supported, _) -> supported = _type) with
    | Some (_, parse) -> parse
    | None -> failwithf "Unknown type %A" _type
// Sample usage: read a tab-separated file whose first line is a header into
// Probe records.
let hasHeader = true
let separator = '\t'
let path = "" //....
let reader = CsvReader<Probe>(typeConverter)
// ReadFile returns a lazy sequence; nothing is read until probes is enumerated.
let probes = reader.ReadFile(path, separator, hasHeader)
|
module Csv
namespace System
namespace System.IO
namespace System.Reflection
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Reflection
Multiple items
type ColumnAttribute =
inherit Attribute
new : unit -> ColumnAttribute
new : index:int option * name:string option -> ColumnAttribute
member Index : int
member Name : string
member Index : int with set
member Name : string with set
Full name: Csv.ColumnAttribute
--------------------
new : unit -> ColumnAttribute
new : index:int option * name:string option -> ColumnAttribute
val index : int option
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
type 'T option = Option<'T>
Full name: Microsoft.FSharp.Core.option<_>
val name : string option
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
Multiple items
type Attribute =
member Equals : obj:obj -> bool
member GetHashCode : unit -> int
member IsDefaultAttribute : unit -> bool
member Match : obj:obj -> bool
member TypeId : obj
static member GetCustomAttribute : element:MemberInfo * attributeType:Type -> Attribute + 7 overloads
static member GetCustomAttributes : element:MemberInfo -> Attribute[] + 15 overloads
static member IsDefined : element:MemberInfo * attributeType:Type -> bool + 7 overloads
Full name: System.Attribute
--------------------
Attribute() : unit
val mutable index : int option
val mutable name : string option
union case Option.None: Option<'T>
val x : ColumnAttribute
member ColumnAttribute.Index : int with set
Full name: Csv.ColumnAttribute.Index
union case Option.Some: Value: 'T -> Option<'T>
val i : int
val set : elements:seq<'T> -> Set<'T> (requires comparison)
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.set
val value : int
member ColumnAttribute.Name : string with set
Full name: Csv.ColumnAttribute.Name
val n : string
val value : string
Multiple items
type CsvReader<'a> =
new : typeConverter:(Type -> string -> obj) -> CsvReader<'a>
member CreateRecord : header:Map<string,int> * delim:char * line:string -> 'a
member ReadFile : file:string * separator:char * firstLineHasHeader:bool -> seq<'a>
Full name: Csv.CsvReader<_>
--------------------
new : typeConverter:(Type -> string -> obj) -> CsvReader<'a>
val typeConverter : (Type -> string -> obj)
type Type =
inherit MemberInfo
member Assembly : Assembly
member AssemblyQualifiedName : string
member Attributes : TypeAttributes
member BaseType : Type
member ContainsGenericParameters : bool
member DeclaringMethod : MethodBase
member DeclaringType : Type
member Equals : o:obj -> bool + 1 overload
member FindInterfaces : filter:TypeFilter * filterCriteria:obj -> Type[]
member FindMembers : memberType:MemberTypes * bindingAttr:BindingFlags * filter:MemberFilter * filterCriteria:obj -> MemberInfo[]
...
Full name: System.Type
type obj = Object
Full name: Microsoft.FSharp.Core.obj
val mutable header : Map<string,int>
Multiple items
module Map
from Microsoft.FSharp.Collections
--------------------
type Map<'Key,'Value (requires comparison)> =
interface IEnumerable
interface IComparable
interface IEnumerable<KeyValuePair<'Key,'Value>>
interface ICollection<KeyValuePair<'Key,'Value>>
interface IDictionary<'Key,'Value>
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
member Add : key:'Key * value:'Value -> Map<'Key,'Value>
member ContainsKey : key:'Key -> bool
override Equals : obj -> bool
member Remove : key:'Key -> Map<'Key,'Value>
...
Full name: Microsoft.FSharp.Collections.Map<_,_>
--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val empty<'Key,'T (requires comparison)> : Map<'Key,'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Map.empty
val recordType : Type
val typeof<'T> : Type
Full name: Microsoft.FSharp.Core.Operators.typeof
val fields : PropertyInfo []
type FSharpType =
static member GetExceptionFields : exceptionType:Type * ?bindingFlags:BindingFlags -> PropertyInfo []
static member GetFunctionElements : functionType:Type -> Type * Type
static member GetRecordFields : recordType:Type * ?bindingFlags:BindingFlags -> PropertyInfo []
static member GetTupleElements : tupleType:Type -> Type []
static member GetUnionCases : unionType:Type * ?bindingFlags:BindingFlags -> UnionCaseInfo []
static member IsExceptionRepresentation : exceptionType:Type * ?bindingFlags:BindingFlags -> bool
static member IsFunction : typ:Type -> bool
static member IsModule : typ:Type -> bool
static member IsRecord : typ:Type * ?bindingFlags:BindingFlags -> bool
static member IsTuple : typ:Type -> bool
...
Full name: Microsoft.FSharp.Reflection.FSharpType
static member FSharpType.GetRecordFields : recordType:Type * ?allowAccessToPrivateRepresentation:bool -> PropertyInfo []
static member FSharpType.GetRecordFields : recordType:Type * ?bindingFlags:BindingFlags -> PropertyInfo []
val objectBuilder : (obj [] -> obj)
type FSharpValue =
static member GetExceptionFields : exn:obj * ?bindingFlags:BindingFlags -> obj []
static member GetRecordField : record:obj * info:PropertyInfo -> obj
static member GetRecordFields : record:obj * ?bindingFlags:BindingFlags -> obj []
static member GetTupleField : tuple:obj * index:int -> obj
static member GetTupleFields : tuple:obj -> obj []
static member GetUnionFields : value:obj * unionType:Type * ?bindingFlags:BindingFlags -> UnionCaseInfo * obj []
static member MakeFunction : functionType:Type * implementation:(obj -> obj) -> obj
static member MakeRecord : recordType:Type * values:obj [] * ?bindingFlags:BindingFlags -> obj
static member MakeTuple : tupleElements:obj [] * tupleType:Type -> obj
static member MakeUnion : unionCase:UnionCaseInfo * args:obj [] * ?bindingFlags:BindingFlags -> obj
...
Full name: Microsoft.FSharp.Reflection.FSharpValue
static member FSharpValue.PreComputeRecordConstructor : recordType:Type * ?allowAccessToPrivateRepresentation:bool -> (obj [] -> obj)
static member FSharpValue.PreComputeRecordConstructor : recordType:Type * ?bindingFlags:BindingFlags -> (obj [] -> obj)
val split : (char -> string -> string [])
val delim : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.char
--------------------
type char = Char
Full name: Microsoft.FSharp.Core.char
val line : string
String.Split([<ParamArray>] separator: char []) : string []
String.Split(separator: string [], options: StringSplitOptions) : string []
String.Split(separator: char [], options: StringSplitOptions) : string []
String.Split(separator: char [], count: int) : string []
String.Split(separator: string [], count: int, options: StringSplitOptions) : string []
String.Split(separator: char [], count: int, options: StringSplitOptions) : string []
type Array =
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...
Full name: System.Array
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
val s : string
String.Trim() : string
String.Trim([<ParamArray>] trimChars: char []) : string
val x : CsvReader<'a>
member CsvReader.CreateRecord : header:Map<string,int> * delim:char * line:string -> 'a
Full name: Csv.CsvReader`1.CreateRecord
val header : Map<string,int>
val lookupFromHeader : (ColumnAttribute -> int option)
val column : ColumnAttribute
property ColumnAttribute.Name: string
val name : string
Multiple items
type String =
new : value:char -> string + 7 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 2 overloads
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
member GetHashCode : unit -> int
...
Full name: System.String
--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
field string.Empty
val e : exn
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
val schema : (int * string * int * (string -> obj)) []
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.mapi
val fieldIndex : int
val field : PropertyInfo
val propertyInfo : PropertyInfo
Type.GetProperty(name: string) : PropertyInfo
Type.GetProperty(name: string, returnType: Type) : PropertyInfo
Type.GetProperty(name: string, types: Type []) : PropertyInfo
Type.GetProperty(name: string, bindingAttr: BindingFlags) : PropertyInfo
Type.GetProperty(name: string, returnType: Type, types: Type []) : PropertyInfo
Type.GetProperty(name: string, returnType: Type, types: Type [], modifiers: ParameterModifier []) : PropertyInfo
Type.GetProperty(name: string, bindingAttr: BindingFlags, binder: Binder, returnType: Type, types: Type [], modifiers: ParameterModifier []) : PropertyInfo
property MemberInfo.Name: string
val deserializeColumnData : (string -> obj)
property PropertyInfo.PropertyType: Type
val columnIndex : int
MemberInfo.GetCustomAttributes(inherit: bool) : obj []
MemberInfo.GetCustomAttributes(attributeType: Type, inherit: bool) : obj []
val col : ColumnAttribute
property ColumnAttribute.Index: int
val fieldContentFromSchema : (string [] -> obj [])
val words : string []
val deserializedData : obj []
val fieldName : string
val words : obj []
val convertColumn : (string -> string * (string -> 'b) -> 'b)
val colText : string
val deserializeColumnData : (string -> 'b)
Multiple items
val obj : obj
--------------------
type obj = Object
Full name: Microsoft.FSharp.Core.obj
val unbox : value:obj -> 'T
Full name: Microsoft.FSharp.Core.Operators.unbox
member CsvReader.ReadFile : file:string * separator:char * firstLineHasHeader:bool -> seq<'a>
Full name: Csv.CsvReader`1.ReadFile
val file : string
val separator : char
val firstLineHasHeader : bool
type bool = Boolean
Full name: Microsoft.FSharp.Core.bool
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val textReader : StreamReader
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
File.OpenText(path: string) : StreamReader
StreamReader.ReadLine() : string
val filter : predicate:('T -> bool) -> array:'T [] -> 'T []
Full name: Microsoft.FSharp.Collections.Array.filter
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
String.IsNullOrWhiteSpace(value: string) : bool
val ofArray : elements:('Key * 'T) [] -> Map<'Key,'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Map.ofArray
property StreamReader.EndOfStream: bool
String.IsNullOrEmpty(value: string) : bool
member CsvReader.CreateRecord : header:Map<string,int> * delim:char * line:string -> 'a
type Substance =
{Id: int;
Name: string;
Sequence: string;}
Full name: Csv.Substance
Substance.Id: int
Substance.Name: string
Substance.Sequence: string
type Probe =
{Name: string;
Mismatches: int;
Feature: string;
HitLocation: string;
Strain: string;}
Full name: Csv.Probe
Probe.Name: string
Probe.Mismatches: int
Probe.Feature: string
Probe.HitLocation: string
Probe.Strain: string
type ProbeAlt =
{Strain: string;
Name: string;}
Full name: Csv.ProbeAlt
ProbeAlt.Strain: string
ProbeAlt.Name: string
val typeConverter : _type:Type -> (string -> obj)
Full name: Csv.typeConverter
val _type : Type
val t : Type
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
type Double =
struct
member CompareTo : value:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 3 overloads
static val MinValue : float
static val MaxValue : float
static val Epsilon : float
static val NegativeInfinity : float
static val PositiveInfinity : float
...
end
Full name: System.Double
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: Globalization.NumberStyles) : float
Double.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : float
val box : value:'T -> obj
Full name: Microsoft.FSharp.Core.Operators.box
type Int32 =
struct
member CompareTo : value:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 3 overloads
static val MaxValue : int
static val MinValue : int
static member Parse : s:string -> int + 3 overloads
static member TryParse : s:string * result:int -> bool + 1 overload
end
Full name: System.Int32
Int32.Parse(s: string) : int
Int32.Parse(s: string, provider: IFormatProvider) : int
Int32.Parse(s: string, style: Globalization.NumberStyles) : int
Int32.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : int
type Boolean =
struct
member CompareTo : obj:obj -> int + 1 overload
member Equals : obj:obj -> bool + 1 overload
member GetHashCode : unit -> int
member GetTypeCode : unit -> TypeCode
member ToString : unit -> string + 1 overload
static val TrueString : string
static val FalseString : string
static member Parse : value:string -> bool
static member TryParse : value:string * result:bool -> bool
end
Full name: System.Boolean
Boolean.Parse(value: string) : bool
val path : string
Full name: Csv.path
val reader : CsvReader<Probe>
Full name: Csv.reader
val hasHeader : bool
Full name: Csv.hasHeader
val separator : char
Full name: Csv.separator
val probes : seq<Probe>
Full name: Csv.probes
member CsvReader.ReadFile : file:string * separator:char * firstLineHasHeader:bool -> seq<'a>
More information