10 people like it.

Structural XML reader

A simple library that reads XML documents into user-defined F# types. The user defines a set of discriminated unions that model the elements of the file, and the library automatically creates values of these types from an XML file.

Implementation of the structural XML parser

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
80: 
81: 
82: 
83: 
84: 
85: 
86: 
// Simple API for working with XML based on matching data 
// to a structure defined using F# discriminated unions
#r "System.Xml.Linq.dll"

open System
open System.Net
open System.Xml.Linq

open Microsoft.FSharp.Reflection

module Internal = 
  /// Carrier type for a generic static method, so that the method can be
  /// looked up by name and instantiated through reflection.
  type Helper = 
    /// Casts every item of an untyped sequence to 'T and collects
    /// the results into an F# list.
    static member CastList<'T>(input:System.Collections.IEnumerable) =
      List.ofSeq (Seq.cast<'T> input)

  /// Builds an F# list whose element type is only known at run time.
  /// 'typ' is the element type and 'input' is the untyped sequence; the
  /// result is a 'list<typ>' returned as 'obj'.
  let castList typ input = 
    // Locate the open generic method, then close it over the runtime type
    let openMethod = typeof<Helper>.GetMethod("CastList")
    let closedMethod = openMethod.MakeGenericMethod [| typ |]
    // Static method: there is no receiver object, hence 'null'
    closedMethod.Invoke(null, [| input |])


/// Provides easy access to XML data: loads a document and exposes it as a
/// value of the user-defined discriminated-union type 'T. Union case names
/// are matched against XML element names and the fields of each case are
/// filled in recursively from the matching element.
type StructuralXml<'T> private (url:string, ns, lowerCase) = 

  /// A name resolver that turns a union case name into an XName
  /// depending on the class configuration (namespace, lowerCase flag)
  let resolveName (str:string) = 
    // Lower-case with the invariant culture. XML element names are not
    // culture-specific, and the culture-sensitive ToLower() maps 'I' to a
    // dotless i under e.g. Turkish cultures, so a case named "Item" would
    // no longer match an <item> element.
    let str = if lowerCase then str.ToLowerInvariant() else str
    match ns with 
    | Some(ns) -> XName.Get(str, ns)
    | _ -> XName.Get(str)
    
  /// Builds a value of 'targetType' from the content of 'element'.
  /// Supported targets are 'string', a discriminated union, or an F# list
  /// of a discriminated union; anything else fails with an exception.
  /// The result is returned boxed as 'obj'.
  let rec parseType (element:XContainer) (targetType:System.Type) = 

    // Determine information about the target type
    // If it is list, the 'typ' is the element type.
    let isList, typ = 
      if targetType.IsGenericType && 
         targetType.GetGenericTypeDefinition() = typedefof<_ list> then 
        true, targetType.GetGenericArguments().[0]
      else false, targetType
  
    if typ = typeof<string> then
      // When target is 'string', get the XElement's body.
      // The downcast fails if 'element' is not an XElement
      // (for example when it is the document itself).
      box (element :?> XElement).Value
    elif not(FSharpType.IsUnion(typ)) then 
      // When it's not a discriminated union, then that's error
      failwithf "Expected discriminated union!\nGot: %s" typ.Name
    else
      // For every union case, find all direct child elements whose
      // name matches the case name and build one union value per element
      let children =
        [ for case in FSharpType.GetUnionCases(typ) do
            let fields = case.GetFields()
            let children = element.Elements(resolveName case.Name)
            for ch in children do 
              // Recursively parse the matched element once per field,
              // using the field's declared type as the target
              let args = [| for field in fields -> parseType ch field.PropertyType |]
              yield FSharpValue.MakeUnion(case, args) ]
      
      // If the result is list, convert it to the right type.
      // If it's not a list, return the child as object.
      match isList, children with
      | true, children -> Internal.castList typ children
      | false, [child] -> child
      | false, _ -> 
          // When expected type is non-list, exactly one matching element
          // is required; zero or multiple matches are an error...
          failwithf 
            "Wrong number of children in node (%d).\nWhen formatting XML as '%s'." 
            children.Length typ.Name


  // Parse the document & store it in a local field. This runs in the
  // constructor, so loading or parsing failures surface from 'Load'.
  let root : 'T = (parseType (XDocument.Load(url)) typeof<'T>) :?> 'T

  /// Returns the parsed XML data structure as a value of the user-specified type
  member x.Root = root

  /// Loads XML data from the specified URI (passed to XDocument.Load) and
  /// dynamically matches it to the structure described by the discriminated
  /// union 'T. The optional 'Namespace' argument specifies the XML namespace
  /// used for all element lookups; the optional 'LowerCase' argument
  /// (default false) specifies that case names should be lower-cased
  /// before they are matched against element names.
  static member Load<'T>(url, ?Namespace, ?LowerCase) = 
    new StructuralXml<'T>(url, Namespace, defaultArg LowerCase false)

Sample usage - parsing XML

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
// RSS feed model. Each single-case union below stands for one XML element
// (the case name is the element name) and its fields describe the content
// expected inside that element. StructuralXml reads a document into
// values of these types.

// Leaf elements carrying plain text
type Title = 
  | Title of string
type Link = 
  | Link of string
type Description = 
  | Description of string

// Composite elements: an item, the channel holding the items, and the root
type Item = 
  | Item of Title * Link * Description
type Channel = 
  | Channel of Title * Link * Description * Item list
type Rss = 
  | Rss of Channel

// Fetch the feed. Element names in the XML are all lowercase, so ask the
// loader to lower-case the union case names before matching them.
let url = "http://feeds.guardian.co.uk/theguardian/world/rss"
let doc = StructuralXml<Rss>.Load(url, LowerCase = true)

// Take the parsed value apart along the RSS structure: the root holds
// the channel, and the channel's last field is the list of items
let (Rss(Channel(_, _, _, items) as channel)) = doc.Root

// Print the title of every item in the feed
items |> List.iter (fun (Item(Title t, _, _)) -> printfn "%s" t)
namespace System
namespace System.Net
namespace System.Xml
namespace System.Xml.Linq
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Reflection
type Helper =
  static member CastList : input:IEnumerable -> 'T list

Full name: Script.Internal.Helper
static member Helper.CastList : input:Collections.IEnumerable -> 'T list

Full name: Script.Internal.Helper.CastList
val input : Collections.IEnumerable
namespace System.Collections
type IEnumerable =
  member GetEnumerator : unit -> IEnumerator

Full name: System.Collections.IEnumerable
module Seq

from Microsoft.FSharp.Collections
val cast : source:Collections.IEnumerable -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.cast
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val castList : typ:Type -> input:'a -> obj

Full name: Script.Internal.castList


 Converts any 'IEnumerable' to a list 'list<#type>' where
 type is specified as an argument System.Type
val typ : Type
val input : 'a
val mi : Reflection.MethodInfo
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
Reflection.MethodBase.Invoke(obj: obj, parameters: obj []) : obj
Reflection.MethodBase.Invoke(obj: obj, invokeAttr: Reflection.BindingFlags, binder: Reflection.Binder, parameters: obj [], culture: Globalization.CultureInfo) : obj
Multiple items
type StructuralXml<'T> =
  private new : url:string * ns:string option * lowerCase:bool -> StructuralXml<'T>
  member Root : 'T
  static member Load : url:string * ?Namespace:string * ?LowerCase:bool -> StructuralXml<'T>

Full name: Script.StructuralXml<_>


 Provides an easy access to XML data


--------------------
private new : url:string * ns:string option * lowerCase:bool -> StructuralXml<'T>
val url : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val ns : string option
val lowerCase : bool
val resolveName : (string -> XName)


 A name resolver that turns member name into XName
 depending on the class configuration (namespace, lowerCase flag)
val str : string
String.ToLower() : string
String.ToLower(culture: Globalization.CultureInfo) : string
union case Option.Some: Value: 'T -> Option<'T>
val ns : string
type XName =
  member Equals : obj:obj -> bool
  member GetHashCode : unit -> int
  member LocalName : string
  member Namespace : XNamespace
  member NamespaceName : string
  member ToString : unit -> string
  static member Get : expandedName:string -> XName + 1 overload

Full name: System.Xml.Linq.XName
XName.Get(expandedName: string) : XName
XName.Get(localName: string, namespaceName: string) : XName
val parseType : (XContainer -> Type -> obj)
val element : XContainer
type XContainer =
  inherit XNode
  member Add : content:obj -> unit + 1 overload
  member AddFirst : content:obj -> unit + 1 overload
  member CreateWriter : unit -> XmlWriter
  member DescendantNodes : unit -> IEnumerable<XNode>
  member Descendants : unit -> IEnumerable<XElement> + 1 overload
  member Element : name:XName -> XElement
  member Elements : unit -> IEnumerable<XElement> + 1 overload
  member FirstNode : XNode
  member LastNode : XNode
  member Nodes : unit -> IEnumerable<XNode>
  ...

Full name: System.Xml.Linq.XContainer
val targetType : Type
type Type =
  inherit MemberInfo
  member Assembly : Assembly
  member AssemblyQualifiedName : string
  member Attributes : TypeAttributes
  member BaseType : Type
  member ContainsGenericParameters : bool
  member DeclaringMethod : MethodBase
  member DeclaringType : Type
  member Equals : o:obj -> bool + 1 overload
  member FindInterfaces : filter:TypeFilter * filterCriteria:obj -> Type[]
  member FindMembers : memberType:MemberTypes * bindingAttr:BindingFlags * filter:MemberFilter * filterCriteria:obj -> MemberInfo[]
  ...

Full name: System.Type
val isList : bool
property Type.IsGenericType: bool
Type.GetGenericTypeDefinition() : Type
val typedefof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typedefof
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
Type.GetGenericArguments() : Type []
val box : value:'T -> obj

Full name: Microsoft.FSharp.Core.Operators.box
Multiple items
type XElement =
  inherit XContainer
  new : name:XName -> XElement + 4 overloads
  member AncestorsAndSelf : unit -> IEnumerable<XElement> + 1 overload
  member Attribute : name:XName -> XAttribute
  member Attributes : unit -> IEnumerable<XAttribute> + 1 overload
  member DescendantNodesAndSelf : unit -> IEnumerable<XNode>
  member DescendantsAndSelf : unit -> IEnumerable<XElement> + 1 overload
  member FirstAttribute : XAttribute
  member GetDefaultNamespace : unit -> XNamespace
  member GetNamespaceOfPrefix : prefix:string -> XNamespace
  member GetPrefixOfNamespace : ns:XNamespace -> string
  ...

Full name: System.Xml.Linq.XElement

--------------------
XElement(name: XName) : unit
XElement(other: XElement) : unit
XElement(other: XStreamingElement) : unit
XElement(name: XName, content: obj) : unit
XElement(name: XName, [<ParamArray>] content: obj []) : unit
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
type FSharpType =
  static member GetExceptionFields : exceptionType:Type * ?bindingFlags:BindingFlags -> PropertyInfo []
  static member GetFunctionElements : functionType:Type -> Type * Type
  static member GetRecordFields : recordType:Type * ?bindingFlags:BindingFlags -> PropertyInfo []
  static member GetTupleElements : tupleType:Type -> Type []
  static member GetUnionCases : unionType:Type * ?bindingFlags:BindingFlags -> UnionCaseInfo []
  static member IsExceptionRepresentation : exceptionType:Type * ?bindingFlags:BindingFlags -> bool
  static member IsFunction : typ:Type -> bool
  static member IsModule : typ:Type -> bool
  static member IsRecord : typ:Type * ?bindingFlags:BindingFlags -> bool
  static member IsTuple : typ:Type -> bool
  ...

Full name: Microsoft.FSharp.Reflection.FSharpType
static member FSharpType.IsUnion : typ:Type * ?allowAccessToPrivateRepresentation:bool -> bool
static member FSharpType.IsUnion : typ:Type * ?bindingFlags:Reflection.BindingFlags -> bool
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
property Reflection.MemberInfo.Name: string
val children : obj list
val case : UnionCaseInfo
static member FSharpType.GetUnionCases : unionType:Type * ?allowAccessToPrivateRepresentation:bool -> UnionCaseInfo []
static member FSharpType.GetUnionCases : unionType:Type * ?bindingFlags:Reflection.BindingFlags -> UnionCaseInfo []
val fields : Reflection.PropertyInfo []
member UnionCaseInfo.GetFields : unit -> Reflection.PropertyInfo []
val children : Collections.Generic.IEnumerable<XElement>
XContainer.Elements() : Collections.Generic.IEnumerable<XElement>
XContainer.Elements(name: XName) : Collections.Generic.IEnumerable<XElement>
property UnionCaseInfo.Name: string
val ch : XElement
val args : obj []
val field : Reflection.PropertyInfo
property Reflection.PropertyInfo.PropertyType: Type
type FSharpValue =
  static member GetExceptionFields : exn:obj * ?bindingFlags:BindingFlags -> obj []
  static member GetRecordField : record:obj * info:PropertyInfo -> obj
  static member GetRecordFields : record:obj * ?bindingFlags:BindingFlags -> obj []
  static member GetTupleField : tuple:obj * index:int -> obj
  static member GetTupleFields : tuple:obj -> obj []
  static member GetUnionFields : value:obj * unionType:Type * ?bindingFlags:BindingFlags -> UnionCaseInfo * obj []
  static member MakeFunction : functionType:Type * implementation:(obj -> obj) -> obj
  static member MakeRecord : recordType:Type * values:obj [] * ?bindingFlags:BindingFlags -> obj
  static member MakeTuple : tupleElements:obj [] * tupleType:Type -> obj
  static member MakeUnion : unionCase:UnionCaseInfo * args:obj [] * ?bindingFlags:BindingFlags -> obj
  ...

Full name: Microsoft.FSharp.Reflection.FSharpValue
static member FSharpValue.MakeUnion : unionCase:UnionCaseInfo * args:obj [] * ?allowAccessToPrivateRepresentation:bool -> obj
static member FSharpValue.MakeUnion : unionCase:UnionCaseInfo * args:obj [] * ?bindingFlags:Reflection.BindingFlags -> obj
module Internal

from Script
val child : obj
property List.Length: int
val root : 'T
Multiple items
type XDocument =
  inherit XContainer
  new : unit -> XDocument + 3 overloads
  member Declaration : XDeclaration with get, set
  member DocumentType : XDocumentType
  member NodeType : XmlNodeType
  member Root : XElement
  member Save : fileName:string -> unit + 6 overloads
  member WriteTo : writer:XmlWriter -> unit
  static member Load : uri:string -> XDocument + 7 overloads
  static member Parse : text:string -> XDocument + 1 overload

Full name: System.Xml.Linq.XDocument

--------------------
XDocument() : unit
XDocument([<ParamArray>] content: obj []) : unit
XDocument(other: XDocument) : unit
XDocument(declaration: XDeclaration, [<ParamArray>] content: obj []) : unit
XDocument.Load(reader: Xml.XmlReader) : XDocument
XDocument.Load(textReader: IO.TextReader) : XDocument
XDocument.Load(stream: IO.Stream) : XDocument
XDocument.Load(uri: string) : XDocument
XDocument.Load(reader: Xml.XmlReader, options: LoadOptions) : XDocument
XDocument.Load(textReader: IO.TextReader, options: LoadOptions) : XDocument
XDocument.Load(stream: IO.Stream, options: LoadOptions) : XDocument
XDocument.Load(uri: string, options: LoadOptions) : XDocument
val x : StructuralXml<'T>
member StructuralXml.Root : 'T

Full name: Script.StructuralXml`1.Root


 Returns the parsed XML data structure as a value of the user-specified type
static member StructuralXml.Load : url:string * ?Namespace:string * ?LowerCase:bool -> StructuralXml<'T>

Full name: Script.StructuralXml`1.Load


 Load XML data from the specified URI and dynamically match them
 to a structure described by the discriminated union 'T. Optional
 arguments can be used to specify default XML namespace and to
 specify that case names should be treated as lower case.
val Namespace : string option
val LowerCase : bool option
val defaultArg : arg:'T option -> defaultValue:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.defaultArg
Multiple items
union case Title.Title: string -> Title

--------------------
type Title = | Title of string

Full name: Script.Title
Multiple items
union case Link.Link: string -> Link

--------------------
type Link = | Link of string

Full name: Script.Link
Multiple items
union case Description.Description: string -> Description

--------------------
type Description = | Description of string

Full name: Script.Description
Multiple items
union case Item.Item: Title * Link * Description -> Item

--------------------
type Item = | Item of Title * Link * Description

Full name: Script.Item
Multiple items
union case Channel.Channel: Title * Link * Description * Item list -> Channel

--------------------
type Channel = | Channel of Title * Link * Description * Item list

Full name: Script.Channel
Multiple items
union case Rss.Rss: Channel -> Rss

--------------------
type Rss = | Rss of Channel

Full name: Script.Rss
val url : string

Full name: Script.url
val doc : StructuralXml<Rss>

Full name: Script.doc
type StructuralXml<'T> =
  private new : url:string * ns:string option * lowerCase:bool -> StructuralXml<'T>
  member Root : 'T
  static member Load : url:string * ?Namespace:string * ?LowerCase:bool -> StructuralXml<'T>

Full name: Script.StructuralXml<_>


 Provides an easy access to XML data
static member StructuralXml.Load : url:string * ?Namespace:string * ?LowerCase:bool -> StructuralXml<'T>


 Load XML data from the specified URI and dynamically match them
 to a structure described by the discriminated union 'T. Optional
 arguments can be used to specify default XML namespace and to
 specify that case names should be treated as lower case.
val channel : Channel

Full name: Script.channel
property StructuralXml.Root: Rss


 Returns the parsed XML data structure as a value of the user-specified type
val items : Item list

Full name: Script.items
val t : string
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn

More information

Link:http://fssnip.net/jk
Posted:10 years ago
Author:Tomas Petricek
Tags: xml , rss , type