2 people like it.

Infer Xml Schemas from Sample Documents

XML schemas are used to validate XML documents. The following snippet demonstrates how to infer a schema from a set of sample documents. Schemas are also useful for visualizing the structure of a class of documents.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
#r "System.Xml.dll";;
open System;;
open System.Linq;;
open System.Xml;;
open System.Xml.Schema;;
 
// Paths of the sample XML documents the inferred schema must cover.
let samples = [@"file1.xml"; @"file2.xml"; @"fileN.xml" ];;
let inference = new XmlSchemaInference();;
// Fold over the samples, passing the schema set returned for one document
// into the inference call for the next, starting from an empty set.
// Each reader is opened just before its document is processed and is
// disposed by `use` as soon as the lambda returns, so no file handle
// stays open after its document has been read.
let inferred =
    samples
    |> List.fold
        (fun (schemas : XmlSchemaSet) (path : string) ->
            use reader = XmlReader.Create(path)
            inference.InferSchema(reader, schemas))
        (new XmlSchemaSet());;

// XmlSchemaSet.Schemas() returns a non-generic ICollection; cast its
// elements to XmlSchema and write each schema to standard output.
inferred.Schemas()
|> Seq.cast<XmlSchema>
|> Seq.iter (fun schema -> schema.Write(Console.Out));;
namespace System
namespace System.Linq
namespace System.Xml
namespace System.Xml.Schema
val samples : string list

Full name: Script.samples
val inference : XmlSchemaInference

Full name: Script.inference
Multiple items
type XmlSchemaInference =
  new : unit -> XmlSchemaInference
  member InferSchema : instanceDocument:XmlReader -> XmlSchemaSet + 1 overload
  member Occurrence : InferenceOption with get, set
  member TypeInference : InferenceOption with get, set
  nested type InferenceOption

Full name: System.Xml.Schema.XmlSchemaInference

--------------------
XmlSchemaInference() : unit
val inferred : XmlSchemaSet

Full name: Script.inferred
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val x : string
type XmlReader =
  member AttributeCount : int
  member BaseURI : string
  member CanReadBinaryContent : bool
  member CanReadValueChunk : bool
  member CanResolveEntity : bool
  member Close : unit -> unit
  member Depth : int
  member EOF : bool
  member GetAttribute : name:string -> string + 2 overloads
  member HasAttributes : bool
  ...

Full name: System.Xml.XmlReader
XmlReader.Create(input: IO.TextReader) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.Stream) : XmlReader
   (+0 other overloads)
XmlReader.Create(inputUri: string) : XmlReader
   (+0 other overloads)
XmlReader.Create(reader: XmlReader, settings: XmlReaderSettings) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.Stream, settings: XmlReaderSettings) : XmlReader
   (+0 other overloads)
XmlReader.Create(inputUri: string, settings: XmlReaderSettings) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings, inputContext: XmlParserContext) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings, baseUri: string) : XmlReader
   (+0 other overloads)
XmlReader.Create(input: IO.Stream, settings: XmlReaderSettings, inputContext: XmlParserContext) : XmlReader
   (+0 other overloads)
val fold : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State

Full name: Microsoft.FSharp.Collections.List.fold
val schema : XmlSchemaSet
val x : XmlReader
XmlSchemaInference.InferSchema(instanceDocument: XmlReader) : XmlSchemaSet
XmlSchemaInference.InferSchema(instanceDocument: XmlReader, schemas: XmlSchemaSet) : XmlSchemaSet
Multiple items
type XmlSchemaSet =
  new : unit -> XmlSchemaSet + 1 overload
  member Add : schemas:XmlSchemaSet -> unit + 3 overloads
  member CompilationSettings : XmlSchemaCompilationSettings with get, set
  member Compile : unit -> unit
  member Contains : targetNamespace:string -> bool + 1 overload
  member CopyTo : schemas:XmlSchema[] * index:int -> unit
  member Count : int
  member GlobalAttributes : XmlSchemaObjectTable
  member GlobalElements : XmlSchemaObjectTable
  member GlobalTypes : XmlSchemaObjectTable
  ...

Full name: System.Xml.Schema.XmlSchemaSet

--------------------
XmlSchemaSet() : unit
XmlSchemaSet(nameTable: XmlNameTable) : unit
type Enumerable =
  static member Aggregate<'TSource> : source:IEnumerable<'TSource> * func:Func<'TSource, 'TSource, 'TSource> -> 'TSource + 2 overloads
  static member All<'TSource> : source:IEnumerable<'TSource> * predicate:Func<'TSource, bool> -> bool
  static member Any<'TSource> : source:IEnumerable<'TSource> -> bool + 1 overload
  static member AsEnumerable<'TSource> : source:IEnumerable<'TSource> -> IEnumerable<'TSource>
  static member Average : source:IEnumerable<int> -> float + 19 overloads
  static member Cast<'TResult> : source:IEnumerable -> IEnumerable<'TResult>
  static member Concat<'TSource> : first:IEnumerable<'TSource> * second:IEnumerable<'TSource> -> IEnumerable<'TSource>
  static member Contains<'TSource> : source:IEnumerable<'TSource> * value:'TSource -> bool + 1 overload
  static member Count<'TSource> : source:IEnumerable<'TSource> -> int + 1 overload
  static member DefaultIfEmpty<'TSource> : source:IEnumerable<'TSource> -> IEnumerable<'TSource> + 1 overload
  ...

Full name: System.Linq.Enumerable
Enumerable.Cast<'TResult>(source: Collections.IEnumerable) : Collections.Generic.IEnumerable<'TResult>
Multiple items
type XmlSchema =
  inherit XmlSchemaObject
  new : unit -> XmlSchema
  member AttributeFormDefault : XmlSchemaForm with get, set
  member AttributeGroups : XmlSchemaObjectTable
  member Attributes : XmlSchemaObjectTable
  member BlockDefault : XmlSchemaDerivationMethod with get, set
  member Compile : validationEventHandler:ValidationEventHandler -> unit + 1 overload
  member ElementFormDefault : XmlSchemaForm with get, set
  member Elements : XmlSchemaObjectTable
  member FinalDefault : XmlSchemaDerivationMethod with get, set
  member Groups : XmlSchemaObjectTable
  ...

Full name: System.Xml.Schema.XmlSchema

--------------------
type XmlSchemaAttribute =
  inherit XmlSchemaAnnotated
  new : unit -> XmlSchemaAttribute
  member AttributeSchemaType : XmlSchemaSimpleType
  member AttributeType : obj
  member DefaultValue : string with get, set
  member FixedValue : string with get, set
  member Form : XmlSchemaForm with get, set
  member Name : string with get, set
  member QualifiedName : XmlQualifiedName
  member RefName : XmlQualifiedName with get, set
  member SchemaType : XmlSchemaSimpleType with get, set
  ...

Full name: System.Xml.Schema.XmlSchemaAttribute

--------------------
XmlSchema() : unit

--------------------
XmlSchemaAttribute() : unit
XmlSchemaSet.Schemas() : Collections.ICollection
XmlSchemaSet.Schemas(targetNamespace: string) : Collections.ICollection
module Seq

from Microsoft.FSharp.Collections
val iter : action:('T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iter
val schema : XmlSchema
XmlSchema.Write(writer: XmlWriter) : unit
XmlSchema.Write(writer: IO.TextWriter) : unit
XmlSchema.Write(stream: IO.Stream) : unit
XmlSchema.Write(writer: XmlWriter, namespaceManager: XmlNamespaceManager) : unit
XmlSchema.Write(writer: IO.TextWriter, namespaceManager: XmlNamespaceManager) : unit
XmlSchema.Write(stream: IO.Stream, namespaceManager: XmlNamespaceManager) : unit
type Console =
  static member BackgroundColor : ConsoleColor with get, set
  static member Beep : unit -> unit + 1 overload
  static member BufferHeight : int with get, set
  static member BufferWidth : int with get, set
  static member CapsLock : bool
  static member Clear : unit -> unit
  static member CursorLeft : int with get, set
  static member CursorSize : int with get, set
  static member CursorTop : int with get, set
  static member CursorVisible : bool with get, set
  ...

Full name: System.Console
property Console.Out: IO.TextWriter
Raw view Test code New version

More information

Link:http://fssnip.net/mx
Posted:10 years ago
Author:Jonathan Leaver
Tags: infer , xml , schema