11 people like it.
Like the snippet!
Lazy Xml
A lazy XML structure for processing large XML documents.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
|
#r "FSharp.Powerpack.dll"
open System
open System.Xml
open Microsoft.FSharp.Collections
/// Path to the Wikipedia articles dump that the script reads.
// Verbatim string (@"...") so that no backslash in the Windows path can ever be
// interpreted as an escape sequence; the resulting value is unchanged.
let wiki = @"c:\enwiki-20120307-pages-articles\enwiki-20120307-pages-articles.xml"
/// Name of an XML element (plain string alias).
type name = string
/// Attributes of an XML element as (attribute name, attribute value) pairs.
type attributes = (string * string) list
/// A lazily materialised XML tree.
type LazyXml =
/// An element: its name, its attributes, and a lazy list of its child nodes.
| Element of (name * attributes * LazyList<LazyXml>)
/// A text node carrying its string value.
| Text of string
/// Opens the XML document at `xmlUri` and returns its root element as a `LazyXml`
/// tree. Child nodes are pulled from the underlying `XmlReader` only when the
/// corresponding lazy list is forced.
///
/// Caveats:
///  * Children should be consumed in document order: an element's sub-reader is
///    closed as soon as enumeration moves past that element, so children that were
///    not forced by then can no longer be read from the source.
///  * The `XmlReader` created here is not closed by this function, because the
///    returned tree keeps reading from it lazily.
///  * Fails (via `failwithf`) on node types that are not handled below.
let readLazyXml (xmlUri : string) : LazyXml =
    // Collects (name, value) for every attribute of the current element and then
    // moves the reader back onto the element itself.
    let readAttributes (reader : XmlReader) =
        if reader.HasAttributes then
            let attrs = [ while reader.MoveToNextAttribute() do yield (reader.Name, reader.Value) ]
            reader.MoveToElement() |> ignore
            attrs
        else []
    // Lazily enumerates the sibling nodes that follow the reader's current position,
    // stopping at the enclosing end tag or at the end of input.
    let rec read (reader : XmlReader) : seq<LazyXml> =
        seq {
            if reader.Read() then
                match reader.NodeType with
                | XmlNodeType.Element ->
                    let elementName = reader.Name
                    // The sub-reader is scoped to this element and its descendants.
                    // Its first Read() positions it on the element, so the recursive
                    // `read` starts with the first child.
                    let subtree = reader.ReadSubtree()
                    subtree.Read() |> ignore
                    let attrs = readAttributes reader
                    yield Element (elementName, attrs, subtree |> read |> LazyList.ofSeq)
                    // Closing the sub-reader leaves `reader` on the end of this element
                    // (its end tag, or the element itself when it is empty). The next
                    // `reader.Read()` in the recursive call then lands on the following
                    // sibling. No extra Skip() here: on an end tag Skip() acts like
                    // Read(), which would silently drop the node after every element.
                    subtree.Close()
                    yield! read reader
                | XmlNodeType.EndElement ->
                    ()
                | XmlNodeType.Text
                | XmlNodeType.CDATA
                | XmlNodeType.SignificantWhitespace ->
                    yield Text reader.Value
                    yield! read reader
                | XmlNodeType.Whitespace
                | XmlNodeType.XmlDeclaration
                | XmlNodeType.Comment
                | XmlNodeType.ProcessingInstruction
                | XmlNodeType.DocumentType ->
                    // Carries no tree content: skip and continue with the next node.
                    yield! read reader
                | _ -> failwithf "Not supported XmlNodeType: %s" <| reader.NodeType.ToString()
            else
                ()
        }
    // The first node produced at the top level is the document's root element.
    XmlReader.Create(xmlUri) |> read |> LazyList.ofSeq |> LazyList.head
// Read the dump lazily and pretty-print the resulting root node; a sample of the
// output is reproduced in the comment that follows.
readLazyXml wiki |> printfn "%A"
//Element
// ("mediawiki",
// [("xmlns", "http://www.mediawiki.org/xml/export-0.6/");
// ("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance");
// ("xsi:schemaLocation",
// "http://www.mediawiki.org/xml/export-0.6/ http://www.mediawiki.org/xml/expo
//rt-0.6.xsd");
// ("version", "0.6"); ("xml:lang", "en")],
// seq
// [Element
// ("siteinfo", [],
// seq
// [Element ("sitename", [], seq [Text "Wikipedia"]);
// Element
// ("base", [], seq [Text "http://en.wikipedia.org/wiki/Main_Page"]);
//
// Element ("generator", [], seq [Text "MediaWiki 1.19wmf1"]);
// Element ("case", [], seq [Text "first-letter"]); ...]);
// Element
// ("page", [],
// seq
// [Element ("title", [], seq [Text "AccessibleComputing"]);
// Element ("ns", [], seq [Text "0"]);
// Element ("id", [], seq [Text "10"]);
// Element ("redirect", [("title", "Computer accessibility")], seq []);
//
// ...]);
// Element
// ("page", [],
// seq
// [Element ("title", [], seq [Text "Anarchism"]);
// Element ("ns", [], seq [Text "0"]);
// Element ("id", [], seq [Text "12"]); Element ("sha1", [], seq []);
// ...]);
// Element
// ("page", [],
// seq
// [Element ("title", [], seq [Text "AfghanistanHistory"]);
// Element ("ns", [], seq [Text "0"]);
// Element ("id", [], seq [Text "13"]);
// Element ("redirect", [("title", "History of Afghanistan")], seq []);
//
// ...]); ...])
|
namespace System
namespace System.Xml
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Collections
val wiki : string
Full name: Script.wiki
type name = string
Full name: Script.name
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
type attributes = (string * string) list
Full name: Script.attributes
type 'T list = List<'T>
Full name: Microsoft.FSharp.Collections.list<_>
type LazyXml =
| Element of (name * attributes * obj)
| Text of string
Full name: Script.LazyXml
union case LazyXml.Element: (name * attributes * obj) -> LazyXml
Multiple items
union case LazyXml.Text: string -> LazyXml
--------------------
namespace System.Text
val readLazyXml : xmlUri:string -> LazyXml
Full name: Script.readLazyXml
val xmlUri : string
val readAttributes : (XmlReader -> (string * string) list)
val reader : XmlReader
type XmlReader =
member AttributeCount : int
member BaseURI : string
member CanReadBinaryContent : bool
member CanReadValueChunk : bool
member CanResolveEntity : bool
member Close : unit -> unit
member Depth : int
member EOF : bool
member GetAttribute : name:string -> string + 2 overloads
member HasAttributes : bool
...
Full name: System.Xml.XmlReader
property XmlReader.HasAttributes: bool
XmlReader.MoveToNextAttribute() : bool
property XmlReader.Name: string
property XmlReader.Value: string
val read : (XmlReader -> seq<LazyXml>)
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
XmlReader.Read() : bool
property XmlReader.NodeType: XmlNodeType
type XmlNodeType =
| None = 0
| Element = 1
| Attribute = 2
| Text = 3
| CDATA = 4
| EntityReference = 5
| Entity = 6
| ProcessingInstruction = 7
| Comment = 8
| Document = 9
...
Full name: System.Xml.XmlNodeType
field XmlNodeType.Element = 1
val reader' : XmlReader
XmlReader.ReadSubtree() : XmlReader
val ignore : value:'T -> unit
Full name: Microsoft.FSharp.Core.Operators.ignore
XmlReader.Close() : unit
XmlReader.Skip() : unit
field XmlNodeType.EndElement = 15
field XmlNodeType.Whitespace = 13
field XmlNodeType.Text = 3
val failwithf : format:Printf.StringFormat<'T,'Result> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.failwithf
Enum.ToString() : string
Enum.ToString(format: string) : string
XmlReader.Create(input: IO.TextReader) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.Stream) : XmlReader
(+0 other overloads)
XmlReader.Create(inputUri: string) : XmlReader
(+0 other overloads)
XmlReader.Create(reader: XmlReader, settings: XmlReaderSettings) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.Stream, settings: XmlReaderSettings) : XmlReader
(+0 other overloads)
XmlReader.Create(inputUri: string, settings: XmlReaderSettings) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings, inputContext: XmlParserContext) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.TextReader, settings: XmlReaderSettings, baseUri: string) : XmlReader
(+0 other overloads)
XmlReader.Create(input: IO.Stream, settings: XmlReaderSettings, inputContext: XmlParserContext) : XmlReader
(+0 other overloads)
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
More information