11 people like it.
Like the snippet!
A minimalist XML Parser
A minimalist XML Parser
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256:
257:
258:
259:
260:
261:
262:
263:
264:
265:
266:
267:
268:
269:
270:
271:
|
//BNF-like for XML:
//XmlRoot := XmlElement
//XmlElement := '<' Name ' ' Attribute* '>' (XmlElement* | XmlValueContent) '<\' Name '>' |
// '<' Name ' ' Attribute* '\>'
//Attribute := Name '=' '"' Value '"'
//Value := any char except "
//XmlValueContent := any char except <
//Name := A-Za-z_[A-Za-z0-9_-]*
open System
[<CustomEquality; NoComparison>]
type XmlAttribute =
{Name : string; Value : string}
override this.Equals(that) =
match that with
| :? XmlAttribute as other -> this.Name = other.Name
| _ -> false
override this.GetHashCode() = hash this.Name
type XmlElementBody =
| StrValue of string
| XmlValue of XmlElement array
and
XmlElement = {Name : string; Attributes : XmlAttribute array; Body : XmlElementBody}
let NullXmlAttribute = {Name = null; Value = null}
let NullXmlElement = {Name = null; Attributes = null; Body = XmlValue null}
let readWhitespace (s : string, idx) =
let mutable idx1 = idx
while idx1 < s.Length && (s.[idx1] = ' ' || s.[idx1] = '\r' || s.[idx1] = '\n' || s.[idx1] = '\t') do
idx1 <- idx1 + 1
(s, idx1)
let subString idxDelta (s : string, idx) =
if idxDelta > idx
then (s.Substring(idx, idxDelta - idx), (s, idxDelta))
else (null, (s, idx))
let readName (name : string byref) (s : string, idx) =
let checkValidChars (idx1) =
('_' = s.[idx1]) || ('A' <= s.[idx1] && s.[idx1] <= 'Z') || ('a' <= s.[idx1] && s.[idx1] <= 'z')
let mutable idx1 = idx
if idx1 < s.Length && checkValidChars idx1
then idx1 <- idx1 + 1
else failwith (sprintf "Invalid Name at %d" idx1)
while idx1 < s.Length && (checkValidChars idx1 || ('0' <= s.[idx1] && s.[idx1] <= '9') || s.[idx1] = '-')
do idx1 <- idx1 + 1
let (name_, retVal) = subString idx1 (s, idx)
name <- name_; retVal
let readXmlValueContent (content : string byref) (s : string, idx) =
let mutable idx1 = idx
while idx1 < s.Length && '<' <> s.[idx1] do idx1 <- idx1 + 1
let (content_, retVal) = subString idx1 (s, idx)
content <- content_; retVal
let readValue (value : string byref) (s : string, idx) =
let mutable idx1 = idx
while idx1 < s.Length && '\"' <> s.[idx1] do idx1 <- idx1 + 1
let (value_, retVal) = subString idx1 (s, idx)
value <- value_; retVal
let readFixed ch (s : string, idx) =
if s.[idx] <> ch then failwith (sprintf "Invalid token at %d" idx)
(s, idx + 1)
let checkFixed ch (s : string, idx) = s.[idx] = ch
let readAttribute (attr : XmlAttribute byref) (s : string, idx) =
let mutable name : string = null
let (_, idx1) = (s, idx) |> readName &name
if name <> null then
let mutable value : string = null
let (_, idx2) = (s, idx1) |> readWhitespace |> readFixed '=' |> readWhitespace |>
readFixed '\"' |> readValue &value |> readFixed '\"'
attr <- {XmlAttribute.Name = name; Value = value}; (s, idx2)
else
attr <- NullXmlAttribute; (s, idx1)
let readAttributeList (attrs : XmlAttribute array byref) (s : string, idx) =
let (_, idx1) = (s, idx) |> readWhitespace
let mutable a : XmlAttribute list = []
let mutable idx2 = idx1
while (not (checkFixed '>' (s, idx2))) && (not (checkFixed '/' (s, idx2))) do
let mutable attr = NullXmlAttribute
let (_, idx3) = (s, idx2) |> readAttribute &attr |> readWhitespace
if attr.Name <> null then a <- attr :: a
if idx2 = idx3 then failwith (sprintf "Malformed XML at %d" idx2)
idx2 <- idx3
if a |> List.ofSeq |> Seq.distinct |> Seq.length <> (a |> List.length) then
failwith "Attribute names must be unique"
attrs <- a |> List.rev |> List.toArray; (s, idx2)
let readClosingElement (name : string byref) (s : string, idx) =
let (_, idx1) = (s, idx) |> readFixed '<' |> readFixed '/' |> readWhitespace |>
readName &name |> readWhitespace |> readFixed '>' |> readWhitespace
(s, idx1)
let rec readElement (elem : XmlElement byref) (s : string, idx) =
let mutable name : string = null
let mutable attrs : XmlAttribute array = null
let (_, idx1) = (s, idx) |> readWhitespace |> readFixed '<' |> readName &name |>
readWhitespace |> readAttributeList &attrs |> readWhitespace
if checkFixed '/' (s, idx1) then //'<' Name ' ' Attribute* '\>'
let (_, idx2) = (s, idx1) |> readFixed '/' |> readFixed '>'
elem <- {XmlElement.Name = name; Attributes = attrs; Body = StrValue String.Empty}; (s, idx2) //return
else //'<' Name ' ' Attribute* '>' (XmlElement* | XmlValueContent) '<\' Name '>'
let (_, idx2) = (s, idx1) |> readFixed '>' |> readWhitespace
let (e, idx3) =
if checkFixed '<' (s, idx2) then //XmlElement*
if checkFixed '/' (s, idx2 + 1) then //no xml value content
(XmlValue [||], idx2)
else //element list
let mutable elems : XmlElement array = null
let (_, idx3) = (s, idx2) |> readElementList &elems
(XmlValue elems, idx3)
else //XmlValueContent
let mutable content : string = null
let (_, idx3) = (s, idx2) |> readXmlValueContent &content
(StrValue content, idx3)
let mutable name_ : string = null
let (_, idx4) = (s, idx3) |> readClosingElement &name_
if name <> name_ then failwith (sprintf "Closing element <%s> is missing" name)
elem <- {XmlElement.Name = name; Attributes = attrs; Body = e}; (s, idx4) //return
and
readElementList (elems : XmlElement array byref) (s : string, idx) =
let mutable a : XmlElement list = []
let mutable idx1 = idx
while checkFixed '<' (s, idx1) && (not (checkFixed '/' (s, idx1 + 1))) do
let mutable e : XmlElement = NullXmlElement
let (_, idx2) = (s, idx1) |> readElement &e |> readWhitespace
idx1 <- idx2
a <- e :: a
elems <- a |> List.rev |> List.toArray; (s, idx1)
let readXmlFragment (s: string) =
let mutable idx = 0
let mutable a = []
while (idx < s.Length) do
let mutable e : XmlElement = NullXmlElement
let (_, idx1) = (s, idx) |> readElement &e
idx <- idx1
a <- e :: a
a |> List.rev |> List.toArray
let readXmlRoot (s: string) = readXmlFragment(s).[0]
//helper:
let print (node: XmlElement) =
let printAttributes elem =
for a in elem.Attributes do printf " @%s: %s" a.Name a.Value
let rec rec_print (node: XmlElement, tab : int) =
for i = 1 to tab do printf " "
printf "{%s: " node.Name
match node.Body with
| StrValue x ->
printf "%s" x
| XmlValue xs ->
for x in xs do printfn ""; rec_print(x, tab + 1)
printAttributes node
printf "}"
rec_print (node, 0); printfn ""
let printn node = print node; printfn ""
//test cases:
let test1 = "<note>
<to>Tove</to>
<from>Jani</from>
<heading>Reminder</heading>
<body>Don't forget me this weekend!</body>
</note>"
let result1 = readXmlRoot(test1)
printn(result1)
let test2 = "<A>
<B/>
<C/>
<D>
<E/>
<F/>
<G>
<H><I></I></H>
</G>
<J/>
</D>
</A>"
let result2 = readXmlRoot(test2)
printn(result2)
let test3 = "<bookstore>
<book category=\"COOKING\">
<title lang=\"en\">Everyday Italian</title>
<author>Giada De Laurentiis</author>
<year>2005</year>
<price>30.00</price>
</book>
<book category=\"CHILDREN\">
<title lang=\"en\">Harry Potter</title>
<author>J K. Rowling</author>
<year>2005</year>
<price>29.99</price>
</book>
<book category=\"WEB\">
<title lang=\"en\">Learning XML</title>
<author>Erik T. Ray</author>
<year>2003</year>
<price>39.95</price>
</book>
</bookstore>"
let result3 = readXmlRoot(test3)
printn(result3)
let test4 = "<Signature xmlns=\"http://www.w3.org/2000/09/xmldsig#\">
<SignedInfo>
<CanonicalizationMethod Algorithm=\"http://www.w3.org/TR/2001/REC-xml-c14n-20010315\" />
<SignatureMethod Algorithm=\"http://www.w3.org/2000/09/xmldsig#rsa-sha1\" />
<Reference URI=\"#object\">
<DigestMethod Algorithm=\"http://www.w3.org/2000/09/xmldsig#sha1\" />
<DigestValue>OPnpF/ZNLDxJ/I+1F3iHhlmSwgo=</DigestValue>
</Reference>
</SignedInfo>
<SignatureValue>nihUFQg4mDhLgecvhIcKb9Gz8VRTOlw+adiZOBBXgK4JodEe5aFfCqm8WcRIT8GLLXSk8PsUP4//SsKqUBQkpotcAqQAhtz2v9kCWdoUDnAOtFZkd/CnsZ1sge0ndha40wWDV+nOWyJxkYgicvB8POYtSmldLLepPGMz+J7/Uws=</SignatureValue>
<KeyInfo>
<KeyValue>
<RSAKeyValue>
<Modulus>4IlzOY3Y9fXoh3Y5f06wBbtTg94Pt6vcfcd1KQ0FLm0S36aGJtTSb6pYKfyX7PqCUQ8wgL6xUJ5GRPEsu9gyz8ZobwfZsGCsvu40CWoT9fcFBZPfXro1Vtlh/xl/yYHm+Gzqh0Bw76xtLHSfLfpVOrmZdwKmSFKMTvNXOFd0V18=</Modulus>
<Exponent>AQAB</Exponent>
</RSAKeyValue>
</KeyValue>
</KeyInfo>
<Object Id=\"object\">some text
with spaces and CR-LF.</Object>
</Signature>"
let result4 = readXmlRoot(test4)
printn(result4)
let test5 = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">
<head>
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />
<title>XHTML 1.0 Strict Example</title>
<script type=\"text/javascript\">
function loadpdf() {
document.getElementById(\"pdf-object\").src=\"http://www.w3.org/TR/xhtml1/xhtml1.pdf\";
}
</script>
</head>
<body onload=\"loadpdf()\">
<p>
<p>This is an example of an Extensible HyperText Markup Language</p>
<br />
<img id=\"validation-icon\"
src=\"http://www.w3.org/Icons/valid-xhtml10\"
alt=\"Valid XHTML 1.0 Strict\" /><br />
<object id=\"pdf-object\"
name=\"pdf-object\"
type=\"application/pdf\"
data=\"http://www.w3.org/TR/xhtml1/xhtml1.pdf\"
width=\"100%\"
height=\"500\">
</object>
</p>
</body>
</html>"
let result5 = readXmlRoot(test5)
printn(result5)
Console.ReadLine() |> ignore
|
namespace System
Multiple items
type CustomEqualityAttribute =
inherit Attribute
new : unit -> CustomEqualityAttribute
Full name: Microsoft.FSharp.Core.CustomEqualityAttribute
--------------------
new : unit -> CustomEqualityAttribute
Multiple items
type NoComparisonAttribute =
inherit Attribute
new : unit -> NoComparisonAttribute
Full name: Microsoft.FSharp.Core.NoComparisonAttribute
--------------------
new : unit -> NoComparisonAttribute
type XmlAttribute =
{Name: string;
Value: string;}
override Equals : that:obj -> bool
override GetHashCode : unit -> int
Full name: Script.XmlAttribute
XmlAttribute.Name: string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
XmlAttribute.Value: string
val this : XmlAttribute
override XmlAttribute.Equals : that:obj -> bool
Full name: Script.XmlAttribute.Equals
val that : obj
val other : XmlAttribute
override XmlAttribute.GetHashCode : unit -> int
Full name: Script.XmlAttribute.GetHashCode
val hash : obj:'T -> int (requires equality)
Full name: Microsoft.FSharp.Core.Operators.hash
type XmlElementBody =
| StrValue of string
| XmlValue of XmlElement array
Full name: Script.XmlElementBody
union case XmlElementBody.StrValue: string -> XmlElementBody
union case XmlElementBody.XmlValue: XmlElement array -> XmlElementBody
type XmlElement =
{Name: string;
Attributes: XmlAttribute array;
Body: XmlElementBody;}
Full name: Script.XmlElement
type 'T array = 'T []
Full name: Microsoft.FSharp.Core.array<_>
XmlElement.Name: string
XmlElement.Attributes: XmlAttribute array
XmlElement.Body: XmlElementBody
val NullXmlAttribute : XmlAttribute
Full name: Script.NullXmlAttribute
val NullXmlElement : XmlElement
Full name: Script.NullXmlElement
val readWhitespace : s:string * idx:int -> string * int
Full name: Script.readWhitespace
val s : string
val idx : int
val mutable idx1 : int
property String.Length: int
val subString : idxDelta:int -> s:string * idx:int -> string * (string * int)
Full name: Script.subString
val idxDelta : int
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
val readName : name:byref<string> -> s:string * idx:int -> string * int
Full name: Script.readName
val name : byref<string>
type byref<'T> = (# "<Common IL Type Omitted>" #)
Full name: Microsoft.FSharp.Core.byref<_>
val checkValidChars : (int -> bool)
val idx1 : int
val failwith : message:string -> 'T
Full name: Microsoft.FSharp.Core.Operators.failwith
val sprintf : format:Printf.StringFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val name_ : string
val retVal : string * int
val readXmlValueContent : content:byref<string> -> s:string * idx:int -> string * int
Full name: Script.readXmlValueContent
val content : byref<string>
val content_ : string
val readValue : value:byref<string> -> s:string * idx:int -> string * int
Full name: Script.readValue
val value : byref<string>
val value_ : string
val readFixed : ch:char -> s:string * idx:int -> string * int
Full name: Script.readFixed
val ch : char
val checkFixed : ch:char -> s:string * idx:int -> bool
Full name: Script.checkFixed
val readAttribute : attr:byref<XmlAttribute> -> s:string * idx:int -> string * int
Full name: Script.readAttribute
val attr : byref<XmlAttribute>
val mutable name : string
val mutable value : string
val idx2 : int
val readAttributeList : attrs:byref<XmlAttribute array> -> s:string * idx:int -> string * int
Full name: Script.readAttributeList
val attrs : byref<XmlAttribute array>
val mutable a : XmlAttribute list
type 'T list = List<'T>
Full name: Microsoft.FSharp.Collections.list<_>
val mutable idx2 : int
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
val mutable attr : XmlAttribute
val idx3 : int
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val ofSeq : source:seq<'T> -> 'T list
Full name: Microsoft.FSharp.Collections.List.ofSeq
module Seq
from Microsoft.FSharp.Collections
val distinct : source:seq<'T> -> seq<'T> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.distinct
val length : source:seq<'T> -> int
Full name: Microsoft.FSharp.Collections.Seq.length
val length : list:'T list -> int
Full name: Microsoft.FSharp.Collections.List.length
val rev : list:'T list -> 'T list
Full name: Microsoft.FSharp.Collections.List.rev
val toArray : list:'T list -> 'T []
Full name: Microsoft.FSharp.Collections.List.toArray
val readClosingElement : name:byref<string> -> s:string * idx:int -> string * int
Full name: Script.readClosingElement
val readElement : elem:byref<XmlElement> -> s:string * idx:int -> string * int
Full name: Script.readElement
val elem : byref<XmlElement>
val mutable attrs : XmlAttribute array
Multiple items
type String =
new : value:char -> string + 7 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 2 overloads
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
member GetHashCode : unit -> int
...
Full name: System.String
--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
field string.Empty
val e : XmlElementBody
val mutable elems : XmlElement array
val readElementList : elems:byref<XmlElement array> -> s:string * idx:int -> string * int
Full name: Script.readElementList
val mutable content : string
val mutable name_ : string
val idx4 : int
val elems : byref<XmlElement array>
val mutable a : XmlElement list
val mutable e : XmlElement
val readXmlFragment : s:string -> XmlElement []
Full name: Script.readXmlFragment
val mutable idx : int
val readXmlRoot : s:string -> XmlElement
Full name: Script.readXmlRoot
val print : node:XmlElement -> unit
Full name: Script.print
val node : XmlElement
val printAttributes : (XmlElement -> unit)
val elem : XmlElement
val a : XmlAttribute
val printf : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printf
val rec_print : (XmlElement * int -> unit)
val tab : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
val i : int
val x : string
val xs : XmlElement array
val x : XmlElement
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val printn : node:XmlElement -> unit
Full name: Script.printn
val test1 : string
Full name: Script.test1
val result1 : XmlElement
Full name: Script.result1
val test2 : string
Full name: Script.test2
val result2 : XmlElement
Full name: Script.result2
val test3 : string
Full name: Script.test3
val result3 : XmlElement
Full name: Script.result3
val test4 : string
Full name: Script.test4
val result4 : XmlElement
Full name: Script.result4
val test5 : string
Full name: Script.test5
val result5 : XmlElement
Full name: Script.result5
type Console =
static member BackgroundColor : ConsoleColor with get, set
static member Beep : unit -> unit + 1 overload
static member BufferHeight : int with get, set
static member BufferWidth : int with get, set
static member CapsLock : bool
static member Clear : unit -> unit
static member CursorLeft : int with get, set
static member CursorSize : int with get, set
static member CursorTop : int with get, set
static member CursorVisible : bool with get, set
...
Full name: System.Console
Console.ReadLine() : string
val ignore : value:'T -> unit
Full name: Microsoft.FSharp.Core.Operators.ignore
More information