3 people like it.
Like the snippet!
Poor man's HTML renderer
Takes some HTML and outputs plaintext. It is not tail-recursive and not efficient, but it got the job done for my use case (I had a product feed where the descriptions were HTML, but I needed plaintext).
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
|
open HtmlAgilityPack
open System.Text
/// Renders an HTML node (and, recursively, its children) as plain text.
///
/// Parameters:
///   sb   - the builder that receives the rendered text; it is mutated in place.
///   node - the HtmlAgilityPack node to render.
///
/// Returns the builder holding the accumulated text, so calls can be chained
/// or used directly as a fold function over a node collection.
///
/// Rendering rules:
///   * document nodes  - children are rendered in order
///   * text nodes      - HTML entities are decoded, then the text is appended
///   * <p>             - line break before and after the content
///   * <li>            - content followed by a line break
///   * <div>           - line break before the content
///   * <ul> / <ol>     - line break, then each <li> child prefixed with a
///                       bullet or its 1-based number
///   * other elements  - children are rendered with no extra formatting
///   * comments        - skipped
///
/// The recursion is not tail recursive, so its depth follows the nesting depth
/// of the markup.
let rec getText (sb:StringBuilder) (node:HtmlNode) : StringBuilder =
    // Renders every child of `parent`, in document order, into `target`.
    let renderChildren (target:StringBuilder) (parent:HtmlNode) =
        parent.ChildNodes |> Seq.fold getText target

    // Renders the children of a list element. Only <li> elements receive a
    // marker and advance the item counter; any other child (typically the
    // whitespace text between items, or a comment) is rendered as-is so that
    // it neither gets a stray bullet nor consumes a number.
    let renderItems (marker:int -> string) (target:StringBuilder) (parent:HtmlNode) =
        let renderItem ((acc:StringBuilder), (index:int)) (child:HtmlNode) =
            if child.NodeType = HtmlNodeType.Element && child.Name = "li" then
                getText (acc.Append(marker index)) child, index + 1
            else
                getText acc child, index
        parent.ChildNodes
        |> Seq.fold renderItem (target, 1)
        |> fst

    match node.NodeType with
    | HtmlNodeType.Document ->
        renderChildren sb node
    | HtmlNodeType.Text ->
        let textNode = (node :?> HtmlTextNode)
        // Decode entities such as &amp; so the output is real plain text.
        let text = textNode.Text |> HtmlEntity.DeEntitize
        sb.Append text
    | HtmlNodeType.Element ->
        match node.Name with
        | "p" ->
            let sb = renderChildren (sb.AppendLine()) node
            sb.AppendLine()
        | "li" ->
            let sb = renderChildren sb node
            sb.AppendLine()
        | "div" ->
            renderChildren (sb.AppendLine()) node
        | "ul" ->
            renderItems (fun _ -> "• ") (sb.AppendLine()) node
        | "ol" ->
            // The item number must be passed to sprintf; without it a
            // partially applied function is appended instead of the number.
            renderItems (fun index -> sprintf "%i. " index) (sb.AppendLine()) node
        | _ -> renderChildren sb node
    | HtmlNodeType.Comment -> sb
    | unknown -> printfn "Unknown value: %A" unknown; sb
|
namespace System
namespace System.Text
val getText : sb:StringBuilder -> node:'a -> StringBuilder
Full name: Script.getText
val sb : StringBuilder
Multiple items
type StringBuilder =
new : unit -> StringBuilder + 5 overloads
member Append : value:string -> StringBuilder + 18 overloads
member AppendFormat : format:string * arg0:obj -> StringBuilder + 4 overloads
member AppendLine : unit -> StringBuilder + 1 overload
member Capacity : int with get, set
member Chars : int -> char with get, set
member Clear : unit -> StringBuilder
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EnsureCapacity : capacity:int -> int
member Equals : sb:StringBuilder -> bool
...
Full name: System.Text.StringBuilder
--------------------
StringBuilder() : unit
StringBuilder(capacity: int) : unit
StringBuilder(value: string) : unit
StringBuilder(value: string, capacity: int) : unit
StringBuilder(capacity: int, maxCapacity: int) : unit
StringBuilder(value: string, startIndex: int, length: int, capacity: int) : unit
val node : 'a
module Seq
from Microsoft.FSharp.Collections
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State
Full name: Microsoft.FSharp.Collections.Seq.fold
StringBuilder.Append(value: char []) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: obj) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: uint64) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: uint32) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: uint16) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: decimal) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: float) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: float32) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: int64) : StringBuilder
(+0 other overloads)
StringBuilder.Append(value: int) : StringBuilder
(+0 other overloads)
StringBuilder.AppendLine() : StringBuilder
StringBuilder.AppendLine(value: string) : StringBuilder
val ignore : value:'T -> unit
Full name: Microsoft.FSharp.Core.Operators.ignore
val sprintf : format:Printf.StringFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val fst : tuple:('T1 * 'T2) -> 'T1
Full name: Microsoft.FSharp.Core.Operators.fst
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
More information