1 person likes it.
Like the snippet!
Find HTML Page Titles
Finds potential page titles in an HTML page.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
|
open FSharp.Data
/// Collects candidate page titles from the document's <meta> elements.
///
/// A <meta> element contributes its `content` value when its `name` attribute
/// is "title", "headline" or "twitter:title", or when its `property`
/// attribute is "og:title". Results are produced lazily, in document order.
///
/// Matching tags whose `content` is missing or blank are skipped, so the
/// returned sequence never contains empty candidates.
let metaTitle (doc:HtmlDocument) =
    doc.Descendants "meta"
    |> Seq.choose (fun x ->
        match x.AttributeValue("name"), x.AttributeValue("property") with
        | "title", _
        | "headline", _
        | "twitter:title", _
        | _, "og:title" ->
            // AttributeValue yields "" for an absent attribute, so a title-like
            // <meta> without usable content must not be reported as a title.
            let content = x.AttributeValue("content")
            if System.String.IsNullOrWhiteSpace content then None else Some content
        | _, _ -> None)
/// Gathers every candidate page title from the document, in priority order:
/// the text of <title> elements, then the values found by `metaTitle`,
/// then the text of <h1> elements.
///
/// Each candidate is trimmed of surrounding whitespace, and candidates that
/// are empty after trimming are dropped, so an empty <title> or a text-less
/// <h1> cannot shadow a real title further down the list.
/// The sequence is evaluated lazily.
let titles (doc:HtmlDocument) =
    // Inner text of every element with the given tag name.
    let tagged (tag:string) =
        doc.Descendants tag |> Seq.map (fun x -> x.InnerText())
    Seq.concat [tagged "title"; metaTitle doc; tagged "h1"]
    |> Seq.map (fun (candidate:string) -> candidate.Trim())
    |> Seq.filter (fun candidate -> candidate <> "")
/// Picks the highest-priority title candidate for the document:
/// the first element produced by `titles`, or `None` when there are none.
let title (doc:HtmlDocument) =
    Seq.tryHead (titles doc)
// Example: download a live page and list all of its title candidates.
titles (HtmlDocument.Load "http://www.telegraph.co.uk")
|
Multiple items
namespace FSharp
--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data
--------------------
namespace Microsoft.FSharp.Data
val metaTitle : doc:HtmlDocument -> seq<string>
Full name: Script.metaTitle
val doc : HtmlDocument
Multiple items
module HtmlDocument
from FSharp.Data
--------------------
type HtmlDocument =
private | HtmlDocument of docType: string * elements: HtmlNode list
override ToString : unit -> string
static member AsyncLoad : uri:string -> Async<HtmlDocument>
static member Load : uri:string -> HtmlDocument
static member Load : reader:TextReader -> HtmlDocument
static member Load : stream:Stream -> HtmlDocument
static member New : children:seq<HtmlNode> -> HtmlDocument
static member New : docType:string * children:seq<HtmlNode> -> HtmlDocument
static member Parse : text:string -> HtmlDocument
Full name: FSharp.Data.HtmlDocument
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * predicate:(HtmlNode -> bool) -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * names:seq<string> -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * name:string -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * predicate:(HtmlNode -> bool) * recurseOnMatch:bool -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * names:seq<string> * recurseOnMatch:bool -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * name:string * recurseOnMatch:bool -> seq<HtmlNode>
module Seq
from Microsoft.FSharp.Collections
val choose : chooser:('T -> 'U option) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.choose
val x : HtmlNode
static member HtmlNodeExtensions.AttributeValue : n:HtmlNode * name:string -> string
union case Option.Some: Value: 'T -> Option<'T>
union case Option.None: Option<'T>
val titles : doc:HtmlDocument -> seq<string>
Full name: Script.titles
val tagged : (string -> seq<string>)
val tag : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
static member HtmlNodeExtensions.InnerText : n:HtmlNode -> string
val concat : sources:seq<#seq<'T>> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.concat
val title : doc:HtmlDocument -> string option
Full name: Script.title
val tryHead : source:seq<'T> -> 'T option
Full name: Microsoft.FSharp.Collections.Seq.tryHead
static member HtmlDocument.Load : uri:string -> HtmlDocument
static member HtmlDocument.Load : reader:System.IO.TextReader -> HtmlDocument
static member HtmlDocument.Load : stream:System.IO.Stream -> HtmlDocument
More information