1 person likes it.

Find HTML Page Titles

Finds potential page titles in an HTML page.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
open FSharp.Data

/// Collects candidate titles from the document's <meta> elements.
///
/// A <meta> element qualifies when its `name` attribute is exactly "title",
/// "headline" or "twitter:title", or when its `property` attribute is exactly
/// "og:title". For each qualifying element the value of its `content`
/// attribute is yielded, in the order `Descendants` produces the elements.
/// Qualifying elements whose `content` is missing or blank are skipped.
let metaTitle (doc:HtmlDocument) =
    doc.Descendants "meta"
    |> Seq.choose (fun x ->
        match x.AttributeValue("name"), x.AttributeValue("property") with
        | "title", _
        | "headline", _
        | "twitter:title", _
        | _, "og:title" ->
            // AttributeValue gives back an empty string when the attribute is
            // absent (per the FSharp.Data docs), so a matching tag without
            // usable content must not turn into an empty title candidate.
            let content = x.AttributeValue("content")
            if System.String.IsNullOrWhiteSpace content then None
            else Some content
        | _, _ -> None
    )

/// Lists every potential title found in the document.
///
/// Candidates are produced in priority order: the inner text of each <title>
/// element, then the values returned by `metaTitle`, then the inner text of
/// each <h1> element. Blank (empty or whitespace-only) candidates are dropped
/// so that, for example, an empty <title></title> cannot shadow a usable
/// candidate that comes later in the sequence.
let titles (doc:HtmlDocument) =
    // Inner text of every descendant element with the given tag name.
    let tagged (tag:string) =
        doc.Descendants tag |> Seq.map (fun x -> x.InnerText())
    Seq.concat [tagged "title"; metaTitle doc; tagged "h1"]
    |> Seq.filter (fun candidate -> not (System.String.IsNullOrWhiteSpace candidate))

/// Picks the first candidate produced by `titles` for the document,
/// or `None` when `titles` yields nothing.
let title (doc:HtmlDocument) =
    doc |> titles |> Seq.tryHead

"http://www.telegraph.co.uk"
|> HtmlDocument.Load
|> titles
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
val metaTitle : doc:HtmlDocument -> seq<string>

Full name: Script.metaTitle
val doc : HtmlDocument
Multiple items
module HtmlDocument

from FSharp.Data

--------------------
type HtmlDocument =
  private | HtmlDocument of docType: string * elements: HtmlNode list
  override ToString : unit -> string
  static member AsyncLoad : uri:string -> Async<HtmlDocument>
  static member Load : uri:string -> HtmlDocument
  static member Load : reader:TextReader -> HtmlDocument
  static member Load : stream:Stream -> HtmlDocument
  static member New : children:seq<HtmlNode> -> HtmlDocument
  static member New : docType:string * children:seq<HtmlNode> -> HtmlDocument
  static member Parse : text:string -> HtmlDocument

Full name: FSharp.Data.HtmlDocument
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * predicate:(HtmlNode -> bool) -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * names:seq<string> -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * name:string -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * predicate:(HtmlNode -> bool) * recurseOnMatch:bool -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * names:seq<string> * recurseOnMatch:bool -> seq<HtmlNode>
static member HtmlDocumentExtensions.Descendants : doc:HtmlDocument * name:string * recurseOnMatch:bool -> seq<HtmlNode>
module Seq

from Microsoft.FSharp.Collections
val choose : chooser:('T -> 'U option) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.choose
val x : HtmlNode
static member HtmlNodeExtensions.AttributeValue : n:HtmlNode * name:string -> string
union case Option.Some: Value: 'T -> Option<'T>
union case Option.None: Option<'T>
val titles : doc:HtmlDocument -> seq<string>

Full name: Script.titles
val tagged : (string -> seq<string>)
val tag : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = System.String

Full name: Microsoft.FSharp.Core.string
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
static member HtmlNodeExtensions.InnerText : n:HtmlNode -> string
val concat : sources:seq<#seq<'T>> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.concat
val title : doc:HtmlDocument -> string option

Full name: Script.title
val tryHead : source:seq<'T> -> 'T option

Full name: Microsoft.FSharp.Collections.Seq.tryHead
static member HtmlDocument.Load : uri:string -> HtmlDocument
static member HtmlDocument.Load : reader:System.IO.TextReader -> HtmlDocument
static member HtmlDocument.Load : stream:System.IO.Stream -> HtmlDocument
Raw view Test code New version

More information

Link:http://fssnip.net/7Qu
Posted:8 years ago
Author:Phillip Trelford
Tags: html