2 people like it.
Like the snippet!
Extract script blocks from an HTML page (FP)
A slightly more functional version of this snippet: http://fssnip.net/iR
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
|
open HtmlAgilityPack
open System.Text.RegularExpressions
open System.IO
open System
open System.Text.RegularExpressions
open FSharpx
open FSharpx.Choice
/// Helpers for collecting the JavaScript files referenced by the `<script>`
/// elements in the `<head>` of an HTML/ASPX page.
module JsRetriever =

    /// Deletes, in order, every match of a fixed list of regular expressions
    /// from `text`, trimming surrounding whitespace after each deletion.
    /// Applied to the outer HTML of a `<script type="text/javascript" src="...">`
    /// element this is intended to leave just the `src` value; other attributes
    /// are not stripped.
    let stripHtml text =
        // Order matters: the tag- and attribute-specific patterns must run
        // before the generic quote/bracket patterns delete the characters
        // those specific patterns rely on.
        [ "<script\s*"
          "\"?\s*type\s*=\s*\"\s*text/javascript\s*\"\s*"
          "</script>"
          "src\s*=\s*"
          "\""
          ">"
          "</"
          "<" ]
        |> List.fold (fun res pattern -> Regex.Replace(res, pattern, "").Trim()) text

    /// Combines `path` with the directory part of `parent` and returns the
    /// resulting full path.
    let convertToAbsolute (parent: string) (path: string) =
        Path.Combine(Path.GetDirectoryName(parent), path) |> Path.GetFullPath

    /// True when the extension of `file` (as reported by `Path.GetExtension`,
    /// i.e. including the leading dot) equals `ext`. The comparison is
    /// case-sensitive.
    let endsOn ext (file: string) = Path.GetExtension(file) = ext

    /// Loads the page at `defaultAspxPath`, selects the nodes matching
    /// `/html/head/script/@src`, turns each node's outer HTML into a full path
    /// relative to the page's directory, and returns the paths whose extension
    /// is `.js`.
    ///
    /// Any exception raised while stripping or resolving a single node is
    /// collected instead of propagated; the collected exceptions are written
    /// to standard output as one comma-separated line.
    ///
    /// NOTE: results are accumulated by prepending, so the returned sequence is
    /// in the reverse of the order in which the nodes were selected.
    let getJsFiles (defaultAspxPath: string) =
        let doc = HtmlDocument()
        doc.Load defaultAspxPath

        // HtmlAgilityPack's SelectNodes yields null (not an empty collection)
        // when nothing matches; treat that as "no script nodes" rather than
        // letting Seq.map fail with an ArgumentNullException.
        let scriptNodes : seq<HtmlNode> =
            match doc.DocumentNode.SelectNodes "/html/head/script/@src" with
            | null -> Seq.empty
            | nodes -> nodes :> seq<HtmlNode>

        // Partition the per-node results into resolved paths and failures.
        let files, errors =
            scriptNodes
            |> Seq.map (fun node -> node.OuterHtml)
            |> Seq.map (Choice.protect stripHtml >=> Choice.protect (convertToAbsolute defaultAspxPath))
            |> Seq.fold
                (fun (paths, failures) result ->
                    match result with
                    | Choice1Of2 path -> path :: paths, failures
                    | Choice2Of2 failure -> paths, failure :: failures)
                ([], [])

        // Only report when something actually failed, and join with a proper
        // separator so the line does not start with a stray ", ".
        if not (List.isEmpty errors) then
            errors
            |> List.map (sprintf "%O")
            |> String.concat ", "
            |> printfn "%s"

        files |> Seq.filter (endsOn ".js")
|
namespace HtmlAgilityPack
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
namespace System.IO
namespace FSharpx
module Choice
from FSharpx
module JsRetriever
from Script
val stripHtml : text:string -> string
Full name: Script.JsRetriever.stripHtml
val text : string
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val fold : folder:('State -> 'T -> 'State) -> state:'State -> list:'T list -> 'State
Full name: Microsoft.FSharp.Collections.List.fold
val res : string
val pattern : string
Multiple items
type Regex =
new : pattern:string -> Regex + 1 overload
member GetGroupNames : unit -> string[]
member GetGroupNumbers : unit -> int[]
member GroupNameFromNumber : i:int -> string
member GroupNumberFromName : name:string -> int
member IsMatch : input:string -> bool + 1 overload
member Match : input:string -> Match + 2 overloads
member Matches : input:string -> MatchCollection + 1 overload
member Options : RegexOptions
member Replace : input:string * replacement:string -> string + 5 overloads
...
Full name: System.Text.RegularExpressions.Regex
--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator) : string
Regex.Replace(input: string, pattern: string, replacement: string) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions) : string
val convertToAbsolute : parent:string -> path:string -> string
Full name: Script.JsRetriever.convertToAbsolute
val parent : string
val path : string
type Path =
static val DirectorySeparatorChar : char
static val AltDirectorySeparatorChar : char
static val VolumeSeparatorChar : char
static val InvalidPathChars : char[]
static val PathSeparator : char
static member ChangeExtension : path:string * extension:string -> string
static member Combine : [<ParamArray>] paths:string[] -> string + 3 overloads
static member GetDirectoryName : path:string -> string
static member GetExtension : path:string -> string
static member GetFileName : path:string -> string
...
Full name: System.IO.Path
Path.Combine([<ParamArray>] paths: string []) : string
Path.Combine(path1: string, path2: string) : string
Path.Combine(path1: string, path2: string, path3: string) : string
Path.Combine(path1: string, path2: string, path3: string, path4: string) : string
Path.GetDirectoryName(path: string) : string
Path.GetFullPath(path: string) : string
val endsOn : ext:string -> file:string -> bool
Full name: Script.JsRetriever.endsOn
val ext : string
val file : string
Path.GetExtension(path: string) : string
val getJsFiles : defaultAspxPath:string -> seq<string>
Full name: Script.JsRetriever.getJsFiles
val defaultAspxPath : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
val doc : HtmlDocument
Multiple items
type HtmlDocument =
new : unit -> HtmlDocument
val OptionAddDebuggingAttributes : bool
val OptionAutoCloseOnEnd : bool
val OptionCheckSyntax : bool
val OptionComputeChecksum : bool
val OptionDefaultStreamEncoding : Encoding
val OptionExtractErrorSourceText : bool
val OptionExtractErrorSourceTextMaxLength : int
val OptionFixNestedTags : bool
val OptionOutputAsXml : bool
...
Full name: HtmlAgilityPack.HtmlDocument
--------------------
HtmlDocument() : unit
HtmlDocument.Load(reader: TextReader) : unit
(+0 other overloads)
HtmlDocument.Load(stream: Stream) : unit
(+0 other overloads)
HtmlDocument.Load(path: string) : unit
(+0 other overloads)
HtmlDocument.Load(stream: Stream, encoding: Text.Encoding) : unit
(+0 other overloads)
HtmlDocument.Load(stream: Stream, detectEncodingFromByteOrderMarks: bool) : unit
(+0 other overloads)
HtmlDocument.Load(path: string, encoding: Text.Encoding) : unit
(+0 other overloads)
HtmlDocument.Load(path: string, detectEncodingFromByteOrderMarks: bool) : unit
(+0 other overloads)
HtmlDocument.Load(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
(+0 other overloads)
HtmlDocument.Load(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
(+0 other overloads)
HtmlDocument.Load(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, buffersize: int) : unit
(+0 other overloads)
property HtmlDocument.DocumentNode: HtmlNode
HtmlNode.SelectNodes(xpath: string) : HtmlNodeCollection
module Seq
from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val x : HtmlNode
property HtmlNode.OuterHtml: string
Multiple items
module Choice
from FSharpx
--------------------
type Choice<'T1,'T2> =
| Choice1Of2 of 'T1
| Choice2Of2 of 'T2
Full name: Microsoft.FSharp.Core.Choice<_,_>
--------------------
type Choice<'T1,'T2,'T3> =
| Choice1Of3 of 'T1
| Choice2Of3 of 'T2
| Choice3Of3 of 'T3
Full name: Microsoft.FSharp.Core.Choice<_,_,_>
--------------------
type Choice<'T1,'T2,'T3,'T4> =
| Choice1Of4 of 'T1
| Choice2Of4 of 'T2
| Choice3Of4 of 'T3
| Choice4Of4 of 'T4
Full name: Microsoft.FSharp.Core.Choice<_,_,_,_>
--------------------
type Choice<'T1,'T2,'T3,'T4,'T5> =
| Choice1Of5 of 'T1
| Choice2Of5 of 'T2
| Choice3Of5 of 'T3
| Choice4Of5 of 'T4
| Choice5Of5 of 'T5
Full name: Microsoft.FSharp.Core.Choice<_,_,_,_,_>
--------------------
type Choice<'T1,'T2,'T3,'T4,'T5,'T6> =
| Choice1Of6 of 'T1
| Choice2Of6 of 'T2
| Choice3Of6 of 'T3
| Choice4Of6 of 'T4
| Choice5Of6 of 'T5
| Choice6Of6 of 'T6
Full name: Microsoft.FSharp.Core.Choice<_,_,_,_,_,_>
--------------------
type Choice<'T1,'T2,'T3,'T4,'T5,'T6,'T7> =
| Choice1Of7 of 'T1
| Choice2Of7 of 'T2
| Choice3Of7 of 'T3
| Choice4Of7 of 'T4
| Choice5Of7 of 'T5
| Choice6Of7 of 'T6
| Choice7Of7 of 'T7
Full name: Microsoft.FSharp.Core.Choice<_,_,_,_,_,_,_>
val protect : f:('a -> 'b) -> x:'a -> Choice<'b,exn>
Full name: FSharpx.Choice.protect
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State
Full name: Microsoft.FSharp.Collections.Seq.fold
val files : string list
val es : exn list
val choice : f1:('a -> 'b) -> f2:('c -> 'b) -> _arg1:Choice<'a,'c> -> 'b
Full name: FSharpx.Choice.choice
val f : string
val e : exn
val acc : string
val sprintf : format:Printf.StringFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
More information