0 people like it.

Extract script blocks from html page

This module extracts all the `<script>` blocks out of an HTML page's header. This can be useful if you are writing tools to merge all the JS for future minification, or for otherwise manipulating the files.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
namespace MergeJsFiles

open HtmlAgilityPack 
open System.Text.RegularExpressions
open System.IO
open System

module JsRetriever =
    
    /// Reduces the outer HTML of a `<script ... src="...">` element to the bare
    /// value of its `src` attribute by deleting, in order, every match of a fixed
    /// list of regular expressions (tag names, the `type="text/javascript"`
    /// attribute, the `src=` prefix, quotes and angle brackets).
    ///
    /// Matching is case-insensitive so that markup such as `<SCRIPT SRC="...">`
    /// is handled the same way as its lowercase form.
    ///
    /// Returns the stripped, trimmed text, or an empty string (after logging the
    /// exception to the console) if any replacement throws.
    let stripHtml (text:string) = 
        try
            // Order matters: the multi-character patterns must be removed before
            // the single-character ones ("\"", ">", "<") destroy their context.
            let patterns = [
                "<script\s*"
                "\"?\s*type\s*=\s*\"\s*text/javascript\s*\"\s*"
                "</script>"
                "src\s*=\s*"
                "\""
                ">"
                "</"
                "<"
            ]

            // Thread the text through every pattern, trimming after each pass
            // exactly as the original loop did.
            patterns
            |> List.fold
                (fun (remaining: string) (pattern: string) ->
                    Regex.Replace(remaining, pattern, "", RegexOptions.IgnoreCase).Trim())
                text
        with
            | ex -> 
                Console.WriteLine ("Error handling " + text + ", " + ex.ToString())
                ""

    /// Resolves `path` against the directory that contains the file `parent`
    /// and returns the resulting full path.
    ///
    /// Both arguments are annotated as `string` so that the calls to
    /// `Path.GetDirectoryName` / `Path.Combine` pick the string overloads
    /// instead of leaving the argument types to inference.
    ///
    /// Returns an empty string (after logging to the console) when the path
    /// cannot be combined or normalised, e.g. when `parent` has no directory
    /// component or `path` contains invalid characters.
    ///
    /// Note: `Path.Combine` ignores its first argument when `path` is rooted,
    /// so a rooted `path` is returned relative to the file-system root, not
    /// to `parent`.
    let convertToAbsolute (parent:string) (path:string) =
        try            
            let baseDirectory = Path.GetDirectoryName(parent)
            Path.Combine(baseDirectory, path) |> Path.GetFullPath
        with
            | ex -> 
                // Log the exception as well, consistent with stripHtml.
                Console.WriteLine ("Error handling " + path + ", " + ex.ToString())
                ""
        

    /// Returns true when the extension of `file` (including the leading dot,
    /// as reported by `Path.GetExtension`) equals `ext`.
    ///
    /// The comparison is ordinal and case-insensitive, so `endsOn ".js"`
    /// accepts both `app.js` and `APP.JS`. The parameters are annotated as
    /// `string` so the string overload of `Path.GetExtension` is selected.
    let endsOn (ext:string) (file:string) = 
        String.Equals(Path.GetExtension(file), ext, StringComparison.OrdinalIgnoreCase)
            
    /// Loads the HTML document at `defaultAspxPath` and returns the script
    /// references found under `/html/head`, as a lazy sequence of absolute
    /// paths resolved relative to the document's own directory.
    ///
    /// Each matched node's outer HTML is reduced to its `src` value with
    /// `stripHtml`, made absolute with `convertToAbsolute`, and kept only if
    /// it has a `.js` extension. Entries that fail to convert become empty
    /// strings and are therefore dropped by the extension filter.
    ///
    /// Returns an empty sequence when the head contains no matching scripts.
    let getJsFiles (defaultAspxPath:string) = 
        let doc = new HtmlDocument()

        doc.Load defaultAspxPath

        // HtmlAgilityPack's SelectNodes yields null (not an empty collection)
        // when the XPath matches nothing; guard so Seq.map does not throw.
        match doc.DocumentNode.SelectNodes "/html/head/script/@src" with
        | null -> Seq.empty
        | scriptNodes ->
            scriptNodes
            |> Seq.map (fun (node: HtmlNode) -> node.OuterHtml)
            |> Seq.map stripHtml
            |> Seq.map (convertToAbsolute defaultAspxPath)
            |> Seq.filter (endsOn ".js")
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
namespace System.IO
module JsRetriever

from MergeJsFiles
val stripHtml : text:string -> string
val text : string
Multiple items
val string : value:'T -> string

--------------------
type string = String
val mutable target : string
val regex : (string * string) list
val pattern : string
val replacement : string
Multiple items
type Regex =
  new : pattern:string -> Regex + 2 overloads
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member MatchTimeout : TimeSpan
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  ...

--------------------
Regex(pattern: string) : Regex
Regex(pattern: string, options: RegexOptions) : Regex
Regex(pattern: string, options: RegexOptions, matchTimeout: TimeSpan) : Regex
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator) : string
Regex.Replace(input: string, pattern: string, replacement: string) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions, matchTimeout: TimeSpan) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions, matchTimeout: TimeSpan) : string
val ex : exn
type Console =
  static member BackgroundColor : ConsoleColor with get, set
  static member Beep : unit -> unit + 1 overload
  static member BufferHeight : int with get, set
  static member BufferWidth : int with get, set
  static member CapsLock : bool
  static member Clear : unit -> unit
  static member CursorLeft : int with get, set
  static member CursorSize : int with get, set
  static member CursorTop : int with get, set
  static member CursorVisible : bool with get, set
  ...
Console.WriteLine() : unit
   (+0 other overloads)
Console.WriteLine(value: string) : unit
   (+0 other overloads)
Console.WriteLine(value: obj) : unit
   (+0 other overloads)
Console.WriteLine(value: uint64) : unit
   (+0 other overloads)
Console.WriteLine(value: int64) : unit
   (+0 other overloads)
Console.WriteLine(value: uint32) : unit
   (+0 other overloads)
Console.WriteLine(value: int) : unit
   (+0 other overloads)
Console.WriteLine(value: float32) : unit
   (+0 other overloads)
Console.WriteLine(value: float) : unit
   (+0 other overloads)
Console.WriteLine(value: decimal) : unit
   (+0 other overloads)
val convertToAbsolute : parent:'a -> path:string -> string
val parent : 'a
val path : string
type Path =
  static val DirectorySeparatorChar : char
  static val AltDirectorySeparatorChar : char
  static val VolumeSeparatorChar : char
  static val PathSeparator : char
  static val InvalidPathChars : char[]
  static member ChangeExtension : path:string * extension:string -> string
  static member Combine : [<ParamArray>] paths:string[] -> string + 3 overloads
  static member EndsInDirectorySeparator : path:ReadOnlySpan<char> -> bool + 1 overload
  static member GetDirectoryName : path:string -> string + 1 overload
  static member GetExtension : path:string -> string + 1 overload
  ...
Path.Combine([<ParamArray>] paths: string []) : string
Path.Combine(path1: string, path2: string) : string
Path.Combine(path1: string, path2: string, path3: string) : string
Path.Combine(path1: string, path2: string, path3: string, path4: string) : string
Path.GetDirectoryName(path: ReadOnlySpan<char>) : ReadOnlySpan<char>
Path.GetDirectoryName(path: string) : string
Path.GetFullPath(path: string) : string
Path.GetFullPath(path: string, basePath: string) : string
val endsOn : ext:'a -> file:'b -> bool (requires equality)
val ext : 'a (requires equality)
val file : 'b
Path.GetExtension(path: ReadOnlySpan<char>) : ReadOnlySpan<char>
Path.GetExtension(path: string) : string
val getJsFiles : defaultAspxPath:string -> seq<string>
val defaultAspxPath : string
val doc : obj
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
val i : obj
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

More information

Link:http://fssnip.net/iR
Posted:2 years ago
Author:devshorts
Tags: html , parsing