8 people like it.
Like the snippet!
URL Canonicalization
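This function normalizes user-supplied URLs for Web requests or URI construction. It forces the pattern:
http:// + (www. OR subdomain.) + domain + absolute path. The result always uses the http scheme and is built only from the host and the absolute path of the input.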
When used in an application that takes URLs as input, the user would be able to type "example.com" instead of "http://example.com" or "http://www.example.com". It also supports domains like google.co.uk or google.com.au.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
|
open System
open System.Text.RegularExpressions
/// Normalizes a user-supplied URL to the form
/// "http://" + ("www." or existing subdomain) + domain + absolute path.
///
/// url: an absolute URL or a bare host with optional path, e.g. "example.com/web".
/// Returns the normalized URL, or "" when the input cannot be parsed as a URI
/// even after prepending "http://".
///
/// Notes:
///  * The scheme is always rewritten to http; only Uri.Host and
///    Uri.AbsolutePath are used, so port, query string and fragment are dropped.
///  * "www." is prepended only when the host is a bare registrable domain.
///    Without a public-suffix list this is a heuristic: a two-letter final label
///    preceded by a 2-3 character label (co.uk, com.au) is treated as a
///    two-part suffix; otherwise the last label alone is the suffix.
///  * Hosts that already start with "www.", hosts with any number of
///    subdomain labels, single-label hosts (localhost) and IP literals are
///    left unchanged.
let canonicalize (url : string) =
    // Accept a parse only if it produced a host. Scheme-less input such as
    // "example.com:8080/x" can parse as an absolute URI whose *scheme* is the
    // host name; such results have an empty Host and must be retried.
    let tryParse (candidate : string) =
        match Uri.TryCreate(candidate, UriKind.Absolute) with
        | true, parsed when not (String.IsNullOrEmpty parsed.Host) -> Some parsed
        | _ -> None

    let parsed =
        match tryParse url with
        | Some _ as ok -> ok
        | None -> tryParse ("http://" + url)

    // Decides from the host's labels whether it is a bare registrable domain.
    let needsWww (uri : Uri) =
        let labels = uri.Host.Split('.')
        let count = labels.Length
        if uri.HostNameType <> UriHostNameType.Dns || count < 2 then
            // IP literal or single-label host: "www." would make it unreachable.
            false
        elif String.Equals(labels.[0], "www", StringComparison.OrdinalIgnoreCase) then
            false
        else
            let secondLast = labels.[count - 2].Length
            let hasTwoPartSuffix =
                count >= 3
                && labels.[count - 1].Length = 2
                && secondLast >= 2
                && secondLast <= 3
            let registrableLabels = if hasTwoPartSuffix then 3 else 2
            // More labels than the registrable domain means a subdomain exists.
            count <= registrableLabels

    match parsed with
    | Some uri ->
        let prefix = if needsWww uri then "http://www." else "http://"
        prefix + uri.Host + uri.AbsolutePath
    | None -> ""
// Sample inputs: bare hosts, a host with a path, a full URL,
// a host with a subdomain, and hosts under two-part country suffixes.
let a = "microsoft.com/web" |> canonicalize
let b = "www.bing.com" |> canonicalize
let c = "http://fssnip.net/tags/seq" |> canonicalize
let d = "fsharp-code.blogspot.com" |> canonicalize
let e = "google.co.uk" |> canonicalize
let f = "google.com.au" |> canonicalize
// Values reported by F# Interactive:
// val a : string = "http://www.microsoft.com/web"
// val b : string = "http://www.bing.com/"
// val c : string = "http://www.fssnip.net/tags/seq"
// val d : string = "http://fsharp-code.blogspot.com/"
// val e : string = "http://www.google.co.uk/"
// val f : string = "http://www.google.com.au/"
|
namespace System
namespace System.Text
namespace System.Text.RegularExpressions
val canonicalize : url:string -> string
Full name: Script.canonicalize
val url : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
val domPat : string
val url' : bool * Uri
Multiple items
type Uri =
new : uriString:string -> Uri + 5 overloads
member AbsolutePath : string
member AbsoluteUri : string
member Authority : string
member DnsSafeHost : string
member Equals : comparand:obj -> bool
member Fragment : string
member GetComponents : components:UriComponents * format:UriFormat -> string
member GetHashCode : unit -> int
member GetLeftPart : part:UriPartial -> string
...
Full name: System.Uri
--------------------
Uri(uriString: string) : unit
Uri(uriString: string, uriKind: UriKind) : unit
Uri(baseUri: Uri, relativeUri: string) : unit
Uri(baseUri: Uri, relativeUri: Uri) : unit
Uri.TryCreate(baseUri: Uri, relativeUri: Uri, result: byref<Uri>) : bool
Uri.TryCreate(baseUri: Uri, relativeUri: string, result: byref<Uri>) : bool
Uri.TryCreate(uriString: string, uriKind: UriKind, result: byref<Uri>) : bool
type UriKind =
| RelativeOrAbsolute = 0
| Absolute = 1
| Relative = 2
Full name: System.UriKind
field UriKind.Absolute = 1
val uri : Uri option
val str : Uri
union case Option.Some: Value: 'T -> Option<'T>
val url'' : bool * Uri
union case Option.None: Option<'T>
val x : Uri
val host : string
property Uri.Host: string
val path : string
property Uri.AbsolutePath: string
val host' : string
Multiple items
type Regex =
new : pattern:string -> Regex + 1 overload
member GetGroupNames : unit -> string[]
member GetGroupNumbers : unit -> int[]
member GroupNameFromNumber : i:int -> string
member GroupNumberFromName : name:string -> int
member IsMatch : input:string -> bool + 1 overload
member Match : input:string -> Match + 2 overloads
member Matches : input:string -> MatchCollection + 1 overload
member Options : RegexOptions
member Replace : input:string * replacement:string -> string + 5 overloads
...
Full name: System.Text.RegularExpressions.Regex
--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
type RegexOptions =
| None = 0
| IgnoreCase = 1
| Multiline = 2
| ExplicitCapture = 4
| Compiled = 8
| Singleline = 16
| IgnorePatternWhitespace = 32
| RightToLeft = 64
| ECMAScript = 256
| CultureInvariant = 512
Full name: System.Text.RegularExpressions.RegexOptions
field RegexOptions.RightToLeft = 64
type Match =
inherit Group
member Groups : GroupCollection
member NextMatch : unit -> Match
member Result : replacement:string -> string
static member Empty : Match
static member Synchronized : inner:Match -> Match
Full name: System.Text.RegularExpressions.Match
val pattern : string
Regex.Escape(str: string) : string
val m : bool
val a : string
Full name: Script.a
val b : string
Full name: Script.b
val c : string
Full name: Script.c
val d : string
Full name: Script.d
val e : string
Full name: Script.e
val f : string
Full name: Script.f
More information