3 people like it.

Parsing UserAgent strings with FSharp

Identify user's browser, Os and device by the browser's UserAgent string.

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
open System
open System.IO
open System.Net
open System.Text.RegularExpressions

let req = HttpWebRequest.Create "https://raw.githubusercontent.com/ua-parser/uap-core/master/regexes.yaml"
let resp = (new StreamReader(req.GetResponse().GetResponseStream())).ReadToEnd()
let lines = resp.Split( [| Environment.NewLine; "\r"; "\n"; "\r\n" |], StringSplitOptions.RemoveEmptyEntries)
/// Minimal YAML-file parsing
let yamlParse = 
    let yamlParsed, lastName, lastMap =
        lines |> 
            Seq.filter(fun line -> 
                not(line.Trim().StartsWith("#") || line.Trim().Length = 0) && line.Contains(":")
            ) |> Seq.fold(fun (mapping:Map<string,ResizeArray<Map<string,string>>>,name,activemap:ResizeArray<Map<string,string>>) line ->
                let mc = line.IndexOf ':'
                match line.[0] with
                | ' ' ->
                    let key, startLine = 
                        match line.Substring(0, mc).Trim() with
                        | x when x.StartsWith("-") -> 
                            x.Substring(1).Trim(), true
                        | y -> y, false
                    let valu = 
                        match line.Substring(mc + 1).Trim() with
                        | x when x.StartsWith("'") && x.EndsWith("'") -> x.Substring(1, x.Length-2)
                        | x when x.StartsWith("\"") && x.EndsWith("\"") -> x.Substring(1, x.Length-2)
                        | y -> y
                    if startLine then 
                        activemap.Add( Map.empty.Add(key, valu))
                    else 
                        activemap.[activemap.Count-1] <- activemap.[activemap.Count-1].Add(key, valu)

                    mapping, name, activemap
                | _ -> 
                    let mapped = 
                        if name <> "" && (not (activemap |> Seq.isEmpty)) then
                            mapping.Add(name, activemap)
                        else mapping
                    let newMap = ResizeArray()
                    mapped,line.Substring(0, mc).Trim(), newMap
        ) (Map.empty,"", ResizeArray())
    if lastName <> "" && (not (lastMap |> Seq.isEmpty)) then
        yamlParsed.Add(lastName, lastMap)
    else yamlParsed

let getParser parserName (parameters:string list) =
    yamlParse.[parserName] 
    |> Seq.filter(fun p -> p.ContainsKey("regex"))
    |> Seq.map(fun parser ->
        let reg = Regex(parser.["regex"], RegexOptions.IgnoreCase ||| RegexOptions.Compiled)
        let groups = reg.GetGroupNumbers().Length
        reg, 
        parameters |> List.mapi(fun idx p -> 
            if groups > idx && parser.ContainsKey p then parser.[p] else ""
        ) |> List.distinct
    ) |> Seq.toArray 

// To add more versions, add more parameters. https://github.com/ua-parser/uap-core/blob/master/docs/specification.md
let os = getParser "os_parsers" ["os_replacement"; "os_v1_replacement"; "os_v2_replacement"]
let browser = getParser "user_agent_parsers" ["family_replacement"; "v1_replacement"; "v2_replacement"]
let device = getParser "device_parsers" ["device_replacement"; "brand_replacement"; "model_replacement"]

let parseCollection (coll:(Regex*List<string>)[]) (uaString:string) = 
    coll |> Array.filter(fun (regex,_) ->
        regex.IsMatch(uaString))        
    |> Array.map(fun (regex,pars) ->
        let matchedData = regex.Match uaString
        pars 
        |> List.mapi(fun idx label -> 
            let itemName = (idx+1).ToString()
            let groupName = regex.GroupNumberFromName(itemName)
            let itemValue = matchedData.Groups.[groupName].Value
            if label <> "" then 
                label.Replace("$"+itemName, itemValue)
            else itemValue
        ) |> List.filter(fun p -> p <> "") |> List.distinct
    ) |> Array.filter(fun p -> p |> List.isEmpty |> not) |> Array.distinct

type UASoftware = { Item: string; MajorVersion: string; MinorVersion: string }
type UADevice = { Item: string; Brand: string; Model: string }
type UAInfo = { Browser: UASoftware option; Os: UASoftware option; Device: UADevice option }

/// Parse the UserAgent
let parse uaString =
    let pickInfo =
        Array.tryHead >> Option.bind(function
            | [] -> None
            | [h] -> Some {Item = h; MajorVersion = ""; MinorVersion = ""}
            | [h;v] -> Some {Item = h; MajorVersion = v; MinorVersion = ""}
            | [h;v;t]
            | h::v::t::_ -> Some {Item = h; MajorVersion = v; MinorVersion = t})
    let browserInfo = parseCollection browser uaString |> pickInfo
    let osInfo = parseCollection os uaString |> pickInfo
    let deviceInfo = parseCollection device uaString |> pickInfo
    { Browser = browserInfo; Os = osInfo; Device = deviceInfo |> Option.map(fun d -> 
        { Item = d.Item; Brand = d.MajorVersion; Model = d.MinorVersion})}

//let uaString = "Mozilla/5.0 (iPhone; CPU iPhone OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3"
//parse uaString
namespace System
namespace System.IO
namespace System.Net
namespace System.Text
namespace System.Text.RegularExpressions
val req : WebRequest

Full name: Script.req
type HttpWebRequest =
  inherit WebRequest
  member Abort : unit -> unit
  member Accept : string with get, set
  member AddRange : range:int -> unit + 7 overloads
  member Address : Uri
  member AllowAutoRedirect : bool with get, set
  member AllowWriteStreamBuffering : bool with get, set
  member AutomaticDecompression : DecompressionMethods with get, set
  member BeginGetRequestStream : callback:AsyncCallback * state:obj -> IAsyncResult
  member BeginGetResponse : callback:AsyncCallback * state:obj -> IAsyncResult
  member ClientCertificates : X509CertificateCollection with get, set
  ...

Full name: System.Net.HttpWebRequest
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
val resp : string

Full name: Script.resp
Multiple items
type StreamReader =
  inherit TextReader
  new : stream:Stream -> StreamReader + 9 overloads
  member BaseStream : Stream
  member Close : unit -> unit
  member CurrentEncoding : Encoding
  member DiscardBufferedData : unit -> unit
  member EndOfStream : bool
  member Peek : unit -> int
  member Read : unit -> int + 1 overload
  member ReadLine : unit -> string
  member ReadToEnd : unit -> string
  ...

Full name: System.IO.StreamReader

--------------------
StreamReader(stream: Stream) : unit
StreamReader(path: string) : unit
StreamReader(stream: Stream, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Text.Encoding) : unit
StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Text.Encoding) : unit
StreamReader(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
WebRequest.GetResponse() : WebResponse
val lines : string []

Full name: Script.lines
String.Split([<ParamArray>] separator: char []) : string []
String.Split(separator: string [], options: StringSplitOptions) : string []
String.Split(separator: char [], options: StringSplitOptions) : string []
String.Split(separator: char [], count: int) : string []
String.Split(separator: string [], count: int, options: StringSplitOptions) : string []
String.Split(separator: char [], count: int, options: StringSplitOptions) : string []
type Environment =
  static member CommandLine : string
  static member CurrentDirectory : string with get, set
  static member Exit : exitCode:int -> unit
  static member ExitCode : int with get, set
  static member ExpandEnvironmentVariables : name:string -> string
  static member FailFast : message:string -> unit + 1 overload
  static member GetCommandLineArgs : unit -> string[]
  static member GetEnvironmentVariable : variable:string -> string + 1 overload
  static member GetEnvironmentVariables : unit -> IDictionary + 1 overload
  static member GetFolderPath : folder:SpecialFolder -> string + 1 overload
  ...
  nested type SpecialFolder
  nested type SpecialFolderOption

Full name: System.Environment
property Environment.NewLine: string
type StringSplitOptions =
  | None = 0
  | RemoveEmptyEntries = 1

Full name: System.StringSplitOptions
field StringSplitOptions.RemoveEmptyEntries = 1
val yamlParse : Map<string,ResizeArray<Map<string,string>>>

Full name: Script.yamlParse


 Minimal YAML-file parsing
val yamlParsed : Map<string,ResizeArray<Map<string,string>>>
val lastName : string
val lastMap : ResizeArray<Map<string,string>>
module Seq

from Microsoft.FSharp.Collections
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val line : string
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
String.Trim() : string
String.Trim([<ParamArray>] trimChars: char []) : string
String.Contains(value: string) : bool
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State

Full name: Microsoft.FSharp.Collections.Seq.fold
val mapping : Map<string,ResizeArray<Map<string,string>>>
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  override Equals : obj -> bool
  member Remove : key:'Key -> Map<'Key,'Value>
  ...

Full name: Microsoft.FSharp.Collections.Map<_,_>

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
type ResizeArray<'T> = Collections.Generic.List<'T>

Full name: Microsoft.FSharp.Collections.ResizeArray<_>
val name : string
val activemap : ResizeArray<Map<string,string>>
val mc : int
String.IndexOf(value: string) : int
String.IndexOf(value: char) : int
String.IndexOf(value: string, comparisonType: StringComparison) : int
String.IndexOf(value: string, startIndex: int) : int
String.IndexOf(value: char, startIndex: int) : int
String.IndexOf(value: string, startIndex: int, comparisonType: StringComparison) : int
String.IndexOf(value: string, startIndex: int, count: int) : int
String.IndexOf(value: char, startIndex: int, count: int) : int
String.IndexOf(value: string, startIndex: int, count: int, comparisonType: StringComparison) : int
val key : string
val startLine : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
val x : string
String.StartsWith(value: string) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: Globalization.CultureInfo) : bool
val y : string
val valu : string
String.EndsWith(value: string) : bool
String.EndsWith(value: string, comparisonType: StringComparison) : bool
String.EndsWith(value: string, ignoreCase: bool, culture: Globalization.CultureInfo) : bool
property String.Length: int
Collections.Generic.List.Add(item: Map<string,string>) : unit
val empty<'Key,'T (requires comparison)> : Map<'Key,'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Map.empty
property Collections.Generic.List.Count: int
val mapped : Map<string,ResizeArray<Map<string,string>>>
val isEmpty : source:seq<'T> -> bool

Full name: Microsoft.FSharp.Collections.Seq.isEmpty
member Map.Add : key:'Key * value:'Value -> Map<'Key,'Value>
val newMap : Collections.Generic.List<Map<string,string>>
val getParser : parserName:string -> parameters:string list -> (Regex * string list) []

Full name: Script.getParser
val parserName : string
val parameters : string list
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
val p : Map<string,string>
member Map.ContainsKey : key:'Key -> bool
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val parser : Map<string,string>
val reg : Regex
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
type RegexOptions =
  | None = 0
  | IgnoreCase = 1
  | Multiline = 2
  | ExplicitCapture = 4
  | Compiled = 8
  | Singleline = 16
  | IgnorePatternWhitespace = 32
  | RightToLeft = 64
  | ECMAScript = 256
  | CultureInvariant = 512

Full name: System.Text.RegularExpressions.RegexOptions
field RegexOptions.IgnoreCase = 1
field RegexOptions.Compiled = 8
val groups : int
Regex.GetGroupNumbers() : int []
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member GetSlice : startIndex:int option * endIndex:int option -> 'T list
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val mapi : mapping:(int -> 'T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.mapi
val idx : int
val p : string
val distinct : list:'T list -> 'T list (requires equality)

Full name: Microsoft.FSharp.Collections.List.distinct
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val os : (Regex * string list) []

Full name: Script.os
val browser : (Regex * string list) []

Full name: Script.browser
val device : (Regex * string list) []

Full name: Script.device
val parseCollection : coll:(Regex * List<string>) [] -> uaString:string -> string list []

Full name: Script.parseCollection
val coll : (Regex * List<string>) []
val uaString : string
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val filter : predicate:('T -> bool) -> array:'T [] -> 'T []

Full name: Microsoft.FSharp.Collections.Array.filter
val regex : Regex
Regex.IsMatch(input: string) : bool
Regex.IsMatch(input: string, startat: int) : bool
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val pars : List<string>
val matchedData : Match
Regex.Match(input: string) : Match
Regex.Match(input: string, startat: int) : Match
Regex.Match(input: string, beginning: int, length: int) : Match
val label : string
val itemName : string
val groupName : int
Regex.GroupNumberFromName(name: string) : int
val itemValue : string
property Match.Groups: GroupCollection
String.Replace(oldValue: string, newValue: string) : string
String.Replace(oldChar: char, newChar: char) : string
val filter : predicate:('T -> bool) -> list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val p : string list
val isEmpty : list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.isEmpty
val distinct : array:'T [] -> 'T [] (requires equality)

Full name: Microsoft.FSharp.Collections.Array.distinct
type UASoftware =
  {Item: string;
   MajorVersion: string;
   MinorVersion: string;}

Full name: Script.UASoftware
UASoftware.Item: string
UASoftware.MajorVersion: string
UASoftware.MinorVersion: string
type UADevice =
  {Item: string;
   Brand: string;
   Model: string;}

Full name: Script.UADevice
UADevice.Item: string
UADevice.Brand: string
UADevice.Model: string
type UAInfo =
  {Browser: UASoftware option;
   Os: UASoftware option;
   Device: UADevice option;}

Full name: Script.UAInfo
UAInfo.Browser: UASoftware option
type 'T option = Option<'T>

Full name: Microsoft.FSharp.Core.option<_>
UAInfo.Os: UASoftware option
UAInfo.Device: UADevice option
val parse : uaString:string -> UAInfo

Full name: Script.parse


 Parse the UserAgent
val pickInfo : (string list [] -> UASoftware option)
val tryHead : array:'T [] -> 'T option

Full name: Microsoft.FSharp.Collections.Array.tryHead
module Option

from Microsoft.FSharp.Core
val bind : binder:('T -> 'U option) -> option:'T option -> 'U option

Full name: Microsoft.FSharp.Core.Option.bind
union case Option.None: Option<'T>
val h : string
union case Option.Some: Value: 'T -> Option<'T>
val v : string
val t : string
val browserInfo : UASoftware option
val osInfo : UASoftware option
val deviceInfo : UASoftware option
val map : mapping:('T -> 'U) -> option:'T option -> 'U option

Full name: Microsoft.FSharp.Core.Option.map
val d : UASoftware
Next Version Raw view Test code New version

More information

Link:http://fssnip.net/7W1
Posted:5 years ago
Author:Tuomas Hietanen
Tags: parse , parser , parsing , useragent