2 people like it.

The Missing #Rio2016 REST API

The snippet shows how to extract list of Olympic medalists from the Rio 2016 web site in a nice JSON format. It turns out you just need the right regular expression to extract the data from a script tag...

1: 
2: 
3: 
4: 
5: 
6: 
7: 
open FSharp.Data
open System.IO
open System.Text.RegularExpressions

let regex = Regex("pageAllMedalistsJson =([^<]*);[ \t\r\n]*<\/script>")
let page = Http.RequestString("https://www.rio2016.com/en/medal-count-athletes")
let json = regex.Match(page).Groups.[1].Value
Multiple items
namespace FSharp

--------------------
namespace Microsoft.FSharp
Multiple items
namespace FSharp.Data

--------------------
namespace Microsoft.FSharp.Data
namespace System
namespace System.IO
namespace System.Text
namespace System.Text.RegularExpressions
val regex : Regex

Full name: Script.regex
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
val page : string

Full name: Script.page
type Http =
  private new : unit -> Http
  static member private AppendQueryToUrl : url:string * query:(string * string) list -> string
  static member AsyncRequest : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> Async<HttpResponse>
  static member AsyncRequestStream : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> Async<HttpResponseWithStream>
  static member AsyncRequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> Async<string>
  static member private InnerRequest : url:string * toHttpResponse:(string -> int -> string -> string -> string -> 'a0 option -> Map<string,string> -> Map<string,string> -> Stream -> Async<'a1>) * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:'a0 * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> Async<'a1>
  static member Request : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> HttpResponse
  static member RequestStream : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> HttpResponseWithStream
  static member RequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(HttpWebRequest -> HttpWebRequest) -> string

Full name: FSharp.Data.Http
static member Http.RequestString : url:string * ?query:(string * string) list * ?headers:seq<string * string> * ?httpMethod:string * ?body:HttpRequestBody * ?cookies:seq<string * string> * ?cookieContainer:System.Net.CookieContainer * ?silentHttpErrors:bool * ?responseEncodingOverride:string * ?customizeHttpRequest:(System.Net.HttpWebRequest -> System.Net.HttpWebRequest) -> string
val json : string

Full name: Script.json
Regex.Match(input: string) : Match
Regex.Match(input: string, startat: int) : Match
Regex.Match(input: string, beginning: int, length: int) : Match

More information

Link:http://fssnip.net/7Qt
Posted:9 months ago
Author:Tomas Petricek
Tags: f# data , regular expressions , rest