26 people like it.
Like the snippet!
F# googelsearch
A google search automation.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
|
open System
open System.IO
open System.Net
open System.Threading
open System.Windows.Forms
open SHDocVw
open mshtml
open System.Windows.Forms
open System.Text.RegularExpressions
let (|Match|_|) (pat:string) (inp:string) =
let m = Regex.Match(inp, pat) in
if m.Success
then Some (List.tail [ for g in m.Groups -> g.Value ])
else None
let (|Matches|_|) (pat:string) (inp:string) =
let m = Regex.Matches(inp, pat) in
if m.Count > 0
then Some ( [ for g in m -> g.Value ])
else None
let getPage (url:string) =
let (html:HtmlDocument ref) = ref null
let handler (sender:obj) (e: WebBrowserDocumentCompletedEventArgs) =
let wb = sender :?> (WebBrowser)
html := (wb.Document)
use wb = new WebBrowser()
wb.Visible<-true
wb.DocumentCompleted.Add(handler wb)
wb.Navigate(url)
while wb.ReadyState <> WebBrowserReadyState.Complete do
Application.DoEvents()
html
let googleSearch term =
let rec googleSearch link pass res =
let search = getPage link
Thread.Sleep(TimeSpan.FromSeconds(5.0))
let potentialLinks =
seq{ for link in search.Value.Links do
if not(link.OuterHtml.Contains("google")) then
yield link.OuterHtml }
|>Seq.toArray
let pat = "href=\"([^>]+)\">"
let links =
potentialLinks
|> Array.filter(fun elem -> elem.Contains("<A class=\"l\" onmousedown="))
|> Array.map( fun elem -> match elem with
|Match pat (link::t) -> link
| _ -> failwith "critical parsing error")
let next =
potentialLinks
|> Array.filter(fun elem -> elem.Contains("<A class=\"fl\"")&& elem.Contains("/search?"))
|> Array.map( fun elem -> match elem with
| Match pat (link::t) -> "http://www.google.com"+link
| _ -> failwith "critical parsing error")
let newRes = res|>Array.append(links)
if (next.Length-1) <= pass then
newRes
else
let newPass = pass+1
googleSearch ((next.[newPass]).Replace(";","&")) newPass newRes
googleSearch ("http://www.google.com/search?num=100&hl=en&q="+term+"&btnG=Search&aq=f&aqi=&aql=&oq=") -1 [||]
googleSearch "F# love"
|
namespace System
namespace System.IO
namespace System.Net
namespace System.Threading
namespace System.Windows
namespace System.Windows.Forms
namespace System.Text
namespace System.Text.RegularExpressions
type Match =
inherit Group
member Groups : GroupCollection
member NextMatch : unit -> Match
member Result : replacement:string -> string
static member Empty : Match
static member Synchronized : inner:Match -> Match
Full name: System.Text.RegularExpressions.Match
val pat : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
val inp : string
val m : Match
Multiple items
type Regex =
new : pattern:string -> Regex + 1 overload
member GetGroupNames : unit -> string[]
member GetGroupNumbers : unit -> int[]
member GroupNameFromNumber : i:int -> string
member GroupNumberFromName : name:string -> int
member IsMatch : input:string -> bool + 1 overload
member Match : input:string -> Match + 2 overloads
member Matches : input:string -> MatchCollection + 1 overload
member Options : RegexOptions
member Replace : input:string * replacement:string -> string + 5 overloads
...
Full name: System.Text.RegularExpressions.Regex
--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
Regex.Match(input: string, pattern: string) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions) : Match
property Group.Success: bool
union case Option.Some: Value: 'T -> Option<'T>
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val tail : list:'T list -> 'T list
Full name: Microsoft.FSharp.Collections.List.tail
val g : Group
property Match.Groups: GroupCollection
property Capture.Value: string
union case Option.None: Option<'T>
val m : MatchCollection
Regex.Matches(input: string, pattern: string) : MatchCollection
Regex.Matches(input: string, pattern: string, options: RegexOptions) : MatchCollection
property MatchCollection.Count: int
val g : Match
val getPage : url:string -> HtmlDocument ref
Full name: Script.getPage
val url : string
val html : HtmlDocument ref
type HtmlDocument =
member ActiveElement : HtmlElement
member ActiveLinkColor : Color with get, set
member All : HtmlElementCollection
member AttachEventHandler : eventName:string * eventHandler:EventHandler -> unit
member BackColor : Color with get, set
member Body : HtmlElement
member Cookie : string with get, set
member CreateElement : elementTag:string -> HtmlElement
member DefaultEncoding : string
member DetachEventHandler : eventName:string * eventHandler:EventHandler -> unit
...
Full name: System.Windows.Forms.HtmlDocument
Multiple items
val ref : value:'T -> 'T ref
Full name: Microsoft.FSharp.Core.Operators.ref
--------------------
type 'T ref = Ref<'T>
Full name: Microsoft.FSharp.Core.ref<_>
val handler : (obj -> WebBrowserDocumentCompletedEventArgs -> unit)
val sender : obj
type obj = Object
Full name: Microsoft.FSharp.Core.obj
val e : WebBrowserDocumentCompletedEventArgs
Multiple items
type WebBrowserDocumentCompletedEventArgs =
inherit EventArgs
new : url:Uri -> WebBrowserDocumentCompletedEventArgs
member Url : Uri
Full name: System.Windows.Forms.WebBrowserDocumentCompletedEventArgs
--------------------
WebBrowserDocumentCompletedEventArgs(url: Uri) : unit
val wb : WebBrowser
Multiple items
type WebBrowser =
inherit WebBrowserBase
new : unit -> WebBrowser
member AllowNavigation : bool with get, set
member AllowWebBrowserDrop : bool with get, set
member CanGoBack : bool
member CanGoForward : bool
member Document : HtmlDocument
member DocumentStream : Stream with get, set
member DocumentText : string with get, set
member DocumentTitle : string
member DocumentType : string
...
Full name: System.Windows.Forms.WebBrowser
--------------------
WebBrowser() : unit
property WebBrowser.Document: HtmlDocument
property Control.Visible: bool
event WebBrowser.DocumentCompleted: IEvent<WebBrowserDocumentCompletedEventHandler,WebBrowserDocumentCompletedEventArgs>
member IObservable.Add : callback:('T -> unit) -> unit
WebBrowser.Navigate(urlString: string) : unit
WebBrowser.Navigate(url: Uri) : unit
WebBrowser.Navigate(urlString: string, newWindow: bool) : unit
WebBrowser.Navigate(url: Uri, newWindow: bool) : unit
WebBrowser.Navigate(urlString: string, targetFrameName: string) : unit
WebBrowser.Navigate(url: Uri, targetFrameName: string) : unit
WebBrowser.Navigate(urlString: string, targetFrameName: string, postData: byte [], additionalHeaders: string) : unit
WebBrowser.Navigate(url: Uri, targetFrameName: string, postData: byte [], additionalHeaders: string) : unit
property WebBrowser.ReadyState: WebBrowserReadyState
type WebBrowserReadyState =
| Uninitialized = 0
| Loading = 1
| Loaded = 2
| Interactive = 3
| Complete = 4
Full name: System.Windows.Forms.WebBrowserReadyState
field WebBrowserReadyState.Complete = 4
type Application =
static member AddMessageFilter : value:IMessageFilter -> unit
static member AllowQuit : bool
static member CommonAppDataPath : string
static member CommonAppDataRegistry : RegistryKey
static member CompanyName : string
static member CurrentCulture : CultureInfo with get, set
static member CurrentInputLanguage : InputLanguage with get, set
static member DoEvents : unit -> unit
static member EnableVisualStyles : unit -> unit
static member ExecutablePath : string
...
nested type MessageLoopCallback
Full name: System.Windows.Forms.Application
Application.DoEvents() : unit
val googleSearch : term:string -> string []
Full name: Script.googleSearch
val term : string
val googleSearch : (string -> int -> string [] -> string [])
val link : string
val pass : int
val res : string []
val search : HtmlDocument ref
Multiple items
type Thread =
inherit CriticalFinalizerObject
new : start:ThreadStart -> Thread + 3 overloads
member Abort : unit -> unit + 1 overload
member ApartmentState : ApartmentState with get, set
member CurrentCulture : CultureInfo with get, set
member CurrentUICulture : CultureInfo with get, set
member DisableComObjectEagerCleanup : unit -> unit
member ExecutionContext : ExecutionContext
member GetApartmentState : unit -> ApartmentState
member GetCompressedStack : unit -> CompressedStack
member GetHashCode : unit -> int
...
Full name: System.Threading.Thread
--------------------
Thread(start: ThreadStart) : unit
Thread(start: ParameterizedThreadStart) : unit
Thread(start: ThreadStart, maxStackSize: int) : unit
Thread(start: ParameterizedThreadStart, maxStackSize: int) : unit
Thread.Sleep(timeout: TimeSpan) : unit
Thread.Sleep(millisecondsTimeout: int) : unit
Multiple items
type TimeSpan =
struct
new : ticks:int64 -> TimeSpan + 3 overloads
member Add : ts:TimeSpan -> TimeSpan
member CompareTo : value:obj -> int + 1 overload
member Days : int
member Duration : unit -> TimeSpan
member Equals : value:obj -> bool + 1 overload
member GetHashCode : unit -> int
member Hours : int
member Milliseconds : int
member Minutes : int
...
end
Full name: System.TimeSpan
--------------------
TimeSpan()
TimeSpan(ticks: int64) : unit
TimeSpan(hours: int, minutes: int, seconds: int) : unit
TimeSpan(days: int, hours: int, minutes: int, seconds: int) : unit
TimeSpan(days: int, hours: int, minutes: int, seconds: int, milliseconds: int) : unit
TimeSpan.FromSeconds(value: float) : TimeSpan
val potentialLinks : string []
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val link : HtmlElement
property Ref.Value: HtmlDocument
property HtmlDocument.Links: HtmlElementCollection
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
property HtmlElement.OuterHtml: string
String.Contains(value: string) : bool
module Seq
from Microsoft.FSharp.Collections
val toArray : source:seq<'T> -> 'T []
Full name: Microsoft.FSharp.Collections.Seq.toArray
val links : string []
type Array =
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...
Full name: System.Array
val filter : predicate:('T -> bool) -> array:'T [] -> 'T []
Full name: Microsoft.FSharp.Collections.Array.filter
val elem : string
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
Multiple items
active recognizer Match: string -> string -> string list option
Full name: Script.( |Match|_| )
--------------------
type Match =
inherit Group
member Groups : GroupCollection
member NextMatch : unit -> Match
member Result : replacement:string -> string
static member Empty : Match
static member Synchronized : inner:Match -> Match
Full name: System.Text.RegularExpressions.Match
val t : string list
val failwith : message:string -> 'T
Full name: Microsoft.FSharp.Core.Operators.failwith
val next : string []
val newRes : string []
val append : array1:'T [] -> array2:'T [] -> 'T []
Full name: Microsoft.FSharp.Collections.Array.append
property Array.Length: int
val newPass : int
More information