2 people like it.

Query Google.com

Query google.com (in english) without using any API key with the default WebClient class We are also instrumenting google ability to search between certain time period example: querygooglen 500 3000 "F#" 30 Here you fetch for 500 urls (we are grabbing 100 urls by query on google) wait between each query to google.com for 3 seconds you are seeking for the keyword "F#" you only want results who are at least newer than 30 days ago -Nicolas http://hackeratwork.com/

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
open System
open System.Data
open System.Linq
open System.Net
open System.Text.RegularExpressions
open System.Threading

let querygooglen maxn (milli:int) (q:string) dayback =
    use client = new WebClient()       
    client.Headers.["User-Agent"] <- "Lynx/2.8.7rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/1.0.1c"    
    [0..100..maxn] 
    |> List.collect (fun x -> 
                    printfn "Query %d" x                                    
                    Thread.Sleep(milli)                                    
                    let str1 = "http://www.google.com/search?q="+q.ToString()+"&hl=en&start="+x.ToString()+"&num=100"
                    let q = if dayback > 0 then
                                let tend = DateTime.Now.ToString("MM/dd/yyyy")
                                let tbeg = DateTime.Now.AddDays( float(-dayback)).ToString("MM/dd/yyyy")
                                str1+"&tbs=cdr:1,cd_min:"+tend+",cd_max:"+tbeg
                            else str1
                    printfn "GOOGLE:%s" q
                    let str = client.DownloadString(q)
                    let regexurl = new  Regex (@"url\?q=([^\&]+)")
                    let rez = [ for url in regexurl.Matches(str) do 
                                    yield url.Groups.[1].Captures.[0].Value ] 
                    printfn "Returned %d Results" (rez.Count())
                    rez) 
    |> Set.ofList 
    |> Set.toList
namespace System
namespace System.Data
namespace System.Linq
namespace System.Net
namespace System.Text
namespace System.Text.RegularExpressions
namespace System.Threading
val querygooglen : maxn:int -> milli:int -> q:string -> dayback:int -> string list

Full name: Script.querygooglen
val maxn : int
val milli : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val q : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val dayback : int
val client : WebClient
Multiple items
type WebClient =
  inherit Component
  new : unit -> WebClient
  member BaseAddress : string with get, set
  member CachePolicy : RequestCachePolicy with get, set
  member CancelAsync : unit -> unit
  member Credentials : ICredentials with get, set
  member DownloadData : address:string -> byte[] + 1 overload
  member DownloadDataAsync : address:Uri -> unit + 1 overload
  member DownloadFile : address:string * fileName:string -> unit + 1 overload
  member DownloadFileAsync : address:Uri * fileName:string -> unit + 1 overload
  member DownloadString : address:string -> string + 1 overload
  ...

Full name: System.Net.WebClient

--------------------
WebClient() : unit
property WebClient.Headers: WebHeaderCollection
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val collect : mapping:('T -> 'U list) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.collect
val x : int
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Multiple items
type Thread =
  inherit CriticalFinalizerObject
  new : start:ThreadStart -> Thread + 3 overloads
  member Abort : unit -> unit + 1 overload
  member ApartmentState : ApartmentState with get, set
  member CurrentCulture : CultureInfo with get, set
  member CurrentUICulture : CultureInfo with get, set
  member DisableComObjectEagerCleanup : unit -> unit
  member ExecutionContext : ExecutionContext
  member GetApartmentState : unit -> ApartmentState
  member GetCompressedStack : unit -> CompressedStack
  member GetHashCode : unit -> int
  ...

Full name: System.Threading.Thread

--------------------
Thread(start: ThreadStart) : unit
Thread(start: ParameterizedThreadStart) : unit
Thread(start: ThreadStart, maxStackSize: int) : unit
Thread(start: ParameterizedThreadStart, maxStackSize: int) : unit
Thread.Sleep(timeout: TimeSpan) : unit
Thread.Sleep(millisecondsTimeout: int) : unit
val str1 : string
String.ToString() : string
String.ToString(provider: IFormatProvider) : string
Int32.ToString() : string
Int32.ToString(provider: IFormatProvider) : string
Int32.ToString(format: string) : string
Int32.ToString(format: string, provider: IFormatProvider) : string
val tend : string
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
property DateTime.Now: DateTime
DateTime.ToString() : string
DateTime.ToString(provider: IFormatProvider) : string
DateTime.ToString(format: string) : string
DateTime.ToString(format: string, provider: IFormatProvider) : string
val tbeg : string
DateTime.AddDays(value: float) : DateTime
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val str : string
WebClient.DownloadString(address: Uri) : string
WebClient.DownloadString(address: string) : string
val regexurl : Regex
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
val rez : string list
val url : Match
Regex.Matches(input: string) : MatchCollection
Regex.Matches(input: string, startat: int) : MatchCollection
property Match.Groups: GroupCollection
(extension) Collections.Generic.IEnumerable.Count<'TSource>() : int
(extension) Collections.Generic.IEnumerable.Count<'TSource>(predicate: Func<'TSource,bool>) : int
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
val toList : set:Set<'T> -> 'T list (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.toList
Raw view Test code New version

More information

Link:http://fssnip.net/iw
Posted:11 years ago
Author:Nicolas Dirand
Tags: google , search engine , web , seo