0 people like it.

pushkin tree

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
142: 
143: 
144: 
145: 
146: 
147: 
148: 
149: 
150: 
151: 
152: 
153: 
154: 
155: 
156: 
157: 
158: 
159: 
160: 
161: 
162: 
163: 
164: 
165: 
166: 
167: 
168: 
169: 
170: 
171: 
172: 
173: 
174: 
175: 
176: 
177: 
178: 
179: 
180: 
181: 
182: 
183: 
184: 
185: 
186: 
187: 
188: 
189: 
190: 
191: 
192: 
193: 
194: 
195: 
196: 
197: 
198: 
199: 
200: 
201: 
202: 
203: 
204: 
205: 
206: 
207: 
208: 
209: 
210: 
211: 
212: 
213: 
214: 
215: 
216: 
217: 
218: 
219: 
220: 
221: 
222: 
223: 
224: 
225: 
226: 
227: 
228: 
229: 
230: 
231: 
232: 
233: 
234: 
235: 
236: 
237: 
238: 
239: 
240: 
241: 
242: 
243: 
244: 
245: 
246: 
247: 
248: 
249: 
250: 
251: 
252: 
253: 
254: 
255: 
256: 
257: 
258: 
259: 
260: 
261: 
262: 
263: 
264: 
265: 
266: 
267: 
268: 
269: 
270: 
271: 
272: 
273: 
274: 
275: 
276: 
277: 
278: 
279: 
280: 
281: 
282: 
283: 
284: 
285: 
286: 
287: 
288: 
289: 
290: 
291: 
292: 
293: 
294: 
295: 
296: 
297: 
298: 
299: 
300: 
301: 
302: 
303: 
304: 
305: 
306: 
307: 
308: 
309: 
310: 
311: 
312: 
313: 
314: 
315: 
316: 
317: 
318: 
319: 
320: 
321: 
322: 
323: 
324: 
325: 
326: 
327: 
328: 
329: 
330: 
331: 
332: 
333: 
334: 
335: 
336: 
337: 
338: 
339: 
340: 
341: 
342: 
343: 
344: 
345: 
346: 
347: 
348: 
349: 
350: 
351: 
352: 
353: 
354: 
355: 
356: 
357: 
358: 
359: 
360: 
361: 
362: 
363: 
364: 
365: 
366: 
367: 
368: 
369: 
370: 
371: 
372: 
373: 
374: 
375: 
376: 
377: 
378: 
379: 
380: 
381: 
382: 
383: 
384: 
385: 
386: 
387: 
388: 
389: 
390: 
391: 
392: 
393: 
394: 
395: 
396: 
397: 
398: 
399: 
400: 
open System
open System.Net
open System.Text
open System.IO
open System.Text.RegularExpressions

//Runs `job` eagerly, prints how long it took and returns whatever the job returned.
//jobName -- label used in the printed message
//job     -- unit -> 'a function to execute and measure
//Not useful for lazy work: a job that only returns a sequence (IEnumerable)
//finishes immediately and the real cost is paid by whoever enumerates it later.
let time jobName job = 
    //Stopwatch is monotonic and high-resolution, while DateTime.Now is wall-clock time
    //that can jump (clock sync, DST) and produce wrong or even negative durations
    let stopwatch = System.Diagnostics.Stopwatch.StartNew()
    let returnValue = job()
    stopwatch.Stop()
    printfn "%s took %d ms" jobName stopwatch.ElapsedMilliseconds
    returnValue

//Walks two ascending lists in linear time and, for every pair of elements with equal keys,
//emits `merger x y`. Elements whose key is missing from the other list are dropped.
//xs, ys       -- input lists, both ordered ascending by key
//keyExtractor -- produces the comparable key of an element
//merger       -- combines an element of xs with the matching element of ys
//NOTE:
//1. keys in ys are assumed to be unique, otherwise results will be surprising, see (*) below
//2. both lists are assumed to be ordered ascending
//The loop is tail-recursive (accumulate, then reverse) so long lists cannot overflow the stack.
let orderedListsMerge xs ys keyExtractor merger =
    let rec loop merged xs ys =
        match xs, ys with
        | [], _ | _, [] -> List.rev merged
        | x :: xs', y :: ys' ->
            let xkey = keyExtractor x
            let ykey = keyExtractor y
            if xkey = ykey then
                //advance xs only and keep ys in place: the next x may still carry the same key,
                //while the next y is assumed to have a different one                      (*)
                loop (merger x y :: merged) xs' ys
            elif xkey > ykey then
                loop merged xs ys'
            else
                loop merged xs' ys
    loop [] xs ys

//Downloads the document at `url` and returns its body as a string.
//The body is decoded as Windows-1251 -- don't forget the Encoding when you work with international documents.
//Exceptions thrown by the request or by reading the stream propagate to the caller.
let webRequestHtml (url : string) =
    let req = WebRequest.Create(url)
    //`use` disposes response, stream and reader even when reading throws;
    //previously only the response was closed, and only on the success path
    use resp = req.GetResponse()
    use stream = resp.GetResponseStream()
    use reader = new StreamReader(stream, Encoding.GetEncoding("Windows-1251"))
    reader.ReadToEnd()

//Returns capture group 1 of the first match of `pattern` in `input`.
//Singleline mode lets '.' match newlines, so a capture may span several lines.
//Yields an empty string when nothing matches.
let regexSingleLineMatch input pattern =
    let firstMatch = Regex.Match(input, pattern, RegexOptions.Singleline)
    firstMatch.Groups.[1].Value

//Lazily yields capture group 1 of every match of `pattern` in `input`.
//Matching is deferred until the resulting sequence is enumerated.
let regexMatches input pattern =
    Seq.delay (fun () ->
        Regex.Matches(input, pattern)
            |> Seq.cast<Match>
            |> Seq.map (fun found -> found.Groups.[1].Value))

//Extracts the href targets of all anchors that carry a `name` attribute;
//only named hrefs point to poems.
//XmlDocument was tried here first, but the pages are not well-formed XML,
//hence the regular expression.
let extractNamedHrefs html = 
    regexMatches html "<a name=.* href=\"(.+?)\">.*</a>"


//Strips every html tag (anything between '<' and the nearest '>') from the text.
let cleanupHtml text =
    let tagRegex = Regex("<.+?>")
    tagRegex.Replace(text, String.Empty)

    
//Returns the text up to (not including) the first newline character.
//'.' does not match '\n' by default, so the first match of "(.*)" is exactly the first line.
let takeFirstLine text =
    let firstLine = Regex.Match(text, "(.*)")
    firstLine.Groups.[1].Value


//A single poem.
//poemHref -- url of the poem
//title    -- title as found on the page; "* * *" marks an untitled poem
//lines    -- raw poem lines (html tags already removed, html entities possibly still present)
type Poem(poemHref : string, title : string, lines : seq<string>) =
    let MAX_TITLE_LENGTH = 30
    //lines with the &nbsp; entities removed; lazily evaluated
    let cleanLines = lines |> Seq.map (fun (line : string) -> line.Replace("&nbsp;", ""))
    //first cleaned line, or the original title when the poem has no lines at all
    //(previously an untitled poem without lines made Title throw)
    let firstLineOrTitle () =
        match cleanLines |> Seq.truncate 1 |> Seq.toList with
        | firstLine :: _ -> firstLine
        | [] -> title

    member this.Href = poemHref

    //Display title: untitled poems ("* * *") are named after their first line.
    //The cleaned line is used so that truncation cannot cut an "&nbsp;" entity in half.
    //Titles longer than MAX_TITLE_LENGTH are shortened and end with "...".
    member this.Title =
        let newTitle = 
            match title with
            | "* * *" -> firstLineOrTitle ()
            | _ -> title
        if(newTitle.Length > MAX_TITLE_LENGTH) then
            newTitle.Substring(0, MAX_TITLE_LENGTH-3) + "..."
        else
            newTitle

    //Poem lines with "&nbsp;" entities removed.
    member this.Lines = cleanLines

    //For every line, the lower-cased russian words in their order of appearance.
    //'ё' has to be listed explicitly: it lies outside the а-я code point range,
    //so without it words such as "ещё" were split into separate tokens.
    member this.LineTokens =
        let russianWordPattern = "([а-яё]+)"
        lines |> Seq.map (fun line -> regexMatches (line.ToLowerInvariant()) russianWordPattern)

//TODO: add more structural analysis -> handle sub-titles and personas
//Builds a Poem from the html of a poem page.
//poemHref -- url the html was downloaded from
//poemHtml -- full html of the page
let producePoem poemHref poemHtml =
    //titles can be multiline, sometimes they include sub-titles; only the first line is kept
    let title =
        regexSingleLineMatch poemHtml "<h1>(.+?)</h1>"
            |> cleanupHtml
            |> takeFirstLine
    //every span whose class starts with "line" holds one line of the poem
    let lines =
        regexMatches poemHtml "<span class=\"line.*>(.+?)</span>"
            |> Seq.map cleanupHtml
    new Poem(poemHref, title, lines)


//True when the link points to a final edition poem.
//Early edition links contain "03edit" and are rejected to avoid duplicate texts in the index.
let isFinalEditionHref (href : string) =
    href.IndexOf("03edit", StringComparison.Ordinal) < 0

//Lazy, cached sequence of all poems found on the site.
//Nothing is downloaded until the sequence is enumerated; thanks to Seq.cache
//every page is requested at most once, however often the sequence is walked.
let crawlPoems =
    let domainUrl = "http://www.rvb.ru/pushkin/"
    let volumeUrlTemplate = domainUrl + "tocvol{0}.htm"
    let poemUrlTemplate = domainUrl + "{0}"

    //take only first 4 volumes -- they contain poems
    let volumeUrls = Seq.init 4 (fun i -> String.Format(volumeUrlTemplate, i + 1))

    //absolute urls of all final edition poems listed in the volume tables of contents
    let poemUrls =
        volumeUrls
            |> Seq.collect (fun volumeUrl -> extractNamedHrefs (webRequestHtml volumeUrl))
            |> Seq.filter isFinalEditionHref
            |> Seq.map (fun href -> String.Format(poemUrlTemplate, href))

//            //development mode -- comment later
//            |> Seq.take 40

    //requesting individual poems
    poemUrls
        |> Seq.map (fun poemUrl -> producePoem poemUrl (webRequestHtml poemUrl))
        |> Seq.cache
        

//Builds the inverted index of tokens in poems:
//  token -> poem number -> (line number, position in line)
//The result is a list ordered by token; for each token the poems are ordered by
//poem number, and for each poem the hits are ordered by line number, then position.
//Poem, line and position numbers are zero-based.
let indexPoems (poems : seq<Poem>) = 
    //flat stream of every token occurrence in every poem
    let occurrences =
        seq {
            for (poemNumber, poem) in poems |> Seq.mapi (fun i poem -> (i, poem)) do
                for (lineNumber, tokens) in poem.LineTokens |> Seq.mapi (fun i tokens -> (i, tokens)) do
                    for (position, token) in tokens |> Seq.mapi (fun i token -> (i, token)) do
                        yield (token, poemNumber, lineNumber, position)
        }

    //groups the occurrences of one token by poem
    let postingsOf tokenOccurrences =
        tokenOccurrences
            |> Seq.groupBy (fun (_, poemNumber, _, _) -> poemNumber)
            |> Seq.sortBy fst
            |> Seq.map
            (
                fun (poemNumber, hits) ->
                    let linesPositions =
                        hits
                            |> Seq.map (fun (_, _, lineNumber, position) -> (lineNumber, position))
                            |> Seq.sort     //tuples compare by line number first, then by position
                            |> Seq.toList
                    (poemNumber, linesPositions)
            )
            |> Seq.toList

    //turn the raw occurrences into the ordered inverted index
    occurrences
        |> Seq.groupBy (fun (token, _, _, _) -> token)
        |> Seq.sortBy fst
        |> Seq.map (fun (token, tokenOccurrences) -> (token, postingsOf tokenOccurrences))
        |> Seq.toList

//Token index: the subtree of the full index that belongs to `filterToken`,
//reduced to the hits located exactly at `filterPosition`.
//Poems left without any hit are dropped.
//List.find throws when the token is not present in the index.
let tokenIndex fullIndex filterToken filterPosition =
    let (_, poems) = List.find (fun (token, _) -> token = filterToken) fullIndex
    poems
        |> List.choose
        (
            fun (poemNumber, linesPositions) ->
                match List.filter (fun (_, position) -> position = filterPosition) linesPositions with
                | [] -> None
                | filteredLines -> Some (poemNumber, filteredLines)
        )


//Intersects the current index with a token index (a subtree of the full index, see tokenIndex).
//For every token only the poems, and within them only the lines, that also occur
//in the token index are kept; poems and tokens left without hits are dropped.
let intersectIndex currentIndex tokenIndex =
    let lineKey (lineNumber, _) = lineNumber
    let poemKey (poemNumber, _) = poemNumber

    //keeps the current hits whose line number is present in the token hits
    let keepSharedLines currentLinesPositions tokenLinesPositions =
        orderedListsMerge currentLinesPositions tokenLinesPositions lineKey (fun currentHit _ -> currentHit)

    //keeps the poems present on both sides, each reduced to the shared lines
    let keepSharedPoems currentPoems =
        let mergePoem (poemNumber, currentLinesPositions) (_, tokenLinesPositions) =
            (poemNumber, keepSharedLines currentLinesPositions tokenLinesPositions)
        orderedListsMerge currentPoems tokenIndex poemKey mergePoem
            |> List.filter (fun (_, linesPositions) -> not (List.isEmpty linesPositions))

    currentIndex
        |> List.choose
        (
            fun (token, poems) ->
                match keepSharedPoems poems with
                | [] -> None
                | sharedPoems -> Some (token, sharedPoems)
        )


// The main function to query the reverse index.
// index        -- the index itself, expected to be already narrowed down by the caller (intersectIndex / tokenIndex)
// findPosition -- position in the line to look at
// count        -- maximum number of terms to return
// Returns (token, frequency) pairs for the tokens seen at findPosition,
// most frequent first, at most `count` of them.
let queryIndex index findPosition count =
    //number of hits of one poem that sit exactly at findPosition
    let hitsAtPosition (_, linesPositions) =
        linesPositions
            |> List.filter (fun (_, position) -> position = findPosition)
            |> List.length

    index 
        |> List.map (fun (token, poems) -> (token, List.sumBy hitsAtPosition poems))
        |> Seq.filter (fun (_, tokenFreq) -> tokenFreq > 0)
        |> Seq.sortBy (fun (_, tokenFreq) -> -tokenFreq)
        |> Seq.truncate count   // unlike Seq.take this does not fail when there are fewer than "count" elements
        |> Seq.toList

//Returns (poemNumber, lineNumber) of the first hit of `findToken` at `findPosition`.
//Used to resolve a query result with a single occurrence into its poem.
//Throws when the token is missing from the index or has no hit at that position.
let getPoemResult index findToken findPosition =
    let (_, poems) = List.find (fun (token, _) -> token = findToken) index
    let hits =
        [ for (poemNumber, linesPositions) in poems do
            for (lineNumber, position) in linesPositions do
                if position = findPosition then
                    yield (poemNumber, lineNumber) ]
    hits |> Seq.nth 0


//Result of a single index query, as produced by wrappedQueryIndex
type QueryResult =
    //token + number of occurrences of that token at the queried position
    | LineVariant       of string*int
    //poemNumber, lineNumber -- location of a token that occurs exactly once
    | SinglePoem    of int*int

//TODO: identical strings currently will not be resolved to their poems

//Wrapper around queryIndex: performs the same query, but translates
//every (token, frequency) pair into a QueryResult.
//Tokens found exactly once are resolved to their (poemNumber, lineNumber) location,
//all other tokens are returned together with their frequency.
let wrappedQueryIndex filteredIndex searchPosition count =
    let wrap (token, frequency) =
        if frequency = 1 then
            SinglePoem(getPoemResult filteredIndex token searchPosition)
        else
            LineVariant(token, frequency)
    queryIndex filteredIndex searchPosition count
        |> List.map wrap


//Display-ready counterpart of QueryResult, as produced by prettifyQueryResults
type PrettyResult =
    //token + number of occurrences
    | PrettyLineVariant       of string*int
    //poem title, poem href, lineNumber, text of that line
    | PrettySinglePoem        of string*string*int*string

//Translates query results into their display-ready form.
//queryResults -- results of wrappedQueryIndex
//poems        -- the poems the index was built from; SinglePoem results are looked up
//                here by poem number and line number
let prettifyQueryResults queryResults poems =
    let prettify result =
        match result with
        | LineVariant (token, count) ->
            PrettyLineVariant(token,count)
        | SinglePoem (poemNumber, lineNumber) ->
            let (poem : Poem) = Seq.nth poemNumber poems
            let line = Seq.nth lineNumber poem.Lines
            PrettySinglePoem(poem.Title, poem.Href, lineNumber, line)
    List.map prettify queryResults

//Node of the result tree built by createPushkinTree
type PushkinTreeNode =
    //token, its frequency, child nodes for the next word position
    | VariantNode     of string*int*seq<PushkinTreeNode>
    //leaf: poem title, poem href, lineNumber, text of that line
    | PoemNode        of string*string*int*string

//Builds the whole result tree, eagerly.
//pushkinPoems -- the crawled poems, used to resolve leaves into title/href/line
//poemsIndex   -- full inverted index of these poems
//count        -- maximum number of children per tree level
//Level N of the tree lists the most frequent tokens at word position N among the lines
//that start with the tokens on the path from the root; a token that occurs only once
//becomes a PoemNode leaf, every other token becomes a VariantNode with its own sub-level.
let createPushkinTree pushkinPoems poemsIndex count =
    let rec createTreeLevel count currentQuery currentIndex : PushkinTreeNode list =
        //the length of the query so far is the word position this level is about
        let searchPosition = List.length currentQuery

        let toNode result =
            match result with
            | PrettySinglePoem (title, href, lineNumber, line) ->
                PoemNode(title, href, lineNumber, line)
            | PrettyLineVariant (token, freq) ->
                //narrow the index down to the lines that have `token` at this position
                let narrowedIndex = intersectIndex currentIndex (tokenIndex currentIndex token searchPosition)
                VariantNode(token, freq, createTreeLevel count (currentQuery @ [token]) narrowedIndex)

        let queryResult = wrappedQueryIndex currentIndex searchPosition count
        prettifyQueryResults queryResult pushkinPoems
            |> List.map toNode
    createTreeLevel count [] poemsIndex

//Renders the result tree as html: one <table> per tree level, all tables emitted
//one after another in depth-first order (a level first, then the levels below it).
//Numbering scheme:
//  * the rows of a level get consecutive numbers starting at `startingNumber`;
//  * the table of a sub-level gets the number of the row it belongs to as its id;
//  * sub-levels are numbered after all rows of the current level and after all
//    rows of the sub-trees of the preceding rows.
//The root table has id 0 and its rows start at 1; the root path is ["0"].
let resultsToHtml pushkinTree =
    //turns ["0"; "3"] into "'0','3'" -- the argument list of the javascript x([...]) call
    let pathToString path =
        let parts = 
            path
                |> Seq.map (fun x -> "'"+x+"'")
        String.Join(",", parts)

    //Appends the html of `tree` and of all levels below it to `output`;
    //returns the total number of rows rendered (this level plus everything below).
    //A single StringBuilder is shared by the whole recursion instead of concatenating
    //ever-growing strings, which was quadratic in the size of the output.
    let rec renderLevel (output : StringBuilder) tree currentPath (currentNumber:int) startingNumber : int =
        //materialize the level once; it is walked twice below and its length is needed as well
        let numberedLevel =
            tree
                |> Seq.mapi (fun i node -> (startingNumber+i, node))
                |> Seq.toList
        let levelLength = List.length numberedLevel

        output.Append(String.Format("<div class=\"x\"><table id=\"{0}\" class=\"p\">", currentNumber)).Append(Environment.NewLine) |> ignore
        for (number, node) in numberedLevel do
            let row =
                match node with
                | PoemNode (title, href, lineNumber, line) ->
                    //line numbers are shown one-based; the row now closes its <td>, which was missing before
                    String.Format("<tr><td><span class=line>{0}</span> &#8658; <a target=blank class=fromlink href=\"{1}\">{2}</a>, строка {3}</td></tr>",line, href, title, lineNumber+1)
                | VariantNode (token, freq, _) ->
                    String.Format("<tr id=\"r{0}\"><td>{1} &#8658; <span class=\"lv\" onClick=\"x([{2}])\">{3}</span></td></tr>", number, token, (pathToString (currentPath@[string(number)])), freq)
            output.Append(row).Append(Environment.NewLine) |> ignore
        output.Append(@"</table></div>").Append(Environment.NewLine).Append(Environment.NewLine) |> ignore

        //render the sub-levels; `acc` counts the rows of the sub-trees rendered so far
        let subTreeCount =
            numberedLevel
                |> List.fold
                (
                    fun acc (number, node) ->
                        match node with
                        | PoemNode _ -> acc
                        | VariantNode (_, _, subtree) ->
                            acc + renderLevel output subtree (currentPath@[string(number)]) number (startingNumber+acc+levelLength)
                ) 0

        levelLength + subTreeCount

    let content = StringBuilder()
    renderLevel content pushkinTree ["0"] 0 1 |> ignore
    content.ToString()

//Reads "template.htm" from the current directory, puts `content` in place of the
//#HERE_GOES_CONTENT# placeholder and writes the result to "output.htm".
//File system exceptions (e.g. a missing template) propagate to the caller.
let outputResultsToFile (content:string) =
    let templateFile = "template.htm"
    let outputFile = "output.htm"
    let templatePlaceholder = "#HERE_GOES_CONTENT#"
    let templateHtml = File.ReadAllText(templateFile)
    //plain String.Replace on purpose: Regex.Replace treats the replacement as a substitution
    //pattern, so "$1", "$0", "$&" etc. inside the generated html would have been expanded
    let resultHtml = templateHtml.Replace(templatePlaceholder, content)
    File.WriteAllText(outputFile, resultHtml)


//Script entry point: crawl the poems, index them, build the result tree,
//render it to html and write the output file, timing every stage.
let poems = crawlPoems //lazy operation, so we can't time it here, we do it in the next line
//Seq.length enumerates the whole cached sequence, so this is where the crawl actually happens
printfn "Crawled %d poems" (time "Crawling poems" (fun() -> poems |> Seq.length))
printfn "Crawled %d lines" (poems |> Seq.sumBy ( fun poem -> poem.Lines |> Seq.length))
let poemIndex = time "Indexing poems" (fun () -> poems |> indexPoems)
printfn "Index contains %d terms" poemIndex.Length
//20 is the maximum number of variants per tree level
let tree = time "Generating result tree" (fun () -> createPushkinTree poems poemIndex 20) 
let htmlContent = time "Generating html content" (fun () -> resultsToHtml tree)
time "Output content" (fun() -> outputResultsToFile htmlContent)

//let queryResult = queryIndex poemIndex ["но"] 10
//let prettyResult = prettifyQueryResults queryResult poems
namespace System
namespace System.Net
namespace System.Text
namespace System.IO
namespace System.Text.RegularExpressions
val time : jobName:string -> job:(unit -> 'a) -> 'a

Full name: Script.time
val jobName : string
val job : (unit -> 'a)
val startTime : DateTime
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Globalization.Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
property DateTime.Now: DateTime
val returnValue : 'a
val endTime : DateTime
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val orderedListsMerge : xs:'a list -> ys:'a list -> keyExtractor:('a -> 'b) -> merger:('a -> 'a -> 'c) -> 'c list (requires comparison)

Full name: Script.orderedListsMerge
val xs : 'a list
val ys : 'a list
val keyExtractor : ('a -> 'b) (requires comparison)
val merger : ('a -> 'a -> 'c)
val x : 'a
val xs' : 'a list
val y : 'a
val ys' : 'a list
val xkey : 'b (requires comparison)
val ykey : 'b (requires comparison)
val webRequestHtml : url:string -> string

Full name: Script.webRequestHtml
val url : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
val req : WebRequest
type WebRequest =
  inherit MarshalByRefObject
  member Abort : unit -> unit
  member AuthenticationLevel : AuthenticationLevel with get, set
  member BeginGetRequestStream : callback:AsyncCallback * state:obj -> IAsyncResult
  member BeginGetResponse : callback:AsyncCallback * state:obj -> IAsyncResult
  member CachePolicy : RequestCachePolicy with get, set
  member ConnectionGroupName : string with get, set
  member ContentLength : int64 with get, set
  member ContentType : string with get, set
  member Credentials : ICredentials with get, set
  member EndGetRequestStream : asyncResult:IAsyncResult -> Stream
  ...

Full name: System.Net.WebRequest
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
val resp : WebResponse
WebRequest.GetResponse() : WebResponse
val stream : Stream
WebResponse.GetResponseStream() : Stream
val reader : StreamReader
Multiple items
type StreamReader =
  inherit TextReader
  new : stream:Stream -> StreamReader + 9 overloads
  member BaseStream : Stream
  member Close : unit -> unit
  member CurrentEncoding : Encoding
  member DiscardBufferedData : unit -> unit
  member EndOfStream : bool
  member Peek : unit -> int
  member Read : unit -> int + 1 overload
  member ReadLine : unit -> string
  member ReadToEnd : unit -> string
  ...

Full name: System.IO.StreamReader

--------------------
StreamReader(stream: Stream) : unit
StreamReader(path: string) : unit
StreamReader(stream: Stream, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Encoding) : unit
StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Encoding) : unit
StreamReader(stream: Stream, encoding: Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
StreamReader(path: string, encoding: Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
type Encoding =
  member BodyName : string
  member Clone : unit -> obj
  member CodePage : int
  member DecoderFallback : DecoderFallback with get, set
  member EncoderFallback : EncoderFallback with get, set
  member EncodingName : string
  member Equals : value:obj -> bool
  member GetByteCount : chars:char[] -> int + 3 overloads
  member GetBytes : chars:char[] -> byte[] + 5 overloads
  member GetCharCount : bytes:byte[] -> int + 2 overloads
  ...

Full name: System.Text.Encoding
Encoding.GetEncoding(name: string) : Encoding
Encoding.GetEncoding(codepage: int) : Encoding
Encoding.GetEncoding(name: string, encoderFallback: EncoderFallback, decoderFallback: DecoderFallback) : Encoding
Encoding.GetEncoding(codepage: int, encoderFallback: EncoderFallback, decoderFallback: DecoderFallback) : Encoding
val html : string
StreamReader.ReadToEnd() : string
WebResponse.Close() : unit
val regexSingleLineMatch : input:string -> pattern:string -> string

Full name: Script.regexSingleLineMatch
val input : string
val pattern : string
Multiple items
type Regex =
  new : pattern:string -> Regex + 1 overload
  member GetGroupNames : unit -> string[]
  member GetGroupNumbers : unit -> int[]
  member GroupNameFromNumber : i:int -> string
  member GroupNumberFromName : name:string -> int
  member IsMatch : input:string -> bool + 1 overload
  member Match : input:string -> Match + 2 overloads
  member Matches : input:string -> MatchCollection + 1 overload
  member Options : RegexOptions
  member Replace : input:string * replacement:string -> string + 5 overloads
  ...

Full name: System.Text.RegularExpressions.Regex

--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
Regex.Match(input: string, pattern: string) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions) : Match
type RegexOptions =
  | None = 0
  | IgnoreCase = 1
  | Multiline = 2
  | ExplicitCapture = 4
  | Compiled = 8
  | Singleline = 16
  | IgnorePatternWhitespace = 32
  | RightToLeft = 64
  | ECMAScript = 256
  | CultureInvariant = 512

Full name: System.Text.RegularExpressions.RegexOptions
field RegexOptions.Singleline = 16
val regexMatches : input:string -> pattern:string -> seq<string>

Full name: Script.regexMatches
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val m : Match
Regex.Matches(input: string, pattern: string) : MatchCollection
Regex.Matches(input: string, pattern: string, options: RegexOptions) : MatchCollection
property Match.Groups: GroupCollection
Multiple items
property GroupCollection.Item: int -> Group

--------------------
property GroupCollection.Item: string -> Group
val extractNamedHrefs : html:string -> seq<string>

Full name: Script.extractNamedHrefs
val hrefPattern : string
val cleanupHtml : text:string -> string

Full name: Script.cleanupHtml
val text : string
val htmlTagPattern : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator) : string
Regex.Replace(input: string, pattern: string, replacement: string) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions) : string
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Encoding) : unit
field string.Empty
val takeFirstLine : text:string -> string

Full name: Script.takeFirstLine
val firstLinePattern : string
Multiple items
type Poem =
  new : poemHref:string * title:string * lines:seq<string> -> Poem
  member Href : string
  member LineTokens : seq<seq<string>>
  member Lines : seq<string>
  member Title : string

Full name: Script.Poem

--------------------
new : poemHref:string * title:string * lines:seq<string> -> Poem
val poemHref : string
val title : string
val lines : seq<string>
val MAX_TITLE_LENGTH : int
val this : Poem
member Poem.Href : string

Full name: Script.Poem.Href
member Poem.Title : string

Full name: Script.Poem.Title
val newTitle : string
module Seq

from Microsoft.FSharp.Collections
val nth : index:int -> source:seq<'T> -> 'T

Full name: Microsoft.FSharp.Collections.Seq.nth
property String.Length: int
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
member Poem.Lines : seq<string>

Full name: Script.Poem.Lines
val line : string
val nbspPattern : string
member Poem.LineTokens : seq<seq<string>>

Full name: Script.Poem.LineTokens
val russianWordPattern : string
String.ToLower() : string
String.ToLower(culture: Globalization.CultureInfo) : string
val producePoem : poemHref:string -> poemHtml:string -> Poem

Full name: Script.producePoem
val poemHtml : string
val titlePattern : string
val linePattern : string
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val isFinalEditionHref : href:string -> bool

Full name: Script.isFinalEditionHref
val href : string
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
String.Contains(value: string) : bool
val crawlPoems : seq<Poem>

Full name: Script.crawlPoems
val domainUrl : string
val volumeUrlTemplate : string
val poemUrlTemplate : string
val volumeNumber : int
String.Format(format: string, [<ParamArray>] args: obj []) : string
String.Format(format: string, arg0: obj) : string
String.Format(provider: IFormatProvider, format: string, [<ParamArray>] args: obj []) : string
String.Format(format: string, arg0: obj, arg1: obj) : string
String.Format(format: string, arg0: obj, arg1: obj, arg2: obj) : string
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.collect
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val cache : source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.cache
val indexPoems : poems:seq<Poem> -> (string * (int * (int * int) list) list) list

Full name: Script.indexPoems
val poems : seq<Poem>
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.mapi
val poemNumber : int
val poem : Poem
property Poem.LineTokens: seq<seq<string>>
val lineNumber : int
val tokens : seq<string>
val position : int
val token : string
val concat : sources:seq<#seq<'T>> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.concat
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)

Full name: Microsoft.FSharp.Collections.Seq.groupBy
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val tuples : seq<string * int * int * int>
val poems : (int * (int * int) list) list
val tuples : seq<int * int * int>
val linesPositions : (int * int) list
val toList : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.Seq.toList
val tokenIndex : fullIndex:('a * ('b * ('c * 'd) list) list) list -> filterToken:'a -> filterPosition:'d -> ('b * ('c * 'd) list) list (requires equality and equality)

Full name: Script.tokenIndex
val fullIndex : ('a * ('b * ('c * 'd) list) list) list (requires equality and equality)
val filterToken : 'a (requires equality)
val filterPosition : 'd (requires equality)
val token : 'a (requires equality)
val poems : ('b * ('c * 'd) list) list (requires equality)
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val find : predicate:('T -> bool) -> list:'T list -> 'T

Full name: Microsoft.FSharp.Collections.List.find
val map : mapping:('T -> 'U) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.map
val poemNumber : 'b
val linesPositions : ('c * 'd) list (requires equality)
val filteredLines : ('c * 'd) list (requires equality)
val filter : predicate:('T -> bool) -> list:'T list -> 'T list

Full name: Microsoft.FSharp.Collections.List.filter
val lineNumber : 'c
val position : 'd (requires equality)
val isEmpty : source:seq<'T> -> bool

Full name: Microsoft.FSharp.Collections.Seq.isEmpty
val intersectIndex : currentIndex:('a * ('b * ('c * 'd) list) list) list -> tokenIndex:('b * ('c * 'd) list) list -> ('a * ('b * ('c * 'd) list) list) list (requires comparison and comparison)

Full name: Script.intersectIndex
val currentIndex : ('a * ('b * ('c * 'd) list) list) list (requires comparison and comparison)
val tokenIndex : ('b * ('c * 'd) list) list (requires comparison and comparison)
val token : 'a
val poems : ('b * ('c * 'd) list) list (requires comparison and comparison)
val mergePoems : (('e * ('f * 'g) list) list -> ('e * ('f * 'g) list) list -> ('e * ('f * 'g) list) list) (requires comparison and comparison)
val currentPoems : ('e * ('f * 'g) list) list (requires comparison and comparison)
val tokenPoems : ('e * ('f * 'g) list) list (requires comparison and comparison)
val mergeLinesPositions : (('h * 'i) list -> ('h * 'i) list -> ('h * 'i) list) (requires comparison)
val currentLinesPositions : ('h * 'i) list (requires comparison)
val tokenLinesPositions : ('h * 'i) list (requires comparison)
val keyExtractor : ('j * 'k -> 'j)
val lineNumber : 'j
val merger : ('j * 'k -> 'l * 'm -> 'j * 'k)
val currentLineNumber : 'j
val currentPosition : 'k
val keyExtractor : ('h * 'i -> 'h)
val poemNumber : 'h
val merger : ('h * ('i * 'j) list -> 'k * ('i * 'j) list -> 'h * ('i * 'j) list) (requires comparison)
val currentPoemNumber : 'h
val currentLinesPositions : ('i * 'j) list (requires comparison)
val tokenLinesPositions : ('i * 'j) list (requires comparison)
val poemNumber : 'e (requires comparison)
val linesPositions : ('f * 'g) list (requires comparison)
val isEmpty : list:'T list -> bool

Full name: Microsoft.FSharp.Collections.List.isEmpty
val queryIndex : index:('a * ('b * ('c * 'd) list) list) list -> findPosition:'d -> count:int -> ('a * int) list (requires equality)

Full name: Script.queryIndex
val index : ('a * ('b * ('c * 'd) list) list) list (requires equality)
val findPosition : 'd (requires equality)
val count : int
val tokenFreq : int
val sumBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.List.sumBy
val zip : source1:seq<'T1> -> source2:seq<'T2> -> seq<'T1 * 'T2>

Full name: Microsoft.FSharp.Collections.Seq.zip
val index : int
val element : 'a * int
val getPoemResult : index:('a * ('b * ('c * 'd) list) list) list -> findToken:'a -> findPosition:'d -> 'b * 'c (requires equality and equality)

Full name: Script.getPoemResult
val index : ('a * ('b * ('c * 'd) list) list) list (requires equality and equality)
val findToken : 'a (requires equality)
val collect : mapping:('T -> 'U list) -> list:'T list -> 'U list

Full name: Microsoft.FSharp.Collections.List.collect
type QueryResult =
  | LineVariant of string * int
  | SinglePoem of int * int

Full name: Script.QueryResult
union case QueryResult.LineVariant: string * int -> QueryResult
union case QueryResult.SinglePoem: int * int -> QueryResult
val wrappedQueryIndex : filteredIndex:(string * (int * (int * 'a) list) list) list -> searchPosition:'a -> count:int -> QueryResult list (requires equality)

Full name: Script.wrappedQueryIndex
val filteredIndex : (string * (int * (int * 'a) list) list) list (requires equality)
val searchPosition : 'a (requires equality)
type PrettyResult =
  | PrettyLineVariant of string * int
  | PrettySinglePoem of string * string * int * string

Full name: Script.PrettyResult
union case PrettyResult.PrettyLineVariant: string * int -> PrettyResult
union case PrettyResult.PrettySinglePoem: string * string * int * string -> PrettyResult
val prettifyQueryResults : queryResults:QueryResult list -> poems:seq<Poem> -> PrettyResult list

Full name: Script.prettifyQueryResults
val queryResults : QueryResult list
val result : QueryResult
property Poem.Lines: seq<string>
property Poem.Title: string
property Poem.Href: string
type PushkinTreeNode =
  | VariantNode of string * int * seq<PushkinTreeNode>
  | PoemNode of string * string * int * string

Full name: Script.PushkinTreeNode
union case PushkinTreeNode.VariantNode: string * int * seq<PushkinTreeNode> -> PushkinTreeNode
union case PushkinTreeNode.PoemNode: string * string * int * string -> PushkinTreeNode
val createPushkinTree : pushkinPoems:seq<Poem> -> poemsIndex:(string * (int * (int * int) list) list) list -> count:int -> PushkinTreeNode list

Full name: Script.createPushkinTree
val pushkinPoems : seq<Poem>
val poemsIndex : (string * (int * (int * int) list) list) list
val createTreeLevel : (int -> string list -> (string * (int * (int * int) list) list) list -> PushkinTreeNode list)
val currentQuery : string list
val currentIndex : (string * (int * (int * int) list) list) list
val searchPosition : int
val length : list:'T list -> int

Full name: Microsoft.FSharp.Collections.List.length
val queryResult : QueryResult list
val prettyResult : PrettyResult list
val result : PrettyResult
val freq : int
val resultsToHtml : pushkinTree:seq<PushkinTreeNode> -> string

Full name: Script.resultsToHtml
val pushkinTree : seq<PushkinTreeNode>
val treeToHtml : (seq<PushkinTreeNode> -> string list -> int -> int -> int * string)
val tree : seq<PushkinTreeNode>
val currentPath : string list
val currentNumber : int
val startingNumber : int
val zippedTreeLevel : seq<int * PushkinTreeNode>
val initInfinite : initializer:(int -> 'T) -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.initInfinite
val i : int
val pathToString : (seq<string> -> string)
val path : seq<string>
val parts : seq<string>
val x : string
String.Join(separator: string, values: Collections.Generic.IEnumerable<string>) : string
String.Join<'T>(separator: string, values: Collections.Generic.IEnumerable<'T>) : string
String.Join(separator: string, [<ParamArray>] values: obj []) : string
String.Join(separator: string, [<ParamArray>] value: string []) : string
String.Join(separator: string, value: string [], startIndex: int, count: int) : string
val thisLevelStart : string
type Environment =
  static member CommandLine : string
  static member CurrentDirectory : string with get, set
  static member Exit : exitCode:int -> unit
  static member ExitCode : int with get, set
  static member ExpandEnvironmentVariables : name:string -> string
  static member FailFast : message:string -> unit + 1 overload
  static member GetCommandLineArgs : unit -> string[]
  static member GetEnvironmentVariable : variable:string -> string + 1 overload
  static member GetEnvironmentVariables : unit -> IDictionary + 1 overload
  static member GetFolderPath : folder:SpecialFolder -> string + 1 overload
  ...
  nested type SpecialFolder
  nested type SpecialFolderOption

Full name: System.Environment
property Environment.NewLine: string
val thisLevelTable : string
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State

Full name: Microsoft.FSharp.Collections.Seq.fold
val acc : string
val treeNode : int * PushkinTreeNode
val number : int
val subtree : seq<PushkinTreeNode>
val thisLevelEnd : string
val thisLevelOutput : string
val levelLength : int
val length : source:seq<'T> -> int

Full name: Microsoft.FSharp.Collections.Seq.length
val subTreeCount : int
val subTreeOutput : string
val acc : int
val result : string
val content : string
val outputResultsToFile : content:string -> unit

Full name: Script.outputResultsToFile
val templateFile : string
val outputFile : string
val templateReplacePattern : string
val templateHtml : string
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
File.ReadAllText(path: string) : string
File.ReadAllText(path: string, encoding: Encoding) : string
val resultHtml : string
File.WriteAllText(path: string, contents: string) : unit
File.WriteAllText(path: string, contents: string, encoding: Encoding) : unit
val poems : seq<Poem>

Full name: Script.poems
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sumBy
val poemIndex : (string * (int * (int * int) list) list) list

Full name: Script.poemIndex
property List.Length: int
val tree : PushkinTreeNode list

Full name: Script.tree
val htmlContent : string

Full name: Script.htmlContent
Raw view Test code New version

More information

Link:http://fssnip.net/7w
Posted:12 years ago
Author:
Tags: