0 people like it.
Like the snippet!
pushkin tree
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
183:
184:
185:
186:
187:
188:
189:
190:
191:
192:
193:
194:
195:
196:
197:
198:
199:
200:
201:
202:
203:
204:
205:
206:
207:
208:
209:
210:
211:
212:
213:
214:
215:
216:
217:
218:
219:
220:
221:
222:
223:
224:
225:
226:
227:
228:
229:
230:
231:
232:
233:
234:
235:
236:
237:
238:
239:
240:
241:
242:
243:
244:
245:
246:
247:
248:
249:
250:
251:
252:
253:
254:
255:
256:
257:
258:
259:
260:
261:
262:
263:
264:
265:
266:
267:
268:
269:
270:
271:
272:
273:
274:
275:
276:
277:
278:
279:
280:
281:
282:
283:
284:
285:
286:
287:
288:
289:
290:
291:
292:
293:
294:
295:
296:
297:
298:
299:
300:
301:
302:
303:
304:
305:
306:
307:
308:
309:
310:
311:
312:
313:
314:
315:
316:
317:
318:
319:
320:
321:
322:
323:
324:
325:
326:
327:
328:
329:
330:
331:
332:
333:
334:
335:
336:
337:
338:
339:
340:
341:
342:
343:
344:
345:
346:
347:
348:
349:
350:
351:
352:
353:
354:
355:
356:
357:
358:
359:
360:
361:
362:
363:
364:
365:
366:
367:
368:
369:
370:
371:
372:
373:
374:
375:
376:
377:
378:
379:
380:
381:
382:
383:
384:
385:
386:
387:
388:
389:
390:
391:
392:
393:
394:
395:
396:
397:
398:
399:
400:
|
open System
open System.Net
open System.Text
open System.IO
open System.Text.RegularExpressions
// Runs an eagerly evaluated job, prints how long it took and returns the job's result.
// Not meaningful for lazy jobs (e.g. a function returning a sequence / IEnumerable):
// only the construction of the lazy value would be measured, not its enumeration.
//   jobName -- label used in the printed "<jobName> took <n> ms" line
//   job     -- function of unit to run and measure
// Returns whatever job() returns. If job() throws, nothing is printed and the exception propagates.
let time jobName job =
    // Stopwatch is monotonic and high-resolution; DateTime.Now is coarse and
    // can jump when the system clock is adjusted.
    let stopwatch = System.Diagnostics.Stopwatch.StartNew()
    let returnValue = job()
    stopwatch.Stop()
    printfn "%s took %d ms" jobName stopwatch.ElapsedMilliseconds
    returnValue
// Walks two lists in linear time looking for elements with equal keys.
// For every pair of elements with equal keys, merger is called to produce one output element.
//   xs, ys       -- input lists (same element type)
//   keyExtractor -- extracts the comparison key from an element
//   merger       -- combines an x and a y that have equal keys into an output element
// Returns the merged elements in the order they were found; elements without a partner are dropped.
// NOTE:
// 1. keys in the second list are assumed to be unique, otherwise results will be surprising, see (*) below
// 2. both lists are assumed to be ordered ascending by key
// The walk is done by a tail-recursive loop with an accumulator, so stack usage
// does not grow with the length of the lists.
let orderedListsMerge xs ys keyExtractor merger =
    let rec loop acc xs ys =
        match xs, ys with
        | [], _ | _, [] -> List.rev acc
        | x :: xs', y :: ys' ->
            let xkey = keyExtractor x
            let ykey = keyExtractor y
            if xkey = ykey then
                // only xs moves forward while ys stays: the next y is assumed to have a
                // different key, while the next x might still have the same key;
                // otherwise this logic is incorrect
                loop (merger x y :: acc) xs' ys // (*)
            elif xkey > ykey then
                loop acc xs ys'
            else
                loop acc xs' ys
    loop [] xs ys
// Downloads the document at the given url and returns its body as a string.
// The body is decoded as Windows-1251; the encoding has to be stated explicitly
// when working with international documents.
// The response, its stream and the reader are disposed when the function exits,
// including when the request or the read throws.
let webRequestHtml (url : string) =
    let req = WebRequest.Create(url)
    use resp = req.GetResponse()
    use stream = resp.GetResponseStream()
    use reader = new StreamReader(stream, Encoding.GetEncoding("Windows-1251"))
    reader.ReadToEnd()
// Returns the text captured by group 1 of the first match of pattern in input.
// The match runs in Singleline mode, so '.' also matches line breaks.
// When there is no match the captured value is the empty string.
let regexSingleLineMatch input pattern =
    let firstMatch = Regex.Match(input, pattern, RegexOptions.Singleline)
    firstMatch.Groups.[1].Value
// Lazily yields the text captured by group 1 for every match of pattern in input.
// The regular expression is run each time the resulting sequence is enumerated.
let regexMatches input pattern =
    Seq.delay (fun () ->
        Regex.Matches(input, pattern)
        |> Seq.cast<Match>
        |> Seq.map (fun m -> m.Groups.[1].Value))
// Extracts href targets from a table-of-contents page.
// Only anchors that also carry a name attribute are picked up, because only named hrefs point to poems.
// A regular expression is used instead of XmlDocument because the HTML can contain
// elements that are "invalid" from an XML parser's point of view.
let extractNamedHrefs html =
    "<a name=.* href=\"(.+?)\">.*</a>" |> regexMatches html
// Strips html markup from a line: every "<...>" run (shortest possible, at least
// one character between the brackets) is replaced with the empty string.
// No regex options are passed, so '.' does not match a line feed and a tag that
// spans a line break is left in place.
let cleanupHtml text =
    Regex.Replace(text, "<.+?>", String.Empty)
// Returns the first line of the text: everything up to (not including) the first line feed.
// A carriage return left at the end by a CRLF line ending is dropped as well, so the
// result can be compared against exact strings.
// Returns the empty string when the text is empty or starts with a line feed.
let takeFirstLine text =
    let firstLinePattern = "(.*)"
    // '.' stops at '\n' but not at '\r', hence the explicit trim
    Regex.Match(text, firstLinePattern).Groups.[1].Value.TrimEnd('\r')
// A crawled poem.
//   poemHref -- link the poem was loaded from
//   title    -- title as extracted from the page
//   lines    -- lines of the poem, in order
type Poem(poemHref : string, title : string, lines : seq<string>) =
    let MAX_TITLE_LENGTH = 30
    // Link the poem was loaded from.
    member this.Href = poemHref
    // Display title, at most MAX_TITLE_LENGTH characters (longer titles are cut and end with "...").
    // A poem whose title is "* * *" is shown under its first line instead; if such a poem
    // has no lines at all, the original title is kept rather than failing.
    member this.Title =
        let newTitle =
            match title with
            | "* * *" when not (Seq.isEmpty lines) -> Seq.head lines
            | _ -> title
        if newTitle.Length > MAX_TITLE_LENGTH then
            newTitle.Substring(0, MAX_TITLE_LENGTH - 3) + "..."
        else
            newTitle
    // Lines of the poem with non-breaking spaces removed, both the "&nbsp;" entity
    // and the literal U+00A0 character.
    member this.Lines =
        seq {
            for line in lines ->
                let nbspPattern = "&nbsp;|\u00A0"
                Regex.Replace(line, nbspPattern, "")
        }
    // For every line, the lower-cased Russian words in order of appearance.
    // The letter "ё" lies outside the а-я range and is listed explicitly, otherwise
    // words containing it would be split into pieces.
    member this.LineTokens =
        seq {
            for line in lines ->
                let russianWordPattern = "([а-яёА-ЯЁ]+)"
                regexMatches (line.ToLower()) russianWordPattern
        }
//TODO: add more structural analysis -> handle sub-titles and personas
// Builds a Poem from the link it was loaded from and the html of its page.
// The title is the content of the <h1> element with markup removed; titles can be
// multiline and sometimes include sub-titles, so only the first line is kept.
// The lines are the contents of the spans whose class starts with "line", with markup removed.
let producePoem poemHref poemHtml =
    let title =
        regexSingleLineMatch poemHtml "<h1>(.+?)</h1>"
        |> cleanupHtml
        |> takeFirstLine
    let lines =
        regexMatches poemHtml "<span class=\"line.*>(.+?)</span>"
        |> Seq.map cleanupHtml
    Poem(poemHref, title, lines)
// True when the link points to a final edition poem, false for an early edition
// (links containing "03edit"). Early editions are skipped to avoid duplicate texts in the index.
let isFinalEditionHref (href : string) =
    "03edit" |> href.Contains |> not
// Lazy, cached sequence of all crawled poems.
// Nothing is downloaded until the sequence is enumerated; thanks to Seq.cache every
// page is requested at most once no matter how often the sequence is enumerated.
let crawlPoems =
    let domainUrl = "http://www.rvb.ru/pushkin/"
    let volumeUrlTemplate = domainUrl + "tocvol{0}.htm"
    let poemUrlTemplate = domainUrl + "{0}"
    seq {
        //take only first 4 volumes -- they contain poems
        for volumeNumber in 1..4 do
            let volumeHtml = webRequestHtml (String.Format(volumeUrlTemplate, volumeNumber))
            for href in extractNamedHrefs volumeHtml do
                if isFinalEditionHref href then
                    //requesting individual poems
                    let poemUrl = String.Format(poemUrlTemplate, href)
                    yield producePoem poemUrl (webRequestHtml poemUrl)
    }
    // //development mode -- comment later
    // |> Seq.take 40
    |> Seq.cache
// Builds the inverted index of tokens in poems:
//   token -> poem number -> (line number, position in line)
// The result is a list of (token, poems) ordered by token; poems is a list of
// (poemNumber, linesPositions) ordered by poem number; linesPositions is a list of
// (lineNumber, position) ordered by line number, then by position.
// Poem numbers, line numbers and positions are all zero-based indexes.
let indexPoems (poems : seq<Poem>) =
    let numbered items = Seq.mapi (fun i item -> (i, item)) items
    // raw occurrences: one tuple per token occurrence
    let occurrences =
        seq {
            for (poemNumber, poem) in numbered poems do
                for (lineNumber, tokens) in numbered poem.LineTokens do
                    for (position, token) in numbered tokens do
                        yield (token, poemNumber, lineNumber, position)
        }
    // turn the raw occurrences into the ordered inverted index
    occurrences
    |> Seq.groupBy (fun (token, _, _, _) -> token)
    |> Seq.sortBy fst
    |> Seq.map (fun (token, tokenHits) ->
        let poemEntries =
            tokenHits
            |> Seq.groupBy (fun (_, poemNumber, _, _) -> poemNumber)
            |> Seq.sortBy fst
            |> Seq.map (fun (poemNumber, poemHits) ->
                let linesPositions =
                    poemHits
                    |> Seq.map (fun (_, _, lineNumber, position) -> (lineNumber, position))
                    |> Seq.sort // tuples compare by line number first, then by position
                    |> Seq.toList
                (poemNumber, linesPositions))
            |> Seq.toList
        (token, poemEntries))
    |> Seq.toList
// Token index: the subtree of the full index for one token, keeping only the poems
// and lines where that token occurs at the given position.
//   fullIndex      -- list of (token, poems) as produced by the indexer
//   filterToken    -- token to look up
//   filterPosition -- position in line the token must have
// Returns a list of (poemNumber, linesPositions); poems left without any line are dropped.
// Fails (List.find) when filterToken is not present in fullIndex.
let tokenIndex fullIndex filterToken filterPosition =
    let (_, tokenPoems) =
        fullIndex |> List.find (fun (token, _) -> token = filterToken)
    tokenPoems
    |> List.choose (fun (poemNumber, linesPositions) ->
        match linesPositions |> List.filter (fun (_, position) -> position = filterPosition) with
        | [] -> None
        | matchingLines -> Some (poemNumber, matchingLines))
// Intersects the current index with a token index (a subtree of the full index
// holding the poems and lines of a single token).
// For every token of currentIndex only the poems, and within them only the lines, that
// are also present in tokenIndex are kept; the (lineNumber, position) pairs themselves
// are taken from currentIndex. Poems left without lines and tokens left without poems are dropped.
let intersectIndex currentIndex tokenIndex =
    // on equal keys the element of the current index wins
    let keepCurrent current _ = current
    let mergeLinesPositions currentLinesPositions tokenLinesPositions =
        orderedListsMerge currentLinesPositions tokenLinesPositions fst keepCurrent
    let mergePoemEntries (poemNumber, currentLinesPositions) (_, tokenLinesPositions) =
        (poemNumber, mergeLinesPositions currentLinesPositions tokenLinesPositions)
    currentIndex
    |> List.choose (fun (token, poems) ->
        let survivingPoems =
            orderedListsMerge poems tokenIndex fst mergePoemEntries
            |> List.filter (fun (_, linesPositions) -> not (List.isEmpty linesPositions))
        if List.isEmpty survivingPoems then None else Some (token, survivingPoems))
// The main function to query the reverse index.
//   index        -- the index per se; it is assumed to be already filtered by the caller
//   findPosition -- position in line to search tokens for
//   count        -- maximum number of terms to return
// Returns a list of (token, frequency) for the tokens that occur at findPosition,
// most frequent first (tokens with equal frequency keep their index order),
// limited to at most count entries. Fewer entries are returned when fewer tokens qualify.
let queryIndex index findPosition count =
    index
    |> List.map (fun (token, poems) ->
        // number of lines, over all poems, where the token sits at findPosition
        let tokenFreq =
            poems
            |> List.sumBy (fun (_, linesPositions) ->
                linesPositions
                |> List.filter (fun (_, position) -> position = findPosition)
                |> List.length)
        (token, tokenFreq))
    |> List.filter (fun (_, tokenFreq) -> tokenFreq > 0)
    |> Seq.sortBy (fun (_, tokenFreq) -> -tokenFreq)
    // Seq.truncate, unlike Seq.take, does not fail when there are fewer than count elements
    |> Seq.truncate count
    |> Seq.toList
// Finds the first (poemNumber, lineNumber) where the given token occurs at the given position.
// Used to resolve a query result token that has a single occurrence into its poem.
// Fails when the token is not in the index or never occurs at that position.
let getPoemResult index findToken findPosition =
    let tokenPoems =
        index
        |> List.find (fun (token, _) -> token = findToken)
        |> snd
    [ for (poemNumber, linesPositions) in tokenPoems do
        for (lineNumber, position) in linesPositions do
            if position = findPosition then
                yield (poemNumber, lineNumber) ]
    |> List.head
// One entry of a query result: either a token that still has several candidate
// occurrences, or a token already resolved to a single place in a poem.
type QueryResult =
    // token + count: the token and the number of its occurrences at the queried position
    | LineVariant of string*int
    // poemNumber, lineNumber: zero-based indexes of the poem and of the line inside it
    | SinglePoem of int*int
//TODO: identical strings currently will not be resolved to their poems
// Wrapper around queryIndex: runs the same query, but translates every entry into a QueryResult.
// A token that occurs exactly once is resolved to its (poemNumber, lineNumber) and
// returned as SinglePoem; every other token is returned as LineVariant with its count.
let wrappedQueryIndex filteredIndex searchPosition count =
    [ for (token, freq) in queryIndex filteredIndex searchPosition count ->
        if freq = 1 then
            SinglePoem(getPoemResult filteredIndex token searchPosition)
        else
            LineVariant(token, freq) ]
// A query result entry prepared for display.
type PrettyResult =
    // token + count, carried over unchanged from the query result
    | PrettyLineVariant of string*int
    // poem title, poem href, zero-based line number, text of that line
    | PrettySinglePoem of string*string*int*string
// Translates query results into their display form.
// SinglePoem entries are looked up in poems by poem number and line number and turned
// into (title, href, lineNumber, line); LineVariant entries are passed through.
let prettifyQueryResults queryResults poems =
    let prettify = function
        | LineVariant (token, count) -> PrettyLineVariant(token, count)
        | SinglePoem (poemNumber, lineNumber) ->
            let (poem : Poem) = Seq.nth poemNumber poems
            let line = Seq.nth lineNumber poem.Lines
            PrettySinglePoem(poem.Title, poem.Href, lineNumber, line)
    List.map prettify queryResults
// Node of the result tree.
type PushkinTreeNode =
    // token, its frequency at this level, and the child nodes for the next position
    | VariantNode of string*int*seq<PushkinTreeNode>
    // leaf: poem title, poem href, zero-based line number, text of that line
    | PoemNode of string*string*int*string
// Builds the result tree level by level.
// A level lists the (at most count) most frequent tokens for the next position of the
// current query. A token with several occurrences becomes a VariantNode whose children
// are built from the index narrowed to the lines having that token at that position;
// a token with a single occurrence becomes a PoemNode pointing at its line.
//   pushkinPoems -- the poems the index was built from
//   poemsIndex   -- full inverted index
//   count        -- maximum number of nodes per level
let createPushkinTree pushkinPoems poemsIndex count =
    let rec buildLevel query index =
        // the position to search is the number of tokens chosen so far
        let position = List.length query
        let toNode = function
            | PrettySinglePoem (title, href, lineNumber, line) ->
                PoemNode(title, href, lineNumber, line)
            | PrettyLineVariant (token, freq) ->
                VariantNode(token, freq, buildLevel (query @ [token]) (intersectIndex index (tokenIndex index token position)))
        prettifyQueryResults (wrappedQueryIndex index position count) pushkinPoems
        |> List.map toNode
    buildLevel [] poemsIndex
// Renders the result tree as html: one <div><table> per tree level, emitted as a flat
// sequence (a level's table first, then the tables of its sub-levels), not nested.
// Returns the html as a single string.
// NOTE(review): token, line, title and href are inserted into the markup without escaping.
let resultsToHtml pushkinTree =
    // Renders one level and, recursively, everything below it.
    //   tree           -- nodes of this level
    //   currentPath    -- node numbers (as strings) from the root down to the parent of this level
    //   currentNumber  -- number of the parent node; used as the id of this level's table
    //   startingNumber -- number given to the first node of this level
    // Returns (number of nodes in this level and below, html of this level and below).
    let rec treeToHtml tree currentPath (currentNumber:int) startingNumber =
        // pair every node with its number: startingNumber, startingNumber+1, ...
        let zippedTreeLevel =
            tree
            |> Seq.zip (Seq.initInfinite (fun i -> startingNumber+i))
        // renders a path as a comma separated list of single-quoted items
        let pathToString path =
            let parts =
                path
                |> Seq.map (fun x -> "'"+x+"'")
            String.Join(",", parts)
        let thisLevelStart = String.Format("<div class=\"x\"><table id=\"{0}\" class=\"p\">", currentNumber) + Environment.NewLine
        // one table row per node of this level
        let thisLevelTable =
            zippedTreeLevel
            |> Seq.fold
                (
                    fun acc treeNode ->
                        match treeNode with
                        | (number, PoemNode (title, href, lineNumber, line)) ->
                            // leaf row: the line, a link to the poem and the one-based line number
                            acc + String.Format("<tr><td><span class=line>{0}</span> ⇒ <a target=blank class=fromlink href=\"{1}\">{2}</a>, строка {3}</tr>",line, href, title, lineNumber+1) + Environment.NewLine
                        | (number, VariantNode (token, freq, subtree)) ->
                            // variant row: the token and its frequency; the click handler receives the path down to this node
                            acc + String.Format("<tr id=\"r{0}\"><td>{1} ⇒ <span class=\"lv\" onClick=\"x([{2}])\">{3}</span></td></tr>", number, token, (pathToString (currentPath@[string(number)])), freq) + Environment.NewLine
                ) ""
        let thisLevelEnd = @"</table></div>" + Environment.NewLine + Environment.NewLine
        let thisLevelOutput = (thisLevelStart+thisLevelTable+thisLevelEnd)
        let levelLength =
            tree
            |> Seq.length
        // render the sub-levels of the variant nodes, in order; acc counts the nodes
        // already emitted by earlier sub-levels so that node numbers never collide
        let (subTreeCount, subTreeOutput) =
            zippedTreeLevel
            |> Seq.fold
                (
                    fun (acc, result) treeNode ->
                        match treeNode with
                        | (number, PoemNode (title, href, lineNumber, line)) ->
                            // leaves have no sub-level
                            (acc, result)
                        | (number, VariantNode (token, freq, subtree)) ->
                            // numbers of the sub-level start after this level and all earlier sub-levels
                            let (subTreeCount, subTreeOutput) = treeToHtml subtree (currentPath@[string(number)]) number (startingNumber+acc+levelLength)
                            (acc + subTreeCount, result + subTreeOutput)
                ) (0, "")
        (levelLength + subTreeCount, thisLevelOutput + subTreeOutput)
    // the root level has table id 0, path ["0"], and its nodes are numbered from 1
    let (_, content) = treeToHtml pushkinTree ["0"] 0 1
    content
// Writes the generated html into the output page.
// Reads "template.htm", replaces every occurrence of the "#HERE_GOES_CONTENT#" marker
// with content and writes the result to "output.htm" (both paths relative to the
// current directory).
let outputResultsToFile (content:string) =
    let templateFile = "template.htm"
    let outputFile = "output.htm"
    let templateReplacePattern = "#HERE_GOES_CONTENT#"
    let templateHtml = File.ReadAllText(templateFile)
    // Plain string replacement on purpose: Regex.Replace would interpret "$" sequences
    // in content as substitutions and corrupt the output.
    let resultHtml = templateHtml.Replace(templateReplacePattern, content)
    File.WriteAllText(outputFile, resultHtml)
// Script entry point: crawl, index, build the result tree, render it and write the page.
// Crawling is lazy, so binding the sequence costs nothing; the time is measured on the
// next line, where the sequence is enumerated for the first time.
let poems = crawlPoems
(fun () -> Seq.length poems) |> time "Crawling poems" |> printfn "Crawled %d poems"
poems |> Seq.sumBy (fun poem -> Seq.length poem.Lines) |> printfn "Crawled %d lines"
let poemIndex = (fun () -> indexPoems poems) |> time "Indexing poems"
printfn "Index contains %d terms" (List.length poemIndex)
let tree = (fun () -> createPushkinTree poems poemIndex 20) |> time "Generating result tree"
let htmlContent = (fun () -> resultsToHtml tree) |> time "Generating html content"
(fun () -> outputResultsToFile htmlContent) |> time "Output content"
//let queryResult = queryIndex poemIndex ["но"] 10
//let prettyResult = prettifyQueryResults queryResult poems
|
namespace System
namespace System.Net
namespace System.Text
namespace System.IO
namespace System.Text.RegularExpressions
val time : jobName:string -> job:(unit -> 'a) -> 'a
Full name: Script.time
val jobName : string
val job : (unit -> 'a)
val startTime : DateTime
Multiple items
type DateTime =
struct
new : ticks:int64 -> DateTime + 10 overloads
member Add : value:TimeSpan -> DateTime
member AddDays : value:float -> DateTime
member AddHours : value:float -> DateTime
member AddMilliseconds : value:float -> DateTime
member AddMinutes : value:float -> DateTime
member AddMonths : months:int -> DateTime
member AddSeconds : value:float -> DateTime
member AddTicks : value:int64 -> DateTime
member AddYears : value:int -> DateTime
...
end
Full name: System.DateTime
--------------------
DateTime()
(+0 other overloads)
DateTime(ticks: int64) : unit
(+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Globalization.Calendar) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Globalization.Calendar) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
(+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
(+0 other overloads)
property DateTime.Now: DateTime
val returnValue : 'a
val endTime : DateTime
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
val orderedListsMerge : xs:'a list -> ys:'a list -> keyExtractor:('a -> 'b) -> merger:('a -> 'a -> 'c) -> 'c list (requires comparison)
Full name: Script.orderedListsMerge
val xs : 'a list
val ys : 'a list
val keyExtractor : ('a -> 'b) (requires comparison)
val merger : ('a -> 'a -> 'c)
val x : 'a
val xs' : 'a list
val y : 'a
val ys' : 'a list
val xkey : 'b (requires comparison)
val ykey : 'b (requires comparison)
val webRequestHtml : url:string -> string
Full name: Script.webRequestHtml
val url : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
val req : WebRequest
type WebRequest =
inherit MarshalByRefObject
member Abort : unit -> unit
member AuthenticationLevel : AuthenticationLevel with get, set
member BeginGetRequestStream : callback:AsyncCallback * state:obj -> IAsyncResult
member BeginGetResponse : callback:AsyncCallback * state:obj -> IAsyncResult
member CachePolicy : RequestCachePolicy with get, set
member ConnectionGroupName : string with get, set
member ContentLength : int64 with get, set
member ContentType : string with get, set
member Credentials : ICredentials with get, set
member EndGetRequestStream : asyncResult:IAsyncResult -> Stream
...
Full name: System.Net.WebRequest
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
val resp : WebResponse
WebRequest.GetResponse() : WebResponse
val stream : Stream
WebResponse.GetResponseStream() : Stream
val reader : StreamReader
Multiple items
type StreamReader =
inherit TextReader
new : stream:Stream -> StreamReader + 9 overloads
member BaseStream : Stream
member Close : unit -> unit
member CurrentEncoding : Encoding
member DiscardBufferedData : unit -> unit
member EndOfStream : bool
member Peek : unit -> int
member Read : unit -> int + 1 overload
member ReadLine : unit -> string
member ReadToEnd : unit -> string
...
Full name: System.IO.StreamReader
--------------------
StreamReader(stream: Stream) : unit
StreamReader(path: string) : unit
StreamReader(stream: Stream, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Encoding) : unit
StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Encoding) : unit
StreamReader(stream: Stream, encoding: Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(path: string, encoding: Encoding, detectEncodingFromByteOrderMarks: bool) : unit
StreamReader(stream: Stream, encoding: Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
StreamReader(path: string, encoding: Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
type Encoding =
member BodyName : string
member Clone : unit -> obj
member CodePage : int
member DecoderFallback : DecoderFallback with get, set
member EncoderFallback : EncoderFallback with get, set
member EncodingName : string
member Equals : value:obj -> bool
member GetByteCount : chars:char[] -> int + 3 overloads
member GetBytes : chars:char[] -> byte[] + 5 overloads
member GetCharCount : bytes:byte[] -> int + 2 overloads
...
Full name: System.Text.Encoding
Encoding.GetEncoding(name: string) : Encoding
Encoding.GetEncoding(codepage: int) : Encoding
Encoding.GetEncoding(name: string, encoderFallback: EncoderFallback, decoderFallback: DecoderFallback) : Encoding
Encoding.GetEncoding(codepage: int, encoderFallback: EncoderFallback, decoderFallback: DecoderFallback) : Encoding
val html : string
StreamReader.ReadToEnd() : string
WebResponse.Close() : unit
val regexSingleLineMatch : input:string -> pattern:string -> string
Full name: Script.regexSingleLineMatch
val input : string
val pattern : string
Multiple items
type Regex =
new : pattern:string -> Regex + 1 overload
member GetGroupNames : unit -> string[]
member GetGroupNumbers : unit -> int[]
member GroupNameFromNumber : i:int -> string
member GroupNumberFromName : name:string -> int
member IsMatch : input:string -> bool + 1 overload
member Match : input:string -> Match + 2 overloads
member Matches : input:string -> MatchCollection + 1 overload
member Options : RegexOptions
member Replace : input:string * replacement:string -> string + 5 overloads
...
Full name: System.Text.RegularExpressions.Regex
--------------------
Regex(pattern: string) : unit
Regex(pattern: string, options: RegexOptions) : unit
Regex.Match(input: string, pattern: string) : Match
Regex.Match(input: string, pattern: string, options: RegexOptions) : Match
type RegexOptions =
| None = 0
| IgnoreCase = 1
| Multiline = 2
| ExplicitCapture = 4
| Compiled = 8
| Singleline = 16
| IgnorePatternWhitespace = 32
| RightToLeft = 64
| ECMAScript = 256
| CultureInvariant = 512
Full name: System.Text.RegularExpressions.RegexOptions
field RegexOptions.Singleline = 16
val regexMatches : input:string -> pattern:string -> seq<string>
Full name: Script.regexMatches
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val m : Match
Regex.Matches(input: string, pattern: string) : MatchCollection
Regex.Matches(input: string, pattern: string, options: RegexOptions) : MatchCollection
property Match.Groups: GroupCollection
Multiple items
property GroupCollection.Item: int -> Group
--------------------
property GroupCollection.Item: string -> Group
val extractNamedHrefs : html:string -> seq<string>
Full name: Script.extractNamedHrefs
val hrefPattern : string
val cleanupHtml : text:string -> string
Full name: Script.cleanupHtml
val text : string
val htmlTagPattern : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator) : string
Regex.Replace(input: string, pattern: string, replacement: string) : string
Regex.Replace(input: string, pattern: string, evaluator: MatchEvaluator, options: RegexOptions) : string
Regex.Replace(input: string, pattern: string, replacement: string, options: RegexOptions) : string
Multiple items
type String =
new : value:char -> string + 7 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 2 overloads
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
member GetHashCode : unit -> int
...
Full name: System.String
--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Encoding) : unit
field string.Empty
val takeFirstLine : text:string -> string
Full name: Script.takeFirstLine
val firstLinePattern : string
Multiple items
type Poem =
new : poemHref:string * title:string * lines:seq<string> -> Poem
member Href : string
member LineTokens : seq<seq<string>>
member Lines : seq<string>
member Title : string
Full name: Script.Poem
--------------------
new : poemHref:string * title:string * lines:seq<string> -> Poem
val poemHref : string
val title : string
val lines : seq<string>
val MAX_TITLE_LENGTH : int
val this : Poem
member Poem.Href : string
Full name: Script.Poem.Href
member Poem.Title : string
Full name: Script.Poem.Title
val newTitle : string
module Seq
from Microsoft.FSharp.Collections
val nth : index:int -> source:seq<'T> -> 'T
Full name: Microsoft.FSharp.Collections.Seq.nth
property String.Length: int
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
member Poem.Lines : seq<string>
Full name: Script.Poem.Lines
val line : string
val nbspPattern : string
member Poem.LineTokens : seq<seq<string>>
Full name: Script.Poem.LineTokens
val russianWordPattern : string
String.ToLower() : string
String.ToLower(culture: Globalization.CultureInfo) : string
val producePoem : poemHref:string -> poemHtml:string -> Poem
Full name: Script.producePoem
val poemHtml : string
val titlePattern : string
val linePattern : string
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val isFinalEditionHref : href:string -> bool
Full name: Script.isFinalEditionHref
val href : string
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
String.Contains(value: string) : bool
val crawlPoems : seq<Poem>
Full name: Script.crawlPoems
val domainUrl : string
val volumeUrlTemplate : string
val poemUrlTemplate : string
val volumeNumber : int
String.Format(format: string, [<ParamArray>] args: obj []) : string
String.Format(format: string, arg0: obj) : string
String.Format(provider: IFormatProvider, format: string, [<ParamArray>] args: obj []) : string
String.Format(format: string, arg0: obj, arg1: obj) : string
String.Format(format: string, arg0: obj, arg1: obj, arg2: obj) : string
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.collect
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
val cache : source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.cache
val indexPoems : poems:seq<Poem> -> (string * (int * (int * int) list) list) list
Full name: Script.indexPoems
val poems : seq<Poem>
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.mapi
val poemNumber : int
val poem : Poem
property Poem.LineTokens: seq<seq<string>>
val lineNumber : int
val tokens : seq<string>
val position : int
val token : string
val concat : sources:seq<#seq<'T>> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.concat
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.groupBy
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val tuples : seq<string * int * int * int>
val poems : (int * (int * int) list) list
val tuples : seq<int * int * int>
val linesPositions : (int * int) list
val toList : source:seq<'T> -> 'T list
Full name: Microsoft.FSharp.Collections.Seq.toList
val tokenIndex : fullIndex:('a * ('b * ('c * 'd) list) list) list -> filterToken:'a -> filterPosition:'d -> ('b * ('c * 'd) list) list (requires equality and equality)
Full name: Script.tokenIndex
val fullIndex : ('a * ('b * ('c * 'd) list) list) list (requires equality and equality)
val filterToken : 'a (requires equality)
val filterPosition : 'd (requires equality)
val token : 'a (requires equality)
val poems : ('b * ('c * 'd) list) list (requires equality)
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val find : predicate:('T -> bool) -> list:'T list -> 'T
Full name: Microsoft.FSharp.Collections.List.find
val map : mapping:('T -> 'U) -> list:'T list -> 'U list
Full name: Microsoft.FSharp.Collections.List.map
val poemNumber : 'b
val linesPositions : ('c * 'd) list (requires equality)
val filteredLines : ('c * 'd) list (requires equality)
val filter : predicate:('T -> bool) -> list:'T list -> 'T list
Full name: Microsoft.FSharp.Collections.List.filter
val lineNumber : 'c
val position : 'd (requires equality)
val isEmpty : source:seq<'T> -> bool
Full name: Microsoft.FSharp.Collections.Seq.isEmpty
val intersectIndex : currentIndex:('a * ('b * ('c * 'd) list) list) list -> tokenIndex:('b * ('c * 'd) list) list -> ('a * ('b * ('c * 'd) list) list) list (requires comparison and comparison)
Full name: Script.intersectIndex
val currentIndex : ('a * ('b * ('c * 'd) list) list) list (requires comparison and comparison)
val tokenIndex : ('b * ('c * 'd) list) list (requires comparison and comparison)
val token : 'a
val poems : ('b * ('c * 'd) list) list (requires comparison and comparison)
val mergePoems : (('e * ('f * 'g) list) list -> ('e * ('f * 'g) list) list -> ('e * ('f * 'g) list) list) (requires comparison and comparison)
val currentPoems : ('e * ('f * 'g) list) list (requires comparison and comparison)
val tokenPoems : ('e * ('f * 'g) list) list (requires comparison and comparison)
val mergeLinesPositions : (('h * 'i) list -> ('h * 'i) list -> ('h * 'i) list) (requires comparison)
val currentLinesPositions : ('h * 'i) list (requires comparison)
val tokenLinesPositions : ('h * 'i) list (requires comparison)
val keyExtractor : ('j * 'k -> 'j)
val lineNumber : 'j
val merger : ('j * 'k -> 'l * 'm -> 'j * 'k)
val currentLineNumber : 'j
val currentPosition : 'k
val keyExtractor : ('h * 'i -> 'h)
val poemNumber : 'h
val merger : ('h * ('i * 'j) list -> 'k * ('i * 'j) list -> 'h * ('i * 'j) list) (requires comparison)
val currentPoemNumber : 'h
val currentLinesPositions : ('i * 'j) list (requires comparison)
val tokenLinesPositions : ('i * 'j) list (requires comparison)
val poemNumber : 'e (requires comparison)
val linesPositions : ('f * 'g) list (requires comparison)
val isEmpty : list:'T list -> bool
Full name: Microsoft.FSharp.Collections.List.isEmpty
val queryIndex : index:('a * ('b * ('c * 'd) list) list) list -> findPosition:'d -> count:int -> ('a * int) list (requires equality)
Full name: Script.queryIndex
val index : ('a * ('b * ('c * 'd) list) list) list (requires equality)
val findPosition : 'd (requires equality)
val count : int
val tokenFreq : int
val sumBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member get_Zero)
Full name: Microsoft.FSharp.Collections.List.sumBy
val zip : source1:seq<'T1> -> source2:seq<'T2> -> seq<'T1 * 'T2>
Full name: Microsoft.FSharp.Collections.Seq.zip
val index : int
val element : 'a * int
val getPoemResult : index:('a * ('b * ('c * 'd) list) list) list -> findToken:'a -> findPosition:'d -> 'b * 'c (requires equality and equality)
Full name: Script.getPoemResult
val index : ('a * ('b * ('c * 'd) list) list) list (requires equality and equality)
val findToken : 'a (requires equality)
val collect : mapping:('T -> 'U list) -> list:'T list -> 'U list
Full name: Microsoft.FSharp.Collections.List.collect
type QueryResult =
| LineVariant of string * int
| SinglePoem of int * int
Full name: Script.QueryResult
union case QueryResult.LineVariant: string * int -> QueryResult
union case QueryResult.SinglePoem: int * int -> QueryResult
val wrappedQueryIndex : filteredIndex:(string * (int * (int * 'a) list) list) list -> searchPosition:'a -> count:int -> QueryResult list (requires equality)
Full name: Script.wrappedQueryIndex
val filteredIndex : (string * (int * (int * 'a) list) list) list (requires equality)
val searchPosition : 'a (requires equality)
type PrettyResult =
| PrettyLineVariant of string * int
| PrettySinglePoem of string * string * int * string
Full name: Script.PrettyResult
union case PrettyResult.PrettyLineVariant: string * int -> PrettyResult
union case PrettyResult.PrettySinglePoem: string * string * int * string -> PrettyResult
val prettifyQueryResults : queryResults:QueryResult list -> poems:seq<Poem> -> PrettyResult list
Full name: Script.prettifyQueryResults
val queryResults : QueryResult list
val result : QueryResult
property Poem.Lines: seq<string>
property Poem.Title: string
property Poem.Href: string
type PushkinTreeNode =
| VariantNode of string * int * seq<PushkinTreeNode>
| PoemNode of string * string * int * string
Full name: Script.PushkinTreeNode
union case PushkinTreeNode.VariantNode: string * int * seq<PushkinTreeNode> -> PushkinTreeNode
union case PushkinTreeNode.PoemNode: string * string * int * string -> PushkinTreeNode
val createPushkinTree : pushkinPoems:seq<Poem> -> poemsIndex:(string * (int * (int * int) list) list) list -> count:int -> PushkinTreeNode list
Full name: Script.createPushkinTree
val pushkinPoems : seq<Poem>
val poemsIndex : (string * (int * (int * int) list) list) list
val createTreeLevel : (int -> string list -> (string * (int * (int * int) list) list) list -> PushkinTreeNode list)
val currentQuery : string list
val currentIndex : (string * (int * (int * int) list) list) list
val searchPosition : int
val length : list:'T list -> int
Full name: Microsoft.FSharp.Collections.List.length
val queryResult : QueryResult list
val prettyResult : PrettyResult list
val result : PrettyResult
val freq : int
val resultsToHtml : pushkinTree:seq<PushkinTreeNode> -> string
Full name: Script.resultsToHtml
val pushkinTree : seq<PushkinTreeNode>
val treeToHtml : (seq<PushkinTreeNode> -> string list -> int -> int -> int * string)
val tree : seq<PushkinTreeNode>
val currentPath : string list
val currentNumber : int
val startingNumber : int
val zippedTreeLevel : seq<int * PushkinTreeNode>
val initInfinite : initializer:(int -> 'T) -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.initInfinite
val i : int
val pathToString : (seq<string> -> string)
val path : seq<string>
val parts : seq<string>
val x : string
String.Join(separator: string, values: Collections.Generic.IEnumerable<string>) : string
String.Join<'T>(separator: string, values: Collections.Generic.IEnumerable<'T>) : string
String.Join(separator: string, [<ParamArray>] values: obj []) : string
String.Join(separator: string, [<ParamArray>] value: string []) : string
String.Join(separator: string, value: string [], startIndex: int, count: int) : string
val thisLevelStart : string
type Environment =
static member CommandLine : string
static member CurrentDirectory : string with get, set
static member Exit : exitCode:int -> unit
static member ExitCode : int with get, set
static member ExpandEnvironmentVariables : name:string -> string
static member FailFast : message:string -> unit + 1 overload
static member GetCommandLineArgs : unit -> string[]
static member GetEnvironmentVariable : variable:string -> string + 1 overload
static member GetEnvironmentVariables : unit -> IDictionary + 1 overload
static member GetFolderPath : folder:SpecialFolder -> string + 1 overload
...
nested type SpecialFolder
nested type SpecialFolderOption
Full name: System.Environment
property Environment.NewLine: string
val thisLevelTable : string
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State
Full name: Microsoft.FSharp.Collections.Seq.fold
val acc : string
val treeNode : int * PushkinTreeNode
val number : int
val subtree : seq<PushkinTreeNode>
val thisLevelEnd : string
val thisLevelOutput : string
val levelLength : int
val length : source:seq<'T> -> int
Full name: Microsoft.FSharp.Collections.Seq.length
val subTreeCount : int
val subTreeOutput : string
val acc : int
val result : string
val content : string
val outputResultsToFile : content:string -> unit
Full name: Script.outputResultsToFile
val templateFile : string
val outputFile : string
val templateReplacePattern : string
val templateHtml : string
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
File.ReadAllText(path: string) : string
File.ReadAllText(path: string, encoding: Encoding) : string
val resultHtml : string
File.WriteAllText(path: string, contents: string) : unit
File.WriteAllText(path: string, contents: string, encoding: Encoding) : unit
val poems : seq<Poem>
Full name: Script.poems
val sumBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.sumBy
val poemIndex : (string * (int * (int * int) list) list) list
Full name: Script.poemIndex
property List.Length: int
val tree : PushkinTreeNode list
Full name: Script.tree
val htmlContent : string
Full name: Script.htmlContent
More information