3 people like it.

Word count analyzer

Analyses word counts across a set of files. The result can be used, for example, to build tag clouds.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
//From NuGet: Sparc.TagCloud
#if INTERACTIVE
#r @"..\packages\Sparc.TagCloud.0.0.1\lib\net40\Sparc.TagCloud.dll"
#else
module MyTagCloud
#endif
    open System.IO
    open Sparc.TagCloud

    // Tag-cloud analyzer from Sparc.TagCloud, constructed without an explicit TagCloudSetting.
    let analyzer = new TagCloudAnalyzer()
    // Root directory whose files are analysed (subdirectories are searched as well).
    let path = @"C:\sourcecodes\"
    let extension = "*.cs" // search pattern selecting the files to analyse, e.g. C# files
    // Every line of every file under `path` (all subdirectories included) whose
    // name matches `extension`. The file list is resolved once, here; the file
    // contents are read each time the sequence is enumerated.
    let lines =
        let sourceFiles = Directory.GetFiles(path, extension, SearchOption.AllDirectories)
        seq {
            for sourceFile in sourceFiles do
                yield! File.ReadAllLines(sourceFile)
        }

    // Computes the tag cloud over `lines` and prints at most 50 tags whose text is
    // longer than 3 characters, one per line in the form "<text><TAB><count>".
    // This is a value, not a function: it runs once, when the binding is evaluated.
    // NOTE(review): "top 50" presumes ComputeTagCloud returns tags ordered by
    // relevance — confirm against the Sparc.TagCloud documentation.
    let ``analyze and print`` =
        analyzer.ComputeTagCloud(lines)//.Shuffle()
        |> Seq.filter (fun tag -> tag.Text.Length > 3) // words longer than 3 letters only
        // Seq.truncate instead of Seq.take: Seq.take throws InvalidOperationException
        // when fewer than 50 tags remain, whereas truncate yields whatever is available.
        |> Seq.truncate 50
        |> Seq.iter (fun tag -> printfn "%s\t%i" tag.Text tag.Count)
namespace System
namespace System.IO
namespace Sparc
namespace Sparc.TagCloud
val analyzer : TagCloudAnalyzer

Full name: Script.analyzer
Multiple items
type TagCloudAnalyzer =
  new : ?setting:TagCloudSetting -> TagCloudAnalyzer
  member ComputeTagCloud : phrases:IEnumerable<string> -> IEnumerable<TagCloudTag>

Full name: Sparc.TagCloud.TagCloudAnalyzer

--------------------
TagCloudAnalyzer(?setting: TagCloudSetting) : unit
val path : string

Full name: Script.path
val extension : string

Full name: Script.extension
val lines : seq<string>

Full name: Script.lines
type Directory =
  static member CreateDirectory : path:string -> DirectoryInfo + 1 overload
  static member Delete : path:string -> unit + 1 overload
  static member EnumerateDirectories : path:string -> IEnumerable<string> + 2 overloads
  static member EnumerateFileSystemEntries : path:string -> IEnumerable<string> + 2 overloads
  static member EnumerateFiles : path:string -> IEnumerable<string> + 2 overloads
  static member Exists : path:string -> bool
  static member GetAccessControl : path:string -> DirectorySecurity + 1 overload
  static member GetCreationTime : path:string -> DateTime
  static member GetCreationTimeUtc : path:string -> DateTime
  static member GetCurrentDirectory : unit -> string
  ...

Full name: System.IO.Directory
Directory.GetFiles(path: string) : string []
Directory.GetFiles(path: string, searchPattern: string) : string []
Directory.GetFiles(path: string, searchPattern: string, searchOption: SearchOption) : string []
type SearchOption =
  | TopDirectoryOnly = 0
  | AllDirectories = 1

Full name: System.IO.SearchOption
field SearchOption.AllDirectories = 1
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val i : string
type File =
  static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
  static member AppendAllText : path:string * contents:string -> unit + 1 overload
  static member AppendText : path:string -> StreamWriter
  static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
  static member Create : path:string -> FileStream + 3 overloads
  static member CreateText : path:string -> StreamWriter
  static member Decrypt : path:string -> unit
  static member Delete : path:string -> unit
  static member Encrypt : path:string -> unit
  static member Exists : path:string -> bool
  ...

Full name: System.IO.File
File.ReadAllLines(path: string) : string []
File.ReadAllLines(path: string, encoding: System.Text.Encoding) : string []
val concat : sources:seq<#seq<'T>> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.concat
val ( analyze and print ) : unit

Full name: Script.( analyze and print )
TagCloudAnalyzer.ComputeTagCloud(phrases: System.Collections.Generic.IEnumerable<string>) : System.Collections.Generic.IEnumerable<TagCloudTag>
val where : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.where
val i : TagCloudTag
property TagCloudTag.Text: string
property System.String.Length: int
val take : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.take
val iter : action:('T -> unit) -> source:seq<'T> -> unit

Full name: Microsoft.FSharp.Collections.Seq.iter
val r : TagCloudTag
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
property TagCloudTag.Count: int
Raw view Test code New version

More information

Link:http://fssnip.net/oT
Posted:9 years ago
Author:Tuomas Hietanen
Tags: tagcloud , count