2 people like it.
Like the snippet!
Text file line filter (script)
A script suitable for FSI that copies a (UTF-8) text file throwing away uninteresting lines in the process.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
|
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
// -- Copy text file but omit certain lines, i.e. keep only specified lines.
// ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
// At work we had a semicolon separated file that was too large for the
// current Excel version. We knew we didn't need the entire file anyway.
// A colleague had Visual Studio and hence F# installed on his computer,
// so we filtered away everything except the lines containing particular
// key values that we needed. Soon after we had a small filtered copy of
// the csv file that we could easily load in Excel.
// In the example below, the lines (or rows) that are copied to the new
// text file are the ones that contain either the text ";Some value;" or
// ";Some other value;" (or both), which in practice turns out to be the
// rows where one of the separated values is either "Some value" or
// "Some other value".
// In our case we knew that the key was so particular that there would
// be no possibility that we would end up with too many lines. In other
// cases maybe it wouldn't hurt to end up with too many lines as long as
// the file is short enough to be imported into Excel.
|
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
|
open System.IO
/// Produces the lines of a UTF-8 encoded text file as a lazy sequence.
/// The file is opened with File.OpenText each time the sequence is
/// enumerated, and the reader is disposed when that enumeration ends.
let linesFromFile filename =
    seq {
        use source = File.OpenText filename
        // Keep pulling lines until the reader reports the end of the stream.
        yield!
            Seq.unfold
                (fun () ->
                    if source.EndOfStream then None
                    else Some (source.ReadLine (), ()))
                ()
    }
/// Creates a UTF-8 encoded text file named `filename` (via File.CreateText)
/// and writes every string in `lines` to it, one per line, in sequence order.
/// The writer is disposed when the function returns.
let linesTofile filename (lines: string seq) =
    use output = File.CreateText filename
    lines |> Seq.iter (fun (text: string) -> output.WriteLine text)
/// Filter to apply to each line: returns true when `line` contains at
/// least one of the strings in `keeperPhrases` as a substring, and false
/// otherwise (including when `keeperPhrases` is empty).
let lineFilter keeperPhrases (line: string) =
    // The array is piped in first and the lambda argument is annotated so
    // that the String.Contains(string) overload is selected explicitly,
    // rather than passing the overloaded method group `line.Contains`.
    keeperPhrases
    |> Array.exists (fun (phrase: string) -> line.Contains phrase)
|
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
|
/// Path of the text file that is read.
[<Literal>]
let inputfilename = @"C:\inputfile.csv"

/// Path of the filtered copy that is written.
[<Literal>]
let outputfilename = @"C:\filteredfile.csv"

/// A line is kept when it contains at least one of these phrases.
let textToKeep = [| @";Some value;"; @";Some other value;" |]

// Running the final `do` block performs the whole copy:
//  1. "inputfile.csv" is turned into a sequence of text lines (strings)
//     that are read only as they are needed.
//  2. The sequence is filtered: a line is kept when it contains
//     ";Some value;" or ";Some other value;" (or both) anywhere in it;
//     every other line is skipped.
//  3. The kept lines are written to "filteredfile.csv" while the input
//     file is being processed.
do
    inputfilename
    |> linesFromFile
    |> Seq.filter (fun line -> lineFilter textToKeep line)
    |> linesTofile outputfilename
|
namespace System
namespace System.IO
val linesFromFile : filename:string -> seq<string>
Full name: Script.linesFromFile
Read all lines from UTF-8 encoded text file as a sequence.
val filename : string
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = System.Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val reader : StreamReader
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 3 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
static member Encrypt : path:string -> unit
static member Exists : path:string -> bool
...
Full name: System.IO.File
File.OpenText(path: string) : StreamReader
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
property StreamReader.EndOfStream: bool
StreamReader.ReadLine() : string
val linesTofile : filename:string -> lines:seq<string> -> unit
Full name: Script.linesTofile
Create a new UTF-8 encoded text file and
write all lines from a sequence to the new file.
val lines : seq<string>
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val writer : StreamWriter
File.CreateText(path: string) : StreamWriter
val line : string
TextWriter.WriteLine() : unit
(+0 other overloads)
TextWriter.WriteLine(value: obj) : unit
(+0 other overloads)
TextWriter.WriteLine(value: string) : unit
(+0 other overloads)
TextWriter.WriteLine(value: decimal) : unit
(+0 other overloads)
TextWriter.WriteLine(value: float) : unit
(+0 other overloads)
TextWriter.WriteLine(value: float32) : unit
(+0 other overloads)
TextWriter.WriteLine(value: uint64) : unit
(+0 other overloads)
TextWriter.WriteLine(value: int64) : unit
(+0 other overloads)
TextWriter.WriteLine(value: uint32) : unit
(+0 other overloads)
TextWriter.WriteLine(value: int) : unit
(+0 other overloads)
val lineFilter : keeperPhrases:string [] -> line:string -> bool
Full name: Script.lineFilter
Filter to apply for each line.
val keeperPhrases : string []
module Array
from Microsoft.FSharp.Collections
val exists : predicate:('T -> bool) -> array:'T [] -> bool
Full name: Microsoft.FSharp.Collections.Array.exists
System.String.Contains(value: string) : bool
Multiple items
type LiteralAttribute =
inherit Attribute
new : unit -> LiteralAttribute
Full name: Microsoft.FSharp.Core.LiteralAttribute
--------------------
new : unit -> LiteralAttribute
val inputfilename : string
Full name: Script.inputfilename
val outputfilename : string
Full name: Script.outputfilename
val textToKeep : string []
Full name: Script.textToKeep
module Seq
from Microsoft.FSharp.Collections
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
More information