2 people like it.
Like the snippet!
Seq.splitBy, Seq.splitAt, Seq.tryHeadTail
These `Seq` functions allow splitting and processing sequences sequentially, without the need to rescan the input or cache elements in memory.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
165:
166:
167:
168:
169:
170:
171:
172:
173:
174:
175:
176:
177:
178:
179:
180:
181:
182:
|
module Seq =

    /// Decides what happens to an element for which the split predicate returns true.
    type [< RequireQualifiedAccess >] SplitByOption =
        /// The separator element is dropped from the output.
        | Exclude
        /// The separator element is emitted as the last element of the group it ends.
        | IncludeInFirst
        /// The separator element is emitted as the first element of the following group.
        | IncludeInSecond

    /// State machine driving the `Seq.unfold` that produces ONE group.
    type [< RequireQualifiedAccess >] private SplitSubUnfoldState<'T> =
        /// Emit the carried value, then continue with the carried state.
        | PostValue of 'T * SplitSubUnfoldState<'T>
        /// Group not started yet; `start` is the main position recorded when the group was created.
        | Start     of seqNo: int * start: int
        /// Group in progress: where to pull elements from, and what to call
        /// when a separator is hit (`bingo`) or the input runs out (`finish`).
        | Started   of tryNext: (unit -> 'T option) * bingo: ('T -> unit) * finish: (unit -> unit)
        /// Nothing more to emit.
        | Finish

    /// Mutable state shared by the outer unfold and by every group reading from the main enumerator.
    type [< RequireQualifiedAccess >] private SplitUnfoldState<'T> = {
        /// The single forward-only enumerator over the input.
        enumerator          : System.Collections.Generic.IEnumerator<'T>
        /// Number of elements pulled from `enumerator` so far.
        mutable currentPos  : int
        /// Set once `enumerator` has been exhausted.
        mutable isDone      : bool
        /// The most recent separator element, if any.
        mutable splitterO   : 'T option
        /// The group handed out most recently, as long as it has not been terminated yet.
        mutable currentSeqO : 'T seq option
    }

    /// Splits `input` into consecutive groups, starting a new group after each
    /// element for which `f` returns true. `opt` decides where that element goes.
    ///
    /// Straight scan through is efficient, reusing seqs causes rescan from beginning:
    /// a group enumerated while the main enumerator still sits at the group's start
    /// reads directly from it; a group enumerated at any other time opens a fresh
    /// enumerator on `input` and skips forward to its start position.
    ///
    /// The main enumerator is disposed only once the input is exhausted; a rescan
    /// enumerator is disposed when its group ends (at a separator or at the end of input).
    let splitBy f opt (input: 'a seq) =
        let getEnumerator () = input.GetEnumerator ()
        let startingState () =
            {
                SplitUnfoldState.enumerator  = getEnumerator ()
                SplitUnfoldState.currentPos  = 0
                SplitUnfoldState.isDone      = false
                SplitUnfoldState.splitterO   = None
                SplitUnfoldState.currentSeqO = None
            }, 0
        Seq.unfold (fun stateO ->
            // The state is created lazily so that every enumeration of the result starts afresh.
            let state, currentSeqNo = stateO |> Option.defaultWith startingState

            // Pull the next element from the shared main enumerator.
            let tryNextMain () : 'a option =
                if state.isDone then None
                elif state.enumerator.MoveNext () then
                    state.currentPos <- state.currentPos + 1
                    Some state.enumerator.Current
                else
                    state.enumerator.Dispose ()
                    None

            // A separator was read from the main enumerator: remember it and close the current group.
            let bingo v =
                state.splitterO   <- Some v
                state.currentSeqO <- None

            // The main enumerator ran out of elements.
            let finish () =
                state.splitterO   <- None
                state.currentSeqO <- None
                state.isDone      <- true

            // Element source for a group that is enumerated out of order: a private
            // enumerator positioned at `start`. The returned callbacks dispose it as
            // soon as the group terminates, whether at a separator or at the end of input.
            let rescanFrom start =
                let en = getEnumerator ()
                for _ in 1 .. start do en.MoveNext () |> ignore
                let tryNext () = if en.MoveNext () then Some en.Current else None
                tryNext, (fun (_: 'a) -> en.Dispose ()), (fun () -> en.Dispose ())

            // One step of a group: returns the next element of the group, or None when the group ends.
            let subUnFold (tryNext: unit -> 'a option, onSplit, onEnd) =
                match tryNext () with
                | None ->
                    onEnd ()
                    None
                | Some v when f v ->
                    onSplit v
                    if opt = SplitByOption.IncludeInFirst
                    then Some (v, SplitSubUnfoldState.Finish)
                    else None
                | Some v ->
                    Some (v, SplitSubUnfoldState.Started (tryNext, onSplit, onEnd))

            // If the caller did not consume the previous group to its end, fast-forward
            // the main enumerator past it (up to the next separator or the end of input).
            while state.currentSeqO |> Option.isSome do
                subUnFold (tryNextMain, bingo, finish) |> ignore

            if state.isDone then None
            else
                if opt <> SplitByOption.IncludeInSecond then state.splitterO <- None
                let s0 =
                    let start = SplitSubUnfoldState.Start (currentSeqNo, state.currentPos)
                    match state.splitterO, opt with
                    | Some v, SplitByOption.IncludeInSecond -> SplitSubUnfoldState.PostValue (v, start)
                    | _                                     -> start
                    |> Seq.unfold (function
                        | SplitSubUnfoldState.PostValue (v, next) -> Some (v, next)
                        | SplitSubUnfoldState.Finish              -> None
                        | SplitSubUnfoldState.Started (tryNext, onSplit, onEnd) ->
                            subUnFold (tryNext, onSplit, onEnd)
                        | SplitSubUnfoldState.Start (_, myStart) ->
                            // Read from the main enumerator only if it is still at this group's start.
                            if state.currentPos = myStart
                            then subUnFold (tryNextMain, bingo, finish)
                            else subUnFold (rescanFrom myStart))
                state.currentSeqO <- Some s0
                Some (s0, Some (state, currentSeqNo + 1))
        ) None

    /// Splits `s` at index `n`: the first group holds the elements with index below `n`,
    /// the second group holds the element at index `n` and everything after it.
    /// Yields at most two groups.
    let splitAt n s =
        s
        |> Seq.mapi (fun i v -> i, v)
        |> splitBy (fst >> ((=) n)) SplitByOption.IncludeInSecond
        |> Seq.map (Seq.map snd)
        |> Seq.truncate 2

    /// Applies `fhead` to the first element of `s`, then applies `ftail` to that result
    /// and to the sequence of the remaining elements.
    /// Returns None when `s` is empty; when `s` has a single element `ftail` receives an empty sequence.
    let tryHeadTail fhead ftail s =
        ( Choice1Of3 (), splitAt 1 s )
        ||> Seq.fold (function
            | Choice1Of3 ()           -> Seq.tryHead >> Option.map fhead >> Choice2Of3
            | Choice2Of3 (Some headv) -> ftail headv >> Choice3Of3
            | result                  -> fun _ -> result)
        |> function
            | Choice2Of3 (Some headv) -> Seq.empty |> ftail headv |> Some
            | Choice3Of3 v            -> Some v
            | _                       -> None
/////////// Sample Usage
// Each sample wraps its input in a Seq.map that prints "---> x" whenever an element
// is pulled from the source, so the console shows how often the input is read.
// The split functions live in module Seq and SplitByOption requires qualified access,
// hence the Seq. prefixes.

// Split a list on -1, dropping the separators, and skip the first two groups.
// Every remaining group is cached so that Seq.toList and Seq.length read it only once.
[ -1 ; 0 ; 1; 2; 3; -1; 3; 5; 7; -1; 2; 3; 9 ; -1]
|> Seq.map (fun x -> printfn "---> %A" x; x)
|> Seq.splitBy ((=) -1) Seq.SplitByOption.Exclude
|> Seq.skip 2
//|> Seq.take 2
|> Seq.iter (fun s ->
    let s = Seq.cache s
    Seq.length s |> printfn " ==> %A %A" (Seq.toList s) )

// Split a string into words, keeping each trailing space with its word,
// and skip the first three words.
"Hello friend how are you? Good "
|> Seq.toArray
|> Seq.map (fun x -> printfn "---> %A" x; x)
|> Seq.splitBy ((=) ' ') Seq.SplitByOption.IncludeInFirst
//|> Seq.iter (printfn "%A")
|> Seq.skip 3
//|> Seq.take 2
|> Seq.iter (fun s ->
    let s = Seq.cache s
    Seq.length s |> printfn " ==> %A %A" (System.String(Seq.toArray s)) )

// Split a list in two at index 2 and print the resulting sequence of sequences.
[ 1 ; 2 ; 3 ; 4 ; 5 ; 6 ; 7 ; 8 ]
|> Seq.map (fun x -> printfn "---> %A" x; x)
|> Seq.splitAt 2
|> printfn "%A"

// Process the first element and the remaining elements separately.
[ 1 ; 2 ; 3 ; 4 ; 5 ; 6 ; 7 ; 8 ]
|> Seq.map (fun x -> printfn "---> %A" x; x)
|> Seq.tryHeadTail id (fun head tail -> printfn "head = %A, tail = %A" head tail )
|> Option.defaultWith (fun () -> printfn "No Head Tail" )
// The following example uses Seq.splitBy and Seq.tryHeadTail twice:
// first to split the input file in two, and then to split the second part into several pieces,
// collecting the output file names from the first section and the content from the subsequent pieces.
// It can split a huge 2.2GB file into several subfiles without having to rescan the file
// or cache in memory any portion of the file apart from the current line.
// The first Seq.tryHeadTail operates on a seq<seq<string>>. Because of the nested seq it must use 2
// separate functions to process the head and then the tail without backtracking:
// the first function must process the head seq<string> before continuing to the tail function,
// and the result of the first function is passed to the second:
// |> Seq.tryHeadTail (fun head -> ...; filelist) (fun filelist tail -> ...)
// The second Seq.tryHeadTail operates on a seq<string>. Because the seqs are not nested it can be called simply:
// |> Seq.tryHeadTail id (fun head tail -> ...)
open System.IO
/// Splits the text file `file` into several ".rpt" files whose names start with `path`.
///
/// The first line of `file` is skipped. The lines up to (not including) the first line
/// ending in "---*/" form the header; the text after the last '.' of each header line
/// is taken as an output name. The rest of the file is cut into sections, each ending
/// with a "... rows affected)" / "(1 row affected)" line. The first line of every
/// section is dropped, and section number i is written to `path + name_i + ".rpt"`.
/// Sections beyond the number of names are ignored.
let splitFile path file =
    // Line that closes the header part of the file.
    let closesHeader (line: string) = line.EndsWith "---*/"
    // Line that closes one result section.
    let closesSection (line: string) =
        line.EndsWith " rows affected)" || line = "(1 row affected)"

    // Head handler: reads the output names from the header lines and echoes them.
    let readNames (header: seq<string>) =
        let names =
            header
            |> Seq.map (fun entry -> entry.Split('.') |> Seq.last)
            |> Seq.toArray
        for name in names do printfn "file: %A" name
        printfn "--------"
        names

    // Tail handler: streams the remaining lines into one output file per section.
    let writeSections (names: string []) (rest: seq<seq<string>>) =
        rest
        |> Seq.collect id
        |> Seq.splitBy closesSection Seq.SplitByOption.IncludeInFirst
        |> Seq.map (Seq.skip 1)
        |> Seq.iteri (fun index section ->
            if index < names.Length then
                section
                |> Seq.tryHeadTail id (fun first remaining ->
                    printfn "%s => %s" names.[index] first
                    File.WriteAllLines(path + names.[index] + ".rpt", Seq.append [first] remaining))
                |> Option.defaultValue ())

    File.ReadLines file
    |> Seq.skip 1
    |> Seq.splitBy closesHeader Seq.SplitByOption.IncludeInSecond
    |> Seq.tryHeadTail readNames writeSections
    |> Option.defaultValue ()
|
module Seq
from Microsoft.FSharp.Collections
Multiple items
type RequireQualifiedAccessAttribute =
inherit Attribute
new : unit -> RequireQualifiedAccessAttribute
--------------------
new : unit -> RequireQualifiedAccessAttribute
type SplitByOption =
| Exclude
| IncludeInFirst
| IncludeInSecond
union case SplitByOption.Exclude: SplitByOption
union case SplitByOption.IncludeInFirst: SplitByOption
union case SplitByOption.IncludeInSecond: SplitByOption
type private SplitSubUnfoldState<'T> =
| PostValue of 'T * SplitSubUnfoldState<'T>
| Start of seqNo: int * start: int
| Started of tryNext: unit -> 'T option * bingo: 'T -> unit * finish: unit -> unit
| Finish
union case SplitSubUnfoldState.PostValue: 'T * SplitSubUnfoldState<'T> -> SplitSubUnfoldState<'T>
union case SplitSubUnfoldState.Start: seqNo: int * start: int -> SplitSubUnfoldState<'T>
Multiple items
val int : value:'T -> int (requires member op_Explicit)
--------------------
type int = int32
--------------------
type int<'Measure> = int
union case SplitSubUnfoldState.Started: tryNext: unit -> 'T option * bingo: 'T -> unit * finish: unit -> unit -> SplitSubUnfoldState<'T>
type unit = Unit
type 'T option = Option<'T>
union case SplitSubUnfoldState.Finish: SplitSubUnfoldState<'T>
type private SplitUnfoldState<'T> =
{ enumerator: IEnumerator<'T>
mutable currentPos: int
mutable isDone: bool
mutable splitterO: 'T option
mutable currentSeqO: seq<'T> option }
SplitUnfoldState.enumerator: System.Collections.Generic.IEnumerator<'T>
namespace System
namespace System.Collections
namespace System.Collections.Generic
type IEnumerator<'T> =
inherit IDisposable
inherit IEnumerator
member Current : 'T
SplitUnfoldState.currentPos: int
SplitUnfoldState.isDone: bool
type bool = System.Boolean
SplitUnfoldState.splitterO: 'T option
SplitUnfoldState.currentSeqO: seq<'T> option
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
--------------------
type seq<'T> = System.Collections.Generic.IEnumerable<'T>
val splitBy : f:('a -> bool) -> opt:SplitByOption -> input:seq<'a> -> seq<seq<'a>>
Straight scan through is efficient, reusing seqs causes rescan from beginning
val f : ('a -> bool)
val opt : SplitByOption
val input : seq<'a>
val getEnumerator : (unit -> System.Collections.Generic.IEnumerator<'a>)
System.Collections.Generic.IEnumerable.GetEnumerator() : System.Collections.Generic.IEnumerator<'a>
val startingState : (unit -> SplitUnfoldState<'a> * int)
union case Option.None: Option<'T>
val unfold : generator:('State -> ('T * 'State) option) -> state:'State -> seq<'T>
val stateO : (SplitUnfoldState<'a> * int) option
val state : SplitUnfoldState<'a>
val currentSeqNo : int
module Option
from Microsoft.FSharp.Core
val defaultWith : defThunk:(unit -> 'T) -> option:'T option -> 'T
val tryNextMain : (unit -> 'a option)
SplitUnfoldState.enumerator: System.Collections.Generic.IEnumerator<'a>
System.Collections.IEnumerator.MoveNext() : bool
union case Option.Some: Value: 'T -> Option<'T>
property System.Collections.Generic.IEnumerator.Current: 'a with get
System.IDisposable.Dispose() : unit
val bingo : ('a -> unit)
val v : 'a
SplitUnfoldState.splitterO: 'a option
SplitUnfoldState.currentSeqO: seq<'a> option
val finish : (unit -> unit)
val tryNextSub : (int -> unit -> 'a option)
val start : int
val en : System.Collections.Generic.IEnumerator<'a>
val i : int32
val ignore : value:'T -> unit
val subUnFold : ((unit -> 'a option) * ('a -> unit) * (unit -> unit) -> ('a * SplitSubUnfoldState<'a>) option)
val tryNext : (unit -> 'a option)
val isSome : option:'T option -> bool
val s0 : seq<'a>
val start : SplitSubUnfoldState<'a>
val next : SplitSubUnfoldState<'a>
val seqNo : int
val myStart : int
val splitAt : n:int -> s:seq<'a> -> seq<seq<'a>>
val n : int
val s : seq<'a>
val mapi : mapping:(int -> 'T -> 'U) -> source:seq<'T> -> seq<'U>
val i : int
val fst : tuple:('T1 * 'T2) -> 'T1
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
val snd : tuple:('T1 * 'T2) -> 'T2
val truncate : count:int -> source:seq<'T> -> seq<'T>
val tryHeadTail : fhead:('a -> 'b) -> ftail:('b -> seq<'a> -> 'c) -> s:seq<'a> -> 'c option
val fhead : ('a -> 'b)
val ftail : ('b -> seq<'a> -> 'c)
union case Choice.Choice1Of3: 'T1 -> Choice<'T1,'T2,'T3>
val fold : folder:('State -> 'T -> 'State) -> state:'State -> source:seq<'T> -> 'State
val tryHead : source:seq<'T> -> 'T option
val map : mapping:('T -> 'U) -> option:'T option -> 'U option
union case Choice.Choice2Of3: 'T2 -> Choice<'T1,'T2,'T3>
val headv : 'b
union case Choice.Choice3Of3: 'T3 -> Choice<'T1,'T2,'T3>
val result : Choice<unit,'b option,'c>
val empty<'T> : seq<'T>
val v : 'c
val x : int
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
val skip : count:int -> source:seq<'T> -> seq<'T>
val iter : action:('T -> unit) -> source:seq<'T> -> unit
val s : seq<int>
val cache : source:seq<'T> -> seq<'T>
val length : source:seq<'T> -> int
val toList : source:seq<'T> -> 'T list
val toArray : source:seq<'T> -> 'T []
val x : char
val s : seq<char>
Multiple items
type String =
new : value:char[] -> string + 8 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool + 3 overloads
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 3 overloads
member EnumerateRunes : unit -> StringRuneEnumerator
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
...
--------------------
System.String(value: char []) : System.String
System.String(value: nativeptr<char>) : System.String
System.String(value: nativeptr<sbyte>) : System.String
System.String(value: System.ReadOnlySpan<char>) : System.String
System.String(c: char, count: int) : System.String
System.String(value: char [], startIndex: int, length: int) : System.String
System.String(value: nativeptr<char>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: System.Text.Encoding) : System.String
val id : x:'T -> 'T
val head : int
val tail : seq<int>
namespace System.IO
val splitFile : path:string -> file:string -> unit
val path : string
val file : string
type File =
static member AppendAllLines : path:string * contents:IEnumerable<string> -> unit + 1 overload
static member AppendAllLinesAsync : path:string * contents:IEnumerable<string> * ?cancellationToken:CancellationToken -> Task + 1 overload
static member AppendAllText : path:string * contents:string -> unit + 1 overload
static member AppendAllTextAsync : path:string * contents:string * ?cancellationToken:CancellationToken -> Task + 1 overload
static member AppendText : path:string -> StreamWriter
static member Copy : sourceFileName:string * destFileName:string -> unit + 1 overload
static member Create : path:string -> FileStream + 2 overloads
static member CreateText : path:string -> StreamWriter
static member Decrypt : path:string -> unit
static member Delete : path:string -> unit
...
File.ReadLines(path: string) : System.Collections.Generic.IEnumerable<string>
File.ReadLines(path: string, encoding: System.Text.Encoding) : System.Collections.Generic.IEnumerable<string>
Multiple items
module Seq
from Script
--------------------
module Seq
from Microsoft.FSharp.Collections
val splitBy : f:('a -> bool) -> opt:Seq.SplitByOption -> input:seq<'a> -> seq<seq<'a>>
Straight scan through is efficient, reusing seqs causes rescan from beginning
val l : string
union case Seq.SplitByOption.IncludeInSecond: Seq.SplitByOption
val head : seq<string>
val filelist : string []
val s : string
val last : source:seq<'T> -> 'T
val tail : seq<seq<string>>
val collect : mapping:('T -> #seq<'U>) -> source:seq<'T> -> seq<'U>
union case Seq.SplitByOption.IncludeInFirst: Seq.SplitByOption
val iteri : action:(int -> 'T -> unit) -> source:seq<'T> -> unit
val s : seq<string>
val head : string
val tail : seq<string>
File.WriteAllLines(path: string, contents: System.Collections.Generic.IEnumerable<string>) : unit
File.WriteAllLines(path: string, contents: string []) : unit
File.WriteAllLines(path: string, contents: System.Collections.Generic.IEnumerable<string>, encoding: System.Text.Encoding) : unit
File.WriteAllLines(path: string, contents: string [], encoding: System.Text.Encoding) : unit
val append : source1:seq<'T> -> source2:seq<'T> -> seq<'T>
val defaultValue : value:'T -> option:'T option -> 'T
More information