2 people like it.
Like the snippet!
Similar string Markov chain
Generates strings that resemble the input by using a Markov chain: each new symbol is chosen with the probability with which it followed the same preceding symbols in the input.
The order of the chain, i.e. how many preceding symbols are considered when placing the next one, is configurable.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
|
// Generates strings that are similar to the input, as measured by the
// probability of a symbol depending on preceding symbols. (Markov chain)
// The order defines how many preceding symbols to look at to place another.
/// Builds the transition table of a Markov chain of the given order from a sample.
/// Every run of `order` consecutive symbols in `s` becomes a key; its value lists
/// each symbol seen immediately after that run, paired with the relative frequency
/// (occurrences / total occurrences for that key) with which it followed.
let readMap order s =
    // Turns all windows that share one prefix into (next symbol, probability) pairs.
    let distribution windows =
        let tallies =
            windows |> Seq.countBy (fun (w: char[]) -> w.[order]) |> List.ofSeq
        let total = tallies |> List.sumBy snd
        [ for (next, n) in tallies -> (next, float n / float total) ]
    s
    |> Seq.windowed (order + 1)
    // The first `order` symbols of a window are the key; the last one is the successor.
    |> Seq.groupBy (fun (w: char[]) -> System.String(w, 0, order))
    |> Seq.map (fun (prefix, windows) -> prefix, distribution windows)
    |> Map.ofSeq
// Shared source of random floats, each taken from System.Random.NextDouble().
// A single System.Random instance is created once and captured by the closure.
// Swap in another generator here if you need more reliable randomness.
let random =
    let generator = System.Random()
    fun () -> generator.NextDouble()
/// Picks one item from `cases`, a list of (item, probability) pairs, by drawing a
/// random number and walking the list while subtracting each probability.
/// The probabilities are expected to sum to (approximately) 1.
/// Fails with "getChar error" when `cases` is empty.
let getChar cases =
    let rec run r = function
        | [] -> failwith "getChar error"
        // The last candidate absorbs whatever mass is left. This covers sums that
        // fall slightly short of 1 through floating-point rounding, so the draw
        // needs no scaling and no candidate loses part of its probability.
        | [ (c, _) ] -> c
        | (_, p) :: t when r > p -> run (r - p) t
        | (c, _) :: _ -> c
    run (random ()) cases
/// Appends `length` generated characters to the text held in `acc` and returns
/// the complete text as a string. `acc` stores the text so far in reverse order
/// (most recent character first). Each new character is drawn from the
/// distribution that `map` holds for the last `order` characters; when fewer than
/// `order` characters exist yet, or the context is not a key of `map`, a space
/// is emitted instead.
let rec generate order length acc map =
    if length < 1 then
        // Done: put the accumulated characters back into reading order.
        System.String(acc |> List.rev |> List.toArray)
    else
        // The most recent `order` characters, restored to reading order.
        let context =
            System.String(acc |> Seq.truncate order |> Seq.toArray |> Array.rev)
        let next =
            if context.Length <> order then
                ' '
            else
                match Map.tryFind context map with
                | Some choices -> getChar choices
                | None -> ' '
        generate order (length - 1) (next :: acc) map
/// Prints roughly `approxLength` characters of generated, space-separated words
/// that resemble the words in `input`.
/// Every space in the (space-padded) input is widened to `order` spaces before
/// the chain is built, and runs of `order` spaces in the output are collapsed
/// back to one space afterwards.
/// Raises System.ArgumentException when `order` is less than 1.
let wordwise order approxLength input =
    // With order = 0 the padding would be empty and String.Replace would reject
    // it only after all the generation work; fail early with a clear message.
    if order < 1 then invalidArg "order" "order must be at least 1"
    let padding = String.replicate order " "
    let out =
        (" " + input + " ").Replace(" ", padding)
        |> readMap order
        |> generate order approxLength []
    let collapsed = out.Replace(padding, " ")
    // Generated text starts with a space, which is dropped here. Nothing is
    // generated when approxLength < 1, so guard against removing from an empty
    // string (String.Remove would throw).
    let text = if collapsed.Length > 0 then collapsed.Remove(0, 1) else collapsed
    printfn "%s" text
// Sample runs. Feed in a long list of names to get more useful results.
let lolz = "thorne rudenti"

// The same short sample at order 1, then at order 2.
for order in [ 1; 2 ] do
    wordwise order 60 lolz

// A longer sample: names of planets and moons, generated at order 2.
wordwise 2 200
    "Mercury Venus Earth Mars Jupiter Saturn Neptune Pluto Moon Terra Luna \
Adrastea Ganymede Callisto Europa Himalia Amalthea Thebe Elara Metis Pasiphae Carme \
Sinope Lysithea Ananke Leda Themisto Callirrhoe Praxidike Megaclite Iocaste Taygete \
Kalyke Autonoe Harpalyke Titan Rhea Iapetus Dione Tethys Enceladus Mimas Hyperion \
Phoebe Janus Epimetheus Prometheus Pandora Titania Oberon Umbriel Ariel Miranda \
Sycorax Puck Portia Juliet Caliban Belinda Cressida Triton Proteus Nereid Larissa \
Galatea Despina Thalassa Charon"
|
val readMap : order:int -> s:seq<char> -> Map<System.String,(char * float) list>
Reads a map of next characters' probabilities from a sample.
val order : int
val s : seq<char>
module Seq
from Microsoft.FSharp.Collections
val windowed : windowSize:int -> source:seq<'T> -> seq<'T []>
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
val a : char []
namespace System
Multiple items
type String =
new : value:char[] -> string + 8 overloads
member Chars : int -> char
member Clone : unit -> obj
member CompareTo : value:obj -> int + 1 overload
member Contains : value:string -> bool + 3 overloads
member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
member EndsWith : value:string -> bool + 3 overloads
member EnumerateRunes : unit -> StringRuneEnumerator
member Equals : obj:obj -> bool + 2 overloads
member GetEnumerator : unit -> CharEnumerator
...
--------------------
System.String(value: char []) : System.String
System.String(value: nativeptr<char>) : System.String
System.String(value: nativeptr<sbyte>) : System.String
System.String(value: System.ReadOnlySpan<char>) : System.String
System.String(c: char, count: int) : System.String
System.String(value: char [], startIndex: int, length: int) : System.String
System.String(value: nativeptr<char>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: System.Text.Encoding) : System.String
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
val a : System.String
val b : seq<char []>
val counted : (char * int) list
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)
val toList : source:seq<'T> -> 'T list
val total : int
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IReadOnlyList<'T>
interface IReadOnlyCollection<'T>
interface IEnumerable
interface IEnumerable<'T>
member GetReverseIndex : rank:int * offset:int -> int
member GetSlice : startIndex:int option * endIndex:int option -> 'T list
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
...
val sumBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member get_Zero)
val snd : tuple:('T1 * 'T2) -> 'T2
val map : mapping:('T -> 'U) -> list:'T list -> 'U list
val c : char
val i : int
Multiple items
val float : value:'T -> float (requires member op_Explicit)
--------------------
type float = System.Double
--------------------
type float<'Measure> = float
Multiple items
module Map
from Microsoft.FSharp.Collections
--------------------
type Map<'Key,'Value (requires comparison)> =
interface IReadOnlyDictionary<'Key,'Value>
interface IReadOnlyCollection<KeyValuePair<'Key,'Value>>
interface IEnumerable
interface IComparable
interface IEnumerable<KeyValuePair<'Key,'Value>>
interface ICollection<KeyValuePair<'Key,'Value>>
interface IDictionary<'Key,'Value>
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
member Add : key:'Key * value:'Value -> Map<'Key,'Value>
member ContainsKey : key:'Key -> bool
...
--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val ofSeq : elements:seq<'Key * 'T> -> Map<'Key,'T> (requires comparison)
val random : (unit -> float)
val r : System.Random
Multiple items
type Random =
new : unit -> Random + 1 overload
member Next : unit -> int + 2 overloads
member NextBytes : buffer:byte[] -> unit + 1 overload
member NextDouble : unit -> float
--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
System.Random.NextDouble() : float
val getChar : cases:('a * float) list -> 'a
Helper to get one character from a list of choices with probabilities
val cases : ('a * float) list
val run : (float -> ('b * float) list -> 'b)
val r : float
val failwith : message:string -> 'T
val c : 'b
val p : float
val t : ('b * float) list
val generate : order:int -> length:int -> acc:char list -> map:Map<System.String,(char * float) list> -> System.String
Creates text according to a distribution. Defaults to spaces on unknown cases.
val length : int
val acc : char list
val map : Map<System.String,(char * float) list>
val toArray : list:'T list -> 'T []
module Array
from Microsoft.FSharp.Collections
val rev : array:'T [] -> 'T []
val sub : System.String
val truncate : count:int -> source:seq<'T> -> seq<'T>
val toArray : source:seq<'T> -> 'T []
val newChar : char
val tryFind : key:'Key -> table:Map<'Key,'T> -> 'T option (requires comparison)
union case Option.Some: Value: 'T -> Option<'T>
val l : (char * float) list
property System.String.Length: int with get
val wordwise : order:int -> approxLength:int -> input:string -> unit
Generates words from sample. Generates space-separated words independently.
val approxLength : int
val input : string
val out : System.String
module String
from Microsoft.FSharp.Core
val replicate : count:int -> str:string -> string
System.String.Replace(oldValue: string, newValue: string) : string
System.String.Replace(oldChar: char, newChar: char) : string
System.String.Replace(oldValue: string, newValue: string, comparisonType: System.StringComparison) : string
System.String.Replace(oldValue: string, newValue: string, ignoreCase: bool, culture: System.Globalization.CultureInfo) : string
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
val lolz : string
More information