2 people like it.

Similar string Markov chain

Generates strings that are similar to the input by modelling the input as a Markov chain: the probability of each symbol depends on the symbols that precede it. The order, which defines how many preceding symbols are examined before placing the next one, is variable.

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
// Generates strings that are similar to the input, as measured by the
// probability of a symbol depending on preceding symbols. (Markov chain)
// The order defines how many preceding symbols to look at to place another.

/// Builds the transition table of a Markov chain from a sample.
/// Every run of `order` consecutive characters found in `s` becomes a key;
/// its value lists each character seen directly after that run together
/// with the fraction of occurrences in which it followed.
let readMap order s =
    // Converts all windows that share one prefix into (next char, probability) pairs.
    let distribution (windows: seq<char []>) =
        let tallies = windows |> Seq.countBy (fun w -> w.[order]) |> List.ofSeq
        let total = tallies |> List.sumBy snd
        tallies |> List.map (fun (next, n) -> next, float n / float total)

    s
    |> Seq.windowed (order + 1)
    // The first `order` characters of a window are the context, the last one is the successor.
    |> Seq.groupBy (fun w -> System.String(Array.sub w 0 order))
    |> Seq.map (fun (prefix, windows) -> prefix, distribution windows)
    |> Map.ofSeq

// Source of random floats, backed by one shared System.Random instance.
// The original author considers System.Random unreliable: swap in another
// generator here if the quality of the randomness matters.
let random =
    let rng = System.Random()
    fun () -> rng.NextDouble()

/// Picks one item from a list of (item, probability) pairs.
/// A random value is drawn and the list is walked from the front, subtracting
/// each probability until the value falls inside an entry's share.
/// The last remaining entry is always accepted, so rounding error in the
/// probabilities (or a list whose probabilities sum to less than the drawn
/// value) can never run off the end of the list. This replaces the earlier
/// scaling of the random value by 0.999, which made the top 0.1% of the range
/// unreachable and so skewed the result against the final entry.
/// Fails with "getChar error" when `cases` is empty.
let getChar cases =
    let rec pick r = function
        | [] -> failwith "getChar error"
        // Only one candidate left: take it regardless of the remaining value.
        | [ (c, _) ] -> c
        | (_, p) :: rest when r > p -> pick (r - p) rest
        | (c, _) :: _ -> c
    pick (random ()) cases

/// Produces `length` further characters following the given distribution map.
/// `acc` holds the characters generated so far, newest first; the finished
/// string is returned in generation order. Whenever fewer than `order`
/// characters are available, or the current context is not a key of `map`,
/// a space is emitted instead.
let rec generate order length acc map =
    match length with
    | remaining when remaining < 1 ->
        // Done: acc is newest-first, so flip it back into reading order.
        System.String(acc |> List.rev |> List.toArray)
    | _ ->
        // The context is the most recent `order` characters, in reading order.
        let context =
            System.String(Seq.truncate order acc |> List.ofSeq |> List.rev |> List.toArray)
        let next =
            if context.Length <> order then
                ' '
            else
                match Map.tryFind context map with
                | Some choices -> getChar choices
                | None -> ' '
        generate order (length - 1) (next :: acc) map

/// Generates words from a sample and prints them on one line.
/// Every space in `input` (plus one added at each end) is widened to `order`
/// spaces before the map is read, so each word starts from an all-space
/// context and words are generated independently of each other. The widened
/// gaps are collapsed back to single spaces in the output.
///
/// order        - number of preceding characters considered; must be at least 1.
/// approxLength - number of characters generated before the gaps are collapsed,
///                so the printed text is usually shorter. Values below 1 print
///                an empty line.
/// input        - space-separated sample words.
///
/// Raises ArgumentException when `order` is below 1. Such values already
/// failed with that exception type before, but only deep inside the string
/// calls and with an unrelated message.
let wordwise order approxLength input =
    if order < 1 then invalidArg "order" "order must be at least 1"
    let gap = String.replicate order " "
    let out : string =
        (" " + input + " ").Replace(" ", gap)
        |> readMap order
        |> generate order approxLength []
    let collapsed = out.Replace(gap, " ")
    // generate emits a space whenever it lacks a full context, so non-empty
    // output begins with padding; drop one leading character. Guard the empty
    // case, where Remove(0, 1) would throw.
    let trimmed = if collapsed.Length > 0 then collapsed.Remove(0, 1) else collapsed
    printfn "%s" trimmed


// Sample runs. Feeding in a long list of names gives more useful results.

let lolz = "thorne rudenti"
// The same tiny sample, first at order 1 and then at order 2.
[ 1; 2 ] |> List.iter (fun order -> wordwise order 60 lolz)

// Names of planets and moons, generated at order 2.
"Mercury Venus Earth Mars Jupiter Saturn Neptune Pluto Moon Terra Luna \
Adrastea Ganymede Callisto Europa Himalia Amalthea Thebe Elara Metis Pasiphae Carme \
Sinope Lysithea Ananke Leda Themisto Callirrhoe Praxidike Megaclite Iocaste Taygete \
Kalyke Autonoe Harpalyke Titan Rhea Iapetus Dione Tethys Enceladus Mimas Hyperion \
Phoebe Janus Epimetheus Prometheus Pandora Titania Oberon Umbriel Ariel Miranda \
Sycorax Puck Portia Juliet Caliban Belinda Cressida Triton Proteus Nereid Larissa \
Galatea Despina Thalassa Charon"
|> wordwise 2 200
val readMap : order:int -> s:seq<char> -> Map<System.String,(char * float) list>


 Reads a map of next characters' probabilities from a sample.
val order : int
val s : seq<char>
module Seq

from Microsoft.FSharp.Collections
val windowed : windowSize:int -> source:seq<'T> -> seq<'T []>
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
val a : char []
namespace System
Multiple items
type String =
  new : value:char[] -> string + 8 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool + 3 overloads
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 3 overloads
  member EnumerateRunes : unit -> StringRuneEnumerator
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  ...

--------------------
System.String(value: char []) : System.String
System.String(value: nativeptr<char>) : System.String
System.String(value: nativeptr<sbyte>) : System.String
System.String(value: System.ReadOnlySpan<char>) : System.String
System.String(c: char, count: int) : System.String
System.String(value: char [], startIndex: int, length: int) : System.String
System.String(value: nativeptr<char>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int) : System.String
System.String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: System.Text.Encoding) : System.String
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
val a : System.String
val b : seq<char []>
val counted : (char * int) list
val countBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * int> (requires equality)
val toList : source:seq<'T> -> 'T list
val total : int
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
    interface IReadOnlyList<'T>
    interface IReadOnlyCollection<'T>
    interface IEnumerable
    interface IEnumerable<'T>
    member GetReverseIndex : rank:int * offset:int -> int
    member GetSlice : startIndex:int option * endIndex:int option -> 'T list
    member Head : 'T
    member IsEmpty : bool
    member Item : index:int -> 'T with get
    member Length : int
    ...
val sumBy : projection:('T -> 'U) -> list:'T list -> 'U (requires member ( + ) and member get_Zero)
val snd : tuple:('T1 * 'T2) -> 'T2
val map : mapping:('T -> 'U) -> list:'T list -> 'U list
val c : char
val i : int
Multiple items
val float : value:'T -> float (requires member op_Explicit)

--------------------
type float = System.Double

--------------------
type float<'Measure> = float
Multiple items
module Map

from Microsoft.FSharp.Collections

--------------------
type Map<'Key,'Value (requires comparison)> =
  interface IReadOnlyDictionary<'Key,'Value>
  interface IReadOnlyCollection<KeyValuePair<'Key,'Value>>
  interface IEnumerable
  interface IComparable
  interface IEnumerable<KeyValuePair<'Key,'Value>>
  interface ICollection<KeyValuePair<'Key,'Value>>
  interface IDictionary<'Key,'Value>
  new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
  member Add : key:'Key * value:'Value -> Map<'Key,'Value>
  member ContainsKey : key:'Key -> bool
  ...

--------------------
new : elements:seq<'Key * 'Value> -> Map<'Key,'Value>
val ofSeq : elements:seq<'Key * 'T> -> Map<'Key,'T> (requires comparison)
val random : (unit -> float)
val r : System.Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit + 1 overload
  member NextDouble : unit -> float

--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
System.Random.NextDouble() : float
val getChar : cases:('a * float) list -> 'a


 Helper to get one character from a list of choices with probabilities
val cases : ('a * float) list
val run : (float -> ('b * float) list -> 'b)
val r : float
val failwith : message:string -> 'T
val c : 'b
val p : float
val t : ('b * float) list
val generate : order:int -> length:int -> acc:char list -> map:Map<System.String,(char * float) list> -> System.String


 Creates text according to a distribution. Defaults to spaces on unknown cases.
val length : int
val acc : char list
val map : Map<System.String,(char * float) list>
val toArray : list:'T list -> 'T []
module Array

from Microsoft.FSharp.Collections
val rev : array:'T [] -> 'T []
val sub : System.String
val truncate : count:int -> source:seq<'T> -> seq<'T>
val toArray : source:seq<'T> -> 'T []
val newChar : char
val tryFind : key:'Key -> table:Map<'Key,'T> -> 'T option (requires comparison)
union case Option.Some: Value: 'T -> Option<'T>
val l : (char * float) list
property System.String.Length: int with get
val wordwise : order:int -> approxLength:int -> input:string -> unit


 Generates words from sample. Generates space-separated words independently.
val approxLength : int
val input : string
val out : System.String
module String

from Microsoft.FSharp.Core
val replicate : count:int -> str:string -> string
System.String.Replace(oldValue: string, newValue: string) : string
System.String.Replace(oldChar: char, newChar: char) : string
System.String.Replace(oldValue: string, newValue: string, comparisonType: System.StringComparison) : string
System.String.Replace(oldValue: string, newValue: string, ignoreCase: bool, culture: System.Globalization.CultureInfo) : string
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
val lolz : string

More information

Link:http://fssnip.net/ol
Posted:3 years ago
Author:Vandroiy
Tags: random , generator , markov chain