3 people like it.

# What word is most like the word "turtle"?

A while ago I posted a snippet to calculate the 'Discrete Fréchet Distance' between two curves. If we treat a word as a 'curve' by giving each letter an index (with similar-sounding letters having closer indices) we can compare words by the Fréchet distance between them! An alternative to edit-distance...

 ``` 1: 2: 3: 4: 5: 6: 7: 8: 9: 10: 11: 12: 13: 14: 15: 16: 17: 18: 19: 20: 21: 22: 23: 24: 25: 26: 27: 28: 29: 30: 31: 32: 33: 34: 35: 36: 37: 38: 39: 40: 41: 42: 43: 44: 45: 46: 47: 48: 49: 50: 51: 52: 53: 54: 55: 56: 57: 58: 59: 60: 61: 62: 63: 64: 65: 66: 67: 68: 69: 70: 71: 72: 73: 74: 75: 76: 77: 78: 79: 80: 81: 82: 83: 84: 85: 86: 87: 88: 89: 90: 91: 92: 93: 94: 95: 96: 97: 98: 99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: ``` ``````open System open System.Net // Discrete Frechet Distance: // (Based on the 1994 algorithm from Thomas Eiter and Heikki Mannila.) let frechet (P : array) (Q : array) = let sq (x : float) = x * x let min3 x y z = [x; y; z] |> List.min let d (a : float*float) (b: float*float) = let ab_x = fst(a) - fst(b) let ab_y = snd(a) - snd(b) sqrt(sq ab_x + sq ab_y) let p, q = Array.length P, Array.length Q let ca = Array2D.init p q (fun _ _ -> -1.0) let rec c i j = if ca.[i, j] > -1.0 then ca.[i, j] else if i = 0 && j = 0 then ca.[i, j] <- d (P.[0]) (Q.[0]) elif i > 0 && j = 0 then ca.[i, j] <- Math.Max((c (i-1) 0), (d P.[i] Q.[0])) elif i = 0 && j > 0 then ca.[i, j] <- Math.Max((c 0 (j-1)), (d P.[0] Q.[j])) elif i > 0 && j > 0 then ca.[i, j] <- Math.Max(min3 (c (i-1) j) (c (i-1) (j-1)) (c i (j-1)), (d P.[i] Q.[j])) else ca.[i, j] <- nan ca.[i, j] c (p-1) (q-1) // Use frechet as an operator: let (-~~) a1 a2 = abs(frechet a1 a2) // Make an array of letters in roughly sound-alike order, with an index to reflect roughly how // similar each sounds to its neighbour in the list: let letters = [| 'a', 1.0 'e', 2.0 'i', 3.0 'o', 4.0 'u', 5.0 'y', 6.0 'h', 7.0 'b', 17.0 'p', 18.0 't', 19.0 'd', 20.0 'j', 21.0 'r', 25.0 'c', 35.0 'k', 36.0 'q', 37.0 'x', 38.0 'g', 39.0 'l', 49.0 'm', 50.0 'n', 51.0 's', 61.0 'z', 62.0 'f', 72.0 'v', 73.0 'w', 83.0 |] // Get the 'similarity index' of a letter: let letterValue letter = let pair = try letters |> Array.find (fun elem -> fst(elem) = letter) with | _ -> ' ', 99.9 snd(pair) // Treat a word as a curve of similarity indices: let wordCurve (word : string) = word.ToLower().ToCharArray() |> Array.mapi (fun i letter -> float(i), letterValue letter) // Work out the Frechet distance between two word curves: let wordDistance word1 word2 = (wordCurve word1) -~~ (wordCurve word2) // Make a funky operator for wordDistance: let (<-->) = wordDistance // Read a file from a url but as if it's local for performance: let urlReader (url : string) = let req = WebRequest.Create(url, Timeout = 1000 * 60 * 20) try let resp = req.GetResponse() let stream = resp.GetResponseStream() let tempFileName = System.IO.Path.GetTempFileName() let tempFileStream = new System.IO.FileStream(tempFileName, System.IO.FileMode.Truncate) stream.CopyTo(tempFileStream) tempFileStream.Seek(int64(0), System.IO.SeekOrigin.Begin) |> ignore new System.IO.StreamReader(tempFileStream) with | _ as ex -> failwith ex.Message // Read a word list and break it up into non-trivial, lowercase words: let longWordList() = let goodLetters = [|'a'..'z'|] let goodWord (word : string) = let badIndices = word.ToCharArray() |> Array.map (fun letter -> Array.IndexOf(goodLetters, letter)) |> Array.filter (fun index -> index < 0) (badIndices |> Array.length) = 0 let reader = urlReader "http://unix-tree.huihoo.org/V7/usr/dict/words.html" seq { while not (reader.EndOfStream) do yield (reader.ReadLine().ToLower()) } |> Seq.skip 17 // Skip HTML |> Seq.filter (fun word -> word.Length > 2) |> Seq.filter (fun word -> goodWord word) |> Seq.cache let mostLike word n = longWordList() |> Seq.sortBy (fun listWord -> listWord <--> word) |> Seq.truncate n let leastLike word n = longWordList() |> Seq.sortBy (fun listWord -> -(listWord <--> word)) |> Seq.truncate n // Examples: // mostLike "turtle" 10 |> Seq.iter (fun item -> printfn "%s" item);; // turtle // purple // diddle // dudley // paddle // peddle // piddle // puddle // puddly // toddle // leastLike "turtle" 10 |> Seq.iter (fun item -> printfn "%s" item);; // bartholomew // counterflow // grandnephew // hereinbelow // marshmallow // longfellow // afterglow // bmw // bow // caw ``````
namespace System
namespace System.Net
val frechet : P:(float * float) array -> Q:(float * float) array -> float

Full name: Script.frechet
val P : (float * float) array
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val Q : (float * float) array
val sq : (float -> float)
val x : float
val min3 : ('a -> 'a -> 'a -> 'a) (requires comparison)
val x : 'a (requires comparison)
val y : 'a (requires comparison)
val z : 'a (requires comparison)
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val min : list:'T list -> 'T (requires comparison)

Full name: Microsoft.FSharp.Collections.List.min
val d : (float * float -> float * float -> float)
val a : float * float
val b : float * float
val ab_x : float
val fst : tuple:('T1 * 'T2) -> 'T1

Full name: Microsoft.FSharp.Core.Operators.fst
val ab_y : float
val snd : tuple:('T1 * 'T2) -> 'T2

Full name: Microsoft.FSharp.Core.Operators.snd
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
val p : int
val q : int
type Array =
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...

Full name: System.Array
val length : array:'T [] -> int

Full name: Microsoft.FSharp.Collections.Array.length
val ca : float [,]
module Array2D

from Microsoft.FSharp.Collections
val init : length1:int -> length2:int -> initializer:(int -> int -> 'T) -> 'T [,]

Full name: Microsoft.FSharp.Collections.Array2D.init
val c : (int -> int -> float)
val i : int
val j : int
type Math =
static val PI : float
static val E : float
static member Abs : value:sbyte -> sbyte + 6 overloads
static member Acos : d:float -> float
static member Asin : d:float -> float
static member Atan : d:float -> float
static member Atan2 : y:float * x:float -> float
static member BigMul : a:int * b:int -> int64
static member Ceiling : d:decimal -> decimal + 1 overload
static member Cos : d:float -> float
...

Full name: System.Math
Math.Max(val1: decimal, val2: decimal) : decimal
Math.Max(val1: float, val2: float) : float
Math.Max(val1: float32, val2: float32) : float32
Math.Max(val1: uint64, val2: uint64) : uint64
Math.Max(val1: int64, val2: int64) : int64
Math.Max(val1: uint32, val2: uint32) : uint32
Math.Max(val1: int, val2: int) : int
Math.Max(val1: uint16, val2: uint16) : uint16
Math.Max(val1: int16, val2: int16) : int16
Math.Max(val1: byte, val2: byte) : byte
val nan : float

Full name: Microsoft.FSharp.Core.Operators.nan
val a1 : (float * float) array
val a2 : (float * float) array
val abs : value:'T -> 'T (requires member Abs)

Full name: Microsoft.FSharp.Core.Operators.abs
val letters : (char * float) []

Full name: Script.letters
val letterValue : letter:char -> float

Full name: Script.letterValue
val letter : char
val pair : char * float
val find : predicate:('T -> bool) -> array:'T [] -> 'T

Full name: Microsoft.FSharp.Collections.Array.find
val elem : char * float
val wordCurve : word:string -> (float * float) []

Full name: Script.wordCurve
val word : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
String.ToLower() : string
String.ToLower(culture: Globalization.CultureInfo) : string
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.mapi
val wordDistance : word1:string -> word2:string -> float

Full name: Script.wordDistance
val word1 : string
val word2 : string

val url : string
val req : WebRequest
type WebRequest =
inherit MarshalByRefObject
member Abort : unit -> unit
member AuthenticationLevel : AuthenticationLevel with get, set
member BeginGetRequestStream : callback:AsyncCallback * state:obj -> IAsyncResult
member BeginGetResponse : callback:AsyncCallback * state:obj -> IAsyncResult
member CachePolicy : RequestCachePolicy with get, set
member ConnectionGroupName : string with get, set
member ContentLength : int64 with get, set
member ContentType : string with get, set
member Credentials : ICredentials with get, set
member EndGetRequestStream : asyncResult:IAsyncResult -> Stream
...

Full name: System.Net.WebRequest
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
val resp : WebResponse
WebRequest.GetResponse() : WebResponse
val stream : IO.Stream
WebResponse.GetResponseStream() : IO.Stream
val tempFileName : string
namespace System.IO
type Path =
static val DirectorySeparatorChar : char
static val AltDirectorySeparatorChar : char
static val VolumeSeparatorChar : char
static val InvalidPathChars : char[]
static val PathSeparator : char
static member ChangeExtension : path:string * extension:string -> string
static member Combine : [<ParamArray>] paths:string[] -> string + 3 overloads
static member GetDirectoryName : path:string -> string
static member GetExtension : path:string -> string
static member GetFileName : path:string -> string
...

Full name: System.IO.Path
IO.Path.GetTempFileName() : string
val tempFileStream : IO.FileStream
Multiple items
type FileStream =
inherit Stream
new : path:string * mode:FileMode -> FileStream + 14 overloads
member BeginRead : array:byte[] * offset:int * numBytes:int * userCallback:AsyncCallback * stateObject:obj -> IAsyncResult
member BeginWrite : array:byte[] * offset:int * numBytes:int * userCallback:AsyncCallback * stateObject:obj -> IAsyncResult
member CanSeek : bool
member CanWrite : bool
member EndRead : asyncResult:IAsyncResult -> int
member EndWrite : asyncResult:IAsyncResult -> unit
member Flush : unit -> unit + 1 overload
member GetAccessControl : unit -> FileSecurity
...

Full name: System.IO.FileStream

--------------------
IO.FileStream(path: string, mode: IO.FileMode) : unit
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess) : unit
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess) : unit
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess, bufferSize: int) : unit
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare) : unit
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess, bufferSize: int, isAsync: bool) : unit
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int) : unit
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int, options: IO.FileOptions) : unit
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int, useAsync: bool) : unit
IO.FileStream(path: string, mode: IO.FileMode, rights: Security.AccessControl.FileSystemRights, share: IO.FileShare, bufferSize: int, options: IO.FileOptions) : unit
type FileMode =
| CreateNew = 1
| Create = 2
| Open = 3
| OpenOrCreate = 4
| Truncate = 5
| Append = 6

Full name: System.IO.FileMode
field IO.FileMode.Truncate = 5
IO.Stream.CopyTo(destination: IO.Stream) : unit
IO.Stream.CopyTo(destination: IO.Stream, bufferSize: int) : unit
IO.FileStream.Seek(offset: int64, origin: IO.SeekOrigin) : int64
Multiple items
val int64 : value:'T -> int64 (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int64

--------------------
type int64 = Int64

Full name: Microsoft.FSharp.Core.int64

--------------------
type int64<'Measure> = int64

Full name: Microsoft.FSharp.Core.int64<_>
type SeekOrigin =
| Begin = 0
| Current = 1
| End = 2

Full name: System.IO.SeekOrigin
field IO.SeekOrigin.Begin = 0
val ignore : value:'T -> unit

Full name: Microsoft.FSharp.Core.Operators.ignore
Multiple items
member BaseStream : Stream
member Close : unit -> unit
member CurrentEncoding : Encoding
member DiscardBufferedData : unit -> unit
member EndOfStream : bool
member Peek : unit -> int
member ReadLine : unit -> string
member ReadToEnd : unit -> string
...

--------------------
IO.StreamReader(stream: IO.Stream, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding) : unit
IO.StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(path: string, encoding: Text.Encoding) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
IO.StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
val ex : exn
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
property Exception.Message: string
val longWordList : unit -> seq<string>

Full name: Script.longWordList
val goodLetters : char []
val goodWord : (string -> bool)
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
Array.IndexOf<'T>(array: 'T [], value: 'T) : int
Array.IndexOf(array: Array, value: obj) : int
Array.IndexOf<'T>(array: 'T [], value: 'T, startIndex: int) : int
Array.IndexOf(array: Array, value: obj, startIndex: int) : int
Array.IndexOf<'T>(array: 'T [], value: 'T, startIndex: int, count: int) : int
Array.IndexOf(array: Array, value: obj, startIndex: int, count: int) : int
val filter : predicate:('T -> bool) -> array:'T [] -> 'T []

Full name: Microsoft.FSharp.Collections.Array.filter
val index : int
Multiple items
val seq : sequence:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Core.Operators.seq

--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>

Full name: Microsoft.FSharp.Collections.seq<_>
val not : value:bool -> bool

Full name: Microsoft.FSharp.Core.Operators.not
module Seq

from Microsoft.FSharp.Collections
val skip : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.skip
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
property String.Length: int
val cache : source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.cache
val mostLike : word:string -> n:int -> seq<string>

Full name: Script.mostLike
val n : int
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Seq.sortBy
val listWord : string
val truncate : count:int -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.truncate
val leastLike : word:string -> n:int -> seq<string>

Full name: Script.leastLike