3 people like it.
Like the snippet!
What word is most like the word "turtle"?
A while ago I posted a snippet to calculate the 'Discrete Fréchet Distance' between two curves. If we treat a word as a 'curve' by giving each letter an index (with similar-sounding letters having closer indices) we can compare words by the Fréchet distance between them! An alternative to edit-distance...
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
106:
107:
108:
109:
110:
111:
112:
113:
114:
115:
116:
117:
118:
119:
120:
121:
122:
123:
124:
125:
126:
127:
128:
129:
130:
131:
132:
133:
134:
135:
136:
137:
138:
139:
140:
141:
142:
143:
144:
145:
146:
147:
148:
149:
150:
151:
152:
153:
154:
155:
156:
157:
158:
159:
160:
161:
162:
163:
164:
|
open System
open System.Net
// Discrete Frechet Distance:
// (Based on the 1994 algorithm from Thomas Eiter and Heikki Mannila.)
let frechet (P : array<float*float>) (Q : array<float*float>) =
let sq (x : float) = x * x
let min3 x y z = [x; y; z] |> List.min
let d (a : float*float) (b: float*float) =
let ab_x = fst(a) - fst(b)
let ab_y = snd(a) - snd(b)
sqrt(sq ab_x + sq ab_y)
let p, q = Array.length P, Array.length Q
let ca = Array2D.init p q (fun _ _ -> -1.0)
let rec c i j =
if ca.[i, j] > -1.0 then
ca.[i, j]
else
if i = 0 && j = 0 then
ca.[i, j] <- d (P.[0]) (Q.[0])
elif i > 0 && j = 0 then
ca.[i, j] <- Math.Max((c (i-1) 0), (d P.[i] Q.[0]))
elif i = 0 && j > 0 then
ca.[i, j] <- Math.Max((c 0 (j-1)), (d P.[0] Q.[j]))
elif i > 0 && j > 0 then
ca.[i, j] <- Math.Max(min3 (c (i-1) j) (c (i-1) (j-1)) (c i (j-1)), (d P.[i] Q.[j]))
else
ca.[i, j] <- nan
ca.[i, j]
c (p-1) (q-1)
// Use frechet as an operator:
let (-~~) a1 a2 = abs(frechet a1 a2)
// Make an array of letters in roughly sound-alike order, with an index to reflect roughly how
// similar each sounds to its neighbour in the list:
let letters = [|
'a', 1.0
'e', 2.0
'i', 3.0
'o', 4.0
'u', 5.0
'y', 6.0
'h', 7.0
'b', 17.0
'p', 18.0
't', 19.0
'd', 20.0
'j', 21.0
'r', 25.0
'c', 35.0
'k', 36.0
'q', 37.0
'x', 38.0
'g', 39.0
'l', 49.0
'm', 50.0
'n', 51.0
's', 61.0
'z', 62.0
'f', 72.0
'v', 73.0
'w', 83.0
|]
// Get the 'similarity index' of a letter:
let letterValue letter =
let pair = try
letters
|> Array.find (fun elem -> fst(elem) = letter)
with
| _ -> ' ', 99.9
snd(pair)
// Treat a word as a curve of similarity indices:
let wordCurve (word : string) =
word.ToLower().ToCharArray()
|> Array.mapi (fun i letter -> float(i), letterValue letter)
// Work out the Frechet distance between two word curves:
let wordDistance word1 word2 =
(wordCurve word1) -~~ (wordCurve word2)
// Make a funky operator for wordDistance:
let (<-->) = wordDistance
// Read a file from a url but as if it's local for performance:
let urlReader (url : string) =
let req = WebRequest.Create(url, Timeout = 1000 * 60 * 20)
try
let resp = req.GetResponse()
let stream = resp.GetResponseStream()
let tempFileName = System.IO.Path.GetTempFileName()
let tempFileStream = new System.IO.FileStream(tempFileName, System.IO.FileMode.Truncate)
stream.CopyTo(tempFileStream)
tempFileStream.Seek(int64(0), System.IO.SeekOrigin.Begin) |> ignore
new System.IO.StreamReader(tempFileStream)
with
| _ as ex -> failwith ex.Message
// Read a word list and break it up into non-trivial, lowercase words:
let longWordList() =
let goodLetters = [|'a'..'z'|]
let goodWord (word : string) =
let badIndices =
word.ToCharArray()
|> Array.map (fun letter -> Array.IndexOf(goodLetters, letter))
|> Array.filter (fun index -> index < 0)
(badIndices |> Array.length) = 0
let reader = urlReader "http://unix-tree.huihoo.org/V7/usr/dict/words.html"
seq {
while not (reader.EndOfStream) do
yield (reader.ReadLine().ToLower())
}
|> Seq.skip 17 // Skip HTML
|> Seq.filter (fun word -> word.Length > 2)
|> Seq.filter (fun word -> goodWord word)
|> Seq.cache
let mostLike word n =
longWordList()
|> Seq.sortBy (fun listWord -> listWord <--> word)
|> Seq.truncate n
let leastLike word n =
longWordList()
|> Seq.sortBy (fun listWord -> -(listWord <--> word))
|> Seq.truncate n
// Examples:
// mostLike "turtle" 10 |> Seq.iter (fun item -> printfn "%s" item);;
// turtle
// purple
// diddle
// dudley
// paddle
// peddle
// piddle
// puddle
// puddly
// toddle
// leastLike "turtle" 10 |> Seq.iter (fun item -> printfn "%s" item);;
// bartholomew
// counterflow
// grandnephew
// hereinbelow
// marshmallow
// longfellow
// afterglow
// bmw
// bow
// caw
|
namespace System
namespace System.Net
val frechet : P:(float * float) array -> Q:(float * float) array -> float
Full name: Script.frechet
val P : (float * float) array
type 'T array = 'T []
Full name: Microsoft.FSharp.Core.array<_>
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
val Q : (float * float) array
val sq : (float -> float)
val x : float
val min3 : ('a -> 'a -> 'a -> 'a) (requires comparison)
val x : 'a (requires comparison)
val y : 'a (requires comparison)
val z : 'a (requires comparison)
Multiple items
module List
from Microsoft.FSharp.Collections
--------------------
type List<'T> =
| ( [] )
| ( :: ) of Head: 'T * Tail: 'T list
interface IEnumerable
interface IEnumerable<'T>
member Head : 'T
member IsEmpty : bool
member Item : index:int -> 'T with get
member Length : int
member Tail : 'T list
static member Cons : head:'T * tail:'T list -> 'T list
static member Empty : 'T list
Full name: Microsoft.FSharp.Collections.List<_>
val min : list:'T list -> 'T (requires comparison)
Full name: Microsoft.FSharp.Collections.List.min
val d : (float * float -> float * float -> float)
val a : float * float
val b : float * float
val ab_x : float
val fst : tuple:('T1 * 'T2) -> 'T1
Full name: Microsoft.FSharp.Core.Operators.fst
val ab_y : float
val snd : tuple:('T1 * 'T2) -> 'T2
Full name: Microsoft.FSharp.Core.Operators.snd
val sqrt : value:'T -> 'U (requires member Sqrt)
Full name: Microsoft.FSharp.Core.Operators.sqrt
val p : int
val q : int
type Array =
member Clone : unit -> obj
member CopyTo : array:Array * index:int -> unit + 1 overload
member GetEnumerator : unit -> IEnumerator
member GetLength : dimension:int -> int
member GetLongLength : dimension:int -> int64
member GetLowerBound : dimension:int -> int
member GetUpperBound : dimension:int -> int
member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
member Initialize : unit -> unit
member IsFixedSize : bool
...
Full name: System.Array
val length : array:'T [] -> int
Full name: Microsoft.FSharp.Collections.Array.length
val ca : float [,]
module Array2D
from Microsoft.FSharp.Collections
val init : length1:int -> length2:int -> initializer:(int -> int -> 'T) -> 'T [,]
Full name: Microsoft.FSharp.Collections.Array2D.init
val c : (int -> int -> float)
val i : int
val j : int
type Math =
static val PI : float
static val E : float
static member Abs : value:sbyte -> sbyte + 6 overloads
static member Acos : d:float -> float
static member Asin : d:float -> float
static member Atan : d:float -> float
static member Atan2 : y:float * x:float -> float
static member BigMul : a:int * b:int -> int64
static member Ceiling : d:decimal -> decimal + 1 overload
static member Cos : d:float -> float
...
Full name: System.Math
Math.Max(val1: decimal, val2: decimal) : decimal
(+0 other overloads)
Math.Max(val1: float, val2: float) : float
(+0 other overloads)
Math.Max(val1: float32, val2: float32) : float32
(+0 other overloads)
Math.Max(val1: uint64, val2: uint64) : uint64
(+0 other overloads)
Math.Max(val1: int64, val2: int64) : int64
(+0 other overloads)
Math.Max(val1: uint32, val2: uint32) : uint32
(+0 other overloads)
Math.Max(val1: int, val2: int) : int
(+0 other overloads)
Math.Max(val1: uint16, val2: uint16) : uint16
(+0 other overloads)
Math.Max(val1: int16, val2: int16) : int16
(+0 other overloads)
Math.Max(val1: byte, val2: byte) : byte
(+0 other overloads)
val nan : float
Full name: Microsoft.FSharp.Core.Operators.nan
val a1 : (float * float) array
val a2 : (float * float) array
val abs : value:'T -> 'T (requires member Abs)
Full name: Microsoft.FSharp.Core.Operators.abs
val letters : (char * float) []
Full name: Script.letters
val letterValue : letter:char -> float
Full name: Script.letterValue
val letter : char
val pair : char * float
val find : predicate:('T -> bool) -> array:'T [] -> 'T
Full name: Microsoft.FSharp.Collections.Array.find
val elem : char * float
val wordCurve : word:string -> (float * float) []
Full name: Script.wordCurve
val word : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = String
Full name: Microsoft.FSharp.Core.string
String.ToLower() : string
String.ToLower(culture: Globalization.CultureInfo) : string
val mapi : mapping:(int -> 'T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.mapi
val wordDistance : word1:string -> word2:string -> float
Full name: Script.wordDistance
val word1 : string
val word2 : string
val urlReader : url:string -> IO.StreamReader
Full name: Script.urlReader
val url : string
val req : WebRequest
type WebRequest =
inherit MarshalByRefObject
member Abort : unit -> unit
member AuthenticationLevel : AuthenticationLevel with get, set
member BeginGetRequestStream : callback:AsyncCallback * state:obj -> IAsyncResult
member BeginGetResponse : callback:AsyncCallback * state:obj -> IAsyncResult
member CachePolicy : RequestCachePolicy with get, set
member ConnectionGroupName : string with get, set
member ContentLength : int64 with get, set
member ContentType : string with get, set
member Credentials : ICredentials with get, set
member EndGetRequestStream : asyncResult:IAsyncResult -> Stream
...
Full name: System.Net.WebRequest
WebRequest.Create(requestUri: Uri) : WebRequest
WebRequest.Create(requestUriString: string) : WebRequest
val resp : WebResponse
WebRequest.GetResponse() : WebResponse
val stream : IO.Stream
WebResponse.GetResponseStream() : IO.Stream
val tempFileName : string
namespace System.IO
type Path =
static val DirectorySeparatorChar : char
static val AltDirectorySeparatorChar : char
static val VolumeSeparatorChar : char
static val InvalidPathChars : char[]
static val PathSeparator : char
static member ChangeExtension : path:string * extension:string -> string
static member Combine : [<ParamArray>] paths:string[] -> string + 3 overloads
static member GetDirectoryName : path:string -> string
static member GetExtension : path:string -> string
static member GetFileName : path:string -> string
...
Full name: System.IO.Path
IO.Path.GetTempFileName() : string
val tempFileStream : IO.FileStream
Multiple items
type FileStream =
inherit Stream
new : path:string * mode:FileMode -> FileStream + 14 overloads
member BeginRead : array:byte[] * offset:int * numBytes:int * userCallback:AsyncCallback * stateObject:obj -> IAsyncResult
member BeginWrite : array:byte[] * offset:int * numBytes:int * userCallback:AsyncCallback * stateObject:obj -> IAsyncResult
member CanRead : bool
member CanSeek : bool
member CanWrite : bool
member EndRead : asyncResult:IAsyncResult -> int
member EndWrite : asyncResult:IAsyncResult -> unit
member Flush : unit -> unit + 1 overload
member GetAccessControl : unit -> FileSecurity
...
Full name: System.IO.FileStream
--------------------
IO.FileStream(path: string, mode: IO.FileMode) : unit
(+0 other overloads)
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess) : unit
(+0 other overloads)
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess, bufferSize: int) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare) : unit
(+0 other overloads)
IO.FileStream(handle: Win32.SafeHandles.SafeFileHandle, access: IO.FileAccess, bufferSize: int, isAsync: bool) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int, options: IO.FileOptions) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, access: IO.FileAccess, share: IO.FileShare, bufferSize: int, useAsync: bool) : unit
(+0 other overloads)
IO.FileStream(path: string, mode: IO.FileMode, rights: Security.AccessControl.FileSystemRights, share: IO.FileShare, bufferSize: int, options: IO.FileOptions) : unit
(+0 other overloads)
type FileMode =
| CreateNew = 1
| Create = 2
| Open = 3
| OpenOrCreate = 4
| Truncate = 5
| Append = 6
Full name: System.IO.FileMode
field IO.FileMode.Truncate = 5
IO.Stream.CopyTo(destination: IO.Stream) : unit
IO.Stream.CopyTo(destination: IO.Stream, bufferSize: int) : unit
IO.FileStream.Seek(offset: int64, origin: IO.SeekOrigin) : int64
Multiple items
val int64 : value:'T -> int64 (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int64
--------------------
type int64 = Int64
Full name: Microsoft.FSharp.Core.int64
--------------------
type int64<'Measure> = int64
Full name: Microsoft.FSharp.Core.int64<_>
type SeekOrigin =
| Begin = 0
| Current = 1
| End = 2
Full name: System.IO.SeekOrigin
field IO.SeekOrigin.Begin = 0
val ignore : value:'T -> unit
Full name: Microsoft.FSharp.Core.Operators.ignore
Multiple items
type StreamReader =
inherit TextReader
new : stream:Stream -> StreamReader + 9 overloads
member BaseStream : Stream
member Close : unit -> unit
member CurrentEncoding : Encoding
member DiscardBufferedData : unit -> unit
member EndOfStream : bool
member Peek : unit -> int
member Read : unit -> int + 1 overload
member ReadLine : unit -> string
member ReadToEnd : unit -> string
...
Full name: System.IO.StreamReader
--------------------
IO.StreamReader(stream: IO.Stream) : unit
IO.StreamReader(path: string) : unit
IO.StreamReader(stream: IO.Stream, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding) : unit
IO.StreamReader(path: string, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(path: string, encoding: Text.Encoding) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool) : unit
IO.StreamReader(stream: IO.Stream, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
IO.StreamReader(path: string, encoding: Text.Encoding, detectEncodingFromByteOrderMarks: bool, bufferSize: int) : unit
val ex : exn
val failwith : message:string -> 'T
Full name: Microsoft.FSharp.Core.Operators.failwith
property Exception.Message: string
val longWordList : unit -> seq<string>
Full name: Script.longWordList
val goodLetters : char []
val goodWord : (string -> bool)
val badIndices : int []
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []
Full name: Microsoft.FSharp.Collections.Array.map
Array.IndexOf<'T>(array: 'T [], value: 'T) : int
Array.IndexOf(array: Array, value: obj) : int
Array.IndexOf<'T>(array: 'T [], value: 'T, startIndex: int) : int
Array.IndexOf(array: Array, value: obj, startIndex: int) : int
Array.IndexOf<'T>(array: 'T [], value: 'T, startIndex: int, count: int) : int
Array.IndexOf(array: Array, value: obj, startIndex: int, count: int) : int
val filter : predicate:('T -> bool) -> array:'T [] -> 'T []
Full name: Microsoft.FSharp.Collections.Array.filter
val index : int
val reader : IO.StreamReader
Multiple items
val seq : sequence:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Core.Operators.seq
--------------------
type seq<'T> = Collections.Generic.IEnumerable<'T>
Full name: Microsoft.FSharp.Collections.seq<_>
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
property IO.StreamReader.EndOfStream: bool
IO.StreamReader.ReadLine() : string
module Seq
from Microsoft.FSharp.Collections
val skip : count:int -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.skip
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
property String.Length: int
val cache : source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.cache
val mostLike : word:string -> n:int -> seq<string>
Full name: Script.mostLike
val n : int
val sortBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'T> (requires comparison)
Full name: Microsoft.FSharp.Collections.Seq.sortBy
val listWord : string
val truncate : count:int -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.truncate
val leastLike : word:string -> n:int -> seq<string>
Full name: Script.leastLike
More information