4 people like it.
Like the snippet!
A Staged Regular Expression Matcher
A staged a regular expression interpreter is a compiler!!!
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
96:
97:
98:
99:
100:
101:
102:
103:
104:
105:
|
// http://www.cs.princeton.edu/courses/archive/spr09/cos333/beautiful.html
// http://scala-lms.github.io/tutorials/regex.html
#r "../packages/FSharp.Compiler.Service.1.3.1.0/lib/net45/FSharp.Compiler.Service.dll"
#r "../packages/QuotationCompiler.0.0.7-alpha/lib/net45/QuotationCompiler.dll"
open QuotationCompiler
open Microsoft.FSharp.Quotations
// <@ fun x -> (% <@ x @> ) @> ~ lambda (fun x -> x)
let lambda (f : Expr<'T> -> Expr<'R>) : Expr<'T -> 'R> =
let var = new Var("__temp__", typeof<'T>)
Expr.Cast<_>(Expr.Lambda(var, f (Expr.Cast<_>(Expr.Var var))))
// <@ fun x y -> (% <@ x @> ... <@ y @> ) @> ~ lambda (fun x y -> x ... y )
let lambda2 (f : Expr<'T> -> Expr<'S> -> Expr<'R>) : Expr<'T -> 'S -> 'R> =
let var = new Var("__temp__", typeof<'T>)
let var' = new Var("__temp'__", typeof<'S>)
Expr.Cast<_>(Expr.Lambda(var, Expr.Lambda(var', f (Expr.Cast<_>(Expr.Var var)) (Expr.Cast<_>(Expr.Var var')))))
let rec matchsearch (regexp : string) (text : Expr<string>) : Expr<bool> =
if regexp.[0] = '^' then
matchhere regexp 1 text <@ 0 @>
else
<@
let text = %text
let mutable start = -1
let mutable found = false
while not found && start < text.Length do
start <- start + 1
found <- (% lambda2(fun text start -> matchhere regexp 0 text start) ) text start
found
@>
and matchhere (regexp : string) (restart : int)
(text : Expr<string>) (start : Expr<int>) : Expr<bool> =
if restart = regexp.Length then
<@ true @>
else if regexp.[restart] = '$' && restart + 1 = regexp.Length then
<@ %start = String.length %text @>
else if restart + 1 < regexp.Length && regexp.[restart + 1] = '*' then
matchstar regexp.[restart] regexp (restart + 2) text start
else
<@
if %start < (%text).Length && (% matchchar regexp.[restart] <@ (%text).[%start] @> ) then
(% matchhere regexp (restart + 1) text <@ %start + 1 @> )
else false
@>
and matchstar (c : char) (regexp : string) (restart : int) (text : Expr<string>) (start: Expr<int>) : Expr<bool> =
<@
let text = %text
let mutable sstart = %start
let mutable found = (% lambda2(fun text sstart -> matchhere regexp restart text sstart) ) text sstart
let mutable failed = false
while not failed && not found && sstart < text.Length do
failed <- not ((% lambda2(fun (text : Expr<string>) (sstart : Expr<int>) -> matchchar c <@ (%text).[%sstart] @>) ) text sstart)
sstart <- sstart + 1
found <- (% lambda2(fun text sstart -> matchhere regexp restart text sstart) ) text sstart
not failed && found
@>
and matchchar (c: char) (t : Expr<char>) : Expr<bool> =
if c = '.' then <@ true @>
else <@ c = %t @>
let compileRegEx (pattern : string) : string -> bool =
let f = QuotationCompiler.ToFunc(lambda (fun text -> matchsearch pattern text))
f ()
let testmatch (f : string -> bool) (text : string) (expected : bool) =
if f text <> expected then
failwith "oups"
// Examples
let ``^hello$`` = compileRegEx "^hello$"
let ``hell`` = compileRegEx "hell"
let ``hel*`` = compileRegEx "hel*"
let ``hel*$`` = compileRegEx "hel*$"
let ``ab`` = compileRegEx "ab"
let ``^ab`` = compileRegEx "^ab"
let ``a*b`` = compileRegEx "a*b"
let ``^ab*`` = compileRegEx "^ab*"
let ``^ab*$`` = compileRegEx "^ab*$"
testmatch ``^hello$`` "hello" true
testmatch ``^hello$`` "hell" false
testmatch ``hell`` "hello" true
testmatch ``hell`` "hell" true
testmatch ``hel*`` "he" true
testmatch ``hel*$`` "hello" false
testmatch ``hel*`` "yo hello" true
testmatch ``ab`` "hello ab hello" true
testmatch ``^ab`` "hello ab hello" false
testmatch ``a*b`` "hello aab hello" true
testmatch ``^ab*`` "abcd" true
testmatch ``^ab*`` "a" true
testmatch ``^ab*`` "ac" true
testmatch ``^ab*`` "bac" false
testmatch ``^ab*$`` "ac" false
|
namespace Microsoft
namespace Microsoft.FSharp
namespace Microsoft.FSharp.Quotations
val lambda : f:(Expr<'T> -> Expr<'R>) -> Expr<('T -> 'R)>
Full name: Script.lambda
val f : (Expr<'T> -> Expr<'R>)
Multiple items
type Expr =
override Equals : obj:obj -> bool
member GetFreeVars : unit -> seq<Var>
member Substitute : substitution:(Var -> Expr option) -> Expr
member ToString : full:bool -> string
member CustomAttributes : Expr list
member Type : Type
static member AddressOf : target:Expr -> Expr
static member AddressSet : target:Expr * value:Expr -> Expr
static member Application : functionExpr:Expr * argument:Expr -> Expr
static member Applications : functionExpr:Expr * arguments:Expr list list -> Expr
...
Full name: Microsoft.FSharp.Quotations.Expr
--------------------
type Expr<'T> =
inherit Expr
member Raw : Expr
Full name: Microsoft.FSharp.Quotations.Expr<_>
val var : Var
Multiple items
type Var =
interface IComparable
new : name:string * typ:Type * ?isMutable:bool -> Var
member IsMutable : bool
member Name : string
member Type : Type
static member Global : name:string * typ:Type -> Var
Full name: Microsoft.FSharp.Quotations.Var
--------------------
new : name:string * typ:System.Type * ?isMutable:bool -> Var
val typeof<'T> : System.Type
Full name: Microsoft.FSharp.Core.Operators.typeof
static member Expr.Cast : source:Expr -> Expr<'T>
static member Expr.Lambda : parameter:Var * body:Expr -> Expr
static member Expr.Var : variable:Var -> Expr
val lambda2 : f:(Expr<'T> -> Expr<'S> -> Expr<'R>) -> Expr<('T -> 'S -> 'R)>
Full name: Script.lambda2
val f : (Expr<'T> -> Expr<'S> -> Expr<'R>)
val var' : Var
val matchsearch : regexp:string -> text:Expr<string> -> Expr<bool>
Full name: Script.matchsearch
val regexp : string
Multiple items
val string : value:'T -> string
Full name: Microsoft.FSharp.Core.Operators.string
--------------------
type string = System.String
Full name: Microsoft.FSharp.Core.string
val text : Expr<string>
type bool = System.Boolean
Full name: Microsoft.FSharp.Core.bool
val matchhere : regexp:string -> restart:int -> text:Expr<string> -> start:Expr<int> -> Expr<bool>
Full name: Script.matchhere
val text : string
val mutable start : int
val mutable found : bool
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
property System.String.Length: int
val start : Expr<int>
val restart : int
Multiple items
val int : value:'T -> int (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.int
--------------------
type int = int32
Full name: Microsoft.FSharp.Core.int
--------------------
type int<'Measure> = int
Full name: Microsoft.FSharp.Core.int<_>
module String
from Microsoft.FSharp.Core
val length : str:string -> int
Full name: Microsoft.FSharp.Core.String.length
val matchstar : c:char -> regexp:string -> restart:int -> text:Expr<string> -> start:Expr<int> -> Expr<bool>
Full name: Script.matchstar
val matchchar : c:char -> t:Expr<char> -> Expr<bool>
Full name: Script.matchchar
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.char
--------------------
type char = System.Char
Full name: Microsoft.FSharp.Core.char
val mutable sstart : int
val sstart : Expr<int>
val mutable failed : bool
val t : Expr<char>
val compileRegEx : pattern:string -> (string -> bool)
Full name: Script.compileRegEx
val pattern : string
val f : (unit -> string -> bool)
val testmatch : f:(string -> bool) -> text:string -> expected:bool -> unit
Full name: Script.testmatch
val f : (string -> bool)
val expected : bool
val failwith : message:string -> 'T
Full name: Microsoft.FSharp.Core.Operators.failwith
val ( ^hello$ ) : (string -> bool)
Full name: Script.( ^hello$ )
val ( hel* ) : (string -> bool)
Full name: Script.( hel* )
val ( hel*$ ) : (string -> bool)
Full name: Script.( hel*$ )
val ( ^ab ) : (string -> bool)
Full name: Script.( ^ab )
val ( a*b ) : (string -> bool)
Full name: Script.( a*b )
val ( ^ab* ) : (string -> bool)
Full name: Script.( ^ab* )
val ( ^ab*$ ) : (string -> bool)
Full name: Script.( ^ab*$ )
More information