6 people like it.

Parser Monad and Combinators

Simple Parser Monad implementation based on the paper "Monadic Parsing in Haskel" by Graham Hutton and Erik Meijer. Code discussion available here: http://blogs.msdn.com/b/fzandona/archive/2011/10/17/parsing-json-the-fun-way-monadic-parsers-records-and-type-providers-part-2.aspx

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
open System

type 'a Parser = Parser of (char list -> ('a * char list) list)

let parse (Parser p) = p 

type ParserBuilder () =
    member x.Return a = Parser (fun cs -> [a, cs])
    member x.Bind (p, f) = Parser (fun cs -> 
        match parse p cs with
        | (c', cs')::_ -> parse (f c') cs'
        | [] -> []
    )
    member x.Zero () = Parser (fun _ -> [])
    member x.ReturnFrom a = a

let parser = ParserBuilder() 

let item = Parser (function [] -> [] | c::cs -> [c, cs])
let sat pred = parser {
    let! c = item
    if pred c then return c
}
let tChar c = sat ((=) c)

/// Concatenates the results of applying parser p and parser q
let (<+>) p q = Parser (fun cs -> (parse p cs) @ (parse q cs))
/// Applies parser p or parser q and returns at most one result
let (<|>) p q = Parser (fun cs -> 
    match (parse (p <+> q) cs) with
    | []    -> []
    | x::xs -> [x]
)

/// Given a char list, returns a parser that parsers it
let rec text = function
    | []  -> parser { return [] }
    | c::cs -> parser { 
        let! _ = tChar c
        let! _ = text cs
        return c::cs 
    } 

/// Combines many (0 or more) applications of parser p
let rec many p = (many1 p) <|> parser { return [] }
/// Combines at least one (1 or more) applications of parser p
and many1 p = 
    parser { 
        let! r = p
        let! rs = many p
        return r::rs
    } 

/// Combines 0 or more applications of parser p separated by parser sep
let rec sepby p sep =  (sepby1 p sep) <|> parser { return [] }
/// Combines 1 or more applications of parser p separated by parser sep
and sepby1 p sep = 
    parser {
        let! r = p
        let! rs = many (parser { 
            let! _ = sep
            return! p
        })
        return r::rs
    } 

/// Chain 0 or more applications of parser p separated by applications of parser op
let rec chainl p op a = (chainl1  p op) <|> parser { return a }
/// Chain 1 or more applications of parser p separated by applications of parser op
and chainl1 p op =  
    let rec rest r = 
        parser {
            let! f = op
            let! r' = p
            return! rest (f r r')
        } <|> parser {return r}

    parser { let! a = p in return! rest a }

let isSpace =
    // list of "space" chars based on 
    // http://hackage.haskell.org/packages/archive/base/latest/doc/html/Data-Char.html#v:isSpace
    let cs = [' '; '\t'; '\n'; '\r'; '\f'; '\v'] |> Set.ofList
    cs.Contains
let space = many (sat isSpace)

let token p = parser { 
    let! r = p
    let! _ = space
    return r
}

let symb = text >> token

let apply p = parse (parser {
    let! _ = space
    let! r = p
    return r
})

let s2cs = List.ofSeq
let cs2s cs = new String(Array.ofList cs)

let runParser p = 
    s2cs >>
    apply p >>
    function
        | [] -> failwith "Error parsing input string."
        | (result,_)::_ -> result
namespace System
Multiple items
union case Parser.Parser: (char list -> ('a * char list) list) -> 'a Parser

--------------------
type 'a Parser = | Parser of (char list -> ('a * char list) list)

Full name: Script.Parser<_>
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
val parse : 'a Parser -> (char list -> ('a * char list) list)

Full name: Script.parse
val p : (char list -> ('a * char list) list)
Multiple items
type ParserBuilder =
  new : unit -> ParserBuilder
  member Bind : p:'c Parser * f:('c -> 'd Parser) -> 'd Parser
  member Return : a:'e -> 'e Parser
  member ReturnFrom : a:'a -> 'a
  member Zero : unit -> 'b Parser

Full name: Script.ParserBuilder

--------------------
new : unit -> ParserBuilder
val x : ParserBuilder
member ParserBuilder.Return : a:'e -> 'e Parser

Full name: Script.ParserBuilder.Return
val a : 'e
val cs : char list
member ParserBuilder.Bind : p:'c Parser * f:('c -> 'd Parser) -> 'd Parser

Full name: Script.ParserBuilder.Bind
val p : 'c Parser
val f : ('c -> 'd Parser)
val c' : 'c
val cs' : char list
member ParserBuilder.Zero : unit -> 'b Parser

Full name: Script.ParserBuilder.Zero
member ParserBuilder.ReturnFrom : a:'a -> 'a

Full name: Script.ParserBuilder.ReturnFrom
val a : 'a
val parser : ParserBuilder

Full name: Script.parser
val item : char Parser

Full name: Script.item
val c : char
val sat : pred:(char -> bool) -> char Parser

Full name: Script.sat
val pred : (char -> bool)
val tChar : c:char -> char Parser

Full name: Script.tChar
val p : 'a Parser
val q : 'a Parser
val x : 'a * char list
val xs : ('a * char list) list
val text : _arg1:char list -> char list Parser

Full name: Script.text


 Given a char list, returns a parser that parsers it
val many : p:'a Parser -> 'a list Parser

Full name: Script.many


 Combines many (0 or more) applications of parser p
val many1 : p:'a Parser -> 'a list Parser

Full name: Script.many1


 Combines at least one (1 or more) applications of parser p
val r : 'a
val rs : 'a list
val sepby : p:'a Parser -> sep:'b Parser -> 'a list Parser

Full name: Script.sepby


 Combines 0 or more applications of parser p separated by parser sep
val sep : 'b Parser
val sepby1 : p:'a Parser -> sep:'b Parser -> 'a list Parser

Full name: Script.sepby1


 Combines 1 or more applications of parser p separated by parser sep
val chainl : p:'a Parser -> op:('a -> 'a -> 'a) Parser -> a:'a -> 'a Parser

Full name: Script.chainl


 Chain 0 or more applications of parser p separated by applications of parser op
val op : ('a -> 'a -> 'a) Parser
val chainl1 : p:'a Parser -> op:('a -> 'a -> 'a) Parser -> 'a Parser

Full name: Script.chainl1


 Chain 1 or more applications of parser p separated by applications of parser op
val rest : ('a -> 'a Parser)
val f : ('a -> 'a -> 'a)
val r' : 'a
val isSpace : (char -> bool)

Full name: Script.isSpace
val cs : Set<char>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
member Set.Contains : value:'T -> bool
val space : char list Parser

Full name: Script.space
val token : p:'a Parser -> 'a Parser

Full name: Script.token
val symb : (char list -> char list Parser)

Full name: Script.symb
val apply : p:'a Parser -> (char list -> ('a * char list) list)

Full name: Script.apply
val s2cs : (seq<'a> -> 'a list)

Full name: Script.s2cs
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val cs2s : cs:char list -> String

Full name: Script.cs2s
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val ofList : list:'T list -> 'T []

Full name: Microsoft.FSharp.Collections.Array.ofList
val runParser : p:'a Parser -> (seq<char> -> 'a)

Full name: Script.runParser
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val result : 'a
Raw view Test code New version

More information

Link:http://fssnip.net/8S
Posted:12 years ago
Author:fzandona
Tags: parser monad; monad