3 people like it.

JSON Parser Monad

Simple JSON parser implemented using the Parser Monad. Code discussion here: http://blogs.msdn.com/b/fzandona/archive/2011/11/02/parsing-json-the-fun-way-json-parser-monad-part-3.aspx

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
142: 
143: 
144: 
145: 
open System

(Parser Monad and combinators omitted. Code available here: http://fssnip.net/8S)

// New parser combinators/helpers
let isHexDigit = 
    let ds = ['A'..'F'] @ ['a'..'f'] @ ['0'..'9'] |> Set.ofList
    ds.Contains
let hexDigit = sat isHexDigit

let charToken = tChar >> token

let betweenChars c1 c2 f = parser {
    let! _ = charToken c1
    let! r = f()
    let! _ = charToken c2
    return r
}

let zeroOrOne p = parser { let! ret = p in return ret } <|> parser { return [] }

let (<@>) p q = parser {
    let! rp = p
    let! rq= q
    return (rp @ rq)
}


//
// JSON Paser Monad
//
#nowarn "40"

type JSValue = 
    | JSString of string
    | JSNumber of float
    | JSObject of (JSValue * JSValue) list
    | JSArray of JSValue list
    | JSBool of bool
    | JSNull

let jsNull =
    let nullLiteral = s2cs "null"
    parser { let! _ = symb nullLiteral in return JSNull}

let jsBool = 
    let trueLit = "true" |> s2cs
    let falseLit = "false" |> s2cs
    parser { let! _ = symb trueLit in return JSBool(true) } 
    <|> parser { let! _ = symb falseLit in return JSBool(false) }

let jsNumber = 
    let digitsParser = many1 (sat System.Char.IsDigit)
    let intParser = (zeroOrOne (text ['-'])) <@> digitsParser
    let fracParser = text ['.'] <@> digitsParser
    let expParser = 
        (text ['e'] <|> text ['E']) <@> 
        (zeroOrOne (text ['+'] <|> text ['-'])) <@>
        digitsParser
    parser {
        let! result = intParser <@> zeroOrOne fracParser <@> zeroOrOne expParser 
        let! _ = space
        return (result |> cs2s |> System.Double.Parse |> JSNumber)
    }     

let jsString = 
    let isChar c = (c <> '\"') && (c <> '\\')
    let isEscChar = 
        let cs = "\"\\/bfnrt" |> List.ofSeq |> Set.ofList
        cs.Contains
    let replaceEscChar = function 'b' -> '\b' | 'f' -> '\f' | 'n' -> '\n'
                                | 'r' -> '\r' | 't' -> '\t' | other -> other
    let escChars = parser {
        let! _ = tChar '\\'
        let! c = sat isEscChar
        return (replaceEscChar c)
    }
    let uniChars = parser {
        let! _ = text [ '\\'; 'u' ]
        let! d1 = hexDigit
        let! d2 = hexDigit
        let! d3 = hexDigit
        let! d4 = hexDigit
        let r = 
            let s = new String [|d1; d2; d3; d4|]
            Byte.Parse(s, Globalization.NumberStyles.HexNumber)
            |> char
        return r
    }
    let chars = many ((sat isChar) <|> escChars <|> uniChars)
    parser {
        let! cs = betweenChars '\"' '\"' (fun () -> chars)
        return (cs |> cs2s |> JSString)
    }

let rec jsValue = jsString <|> jsNumber <|> jsArray <|> jsBool <|> jsNull <|> jsObject
and jsElements = sepby jsValue (charToken ',')
and jsArray = parser {
    let! values = betweenChars '[' ']' (fun () -> jsElements)
    return (JSArray values)
    }
and jsMembers = sepby jsPair (charToken ',')
and jsPair = parser {
    let! key = jsString
    let! _ = charToken ':'
    let! value = jsValue
    return (key, value)
    }
and jsObject = parser {
    let! members = betweenChars '{' '}' (fun () -> jsMembers)
    return (JSObject members)
    }

let parseJson : (string -> JSValue) = runParser jsObject


////////////////////////////////////////////
// Sample JSON from http://json.org/example
let widgetJson = "{\"widget\": {
    \"debug\": \"on\",
    \"window\": {
        \"title\": \"Sample Konfabulator Widget\",        
        \"name\": \"main_window\",        
        \"width\": 500,        
        \"height\": 500
    },    \"image\": { 
        \"src\": \"Images/Sun.png\",
        \"name\": \"sun1\",        
        \"hOffset\": 250,        
        \"vOffset\": 250,        
        \"alignment\": \"center\"
    },    \"text\": {
        \"data\": \"Click Here\",
        \"size\": 36,
        \"style\": \"bold\",        
        \"name\": \"text1\",        
        \"hOffset\": 250,        
        \"vOffset\": 100,        
        \"alignment\": \"center\",
        \"onMouseUp\": \"sun1.opacity = (sun1.opacity / 100) * 90;\"
    }
}}"

// Testing
let jsonAst = parseJson widgetJson
namespace System
type 'a Parser = Parser of (char list -> ('a * char list) list)
let parse (Parser p) = p

type ParserBuilder () =
    member x.Return a = Parser (fun cs -> [a, cs])
    member x.Bind (p, f) = Parser (fun cs ->
        match parse p cs with
        | (c', cs')::_ -> parse (f c') cs'
        | [] -> []
    )
    member x.Zero () = Parser (fun _ -> [])
    member x.ReturnFrom a = a

let parser = ParserBuilder()

let item = Parser (function [] -> [] | c::cs -> [c, cs])
let sat pred = parser {
    let! c = item
    if pred c then return c
}
let tChar c = sat ((=) c)

/// Concatenates the results of applying parser p and parser q
let (<+>) p q = Parser (fun cs -> (parse p cs) @ (parse q cs))
/// Applies parser p or parser q and returns at most one result
let (<|>) p q = Parser (fun cs ->
    match (parse (p <+> q) cs) with
    | [] -> []
    | x::xs -> [x]
)

/// Given a char list, returns a parser that parsers it
let rec text = function
    | [] -> parser { return [] }
    | c::cs -> parser {
        let! _ = tChar c
        let! _ = text cs
        return c::cs
    }

/// Combines many (0 or more) applications of parser p
let rec many p = (many1 p) <|> parser { return [] }
/// Combines at least one (1 or more) applications of parser p
and many1 p =
    parser {
        let! r = p
        let! rs = many p
        return r::rs
    }

/// Combines 0 or more applications of parser p separated by parser sep
let rec sepby p sep = (sepby1 p sep) <|> parser { return [] }
/// Combines 1 or more applications of parser p separated by parser sep
and sepby1 p sep =
    parser {
        let! r = p
        let! rs = many (parser {
            let! _ = sep
            return! p
        })
        return r::rs
    }

/// Chain 0 or more applications of parser p separated by applications of parser op
let rec chainl p op a = (chainl1 p op) <|> parser { return a }
/// Chain 1 or more applications of parser p separated by applications of parser op
and chainl1 p op =
    let rec rest r =
        parser {
            let! f = op
            let! r' = p
            return! rest (f r r')
        } <|> parser {return r}

    parser { let! a = p in return! rest a }

let isSpace =
    // list of "space" chars based on
    // http://hackage.haskell.org/packages/archive/base/latest/doc/html/Data-Char.html#v:isSpace
    let cs = [' '; '\t'; '\n'; '\r'; '\f'; '\v'] |> Set.ofList
    cs.Contains
let space = many (sat isSpace)

let token p = parser {
    let! r = p
    let! _ = space
    return r
}

let symb = text >> token

let apply p = parse (parser {
    let! _ = space
    let! r = p
    return r
})

let s2cs = List.ofSeq
let cs2s cs = new String(Array.ofList cs)

let runParser p =
    s2cs >>
    apply p >>
    function
        | [] -> failwith "Error parsing string"
        | (result,_)::_ -> result
val isHexDigit : (char -> bool)

Full name: Script.isHexDigit
val ds : Set<char>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
member Set.Contains : value:'T -> bool
val hexDigit : char Parser

Full name: Script.hexDigit
val sat : pred:(char -> bool) -> char Parser

Full name: Script.sat
val charToken : (char -> char Parser)

Full name: Script.charToken
val tChar : c:char -> char Parser

Full name: Script.tChar
val token : p:'a Parser -> 'a Parser

Full name: Script.token
val betweenChars : c1:char -> c2:char -> f:(unit -> 'a Parser) -> 'a Parser

Full name: Script.betweenChars
val c1 : char
val c2 : char
val f : (unit -> 'a Parser)
val parser : ParserBuilder

Full name: Script.parser
val r : 'a
val zeroOrOne : p:'a list Parser -> 'a list Parser

Full name: Script.zeroOrOne
val p : 'a list Parser
val ret : 'a list
val q : 'a list Parser
val rp : 'a list
val rq : 'a list
type JSValue =
  | JSString of string
  | JSNumber of float
  | JSObject of (JSValue * JSValue) list
  | JSArray of JSValue list
  | JSBool of bool
  | JSNull

Full name: Script.JSValue
union case JSValue.JSString: string -> JSValue
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
union case JSValue.JSNumber: float -> JSValue
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
union case JSValue.JSObject: (JSValue * JSValue) list -> JSValue
type 'T list = List<'T>

Full name: Microsoft.FSharp.Collections.list<_>
union case JSValue.JSArray: JSValue list -> JSValue
union case JSValue.JSBool: bool -> JSValue
type bool = Boolean

Full name: Microsoft.FSharp.Core.bool
union case JSValue.JSNull: JSValue
val jsNull : JSValue Parser

Full name: Script.jsNull
val nullLiteral : char list
val s2cs : (seq<'a> -> 'a list)

Full name: Script.s2cs
val symb : (char list -> char list Parser)

Full name: Script.symb
val jsBool : JSValue Parser

Full name: Script.jsBool
val trueLit : char list
val falseLit : char list
val jsNumber : JSValue Parser

Full name: Script.jsNumber
val digitsParser : char list Parser
val many1 : p:'a Parser -> 'a list Parser

Full name: Script.many1


 Combines at least one (1 or more) applications of parser p
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
val intParser : char list Parser
val text : _arg1:char list -> char list Parser

Full name: Script.text


 Given a char list, returns a parser that parsers it
val fracParser : char list Parser
val expParser : char list Parser
val result : char list
val space : char list Parser

Full name: Script.space
val cs2s : cs:char list -> String

Full name: Script.cs2s
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end

Full name: System.Double
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: Globalization.NumberStyles) : float
Double.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : float
val jsString : JSValue Parser

Full name: Script.jsString
val isChar : (char -> bool)
val c : char
val isEscChar : (char -> bool)
val cs : Set<char>
Multiple items
module List

from Microsoft.FSharp.Collections

--------------------
type List<'T> =
  | ( [] )
  | ( :: ) of Head: 'T * Tail: 'T list
  interface IEnumerable
  interface IEnumerable<'T>
  member Head : 'T
  member IsEmpty : bool
  member Item : index:int -> 'T with get
  member Length : int
  member Tail : 'T list
  static member Cons : head:'T * tail:'T list -> 'T list
  static member Empty : 'T list

Full name: Microsoft.FSharp.Collections.List<_>
val ofSeq : source:seq<'T> -> 'T list

Full name: Microsoft.FSharp.Collections.List.ofSeq
val replaceEscChar : (char -> char)
val other : char
val escChars : char Parser
val uniChars : char Parser
val d1 : char
val d2 : char
val d3 : char
val d4 : char
val r : char
val s : String
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
type Byte =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MaxValue : byte
    static val MinValue : byte
    static member Parse : s:string -> byte + 3 overloads
    static member TryParse : s:string * result:byte -> bool + 1 overload
  end

Full name: System.Byte
Byte.Parse(s: string) : byte
Byte.Parse(s: string, provider: IFormatProvider) : byte
Byte.Parse(s: string, style: Globalization.NumberStyles) : byte
Byte.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : byte
namespace System.Globalization
type NumberStyles =
  | None = 0
  | AllowLeadingWhite = 1
  | AllowTrailingWhite = 2
  | AllowLeadingSign = 4
  | AllowTrailingSign = 8
  | AllowParentheses = 16
  | AllowDecimalPoint = 32
  | AllowThousands = 64
  | AllowExponent = 128
  | AllowCurrencySymbol = 256
  ...

Full name: System.Globalization.NumberStyles
field Globalization.NumberStyles.HexNumber = 515
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
val chars : char list Parser
val many : p:'a Parser -> 'a list Parser

Full name: Script.many


 Combines many (0 or more) applications of parser p
val cs : char list
val jsValue : JSValue Parser

Full name: Script.jsValue
val jsElements : JSValue list Parser

Full name: Script.jsElements
val sepby : p:'a Parser -> sep:'b Parser -> 'a list Parser

Full name: Script.sepby


 Combines 0 or more applications of parser p separated by parser sep
val jsArray : JSValue Parser

Full name: Script.jsArray
val values : JSValue list
val jsMembers : (JSValue * JSValue) list Parser

Full name: Script.jsMembers
val jsPair : (JSValue * JSValue) Parser

Full name: Script.jsPair
val key : JSValue
val value : JSValue
val jsObject : JSValue Parser

Full name: Script.jsObject
val members : (JSValue * JSValue) list
val parseJson : (string -> JSValue)

Full name: Script.parseJson
val runParser : p:'a Parser -> (seq<char> -> 'a)

Full name: Script.runParser
val widgetJson : string

Full name: Script.widgetJson
val jsonAst : JSValue

Full name: Script.jsonAst
Raw view Test code New version

More information

Link:http://fssnip.net/8T
Posted:13 years ago
Author:fzandona
Tags: json; parser monad