2 people like it.

Detecting fraud with Benford's law

Many datasets, particularly those that span several orders of magnitude, share a special property: about 30% of the data items have '1' as their leading digit, with larger leading digits becoming progressively rarer. This can be used to detect fraud, for instance in expense claims, because people tend to concoct figures that lack this property. These functions implement one possible test for conformance to Benford's law. (Credits in the comments.)

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
module ChoAndGaines

open System

// Cho WKT, Gaines BJ (2007) Breaking the (Benford) law: Statistical fraud detection in campaign finance. 
// Amer Stat 61, 218–223

/// Observed frequency in a 'bin': the fraction of items in `s` whose first
/// significant digit equals `i`.
///
/// The first significant digit is the first character in '1'..'9' of the
/// item's `ToString()` text, so signs, leading zeros and decimal points are
/// skipped ("-123" counts as 1, "0.0034" counts as 3). Items with no such
/// character (for example 0) still count towards the total but match no bin.
///
/// `s` is enumerated exactly once, so lazy or side-effecting sequences yield a
/// count and a match total taken from the same pass.
/// An empty `s` gives 0/0, i.e. `nan`.
let e i (s: seq<'a>) =
    // Textual first significant digit, or None when the text has no 1-9 digit.
    let firstSignificantDigit (x: 'a) =
        x.ToString()
        |> Seq.tryFind (fun c -> c >= '1' && c <= '9')
        |> Option.map (fun c -> int c - int '0')
    let mutable total = 0
    let mutable hits = 0
    for x in s do
        total <- total + 1
        if firstSignificantDigit x = Some i then
            hits <- hits + 1
    float hits / float total

/// Expected proportion of values whose leading digit is `i` under Benford's
/// law, i.e. log10(1 + 1/i).
let b i =
    let reciprocal = 1.0 / float i
    Math.Log10(1.0 + reciprocal)
    
/// Cho and Gaines distance: the Euclidean distance between the Benford
/// probabilities `b i` and the observed frequencies `e i s` over the digit
/// bins 1..9.
///
/// `s` is copied into an array up front. Every bin is then measured against
/// the same snapshot, which matters for lazy sequences whose contents can
/// differ between enumerations (random generators, file-system queries).
let d s =
    let snapshot = Array.ofSeq s
    [1..9]
    |> Seq.sumBy (fun i -> (b i - e i snapshot) ** 2.)
    |> Math.Sqrt

/// Cho and Gaines distance times square root of data count. (d*)
///
/// `s` is copied into an array once, so the item count and the distance are
/// both derived from the same data even when `s` is a lazy sequence that
/// would produce different items on a second enumeration.
let d' s =
    let snapshot = Array.ofSeq s
    let rootN = snapshot |> Array.length |> float |> sqrt
    rootN * d snapshot

/// Classify a d* statistic against the critical values established by Morrow.
///
/// Returns a pair of strings: the first lists the significance levels whose
/// critical value is strictly greater than `d'` (prefixed "Below: "), the
/// second lists the remaining levels (prefixed "Above: "). Each level name is
/// appended with a leading space.
///
/// From http://www.johnmorrow.info/projects/benford/benfordMain.pdf
let interpret d' =
    // Critical value of d* for each significance level.
    let criticalValues =
        [ "α 0.1", 1.212
          "α 0.05", 1.330
          "α 0.01", 1.569 ]

    // Prefix followed by every level name, each preceded by a single space.
    let describe (prefix: string) entries =
        let labels = entries |> List.map (fun (label: string, _) -> " " + label)
        prefix + String.concat "" labels

    let below, above =
        criticalValues |> List.partition (fun (_, threshold) -> d' < threshold)

    describe "Below: " below, describe "Above: " above

// Usage examples:

// ("Below: ", "Above:  α 0.1 α 0.05 α 0.01")
/// Usage example: 1000 values drawn from `Random.Next()`, which are not
/// expected to follow Benford's law.
let randomTest =
    let r = Random()
    // Draw the sample eagerly. A lazy `Seq.map` over `r.Next()` would produce
    // fresh random numbers on every enumeration, so the statistic would not
    // be computed over one fixed data set.
    List.init 1000 (fun _ -> r.Next())
    |> d'
    |> interpret

// ("Below:  α 0.1 α 0.05 α 0.01", "Above: ")
/// Usage example: sizes in bytes of the files directly inside c:\windows.
let fileSizeTest =
    System.IO.Directory.EnumerateFiles(@"c:\windows", "*.*", System.IO.SearchOption.TopDirectoryOnly)
    |> Seq.map (fun path -> System.IO.FileInfo(path).Length)
    // Read the sizes once. The enumeration is lazy, so without this every
    // pass over the data would walk the directory and query each file again.
    |> Array.ofSeq
    |> d'
    |> interpret

// ("Below:  α 0.01", "Above:  α 0.1 α 0.05")
/// Usage example: the d* interpretation for a fixed list of 22 population
/// figures.
let welshLocalAuthorityPopulationsTest =
    let populations =
        [ 69700; 121900; 115200; 93700; 152500; 134800
          133000; 75900; 122400; 183800; 239000; 139800
          139200; 126300; 346100; 234400; 58800; 178800
          69800; 91100; 91300; 145700 ]
    interpret (d' populations)
module ChoAndGaines
namespace System
val e : i:int -> s:seq<'a> -> float

Full name: ChoAndGaines.e


 Observed frequency in a 'bin'.
val i : int
val s : seq<'a>
val n : float
module Seq

from Microsoft.FSharp.Collections
val length : source:seq<'T> -> int

Full name: Microsoft.FSharp.Collections.Seq.length
Multiple items
val float : value:'T -> float (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.float

--------------------
type float = Double

Full name: Microsoft.FSharp.Core.float

--------------------
type float<'Measure> = float

Full name: Microsoft.FSharp.Core.float<_>
val startsi : float
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>

Full name: Microsoft.FSharp.Collections.Seq.filter
val x : 'a
val digit : int
type Int32 =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MaxValue : int
    static val MinValue : int
    static member Parse : s:string -> int + 3 overloads
    static member TryParse : s:string * result:int -> bool + 1 overload
  end

Full name: System.Int32
Int32.Parse(s: string) : int
Int32.Parse(s: string, provider: IFormatProvider) : int
Int32.Parse(s: string, style: Globalization.NumberStyles) : int
Int32.Parse(s: string, style: Globalization.NumberStyles, provider: IFormatProvider) : int
Object.ToString() : string
val b : i:int -> float

Full name: ChoAndGaines.b


 Probability of appearing in a 'bin' according to Benford's law.
type Math =
  static val PI : float
  static val E : float
  static member Abs : value:sbyte -> sbyte + 6 overloads
  static member Acos : d:float -> float
  static member Asin : d:float -> float
  static member Atan : d:float -> float
  static member Atan2 : y:float * x:float -> float
  static member BigMul : a:int * b:int -> int64
  static member Ceiling : d:decimal -> decimal + 1 overload
  static member Cos : d:float -> float
  ...

Full name: System.Math
Math.Log10(d: float) : float
val d : s:seq<'a> -> float

Full name: ChoAndGaines.d


 Cho and Gaines distance.
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val sum : source:seq<'T> -> 'T (requires member ( + ) and member get_Zero)

Full name: Microsoft.FSharp.Collections.Seq.sum
Math.Sqrt(d: float) : float
val d' : s:seq<'a> -> float

Full name: ChoAndGaines.d'


 Cho and Gaines distance times square root of data count. (d*)
val n2 : float
val sqrt : value:'T -> 'U (requires member Sqrt)

Full name: Microsoft.FSharp.Core.Operators.sqrt
val interpret : d':float -> string * string

Full name: ChoAndGaines.interpret


 Interpret the d* statistic based on critical values established by Morrow.

 From http://www.johnmorrow.info/projects/benford/benfordMain.pdf
val d' : float
val limits : (string * float) list
val below : (string * float) []
val above : (string * float) []
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val ofSeq : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Array.ofSeq
val partition : predicate:('T -> bool) -> array:'T [] -> 'T [] * 'T []

Full name: Microsoft.FSharp.Collections.Array.partition
val t : float
val belowStr : string
val aboveStr : string
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val name : string
val fold : folder:('State -> 'T -> 'State) -> state:'State -> array:'T [] -> 'State

Full name: Microsoft.FSharp.Collections.Array.fold
val acc : string
val sprintf : format:Printf.StringFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.sprintf
val randomTest : string * string

Full name: ChoAndGaines.randomTest
val r : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
val fileSizeTest : string * string

Full name: ChoAndGaines.fileSizeTest
namespace System.IO
type Directory =
  static member CreateDirectory : path:string -> DirectoryInfo + 1 overload
  static member Delete : path:string -> unit + 1 overload
  static member EnumerateDirectories : path:string -> IEnumerable<string> + 2 overloads
  static member EnumerateFileSystemEntries : path:string -> IEnumerable<string> + 2 overloads
  static member EnumerateFiles : path:string -> IEnumerable<string> + 2 overloads
  static member Exists : path:string -> bool
  static member GetAccessControl : path:string -> DirectorySecurity + 1 overload
  static member GetCreationTime : path:string -> DateTime
  static member GetCreationTimeUtc : path:string -> DateTime
  static member GetCurrentDirectory : unit -> string
  ...

Full name: System.IO.Directory
IO.Directory.EnumerateFiles(path: string) : Collections.Generic.IEnumerable<string>
IO.Directory.EnumerateFiles(path: string, searchPattern: string) : Collections.Generic.IEnumerable<string>
IO.Directory.EnumerateFiles(path: string, searchPattern: string, searchOption: IO.SearchOption) : Collections.Generic.IEnumerable<string>
type SearchOption =
  | TopDirectoryOnly = 0
  | AllDirectories = 1

Full name: System.IO.SearchOption
field IO.SearchOption.TopDirectoryOnly = 0
val f : IO.FileInfo
Multiple items
type FileInfo =
  inherit FileSystemInfo
  new : fileName:string -> FileInfo
  member AppendText : unit -> StreamWriter
  member CopyTo : destFileName:string -> FileInfo + 1 overload
  member Create : unit -> FileStream
  member CreateText : unit -> StreamWriter
  member Decrypt : unit -> unit
  member Delete : unit -> unit
  member Directory : DirectoryInfo
  member DirectoryName : string
  member Encrypt : unit -> unit
  ...

Full name: System.IO.FileInfo

--------------------
IO.FileInfo(fileName: string) : unit
property IO.FileInfo.Length: int64
val welshLocalAuthorityPopulationsTest : string * string

Full name: ChoAndGaines.welshLocalAuthorityPopulationsTest
Raw view Test code New version

More information

Link:http://fssnip.net/fw
Posted:12 years ago
Author:Kit Eason
Tags: financial