0 people like it.

Top k values from an array of length N using Knuth's tournament method

This method is described in Knuth's Art of Programming, Volume 3, Page 212. See https://stackoverflow.com/questions/4956593/optimal-algorithm-for-returning-top-k-values-from-an-array-of-length-n This isn't the fastest possible implementation, but it is pure functional code (no mutation).

  1: 
  2: 
  3: 
  4: 
  5: 
  6: 
  7: 
  8: 
  9: 
 10: 
 11: 
 12: 
 13: 
 14: 
 15: 
 16: 
 17: 
 18: 
 19: 
 20: 
 21: 
 22: 
 23: 
 24: 
 25: 
 26: 
 27: 
 28: 
 29: 
 30: 
 31: 
 32: 
 33: 
 34: 
 35: 
 36: 
 37: 
 38: 
 39: 
 40: 
 41: 
 42: 
 43: 
 44: 
 45: 
 46: 
 47: 
 48: 
 49: 
 50: 
 51: 
 52: 
 53: 
 54: 
 55: 
 56: 
 57: 
 58: 
 59: 
 60: 
 61: 
 62: 
 63: 
 64: 
 65: 
 66: 
 67: 
 68: 
 69: 
 70: 
 71: 
 72: 
 73: 
 74: 
 75: 
 76: 
 77: 
 78: 
 79: 
 80: 
 81: 
 82: 
 83: 
 84: 
 85: 
 86: 
 87: 
 88: 
 89: 
 90: 
 91: 
 92: 
 93: 
 94: 
 95: 
 96: 
 97: 
 98: 
 99: 
100: 
101: 
102: 
103: 
104: 
105: 
106: 
107: 
108: 
109: 
110: 
111: 
112: 
113: 
114: 
115: 
116: 
117: 
118: 
119: 
120: 
121: 
122: 
123: 
124: 
125: 
126: 
127: 
128: 
129: 
130: 
131: 
132: 
133: 
134: 
135: 
136: 
137: 
138: 
139: 
140: 
141: 
142: 
143: 
144: 
145: 
146: 
147: 
148: 
149: 
150: 
151: 
152: 
153: 
154: 
155: 
156: 
157: 
158: 
159: 
160: 
161: 
162: 
163: 
164: 
165: 
166: 
open System

/// Path to a node.
type Path =
    | Left
    | Right

/// An inner node.
type Inner<'a> =
    {
        /// Value of this node.
        Value : 'a

        /// Left child node.
        Left : Node<'a>

        /// Right child node.
        Right : Node<'a>

        /// Path to this node.
        Path : Path
    }

/// Nodes in a tree.
and Node<'a> =
    | Leaf of 'a
    | Inner of Inner<'a>

module Node =

    /// Extracts the value from a node.
    let value =
        function
            | Leaf v -> v
            | Inner inner -> inner.Value

    /// Creates an inner node. This is where the actual comparison happens.
    let createInner comparer left right =
        let leftVal = value left
        let rightVal = value right
        let value, path =
            if comparer leftVal rightVal < 0 then
                leftVal, Left
            else
                rightVal, Right
        Inner {
            Value = value
            Left = left
            Right = right
            Path = path
        }

/// A tree of nodes.
type Tree<'a> =
    {
        Root : Node<'a>
        Comparer : 'a -> 'a -> int
    }

module Tree =

    /// Creates a tree containing the given values.
    let create comparer values =

            // recursively builds a layer in the tree
        let rec loop (nodes : _[]) =
            let parents =
                let isOdd = nodes.Length % 2 = 1
                let n = nodes.Length / 2 + if isOdd then 1 else 0
                Array.init n (fun i ->
                    if i < nodes.Length / 2 then
                        Node.createInner
                            comparer
                            nodes.[2 * i]
                            nodes.[2 * i + 1]
                    else
                        assert(isOdd)
                        assert(i = n - 1)
                        nodes.[2 * i])
            if parents.Length = 1 then parents.[0]
            else loop parents

        {
            Root =
                values
                    |> Seq.map Leaf
                    |> Seq.toArray
                    |> loop
            Comparer = comparer
        }

    /// Adds a value to a tree, replacing the current root value.
    let rec add value tree =
        let rec loop node =
            match node with
                | Leaf _ -> Leaf value
                | Inner inner ->
                    match inner.Path with
                        | Left ->
                            let left = loop inner.Left
                            Node.createInner tree.Comparer left inner.Right
                        | Right ->
                            let right = loop inner.Right
                            Node.createInner tree.Comparer inner.Left right
        { tree with Root = loop tree.Root }

module Select =

    /// Answers the top k values.
    let top comparer k (values : _[]) =

        assert(k >= 2)
        let n = values.Length
        assert(n >= k)

            // a tree's root value
        let value tree =
            tree.Root
                |> Node.value
                |> Option.get

            // compare options instead of raw values
        let optComparer aOpt bOpt =
            match (aOpt, bOpt) with
                | Some a, Some b -> comparer a b
                | None, Some _ -> 1
                | Some _, None -> -1
                | _ -> failwith "Unexpected"

            // initial tree
        let tree =
            values.[0 .. n - k + 1]
                |> Array.map Some
                |> Tree.create optComparer

        let result =
            [|
                    // obtain results iteratively
                let trees =
                    (tree, values.[n - k + 2 ..])
                        ||> Array.scan (fun tr value ->
                            tr |> Tree.add (Some value))
                yield! trees |> Seq.map value

                    // add a value that always loses to get the last result
                yield
                    trees
                        |> Array.last
                        |> Tree.add None
                        |> value
            |]
        assert(result.Length = k)
        assert(
            let sorted = values |> Array.sortWith comparer |> Array.take k
            Array.compareWith comparer result sorted = 0)
        result

[<EntryPoint>]
let main argv =
    let rng = Random(0)
    let n = 1000000
    let values = Array.init n (fun _ -> rng.Next())
    let k = 20
    let selected = values |> Select.top compare k
    printfn "%A" selected
    0
namespace System
type Path =
  | Left
  | Right

Full name: Script.Path


 Path to a node.
union case Path.Left: Path
union case Path.Right: Path
Multiple items
union case Node.Inner: Inner<'a> -> Node<'a>

--------------------
type Inner<'a> =
  {Value: 'a;
   Left: Node<'a>;
   Right: Node<'a>;
   Path: Path;}

Full name: Script.Inner<_>


 An inner node.
Inner.Value: 'a


 Value of this node.
Inner.Left: Node<'a>


 Left child node.
type Node<'a> =
  | Leaf of 'a
  | Inner of Inner<'a>

Full name: Script.Node<_>


 Nodes in a tree.
Inner.Right: Node<'a>


 Right child node.
Multiple items
Inner.Path: Path


 Path to this node.


--------------------
type Path =
  | Left
  | Right

Full name: Script.Path


 Path to a node.
union case Node.Leaf: 'a -> Node<'a>
val value : _arg1:Node<'a> -> 'a

Full name: Script.Node.value


 Extracts the value from a node.
val v : 'a
val inner : Inner<'a>
val createInner : comparer:('a -> 'a -> int) -> left:Node<'a> -> right:Node<'a> -> Node<'a>

Full name: Script.Node.createInner


 Creates an inner node. This is where the actual comparison happens.
val comparer : ('a -> 'a -> int)
val left : Node<'a>
val right : Node<'a>
val leftVal : 'a
val rightVal : 'a
val value : 'a
val path : Path
type Tree<'a> =
  {Root: Node<'a>;
   Comparer: 'a -> 'a -> int;}

Full name: Script.Tree<_>


 A tree of nodes.
Tree.Root: Node<'a>
Multiple items
module Node

from Script

--------------------
type Node<'a> =
  | Leaf of 'a
  | Inner of Inner<'a>

Full name: Script.Node<_>


 Nodes in a tree.
Tree.Comparer: 'a -> 'a -> int
Multiple items
val int : value:'T -> int (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.int

--------------------
type int = int32

Full name: Microsoft.FSharp.Core.int

--------------------
type int<'Measure> = int

Full name: Microsoft.FSharp.Core.int<_>
val create : comparer:('a -> 'a -> int) -> values:seq<'a> -> Tree<'a>

Full name: Script.Tree.create


 Creates a tree containing the given values.
val values : seq<'a>
val loop : (Node<'a> [] -> Node<'a>)
val nodes : Node<'a> []
val parents : Node<'a> []
val isOdd : bool
property Array.Length: int
val n : int
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : [<ParamArray>] indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val init : count:int -> initializer:(int -> 'T) -> 'T []

Full name: Microsoft.FSharp.Collections.Array.init
val i : int
module Seq

from Microsoft.FSharp.Collections
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>

Full name: Microsoft.FSharp.Collections.Seq.map
val toArray : source:seq<'T> -> 'T []

Full name: Microsoft.FSharp.Collections.Seq.toArray
val add : value:'a -> tree:Tree<'a> -> Tree<'a>

Full name: Script.Tree.add


 Adds a value to a tree, replacing the current root value.
val tree : Tree<'a>
val loop : (Node<'a> -> Node<'a>)
val node : Node<'a>
Inner.Path: Path


 Path to this node.
val top : comparer:('a -> 'a -> int) -> k:int -> values:'a [] -> 'a []

Full name: Script.Select.top


 Answers the top k values.
val k : int
val values : 'a []
val value : (Tree<'b option> -> 'b)
val tree : Tree<'b option>
Tree.Root: Node<'b option>
module Option

from Microsoft.FSharp.Core
val get : option:'T option -> 'T

Full name: Microsoft.FSharp.Core.Option.get
val optComparer : ('a option -> 'a option -> int)
val aOpt : 'a option
val bOpt : 'a option
union case Option.Some: Value: 'T -> Option<'T>
val a : 'a
val b : 'a
union case Option.None: Option<'T>
val failwith : message:string -> 'T

Full name: Microsoft.FSharp.Core.Operators.failwith
val tree : Tree<'a option>
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
Multiple items
module Tree

from Script

--------------------
type Tree<'a> =
  {Root: Node<'a>;
   Comparer: 'a -> 'a -> int;}

Full name: Script.Tree<_>


 A tree of nodes.
val result : 'a []
val trees : Tree<'a option> []
val scan : folder:('State -> 'T -> 'State) -> state:'State -> array:'T [] -> 'State []

Full name: Microsoft.FSharp.Collections.Array.scan
val tr : Tree<'a option>
val last : array:'T [] -> 'T

Full name: Microsoft.FSharp.Collections.Array.last
val sorted : 'a []
val sortWith : comparer:('T -> 'T -> int) -> array:'T [] -> 'T []

Full name: Microsoft.FSharp.Collections.Array.sortWith
val take : count:int -> array:'T [] -> 'T []

Full name: Microsoft.FSharp.Collections.Array.take
val compareWith : comparer:('T -> 'T -> int) -> array1:'T [] -> array2:'T [] -> int

Full name: Microsoft.FSharp.Collections.Array.compareWith
Multiple items
type EntryPointAttribute =
  inherit Attribute
  new : unit -> EntryPointAttribute

Full name: Microsoft.FSharp.Core.EntryPointAttribute

--------------------
new : unit -> EntryPointAttribute
val main : argv:string [] -> int

Full name: Script.main
val argv : string []
val rng : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val values : int []
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
val selected : int []
module Select

from Script
val compare : e1:'T -> e2:'T -> int (requires comparison)

Full name: Microsoft.FSharp.Core.Operators.compare
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
Raw view New version

More information

Link:http://fssnip.net/7Vf
Posted:10 months ago
Author:Brian Berns
Tags: #knuth , #tournament , #selection