1 people like it.
Like the snippet!
Course 3: Exploring Titanic dataset
F# introduction course - Getting data about Titanic passengers using CSV type provider and analyzing them using standard sequence-processing functions known from LINQ. To be used in Try F#.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
87:
88:
89:
90:
91:
92:
93:
94:
95:
|
// Load type provider for CSV files
#r "Samples.Csv.dll"
open Samples.Csv
// ------------------------------------------------------------------
// TUTORIAL: Parsing and exploring the Titanic CSV data set
// ------------------------------------------------------------------
// Download data from the web, use CSV provider to infer column names
/// Address of the Titanic CSV file. It is marked [<Literal>] because it is
/// passed as a static argument to the CsvFile type provider.
[<Literal>]
let DataUrl =
    "https://gist.github.com/tpetricek/263fb1bee9366170b2ef/raw/90d012bac3713e8618d3ae2f83f2f6535b6bebd9/titanic.csv"
/// Provided type for the Titanic file: the first three columns are read as
/// int, the remaining nine as string (so Age stays textual).
type Titanic =
    CsvFile<DataUrl,
            Schema = "int,int,int,string,string,string,string,string,string,string,string,string">
// Load & explore the data from the web URL
// Create an instance of the provided type for the file behind DataUrl
let data = new Titanic()
// Keep the first row around so individual columns can be inspected
let first = Seq.head data.Data
// Bare expressions: evaluate two columns of that row
first.Name
first.Age
// Print names of surviving children
// (Note - the value of age may be missing, or silly)
for row in data.Data do
    // Survivors only. The emptiness test comes before the conversion so
    // that a missing age is never handed to 'float'.
    if row.Survived = 1 && row.Age <> "" && float row.Age < 18.0 then
        printfn "%s (%s)" row.Name row.Age
// TASK #1: Skip suspicious floating point values
// (You can use Contains member method to test for "."
// or you can look for values less than 1)
// TASK #2: Print names of surviving males
// who have name longer than 40 characters
// ------------------------------------------------------------------
// TUTORIAL: Introducing higher-order, first-class functions & collections
// ------------------------------------------------------------------
// Helper functions that extract information from a row
/// True when the row's Survived column equals 1.
let survived (row: Titanic.Row) =
    row.Survived = 1
/// Returns the Name column of the row.
let name (row: Titanic.Row) =
    row.Name
/// True when the Age column is non-empty and contains no "." character,
/// i.e. the age is present and not written as a fractional number.
let hasAge (row: Titanic.Row) =
    row.Age <> "" && not (row.Age.Contains("."))
/// Converts the textual Age column to a float. The conversion is not
/// guarded here, so filter rows with hasAge before calling it.
let age (row: Titanic.Row) =
    float row.Age
// Call them on the first line
// Apply each helper to the first row
first |> name
first |> hasAge
first |> age
// Seq.* functions can be used to implement LINQ-like queries
// For example, get a sequence of names:
// Project every row to its Name column
data.Data |> Seq.map name
// Get count of passengers & average age on Titanic
// Number of rows in the data set
data.Data |> Seq.length
// Mean age over the rows accepted by hasAge (nested-call form)
Seq.averageBy age (Seq.filter hasAge data.Data)
// Nicer notation using the pipelining operator
// Same computation read left to right: filter, convert, average
data.Data |> Seq.filter hasAge |> Seq.map age |> Seq.average
// Or we can use lambda functions, which makes things easier
data.Data
// Drop rows whose age is missing or written with a "."
|> Seq.filter (fun p -> not (p.Age = "" || p.Age.Contains(".")))
// Average the remaining ages after converting them to float
|> Seq.averageBy (fun p -> float p.Age)
// TASK #3: Find out whether the average age of those who survived
// is greater/smaller than the average age of those who died
// ------------------------------------------------------------------
// TUTORIAL: More things to try on your own!
// ------------------------------------------------------------------
// Calculate the percentage of survivors by different embarkation point
data.Data
|> Seq.groupBy (fun row -> row.Embarked)
|> Seq.map (fun (embarked, data) ->
    // NOTE: inside this lambda 'data' is the group of rows that share one
    // Embarked value; it shadows the top-level 'data'.
    let survivors =
        data
        |> Seq.filter (fun r -> r.Survived = 1)
        |> Seq.length
    let total = data |> Seq.length
    // Pair the embarkation value with the group's share of survivors (%)
    embarked, float survivors / float total * 100.0)
// TASK #4: Calculate average age by different embarkation point
// (Use Seq.groupBy as above and then use Seq.averageBy on the
// group 'data' as above to get average age)
|
Multiple items
type LiteralAttribute =
inherit Attribute
new : unit -> LiteralAttribute
Full name: Microsoft.FSharp.Core.LiteralAttribute
--------------------
new : unit -> LiteralAttribute
val DataUrl : string
Full name: Script.DataUrl
type Titanic = obj
Full name: Script.Titanic
val data : Titanic
Full name: Script.data
val first : obj
Full name: Script.first
namespace Microsoft.FSharp.Data
module Seq
from Microsoft.FSharp.Collections
val head : source:seq<'T> -> 'T
Full name: Microsoft.FSharp.Collections.Seq.head
val row : obj
Multiple items
val float : value:'T -> float (requires member op_Explicit)
Full name: Microsoft.FSharp.Core.Operators.float
--------------------
type float = System.Double
Full name: Microsoft.FSharp.Core.float
--------------------
type float<'Measure> = float
Full name: Microsoft.FSharp.Core.float<_>
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val survived : row:'a -> bool
Full name: Script.survived
val row : 'a
val name : row:'a -> 'b
Full name: Script.name
val hasAge : row:'a -> bool
Full name: Script.hasAge
val not : value:bool -> bool
Full name: Microsoft.FSharp.Core.Operators.not
val age : row:'a -> float
Full name: Script.age
val map : mapping:('T -> 'U) -> source:seq<'T> -> seq<'U>
Full name: Microsoft.FSharp.Collections.Seq.map
val length : source:seq<'T> -> int
Full name: Microsoft.FSharp.Collections.Seq.length
val average : source:seq<'T> -> 'T (requires member ( + ) and member DivideByInt and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.average
val filter : predicate:('T -> bool) -> source:seq<'T> -> seq<'T>
Full name: Microsoft.FSharp.Collections.Seq.filter
val r : obj
val averageBy : projection:('T -> 'U) -> source:seq<'T> -> 'U (requires member ( + ) and member DivideByInt and member get_Zero)
Full name: Microsoft.FSharp.Collections.Seq.averageBy
val groupBy : projection:('T -> 'Key) -> source:seq<'T> -> seq<'Key * seq<'T>> (requires equality)
Full name: Microsoft.FSharp.Collections.Seq.groupBy
val embarked : obj
val data : seq<obj>
val survivors : int
val total : int
More information