// Load type provider for CSV files
#r "Samples.Csv.dll"
open Samples.Csv

// ------------------------------------------------------------------
// TUTORIAL: Parsing and exploring the Titanic CSV data set
// ------------------------------------------------------------------

// Download data from the web, use CSV provider to infer column names.
// The URL must be a compile-time constant ([<Literal>]) so that it can be
// passed to the type provider as a static argument.
// NOTE(review): the `[<Literal>]` attribute and the `<DataUrl>` static
// argument were reconstructed (the angle-bracketed text was missing from
// the file) - confirm against the Samples.Csv provider.
let [<Literal>] DataUrl = "https://gist.github.com/tpetricek/263fb1bee9366170b2ef/raw/90d012bac3713e8618d3ae2f83f2f6535b6bebd9/titanic.csv"
type Titanic = CsvFile<DataUrl>

// Load & explore the data from the web URL
// (evaluate the lines one by one in F# Interactive)
let data = new Titanic()
let first = data.Data |> Seq.head
first.Name
first.Age

// Print names of surviving children
// (Note - the value of age may be missing, or silly)
for row in data.Data do
  if row.Survived = 1 && row.Age <> "" && (float row.Age) < 18.0 then
    printfn "%s (%s)" row.Name row.Age

// TASK #1: Skip suspicious floating point values
// (You can use Contains member method to test for "."
// or you can look for values less than 1)

// TASK #2: Print names of surviving males
// who have name longer than 40 characters

// ------------------------------------------------------------------
// TUTORIAL: Introducing higher-order, first-class functions & collections
// ------------------------------------------------------------------

// Helper functions that extract information from a row

/// True when the row's Survived column equals 1.
let survived (row:Titanic.Row) = row.Survived = 1

/// The Name column of the row.
let name (row:Titanic.Row) = row.Name

/// True when the Age column is non-empty and contains no "."
/// (i.e. it skips missing and fractional age values).
let hasAge (row:Titanic.Row) =
  (row.Age <> "") && (not (row.Age.Contains(".")))

/// The Age column converted to a float. Only call this on rows for which
/// 'hasAge' holds - the conversion fails on an empty string.
let age (row:Titanic.Row) = float row.Age

// Call them on the first line
name first
hasAge first
age first

// Seq.* functions can be used to implement LINQ-like queries
// For example, get a sequence of names:
Seq.map name data.Data

// Get count of passengers & average age on Titanic
Seq.length data.Data
Seq.average (Seq.map age (Seq.filter hasAge data.Data))

// Nicer notation using the pipelining operator
data.Data
|> Seq.filter hasAge
|> Seq.map age
|> Seq.average

// Or we can use lambda functions, which makes things easier
data.Data
|> Seq.filter (fun r -> r.Age <> "" && not (r.Age.Contains(".")))
|> Seq.averageBy (fun r -> float r.Age)

// TASK #3: Find out whether the average age of those who survived
// is greater/smaller than the average age of those who died

// ------------------------------------------------------------------
// TUTORIAL: More things to try on your own!
// ------------------------------------------------------------------

// Calculate the percentage of survivors by different embarkation point
// (inside the lambda, 'data' is the group of rows for one embarkation
// point and shadows the top-level 'data' value)
data.Data
|> Seq.groupBy (fun row -> row.Embarked)
|> Seq.map (fun (embarked, data) ->
    let survivors = data |> Seq.filter (fun r -> r.Survived = 1) |> Seq.length
    let total = data |> Seq.length
    embarked, float survivors / float total * 100.0)

// TASK #4: Calculate average age by different embarkation point
// (Use Seq.groupBy as above and then use Seq.averageBy on the
// group 'data' as above to get average age)