2 Accessing Data from the WRC Live Timing API

We can get rally details, timing and results data from the WRC live timing service JSON API.

2.1 Current Season Rallies

To start with, let’s see what rallies are scheduled for the current, active season. The jsonlite::fromJSON() function will retrieve a JSON (JavaScript Object Notation) file from a URL and attempt to unpack it into an R dataframe:

library(jsonlite)
library(stringr)
library(dplyr)

season_url = "https://api.wrc.com/contel-page/83388/calendar/active-season/"

# Fetch the active-season calendar from the WRC API.
#   active_season_url: URL of the JSON feed (defaults to season_url).
#   all: if TRUE, return the complete parsed response; otherwise return only
#        its rallyEvents$items element.
get_active_season = function(active_season_url=season_url, all=FALSE) {
  season = jsonlite::fromJSON(active_season_url)
  if (all) {
    return(season)
  }
  season$rallyEvents$items
}

s = get_active_season()

# Preview the column names of the resulting dataframe
colnames(s)

##  [1] "id"         "name"       "active"     "jwrc"       "deleted"   
##  [6] "rally"      "status"     "pageInfo"   "pageResult" "eventDays" 
## [11] "winner"

Let’s preview the contents of a couple of those columns:

# The magrittr pipe syntax (re-exported by dplyr) makes things easier to read
s %>% select(c('id', 'name')) %>% head()

##     id                                           name
## 1 1695                         WRC Rallye Monte-Carlo
## 2 1745 WRC Arctic Rally Finland Powered by CapitalBox
## 3 1696                              WRC Croatia Rally
## 4 1698                 WRC Vodafone Rally de Portugal
## 5 1704                      WRC Rally Italia Sardegna
## 6 1709                         WRC Safari Rally Kenya

We can search the name column to find the unique identifier value for a particular event:

eventId = s[s['name']=='WRC Arctic Rally Finland Powered by CapitalBox','id']

eventId

## [1] 1745

Or we can be more generic with a regular expression lookup:

# Look up event id(s) by matching a regular expression against event names.
#   season: dataframe with name and id columns.
#   name: regular expression, matched case-insensitively against season$name.
# Returns the id value(s) of every matching row.
get_eventId_from_name = function(season, name){
  # Use TRUE rather than T: T is an ordinary variable that can be reassigned
  is_match = stringr::str_detect(season$name,
                                 stringr::regex(name, ignore_case = TRUE))
  season[is_match, 'id']
}

get_eventId_from_name(s, 'arctic')

## [1] 1745

2.2 Itinerary Lookup

We can make another call to the WRC API to look up the itinerary for the event. Each leg of the event corresponds to a particular day:

results_api = 'https://api.wrc.com/results-api'

# Request the itinerary for an event and return its itineraryLegs element.
#   eventId: event identifier inserted into the results API URL.
get_itinerary = function(eventId) {
  itinerary_url = paste0(results_api, "/rally-event/", eventId, "/itinerary")
  response = jsonlite::fromJSON(itinerary_url)
  response$itineraryLegs
}

itinerary = get_itinerary(eventId)

itinerary %>% select(-itinerarySections)

##   itineraryLegId itineraryId startListId                   name    legDate
## 1            320         266         523   Friday 26th February 2021-02-26
## 2            319         266         524 Saturday 27th February 2021-02-27
## 3            318         266         525   Sunday 28th February 2021-02-28
##   order    status
## 1     1 Completed
## 2     2 Completed
## 3     3   Running

The itinerarySections column contains dataframes describing the details of each leg.

2.2.1 Leg Sections

Within each leg, the itinerary provides information about each section (that is, each “loop”) of the rally. This information is retrieved in the form of a dataframe in a standard format. We can use the base R do.call() to call the rbind() function against each row of the dataframe and bind all the dataframes in a specified column into a single dataframe:

# Row-bind the dataframes held in itinerary$itinerarySections into a single
# dataframe.
get_sections = function(itinerary){
  section_tables = itinerary$itinerarySections
  do.call("rbind", section_tables)
}

sections = get_sections(itinerary)

sections %>% select(-c(controls, stages))

##   itinerarySectionId itineraryLegId order      name
## 1                733            320     1 Section 1
## 2                734            320     2 Section 2
## 3                735            319     3 Section 3
## 4                736            319     4 Section 4
## 5                737            318     5 Section 5
## 6                738            318     6 Section 6

In the sections dataframe we have one row per section. Two of the columns, controls and stages, each use dataframes to “nest” subdataframes within each row.

For example, here’s one of the controls dataframes that describes timing controls:

sections$controls[[1]]

##   controlId eventId stageId         type code                        location
## 1      7527     170      NA  TimeControl  TC0 Start ( Santasport, Rovaniemi )
## 2      7564     170    1747  TimeControl  TC1                     Sarriojärvi
## 3      7563     170    1747   StageStart  SS1         Sarriojärvi 1 (Live TV)
## 4      7562     170    1747 FlyingFinish  SF1         Sarriojärvi 1 (Live TV)
## 5      7531     170      NA    RegroupIn TC1A                      Regroup IN
##   timingPrecision distance targetDuration targetDurationMs firstCarDueDateTime
## 1          Minute       NA           <NA>               NA 2021-02-26T11:30:00
## 2          Minute    77.79       01:32:00          5520000 2021-02-26T13:02:00
## 3          Minute    31.05       00:06:00           360000 2021-02-26T13:08:00
## 4           Tenth       NA           <NA>               NA                <NA>
## 5          Minute    31.57       01:20:00          4800000 2021-02-26T14:28:00
##    firstCarDueDateTimeLocal    status controlPenalties       roundingPolicy
## 1 2021-02-26T13:30:00+02:00 Completed              All           NoRounding
## 2 2021-02-26T15:02:00+02:00 Completed              All           NoRounding
## 3 2021-02-26T15:08:00+02:00 Completed             None RoundToClosestMinute
## 4                      <NA> Completed             None           NoRounding
## 5 2021-02-26T16:28:00+02:00 Completed              All           NoRounding
##   locked
## 1   TRUE
## 2   TRUE
## 3   TRUE
## 4   TRUE
## 5   TRUE

And an example of a dataframe from the first row of the stages column:

sections$stages[[1]]

##   stageId eventId number                    name distance    status
## 1    1747     170      1 Sarriojärvi 1 (Live TV)    31.05 Completed
##      stageType timingPrecision locked code
## 1 SpecialStage           Tenth   TRUE  SS1

2.2.2 Time Controls

We can look up information about each time control from data provided as part of the itinerary lookup using the same trick as before to “unroll” the contents of each dataframe in a specified column into a single dataframe.

An alternative to the do.call() approach is to use a tidy approach and use the dplyr::bind_rows() function on the sections$controls column values via a pipe. We can add a reference to the original section ID by naming each row in the controls column with the itinerarySectionId value and then ensuring an identifier column is defined when we bind the dataframes:

# Combine the per-section controls dataframes into one dataframe, adding an
# itinerarySectionId column that records which section each row came from.
get_controls = function(sections){
  control_tables = sections$controls
  # bind_rows() fills the .id column from the list names
  names(control_tables) = sections$itinerarySectionId
  bind_rows(control_tables, .id='itinerarySectionId')
}

controls = get_controls(sections)

controls %>% head(2)

##   itinerarySectionId controlId eventId stageId        type code
## 1                733      7527     170      NA TimeControl  TC0
## 2                733      7564     170    1747 TimeControl  TC1
##                          location timingPrecision distance targetDuration
## 1 Start ( Santasport, Rovaniemi )          Minute       NA           <NA>
## 2                     Sarriojärvi          Minute    77.79       01:32:00
##   targetDurationMs firstCarDueDateTime  firstCarDueDateTimeLocal    status
## 1               NA 2021-02-26T11:30:00 2021-02-26T13:30:00+02:00 Completed
## 2          5520000 2021-02-26T13:02:00 2021-02-26T15:02:00+02:00 Completed
##   controlPenalties roundingPolicy locked
## 1              All     NoRounding   TRUE
## 2              All     NoRounding   TRUE

2.2.3 Stage Details

We can pull stage details from the dataframes contained in the sections dataframe from the itinerary lookup:

# Combine the per-section stages dataframes into one dataframe, adding an
# itinerarySectionId column that records which section each row came from.
get_stages = function(sections){
  stage_tables = sections$stages
  # bind_rows() fills the .id column from the list names
  names(stage_tables) = sections$itinerarySectionId
  bind_rows(stage_tables, .id='itinerarySectionId')
}

stages = get_stages(sections)

stages %>% head()

##   itinerarySectionId stageId eventId number                    name distance
## 1                733    1747     170      1 Sarriojärvi 1 (Live TV)    31.05
## 2                734    1743     170      2 Sarriojärvi 2 (Live TV)    31.05
## 3                735    1750     170      3            Mustalampi 1    24.43
## 4                735    1751     170      4 Kaihuavaara 1 (Live TV)    19.91
## 5                735    1748     170      5             Siikakämä 1    27.68
## 6                736    1745     170      6            Mustalampi 2    24.43
##      status    stageType timingPrecision locked code
## 1 Completed SpecialStage           Tenth   TRUE  SS1
## 2 Completed SpecialStage           Tenth   TRUE  SS2
## 3 Completed SpecialStage           Tenth   TRUE  SS3
## 4 Completed SpecialStage           Tenth   TRUE  SS4
## 5 Completed SpecialStage           Tenth   TRUE  SS5
## 6 Completed SpecialStage           Tenth   TRUE  SS6

We can get a list of stage IDs from the stageId column (stages$stageId):

# Return the stageId column of the stages dataframe as a vector.
get_stage_list = function(stages){
  stages$stageId
}

get_stage_list(stages)

##  [1] 1747 1743 1750 1751 1748 1745 1744 1742 1746 1749

Perhaps more conveniently, we can create a lookup from code to stage ID:

# https://stackoverflow.com/a/19265431/454773
# Build a named lookup vector from one stages column to another.
#   stages: dataframe of stage details.
#   fromCol: column whose values become the names (default 'code').
#   toCol: column whose values become the elements (default 'stageId').
get_stages_lookup = function(stages,
                             fromCol='code',  toCol='stageId'){
  setNames(stages[[toCol]], stages[[fromCol]])
}

stages_lookup = get_stages_lookup(stages)
stages_lookup

##  SS1  SS2  SS3  SS4  SS5  SS6  SS7  SS8  SS9 SS10 
## 1747 1743 1750 1751 1748 1745 1744 1742 1746 1749

# Lookup particular stage ID by stage code
#stages_lookup[['SS2']]

From the stages table, we can get the identifier for a particular stage, either by code (for example, “SS3”) or by (partial) name match:

ssnum = 'SS3'

# Find the stageId for a stage.
#   stages: dataframe of stage details with a stageId column.
#   sname: value to look for.
#   typ: column to search. 'code' is compared for equality; any other column
#        is searched with sname used as a stringr pattern.
get_stage_id = function(stages, sname, typ='code'){
  if (typ=='code') {
    hits = stages[typ] == sname
  } else {
    hits = stringr::str_detect(stages[[typ]], sname)
  }
  stages[hits, 'stageId']
}

stageId = get_stage_id(stages, 'Mustalampi 1', 'name')
stageId

## [1] 1750

And the stage distance and name:

# Return the name and distance of a stage as a named character vector.
#   stages: dataframe with name and distance columns plus the column named
#           by typ.
#   sid: value identifying the stage.
#   typ: column that sid is compared against (default 'stageId').
#   clean: if TRUE, strip the literal ' (Live TV)' suffix from the name.
get_stage_info = function(stages, sid, typ='stageId', clean=TRUE){
  matched = stages[[typ]] == sid
  name = stages[matched, 'name']
  distance = stages[matched, 'distance']
  if (clean) {
    # fixed = TRUE matches the parentheses literally; the cleaned value must
    # be assigned back, otherwise the replacement has no effect
    name = sub(' (Live TV)', '', name, fixed = TRUE)
  }

  c(name=name, distance=distance)
}

get_stage_info(stages, stageId)

##           name       distance 
## "Mustalampi 1"        "24.43"

2.2.4 Road Order Start Lists

The startListId can be used alongside the event ID to look up the startlist for a leg. We can order the startlist by start order:

# Request a start list and return its startListItems ordered by the order
# column.
#   eventId: event identifier inserted into the results API URL.
#   startListId: start list identifier inserted into the results API URL.
get_startlist = function(eventId, startListId) {
  startlist_url = paste0(results_api, '/rally-event/', eventId,
                         '/start-list-external/', startListId)

  startlist_items = jsonlite::fromJSON(startlist_url)$startListItems

  arrange(startlist_items, order)
}

# Example startlist ID
# Use a case-insensitive regular expression on the leg name to find the
# startlist ID by day (TRUE rather than T, which can be reassigned)
startListId = itinerary[str_detect(itinerary$name,
                                   regex('Friday', ignore_case = TRUE)),
                       'startListId']

startlist = get_startlist(eventId, startListId)

startlist %>% head()

##   startListItemId startListId entryId       startDateTime
## 1           24435         523   21530 2021-02-26T11:30:00
## 2           24409         523   21531 2021-02-26T11:33:00
## 3           24410         523   21532 2021-02-26T11:36:00
## 4           24411         523   21533 2021-02-26T11:39:00
## 5           24412         523   21534 2021-02-26T11:42:00
## 6           24413         523   21535 2021-02-26T11:45:00
##          startDateTimeLocal order
## 1 2021-02-26T13:30:00+02:00     1
## 2 2021-02-26T13:33:00+02:00     2
## 3 2021-02-26T13:36:00+02:00     3
## 4 2021-02-26T13:39:00+02:00     4
## 5 2021-02-26T13:42:00+02:00     5
## 6 2021-02-26T13:45:00+02:00     6

Looking up a startlist ID is a little fiddly:

# Find the startListId for the leg that contains a given itinerary section.
#   itinerary: itinerary legs dataframe (itineraryLegId, startListId and the
#              nested itinerarySections).
#   itinerarySectionId: section identifier to look up.
get_startlist_id = function(itinerary, itinerarySectionId){
  sections = get_sections(itinerary)
  # Section -> leg
  in_section = sections$itinerarySectionId==itinerarySectionId
  leg_id = sections[in_section, 'itineraryLegId']
  # Leg -> start list
  itinerary[itinerary$itineraryLegId==leg_id, 'startListId']
}

get_startlist_id(itinerary, stages$itinerarySectionId[[1]])

## [1] 523

2.3 Competitor Details

Details of car entries for each event can be retrieved from the WRC live timing API given an event ID.

# Request the car entries for an event and return the parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
get_rally_entries = function(eventId) {
  cars_url = paste0(results_api, '/rally-event/', eventId, '/cars')
  entries = jsonlite::fromJSON(cars_url)
  entries
}

entries = get_rally_entries(eventId)
# $driver, $codriver, $manufacturer, $entrant, $group, $eventClasses
# $identifier, $vehicleModel, $eligibility, $status

entries %>% head(2)

##   driver.personId driver.countryId driver.country.countryId
## 1             670               76                       76
## 2             534              235                      235
##                                    driver.country.name driver.country.iso2
## 1                                               France                  FR
## 2 United Kingdom of Great Britain and Northern Ireland                  GB
##   driver.country.iso3 driver.firstName driver.lastName driver.abbvName
## 1                 FRA        Sébastien           OGIER        S. OGIER
## 2                 GBR            Elfyn           EVANS        E. EVANS
##   driver.fullName driver.code codriver.personId codriver.countryId
## 1 Sébastien OGIER         OGI              3027                 76
## 2     Elfyn EVANS         EVA               553                235
##   codriver.country.countryId
## 1                         76
## 2                        235
##                                  codriver.country.name codriver.country.iso2
## 1                                               France                    FR
## 2 United Kingdom of Great Britain and Northern Ireland                    GB
##   codriver.country.iso3 codriver.firstName codriver.lastName codriver.abbvName
## 1                   FRA             Julien         INGRASSIA      J. INGRASSIA
## 2                   GBR              Scott            MARTIN         S. MARTIN
##   codriver.fullName codriver.code manufacturer.manufacturerId manufacturer.name
## 1  Julien INGRASSIA           ING                          84            Toyota
## 2      Scott MARTIN           MAR                          84            Toyota
##   manufacturer.logoFilename entrant.entrantId            entrant.name
## 1                    toyota                91 TOYOTA GAZOO RACING WRT
## 2                    toyota                91 TOYOTA GAZOO RACING WRT
##   entrant.logoFilename group.groupId group.name  eventClasses tags entryId
## 1                 <NA>            10        WRC 640, 170, RC1 NULL   21530
## 2                 <NA>            10        WRC 640, 170, RC1 NULL   21531
##   eventId driverId codriverId manufacturerId entrantId groupId entryListOrder
## 1     170      670       3027             84        91      10              1
## 2     170      534        553             84        91      10              2
##   identifier vehicleModel eligibility priority   status tyreManufacturer
## 1          1    Yaris WRC           M       P1 Rejoined             None
## 2         33    Yaris WRC           M       P1    Entry             None

2.3.1 Looking Up Entries by Group

We can index the entries by group to find all the WRC car entryId values:

entries[entries$group$name=='WRC', 'entryId']

##  [1] 21530 21531 21532 21533 21534 21535 21536 21537 21538 21539 21540 21541
## [13] 21542

2.3.2 Driver & Codriver Details

Detailed information for each driver and codriver can be found in the corresponding sub-dataframes.

For example, we can look up the details for each driver, noting in this case that we need to column bind (cbind()) the subdataframes to produce the collated dataframe of driver details:

# Column-bind the elements of entries$driver into a single object of driver
# details.
get_drivers = function(entries){
  driver_cols = entries$driver
  do.call("cbind", driver_cols)
}

drivers = get_drivers(entries)

drivers %>% head(2)

##   personId countryId country.countryId
## 1      670        76                76
## 2      534       235               235
##                                           country.name country.iso2
## 1                                               France           FR
## 2 United Kingdom of Great Britain and Northern Ireland           GB
##   country.iso3 firstName lastName abbvName        fullName code
## 1          FRA Sébastien    OGIER S. OGIER Sébastien OGIER  OGI
## 2          GBR     Elfyn    EVANS E. EVANS     Elfyn EVANS  EVA

We can similarly obtain data for the codrivers:

#codrivers = do.call(cbind, entries$codriver)
# Again, there is a tidyverse approach with dplyr::bind_cols()
# Column-bind the elements of entries$codriver with dplyr::bind_cols().
get_codrivers = function(entries){
  entries$codriver %>% bind_cols()
}

codrivers = get_codrivers(entries)

codrivers %>% head(2)

##   personId countryId country.countryId
## 1     3027        76                76
## 2      553       235               235
##                                           country.name country.iso2
## 1                                               France           FR
## 2 United Kingdom of Great Britain and Northern Ireland           GB
##   country.iso3 firstName  lastName     abbvName         fullName code
## 1          FRA    Julien INGRASSIA J. INGRASSIA Julien INGRASSIA  ING
## 2          GBR     Scott    MARTIN    S. MARTIN     Scott MARTIN  MAR

We can conveniently obtain the identifier for a particular driver or codriver by searching against their name or three letter code, although note that the three letter code may not be a unique identifier:

# Find the personId of a driver or codriver.
#   persons: dataframe with a personId column and the column named by typ.
#   sname: value to search for.
#   typ: column to search. 'code' requires an exact match; any other column
#        (default 'fullName') is matched as a case-insensitive regular
#        expression.
# Returns the personId value(s) of all matching rows.
get_person_id = function(persons, sname, typ='fullName'){
  # Both branches produce the same row mask variable, so the lookup below
  # works whichever branch is taken
  if (typ=='code') {
    is_match = persons[[typ]]==sname
  } else {
    is_match = stringr::str_detect(persons[[typ]],
                                   stringr::regex(sname, ignore_case = TRUE))
  }
  persons[is_match, 'personId']
}

ogierDriverId = get_person_id(drivers, 'ogier')
ogierDriverId

## [1] 670

From the driver person identifier we can get the entry identifier for the rally we’re exploring:

ogierEntryId  = entries[entries['driverId']==ogierDriverId, 'entryId']
ogierEntryId

## [1] 21530

2.3.3 Summarising Essential Entry Data

We can manually create a dataframe containing essential fields from the original cars dataframe and the dataframes contained within it:

# Flatten the nested entries dataframe into a plain dataframe holding the
# essential identifier and name fields for each car.
get_car_data = function(entries){
  cols = c('entryId', 'driverId', 'codriverId','manufacturerId',
           'vehicleModel','eligibility', 'classname','manufacturer',
           'entrantname', 'groupname', 'drivername', 'code',
           'driverfullname', 'codrivername','codriverfullname'
           )

  # Work row by row so each nested value can be indexed with $
  car_data = entries %>%
    rowwise() %>%
    mutate(classname = eventClasses$name,
           manufacturer = manufacturer$name,
           entrantname = entrant$name,
           groupname = group$name,
           drivername = driver$abbvName,
           driverfullname = driver$fullName,
           codrivername = codriver$abbvName,
           codriverfullname = codriver$fullName,
           code = driver$code) %>%
    select(all_of(cols))

  # Cast the rowwise result back to a plain dataframe
  as.data.frame(car_data)
}

get_car_data(entries) %>% head(2)

##   entryId driverId codriverId manufacturerId vehicleModel eligibility classname
## 1   21530      670       3027             84    Yaris WRC           M       RC1
## 2   21531      534        553             84    Yaris WRC           M       RC1
##   manufacturer             entrantname groupname drivername code
## 1       Toyota TOYOTA GAZOO RACING WRT       WRC   S. OGIER  OGI
## 2       Toyota TOYOTA GAZOO RACING WRT       WRC   E. EVANS  EVA
##    driverfullname codrivername codriverfullname
## 1 Sébastien OGIER J. INGRASSIA Julien INGRASSIA
## 2     Elfyn EVANS    S. MARTIN     Scott MARTIN

2.4 Penalties and Retirements

We can look up penalties from an event ID:

# Request the penalties for an event and return the parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
get_penalties = function(eventId) {
  penalties_url = paste0(results_api, '/rally-event/', eventId, '/penalties')
  penalties = jsonlite::fromJSON(penalties_url)
  penalties
}

get_penalties(eventId) %>% head(2)

##   penaltyId controlId entryId penaltyDurationMs penaltyDuration     reason
## 1       959      7546   21559             10000           PT10S 1 MIN LATE
## 2       956      7533   21568             10000           PT10S 1 MIN LATE

The event ID is also all we need to request a list of retirements:

# Request the retirements for an event and return the parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
get_retirements = function(eventId) {
  retirements_url = paste0(results_api, '/rally-event/', eventId,
                           '/retirements')
  retirements = jsonlite::fromJSON(retirements_url)
  retirements
}

get_retirements(eventId) %>% head(2)

##   retirementId controlId entryId     reason  retirementDateTime
## 1         1802      7562   21547 MECHANICAL 2021-02-26T14:50:00
## 2         1803      7556   21542   OFF ROAD 2021-02-27T08:01:00
##     retirementDateTimeLocal    status
## 1 0001-01-01T00:00:00+00:00 Permanent
## 2 0001-01-01T00:00:00+00:00 Temporary

2.5 Results and Stage Winner

As well as retrieving penalties and retirements using just the event ID as a key, we can also retrieve the overall results and the stage winners:

# Request the overall result for an event and return the parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
get_result = function(eventId) {
  result_url = paste0(results_api, '/rally-event/', eventId, '/result')
  result = jsonlite::fromJSON(result_url)
  result
}

get_result(eventId) %>% head(2)

##   entryId stageTimeMs   stageTime penaltyTimeMs penaltyTime totalTimeMs
## 1   21536     7429600 PT2H3M49.6S             0        PT0S     7429600
## 2   21533     7447100  PT2H4M7.1S             0        PT0S     7447100
##     totalTime position diffFirstMs diffFirst diffPrevMs diffPrev
## 1 PT2H3M49.6S        1           0      PT0S          0     PT0S
## 2  PT2H4M7.1S        2       17500   PT17.5S      17500  PT17.5S

And for the stage winners:

# Request the stage winners for an event and return the parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
get_stage_winners = function(eventId) {
  stage_winners_url = paste0(results_api, '/rally-event/', eventId,
                             '/stage-winners')
  stage_winners = jsonlite::fromJSON(stage_winners_url)
  stage_winners
}

get_stage_winners(eventId) %>% head(2)

##   stageId entryId               stageName elapsedDurationMs  elapsedDuration
## 1    1747   21536 Sarriojärvi 1 (Live TV)            957800 00:15:57.8000000
## 2    1743   21536 Sarriojärvi 2 (Live TV)            952900 00:15:52.9000000

2.6 Stage Result

At the end of each stage, two different sorts of results data are available: data relating to the result of the stage itself, and data relating to how the stage result affected the overall rally position.

Let’s start by getting the overall rally result at the end of a particular stage. Note that the overall result does not include the stage ID in the returned data so we need to add it in:

# Request the stage-result data for one stage of an event and add a stageId
# column holding the requested stage ID.
#   eventId: event identifier inserted into the results API URL.
#   stageId: stage identifier inserted into the URL and written to the new
#            column.
get_overall_result = function(eventId, stageId) {
  overall_url = paste0(results_api, '/rally-event/', eventId,
                       '/stage-result/stage-external/', stageId)
  overall = jsonlite::fromJSON(overall_url)
  # Record which stage the rows belong to
  mutate(overall, stageId = stageId)
}

overall_result = get_overall_result(eventId, stageId)

overall_result %>% head(2)

##   entryId stageTimeMs  stageTime penaltyTimeMs penaltyTime totalTimeMs
## 1   21536     2745200 PT45M45.2S             0        PT0S     2745200
## 2   21538     2768800  PT46M8.8S             0        PT0S     2768800
##    totalTime position diffFirstMs diffFirst diffPrevMs diffPrev stageId
## 1 PT45M45.2S        1           0      PT0S          0     PT0S    1750
## 2  PT46M8.8S        2       23600   PT23.6S      23600  PT23.6S    1750

2.6.1 Getting Stage Results for Multiple Stages

It will be convenient to be able to retrieve overall results for multiple stages from one function call. One way of achieving that is to create a function to retrieve the details for a single specified stage that can be applied via a purrr::map() function call to a list of the stage IDs we want overall results data for:

library(purrr)

## 
## Attaching package: 'purrr'

## The following object is masked from 'package:jsonlite':
## 
##     flatten

# get_overall_result() with the arguments swapped, so that the stage ID is
# the first argument and the function can be mapped over a list of stage IDs.
get_overall_result2 = function(stageId, eventId) {
  get_overall_result(eventId = eventId, stageId = stageId)
}

# Retrieve the overall results for several stages and stack them into one
# dataframe.
#   stage_list: stage IDs to request.
#   event_id: event the stages belong to. Defaults to the eventId variable
#             visible where this function was defined, which keeps existing
#             one-argument calls working while letting callers pass the
#             event explicitly.
get_multi_overall = function(stage_list, event_id=eventId){
  stage_list %>%
    map(get_overall_result2, eventId=event_id) %>%
    bind_rows()
}

# Specify the stage IDs for multiple stages
stage_list = c(1747,    1743)

multi_overall_results = get_multi_overall(stage_list)
  
multi_overall_results %>% tail(2)

##     entryId stageTimeMs  stageTime penaltyTimeMs penaltyTime totalTimeMs
## 107   21579     2866500 PT47M46.5S             0        PT0S     2866500
## 108   21573     3115400 PT51M55.4S             0        PT0S     3115400
##      totalTime position diffFirstMs  diffFirst diffPrevMs diffPrev stageId
## 107 PT47M46.5S       53      955800 PT15M55.8S     123200 PT2M3.2S    1743
## 108 PT51M55.4S       54     1204700  PT20M4.7S     248900 PT4M8.9S    1743

2.7 Stage Times

We can get the stage times for each stage on a rally by event and stage ID:

# Request the stage times for one stage of an event and return the parsed
# JSON response.
#   eventId: event identifier inserted into the results API URL.
#   stageId: stage identifier inserted into the results API URL.
get_stage_times = function(eventId, stageId) {
  stage_times_url = paste0(results_api, '/rally-event/', eventId,
                           '/stage-times/stage-external/', stageId)
  stage_times = jsonlite::fromJSON(stage_times_url)
  stage_times
}

stage_times = get_stage_times(eventId, stageId)

stage_times %>% head(2)

##   stageTimeId stageId entryId elapsedDurationMs  elapsedDuration    status
## 1       96580    1750   21536            834500 00:13:54.5000000 Completed
## 2       96474    1750   21532            835500 00:13:55.5000000 Completed
##    source position diffFirstMs diffFirst diffPrevMs diffPrev
## 1 Default        1           0  00:00:00          0 00:00:00
## 2 Default        2        1000  00:00:01       1000 00:00:01

2.7.1 Getting Stage Times for Multiple Stages

It will also be convenient to be able to retrieve stage times for multiple stages from a single function call. We can take the same approach we used previously:

# get_stage_times() with the arguments swapped, so that the stage ID is the
# first argument and the function can be mapped over a list of stage IDs.
get_stage_times2 = function(stageId, eventId) {
  get_stage_times(eventId = eventId, stageId = stageId)
}

# Retrieve the stage times for several stages and stack them into one
# dataframe.
#   stage_list: stage IDs to request.
#   event_id: event the stages belong to. Defaults to the eventId variable
#             visible where this function was defined, which keeps existing
#             one-argument calls working while letting callers pass the
#             event explicitly.
get_multi_stage_times = function(stage_list, event_id=eventId){
  stage_list %>%
    map(get_stage_times2, eventId=event_id) %>%
    bind_rows()
}

multi_stage_times = get_multi_stage_times(stage_list)
  
multi_stage_times %>% tail(2)

##     stageTimeId stageId entryId elapsedDurationMs  elapsedDuration    status
## 109       96321    1743   21573           1950700 00:32:30.7000000 Completed
## 110       96355    1743   21547                NA             <NA>       DNS
##      source position diffFirstMs        diffFirst diffPrevMs         diffPrev
## 109 Default       54      997800 00:16:37.8000000     245500 00:04:05.5000000
## 110 Default       NA          NA             <NA>         NA             <NA>

2.7.2 Getting Wide Stage Times for Multiple Stages

We can then widen the stage times for each driver:

# Widen long stage times into one row per entryId and one column per stage,
# with elapsed times converted from milliseconds to seconds.
#   multi_stage_times: long dataframe with entryId, stageId and
#                      elapsedDurationMs columns.
#   stage_list: stage IDs to keep as columns, in the order given.
get_multi_stage_times_wide = function(multi_stage_times, stage_list){
  wanted_cols = c('entryId', as.character(stage_list))

  # Keep just the needed columns and express the times in seconds
  long_times = multi_stage_times %>%
    select(all_of(c('entryId', 'stageId', 'elapsedDurationMs'))) %>%
    mutate(elapsedDurationS = elapsedDurationMs / 1000) %>%
    select(-elapsedDurationMs)

  # One column per stage
  wide_times = long_times %>%
    group_by(entryId) %>%
    tidyr::spread(key = stageId,
                  value = elapsedDurationS) %>%
    select(all_of(wanted_cols))

  # Cast the grouped result back to a plain dataframe
  as.data.frame(wide_times)
}

multi_stage_times_wide = get_multi_stage_times_wide(multi_stage_times,
                                                    stage_list)

multi_stage_times_wide %>% head(2)

##   entryId  1747  1743
## 1   21530 980.5 980.0
## 2   21531 974.5 968.2

2.7.3 Getting Wide Stage Positions

We can also get the stage positions:

# Widen long stage results into one row per entryId and one column per stage,
# holding the stage position.
#   multi_stage_times: long dataframe with entryId, stageId and position
#                      columns.
#   stage_list: stage IDs to keep as columns, in the order given.
get_multi_stage_positions_wide = function(multi_stage_times, stage_list){
  stage_positions_cols = c('entryId', 'stageId', 'position')

  multi_stage_positions_wide = multi_stage_times %>%
                    select(all_of(stage_positions_cols)) %>%
                    group_by(entryId) %>%
                    tidyr::spread(key = stageId,
                                  value = position) %>%
                    select(c('entryId', as.character(stage_list))) %>%
                    # Cast the grouped result back to a plain dataframe
                    as.data.frame()

  # Return the value explicitly: a function whose last expression is an
  # assignment returns its result invisibly
  multi_stage_positions_wide
}

multi_stage_positions_wide = get_multi_stage_positions_wide(multi_stage_times, stage_list)

multi_stage_positions_wide %>% head(2)

##   entryId 1747 1743
## 1   21530    9   10
## 2   21531    5    5

2.7.4 Getting Generic Wide Dataframes

We can start to work up a function that is able to handle widening data frames more generally, albeit with a potential need to handle exceptions:

# Widen a long stage table into one row per group_key value and one column
# per stage in stage_list, filled with the values of wide_val.
#   multi_stage_generic: long dataframe containing the group_key, spread_key
#                        and wide_val columns.
#   stage_list: spread_key values to keep as columns, in the order given.
#   wide_val: name of the value column. 'elapsedDurationMs' is converted to
#             seconds (as elapsedDurationS) before widening.
#   group_key: name of the row identifier column (default 'entryId').
#   spread_key: name of the column whose values become column names
#               (default 'stageId').
get_multi_stage_generic_wide = function(multi_stage_generic, stage_list,
                                        wide_val, group_key='entryId',
                                        spread_key='stageId'){

  stage_times_cols = c(group_key, spread_key, wide_val )

  multi_stage_generic_long = multi_stage_generic %>%
    select(all_of(stage_times_cols))

  if (wide_val=='elapsedDurationMs') {
    # Convert the dataframe that was passed in (not a global one) and keep
    # the converted result, so the seconds column exists when we spread
    multi_stage_generic_long = multi_stage_generic_long %>%
      mutate(elapsedDurationS = elapsedDurationMs / 1000) %>%
      select(-elapsedDurationMs)

    wide_val = 'elapsedDurationS'
  }

  multi_stage_generic_wide = multi_stage_generic_long %>%
    # group_by_at lets us pass in the grouping column by variable
    group_by_at(group_key) %>%
    tidyr::spread(key = spread_key,
                  value = wide_val) %>%
    # all_of() makes the use of external character vectors explicit
    select(all_of(c(group_key, as.character(stage_list)))) %>%
    # Cast the grouped result back to a plain dataframe
    as.data.frame()

  multi_stage_generic_wide
}

multi_stage_positions_wide_g = get_multi_stage_generic_wide(multi_stage_times, stage_list, 'position')

## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(group_key)` instead of `group_key` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

multi_stage_positions_wide_g %>% head(2)

##   entryId 1747 1743
## 1   21530    9   10
## 2   21531    5    5

2.8 Split Times

We can get split times and distance into stage data for each stage given the stage identifier:

# Request the split times data for one stage of an event and return the
# parsed JSON response.
#   eventId: event identifier inserted into the results API URL.
#   stageId: stage identifier inserted into the results API URL.
get_splits = function(eventId, stageId){
  splits_url = paste0(results_api, '/rally-event/', eventId,
                      '/split-times/stage-external/', stageId)
  splits = jsonlite::fromJSON(splits_url)
  splits
}

splits = get_splits(eventId, stageId)
# $splitPoints
# $entrySplitPointTimes

This includes handy information about split locations, such as distance into stage. This can also be useful for pace calculations:

splits$splitPoints

##   splitPointId stageId number distance
## 1         3593    1750      5    23.21
## 2         3601    1750      2     9.02
## 3         3615    1750      1     4.83
## 4         3617    1750      4    20.63
## 5         3621    1750      3    14.87

We can also view the split point times for each driver. This second dataframe contains rows summarising the stage for each driver, and includes the stage start time and duration as well as a column splitPointTimes that itself contains a data frame of elapsed duration split point times:

splits$entrySplitPointTimes %>% select(-splitPointTimes) %>% head(2)

##   entryId       startDateTime        startDateTimeLocal stageTimeDurationMs
## 1   21540 2021-02-27T07:08:00 2021-02-27T09:08:00+02:00              844300
## 2   21541 2021-02-27T07:11:00 2021-02-27T09:11:00+02:00              864400
##   stageTimeDuration
## 1  00:14:04.3000000
## 2  00:14:24.4000000

To view the split times for a specific driver, we can index into the dataframe using the driver entryId value:

splits$entrySplitPointTimes[splits$entrySplitPointTimes['entryId']==ogierEntryId,]$splitPointTimes

## [[1]]
##   splitPointTimeId splitPointId entryId elapsedDurationMs elapsedDuration
## 1           123492         3615   21530            161700       PT2M41.7S
## 2           123495         3601   21530            272300       PT4M32.3S
## 3           123499         3621   21530            471300       PT7M51.3S
## 4           123505         3617   21530            690400      PT11M30.4S
## 5           123507         3593   21530            789200       PT13M9.2S
##           splitDateTime          splitDateTimeLocal
## 1 2021-02-27T07:22:41.7 2021-02-27T09:22:41.7+02:00
## 2 2021-02-27T07:24:32.3 2021-02-27T09:24:32.3+02:00
## 3 2021-02-27T07:27:51.3 2021-02-27T09:27:51.3+02:00
## 4 2021-02-27T07:31:30.4 2021-02-27T09:31:30.4+02:00
## 5 2021-02-27T07:33:09.2 2021-02-27T09:33:09.2+02:00

Each dataframe gives the split times on the stage for a particular driver in a long format.

Note that the split point times are strictly increasing and describe the elapsed time into the stage at each split point from the start location and time.

2.8.1 Driver Split Times Detail

We can get an unrolled long structure by combining the splitPointTimes dataframes from all drivers, also taking the opportunity to convert the elapsed duration in milliseconds to seconds along the way:

# Stack the per-driver split time tables into a single long dataframe and
# express the elapsed time in seconds rather than milliseconds.
# A base R alternative for the stacking step would be:
#   do.call(rbind, entry_splits$splitPointTimes)
# The tidyverse approach used here is dplyr::bind_rows().
get_driver_splits = function(splits){
  per_driver_times = splits$entrySplitPointTimes$splitPointTimes
  stacked_times = bind_rows(per_driver_times)
  # Add the seconds column, then drop the original milliseconds column
  with_seconds = mutate(stacked_times,
                        elapsedDurationS = elapsedDurationMs / 1000)
  select(with_seconds, -elapsedDurationMs)
}

# Build the long-format driver splits table, then preview its first two rows
driver_splits = splits %>% get_driver_splits()

head(driver_splits, 2)

##   splitPointTimeId splitPointId entryId elapsedDuration         splitDateTime
## 1           123482         3615   21540       PT2M41.4S 2021-02-27T07:10:41.4
## 2           123483         3601   21540       PT4M32.5S 2021-02-27T07:12:32.5
##            splitDateTimeLocal elapsedDurationS
## 1 2021-02-27T09:10:41.4+02:00            161.4
## 2 2021-02-27T09:12:32.5+02:00            272.5

2.8.2 Wide Driver Split Times

We can cast the data into a wide format, with splits ordered by their distance into the stage. Start by creating a function to help get the split point codes in order by distance along the stage:

# Return the split point ids as character strings, ordered by each split
# point's distance value in the splits$splitPoints table.
get_split_cols = function(splits){
  points_by_distance = splits$splitPoints %>% arrange(distance)
  as.character(points_by_distance$splitPointId)
}

Now create a function to get the driver splits in a wide format using the distance-into-stage ordered split point codes as the widened columns:

# Cast the long driver splits into a wide dataframe: one row per entryId and
# one elapsed time (in seconds) column per split point, with the split point
# columns ordered by distance into the stage.
#
# driver_splits: long dataframe containing entryId, splitPointId and
#                elapsedDurationS columns.
# splits:        stage splits object; its splitPoints table supplies the
#                column ordering via get_split_cols().
#
# Returns a plain data.frame with rows sorted by entryId.
get_driver_splits_wide = function(driver_splits, splits){
    ordered_split_ids = get_split_cols(splits)
    long_cols = c('entryId', 'splitPointId', 'elapsedDurationS')

    driver_splits_wide = driver_splits %>%
                            select(all_of(long_cols)) %>%
                            # pivot_wider() is the maintained replacement
                            # for the superseded spread()
                            tidyr::pivot_wider(names_from = splitPointId,
                                               values_from = elapsedDurationS) %>%
                            # pivot_wider() keeps rows in order of first
                            # appearance; sort by entryId to retain the
                            # row ordering that spread() produced
                            arrange(entryId) %>%
                            select(all_of(c('entryId', ordered_split_ids))) %>%
                            # Hand back a plain data.frame rather than
                            # a tibble
                            as.data.frame()
    driver_splits_wide
}

# Widen the long driver splits, then preview the first two rows
driver_splits_wide = driver_splits %>% get_driver_splits_wide(splits)

head(driver_splits_wide, 2)

##   entryId  3615  3601  3621  3617  3593
## 1   21530 161.7 272.3 471.3 690.4 789.2
## 2   21531 162.3 273.7 472.3 692.1 792.4

2.8.3 Multiple Stage Long Splits Data

A convenient way of working with the split times across multiple stages is to put the splits into a long form and then filter out the rows we are interested in.

We can generate a long form dataframe using the dplyr::bind_rows() function that we met earlier:

# Fetch the per-driver split time tables for a single stage.
#
# stageId: identifier of the stage to look up.
# eventId: identifier of the rally event that the stage belongs to.
#
# Returns the splitPointTimes list column of the stage's entrySplitPointTimes
# table, i.e. one dataframe of split times per driver entry.
get_split_times2 = function(stageId, eventId) {
  splits = get_splits(eventId, stageId)
  # The list has one element per driver entry, so it must not be labelled
  # with the split point ids: that vector has a different length and the
  # assignment fails whenever a stage has fewer entries than split points.
  # An unnamed list of dataframes is also what dplyr::bind_rows() expects
  # when the stages are combined.
  splits$entrySplitPointTimes$splitPointTimes
}

# Combine the split times from several stages into one long dataframe.
#
# stage_list: vector or list of stage ids to retrieve.
# event_id:   id of the rally event the stages belong to. Defaults to the
#             eventId variable visible where this function is defined (the
#             global session in this script), so existing one-argument calls
#             behave as before.
#
# Returns the row-bound split times for every requested stage.
get_multi_split_times = function(stage_list, event_id=eventId){
  multi_split_times = stage_list %>%
                      map(get_split_times2, eventId=event_id) %>%
                      bind_rows()
  multi_split_times
}

# Preview the combined split times using just the first two stages
stage_list_sample = stage_list[1:2]

get_multi_split_times(stage_list_sample) %>% head(3)

##   splitPointTimeId splitPointId entryId elapsedDurationMs elapsedDuration
## 1           122826         3596   21530            135200       PT2M15.2S
## 2           122828         3608   21530            330100       PT5M30.1S
## 3           122829         3603   21530            473700       PT7M53.7S
##           splitDateTime          splitDateTimeLocal
## 1 2021-02-26T13:10:15.2 2021-02-26T15:10:15.2+02:00
## 2 2021-02-26T13:13:30.1 2021-02-26T15:13:30.1+02:00
## 3 2021-02-26T13:15:53.7 2021-02-26T15:15:53.7+02:00