6 Visualising Stage Pace
In this chapter, we’ll start to explore various ways in which we might visualise pace data.
6.1 Load Base Data
To get the stage data from a standing start, we can load in the current season list, select the rally we want, look up the itinerary from the rally, extract the sections and then the stages, and from that access the stage ID for the stage or stages we are interested in.
Load in the helper functions:
source('code/wrc-api.R')
source('code/wrc-wrangling.R')
source('code/wrc-charts.R')
And get the base data:
# Resolve the event of interest from the active season
s = get_active_season()
eventId = get_eventId_from_name(s, 'arctic')

# Walk from itinerary to sections to stages, then build the stage lookups
itinerary = get_itinerary(eventId)
sections = get_sections(itinerary)
stages = get_stages(sections)
stages_lookup = get_stages_lookup(stages)
stage_list = get_stage_list(stages)
stage_codes = get_stage_codes(stages)

# Driver details
entries = get_rally_entries(eventId)
cars = get_car_data(entries)
Get a sample stage ID:
# Look up the stage ID for a sample stage code
stageId = stages_lookup[['SS3']]
Let’s also get some pace data:
# Stage times for every stage in the rally
multi_stage_times = get_multi_stage_times(stage_list)

# Pace derived from the stage times and the car/driver details
multi_stage_pace = get_multi_stage_pace(multi_stage_times, cars)

# One row per driver, one column per stage code
pace_wide = get_multi_stage_generic_wide(multi_stage_pace,
                                         stage_codes, 'pace',
                                         # Unique group keys required
                                         # Driver code not guaranteed unique
                                         group_key=c('code_driver'),
                                         spread_key='code')

# Use the driver in the second row as the example driver to rebase against
example_driver = pace_wide[2,]$code_driver

pace_wide_rebased = rebase(pace_wide, example_driver, stage_codes,
                           id_col='code_driver')
6.2 Pace Maps
To compare pace, it is useful to look at rebased pace times relative to a particular driver and also indicate the length of stage with which particular pace levels are associated.
We can do this with a chart that presents distance into stage along the horizontal x-axis and relative pace on the y axis, using a line to indicate the pace for each driver relative to a specified driver.
One of the easiest ways of plotting charts is to plot from a tidy dataframe, so let’s cast the rebased wide pace dataframe back to a long form and also add in the distance into stage at the start and end of each stage:
library(tidyr)
# First and last stage codes, used to select the stage columns to gather
stage_range = c(start=stage_codes[1],
                end=stage_codes[length(stage_codes)])

# Accumulated distance at the end and at the start of each stage
stages$cum_dist = cumsum(stages$distance)
stages$start_dist = c(0, stages$cum_dist[-length(stages$cum_dist)])

# Cast the rebased pace to long form and attach the stage distances
pace_stage = pace_wide_rebased %>%
  gather(code, pace,
         stage_range['start']:stage_range['end']) %>%
  merge(stages[,c('code', 'start_dist', 'cum_dist')],
        by='code')

pace_stage %>% head(3)
## code code_driver pace start_dist cum_dist
## 1 SS1 BER 1.7004831 0 31.05
## 2 SS1 BRE 0.0000000 0 31.05
## 3 SS1 EVA 0.4219002 0 31.05
We can now construct a chart using line segments to represent the pace for each driver on each stage:
library(ggplot2)
# Base layer: a zero reference line plus one horizontal segment
# per driver per stage, spanning the stage's distance range
g0 = ggplot(pace_stage, aes(group=code_driver)) +
  geom_hline(yintercept = 0,
             colour='lightgrey', linetype='dotted') +
  geom_segment(aes(x=start_dist, xend=cum_dist,
                   y=pace, yend=pace),
               color = 'lightgrey')

# Add driver labels just above each segment, dodged within the stage
g = g0 + geom_text(aes(x=(start_dist+cum_dist)/2,y=pace+0.03,
                       label=code_driver,group=code_driver),
                   position = position_dodge(15), size=1) +
  coord_cartesian(ylim=c(-0.5,2)) +
  theme_classic()

g
We could highlight positive and negative differences in the label colourings:
# Colour the labels by the sign of the pace difference and offset them
# away from the zero line
g0 + geom_text(aes(x=(start_dist+cum_dist)/2,
                   y=ifelse(pace>0,pace+0.03,pace-0.03),
                   label=code_driver,group=code_driver,
                   color=pace>0),
               position = position_dodge(15), size=1) +
  coord_cartesian(ylim=c(-0.5,2)) +
  theme_classic() + theme(legend.position="none")
We can also highlight values for a particular driver:
# Overplot coloured segments for a single driver
g + geom_segment(data=pace_stage[pace_stage$code_driver=='EVA',],
                 aes(x=start_dist, xend=cum_dist,
                     y=pace, yend=pace, color = pace>0)) +
  theme(legend.position="none")
Or abuse the `gghighlight` package to modify the aesthetics of unselected items:
# Fade everything except the selected driver
g + gghighlight::gghighlight(code_driver=='EVA',
                             unhighlighted_params=list(alpha=0.1))
Alternatively, abuse `gghighlight()` again with a negative form of selection to highlight items:
# Select everyone but the driver of interest, so that the "unhighlighted"
# styling is what picks that driver out
g + gghighlight::gghighlight(code_driver!='EVA',label_key=code_driver,
                             unhighlighted_params=list(color='blue'))
We could even add a transparency layer bar to highlight the pace difference compared to a particular driver:
# Draw a bar between the zero line and the driver's pace on each stage
g + geom_rect(data=pace_stage[pace_stage$code_driver=='EVA',],
              aes(xmin=start_dist, xmax=cum_dist,
                  ymin = ifelse(pace>0,0,pace),
                  ymax = ifelse(pace>0,pace,0),
                  fill = pace>0, alpha=0.7)) +
  theme(legend.position="none")
Could we perhaps also extend that a little to allow us to compare more drivers?
# Build a geom_rect() layer showing one driver's pace bars.
#
# Each stage's distance range is split into n equal-width slots and this
# driver's bar is drawn in slot m, so several drivers can sit side by side.
#
# sub_df: rows for a single driver, with start_dist, cum_dist and pace columns.
# m: 1-based slot index for this driver.
# n: total number of slots (drivers being compared).
# Returns a ggplot2 layer.
pace_map_highlight = function(sub_df, m, n){
  # If we don't grab the actual value
  # the referenced value is used...
  # (aes() expressions are evaluated when the plot is built, by which
  # time a caller's loop variable may have moved on)
  m_ = m
  force(n)
  geom_rect(data=sub_df,
            aes(xmin= start_dist + (m_-1) * (cum_dist - start_dist)/n,
                xmax= start_dist + m_ * (cum_dist - start_dist)/n,
                ymin = ifelse(pace>0,0,pace),
                ymax = ifelse(pace>0,pace,0),
                fill = pace>0, alpha=0.7))
}
# Add side-by-side pace bars for several drivers to an existing chart.
#
# df: long pace dataframe containing the identifier column named by idcol.
# g: chart object to add the layers to.
# codes: vector of driver identifiers to highlight; may be empty.
# idcol: name of the identifier column in df.
# Returns g with one pace_map_highlight() layer added per code.
pace_map_highlight_many = function(df, g, codes,
                                   idcol='code_driver'){
  n = length(codes)
  # seq_along() rather than 1:n so that an empty codes vector adds nothing
  for (m in seq_along(codes)){
    # [[ ]] extracts the column as a vector for the comparison
    sub_df = df[df[[idcol]]==codes[m],]
    g = g + pace_map_highlight(sub_df, m, n)
  }
  g
}
Let’s try it with two drivers:
# Side-by-side pace bars for two drivers, with the legend suppressed
pace_map_highlight_many(df=pace_stage, g=g, codes=c('EVA', 'ROV')) +
  theme(legend.position="none")
With multiple drivers, it may get difficult to see where the stages are delimited, so we might add separators to delimit them:
# Dotted vertical separators at the end of each stage
g + geom_vline(data = stages, aes(xintercept = cum_dist),
               color='lightgrey', linetype='dotted')
To highlight stages further, we could add a “banner” to the chart:
# Stage banner: a black strip across the top of the chart, stage codes
# centred over each stage, and yellow ticks at the stage boundaries.
# One driver's rows are used so that each stage appears exactly once.
g + geom_rect(data=pace_stage[pace_stage$code_driver==example_driver,],
              aes(xmin=0, xmax=max(cum_dist),
                  ymin = 1.8, ymax = 2.0,
                  alpha=0), fill = 'black') +
  geom_text(data=pace_stage[pace_stage$code_driver==example_driver,],
            aes(x=(cum_dist + start_dist)/2, label=code),
            y=1.9, color='yellow', size=3) +
  geom_segment(data=pace_stage[pace_stage$code_driver==example_driver,],
               aes(x=cum_dist, xend=cum_dist,
                   y=1.8, yend=2.0), color='yellow') +
  theme(legend.position="none")
6.3 A Pace Map Function
Let’s start to work up a function based on the above sketches that will generate a pace map for us directly from a long format pace dataframe.
# Generate a pace map from a long format pace dataframe.
#
# pace_long: long dataframe with one row per driver per stage.
# limits: y-axis limits, applied with coord_cartesian().
# labels: if TRUE, label each pace segment with the driver identifier.
# drivers: optional vector of driver identifiers to emphasise.
# lines: if TRUE, draw dotted vertical separators at each stage end.
# xstart, xend: names of the columns giving the distance into the rally
#   at the start and end of each stage.
# pace: name of the column giving the pace value.
# typ: how to emphasise drivers: 'bar' draws side-by-side bars via
#   pace_map_highlight_many(); 'highlight' overplots coloured segments.
#   Any other value adds no emphasis.
# pace_label_offset: vertical offset of the labels above the segments.
# label_dodge: dodge width used to spread labels within a stage.
# idcol: name of the driver identifier column.
# Returns a ggplot object.
pace_map = function(pace_long, limits=c(-0.5,2),
                    labels=TRUE, drivers=NULL, lines=TRUE,
                    xstart='start_dist', xend='cum_dist',
                    pace='pace', typ='bar', pace_label_offset=0.03,
                    label_dodge=15,
                    idcol='code_driver'){

  # There are downstream dependencies with colnames baked in atm...
  pace_long$start_dist = pace_long[[xstart]]
  pace_long$cum_dist = pace_long[[xend]]
  pace_long$pace = pace_long[[pace]]

  g0 = ggplot(pace_long, aes_string(group=idcol, label=idcol)) +
    geom_hline(yintercept = 0,
               colour='lightgrey', linetype='dotted') +
    geom_segment(aes(x=start_dist, xend=cum_dist,
                     y=pace, yend=pace),
                 color = 'lightgrey')

  if (lines) {
    lines_df = data.frame(cum_dist=unique(pace_long$cum_dist))
    g0 = g0 + geom_vline(data=lines_df, aes(xintercept = cum_dist),
                         color='lightgrey', linetype='dotted')
  }

  if (labels){
    g0 = g0 + geom_text(aes(x= (start_dist+cum_dist)/2,
                            y=pace+pace_label_offset),
                        position = position_dodge(label_dodge), size=1)
  }

  # length() is 0 for NULL as well as for an empty vector
  if (length(drivers) > 0){
    if (typ=='bar'){
      g0 = pace_map_highlight_many(pace_long, g0,
                                   c(drivers), idcol=idcol)
    } else if (typ=='highlight') {
      # [[ ]] extracts the column as a vector; %in% applied to the
      # one-column dataframe pace_long[idcol] would match no rows
      focus = pace_long[pace_long[[idcol]] %in% c(drivers),]
      g0 = g0 + geom_segment(data=focus,
                             aes(x=start_dist, xend=cum_dist,
                                 y=pace, yend=pace, color = pace>0))
    }
  }

  g0 = g0 + coord_cartesian(ylim=limits)

  g0 + theme_classic() + theme(legend.position="none")
}
Let’s try it:
# Pace map with default settings, emphasising two drivers
pace_map(pace_long=pace_stage, drivers=c('EVA', 'ROV'))
6.4 Off-the-Pace Charts
Another way of reviewing pace is to consider the gap to leader, or rebased gap to a particular driver across the stages, using distance into stage along the x-axis to locate the x-value and gap (measured in seconds) along the y-axis. A moment’s consideration suggests that the gradient (\(\textrm{change_in_gap}/\textrm{change_in_distance}\)) is a measure of pace. The slope of the line thus indicates relative pace between the focal driver and the other drivers.
As with the pace map, if we have the data in a long, tidy form, we can create charts from it quite straightforwardly. So let’s add in the accumulated distance into stage and accumulated stage time for each driver:
# Attach the accumulated distance to each stage result, then accumulate
# each driver's stage times in stage order
off_the_pace = multi_stage_pace %>%
  merge(stages[,c('stageId', 'cum_dist')],
        by='stageId') %>%
  arrange(number) %>%
  group_by(code_driver) %>%
  mutate(totalDurationS = cumsum(elapsedDurationS))

off_the_pace %>% head(3)
## # A tibble: 3 x 8
## # Groups: code_driver [3]
## stageId number code_driver elapsedDurationS pace code cum_dist
## <int> <int> <chr> <dbl> <dbl> <chr> <dbl>
## 1 1747 1 TÄN 957.8 30.84702 SS1 31.05
## 2 1747 1 BRE 961.4 30.96296 SS1 31.05
## 3 1747 1 ROV 968.4 31.18841 SS1 31.05
## # … with 1 more variable: totalDurationS <dbl>
Now we can create a basic off-the-pace chart:
# Accumulated time against accumulated distance, one line per driver
ggplot(data=off_the_pace,
       mapping=aes(x=cum_dist, y=totalDurationS, color=code_driver)) +
  geom_line()
As with the pace map, the chart is often most informative if we rebase it relative to a particular driver.
Let’s create a wide dataframe to simplify the rebasing process:
# One row per driver, one accumulated-time column per stage code
off_the_pace_wide = get_multi_stage_generic_wide(off_the_pace,
                                                 stage_codes, 'totalDurationS',
                                                 group_key=c('code_driver'),
                                                 spread_key='code')

off_the_pace_wide %>% head(3)
## code_driver SS1 SS2 SS3 SS4 SS5 SS6 SS7 SS8 SS9
## 1 BER 1014.2 2034.0 3468.5 4615.1 5965.2 7402.5 8551.3 9915.3 10560.0
## 2 BRE 961.4 1926.9 2768.8 3320.3 4076.9 4928.1 5486.4 6265.5 6879.5
## 3 EVA 974.5 1942.7 2788.5 3335.1 4085.3 4938.9 5491.5 6275.6 6883.1
## SS10
## 1 11203.580
## 2 7482.231
## 3 7491.128
Now we can rebase:
# Rebase the accumulated times relative to the example driver
off_the_pace_wide_rebased = rebase(off_the_pace_wide,
                                   example_driver, stage_codes,
                                   id_col='code_driver')

off_the_pace_wide_rebased %>% head(3)
## code_driver SS1 SS2 SS3 SS4 SS5 SS6 SS7 SS8 SS9
## 1 BER 52.8 107.1 699.7 1294.8 1888.3 2474.4 3064.9 3649.8 3680.5
## 2 BRE 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
## 3 EVA 13.1 15.8 19.7 14.8 8.4 10.8 5.1 10.1 3.6
## SS10
## 1 3721.349
## 2 0.000
## 3 8.897
And cast back to the long, tidy form:
# Gather the stage columns back into rows and attach the accumulated distance
off_the_pace_long = off_the_pace_wide_rebased %>%
  gather(code, totalDurationGapS,
         stage_range['start']:stage_range['end']) %>%
  merge(stages[,c('code', 'cum_dist')],
        by='code')

off_the_pace_long %>% head(3)
## code code_driver totalDurationGapS cum_dist
## 1 SS1 BER 52.8 31.05
## 2 SS1 BRE 0.0 31.05
## 3 SS1 EVA 13.1 31.05
And now we can plot the simple rebased off-the-pace chart:
g_otp = ggplot(off_the_pace_long, aes(x=cum_dist,
                                      y=totalDurationGapS,
                                      color=code_driver)) +
  geom_line() +
  # Retain the points outside the limits
  # by using coord_cartesian()
  # We can also flip the coordinate axis
  coord_cartesian(ylim=c(100, -100)) + theme_classic()

g_otp
We might also want to zero the origin, for example by adding a row for each driver with a zeroed distance and gap.
Let’s create some dummy data to represent that:
# One dummy "SS0" row per driver, at zero distance and zero gap
zero_df = data.frame(code_driver=unique(off_the_pace_long$code_driver))
zero_df$cum_dist = 0
zero_df$totalDurationGapS = 0
zero_df$code = 'SS0'
We can then bind that data into our long form splits data and view the result:
# Prepend the zeroed origin rows so every line starts at (0, 0)
g_otp = bind_rows(off_the_pace_long, zero_df) %>%
  ggplot(aes(x=cum_dist,
             y=totalDurationGapS,
             color=code_driver)) +
  geom_line() +
  # Retain the points outside the limits
  # by using coord_cartesian()
  # We can also flip the coordinate axis
  coord_cartesian(ylim=c(100, -100)) + theme_classic()

g_otp
Trivially, we might try to add labels at the end of each line:
# Rows at the maximum accumulated distance, used to position end-of-line labels
off_the_pace_end = off_the_pace_long %>% filter(cum_dist == max(cum_dist))

g_otp + geom_text(data = off_the_pace_end,
                  aes(x = cum_dist+ 10, y = totalDurationGapS,
                      label = code_driver, color = code_driver)) +
  theme(legend.position="none")
However, there are various other packages that provide alternative ways of doing this, including `directlabels` and `ggrepel`.
For example, using `directlabels`:
library(directlabels)
# Label the end of each line, bumping overlapping labels apart
g_otp +
  geom_dl(aes(label = code_driver, x=cum_dist+2),
          # cex is text label size
          method = list('last.bumpup', cex = 0.5)) +
  theme(legend.position="none")
And using `ggrepel`, which also has the advantage of adding labels for drivers whose curves are really off the pace, albeit not in an obviously natural order:
# Repelled text labels positioned from the end-of-rally rows
g_otp + ggrepel::geom_text_repel(data = off_the_pace_end,
                                 aes(label = code_driver),
                                 size = 3) +
  theme(legend.position="none")
The `gghighlight` package is also useful in highlighting traces, as well as usefully automatically labeling highlighted lines:
# Fade all traces except those of the selected drivers
g_otp +
  gghighlight::gghighlight(code_driver %in% c('EVA','ROV'),
                           unhighlighted_params=list(alpha=0.1)) +
  theme(legend.position="none")
Again, let’s routinise the process of chart production with the beginnings of a function to generate the off-the-pace chart directly from an appropriate form dataframe:
# Generate an off-the-pace chart from a long format dataframe.
#
# pace_long: long dataframe with one row per driver per stage.
# highlight: optional vector of driver identifiers; when given, only those
#   traces are emphasised (via gghighlight) and label_typ is ignored.
# label_typ: 'dl' labels line ends with directlabels; any other value
#   uses ggrepel.
# dist: name of the x-axis (distance) column.
# t: name of the y-axis (time gap) column.
# code: name of the driver identifier column.
# ylim: y-axis limits passed to coord_cartesian(); give them in
#   descending order to invert the axis.
# Returns a ggplot object.
off_the_pace_chart = function(pace_long, highlight=NULL,
                              label_typ='dl',
                              dist='cum_dist', t='totalDurationGapS',
                              code='code_driver', ylim=NULL){

  g_otp = ggplot(pace_long, aes_string(x=dist, y=t,
                                       color=code)) +
    geom_line() +
    # Retain the points outside the limits
    # by using coord_cartesian()
    # We can also flip the coordinate axis
    coord_cartesian(ylim=ylim) + theme_classic()

  # Rows at the maximum distance, used to position the ggrepel labels;
  # [[ ]] extracts the column as a vector for the comparison
  off_the_pace_end = pace_long[pace_long[[dist]] == max(pace_long[[dist]]),]

  if (!is.null(highlight)) {
    # Use the column named by `code` rather than a hard-coded code_driver
    g_otp = g_otp + gghighlight::gghighlight(.data[[code]] %in% c(highlight),
                                             unhighlighted_params=list(alpha=0.1))
  } else if (label_typ=='dl') {
    g_otp = g_otp + directlabels::geom_dl(aes_string(label = code, x=dist),
                                          # cex is text label size
                                          method = list('last.bumpup', cex = 0.5))
  } else {
    g_otp = g_otp + ggrepel::geom_text_repel(data = off_the_pace_end,
                                             aes_string(label = code),
                                             size = 3)
  }

  g_otp + theme(legend.position="none")
}
Let’s quickly test it, noting how we cast the limits to an inverted y-axis to show the leaders above the x-axis:
# Descending limits invert the y-axis
off_the_pace_chart(pace_long=off_the_pace_long, ylim=c(50, -50))
And with highlighting:
# Same chart, emphasising two drivers
off_the_pace_chart(pace_long=off_the_pace_long,
                   ylim=c(50, -50),
                   highlight=c('EVA', 'ROV'))
6.5 Comparing Pace Across Stages
One way of characterising stages is based on pace. As a quick guide to possible pace variations over the stages of a rally, we might review the average pace. For example, here’s a look at pace over the course of the rally using a box plot to summarise the (non-outlier) pace values for each stage (we should probably use an ordered categorical stageId basis for the x-axis):
# One box per stage, positioned at the stage's accumulated distance;
# rows with pace of 40 or more are dropped as outliers
ggplot(data=off_the_pace[off_the_pace$pace<40,],
       mapping=aes(x=cum_dist, y=pace, group=cum_dist)) +
  geom_boxplot()