Visualising Withdrawals

Visualising Withdrawals

The withdrawals data lists the competitors who have withdrawn on each stage. Perhaps the simplest visualisation is an interactive that lets us select a stage and then displays the competitors who withdrew on that stage. An extra “all” option allows us to display all the withdrawals.

from dakar_rallydj.getter import DakarAPIClient

# Build the API client with caching enabled, using the in-memory backend.
dakar = DakarAPIClient(
    use_cache=True,
    backend='memory',
    # cache_name='dakar_cache',
    expire_after=3600  # Cache for 1 hour
)

# Fetch the withdrawals data, which is unpacked into three dataframes.
# Optionally specify category (A, F, K, M); called with no argument,
# as here, only the auto (A) category is loaded.
withdrawals_df, withdrawn_competitors_df, withdrawn_teams_df = dakar.get_withdrawals()

One of the things we note from the withdrawals data is that we don’t know what classes the withdrawals relate to:

# Preview the first few rows of the withdrawals table.
withdrawals_df.head()

	stage	bib	reason	_category
0	2	223	1	A
1	3	225	1	A
2	3	306	1	A
3	3	313	1	A
4	3	355	1	A

So let’s add in some additional metadata so we can filter on classes:

from dakar_rallydj.enrichers import derive_clazz_metadata

# Fetch the group and class ("clazz") tables from the API.
groups_df = dakar.get_groups()
clazz_df = dakar.get_clazz()

# Combine the withdrawn teams with the class and group tables. As the
# preview below shows, the result is keyed by `team.bib` and carries the
# class/group labels and a colour for each team.
clazz_metadata_df = derive_clazz_metadata(withdrawn_teams_df, clazz_df, groups_df)

# Preview the first few rows of the derived metadata.
clazz_metadata_df.head()

	team.bib	reference	categoryClazz	clazz_label	tinyLabel	label	color	group_label
0	202	2025-A-T1-+	2025-A-T1	T1+: Prototype Cross-Country Cars 4x4	ULT	T1	#EBBC4E	Ultimate
1	205	2025-A-T1-+	2025-A-T1	T1+: Prototype Cross-Country Cars 4x4	ULT	T1	#EBBC4E	Ultimate
2	206	2025-A-T1-+	2025-A-T1	T1+: Prototype Cross-Country Cars 4x4	ULT	T1	#EBBC4E	Ultimate
3	208	2025-A-T1-+	2025-A-T1	T1+: Prototype Cross-Country Cars 4x4	ULT	T1	#EBBC4E	Ultimate
4	213	2025-A-T1-2	2025-A-T1	T1.2 Prototype Cross-Country Cars 4x2	ULT	T1	#EBBC4E	Ultimate

# List the distinct values of the short class label.
clazz_metadata_df["tinyLabel"].unique()

array(['ULT', 'CHG', 'SSV', 'TRK'], dtype=object)

We can now merge the withdrawal table with the metadata table:

import pandas as pd

# Attach the class metadata to each withdrawal record. The metadata is
# keyed by "team.bib", so that column is renamed to "bib" to line up with
# the withdrawals table. This is an inner join (the pandas default), so a
# withdrawal with no matching metadata row is dropped.
# NOTE: withdrawals_df is overwritten, so re-running this cell merges the
# metadata a second time and produces suffixed duplicate columns.
withdrawals_df = withdrawals_df.merge(
    clazz_metadata_df.rename(columns={"team.bib": "bib"}),
    how="inner",
    on="bib",
)

# Preview the enriched table.
withdrawals_df.head()

	stage	bib	reason	_category	reference	categoryClazz	clazz_label	tinyLabel	label	color	group_label
0	2	223	1	A	2025-A-T1-2	2025-A-T1	T1.2 Prototype Cross-Country Cars 4x2	ULT	T1	#EBBC4E	Ultimate
1	3	225	1	A	2025-A-T1-+	2025-A-T1	T1+: Prototype Cross-Country Cars 4x4	ULT	T1	#EBBC4E	Ultimate
2	3	306	1	A	2025-A-T3-1	2025-A-T3	T3.1: Lightweight Prototype Cross-Country	CHG	T3	#E04E39	Challenger
3	3	313	1	A	2025-A-T3-1	2025-A-T3	T3.1: Lightweight Prototype Cross-Country	CHG	T3	#E04E39	Challenger
4	3	355	1	A	2025-A-T3-1	2025-A-T3	T3.1: Lightweight Prototype Cross-Country	CHG	T3	#E04E39	Challenger

We can do some counting to analyse withdrawals by category:

import seaborn as sns
import matplotlib.pyplot as plt

# Count withdrawals per class. Note that hue="color" only groups the bars
# by the hex string held in the "color" column; seaborn draws those groups
# using its own palette rather than the hex values themselves.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=withdrawals_df, x="categoryClazz", hue="color", ax=ax)

# Axis labels and title
ax.set(
    xlabel="Clazz",
    ylabel="Count",
    title="Count of Items by Clazz, Colored by Color",
)

# Legend heading; the trailing semicolon stops the notebook from echoing
# the returned Legend object.
ax.legend(title="Color");

_images/cdd3f6c8b09b167ca7a01738ee72eee46ed2e601d2bad9de04394fc9e7012771.png

But that doesn’t play nicely with the colour mapping. To get better control over the colours, we can use a Grammar of Graphics approach:

from plotnine import ggplot, aes, geom_bar, theme_minimal, labs, theme, scale_fill_manual

# Map each short class label to its colour. Each label takes the colour
# found on its own first row. Zipping two independently de-duplicated
# columns would pair labels with the wrong colours whenever two classes
# share a colour, or one class carries more than one colour value.
color_map = (
    withdrawals_df.drop_duplicates(subset="tinyLabel")
    .set_index("tinyLabel")["color"]
    .to_dict()
)


# Bar chart of withdrawal counts per class, with each bar filled using the
# colour that color_map assigns to its class label.
g = (
    ggplot(withdrawals_df, aes(x="tinyLabel", fill="tinyLabel"))
    # geom_bar counts the rows in each x category
    + geom_bar()
    + theme_minimal()
    + labs(
        title="Withdrawals by Category",
        x="Class",
        y="Count",
        fill="Color"
    )
    # Use the class colours rather than the default fill palette
    + scale_fill_manual(values=color_map)
    # The x-axis already names each class, so hide the legend
    + theme(legend_position='none')
)

# Display the chart
g

_images/6fa127058b47c704327379026b14a397e60412244e941b4120243539346866a9.png

Recall that these results are just for the specified categories, and by default we have only loaded in data for the auto (A) category.

How about withdrawals by stage?

from plotnine import scale_x_discrete, scale_y_continuous

# Bar chart of the number of withdrawals on each stage.
(
    ggplot(withdrawals_df, aes(x='stage'))
    # One bar per stage; geom_bar counts the rows for each stage
    + geom_bar()
    + theme_minimal()
    + scale_x_discrete(
        # Treat 'stage' as a categorical variable and list every stage from
        # 1 up to the last stage with a withdrawal, in numeric order, so
        # the axis shows every stage in that range.
        # NOTE(review): limits start at 1, so rows with stage < 1 would
        # fall outside the axis limits; confirm none exist.
        limits=list(range(1, withdrawals_df['stage'].max() + 1))
    )
    + scale_y_continuous(
        # Force integer ticks for the y-axis: one tick per whole number
        # from 0 up to the top of the axis limits.
        breaks=lambda limits: list(range(0, int(limits[-1]) + 1))
    )
    + labs(
        title="Withdrawals by Stage",
        x="Stage",
        y=""
    )
)

_images/a99ac381323cca26d3f3f3018bd79901cd6e6c60545e63700ed31852651daa8b.png

Or withdrawals by stage and class?

# Stacked bar chart of withdrawals per stage, split by class.
(
    ggplot(withdrawals_df, aes(x='stage', fill='tinyLabel', group='tinyLabel'))
    # Stack each class's count on top of the others within a stage
    + geom_bar(position='stack')
    + theme_minimal()
    + scale_x_discrete(
        # Treat 'stage' as a categorical variable and list every stage from
        # 1 up to the last stage with a withdrawal, in numeric order, so
        # the axis shows every stage in that range.
        # NOTE(review): limits start at 1, so rows with stage < 1 would
        # fall outside the axis limits; confirm none exist.
        limits=list(range(1, withdrawals_df['stage'].max() + 1))
    )
    # Fill each class with the colour that color_map assigns to its label
    + scale_fill_manual(values=color_map)
    + scale_y_continuous(
        # Force integer ticks for the y-axis: one tick per whole number
        # from 0 up to the top of the axis limits.
        breaks=lambda limits: list(range(0, int(limits[-1]) + 1))
    )
    + labs(
        title="Withdrawals by Stage and Class",
        x="Stage",
        y="",
        fill="Class"
    )
)

_images/d5c08e522001d4a50d10b6a0a452c30ffc1bc496446d0da080ac1a6ab50895c8.png

I find that sort of chart quite confusing. I find it much more readable if we dodge the bars rather than stack them:

from plotnine import position_dodge2

# Grouped (side-by-side) bar chart of withdrawals per stage, split by class.
(
    ggplot(withdrawals_df, aes(x='stage', fill='tinyLabel', group='tinyLabel'))
    # Dodge the class bars so they sit next to each other within a stage.
    # preserve="single" keeps every bar the same width, even on stages
    # where only some classes have withdrawals.
    + geom_bar(position=position_dodge2(width=0.9, preserve="single"))
    + theme_minimal()
    + scale_x_discrete(
        # Treat 'stage' as a categorical variable and list every stage from
        # 1 up to the last stage with a withdrawal, in numeric order, so
        # the axis shows every stage in that range.
        # NOTE(review): limits start at 1, so rows with stage < 1 would
        # fall outside the axis limits; confirm none exist.
        limits=list(range(1, withdrawals_df['stage'].max() + 1))
    )
    # Fill each class with the colour that color_map assigns to its label
    + scale_fill_manual(values=color_map)
    + scale_y_continuous(
        # Force integer ticks for the y-axis: one tick per whole number
        # from 0 up to the top of the axis limits.
        breaks=lambda limits: list(range(0, int(limits[-1]) + 1))
    )
    + labs(
        title="Withdrawals by Stage and Class",
        x="Stage",
        y="",
        fill="Class"
    )
)

_images/d15d5f4b79fad2a0733126f55de0e5bd05d784b4b191a98787c697d7b9a117a9.png