Example of the selection and compound chart
By the end of this post, you will be able to create a chart like the one above with your own dataset. I used similar plots in my post on evaluating crowd density estimation models.
In this notebook, we are going to plot the stock prices to demonstrate the following in Altair.
How to:
- create compound charts
- create selections, i.e., cross-filtering capabilities
The data was downloaded from Yahoo Finance using R.
import pandas as pd
import altair as alt
Load Data¶
We will load the data and print some of its rows.
df = pd.read_csv("data.csv", parse_dates=['date'])
df.head(5)
Print the column info and associated datatype
df.info()
How many ticker/symbols?
df['symbol'].unique()
What is the date range?
# Report the first and last dates present in the data.
earliest, latest = df.date.min(), df.date.max()
print(f"Min Date: {earliest}, Max Date: {latest}")
# Keep only the rows dated after 2020-01-01 for the rest of the notebook.
df = df[df.date > "2020-01-01"]
Print statistics of the numerical columns
# print stats
df.describe()
def find_data(df, symbol):
    """Return the rows of ``df`` for one ticker, limited to the plotting columns.

    Args:
        df: DataFrame containing at least the columns ``symbol``, ``date``,
            ``adjusted``, ``pct_change`` and ``volume``.
        symbol: Ticker to keep; compared for equality against ``df['symbol']``.

    Returns:
        A new DataFrame, independent of ``df``, with only the matching rows
        and the columns ``symbol``, ``date``, ``adjusted``, ``pct_change``
        and ``volume`` (in that order). Empty if no row matches.
    """
    columns = ['symbol', 'date', 'adjusted', 'pct_change', 'volume']
    # Select first, copy afterwards: only the matching slice is duplicated
    # rather than the whole input frame.
    return df.loc[df['symbol'] == symbol, columns].copy()
Actually, we are only going to use AAPL in this notebook.
aapl_df = find_data(df, 'AAPL')
aapl_df.head()
# Daily traded volume for Apple, drawn as bars over time.
aapl_bar = (
    alt.Chart(aapl_df)
    .mark_bar()
    .encode(
        x=alt.X('date:T', title=""),
        y=alt.Y('volume', title="Volume"),
    )
    .properties(width=700, title="Apple: Daily Volume 2020")
)
# Last expression of the cell, so the notebook renders the chart.
aapl_bar
Similarly, we can plot the price (adjusted price) too.
# Adjusted closing price for Apple, drawn as a line over time.
aapl_line = (
    alt.Chart(aapl_df)
    .mark_line()
    .encode(
        x=alt.X('date:T', title=""),
        y=alt.Y('adjusted', title="Price"),
    )
    .properties(width=700, height=200, title="Apple: Daily Price 2020")
)
# Last expression of the cell, so the notebook renders the chart.
aapl_line
Compound Chart¶
Let us add them together using layers. Here, we are plotting Price and Volume in the same chart but on separate y-axes (y1, y2).
# Shared starting point: both layers reuse the same data and x encoding.
base = alt.Chart(aapl_df).mark_line().encode(x=alt.X('date:T', title=""))

# Volume layer: the mark is switched to bars on top of the shared base.
bar = base.mark_bar().encode(
    y=alt.Y('volume', title="Volume"),
    tooltip=alt.Tooltip(['date', 'volume', 'adjusted']),
)

# Price layer: an orange line showing the adjusted price.
line = base.mark_line(color='orange').encode(
    y=alt.Y('adjusted', title="Price"),
    tooltip=alt.Tooltip(['date', 'volume', 'adjusted']),
)

# Stack the layers and give each its own y scale, since volume and price
# are encoded on the same channel but are different quantities.
alt.layer(bar, line).resolve_scale(y='independent').properties(
    title="Apple: Price and Volume Chart",
    width=600,
)
The panic sell-off in March is very evident in this plot.
Since we have data for several stocks, it is better to write functions that create the plots.
def create_base(stock_df, start_date='2020-01-01'):
    """Build the shared base chart that the price and volume plots extend.

    Args:
        stock_df: DataFrame with a ``date`` column; only rows whose ``date``
            is strictly greater than ``start_date`` are passed to the chart.
        start_date: Exclusive lower bound for ``date``. Defaults to
            ``'2020-01-01'``, the cutoff that was previously hard-coded.

    Returns:
        An Altair chart with a line mark and only the x channel encoded
        (``date`` as temporal, with an empty axis title).
    """
    recent_rows = stock_df[stock_df['date'] > start_date]
    return alt.Chart(recent_rows).mark_line().encode(
        alt.X('date:T', title=""),
    )
def plot_line(stock_df, base, color='magenta', width=700, height=400, date_labels=None,
              xlab="Date", y1_lab="Volume", y2_lab="Price", y1_domain=None, y2_domain=None,
              label_angle=45):
    """Draw the adjusted price as a line on top of ``base``.

    Args:
        stock_df: DataFrame with an ``adjusted`` column; used only to derive
            the default y domain.
        base: Altair chart to extend (for example the result of
            ``create_base``).
        color: Colour of the line mark.
        width: Chart width passed to ``properties``.
        height: Chart height passed to ``properties``.
        y2_lab: Title of the price (y) axis.
        y2_domain: ``(min, max)`` domain of the price axis. Defaults to the
            minimum and maximum of ``stock_df.adjusted``.
        date_labels, xlab, y1_lab, y1_domain, label_angle: Accepted so the
            signature mirrors ``plot_bar``; this function does not use them.

    Returns:
        The line chart with the ``adjusted`` column on the y channel.
    """
    # The unused defaults for ``date_labels`` and ``y1_domain`` are no longer
    # computed: their values were never read, and ``date.unique()`` scanned
    # the whole column for nothing.
    if y2_domain is None:
        y2_domain = (stock_df.adjusted.min(), stock_df.adjusted.max())
    return base.mark_line(color=color).encode(
        alt.Y("adjusted:Q", title=y2_lab, scale=alt.Scale(domain=y2_domain)),
    ).properties(height=height, width=width)
def plot_bar(stock_df, base, color='magenta', width=700, height=400, date_labels=None,
             xlab="Date", y1_lab="Volume", y2_lab="Price", y1_domain=None, y2_domain=None,
             label_angle=45):
    """Draw the traded volume as semi-transparent bars on top of ``base``.

    Args:
        stock_df: DataFrame with a ``volume`` column; used only to derive the
            default y domain.
        base: Altair chart to extend (for example the result of
            ``create_base``).
        width: Chart width passed to ``properties``.
        height: Chart height passed to ``properties``.
        y1_lab: Title of the volume (y) axis.
        y1_domain: ``(min, max)`` domain of the volume axis. Defaults to the
            minimum and maximum of ``stock_df.volume``.
        color, date_labels, xlab, y2_lab, y2_domain, label_angle: Accepted so
            the signature mirrors ``plot_line``; this function does not use
            them (the bar mark is created without a colour argument).

    Returns:
        The bar chart with the ``volume`` column on the y channel; the axis
        uses the ``'s'`` number format.
    """
    # The unused defaults for ``date_labels`` and ``y2_domain`` are no longer
    # computed: their values were never read.
    if y1_domain is None:
        y1_domain = (stock_df.volume.min(), stock_df.volume.max())
    return base.mark_bar(opacity=0.7).encode(
        alt.Y('volume:Q', title=y1_lab, scale=alt.Scale(domain=y1_domain), axis=alt.Axis(format='s')),
    ).properties(height=height, width=width)
Another Price + Volume Chart¶
Let us now create the price and volume chart, which is more common. We concatenate the plots using vconcat and adjust the heights. The bottom chart is interactive, meaning you can use the mouse to zoom in and out.
# Both panels extend the same base chart built from the Apple data.
aapl_base = create_base(aapl_df)

# Tall price panel on top, short volume panel underneath.
aapl_line = plot_line(
    aapl_df, aapl_base,
    color="orange", width=700, height=200, xlab="", label_angle=0,
)
aapl_bar = plot_bar(
    aapl_df, aapl_base,
    width=700, height=80, label_angle=0,
)

# Only the volume panel is made interactive (pan/zoom).
alt.vconcat(
    aapl_line,
    aapl_bar.interactive(),
).properties(title="APPLE: 2020 Price Volume Chart")
The zoom in/out is of little use in this plot, so let us focus on selections. Selections allow us to highlight a certain part of the plot.
Cross Filtering¶
We are going to add brush selection to the price chart and change the plot colors based on user selection.
💡 You can use your mouse to select a part of the line chart to see it in action.
# Interval selection restricted to the x channel.
brush = alt.selection_interval(encodings=['x'])

# Inside the brush, colour by date; everywhere else use light gray.
# Using the date as the colour feels like a hack, and it is also what
# causes the blue gradient on the bar chart.
color = alt.condition(
    brush,
    alt.Color('date:T', legend=None),
    alt.value('lightgray'),
)

# The price chart owns the selection; the volume chart only reacts to it.
upper = aapl_line.add_selection(brush)
lower = aapl_bar.encode(y=alt.Y('volume:Q'), color=color)

alt.vconcat(upper, lower).properties(title="APPLE: 2020 Price Volume Chart")
More Datasources¶
We can now start to bring in other data sources such as covid-19 data (source: our world in data).
covid_cases = pd.read_csv("owid-covid-data.csv", parse_dates=['date'])
covid_cases.info()
covid_cases.head(1)
Covid Related Cases & Deaths¶
# Sum the numeric columns per continent and day. ``sum(numeric_only=True)``
# replaces ``agg(sum)``: passing the builtin ``sum`` is deprecated in recent
# pandas, and text columns must be kept out of the aggregation.
covid_deaths_plot = alt.Chart(
    covid_cases.groupby(['continent', 'date']).sum(numeric_only=True).reset_index()
).mark_area(opacity=0.7, clip=True, color='red').encode(
    # Align the x axis with the date span of the Apple data.
    alt.X('date:T', scale=alt.Scale(domain=(aapl_df.date.min(), aapl_df.date.max()))),
    alt.Y('total_deaths:Q', title="Total Deaths", axis=alt.Axis(format='s')),
    color='continent:N'
).properties(
    title="Total deaths due to Covid-19",
    width=700
)
covid_deaths_plot
# Total Deaths (not grouped by continent anymore)
# ``sum(numeric_only=True)`` replaces ``agg(sum)`` so that text columns stay
# out of the aggregation. The former ``color='continent:N'`` encoding is
# dropped: ``continent`` is not a grouping key here, so the aggregated frame
# has no meaningful continent value to colour by; the mark colour applies.
# NOTE(review): every row for a date is summed; if the CSV also contains
# pre-aggregated rows (e.g. regional or world totals) they are counted too.
covid_deaths_plot = alt.Chart(
    covid_cases.groupby(['date']).sum(numeric_only=True).reset_index()
).mark_bar(opacity=0.7, clip=True, color='red').encode(
    # Align the x axis with the date span of the Apple data.
    alt.X('date:T', scale=alt.Scale(domain=(aapl_df.date.min(), aapl_df.date.max()))),
    alt.Y('total_deaths:Q', title="Total Deaths", axis=alt.Axis(format='s')),
).properties(
    title="Total deaths due to Covid-19",
    width=700
)
# Daily new cases per continent. ``sum(numeric_only=True)`` replaces
# ``agg(sum)``: passing the builtin ``sum`` is deprecated in recent pandas,
# and text columns must be kept out of the aggregation.
covid_cases_plot = alt.Chart(
    covid_cases.groupby(['continent', 'date']).sum(numeric_only=True).reset_index()
).mark_bar(opacity=0.8, clip=True, color='red').encode(
    # Align the x axis with the date span of the Apple data.
    alt.X('date:T', scale=alt.Scale(domain=(aapl_df.date.min(), aapl_df.date.max())), title=""),
    # ``stack='normalize'`` is passed through unchanged from the original cell.
    alt.Y('new_cases:Q', title="New Cases", axis=alt.Axis(format='s'), stack='normalize'),
    color='continent:N'
).properties(
    width=700
)
covid_cases_plot.interactive()
# Daily New Cases (not grouped by continent anymore)
# ``sum(numeric_only=True)`` replaces ``agg(sum)`` so that text columns stay
# out of the aggregation. The former ``color='continent:N'`` encoding is
# dropped: ``continent`` is not a grouping key here, so the aggregated frame
# has no meaningful continent value to colour by; the mark colour applies.
# NOTE(review): every row for a date is summed; if the CSV also contains
# pre-aggregated rows (e.g. regional or world totals) they are counted too.
covid_cases_plot = alt.Chart(
    covid_cases.groupby(['date']).sum(numeric_only=True).reset_index()
).mark_bar(opacity=0.7, clip=True, color='red').encode(
    # Align the x axis with the date span of the Apple data.
    alt.X('date:T', scale=alt.Scale(domain=(aapl_df.date.min(), aapl_df.date.max()))),
    alt.Y('new_cases:Q', title="New Cases", axis=alt.Axis(format='s')),
).properties(
    width=700
)
Linking Plots¶
Now, we can add the covid plots to the price volume chart to observe the pandemic's impact on the stock price. As is known, stock prices took a beating only in March 2020 and have been rising since then.
# Interval selection restricted to the x channel, shared by all four panels.
brush = alt.selection_interval(encodings=['x'])

# Inside the brush, colour by date; everywhere else use light gray.
# Using the date as the colour feels like a hack, and it is also what
# causes the blue gradient on the bar chart.
color = alt.condition(
    brush,
    alt.Color('date:T', legend=None),
    alt.value('lightgray'),
)

# The price chart owns the selection; the remaining panels only react to it.
upper = aapl_line.add_selection(brush)
lower = aapl_bar.encode(
    y=alt.Y('volume:Q', axis=alt.Axis(format='s')),
    color=color,
)

# Resize the covid charts to short strips and recolour them via the brush.
covid_deaths = covid_deaths_plot.properties(width=700, height=100).encode(
    y=alt.Y('total_deaths:Q', title="Covid Deaths", axis=alt.Axis(format='s')),
    color=color,
)
covid_new_cases = covid_cases_plot.properties(width=700, height=100).encode(
    y=alt.Y('new_cases:Q', title="New Cases", axis=alt.Axis(format='s')),
    color=color,
)

alt.vconcat(
    upper,
    lower,
    covid_deaths,
    covid_new_cases,
).properties(title="APPLE: 2020 Price Volume Chart")
You can now select a range on the price chart and see all the other charts highlighted accordingly.
Trellis Chart¶
And since we have the data for other stocks too, we can create a trellis plot.
# Modelled on the gallery example:
# https://altair-viz.github.io/gallery/trellis_area_sort_array.html
# One area chart per symbol, stacked as rows; BTC-USD is filtered out.
(
    alt.Chart(df)
    .transform_filter(alt.datum.symbol != 'BTC-USD')
    .mark_area(opacity=0.7)
    .encode(
        x=alt.X('date:T', title=""),
        y=alt.Y('adjusted:Q', title='Price'),
        tooltip=alt.Tooltip(['date', 'adjusted', 'symbol']),
        color=alt.Color('symbol:N', title=""),
        row=alt.Row('symbol:N', title=""),
    )
    .properties(title="2020: Adjusted Stock Price", height=100, width=700)
    .interactive()
)
Yeah, you can hardly see AAPL & TSLA. Since we have added interactivity (i.e., .interactive()), you can zoom into the plots.
Question for you: "What else would you do to improve the trellis plots?"
Your Turn¶
Go Ahead and make some plots.