2.3 Comparing Plotting Libraries and Declarative Visualizations

2.3 Comparing Plotting Libraries and Declarative Visualizations

from plotnine import *
from matplotlib import pyplot as plt
from plotnine import data
import plotly.express as px
import seaborn as sns
# Sample `mpg` DataFrame bundled with plotnine's `data` module; every plot
# on this page is drawn from it.
mpg = data.mpg

Bar Chart

# Display the DataFrame to see which columns are available for plotting.
mpg
manufacturer model displ year cyl trans drv cty hwy fl class
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
... ... ... ... ... ... ... ... ... ... ... ...
229 volkswagen passat 2.0 2008 4 auto(s6) f 19 28 p midsize
230 volkswagen passat 2.0 2008 4 manual(m6) f 21 29 p midsize
231 volkswagen passat 2.8 1999 6 auto(l5) f 16 26 p midsize
232 volkswagen passat 2.8 1999 6 manual(m5) f 18 26 p midsize
233 volkswagen passat 3.6 2008 6 auto(s6) f 17 26 p midsize

234 rows × 11 columns

# Pandas: count the rows per manufacturer (keeping first-seen order rather
# than sorting by count), then draw the counts as horizontal bars.
cars_per_make = mpg['manufacturer'].value_counts(sort=False)
make_bar_axes = cars_per_make.plot.barh()
# set_title returns the title Text object, which the notebook echoes.
make_bar_axes.set_title('Number of Cars by Make')
Text(0.5, 1.0, 'Number of Cars by Make')
../_images/1b66cf33fb3993a383ed5228eeba6afa992f4343bb33b3eefda2a7fe96eb07d9.png
# Plotnine (ggplot2 clone): declare the data and the x mapping, let
# geom_bar() count the rows per manufacturer, then flip the coordinates so
# the bars run horizontally.
(
    ggplot(mpg, aes(x='manufacturer'))
    + geom_bar()
    + coord_flip()
    + ggtitle('Number of Cars by Make')
)
../_images/c0ab579831fb573ac209571bde0e3d9db663d71750f2f70b14ef9510a0b5d3e0.png
# Plotly Express: px.bar draws the values it is given, so aggregate the
# number of rows per manufacturer into a 'count' column first.
counts_by_make = (
    mpg.groupby('manufacturer', observed=False)
    .size()
    .reset_index(name='count')
)

fig = px.bar(
    counts_by_make,
    x='count',
    y='manufacturer',
    orientation='h',
    title='Number of Cars by Make',
)

fig

Scatter Plot

# Pandas: bare scatter of displacement against highway mileage; the axis
# labels default to the column names.
mpg.plot.scatter(x='displ', y='hwy')
<Axes: xlabel='displ', ylabel='hwy'>
../_images/296a15bad33199ffa194244d667017edd7b6edf5264b9b7a79d7a682f48f1509.png
# Display the DataFrame again as a reminder of the available columns.
mpg
manufacturer model displ year cyl trans drv cty hwy fl class
0 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact
... ... ... ... ... ... ... ... ... ... ... ...
229 volkswagen passat 2.0 2008 4 auto(s6) f 19 28 p midsize
230 volkswagen passat 2.0 2008 4 manual(m6) f 21 29 p midsize
231 volkswagen passat 2.8 1999 6 auto(l5) f 16 26 p midsize
232 volkswagen passat 2.8 1999 6 manual(m5) f 18 26 p midsize
233 volkswagen passat 3.6 2008 6 auto(s6) f 17 26 p midsize

234 rows × 11 columns

# Pandas: same scatter, with a title and readable axis labels applied to the
# Axes that .plot.scatter() returns.
displ_hwy_axes = mpg.plot.scatter(x='displ', y='hwy')
# The trailing semicolon suppresses the notebook echo of .set()'s return value.
displ_hwy_axes.set(
    title='Engine Displacement in Liters vs Highway MPG',
    xlabel='Engine Displacement in Liters',
    ylabel='Highway MPG',
);
../_images/b856fac35c2a93cb13d4873ee698d4293b3784c139b3424590f613fb9e1a6491.png
# Plotnine: map displacement to x and highway mileage to y, draw one point
# per row, and set the title and both axis labels in a single labs() call.
(
    ggplot(mpg, aes(x='displ', y='hwy'))
    + geom_point()
    + labs(
        title='Engine Displacement in Liters vs Highway MPG',
        x='Engine Displacement in Liters',
        y='Highway MPG',
    )
)
../_images/0aebbe674a87eafc009d2e67c983dfa9a03af2ea4f623e52a97ce974149e2311.png
# Plotly Express: `labels` maps column names to the text shown on the axes
# and in hover tooltips.
axis_labels = {
    'displ': 'Engine Displacement in Liters',
    'hwy': 'Highway MPG',
}

fig = px.scatter(
    mpg,
    x='displ',
    y='hwy',
    title='Engine Displacement in Liters vs Highway MPG',
    labels=axis_labels,
)

fig.show()

Scatter Plot, Faceted with Color

# Matplotlib (pyplot interface): issue one scatter call per vehicle class so
# that each class is drawn as its own series.
# `observed` is passed explicitly because relying on the default raises a
# pandas FutureWarning for this grouping; observed=False keeps the current
# behavior and matches the groupby used for the Plotly bar chart.
for c, df in mpg.groupby('class', observed=False):
    plt.scatter(df['displ'], df['hwy'], label=c)
../_images/efb70788bcd19f34ce5986ed278c19c3da4683a107f0ffe5586fc9fea411c4e0.png
# Matplotlib (object-oriented interface): create the Figure/Axes pair
# explicitly and draw on that Axes rather than through pyplot's implicit
# "current axes".
fig, ax = plt.subplots()
# `observed` is passed explicitly because relying on the default raises a
# pandas FutureWarning for this grouping; observed=False keeps the current
# behavior.
for c, df in mpg.groupby('class', observed=False):
    ax.scatter(df['displ'], df['hwy'], label=c)
../_images/efb70788bcd19f34ce5986ed278c19c3da4683a107f0ffe5586fc9fea411c4e0.png
# Matplotlib (object-oriented interface): one scatter series per vehicle
# class, plus a legend, a title and readable axis labels.
fig, ax = plt.subplots()
# `observed` is passed explicitly because relying on the default raises a
# pandas FutureWarning for this grouping; observed=False keeps the current
# behavior.
for c, df in mpg.groupby('class', observed=False):
    ax.scatter(df['displ'], df['hwy'], label=c)
# The legend entries come from the `label` given to each scatter call.
ax.legend()
ax.set_title('Engine Displacement in Liters vs Highway MPG')
ax.set_xlabel('Engine Displacement in Liters')
ax.set_ylabel('Highway MPG')
Text(0, 0.5, 'Highway MPG')
../_images/61dfa400511c733b8f2d0261f03c3a7c0d46428ded8b0a41f7da681ebbe65b6c.png
# Seaborn: FacetGrid with only `hue` set splits the data by vehicle class,
# and .map() calls plt.scatter once per class on the same axes.
class_grid = sns.FacetGrid(mpg, hue='class', height=5)
class_grid.map(plt.scatter, 'displ', 'hwy')
class_grid.add_legend()
# .set() returns the grid itself, which the notebook echoes.
class_grid.set(
    title='Engine Displacement in Liters vs Highway MPG',
    xlabel='Engine Displacement in Liters',
    ylabel='Highway MPG',
)
<seaborn.axisgrid.FacetGrid at 0x34fe4bda0>
../_images/829fa8653ea7db16949e28ca59365a67da7b055f0efb0328e5747b311b3a3723.png
# Plotnine: adding color='class' to the aesthetic mapping colors the points
# by vehicle class and produces the legend; no manual grouping is needed.
(
    ggplot(mpg, aes(x='displ', y='hwy', color='class'))
    + geom_point()
    + labs(
        title='Engine Displacement in Liters vs Highway MPG',
        x='Engine Displacement in Liters',
        y='Highway MPG',
    )
)
../_images/d5289e5e622a108d30cd4bfe7fd11136603d067c654eb7dba3cd6156b380ac3a.png
# Plotly Express: color='class' draws one trace per vehicle class; `labels`
# renames the columns on the axes, in the legend title and in hover tooltips.
display_names = {
    'displ': 'Engine Displacement in Liters',
    'hwy': 'Highway MPG',
    'class': 'Vehicle Class',
}

fig = px.scatter(
    mpg,
    x='displ',
    y='hwy',
    color='class',
    title='Engine Displacement in Liters vs Highway MPG',
    labels=display_names,
)

fig.show()