%sqlplot
#
New in version 0.5.2.
Note
%sqlplot
requires matplotlib
: pip install matplotlib
and this example requires
duckdb-engine: pip install duckdb-engine
%load_ext sql
%sql duckdb://
from pathlib import Path
from urllib.request import urlretrieve
if not Path("penguins.csv").is_file():
urlretrieve(
"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv",
"penguins.csv",
)
%%sql
SELECT * FROM "penguins.csv" LIMIT 3
* duckdb://
Done.
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex |
---|---|---|---|---|---|---|
Adelie | Torgersen | 39.1 | 18.7 | 181 | 3750 | MALE |
Adelie | Torgersen | 39.5 | 17.4 | 186 | 3800 | FEMALE |
Adelie | Torgersen | 40.3 | 18.0 | 195 | 3250 | FEMALE |
%sqlplot boxplot
#
Note
To use %sqlplot boxplot
, your SQL engine must support:
percentile_disc(...) WITHIN GROUP (ORDER BY ...)
Shortcut: %sqlplot box
-t
/--table
Table to use (if using DuckDB: path to the file to query)
-c
/--column
Column(s) to plot. You might pass one than one value (e.g., -c a b c
)
-o
/--orient
Boxplot orientation (h
for horizontal, v
for vertical)
-w
/--with
Use a previously saved query as input data
%sqlplot boxplot --table penguins.csv --column body_mass_g
<Axes: title={'center': "'body_mass_g' from 'penguins.csv'"}, ylabel='body_mass_g'>

Transform data before plotting#
%%sql
SELECT island, COUNT(*)
FROM penguins.csv
GROUP BY island
ORDER BY COUNT(*) DESC
* duckdb://
Done.
island | count_star() |
---|---|
Biscoe | 168 |
Dream | 124 |
Torgersen | 52 |
%%sql --save biscoe --no-execute
SELECT *
FROM penguins.csv
WHERE island = 'Biscoe'
* duckdb://
Skipping execution...
%sqlplot boxplot --table biscoe --column body_mass_g --with biscoe
<Axes: title={'center': "'body_mass_g' from 'biscoe'"}, ylabel='body_mass_g'>

Horizontal boxplot#
%sqlplot boxplot --table penguins.csv --column bill_length_mm --orient h
<Axes: title={'center': "'bill_length_mm' from 'penguins.csv'"}, xlabel='bill_length_mm'>

Multiple columns#
%sqlplot boxplot --table penguins.csv --column bill_length_mm bill_depth_mm flipper_length_mm
<Axes: title={'center': "Boxplot from 'penguins.csv'"}>

%sqlplot histogram
#
Shortcut: %sqlplot hist
-t
/--table
Table to use (if using DuckDB: path to the file to query)
-c
/--column
Column to plot
-b
/--bins
(default: 50
) Number of bins
-w
/--with
Use a previously saved query as input data
Histogram does not support NULL values, so let’s remove them:
%%sql --save no_nulls --no-execute
SELECT *
FROM penguins.csv
WHERE body_mass_g IS NOT NULL
* duckdb://
Skipping execution...
%sqlplot histogram --table no_nulls --column body_mass_g --with no_nulls
<Axes: title={'center': "'body_mass_g' from 'no_nulls'"}, xlabel='body_mass_g', ylabel='Count'>

Number of bins#
%sqlplot histogram --table no_nulls --column body_mass_g --with no_nulls --bins 100
<Axes: title={'center': "'body_mass_g' from 'no_nulls'"}, xlabel='body_mass_g', ylabel='Count'>

Multiple columns#
%sqlplot histogram --table no_nulls --column bill_length_mm bill_depth_mm --with no_nulls
<Axes: title={'center': "Histogram from 'no_nulls'"}, ylabel='Count'>

Customize plot#
%sqlplot
returns a matplotlib.Axes
object.
ax = %sqlplot histogram --table no_nulls --column body_mass_g --with no_nulls
ax.set_title("Body mass (grams)")
_ = ax.grid()
