Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit b6d3f7c

Browse files
sasoripathos authored and timhoffm committed
Feature: draw percentiles in violinplot (#14107)
* Fix issue 10788 * Fix style * Add missing import statement * Remove changes * Code for feature 8532 * Run boilerplate * Fix code style * Remove unused test images * Update pyplot.py * Update pyplot.py * Run boilerplate * Remove extra image-compare tests, update documentation * Use np.quantile instead of percentile * Update test images for violinplot * Revert "Update test images for violinplot" This reverts commit ee5cd08. * Use np.percentile to implement quantile * Update api from percentiles to quantiles * Update violinplot call in test cases * Update documentation from percentiles to quantiles * Update docstring and backward compatibility Add next API change document for violin_stats in cbook. Make violin() more backward compatible by making quantiles key optional. Update smoke test for violin number and quantile lists number mismatch. * Remove unnecessary api change doc
1 parent 8f62234 commit b6d3f7c
Copy full SHA for b6d3f7c

File tree

Expand file treeCollapse file tree

7 files changed

+146
-15
lines changed
Filter options
Expand file treeCollapse file tree

7 files changed

+146
-15
lines changed

‎examples/statistics/violinplot.py

Copy file name to clipboardExpand all lines: examples/statistics/violinplot.py
+27-4Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
pos = [1, 2, 4, 5, 7, 8]
2929
data = [np.random.normal(0, std, size=100) for std in pos]
3030

31-
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6, 6))
31+
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(10, 6))
3232

3333
axes[0, 0].violinplot(data, pos, points=20, widths=0.3,
3434
showmeans=True, showextrema=True, showmedians=True)
@@ -43,19 +43,42 @@
4343
showextrema=True, showmedians=True, bw_method=0.5)
4444
axes[0, 2].set_title('Custom violinplot 3', fontsize=fs)
4545

46+
axes[0, 3].violinplot(data, pos, points=60, widths=0.7, showmeans=True,
47+
showextrema=True, showmedians=True, bw_method=0.5,
48+
quantiles=[[0.1], [], [], [0.175, 0.954], [0.75],
49+
[0.25]])
50+
axes[0, 3].set_title('Custom violinplot 4', fontsize=fs)
51+
52+
axes[0, 4].violinplot(data[-1:], pos[-1:], points=60, widths=0.7,
53+
showmeans=True, showextrema=True, showmedians=True,
54+
quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5)
55+
axes[0, 4].set_title('Custom violinplot 5', fontsize=fs)
56+
4657
axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7,
4758
showmeans=True, showextrema=True, showmedians=True)
48-
axes[1, 0].set_title('Custom violinplot 4', fontsize=fs)
59+
axes[1, 0].set_title('Custom violinplot 6', fontsize=fs)
4960

5061
axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9,
5162
showmeans=True, showextrema=True, showmedians=True,
5263
bw_method='silverman')
53-
axes[1, 1].set_title('Custom violinplot 5', fontsize=fs)
64+
axes[1, 1].set_title('Custom violinplot 7', fontsize=fs)
5465

5566
axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1,
5667
showmeans=True, showextrema=True, showmedians=True,
5768
bw_method=0.5)
58-
axes[1, 2].set_title('Custom violinplot 6', fontsize=fs)
69+
axes[1, 2].set_title('Custom violinplot 8', fontsize=fs)
70+
71+
axes[1, 3].violinplot(data, pos, points=200, vert=False, widths=1.1,
72+
showmeans=True, showextrema=True, showmedians=True,
73+
quantiles=[[0.1], [], [], [0.175, 0.954], [0.75],
74+
[0.25]],
75+
bw_method=0.5)
76+
axes[1, 3].set_title('Custom violinplot 9', fontsize=fs)
77+
78+
axes[1, 4].violinplot(data[-1:], pos[-1:], points=200, vert=False, widths=1.1,
79+
showmeans=True, showextrema=True, showmedians=True,
80+
quantiles=[0.05, 0.1, 0.8, 0.9], bw_method=0.5)
81+
axes[1, 4].set_title('Custom violinplot 10', fontsize=fs)
5982

6083
for ax in axes.flat:
6184
ax.set_yticklabels([])

‎lib/matplotlib/axes/_axes.py

Copy file name to clipboardExpand all lines: lib/matplotlib/axes/_axes.py
+45-4Lines changed: 45 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7880,14 +7880,14 @@ def matshow(self, Z, **kwargs):
78807880
@_preprocess_data(replace_names=["dataset"])
78817881
def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
78827882
showmeans=False, showextrema=True, showmedians=False,
7883-
points=100, bw_method=None):
7883+
quantiles=None, points=100, bw_method=None):
78847884
"""
78857885
Make a violin plot.
78867886
78877887
Make a violin plot for each column of *dataset* or each vector in
78887888
sequence *dataset*. Each filled area extends to represent the
78897889
entire data range, with optional lines at the mean, the median,
7890-
the minimum, and the maximum.
7890+
the minimum, the maximum, and user-specified quantiles.
78917891
78927892
Parameters
78937893
----------
@@ -7916,6 +7916,11 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
79167916
showmedians : bool, default = False
79177917
If `True`, will toggle rendering of the medians.
79187918
7919+
quantiles : array-like, default = None
7920+
If not None, a list of floats in the interval [0, 1] for each violin,
7921+
giving the quantiles that will be rendered for that
7922+
violin.
7923+
79197924
points : scalar, default = 100
79207925
Defines the number of points to evaluate each of the
79217926
gaussian kernel density estimations at.
@@ -7953,6 +7958,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5,
79537958
- ``cmedians``: A `~.collections.LineCollection` instance that
79547959
marks the median values of each of the violin's distribution.
79557960
7961+
- ``cquantiles``: A `~.collections.LineCollection` instance created
7962+
to identify the quantile values of each of the violin's
7963+
distribution.
7964+
79567965
"""
79577966

79587967
def _kde_method(X, coords):
@@ -7962,7 +7971,8 @@ def _kde_method(X, coords):
79627971
kde = mlab.GaussianKDE(X, bw_method)
79637972
return kde.evaluate(coords)
79647973

7965-
vpstats = cbook.violin_stats(dataset, _kde_method, points=points)
7974+
vpstats = cbook.violin_stats(dataset, _kde_method, points=points,
7975+
quantiles=quantiles)
79667976
return self.violin(vpstats, positions=positions, vert=vert,
79677977
widths=widths, showmeans=showmeans,
79687978
showextrema=showextrema, showmedians=showmedians)
@@ -7973,7 +7983,7 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
79737983
79747984
Draw a violin plot for each column of `vpstats`. Each filled area
79757985
extends to represent the entire data range, with optional lines at the
7976-
mean, the median, the minimum, and the maximum.
7986+
mean, the median, the minimum, the maximum, and the quantile values.
79777987
79787988
Parameters
79797989
----------
@@ -7997,6 +8007,11 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
79978007
79988008
- ``max``: The maximum value for this violin's dataset.
79998009
8010+
Optional keys are:
8011+
8012+
- ``quantiles``: A list of scalars containing the quantile values
8013+
for this violin's dataset.
8014+
80008015
positions : array-like, default = [1, 2, ..., n]
80018016
Sets the positions of the violins. The ticks and limits are
80028017
automatically set to match the positions.
@@ -8043,13 +8058,19 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
80438058
80448059
- ``cmedians``: A `~.collections.LineCollection` instance that
80458060
marks the median values of each of the violin's distribution.
8061+
8062+
- ``cquantiles``: A `~.collections.LineCollection` instance created
8063+
to identify the quantile values of each of the violin's
8064+
distribution.
8065+
80468066
"""
80478067

80488068
# Statistical quantities to be plotted on the violins
80498069
means = []
80508070
mins = []
80518071
maxes = []
80528072
medians = []
8073+
quantiles = np.asarray([])
80538074

80548075
# Collections to be returned
80558076
artists = {}
@@ -8106,6 +8127,10 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
81068127
mins.append(stats['min'])
81078128
maxes.append(stats['max'])
81088129
medians.append(stats['median'])
8130+
q = stats.get('quantiles')
8131+
if q is not None:
8132+
# If the 'quantiles' key exists, assume it's a list of floats
8133+
quantiles = np.concatenate((quantiles, q))
81098134
artists['bodies'] = bodies
81108135

81118136
# Render means
@@ -8129,6 +8154,22 @@ def violin(self, vpstats, positions=None, vert=True, widths=0.5,
81298154
pmaxes,
81308155
colors=edgecolor)
81318156

8157+
# Render quantile values
8158+
if quantiles.size > 0:
8159+
# Recalculate ranges for statistics lines for quantiles.
8160+
# ppmins are the left ends of the quantile lines
8161+
ppmins = np.asarray([])
8162+
# ppmaxs are the right ends of the quantile lines
8163+
ppmaxs = np.asarray([])
8164+
for stats, cmin, cmax in zip(vpstats, pmins, pmaxes):
8165+
q = stats.get('quantiles')
8166+
if q is not None:
8167+
ppmins = np.concatenate((ppmins, [cmin] * np.size(q)))
8168+
ppmaxs = np.concatenate((ppmaxs, [cmax] * np.size(q)))
8169+
# Start rendering
8170+
artists['cquantiles'] = perp_lines(quantiles, ppmins, ppmaxs,
8171+
colors=edgecolor)
8172+
81328173
return artists
81338174

81348175
# Methods that are entirely implemented in other modules.

‎lib/matplotlib/cbook/__init__.py

Copy file name to clipboardExpand all lines: lib/matplotlib/cbook/__init__.py
+24-2Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,7 +1431,7 @@ def _reshape_2D(X, name):
14311431
raise ValueError("{} must have 2 or fewer dimensions".format(name))
14321432

14331433

1434-
def violin_stats(X, method, points=100):
1434+
def violin_stats(X, method, points=100, quantiles=None):
14351435
"""
14361436
Returns a list of dictionaries of data which can be used to draw a series
14371437
of violin plots. See the `Returns` section below to view the required keys
@@ -1455,6 +1455,12 @@ def violin_stats(X, method, points=100):
14551455
Defines the number of points to evaluate each of the gaussian kernel
14561456
density estimates at.
14571457
1458+
quantiles : array-like, default = None
1459+
Defines (if not None) a list of floats in interval [0, 1] for each
1460+
column of data, which represents the quantiles that will be rendered
1461+
for that column of data. Must have 2 or fewer dimensions. A 1D array will
1462+
be treated as a singleton list containing it.
1463+
14581464
Returns
14591465
-------
14601466
@@ -1469,6 +1475,7 @@ def violin_stats(X, method, points=100):
14691475
- median: The median value for this column of data.
14701476
- min: The minimum value for this column of data.
14711477
- max: The maximum value for this column of data.
1478+
- quantiles: The quantile values for this column of data.
14721479
"""
14731480

14741481
# List of dictionaries describing each of the violins.
@@ -1477,13 +1484,27 @@ def violin_stats(X, method, points=100):
14771484
# Want X to be a list of data sequences
14781485
X = _reshape_2D(X, "X")
14791486

1480-
for x in X:
1487+
# Want quantiles to have the same shape as the data sequences
1488+
if quantiles is not None and len(quantiles) != 0:
1489+
quantiles = _reshape_2D(quantiles, "quantiles")
1490+
# Otherwise, mock quantiles if it is None or empty
1491+
else:
1492+
quantiles = [[]] * np.shape(X)[0]
1493+
1494+
# quantiles should have the same length as the dataset
1495+
if np.shape(X)[:1] != np.shape(quantiles)[:1]:
1496+
raise ValueError("List of violinplot statistics and quantiles values"
1497+
" must have the same length")
1498+
1499+
# Zip x and quantiles
1500+
for (x, q) in zip(X, quantiles):
14811501
# Dictionary of results for this distribution
14821502
stats = {}
14831503

14841504
# Calculate basic stats for the distribution
14851505
min_val = np.min(x)
14861506
max_val = np.max(x)
1507+
quantile_val = np.percentile(x, 100 * q)
14871508

14881509
# Evaluate the kernel density estimate
14891510
coords = np.linspace(min_val, max_val, points)
@@ -1495,6 +1516,7 @@ def violin_stats(X, method, points=100):
14951516
stats['median'] = np.median(x)
14961517
stats['min'] = min_val
14971518
stats['max'] = max_val
1519+
stats['quantiles'] = np.atleast_1d(quantile_val)
14981520

14991521
# Append to output
15001522
vpstats.append(stats)

‎lib/matplotlib/pyplot.py

Copy file name to clipboardExpand all lines: lib/matplotlib/pyplot.py
+4-3Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2998,12 +2998,13 @@ def triplot(*args, **kwargs):
29982998
def violinplot(
29992999
dataset, positions=None, vert=True, widths=0.5,
30003000
showmeans=False, showextrema=True, showmedians=False,
3001-
points=100, bw_method=None, *, data=None):
3001+
quantiles=None, points=100, bw_method=None, *, data=None):
30023002
return gca().violinplot(
30033003
dataset, positions=positions, vert=vert, widths=widths,
30043004
showmeans=showmeans, showextrema=showextrema,
3005-
showmedians=showmedians, points=points, bw_method=bw_method,
3006-
**({"data": data} if data is not None else {}))
3005+
showmedians=showmedians, quantiles=quantiles, points=points,
3006+
bw_method=bw_method, **({"data": data} if data is not None
3007+
else {}))
30073008

30083009

30093010
# Autogenerated by boilerplate.py. Do not edit as changes will be lost.
Loading
Loading

‎lib/matplotlib/tests/test_axes.py

Copy file name to clipboardExpand all lines: lib/matplotlib/tests/test_axes.py
+46-2Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2661,7 +2661,8 @@ def test_vert_violinplot_showall():
26612661
np.random.seed(316624790)
26622662
data = [np.random.normal(size=100) for i in range(4)]
26632663
ax.violinplot(data, positions=range(4), showmeans=1, showextrema=1,
2664-
showmedians=1)
2664+
showmedians=1,
2665+
quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]])
26652666

26662667

26672668
@image_comparison(baseline_images=['violinplot_vert_custompoints_10'],
@@ -2738,7 +2739,8 @@ def test_horiz_violinplot_showall():
27382739
np.random.seed(82762530)
27392740
data = [np.random.normal(size=100) for i in range(4)]
27402741
ax.violinplot(data, positions=range(4), vert=False, showmeans=1,
2741-
showextrema=1, showmedians=1)
2742+
showextrema=1, showmedians=1,
2743+
quantiles=[[0.1, 0.9], [0.2, 0.8], [0.3, 0.7], [0.4, 0.6]])
27422744

27432745

27442746
@image_comparison(baseline_images=['violinplot_horiz_custompoints_10'],
@@ -2781,6 +2783,48 @@ def test_violinplot_bad_widths():
27812783
ax.violinplot(data, positions=range(4), widths=[1, 2, 3])
27822784

27832785

2786+
def test_violinplot_bad_quantiles():
2787+
ax = plt.axes()
2788+
# First 9 digits of frac(sqrt(73))
2789+
np.random.seed(544003745)
2790+
data = [np.random.normal(size=100)]
2791+
2792+
# Different size quantile list and plots
2793+
with pytest.raises(ValueError):
2794+
ax.violinplot(data, quantiles=[[0.1, 0.2], [0.5, 0.7]])
2795+
2796+
2797+
def test_violinplot_outofrange_quantiles():
2798+
ax = plt.axes()
2799+
# First 9 digits of frac(sqrt(79))
2800+
np.random.seed(888194417)
2801+
data = [np.random.normal(size=100)]
2802+
2803+
# Quantile value above 1
2804+
with pytest.raises(ValueError):
2805+
ax.violinplot(data, quantiles=[[0.1, 0.2, 0.3, 1.05]])
2806+
2807+
# Quantile value below 0
2808+
with pytest.raises(ValueError):
2809+
ax.violinplot(data, quantiles=[[-0.05, 0.2, 0.3, 0.75]])
2810+
2811+
2812+
@check_figures_equal(extensions=["png"])
2813+
def test_violinplot_single_list_quantiles(fig_test, fig_ref):
2814+
# Ensures quantile list for 1D can be passed in as single list
2815+
# First 9 digits of frac(sqrt(83))
2816+
np.random.seed(110433579)
2817+
data = [np.random.normal(size=100)]
2818+
2819+
# Test image
2820+
ax = fig_test.subplots()
2821+
ax.violinplot(data, quantiles=[0.1, 0.3, 0.9])
2822+
2823+
# Reference image
2824+
ax = fig_ref.subplots()
2825+
ax.violinplot(data, quantiles=[[0.1, 0.3, 0.9]])
2826+
2827+
27842828
def test_manage_xticks():
27852829
_, ax = plt.subplots()
27862830
ax.set_xlim(0, 4)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.