Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 447a7cc

Browse filesBrowse files
committed
Merged @phobson's boxplot enhancements.
1 parent 41822d5 commit 447a7cc
Copy full SHA for 447a7cc

File tree

Expand file treeCollapse file tree

8 files changed

+670
-93
lines changed
Filter options
Expand file treeCollapse file tree

8 files changed

+670
-93
lines changed

‎doc/users/whats_new.rst

Copy file name to clipboardExpand all lines: doc/users/whats_new.rst
+12Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ Two new defaults are available in the matplotlibrc configuration file.
6565
These are savefig.bbox, which can be set to 'standard' or 'tight,' and
6666
savefig.pad_inches, which controls the bounding box padding.
6767

68+
69+
New Boxplot Functionality
70+
-------------------------
71+
72+
Users can now incorporate their own methods for computing the median and its
73+
confidence intervals into the boxplot method. For every column of data passed
74+
to boxplot, the user can specify an accompanying median and confidence
75+
interval.
76+
:meth:`matplotlib.axes.Axes.boxplot`
77+
.. plot:: examples/pylab_examples/boxplot_demo3.py
78+
79+
6880
.. _whats-new-1-1:
6981

7082
new in matplotlib-1.1

‎examples/pylab_examples/boxplot_demo3.py

Copy file name to clipboardExpand all lines: examples/pylab_examples/boxplot_demo3.py
+32-10Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,26 +2,48 @@
22
import matplotlib.transforms as mtransforms
33
import numpy as np
44

5+
def fakeBootStrapper(n):
6+
'''
7+
This is just a placeholder for the user's method of
8+
bootstrapping the median and its confidence intervals.
9+
10+
Returns an arbitrary median and confidence intervals
11+
packed into a tuple
12+
'''
13+
if n == 1:
14+
med = 0.1
15+
CI = (-0.25, 0.25)
16+
else:
17+
med = 0.2
18+
CI = (-0.35, 0.50)
19+
20+
return med, CI
21+
22+
23+
524
np.random.seed(2)
625
inc = 0.1
7-
e1 = np.random.uniform(0,1, size=(500,))
8-
e2 = np.random.uniform(0,1, size=(500,))
9-
e3 = np.random.uniform(0,1 + inc, size=(500,))
10-
e4 = np.random.uniform(0,1 + 2*inc, size=(500,))
26+
e1 = np.random.normal(0, 1, size=(500,))
27+
e2 = np.random.normal(0, 1, size=(500,))
28+
e3 = np.random.normal(0, 1 + inc, size=(500,))
29+
e4 = np.random.normal(0, 1 + 2*inc, size=(500,))
1130

1231
treatments = [e1,e2,e3,e4]
32+
med1, CI1 = fakeBootStrapper(1)
33+
med2, CI2 = fakeBootStrapper(2)
34+
medians = [None, None, med1, med2]
35+
conf_intervals = [None, None, CI1, CI2]
1336

1437
fig = plt.figure()
1538
ax = fig.add_subplot(111)
1639
pos = np.array(range(len(treatments)))+1
17-
bp = ax.boxplot( treatments, sym='k+', patch_artist=True,
18-
positions=pos, notch=1, bootstrap=5000 )
19-
text_transform= mtransforms.blended_transform_factory(ax.transData,
20-
ax.transAxes)
40+
bp = ax.boxplot(treatments, sym='k+', positions=pos,
41+
notch=1, bootstrap=5000,
42+
usermedians=medians,
43+
conf_intervals=conf_intervals)
44+
2145
ax.set_xlabel('treatment')
2246
ax.set_ylabel('response')
23-
ax.set_ylim(-0.2, 1.4)
2447
plt.setp(bp['whiskers'], color='k', linestyle='-' )
2548
plt.setp(bp['fliers'], markersize=3.0)
26-
fig.subplots_adjust(right=0.99,top=0.99)
2749
plt.show()

‎lib/matplotlib/axes.py

Copy file name to clipboardExpand all lines: lib/matplotlib/axes.py
+152-80Lines changed: 152 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import matplotlib.ticker as mticker
3737
import matplotlib.transforms as mtransforms
3838
import matplotlib.tri as mtri
39-
4039
from matplotlib.container import BarContainer, ErrorbarContainer, StemContainer
4140

4241
iterable = cbook.iterable
@@ -5470,14 +5469,15 @@ def xywhere(xs, ys, mask):
54705469

54715470
return errorbar_container # (l0, caplines, barcols)
54725471

5473-
def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
5472+
def boxplot(self, x, notch=False, sym='b+', vert=True, whis=1.5,
54745473
positions=None, widths=None, patch_artist=False,
5475-
bootstrap=None):
5474+
bootstrap=None, usermedians=None, conf_intervals=None):
54765475
"""
54775476
Call signature::
54785477
5479-
boxplot(x, notch=0, sym='+', vert=1, whis=1.5,
5480-
positions=None, widths=None, patch_artist=False)
5478+
boxplot(x, notch=False, sym='+', vert=True, whis=1.5,
5479+
positions=None, widths=None, patch_artist=False,
5480+
bootstrap=None, usermedians=None, conf_intervals=None)
54815481
54825482
Make a box and whisker plot for each column of *x* or each
54835483
vector in sequence *x*. The box extends from the lower to
@@ -5490,59 +5490,110 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
54905490
*x* :
54915491
Array or a sequence of vectors.
54925492
5493-
*notch* : [ 0 (default) | 1]
5494-
If 0, produce a rectangular box plot.
5495-
If 1, produce a notched box plot
5493+
*notch* : [ False (default) | True ]
5494+
If False (default), produces a rectangular box plot.
5495+
If True, will produce a notched box plot
54965496
5497-
*sym* :
5498-
(default 'b+') is the default symbol for flier points.
5497+
*sym* : [ default 'b+' ]
5498+
The default symbol for flier points.
54995499
Enter an empty string ('') if you don't want to show fliers.
55005500
5501-
*vert* : [1 (default) | 0]
5502-
If 1, make the boxes vertical.
5503-
If 0, make horizontal boxes. (Odd, but kept for compatibility
5504-
with MATLAB boxplots)
5501+
*vert* : [ False | True (default) ]
5502+
If True (default), makes the boxes vertical.
5503+
If False, makes horizontal boxes.
55055504
5506-
*whis* : (default 1.5)
5507-
Defines the length of the whiskers as
5508-
a function of the inner quartile range. They extend to the
5509-
most extreme data point within ( ``whis*(75%-25%)`` ) data range.
5505+
*whis* : [ default 1.5 ]
5506+
Defines the length of the whiskers as a function of the inner
5507+
quartile range. They extend to the most extreme data point
5508+
within ( ``whis*(75%-25%)`` ) data range.
55105509
55115510
*bootstrap* : [ *None* (default) | integer ]
55125511
Specifies whether to bootstrap the confidence intervals
5513-
around the median for notched boxplots. If *None*, no
5514-
bootstrapping is performed, and notches are calculated
5515-
using a Gaussian-based asymptotic approximation
5516-
(see McGill, R., Tukey, J.W., and Larsen, W.A.,
5517-
1978, and Kendall and Stuart, 1967). Otherwise, bootstrap
5518-
specifies the number of times to bootstrap the median to
5519-
determine its 95% confidence intervals. Values between 1000
5520-
and 10000 are recommended.
5521-
5522-
*positions* : (default 1,2,...,n)
5523-
Sets the horizontal positions of
5524-
the boxes. The ticks and limits are automatically set to match
5525-
the positions.
5526-
5527-
*widths* : [ scalar | array ]
5528-
Either a scalar or a vector to set the width of each box.
5529-
The default is 0.5, or ``0.15*(distance between extreme
5530-
positions)`` if that is smaller.
5531-
5532-
*patch_artist* : boolean
5533-
If *False* (default), produce boxes with the
5534-
:class:`~matplotlib.lines.Line2D` artist.
5535-
If *True*, produce boxes with the
5536-
:class:`~matplotlib.patches.Patch` artist.
5512+
around the median for notched boxplots. If bootstrap==None,
5513+
no bootstrapping is performed, and notches are calculated
5514+
using a Gaussian-based asymptotic approximation (see McGill, R.,
5515+
Tukey, J.W., and Larsen, W.A., 1978, and Kendall and Stuart,
5516+
1967). Otherwise, bootstrap specifies the number of times to
5517+
bootstrap the median to determine its 95% confidence intervals.
5518+
Values between 1000 and 10000 are recommended.
5519+
5520+
*usermedians* : [ default None ]
5521+
An array or sequence whose first dimension (or length) is
5522+
compatible with *x*. This overrides the medians computed by
5523+
matplotlib for each element of *usermedians* that is not None.
5524+
When an element of *usermedians* == None, the median will be
5525+
computed directly as normal.
5526+
5527+
*conf_intervals* : [ default None ]
5528+
Array or sequence whose first dimension (or length) is compatible
5529+
with *x* and whose second dimension is 2. When the current element
5530+
of *conf_intervals* is not None, the notch locations computed by
5531+
matplotlib are overridden (assuming notch is True). When an element of
5532+
*conf_intervals* is None, boxplot computes notches using the method
5533+
specified by the other kwargs (e.g. *bootstrap*).
5534+
5535+
*positions* : [ default 1,2,...,n ]
5536+
Sets the horizontal positions of the boxes. The ticks and limits
5537+
are automatically set to match the positions.
5538+
5539+
*widths* : [ default 0.5 ]
5540+
Either a scalar or a vector that sets the width of each box. The
5541+
default is 0.5, or ``0.15*(distance between extreme positions)``
5542+
if that is smaller.
5543+
5544+
*patch_artist* : [ False (default) | True ]
5545+
If False produces boxes with the Line2D artist
5546+
If True produces boxes with the Patch artist
55375547
55385548
Returns a dictionary mapping each component of the boxplot
5539-
to a list of the :class:`~matplotlib.lines.Line2D`
5540-
instances created (unless *patch_artist* was *True*. See above.).
5549+
to a list of the :class:`matplotlib.lines.Line2D`
5550+
instances created. That dictionary has the following keys
5551+
(assuming vertical boxplots):
5552+
boxes: the main body of the boxplot showing the quartiles
5553+
and the median's confidence intervals if enabled.
5554+
medians: horizontal lines at the median of each box.
5555+
whiskers: the vertical lines extending to the most extreme,
5556+
non-outlier data points.
5557+
caps: the horizontal lines at the ends of the whiskers.
5558+
fliers: points representing data that extend beyond the
5559+
whiskers (outliers).
5560+
55415561
55425562
**Example:**
55435563
55445564
.. plot:: pyplots/boxplot_demo.py
55455565
"""
5566+
def bootstrapMedian(data, N=5000):
5567+
# determine 95% confidence intervals of the median
5568+
M = len(data)
5569+
percentile = [2.5,97.5]
5570+
estimate = np.zeros(N)
5571+
for n in range(N):
5572+
bsIndex = np.random.random_integers(0,M-1,M)
5573+
bsData = data[bsIndex]
5574+
estimate[n] = mlab.prctile(bsData, 50)
5575+
CI = mlab.prctile(estimate, percentile)
5576+
return CI
5577+
5578+
def computeConfInterval(data, med, iq, bootstrap):
5579+
if bootstrap is not None:
5580+
# Do a bootstrap estimate of notch locations.
5581+
# get conf. intervals around median
5582+
CI = bootstrapMedian(data, N=bootstrap)
5583+
notch_min = CI[0]
5584+
notch_max = CI[1]
5585+
else:
5586+
# Estimate notch locations using Gaussian-based
5587+
# asymptotic approximation.
5588+
#
5589+
# For discussion: McGill, R., Tukey, J.W.,
5590+
# and Larsen, W.A. (1978) "Variations of
5591+
# Boxplots", The American Statistician, 32:12-16.
5592+
N = len(data)
5593+
notch_min = med - 1.57*iq/np.sqrt(N)
5594+
notch_max = med + 1.57*iq/np.sqrt(N)
5595+
return notch_min, notch_max
5596+
55465597
if not self._hold: self.cla()
55475598
holdStatus = self._hold
55485599
whiskers, caps, boxes, medians, fliers = [], [], [], [], []
@@ -5568,6 +5619,38 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55685619
x = [x]
55695620
col = len(x)
55705621

5622+
# sanitize user-input medians
5623+
msg1 = "usermedians must either be a list/tuple or a 1d array"
5624+
msg2 = "usermedians' length must be compatible with x"
5625+
if usermedians is not None:
5626+
if hasattr(usermedians, 'shape'):
5627+
if len(usermedians.shape) != 1:
5628+
raise ValueError(msg1)
5629+
elif usermedians.shape[0] != col:
5630+
raise ValueError(msg2)
5631+
elif len(usermedians) != col:
5632+
raise ValueError(msg2)
5633+
5634+
#sanitize user-input confidence intervals
5635+
msg1 = "conf_intervals must either be a list of tuples or a 2d array"
5636+
msg2 = "conf_intervals' length must be compatible with x"
5637+
msg3 = "each conf_interval, if specificied, must have two values"
5638+
if conf_intervals is not None:
5639+
if hasattr(conf_intervals, 'shape'):
5640+
if len(conf_intervals.shape) != 2:
5641+
raise ValueError(msg1)
5642+
elif conf_intervals.shape[0] != col:
5643+
raise ValueError(msg2)
5644+
elif conf_intervals.shape[1] == 2:
5645+
raise ValueError(msg3)
5646+
else:
5647+
if len(conf_intervals) != col:
5648+
raise ValueError(msg2)
5649+
for ci in conf_intervals:
5650+
if ci is not None and len(ci) != 2:
5651+
raise ValueError(msg3)
5652+
5653+
55715654
# get some plot info
55725655
if positions is None:
55735656
positions = range(1, col + 1)
@@ -5579,14 +5662,21 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
55795662

55805663
# loop through columns, adding each to plot
55815664
self.hold(True)
5582-
for i,pos in enumerate(positions):
5665+
for i, pos in enumerate(positions):
55835666
d = np.ravel(x[i])
55845667
row = len(d)
55855668
if row==0:
55865669
# no data, skip this position
55875670
continue
5671+
55885672
# get median and quartiles
55895673
q1, med, q3 = mlab.prctile(d,[25,50,75])
5674+
5675+
# replace with input medians if available
5676+
if usermedians is not None:
5677+
if usermedians[i] is not None:
5678+
med = usermedians[i]
5679+
55905680
# get high extreme
55915681
iq = q3 - q1
55925682
hi_val = q3 + whis*iq
@@ -5626,42 +5716,16 @@ def boxplot(self, x, notch=0, sym='b+', vert=1, whis=1.5,
56265716
# get y location for median
56275717
med_y = [med, med]
56285718

5629-
# calculate 'regular' plot
5630-
if notch == 0:
5631-
# make our box vectors
5632-
box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min ]
5633-
box_y = [q1, q1, q3, q3, q1 ]
5634-
# make our median line vectors
5635-
med_x = [box_x_min, box_x_max]
56365719
# calculate 'notch' plot
5637-
else:
5638-
if bootstrap is not None:
5639-
# Do a bootstrap estimate of notch locations.
5640-
def bootstrapMedian(data, N=5000):
5641-
# determine 95% confidence intervals of the median
5642-
M = len(data)
5643-
percentile = [2.5,97.5]
5644-
estimate = np.zeros(N)
5645-
for n in range(N):
5646-
bsIndex = np.random.random_integers(0,M-1,M)
5647-
bsData = data[bsIndex]
5648-
estimate[n] = mlab.prctile(bsData, 50)
5649-
CI = mlab.prctile(estimate, percentile)
5650-
return CI
5651-
5652-
# get conf. intervals around median
5653-
CI = bootstrapMedian(d, N=bootstrap)
5654-
notch_max = CI[1]
5655-
notch_min = CI[0]
5720+
if notch:
5721+
# conf. intervals from user, if available
5722+
if conf_intervals is not None and conf_intervals[i] is not None:
5723+
notch_max = np.max(conf_intervals[i])
5724+
notch_min = np.min(conf_intervals[i])
56565725
else:
5657-
# Estimate notch locations using Gaussian-based
5658-
# asymptotic approximation.
5659-
#
5660-
# For discussion: McGill, R., Tukey, J.W.,
5661-
# and Larsen, W.A. (1978) "Variations of
5662-
# Boxplots", The American Statistician, 32:12-16.
5663-
notch_max = med + 1.57*iq/np.sqrt(row)
5664-
notch_min = med - 1.57*iq/np.sqrt(row)
5726+
notch_min, notch_max = computeConfInterval(d, med, iq,
5727+
bootstrap)
5728+
56655729
# make our notched box vectors
56665730
box_x = [box_x_min, box_x_max, box_x_max, cap_x_max, box_x_max,
56675731
box_x_max, box_x_min, box_x_min, cap_x_min, box_x_min,
@@ -5671,6 +5735,13 @@ def bootstrapMedian(data, N=5000):
56715735
# make our median line vectors
56725736
med_x = [cap_x_min, cap_x_max]
56735737
med_y = [med, med]
5738+
# calculate 'regular' plot
5739+
else:
5740+
# make our box vectors
5741+
box_x = [box_x_min, box_x_max, box_x_max, box_x_min, box_x_min ]
5742+
box_y = [q1, q1, q3, q3, q1 ]
5743+
# make our median line vectors
5744+
med_x = [box_x_min, box_x_max]
56745745

56755746
def to_vc(xs,ys):
56765747
# convert arguments to verts and codes
@@ -5720,12 +5791,13 @@ def dopatch(xs,ys):
57205791
boxes.extend(dopatch(box_x, box_y))
57215792
else:
57225793
boxes.extend(doplot(box_x, box_y, 'b-'))
5794+
57235795
medians.extend(doplot(med_x, med_y, median_color+'-'))
57245796
fliers.extend(doplot(flier_hi_x, flier_hi, sym,
57255797
flier_lo_x, flier_lo, sym))
57265798

57275799
# fix our axes/ticks up a little
5728-
if 1 == vert:
5800+
if vert:
57295801
setticks, setlim = self.set_xticks, self.set_xlim
57305802
else:
57315803
setticks, setlim = self.set_yticks, self.set_ylim

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.