Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit a3b2482

Browse files
khchansolvents
authored and committed
Fixed bug for regression test matplotlib#1181 in scipy unit tests; ksdensity is now referred to as gaussian_kde and exists as a class in mlab.
Fixed list comp position bug and updated examples
1 parent b1358df commit a3b2482
Copy full SHA for a3b2482

File tree

Expand file tree / Collapse file tree

3 files changed

+149
-122
lines changed
Filter options
Expand file tree / Collapse file tree

3 files changed

+149
-122
lines changed

‎examples/statistics/violinplot_demo.py

Copy file name to clipboard | Expand all lines: examples/statistics/violinplot_demo.py
+13 -7 | Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
# fake data
1010
fs = 10 # fontsize
11-
pos = range(5)
11+
pos = [1,2,4,5,7,8]
1212
data = [np.random.normal(size=100) for i in pos]
1313

1414
# TODO: future customizability dicts go here
@@ -25,22 +25,28 @@
2525

2626
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6,6))
2727

28-
axes[0, 0].violinplot(data, pos, width=0.1)
28+
axes[0, 0].violinplot(data, pos, points=20, widths=0.1, showmeans=True,
29+
showextrema=True, showmedians=True)
2930
axes[0, 0].set_title('Custom violinplot 1', fontsize=fs)
3031

31-
axes[0, 1].violinplot(data, pos, width=0.3)
32+
axes[0, 1].violinplot(data, pos, points=40, widths=0.3, showmeans=True,
33+
showextrema=True, showmedians=True)
3234
axes[0, 1].set_title('Custom violinplot 2', fontsize=fs)
3335

34-
axes[0, 2].violinplot(data, pos, width=0.5)
36+
axes[0, 2].violinplot(data, pos, points=60, widths=0.5, showmeans=True,
37+
showextrema=True, showmedians=True)
3538
axes[0, 2].set_title('Custom violinplot 3', fontsize=fs)
3639

37-
axes[1, 0].violinplot(data, pos, width=0.7)
40+
axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7,
41+
showmeans=True, showextrema=True, showmedians=True)
3842
axes[1, 0].set_title('Custom violinplot 4', fontsize=fs)
3943

40-
axes[1, 1].violinplot(data, pos, width=0.9)
44+
axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9,
45+
showmeans=True, showextrema=True, showmedians=True)
4146
axes[1, 1].set_title('Custom violinplot 5', fontsize=fs)
4247

43-
axes[1, 2].violinplot(data, pos, width=1.1)
48+
axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1,
49+
showmeans=True, showextrema=True, showmedians=True)
4450
axes[1, 2].set_title('Custom violinplot 6', fontsize=fs)
4551

4652
for ax in axes.flatten():

‎lib/matplotlib/axes/_axes.py

Copy file name to clipboard | Expand all lines: lib/matplotlib/axes/_axes.py
+19 -16 | Lines changed: 19 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -6651,7 +6651,7 @@ def matshow(self, Z, **kwargs):
66516651
integer=True))
66526652
return im
66536653

6654-
def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False,
6654+
def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5, showmeans=False,
66556655
showextrema=True, showmedians=False):
66566656
"""
66576657
Make a violin plot.
@@ -6674,6 +6674,9 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
66746674
positions : array-like, default = [1, 2, ..., n]
66756675
Sets the positions of the violins. The ticks and limits are
66766676
automatically set to match the positions.
6677+
6678+
points: array-like, default = 100
6679+
Number of points to evaluate pdf estimation for Gaussian kernel
66776680
66786681
vert : bool, default = True.
66796682
If true, creates vertical violin plot
@@ -6732,6 +6735,9 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
67326735
cbars = None
67336736
cmedians = None
67346737

6738+
datashape_message = ("List of violinplot statistics and `{0}` "
6739+
"values must have same the length")
6740+
67356741
# Validate positions
67366742
if positions == None:
67376743
positions = range(1, len(dataset) + 1)
@@ -6756,13 +6762,14 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
67566762
# Render violins
67576763
for d,p,w in zip(dataset,positions,widths):
67586764
# Calculate the kernel density
6759-
kde = mlab.ksdensity(d)
6760-
m = kde['xmin']
6761-
M = kde['xmax']
6762-
mean = kde['mean']
6763-
median = kde['median']
6764-
v = kde['result']
6765-
coords = np.arange(m,M,(M-m)/100.)
6765+
kde = mlab.gaussian_kde(d)
6766+
m = kde.dataset.min()
6767+
M = kde.dataset.max()
6768+
mean = np.mean(kde.dataset)
6769+
median = np.median(kde.dataset)
6770+
coords = np.arange(m,M,(M-m)/float(points))
6771+
6772+
v = kde.evaluate(coords)
67666773

67676774
# Since each data point p is plotted from v-p to v+p,
67686775
# we need to scale it by an additional 0.5 factor so that we get
@@ -6772,10 +6779,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
67726779
# create vertical violin plot
67736780
if vert:
67746781
bodies += [self.fill_betweenx(coords,
6775-
-v+p,
6776-
v+p,
6777-
facecolor='y',
6778-
alpha=0.3)]
6782+
-v+p,
6783+
v+p,
6784+
facecolor='y',
6785+
alpha=0.3)]
67796786
# create horizontal violin plot
67806787
else:
67816788
bodies += [self.fill_between(coords,
@@ -6821,10 +6828,6 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
68216828
if showmedians:
68226829
cmedians = self.vlines(medians, pmins, pmaxes, colors='r')
68236830

6824-
6825-
6826-
6827-
68286831
# Reset hold
68296832
self.hold(holdStatus)
68306833

‎lib/matplotlib/mlab.py

Copy file name to clipboard | Expand all lines: lib/matplotlib/mlab.py
+117 -99 | Lines changed: 117 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -3672,12 +3672,12 @@ def stineman_interp(xi,x,y,yp=None):
36723672
1/(dy1+dy2),))
36733673
return yi
36743674

3675-
def ksdensity(dataset, bw_method=None):
3675+
class gaussian_kde(object):
36763676
"""
36773677
Representation of a kernel-density estimate using Gaussian kernels.
36783678
36793679
Call signature::
3680-
kde_dict = ksdensity(dataset, 'silverman')
3680+
kde = gaussian_kde(dataset, 'silverman')
36813681
36823682
Parameters
36833683
----------
@@ -3692,10 +3692,10 @@ def ksdensity(dataset, bw_method=None):
36923692
Attributes
36933693
----------
36943694
dataset : ndarray
3695-
The dataset with which `ksdensity` was initialized.
3696-
d : int
3695+
The dataset with which `gaussian_kde` was initialized.
3696+
dim : int
36973697
Number of dimensions.
3698-
n : int
3698+
num_dp : int
36993699
Number of datapoints.
37003700
factor : float
37013701
The bandwidth factor, obtained from `kde.covariance_factor`, with which
@@ -3706,117 +3706,135 @@ def ksdensity(dataset, bw_method=None):
37063706
inv_cov : ndarray
37073707
The inverse of `covariance`.
37083708
3709-
Returns
3709+
Methods
37103710
-------
3711-
A dictionary mapping each various aspects of the computed KDE.
3712-
The dictionary has the following keys:
3713-
3714-
xmin : number
3715-
The min of the input dataset
3716-
xmax : number
3717-
The max of the input dataset
3718-
mean : number
3719-
The mean of the result
3720-
median: number
3721-
The median of the result
3722-
result: (# of points,)-array
3723-
The array of the evaluated PDF estimation
3724-
3725-
Raises
3726-
------
3727-
ValueError : if the dimensionality of the input points is different than
3728-
the dimensionality of the KDE.
3711+
kde.evaluate(points) : ndarray
3712+
Evaluate the estimated pdf on a provided set of points.
3713+
kde(points) : ndarray
3714+
Same as kde.evaluate(points)
3715+
kde.set_bandwidth(bw_method='scott') : None
3716+
Computes the bandwidth, i.e. the coefficient that multiplies the data
3717+
covariance matrix to obtain the kernel covariance matrix.
3718+
.. versionadded:: 0.11.0
3719+
kde.covariance_factor : float
3720+
Computes the coefficient (`kde.factor`) that multiplies the data
3721+
covariance matrix to obtain the kernel covariance matrix.
3722+
The default is `scotts_factor`. A subclass can overwrite this method
3723+
to provide a different method, or set it through a call to
3724+
`kde.set_bandwidth`.
37293725
37303726
"""
37313727

37323728
# This implementation with minor modification was too good to pass up.
37333729
# from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
37343730

3735-
dataset = np.array(np.atleast_2d(dataset))
3736-
xmin = dataset.min()
3737-
xmax = dataset.max()
3731+
def __init__(self, dataset, bw_method=None):
3732+
self.dataset = np.atleast_2d(dataset)
3733+
if not self.dataset.size > 1:
3734+
raise ValueError("`dataset` input should have multiple elements.")
37383735

3739-
if not dataset.size > 1:
3740-
raise ValueError("`dataset` input should have multiple elements.")
3736+
self.dim, self.num_dp = self.dataset.shape
3737+
self.set_bandwidth(bw_method=bw_method)
37413738

3742-
dim, num_dp = dataset.shape
3739+
def scotts_factor(self):
3740+
return np.power(self.num_dp, -1./(self.dim+4))
37433741

3744-
# ----------------------------------------------
3745-
# Set Bandwidth, defaulted to Scott's Factor
3746-
# ----------------------------------------------
3747-
scotts_factor = lambda: np.power(num_dp, -1./(dim+4))
3748-
silverman_factor = lambda: np.power(num_dp*(dim+2.0)/4.0, -1./(dim+4))
3742+
def silverman_factor(self):
3743+
return np.power(self.num_dp*(self.dim+2.0)/4.0, -1./(self.dim+4))
37493744

3750-
# Default method to calculate bandwidth, can be overwritten by subclass
3745+
# Default method to calculate bandwidth, can be overwritten by subclass
37513746
covariance_factor = scotts_factor
37523747

3753-
if bw_method is None:
3754-
pass
3755-
elif bw_method == 'scott':
3756-
covariance_factor = scotts_factor
3757-
elif bw_method == 'silverman':
3758-
covariance_factor = silverman_factor
3759-
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3760-
covariance_factor = lambda: bw_method
3761-
else:
3762-
msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
3763-
raise ValueError(msg)
3764-
3765-
# ---------------------------------------------------------------
3766-
# Computes covariance matrix for each Gaussian kernel with factor
3767-
# ---------------------------------------------------------------
3768-
factor = covariance_factor()
3769-
3770-
# Cache covariance and inverse covariance of the data
3771-
data_covariance = np.atleast_2d(np.cov(dataset, rowvar=1, bias=False))
3772-
data_inv_cov = np.linalg.inv(data_covariance)
3773-
3774-
covariance = data_covariance * factor**2
3775-
inv_cov = data_inv_cov / factor**2
3776-
norm_factor = np.sqrt(np.linalg.det(2*np.pi*covariance)) * num_dp
3777-
3778-
# ----------------------------------------------
3779-
# Evaluate the estimated pdf on a set of points.
3780-
# ----------------------------------------------
3781-
points = np.atleast_2d(np.arange(xmin, xmax, (xmax-xmin)/100.))
3782-
3783-
dim_pts, num_dp_pts = np.array(points).shape
3784-
if dim_pts != dim:
3785-
if dim_pts == 1 and num_dp_pts == num_dp:
3786-
# points was passed in as a row vector
3787-
points = np.reshape(points, (dim, 1))
3788-
num_dp_pts = 1
3748+
def set_bandwidth(self, bw_method=None):
3749+
if bw_method is None:
3750+
pass
3751+
elif bw_method == 'scott':
3752+
self.covariance_factor = self.scotts_factor
3753+
elif bw_method == 'silverman':
3754+
self.covariance_factor = self.silverman_factor
3755+
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3756+
self._bw_method = 'use constant'
3757+
self.covariance_factor = lambda: bw_method
3758+
elif callable(bw_method):
3759+
self._bw_method = bw_method
3760+
self.covariance_factor = lambda: self._bw_method(self)
37893761
else:
3790-
msg = "points have dimension %s,\
3791-
dataset has dimension %s" % (dim_pts, dim)
3762+
msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
3763+
"or a callable."
37923764
raise ValueError(msg)
37933765

3794-
result = np.zeros((num_dp_pts,), dtype=np.float)
3766+
self._compute_covariance()
37953767

3796-
if num_dp_pts >= num_dp:
3797-
# there are more points than data, so loop over data
3798-
for i in range(num_dp):
3799-
diff = dataset[:, i, np.newaxis] - points
3800-
tdiff = np.dot(inv_cov, diff)
3801-
energy = np.sum(diff*tdiff, axis=0) / 2.0
3802-
result = result + np.exp(-energy)
3803-
else:
3804-
# loop over points
3805-
for i in range(num_dp_pts):
3806-
diff = dataset - points[:, i, np.newaxis]
3807-
tdiff = np.dot(inv_cov, diff)
3808-
energy = np.sum(diff * tdiff, axis=0) / 2.0
3809-
result[i] = np.sum(np.exp(-energy), axis=0)
3810-
3811-
result = result / norm_factor
3812-
3813-
return {
3814-
'xmin': xmin,
3815-
'xmax': xmax,
3816-
'mean': np.mean(dataset),
3817-
'median': np.median(dataset),
3818-
'result': result
3819-
}
3768+
def _compute_covariance(self):
3769+
"""Computes the covariance matrix for each Gaussian kernel using
3770+
covariance_factor().
3771+
"""
3772+
self.factor = self.covariance_factor()
3773+
# Cache covariance and inverse covariance of the data
3774+
if not hasattr(self, '_data_inv_cov'):
3775+
self._data_covariance = np.atleast_2d(np.cov(self.dataset, rowvar=1,
3776+
bias=False))
3777+
self._data_inv_cov = np.linalg.inv(self._data_covariance)
3778+
3779+
self.covariance = self._data_covariance * self.factor**2
3780+
self.inv_cov = self._data_inv_cov / self.factor**2
3781+
self._norm_factor = np.sqrt(np.linalg.det(2*np.pi*self.covariance)) * self.num_dp
3782+
3783+
def evaluate(self, points):
3784+
"""Evaluate the estimated pdf on a set of points.
3785+
3786+
Parameters
3787+
----------
3788+
points : (# of dimensions, # of points)-array
3789+
Alternatively, a (# of dimensions,) vector can be passed in and
3790+
treated as a single point.
3791+
3792+
Returns
3793+
-------
3794+
values : (# of points,)-array
3795+
The values at each point.
3796+
3797+
Raises
3798+
------
3799+
ValueError : if the dimensionality of the input points is different than
3800+
the dimensionality of the KDE.
3801+
3802+
"""
3803+
points = np.atleast_2d(points)
3804+
3805+
d, m = points.shape
3806+
if d != self.dim:
3807+
if d == 1 and m == self.dim:
3808+
# points was passed in as a row vector
3809+
points = np.reshape(points, (self.dim, 1))
3810+
m = 1
3811+
else:
3812+
msg = "points have dimension %s, dataset has dimension %s" % (d,
3813+
self.dim)
3814+
raise ValueError(msg)
3815+
3816+
result = np.zeros((m,), dtype=np.float)
3817+
3818+
if m >= self.num_dp:
3819+
# there are more points than data, so loop over data
3820+
for i in range(self.num_dp):
3821+
diff = self.dataset[:, i, np.newaxis] - points
3822+
tdiff = np.dot(self.inv_cov, diff)
3823+
energy = np.sum(diff*tdiff,axis=0) / 2.0
3824+
result = result + np.exp(-energy)
3825+
else:
3826+
# loop over points
3827+
for i in range(m):
3828+
diff = self.dataset - points[:, i, np.newaxis]
3829+
tdiff = np.dot(self.inv_cov, diff)
3830+
energy = np.sum(diff * tdiff, axis=0) / 2.0
3831+
result[i] = np.sum(np.exp(-energy), axis=0)
3832+
3833+
result = result / self._norm_factor
3834+
3835+
return result
3836+
3837+
__call__ = evaluate
38203838

38213839
##################################################
38223840
# Code related to things in and around polygons

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.