Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit c7ddf0e

Browse files
khchansolvents
authored and committed
Fixed bug for regression test matplotlib#1181 in scipy unit tests; ksdensity is now referred to as gaussian_kde and exists as a class in mlab.
Fixed list comp position bug and updated examples
1 parent 2b4c01c commit c7ddf0e
Copy full SHA for c7ddf0e

File tree

Expand file tree / Collapse file tree

3 files changed

+149
-122
lines changed
Filter options
Expand file treeCollapse file tree

3 files changed

+149
-122
lines changed

‎examples/statistics/violinplot_demo.py

Copy file name to clipboardExpand all lines: examples/statistics/violinplot_demo.py
+13 -7 Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
# fake data
1010
fs = 10 # fontsize
11-
pos = range(5)
11+
pos = [1,2,4,5,7,8]
1212
data = [np.random.normal(size=100) for i in pos]
1313

1414
# TODO: future customizability dicts go here
@@ -25,22 +25,28 @@
2525

2626
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(6,6))
2727

28-
axes[0, 0].violinplot(data, pos, width=0.1)
28+
axes[0, 0].violinplot(data, pos, points=20, widths=0.1, showmeans=True,
29+
showextrema=True, showmedians=True)
2930
axes[0, 0].set_title('Custom violinplot 1', fontsize=fs)
3031

31-
axes[0, 1].violinplot(data, pos, width=0.3)
32+
axes[0, 1].violinplot(data, pos, points=40, widths=0.3, showmeans=True,
33+
showextrema=True, showmedians=True)
3234
axes[0, 1].set_title('Custom violinplot 2', fontsize=fs)
3335

34-
axes[0, 2].violinplot(data, pos, width=0.5)
36+
axes[0, 2].violinplot(data, pos, points=60, widths=0.5, showmeans=True,
37+
showextrema=True, showmedians=True)
3538
axes[0, 2].set_title('Custom violinplot 3', fontsize=fs)
3639

37-
axes[1, 0].violinplot(data, pos, width=0.7)
40+
axes[1, 0].violinplot(data, pos, points=80, vert=False, widths=0.7,
41+
showmeans=True, showextrema=True, showmedians=True)
3842
axes[1, 0].set_title('Custom violinplot 4', fontsize=fs)
3943

40-
axes[1, 1].violinplot(data, pos, width=0.9)
44+
axes[1, 1].violinplot(data, pos, points=100, vert=False, widths=0.9,
45+
showmeans=True, showextrema=True, showmedians=True)
4146
axes[1, 1].set_title('Custom violinplot 5', fontsize=fs)
4247

43-
axes[1, 2].violinplot(data, pos, width=1.1)
48+
axes[1, 2].violinplot(data, pos, points=200, vert=False, widths=1.1,
49+
showmeans=True, showextrema=True, showmedians=True)
4450
axes[1, 2].set_title('Custom violinplot 6', fontsize=fs)
4551

4652
for ax in axes.flatten():

‎lib/matplotlib/axes/_axes.py

Copy file name to clipboardExpand all lines: lib/matplotlib/axes/_axes.py
+19-16Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6739,7 +6739,7 @@ def matshow(self, Z, **kwargs):
67396739
integer=True))
67406740
return im
67416741

6742-
def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=False,
6742+
def violinplot(self, dataset, positions=None, points=100, vert=True, widths=0.5, showmeans=False,
67436743
showextrema=True, showmedians=False):
67446744
"""
67456745
Make a violin plot.
@@ -6762,6 +6762,9 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
67626762
positions : array-like, default = [1, 2, ..., n]
67636763
Sets the positions of the violins. The ticks and limits are
67646764
automatically set to match the positions.
6765+
6766+
points : scalar, default = 100
6767+
Number of points at which to evaluate the Gaussian kernel density estimate
67656768
67666769
vert : bool, default = True.
67676770
If true, creates vertical violin plot
@@ -6820,6 +6823,9 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
68206823
cbars = None
68216824
cmedians = None
68226825

6826+
datashape_message = ("List of violinplot statistics and `{0}` "
6827+
"values must have same the length")
6828+
68236829
# Validate positions
68246830
if positions == None:
68256831
positions = range(1, len(dataset) + 1)
@@ -6844,13 +6850,14 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
68446850
# Render violins
68456851
for d,p,w in zip(dataset,positions,widths):
68466852
# Calculate the kernel density
6847-
kde = mlab.ksdensity(d)
6848-
m = kde['xmin']
6849-
M = kde['xmax']
6850-
mean = kde['mean']
6851-
median = kde['median']
6852-
v = kde['result']
6853-
coords = np.arange(m,M,(M-m)/100.)
6853+
kde = mlab.gaussian_kde(d)
6854+
m = kde.dataset.min()
6855+
M = kde.dataset.max()
6856+
mean = np.mean(kde.dataset)
6857+
median = np.median(kde.dataset)
6858+
coords = np.arange(m,M,(M-m)/float(points))
6859+
6860+
v = kde.evaluate(coords)
68546861

68556862
# Since each data point p is plotted from v-p to v+p,
68566863
# we need to scale it by an additional 0.5 factor so that we get
@@ -6860,10 +6867,10 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
68606867
# create vertical violin plot
68616868
if vert:
68626869
bodies += [self.fill_betweenx(coords,
6863-
-v+p,
6864-
v+p,
6865-
facecolor='y',
6866-
alpha=0.3)]
6870+
-v+p,
6871+
v+p,
6872+
facecolor='y',
6873+
alpha=0.3)]
68676874
# create horizontal violin plot
68686875
else:
68696876
bodies += [self.fill_between(coords,
@@ -6909,10 +6916,6 @@ def violinplot(self, dataset, positions=None, vert=True, widths=0.5, showmeans=F
69096916
if showmedians:
69106917
cmedians = self.vlines(medians, pmins, pmaxes, colors='r')
69116918

6912-
6913-
6914-
6915-
69166919
# Reset hold
69176920
self.hold(holdStatus)
69186921

‎lib/matplotlib/mlab.py

Copy file name to clipboardExpand all lines: lib/matplotlib/mlab.py
+117-99Lines changed: 117 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -3656,12 +3656,12 @@ def stineman_interp(xi,x,y,yp=None):
36563656
1/(dy1+dy2),))
36573657
return yi
36583658

3659-
def ksdensity(dataset, bw_method=None):
3659+
class gaussian_kde(object):
36603660
"""
36613661
Representation of a kernel-density estimate using Gaussian kernels.
36623662
36633663
Call signature::
3664-
kde_dict = ksdensity(dataset, 'silverman')
3664+
kde = gaussian_kde(dataset, 'silverman')
36653665
36663666
Parameters
36673667
----------
@@ -3676,10 +3676,10 @@ def ksdensity(dataset, bw_method=None):
36763676
Attributes
36773677
----------
36783678
dataset : ndarray
3679-
The dataset with which `ksdensity` was initialized.
3680-
d : int
3679+
The dataset with which `gaussian_kde` was initialized.
3680+
dim : int
36813681
Number of dimensions.
3682-
n : int
3682+
num_dp : int
36833683
Number of datapoints.
36843684
factor : float
36853685
The bandwidth factor, obtained from `kde.covariance_factor`, with which
@@ -3690,117 +3690,135 @@ def ksdensity(dataset, bw_method=None):
36903690
inv_cov : ndarray
36913691
The inverse of `covariance`.
36923692
3693-
Returns
3693+
Methods
36943694
-------
3695-
A dictionary mapping each various aspects of the computed KDE.
3696-
The dictionary has the following keys:
3697-
3698-
xmin : number
3699-
The min of the input dataset
3700-
xmax : number
3701-
The max of the input dataset
3702-
mean : number
3703-
The mean of the result
3704-
median: number
3705-
The median of the result
3706-
result: (# of points,)-array
3707-
The array of the evaluated PDF estimation
3708-
3709-
Raises
3710-
------
3711-
ValueError : if the dimensionality of the input points is different than
3712-
the dimensionality of the KDE.
3695+
kde.evaluate(points) : ndarray
3696+
Evaluate the estimated pdf on a provided set of points.
3697+
kde(points) : ndarray
3698+
Same as kde.evaluate(points)
3699+
kde.set_bandwidth(bw_method='scott') : None
3700+
Computes the bandwidth, i.e. the coefficient that multiplies the data
3701+
covariance matrix to obtain the kernel covariance matrix.
3702+
.. versionadded:: 0.11.0
3703+
kde.covariance_factor : float
3704+
Computes the coefficient (`kde.factor`) that multiplies the data
3705+
covariance matrix to obtain the kernel covariance matrix.
3706+
The default is `scotts_factor`. A subclass can overwrite this method
3707+
to provide a different method, or set it through a call to
3708+
`kde.set_bandwidth`.
37133709
37143710
"""
37153711

37163712
# This implementation with minor modification was too good to pass up.
37173713
# from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
37183714

3719-
dataset = np.array(np.atleast_2d(dataset))
3720-
xmin = dataset.min()
3721-
xmax = dataset.max()
3715+
def __init__(self, dataset, bw_method=None):
3716+
self.dataset = np.atleast_2d(dataset)
3717+
if not self.dataset.size > 1:
3718+
raise ValueError("`dataset` input should have multiple elements.")
37223719

3723-
if not dataset.size > 1:
3724-
raise ValueError("`dataset` input should have multiple elements.")
3720+
self.dim, self.num_dp = self.dataset.shape
3721+
self.set_bandwidth(bw_method=bw_method)
37253722

3726-
dim, num_dp = dataset.shape
3723+
def scotts_factor(self):
3724+
return np.power(self.num_dp, -1./(self.dim+4))
37273725

3728-
# ----------------------------------------------
3729-
# Set Bandwidth, defaulted to Scott's Factor
3730-
# ----------------------------------------------
3731-
scotts_factor = lambda: np.power(num_dp, -1./(dim+4))
3732-
silverman_factor = lambda: np.power(num_dp*(dim+2.0)/4.0, -1./(dim+4))
3726+
def silverman_factor(self):
3727+
return np.power(self.num_dp*(self.dim+2.0)/4.0, -1./(self.dim+4))
37333728

3734-
# Default method to calculate bandwidth, can be overwritten by subclass
3729+
# Default method to calculate bandwidth, can be overwritten by subclass
37353730
covariance_factor = scotts_factor
37363731

3737-
if bw_method is None:
3738-
pass
3739-
elif bw_method == 'scott':
3740-
covariance_factor = scotts_factor
3741-
elif bw_method == 'silverman':
3742-
covariance_factor = silverman_factor
3743-
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3744-
covariance_factor = lambda: bw_method
3745-
else:
3746-
msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
3747-
raise ValueError(msg)
3748-
3749-
# ---------------------------------------------------------------
3750-
# Computes covariance matrix for each Gaussian kernel with factor
3751-
# ---------------------------------------------------------------
3752-
factor = covariance_factor()
3753-
3754-
# Cache covariance and inverse covariance of the data
3755-
data_covariance = np.atleast_2d(np.cov(dataset, rowvar=1, bias=False))
3756-
data_inv_cov = np.linalg.inv(data_covariance)
3757-
3758-
covariance = data_covariance * factor**2
3759-
inv_cov = data_inv_cov / factor**2
3760-
norm_factor = np.sqrt(np.linalg.det(2*np.pi*covariance)) * num_dp
3761-
3762-
# ----------------------------------------------
3763-
# Evaluate the estimated pdf on a set of points.
3764-
# ----------------------------------------------
3765-
points = np.atleast_2d(np.arange(xmin, xmax, (xmax-xmin)/100.))
3766-
3767-
dim_pts, num_dp_pts = np.array(points).shape
3768-
if dim_pts != dim:
3769-
if dim_pts == 1 and num_dp_pts == num_dp:
3770-
# points was passed in as a row vector
3771-
points = np.reshape(points, (dim, 1))
3772-
num_dp_pts = 1
3732+
def set_bandwidth(self, bw_method=None):
3733+
if bw_method is None:
3734+
pass
3735+
elif bw_method == 'scott':
3736+
self.covariance_factor = self.scotts_factor
3737+
elif bw_method == 'silverman':
3738+
self.covariance_factor = self.silverman_factor
3739+
elif np.isscalar(bw_method) and not isinstance(bw_method, six.string_types):
3740+
self._bw_method = 'use constant'
3741+
self.covariance_factor = lambda: bw_method
3742+
elif callable(bw_method):
3743+
self._bw_method = bw_method
3744+
self.covariance_factor = lambda: self._bw_method(self)
37733745
else:
3774-
msg = "points have dimension %s,\
3775-
dataset has dimension %s" % (dim_pts, dim)
3746+
msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
3747+
"or a callable."
37763748
raise ValueError(msg)
37773749

3778-
result = np.zeros((num_dp_pts,), dtype=np.float)
3750+
self._compute_covariance()
37793751

3780-
if num_dp_pts >= num_dp:
3781-
# there are more points than data, so loop over data
3782-
for i in range(num_dp):
3783-
diff = dataset[:, i, np.newaxis] - points
3784-
tdiff = np.dot(inv_cov, diff)
3785-
energy = np.sum(diff*tdiff, axis=0) / 2.0
3786-
result = result + np.exp(-energy)
3787-
else:
3788-
# loop over points
3789-
for i in range(num_dp_pts):
3790-
diff = dataset - points[:, i, np.newaxis]
3791-
tdiff = np.dot(inv_cov, diff)
3792-
energy = np.sum(diff * tdiff, axis=0) / 2.0
3793-
result[i] = np.sum(np.exp(-energy), axis=0)
3794-
3795-
result = result / norm_factor
3796-
3797-
return {
3798-
'xmin': xmin,
3799-
'xmax': xmax,
3800-
'mean': np.mean(dataset),
3801-
'median': np.median(dataset),
3802-
'result': result
3803-
}
3752+
def _compute_covariance(self):
3753+
"""Computes the covariance matrix for each Gaussian kernel using
3754+
covariance_factor().
3755+
"""
3756+
self.factor = self.covariance_factor()
3757+
# Cache covariance and inverse covariance of the data
3758+
if not hasattr(self, '_data_inv_cov'):
3759+
self._data_covariance = np.atleast_2d(np.cov(self.dataset, rowvar=1,
3760+
bias=False))
3761+
self._data_inv_cov = np.linalg.inv(self._data_covariance)
3762+
3763+
self.covariance = self._data_covariance * self.factor**2
3764+
self.inv_cov = self._data_inv_cov / self.factor**2
3765+
self._norm_factor = np.sqrt(np.linalg.det(2*np.pi*self.covariance)) * self.num_dp
3766+
3767+
def evaluate(self, points):
3768+
"""Evaluate the estimated pdf on a set of points.
3769+
3770+
Parameters
3771+
----------
3772+
points : (# of dimensions, # of points)-array
3773+
Alternatively, a (# of dimensions,) vector can be passed in and
3774+
treated as a single point.
3775+
3776+
Returns
3777+
-------
3778+
values : (# of points,)-array
3779+
The values at each point.
3780+
3781+
Raises
3782+
------
3783+
ValueError : if the dimensionality of the input points is different than
3784+
the dimensionality of the KDE.
3785+
3786+
"""
3787+
points = np.atleast_2d(points)
3788+
3789+
d, m = points.shape
3790+
if d != self.dim:
3791+
if d == 1 and m == self.dim:
3792+
# points was passed in as a row vector
3793+
points = np.reshape(points, (self.dim, 1))
3794+
m = 1
3795+
else:
3796+
msg = "points have dimension %s, dataset has dimension %s" % (d,
3797+
self.dim)
3798+
raise ValueError(msg)
3799+
3800+
result = np.zeros((m,), dtype=np.float)
3801+
3802+
if m >= self.num_dp:
3803+
# there are more points than data, so loop over data
3804+
for i in range(self.num_dp):
3805+
diff = self.dataset[:, i, np.newaxis] - points
3806+
tdiff = np.dot(self.inv_cov, diff)
3807+
energy = np.sum(diff*tdiff,axis=0) / 2.0
3808+
result = result + np.exp(-energy)
3809+
else:
3810+
# loop over points
3811+
for i in range(m):
3812+
diff = self.dataset - points[:, i, np.newaxis]
3813+
tdiff = np.dot(self.inv_cov, diff)
3814+
energy = np.sum(diff * tdiff, axis=0) / 2.0
3815+
result[i] = np.sum(np.exp(-energy), axis=0)
3816+
3817+
result = result / self._norm_factor
3818+
3819+
return result
3820+
3821+
__call__ = evaluate
38043822

38053823
##################################################
38063824
# Code related to things in and around polygons

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.