Open
Description
Bug report
Bug summary
Handling of missing data (represented by `np.nan`) is inconsistent and surprising.
Code for reproduction
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
print(mpl.__version__)
x, y = ["a", np.nan, "c"], [1, 2, 3]
plt.figure()
plt.plot(x, y) # runs
plt.figure()
plt.bar(x, y) # raises
plt.figure()
plt.bar(np.array(x), y) # runs
Actual outcome
The first issue is that `plt.bar` (and `plt.scatter`, probably others, but notably not `plt.plot`) raises when it gets a list containing `np.nan` (but not an array containing `np.nan`?):
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-119-77b75fdcb3b5> in <module>
9
10 plt.figure()
---> 11 plt.bar(x, y)
12
13 plt.figure()
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/pyplot.py in bar(x, height, width, bottom, align, data, **kwargs)
2485 x, height, width=0.8, bottom=None, *, align='center',
2486 data=None, **kwargs):
-> 2487 return gca().bar(
2488 x, height, width=width, bottom=bottom, align=align,
2489 **({"data": data} if data is not None else {}), **kwargs)
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/__init__.py in inner(ax, data, *args, **kwargs)
1436 def inner(ax, *args, data=None, **kwargs):
1437 if data is None:
-> 1438 return func(ax, *map(sanitize_sequence, args), **kwargs)
1439
1440 bound = new_sig.bind(ax, *args, **kwargs)
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/axes/_axes.py in bar(self, x, height, width, bottom, align, **kwargs)
2405
2406 if orientation == 'vertical':
-> 2407 self._process_unit_info(xdata=x, ydata=height, kwargs=kwargs)
2408 if log:
2409 self.set_yscale('log', nonpositive='clip')
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/axes/_base.py in _process_unit_info(self, xdata, ydata, kwargs)
2187 return kwargs
2188
-> 2189 kwargs = _process_single_axis(xdata, self.xaxis, 'xunits', kwargs)
2190 kwargs = _process_single_axis(ydata, self.yaxis, 'yunits', kwargs)
2191 return kwargs
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/axes/_base.py in _process_single_axis(data, axis, unit_name, kwargs)
2170 # We only need to update if there is nothing set yet.
2171 if not axis.have_units():
-> 2172 axis.update_units(data)
2173
2174 # Check for units in the kwargs, and if present update axis
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/axis.py in update_units(self, data)
1461 neednew = self.converter != converter
1462 self.converter = converter
-> 1463 default = self.converter.default_units(data, self)
1464 if default is not None and self.units is None:
1465 self.set_units(default)
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/category.py in default_units(data, axis)
105 # the conversion call stack is default_units -> axis_info -> convert
106 if axis.units is None:
--> 107 axis.set_units(UnitData(data))
108 else:
109 axis.units.update(data)
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/category.py in __init__(self, data)
174 self._counter = itertools.count()
175 if data is not None:
--> 176 self.update(data)
177
178 @staticmethod
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/category.py in update(self, data)
209 for val in OrderedDict.fromkeys(data):
210 # OrderedDict just iterates over unique values in data.
--> 211 cbook._check_isinstance((str, bytes), value=val)
212 if convertible:
213 # this will only be called so long as convertible is True.
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/cbook/__init__.py in _check_isinstance(_types, **kwargs)
2244 for k, v in kwargs.items():
2245 if not isinstance(v, types):
-> 2246 raise TypeError(
2247 "{!r} must be an instance of {}, not a {}".format(
2248 k,
TypeError: 'value' must be an instance of str or bytes, not a float
~/miniconda3/envs/seaborn-py38-latest/lib/python3.8/site-packages/matplotlib/cbook/__init__.py in _check_isinstance(_types, **kwargs)
2244 for k, v in kwargs.items():
2245 if not isinstance(v, types):
-> 2246 raise TypeError(
2247 "{!r} must be an instance of {}, not a {}".format(
2248 k,
TypeError: 'value' must be an instance of str or bytes, not a float
The second issue is that when the code does run, it includes `"nan"` as a category in the plot, whereas (consistent with the rest of matplotlib) I would have expected the NaNs to propagate through and not appear in the plot:
Expected outcome
- Consistent behavior across input objects (seems straightforward)
- Treating nans in categorical datasets as missing, rather than as a different category (possibly controversial?)
Matplotlib version
- Operating system: macos
- Matplotlib version: 3.3.2
- Matplotlib backend (`print(matplotlib.get_backend())`): pylab inline
- Python version: 3.8