Closed
Description
As described in #9023 (comment), the way `DataFrame.stack()` and `DataFrame.unstack()` treat `NaN` indices is rather odd/inconsistent. Although `test_unstack_nan_index()` in `test_frame.py` passes, I observe the following (this is from 0.15.2, but I think it's unchanged in the current master for 0.16.0):
In [140]: df = pd.DataFrame(np.arange(4).reshape(2, 2),
columns=pd.MultiIndex.from_tuples([('A','a'), ('B', 'b')],
names=['Upper', 'Lower']),
index=Index([0, 1], name='Num'), dtype=np.float64)
In [141]: df_nan = pd.DataFrame(np.arange(4).reshape(2, 2),
columns=pd.MultiIndex.from_tuples([('A',np.nan), ('B', 'b')],
names=['Upper', 'Lower']),
index=Index([0, 1], name='Num'), dtype=np.float64)
In [148]: df
Out[148]:
Upper A B
Lower a b
Num
0 0 1
1 2 3
In [149]: df.stack()
Out[149]:
Upper A B
Num Lower
0 a 0 NaN
b NaN 1
1 a 2 NaN
b NaN 3
In [150]: df.T.unstack().T
Out[150]:
Upper A B
Num Lower
0 a 0 NaN
b NaN 1
1 a 2 NaN
b NaN 3
In [151]: df_nan
Out[151]:
Upper A B
Lower NaN b
Num
0 0 1
1 2 3
In [152]: df_nan.stack()
Out[152]:
Upper A B
Num Lower
0 NaN 0 1
b 0 1
1 NaN 2 3
b 2 3
In [153]: df_nan.T.unstack().T
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-153-edbcaeb64f64> in <module>()
----> 1 df_nan.T.unstack().T
C:\Python34\lib\site-packages\pandas\core\frame.py in unstack(self, level)
3486 """
3487 from pandas.core.reshape import unstack
-> 3488 return unstack(self, level)
3489
3490 #----------------------------------------------------------------------
C:\Python34\lib\site-packages\pandas\core\reshape.py in unstack(obj, level)
439 if isinstance(obj, DataFrame):
440 if isinstance(obj.index, MultiIndex):
--> 441 return _unstack_frame(obj, level)
442 else:
443 return obj.T.stack(dropna=False)
C:\Python34\lib\site-packages\pandas\core\reshape.py in _unstack_frame(obj, level)
479 else:
480 unstacker = _Unstacker(obj.values, obj.index, level=level,
--> 481 value_columns=obj.columns)
482 return unstacker.get_result()
483
C:\Python34\lib\site-packages\pandas\core\reshape.py in __init__(self, values, index, level, value_columns)
101
102 self._make_sorted_values_labels()
--> 103 self._make_selectors()
104
105 def _make_sorted_values_labels(self):
C:\Python34\lib\site-packages\pandas\core\reshape.py in _make_selectors(self)
143
144 if mask.sum() < len(self.index):
--> 145 raise ValueError('Index contains duplicate entries, '
146 'cannot reshape')
147
ValueError: Index contains duplicate entries, cannot reshape