Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

BUG: Fix #61221: Exception with unstack(sort=False) and NA in index. #61226

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: main
Choose a base branch
Loading
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
7503537
BUG: Fix #61221: Exception with unstack(sort=False) and NA in index.
Apr 3, 2025
c0a7c80
BUG: Fix #61221: Exception with unstack(sort=False) and NA in index.
Apr 3, 2025
7a8fddb
fixed formatting
Apr 3, 2025
a397466
fixed issue with unsorted unstack, should now work
Apr 3, 2025
eb2fb7a
Merge branch 'main' into fix-issue-61221
gsmll Apr 4, 2025
3539ac6
Instead of creating variable self.na, constructed na index locally
Apr 4, 2025
64f5173
fixed issues with local variable
Apr 4, 2025
e2b38b1
fixed the fix -oops
Apr 4, 2025
31d7b33
fixed up tests
Apr 9, 2025
05c8a02
Merge branch 'main' into fix-issue-61221
gsmll Apr 9, 2025
cc1deb6
Merge branch 'main' into fix-issue-61221
gsmll Apr 11, 2025
84d6bd3
Add Pandas Cookbook to Book Recommendations (#61271)
WillAyd Apr 11, 2025
f7e910e
shortened factorize
Apr 11, 2025
a147c6d
optimized shortened factorized
Apr 11, 2025
1abbc73
Merge branch 'main' into fix-issue-61221
gsmll Apr 11, 2025
c35e8cc
fixed typing issue
Apr 11, 2025
8afbad2
Merge branch 'main' into fix-issue-61221
gsmll Apr 13, 2025
8c19221
Merge branch 'main' into fix-issue-61221
gsmll Apr 14, 2025
555bad9
Merge branch 'main' into fix-issue-61221
gsmll Apr 16, 2025
cfb5e92
Merge branch 'main' into fix-issue-61221
gsmll Apr 22, 2025
a677315
Merge branch 'main' into fix-issue-61221
gsmll May 12, 2025
1f9cd53
Merge branch 'main' into fix-issue-61221
gsmll May 15, 2025
f72d1df
Merge branch 'main' into fix-issue-61221
gsmll May 15, 2025
c514d68
Merge branch 'main' into fix-issue-61221
gsmll May 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixed formatting
  • Loading branch information
Gabe Small authored and Gabe Small committed Apr 3, 2025
commit 7a8fddb6656c73ff785303efd65c185c715829f5
16 changes: 8 additions & 8 deletions 16 pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,10 +135,10 @@ def __init__(
if not self.sort:
unique_codes = unique(self.index.codes[self.level])
# Bug Fix GH 61221
# The -1 in the unsorted unique codes causes for doubling and an eventual ValueError
# The -1 in the unsorted unique codes causes errors,
# so save the NA location to be used in the repeater
self.na = np.where(unique_codes == -1)[0][0] if -1 in unique_codes else None
gsmll marked this conversation as resolved.
Show resolved Hide resolved
unique_codes = unique_codes[unique_codes != -1]
unique_codes = unique_codes[unique_codes != -1]
self.removed_level = self.removed_level.take(unique_codes)
self.removed_level_full = self.removed_level_full.take(unique_codes)

Expand Down Expand Up @@ -395,13 +395,13 @@ def _repeater(self) -> np.ndarray:
stride = len(self.removed_level) + self.lift
if self.sort or not self.na:
repeater = np.arange(stride) - self.lift
else :
#move the -1 to the position at self.na
else:
# move the -1 to the position at self.na
repeater = np.arange(stride)
if(self.na):
if self.na:
repeater[self.na] = -1
if(self.na + 1) < len(repeater):
repeater[self.na + 1:] -= 1
if (self.na + 1) < len(repeater):
repeater[self.na + 1 :] -= 1

return repeater

Expand Down Expand Up @@ -1065,7 +1065,7 @@ def stack_reshape(
else:
data.columns = default_index(len(data.columns))
buf.append(data)

if len(buf) > 0 and not frame.empty:
result = concat(buf, ignore_index=True)
else:
Expand Down
158 changes: 92 additions & 66 deletions 158 pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1610,79 +1610,105 @@ def assert_na_safe_equal(left, right):
left = left.rename(columns={pd.NA: np.nan}, level=1)
right = right.rename(columns={pd.NA: np.nan}, level=1)
tm.assert_frame_equal(left, right, check_dtype=False)



def test_unstack_sort_false_na():
gsmll marked this conversation as resolved.
Show resolved Hide resolved
# GH 61221
levels1 = ['b','a']
levels2 = pd.Index([1, 2, 3, pd.NA], dtype=pd.Int64Dtype())
index = pd.MultiIndex.from_product([levels1, levels2], names=['level1', 'level2'])
df = pd.DataFrame(dict(value=range(len(index))), index=index)
result = df.unstack(level='level2', sort=False)
expected = pd.DataFrame(
{
('value', 1): [0, 4],
('value', 2): [1, 5],
('value', 3): [2, 6],
('value', pd.Int64Dtype().na_value): [3, 7]
},
index=pd.Index(['b', 'a'], name='level1'),
columns=pd.MultiIndex.from_tuples([
('value', 1), ('value', 2), ('value', 3), ('value', pd.Int64Dtype().na_value)
], names=[None, 'level2'])
)
levels1 = ["b", "a"]
levels2 = Index([1, 2, 3, pd.NA], dtype=pd.Int64Dtype())
index = MultiIndex.from_product([levels1, levels2], names=["level1", "level2"])
df = DataFrame({"value": range(len(index))}, index=index)
result = df.unstack(level="level2", sort=False)
expected = DataFrame(
{
("value", 1): [0, 4],
("value", 2): [1, 5],
("value", 3): [2, 6],
("value", pd.Int64Dtype().na_value): [3, 7],
},
index=Index(["b", "a"], name="level1"),
columns=MultiIndex.from_tuples(
[
("value", 1),
("value", 2),
("value", 3),
("value", pd.Int64Dtype().na_value),
],
names=[None, "level2"],
),
)
assert_na_safe_equal(result, expected)
levels2 = pd.Index([pd.NA, 1, 2, 3], dtype=pd.Int64Dtype())
index = pd.MultiIndex.from_product([levels1, levels2], names=['level1', 'level2'])
df = pd.DataFrame(dict(value=range(len(index))), index=index)
result = df.unstack(level='level2', sort=False)
expected = pd.DataFrame(
{
('value', pd.Int64Dtype().na_value): [0, 4],
('value', 1): [1, 5],
('value', 2): [2, 6],
('value', 3): [3, 7] # Use actual pd.NA object
},
index=pd.Index(['b', 'a'], name='level1'),
columns=pd.MultiIndex.from_tuples([
('value', pd.Int64Dtype().na_value), ('value', 1), ('value', 2), ('value', 3)
], names=[None, 'level2'])
)
levels2 = Index([pd.NA, 1, 2, 3], dtype=pd.Int64Dtype())
index = MultiIndex.from_product([levels1, levels2], names=["level1", "level2"])
df = DataFrame({"value": range(len(index))}, index=index)
result = df.unstack(level="level2", sort=False)
expected = DataFrame(
{
("value", pd.Int64Dtype().na_value): [0, 4],
("value", 1): [1, 5],
("value", 2): [2, 6],
("value", 3): [3, 7],
},
index=Index(["b", "a"], name="level1"),
columns=MultiIndex.from_tuples(
[
("value", pd.Int64Dtype().na_value),
("value", 1),
("value", 2),
("value", 3),
],
names=[None, "level2"],
),
)
assert_na_safe_equal(result, expected)
levels2 = pd.Index([ 1, pd.NA, 2, 3], dtype=pd.Int64Dtype())
index = pd.MultiIndex.from_product([levels1, levels2], names=['level1', 'level2'])
df = pd.DataFrame(dict(value=range(len(index))), index=index)
result = df.unstack(level='level2', sort=False)
expected = pd.DataFrame(
{
('value', 1): [0, 4],
('value', pd.Int64Dtype().na_value): [1, 5],
('value', 2): [2, 6],
('value', 3): [3, 7] # Use actual pd.NA object
},
index=pd.Index(['b', 'a'], name='level1'),
columns=pd.MultiIndex.from_tuples([
('value', 1), ('value', pd.Int64Dtype().na_value), ('value', 2), ('value', 3)
], names=[None, 'level2'])
)
levels2 = Index([1, pd.NA, 2, 3], dtype=pd.Int64Dtype())
index = MultiIndex.from_product([levels1, levels2], names=["level1", "level2"])
df = DataFrame({"value": range(len(index))}, index=index)
result = df.unstack(level="level2", sort=False)
expected = DataFrame(
{
("value", 1): [0, 4],
("value", pd.Int64Dtype().na_value): [1, 5],
("value", 2): [2, 6],
("value", 3): [3, 7],
},
index=Index(["b", "a"], name="level1"),
columns=MultiIndex.from_tuples(
[
("value", 1),
("value", pd.Int64Dtype().na_value),
("value", 2),
("value", 3),
],
names=[None, "level2"],
),
)
assert_na_safe_equal(result, expected)
levels2 = pd.Index([3, pd.NA, 1, 2], dtype=pd.Int64Dtype())
index = pd.MultiIndex.from_product([levels1, levels2], names=['level1', 'level2'])
df = pd.DataFrame(dict(value=range(len(index))), index=index)
result = df.unstack(level='level2', sort=False)
expected = pd.DataFrame(
{
('value', 3): [0, 4],
('value', pd.Int64Dtype().na_value): [1, 5],
('value', 1): [2, 6],
('value', 2): [3, 7] # Use actual pd.NA object
},
index=pd.Index(['b', 'a'], name='level1'),
columns=pd.MultiIndex.from_tuples([
('value', 3), ('value', pd.Int64Dtype().na_value), ('value', 1), ('value', 2)
], names=[None, 'level2'])
)
levels2 = Index([3, pd.NA, 1, 2], dtype=pd.Int64Dtype())
index = MultiIndex.from_product([levels1, levels2], names=["level1", "level2"])
df = DataFrame({"value": range(len(index))}, index=index)
result = df.unstack(level="level2", sort=False)
expected = DataFrame(
{
("value", 3): [0, 4],
("value", pd.Int64Dtype().na_value): [1, 5],
("value", 1): [2, 6],
("value", 2): [3, 7], # Use actual pd.NA object
},
index=Index(["b", "a"], name="level1"),
columns=MultiIndex.from_tuples(
[
("value", 3),
("value", pd.Int64Dtype().na_value),
("value", 1),
("value", 2),
],
names=[None, "level2"],
),
)
assert_na_safe_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated")
def test_stack_sort_false_multi_level(future_stack):
# GH 15105
Expand Down
Loading
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.