Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

fix: translate labels to col ids when copying dataframes #1372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions 20 bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,26 @@ def label_to_col_id(self) -> typing.Mapping[Label, typing.Sequence[str]]:
mapping[label] = (*mapping.get(label, ()), id)
return mapping

def resolve_label_exact(self, label: Label) -> Optional[str]:
    """Return the id of the single column whose label equals ``label``.

    Args:
        label: The column label to look up in ``self.label_to_col_id``.

    Returns:
        The matching column id, or ``None`` if no column carries this label.

    Raises:
        ValueError: If more than one column carries this label.
    """
    # An empty tuple as the default keeps the "not found" case the same kind
    # of (immutable) sequence as the tuples stored in the mapping.
    matches = self.label_to_col_id.get(label, ())
    if len(matches) > 1:
        raise ValueError(
            f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
        )
    return matches[0] if matches else None

def resolve_label_exact_or_error(self, label: Label) -> str:
    """Return the id of the single column whose label equals ``label``.

    Strict variant of ``resolve_label_exact``: a missing label is reported
    as an error instead of being signalled with ``None``.

    Args:
        label: The column label to resolve.

    Returns:
        The id of the one column carrying this label.

    Raises:
        ValueError: If no column carries this label. Any error raised by
            ``resolve_label_exact`` (e.g. for duplicate labels) propagates
            unchanged.
    """
    resolved = self.resolve_label_exact(label)
    if resolved is not None:
        return resolved
    raise ValueError(f"Label {label} not found. {constants.FEEDBACK_LINK}")

@functools.cached_property
def col_id_to_index_name(self) -> typing.Mapping[str, Label]:
"""Get column label for value columns, or index name for index columns"""
Expand Down
15 changes: 5 additions & 10 deletions 15 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,10 @@ def __init__(
)
block = block.set_index([r_mapping[idx_col] for idx_col in idx_cols])
if columns:
block = block.select_columns(list(columns)) # type:ignore
column_ids = [
block.resolve_label_exact_or_error(label) for label in list(columns)
]
block = block.select_columns(column_ids) # type:ignore
if dtype:
bf_dtype = bigframes.dtypes.bigframes_type(dtype)
block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
Expand Down Expand Up @@ -238,15 +241,7 @@ def _find_indices(
return [self._block.value_columns.index(col_id) for col_id in col_ids]

def _resolve_label_exact(self, label) -> Optional[str]:
"""Returns the column id matching the label if there is exactly
one such column. If there are multiple columns with the same name,
raises an error. If there is no such column, returns None."""
matches = self._block.label_to_col_id.get(label, [])
if len(matches) > 1:
raise ValueError(
f"Multiple columns matching id {label} were found. {constants.FEEDBACK_LINK}"
)
return matches[0] if len(matches) != 0 else None
return self._block.resolve_label_exact(label)

def _sql_names(
self,
Expand Down
11 changes: 9 additions & 2 deletions 11 tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@
def test_df_construct_copy(scalars_dfs):
    """Copy-constructing a DataFrame with ``columns=`` should match pandas."""
    columns = ["int64_col", "string_col", "float64_col"]
    scalars_df, scalars_pandas_df = scalars_dfs

    # Make the mapping from label to col_id non-trivial.
    # NOTE(review): presumably reassigning a column gives it a fresh internal
    # id, so the constructor has to translate labels to ids - confirm.
    bf_df = scalars_df.copy()
    bf_df["int64_col"] = bf_df["int64_col"] / 2
    pd_df = scalars_pandas_df.copy()
    pd_df["int64_col"] = pd_df["int64_col"] / 2

    # Results are computed once, from the modified frames only.
    bf_result = dataframe.DataFrame(bf_df, columns=columns).to_pandas()
    pd_result = pd.DataFrame(pd_df, columns=columns)

    pandas.testing.assert_frame_equal(bf_result, pd_result)


Expand Down
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.