Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

feat: add dataframe.insert #770

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions 28 bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1305,6 +1305,34 @@ def nsmallest(
column_ids = self._sql_names(columns)
return DataFrame(block_ops.nsmallest(self._block, n, column_ids, keep=keep))

def insert(
    self,
    loc: int,
    column: blocks.Label,
    value: SingleItemValue,
    allow_duplicates: bool = False,
) -> None:
    """Insert a column into this DataFrame, in place, at position ``loc``.

    Args:
        loc: Zero-based insertion position. Must satisfy
            ``0 <= loc <= len(self.columns)``.
        column: Label of the inserted column.
        value: Content of the inserted column; forwarded unchanged to
            ``_assign_single_item``.
        allow_duplicates: When False (the default), refuse to insert a
            label that is already present in ``self.columns``.

    Raises:
        IndexError: If ``loc`` is negative or greater than the current
            number of columns.
        ValueError: If ``column`` already exists and ``allow_duplicates``
            is False.
    """
    column_count = len(self.columns)
    # ``list.insert`` below accepts negative positions and counts them from
    # the end, which would silently misplace the column. Reject them here
    # together with positions past the end.
    if not 0 <= loc <= column_count:
        raise IndexError(
            f"Column index {loc} is out of bounds with {column_count} total columns."
        )
    if (column in self.columns) and not allow_duplicates:
        raise ValueError(f"cannot insert {column}, already exists")

    # Assign under a generated temporary label so that the rename at the end
    # targets only the new column, even if ``column`` duplicates an existing
    # label.
    temp_column = bigframes.core.guid.generate_guid(prefix=str(column))
    df = self._assign_single_item(temp_column, value)

    block = df._get_block()
    value_columns = typing.cast(List, block.value_columns)
    # The code relies on the freshly assigned column being the last value
    # column: detach it and re-insert it at the requested position.
    value_columns, new_column = value_columns[:-1], value_columns[-1]
    value_columns.insert(loc, new_column)

    block = block.select_columns(value_columns)
    block = block.rename(columns={temp_column: column})

    # Mutate this DataFrame rather than returning a new one.
    self._set_block(block)

def drop(
self,
labels: typing.Any = None,
Expand Down
38 changes: 38 additions & 0 deletions 38 tests/system/small/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,44 @@ def test_get_columns_default(scalars_dfs):
assert result == "default_val"


@pytest.mark.parametrize(
    ("loc", "column", "value", "allow_duplicates"),
    [
        (0, 666, 2, False),
        (5, "float64_col", 2.2, True),
        (13, "rowindex_2", [8, 7, 6, 5, 4, 3, 2, 1, 0], True),
    ],
)
def test_insert(scalars_dfs, loc, column, value, allow_duplicates):
    """Successful inserts must produce the same frame as pandas."""
    scalars_df, scalars_pandas_df = scalars_dfs
    # insert works inplace, so will influence other tests.
    # make a copy to avoid inplace changes.
    bf_df = scalars_df.copy()
    pd_df = scalars_pandas_df.copy()
    bf_df.insert(loc, column, value, allow_duplicates)
    pd_df.insert(loc, column, value, allow_duplicates)

    pd.testing.assert_frame_equal(bf_df.to_pandas(), pd_df, check_dtype=False)


@pytest.mark.parametrize(
    ("loc", "column", "value", "allow_duplicates", "expected_error"),
    [
        # Position past the end of the columns.
        (14, "test", 2, False, IndexError),
        # Existing label without allow_duplicates.
        (12, "int64_col", 2, False, ValueError),
    ],
)
def test_insert_raises(
    scalars_dfs, loc, column, value, allow_duplicates, expected_error
):
    """Invalid inserts must raise the same exception type as pandas.

    ``pytest.raises`` is used instead of a non-strict ``xfail`` mark so that a
    missing exception fails the test, and so that both libraries are checked
    rather than only the first one that raises.
    """
    scalars_df, scalars_pandas_df = scalars_dfs
    # Work on copies: a partially applied in-place insert must not leak into
    # other tests that share the fixture.
    bf_df = scalars_df.copy()
    pd_df = scalars_pandas_df.copy()

    with pytest.raises(expected_error):
        bf_df.insert(loc, column, value, allow_duplicates)
    with pytest.raises(expected_error):
        pd_df.insert(loc, column, value, allow_duplicates)


def test_drop_column(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs
col_name = "int64_col"
Expand Down
45 changes: 45 additions & 0 deletions 45 third_party/bigframes_vendored/pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,51 @@ def reindex_like(self, other):
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def insert(self, loc, column, value, allow_duplicates=False):
    """Insert column into DataFrame at specified location.

    The DataFrame is modified in place. Raises a ValueError if `column` is
    already contained in the DataFrame, unless `allow_duplicates` is set to
    True.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> bpd.options.display.progress_bar = None

        >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

    Insert a new column named 'col3' between 'col1' and 'col2' with all entries set to 5.

        >>> df.insert(1, 'col3', 5)
        >>> df
           col1  col3  col2
        0     1     5     3
        1     2     5     4
        <BLANKLINE>
        [2 rows x 3 columns]

    Insert another column named 'col2' at the beginning of the DataFrame with values [5, 6]

        >>> df.insert(0, 'col2', [5, 6], allow_duplicates=True)
        >>> df
           col2  col1  col3  col2
        0     5     1     5     3
        1     6     2     5     4
        <BLANKLINE>
        [2 rows x 4 columns]

    Args:
        loc (int):
            Insertion index. Must satisfy 0 <= loc <= len(columns).
        column (str, number, or hashable object):
            Label of the inserted column.
        value (Scalar, Series, or array-like):
            Content of the inserted column.
        allow_duplicates (bool, default False):
            Allow duplicate column labels to be created.

    Returns:
        None: The column is inserted into this DataFrame in place.

    Raises:
        IndexError:
            If `loc` is greater than the number of columns.
        ValueError:
            If `column` already exists and `allow_duplicates` is False.
    """
    # Abstract placeholder: this vendored stub only carries the documentation.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def drop(
self, labels=None, *, axis=0, index=None, columns=None, level=None
) -> DataFrame | None:
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.