From 02de9e77d18e0eead10627e59a1424e38715f911 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Tue, 16 Apr 2024 22:44:24 +0000 Subject: [PATCH 1/5] feat: Series binary ops compatible with more types --- bigframes/core/convert.py | 25 +++++++++--- bigframes/dataframe.py | 4 +- bigframes/operations/base.py | 68 ++++++++++++++++++------------- bigframes/series.py | 3 -- tests/system/small/test_series.py | 20 +++++++++ 5 files changed, 82 insertions(+), 38 deletions(-) diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py index 1ef329b0c7..c5176ffd62 100644 --- a/bigframes/core/convert.py +++ b/bigframes/core/convert.py @@ -21,7 +21,22 @@ import bigframes.series as series -def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series: +def is_series_convertible(obj) -> bool: + if isinstance(obj, series.Series): + return True + if isinstance(obj, pd.Series): + return True + if isinstance(obj, index.Index): + return True + if isinstance(obj, pd.Index): + return True + if pd.api.types.is_list_like(obj): + return True + else: + return False + + +def to_bf_series(obj, default_index: Optional[index.Index], session) -> series.Series: """ Convert a an object to a bigframes series @@ -37,13 +52,13 @@ def to_bf_series(obj, default_index: Optional[index.Index]) -> series.Series: if isinstance(obj, series.Series): return obj if isinstance(obj, pd.Series): - return series.Series(obj) + return series.Series(obj, session=session) if isinstance(obj, index.Index): - return series.Series(obj, default_index) + return series.Series(obj, default_index, session=session) if isinstance(obj, pd.Index): - return series.Series(obj, default_index) + return series.Series(obj, default_index, session=session) if pd.api.types.is_list_like(obj): - return series.Series(obj, default_index) + return series.Series(obj, default_index, session=session) else: raise TypeError(f"Cannot interpret {obj} as series.") diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 2deef95277..5a40e9bf56 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -668,7 +668,9 @@ def _apply_binop( DataFrame(other), op, how=how, reverse=reverse ) elif utils.get_axis_number(axis) == 0: - bf_series = bigframes.core.convert.to_bf_series(other, self.index) + bf_series = bigframes.core.convert.to_bf_series( + other, self.index, self._session + ) return self._apply_series_binop_axis_0(bf_series, op, how, reverse) elif utils.get_axis_number(axis) == 1: pd_series = bigframes.core.convert.to_pd_series(other, self.columns) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 9bfa0500b5..8fa01d7b4c 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -21,6 +21,7 @@ import bigframes.constants as constants import bigframes.core.blocks as blocks +import bigframes.core.convert import bigframes.core.expression as ex import bigframes.core.indexes as indexes import bigframes.core.scalar as scalars @@ -44,7 +45,15 @@ def __init__( *, session: typing.Optional[bigframes.session.Session] = None, ): - block = None + import bigframes.pandas + + read_pandas_func = ( + session.read_pandas + if (session is not None) + else (lambda x: bigframes.pandas.read_pandas(x)) + ) + + block: typing.Optional[blocks.Block] = None if copy is not None and not copy: raise ValueError( f"Series constructor only supports copy=True. {constants.FEEDBACK_LINK}" @@ -55,29 +64,36 @@ def __init__( assert index is None block = data - elif isinstance(data, SeriesMethods): - block = data._block + elif isinstance(data, SeriesMethods) or isinstance(data, pd.Series): + if isinstance(data, pd.Series): + data = read_pandas_func(data) + data_block = data._block if index is not None: # reindex - bf_index = indexes.Index(index) + bf_index = indexes.Index(index, session=session) idx_block = bf_index._block idx_cols = idx_block.value_columns - block_idx, _ = idx_block.join(block, how="left") - block = block_idx.with_index_labels(bf_index.names) + block_idx, _ = idx_block.join(data_block, how="left") + data_block = block_idx.with_index_labels(bf_index.names) + block = data_block - elif isinstance(data, indexes.Index): + elif isinstance(data, indexes.Index) or pd.api.types.is_list_like(data): + data = indexes.Index(data, session=session) if data.nlevels != 1: raise NotImplementedError("Cannot interpret multi-index as Series.") # Reset index to promote index columns to value columns, set default index - block = data._block.reset_index(drop=False) + data_block = data._block.reset_index(drop=False) if index is not None: # Align by offset - bf_index = indexes.Index(index) - idx_block = bf_index._block.reset_index(drop=False) + bf_index = indexes.Index(index, session=session) + idx_block = bf_index._block.reset_index( + drop=False + ) # reset to align by offsets, and then reset back idx_cols = idx_block.value_columns - block, (l_mapping, _) = idx_block.join(block, how="left") - block = block.set_index([l_mapping[col] for col in idx_cols]) - block = block.with_index_labels(bf_index.names) + data_block, (l_mapping, _) = idx_block.join(data_block, how="left") + data_block = data_block.set_index([l_mapping[col] for col in idx_cols]) + data_block = data_block.with_index_labels(bf_index.names) + block = data_block if block: if name: @@ -91,8 +107,6 @@ def __init__( block.value_columns, ops.AsTypeOp(to_type=dtype) ) else: - import bigframes.pandas - pd_series = pd.Series( data=data, index=index, dtype=dtype, name=name # type:ignore ) @@ -100,13 +114,10 @@ def __init__( if pd_series.name is None: # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1) - if session: - block = session.read_pandas(pd_dataframe)._get_block() - else: - # Uses default global session - block = bigframes.pandas.read_pandas(pd_dataframe)._get_block() + block = read_pandas_func(pd_dataframe)._get_block() # type: ignore if pd_series.name is None: - block = block.with_column_labels([None]) + block = block.with_column_labels([None]) # type: ignore + assert block is not None self._block: blocks.Block = block @property @@ -145,17 +156,16 @@ def _apply_binary_op( reverse: bool = False, ) -> series.Series: """Applies a binary operator to the series and other.""" - if isinstance(other, pd.Series): - # TODO: Convert to BigQuery DataFrames series - raise NotImplementedError( - f"Pandas series not supported as operand. {constants.FEEDBACK_LINK}" + if bigframes.core.convert.is_series_convertible(other): + self_index = indexes.Index(self._block) + other_series = bigframes.core.convert.to_bf_series( + other, self_index, self._block.session ) - if isinstance(other, series.Series): - (self_col, other_col, block) = self._align(other, how=alignment) + (self_col, other_col, block) = self._align(other_series, how=alignment) name = self._name if ( - isinstance(other, series.Series) + hasattr(other, "name") and other.name != self._name and alignment == "outer" ): @@ -166,7 +176,7 @@ def _apply_binary_op( block, result_id = block.project_expr(expr, name) return series.Series(block.select_column(result_id)) - else: + else: # Scalar binop name = self._name expr = op.as_expr( ex.const(other) if reverse else self._value_column, diff --git a/bigframes/series.py b/bigframes/series.py index 2f9123f9a3..ea6f692d2d 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -812,9 +812,6 @@ def combine_first(self, other: Series) -> Series: return result def update(self, other: Union[Series, Sequence, Mapping]) -> None: - import bigframes.core.convert - - other = bigframes.core.convert.to_bf_series(other, default_index=None) result = self._apply_binary_op( other, ops.coalesce_op, reverse=True, alignment="left" ) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index d27cd0a236..76b17626ef 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -1269,6 +1269,26 @@ def test_binop_right_filtered(scalars_dfs): ) +@pytest.mark.parametrize( + ("other",), + [ + ([-1.4, 2.3, None],), + (pd.Index([-1.4, 2.3, None]),), + (pd.Series([-1.4, 2.3, None], index=[44, 2, 1]),), + ], +) +def test_series_binop_w_other_types(scalars_dfs, other): + scalars_df, scalars_pandas_df = scalars_dfs + + bf_result = (scalars_df["int64_col"].head(3) + other).to_pandas() + pd_result = scalars_pandas_df["int64_col"].head(3) + other + + assert_series_equal( + bf_result, + pd_result, + ) + + @skip_legacy_pandas def test_series_combine_first(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs From 0d3c0d8a0e588045027b9ddf3de37b262d373694 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Wed, 17 Apr 2024 19:26:04 +0000 Subject: [PATCH 2/5] fix series name issue and remove obsolete test --- bigframes/operations/base.py | 4 +++- tests/system/small/test_series.py | 12 +----------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 8fa01d7b4c..03b1667c12 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -82,7 +82,9 @@ def __init__( if data.nlevels != 1: raise NotImplementedError("Cannot interpret multi-index as Series.") # Reset index to promote index columns to value columns, set default index - data_block = data._block.reset_index(drop=False) + data_block = data._block.reset_index(drop=False).with_column_labels( + data.names + ) if index is not None: # Align by offset bf_index = indexes.Index(index, session=session) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 76b17626ef..8f43772d01 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -949,17 +949,6 @@ def test_reset_index_no_drop(scalars_df_index, scalars_pandas_df_index, name): pd.testing.assert_frame_equal(bf_result.to_pandas(), pd_result) -def test_series_add_pandas_series_not_implemented(scalars_dfs): - scalars_df, _ = scalars_dfs - with pytest.raises(NotImplementedError): - ( - scalars_df["float64_col"] - + pd.Series( - [1, 1, 1, 1], - ) - ).to_pandas() - - def test_copy(scalars_df_index, scalars_pandas_df_index): col_name = "float64_col" # Expect mutation on original not to effect_copy @@ -1277,6 +1266,7 @@ def test_binop_right_filtered(scalars_dfs): (pd.Series([-1.4, 2.3, None], index=[44, 2, 1]),), ], ) +@skip_legacy_pandas def test_series_binop_w_other_types(scalars_dfs, other): scalars_df, scalars_pandas_df = scalars_dfs From 65465c4e8a784527876230d0895f888985e3e6e7 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 19 Apr 2024 00:54:54 +0000 Subject: [PATCH 3/5] better handle dict and dtype object in series constructor --- bigframes/core/convert.py | 4 ++++ bigframes/dataframe.py | 8 ++++--- bigframes/operations/base.py | 41 ++++++++++++++++++------------------ 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/bigframes/core/convert.py b/bigframes/core/convert.py index c5176ffd62..7bfca82779 100644 --- a/bigframes/core/convert.py +++ b/bigframes/core/convert.py @@ -57,6 +57,8 @@ def to_bf_series(obj, default_index: Optional[index.Index], session) -> series.S return series.Series(obj, default_index, session=session) if isinstance(obj, pd.Index): return series.Series(obj, default_index, session=session) + if pd.api.types.is_dict_like(obj): + return series.Series(obj, session=session) if pd.api.types.is_list_like(obj): return series.Series(obj, default_index, session=session) else: @@ -84,6 +86,8 @@ def to_pd_series(obj, default_index: pd.Index) -> pd.Series: return pd.Series(obj.to_pandas(), default_index) if isinstance(obj, pd.Index): return pd.Series(obj, default_index) + if pd.api.types.is_dict_like(obj): + return pd.Series(obj) if pd.api.types.is_list_like(obj): return pd.Series(obj, default_index) else: diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 0fac1a49d2..723a15a807 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -155,9 +155,11 @@ def __init__( if columns: block = block.select_columns(list(columns)) # type:ignore if dtype: - block = block.multi_apply_unary_op( - block.value_columns, ops.AsTypeOp(to_type=dtype) - ) + # just ignore object dtype if provided + if dtype not in {numpy.dtypes.ObjectDType, "object"}: + block = block.multi_apply_unary_op( + block.value_columns, ops.AsTypeOp(to_type=dtype) + ) self._block = block else: diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index 03b1667c12..cf9f8e5bb6 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -17,6 +17,7 @@ import typing import bigframes_vendored.pandas.pandas._typing as vendored_pandas_typing +import numpy import pandas as pd import bigframes.constants as constants @@ -64,9 +65,16 @@ def __init__( assert index is None block = data - elif isinstance(data, SeriesMethods) or isinstance(data, pd.Series): + # interpret these cases as both index and data + elif ( + isinstance(data, SeriesMethods) + or isinstance(data, pd.Series) + or pd.api.types.is_dict_like(data) + ): if isinstance(data, pd.Series): data = read_pandas_func(data) + elif pd.api.types.is_dict_like(data): + data = read_pandas_func(pd.Series(data)) data_block = data._block if index is not None: # reindex @@ -77,6 +85,7 @@ def __init__( data_block = block_idx.with_index_labels(bf_index.names) block = data_block + # list-like data that will get default index elif isinstance(data, indexes.Index) or pd.api.types.is_list_like(data): data = indexes.Index(data, session=session) if data.nlevels != 1: @@ -97,29 +106,19 @@ def __init__( data_block = data_block.with_index_labels(bf_index.names) block = data_block - if block: - if name: - if not isinstance(name, typing.Hashable): - raise ValueError( - f"BigQuery DataFrames only supports hashable series names. {constants.FEEDBACK_LINK}" - ) - block = block.with_column_labels([name]) - if dtype: + assert block is not None + if name: + if not isinstance(name, typing.Hashable): + raise ValueError( + f"BigQuery DataFrames only supports hashable series names. {constants.FEEDBACK_LINK}" + ) + block = block.with_column_labels([name]) + if dtype: + # just ignore object dtype if provided + if dtype not in {numpy.dtypes.ObjectDType, "object"}: block = block.multi_apply_unary_op( block.value_columns, ops.AsTypeOp(to_type=dtype) ) - else: - pd_series = pd.Series( - data=data, index=index, dtype=dtype, name=name # type:ignore - ) - pd_dataframe = pd_series.to_frame() - if pd_series.name is None: - # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename - pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1) - block = read_pandas_func(pd_dataframe)._get_block() # type: ignore - if pd_series.name is None: - block = block.with_column_labels([None]) # type: ignore - assert block is not None self._block: blocks.Block = block @property From c8b123b1d7f365bbe2f2c73fc2f794140984956a Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Fri, 19 Apr 2024 18:12:45 +0000 Subject: [PATCH 4/5] generalize series constructor to scalars --- bigframes/core/__init__.py | 12 --- bigframes/core/compile/scalar_op_compiler.py | 2 +- bigframes/dataframe.py | 11 +-- bigframes/operations/base.py | 24 ++++-- bigframes/series.py | 6 -- tests/system/small/test_series.py | 80 +++++++++++++++++--- 6 files changed, 95 insertions(+), 40 deletions(-) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index 9e6b86fc30..04291edbb1 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -117,18 +117,6 @@ def _compiled_schema(self) -> schemata.ArraySchema: ) return schemata.ArraySchema(items) - def validate_schema(self): - tree_derived = self.node.schema - ibis_derived = self._compiled_schema - if tree_derived.names != ibis_derived.names: - raise ValueError( - f"Unexpected names internal {tree_derived.names} vs compiled {ibis_derived.names}" - ) - if tree_derived.dtypes != ibis_derived.dtypes: - raise ValueError( - f"Unexpected types internal {tree_derived.dtypes} vs compiled {ibis_derived.dtypes}" - ) - def _try_evaluate_local(self): """Use only for unit testing paths - not fully featured. Will throw exception if fails.""" import ibis diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index 072d974b39..a65ff6fe0c 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1366,7 +1366,7 @@ def clip_op( @scalar_op_compiler.register_nary_op(ops.case_when_op) -def switch_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value: +def case_when_op(*cases_and_outputs: ibis_types.Value) -> ibis_types.Value: # ibis can handle most type coercions, but we need to force bool -> int # TODO: dispatch coercion depending on bigframes dtype schema result_values = cases_and_outputs[1::2] diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 7a51a6c710..4f9ee44f09 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -105,6 +105,9 @@ def __init__( raise ValueError( f"DataFrame constructor only supports copy=True. {constants.FEEDBACK_LINK}" ) + # just ignore object dtype if provided + if dtype in {numpy.dtypes.ObjectDType, "object"}: + dtype = None # Check to see if constructing from BigQuery-backed objects before # falling back to pandas constructor @@ -155,11 +158,9 @@ def __init__( if columns: block = block.select_columns(list(columns)) # type:ignore if dtype: - # just ignore object dtype if provided - if dtype not in {numpy.dtypes.ObjectDType, "object"}: - block = block.multi_apply_unary_op( - block.value_columns, ops.AsTypeOp(to_type=dtype) - ) + block = block.multi_apply_unary_op( + block.value_columns, ops.AsTypeOp(to_type=dtype) + ) self._block = block else: diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index cf9f8e5bb6..c22d824f5b 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -48,6 +48,10 @@ def __init__( ): import bigframes.pandas + # just ignore object dtype if provided + if dtype in {numpy.dtypes.ObjectDType, "object"}: + dtype = None + read_pandas_func = ( session.read_pandas if (session is not None) @@ -106,6 +110,18 @@ def __init__( data_block = data_block.with_index_labels(bf_index.names) block = data_block + else: # Scalar case + if index is not None: + bf_index = indexes.Index(index, session=session) + else: + bf_index = indexes.Index( + [] if (data is None) else [0], + session=session, + dtype=bigframes.dtypes.INT_DTYPE, + ) + block, _ = bf_index._block.create_constant(data, dtype) + block = block.with_column_labels([name]) + assert block is not None if name: if not isinstance(name, typing.Hashable): @@ -114,11 +130,9 @@ def __init__( ) block = block.with_column_labels([name]) if dtype: - # just ignore object dtype if provided - if dtype not in {numpy.dtypes.ObjectDType, "object"}: - block = block.multi_apply_unary_op( - block.value_columns, ops.AsTypeOp(to_type=dtype) - ) + block = block.multi_apply_unary_op( + block.value_columns, ops.AsTypeOp(to_type=dtype) + ) self._block: blocks.Block = block @property diff --git a/bigframes/series.py b/bigframes/series.py index 7c3e5ad9c5..5184d4bf1d 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -20,7 +20,6 @@ import inspect import itertools import numbers -import os import textwrap import typing from typing import Any, cast, Literal, Mapping, Optional, Sequence, Tuple, Union @@ -73,11 +72,6 @@ def __init__(self, *args, **kwargs): self._query_job: Optional[bigquery.QueryJob] = None super().__init__(*args, **kwargs) - # Runs strict validations to ensure internal type predictions and ibis are completely in sync - # Do not execute these validations outside of testing suite. - if "PYTEST_CURRENT_TEST" in os.environ: - self._block.expr.validate_schema() - @property def dt(self) -> dt.DatetimeMethods: return dt.DatetimeMethods(self._block) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index cefe8102f4..38aed19f05 100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -126,6 +126,75 @@ def test_series_construct_from_list(): pd.testing.assert_series_equal(bf_result, pd_result) +def test_series_construct_reindex(): + bf_result = series.Series( + series.Series({1: 10, 2: 30, 3: 30}), index=[3, 2], dtype="Int64" + ).to_pandas() + pd_result = pd.Series(pd.Series({1: 10, 2: 30, 3: 30}), index=[3, 2], dtype="Int64") + + # BigQuery DataFrame default indices use nullable Int64 always + pd_result.index = pd_result.index.astype("Int64") + + pd.testing.assert_series_equal(bf_result, pd_result) + + +def test_series_construct_from_list_w_index(): + bf_result = series.Series( + [1, 1, 2, 3, 5, 8, 13], index=[10, 20, 30, 40, 50, 60, 70], dtype="Int64" + ).to_pandas() + pd_result = pd.Series( + [1, 1, 2, 3, 5, 8, 13], index=[10, 20, 30, 40, 50, 60, 70], dtype="Int64" + ) + + # BigQuery DataFrame default indices use nullable Int64 always + pd_result.index = pd_result.index.astype("Int64") + + pd.testing.assert_series_equal(bf_result, pd_result) + + +def test_series_construct_empty(session: bigframes.Session): + bf_series: series.Series = series.Series(session=session) + pd_series: pd.Series = pd.Series() + + bf_result = bf_series.empty + pd_result = pd_series.empty + + assert pd_result + assert bf_result == pd_result + + +def test_series_construct_scalar_no_index(): + bf_result = series.Series("hello world", dtype="string[pyarrow]").to_pandas() + pd_result = pd.Series("hello world", dtype="string[pyarrow]") + + # BigQuery DataFrame default indices use nullable Int64 always + pd_result.index = pd_result.index.astype("Int64") + + pd.testing.assert_series_equal(bf_result, pd_result) + + +def test_series_construct_scalar_w_index(): + bf_result = series.Series( + "hello world", dtype="string[pyarrow]", index=[0, 2, 1] + ).to_pandas() + pd_result = pd.Series("hello world", dtype="string[pyarrow]", index=[0, 2, 1]) + + # BigQuery DataFrame default indices use nullable Int64 always + pd_result.index = pd_result.index.astype("Int64") + + pd.testing.assert_series_equal(bf_result, pd_result) + + +def test_series_construct_nan(): + bf_result = series.Series(numpy.nan).to_pandas() + pd_result = pd.Series(numpy.nan) + + pd_result.index = pd_result.index.astype("Int64") + pd_result = pd_result.astype("Float64") + + pd.testing.assert_series_equal(bf_result, pd_result) + + def test_series_construct_from_list_escaped_strings(): """Check that special characters are supported.""" strings = [ @@ -1781,17 +1850,6 @@ def test_empty_true_row_filter(scalars_dfs): assert pd_result == bf_result -def test_empty_true_memtable(session: bigframes.Session): - bf_series: series.Series = series.Series(session=session) - pd_series: pd.Series = pd.Series() - - bf_result = bf_series.empty - pd_result = pd_series.empty - - assert pd_result - assert bf_result == pd_result - - def test_series_names(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs From 249be961b8b49811773c8e4e21c30cc3a10ebd00 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Mon, 22 Apr 2024 23:06:00 +0000 Subject: [PATCH 5/5] fix dtype constructor arg handling to apply locally if possible --- bigframes/operations/base.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index c22d824f5b..b003ce59cc 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -78,7 +78,8 @@ def __init__( if isinstance(data, pd.Series): data = read_pandas_func(data) elif pd.api.types.is_dict_like(data): - data = read_pandas_func(pd.Series(data)) + data = read_pandas_func(pd.Series(data, dtype=dtype)) # type: ignore + dtype = None data_block = data._block if index is not None: # reindex @@ -91,7 +92,10 @@ def __init__( # list-like data that will get default index elif isinstance(data, indexes.Index) or pd.api.types.is_list_like(data): - data = indexes.Index(data, session=session) + data = indexes.Index(data, dtype=dtype, session=session) + dtype = ( + None # set to none as it has already been applied, avoid re-cast later + ) if data.nlevels != 1: raise NotImplementedError("Cannot interpret multi-index as Series.") # Reset index to promote index columns to value columns, set default index @@ -120,6 +124,7 @@ def __init__( dtype=bigframes.dtypes.INT_DTYPE, ) block, _ = bf_index._block.create_constant(data, dtype) + dtype = None block = block.with_column_labels([name]) assert block is not None