From 89af6f73113d5f5ba13177e3578bb0285052f6ab Mon Sep 17 00:00:00 2001
From: Chelsea Lin
Date: Tue, 9 Apr 2024 21:36:55 +0000
Subject: [PATCH] feat: Series.struct.dtypes

---
Reviewer note (ignored by `git am`): this patch adds a `dtypes()` method to
the struct accessor in bigframes/operations/structs.py. It returns a pandas
Series indexed by struct field name whose values are the result of
`bigframes.dtypes.arrow_dtype_to_bigframes_dtype` applied to each field's
Arrow type. The vendored accessors module gains the matching documented
method, which only raises NotImplementedError.

 bigframes/operations/structs.py           | 12 ++++++++
 .../pandas/core/arrays/arrow/accessors.py | 29 +++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py
index e8a1af9602..d222f0993b 100644
--- a/bigframes/operations/structs.py
+++ b/bigframes/operations/structs.py
@@ -15,9 +15,11 @@
 from __future__ import annotations
 
 import bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors
+import pandas as pd
 
 from bigframes.core import log_adapter
 import bigframes.dataframe
+import bigframes.dtypes
 import bigframes.operations
 import bigframes.operations.base
 import bigframes.series
@@ -45,3 +47,13 @@ def explode(self) -> bigframes.dataframe.DataFrame:
         return bigframes.pandas.concat(
             [self.field(i) for i in range(pa_type.num_fields)], axis="columns"
         )
+
+    def dtypes(self) -> pd.Series:
+        pa_type = self._dtype.pyarrow_dtype
+        return pd.Series(
+            data=[
+                bigframes.dtypes.arrow_dtype_to_bigframes_dtype(pa_type.field(i).type)
+                for i in range(pa_type.num_fields)
+            ],
+            index=[pa_type.field(i).name for i in range(pa_type.num_fields)],
+        )
diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
index 8e3ea06a3d..bd6e50d096 100644
--- a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
+++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py
@@ -92,3 +92,32 @@ def explode(self):
                 The data corresponding to all child fields.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def dtypes(self):
+        """
+        Return the dtype object of each child field of the struct.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> import pyarrow as pa
+            >>> bpd.options.display.progress_bar = None
+            >>> s = bpd.Series(
+            ...     [
+            ...         {"version": 1, "project": "pandas"},
+            ...         {"version": 2, "project": "pandas"},
+            ...         {"version": 1, "project": "numpy"},
+            ...     ],
+            ...     dtype=bpd.ArrowDtype(pa.struct(
+            ...         [("version", pa.int64()), ("project", pa.string())]
+            ...     ))
+            ... )
+            >>> s.struct.dtypes()
+            version              Int64
+            project    string[pyarrow]
+            dtype: object
+
+        Returns:
+            A *pandas* Series with the data type of all child fields.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)