From 055a53bb90ad340d639ea391406d797bb1805820 Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Thu, 20 Feb 2025 20:59:32 +0000
Subject: [PATCH 1/3] feat: support routines with ARRAY return type in `read_gbq_function`

---
 bigframes/core/compile/ibis_types.py          | 17 ++++--
 bigframes/dtypes.py                           |  1 +
 bigframes/functions/_function_session.py      |  6 ++
 bigframes/functions/function.py               | 12 ++--
 bigframes/operations/remote_function_ops.py   | 20 +++----
 bigframes/series.py                           |  9 ++-
 tests/system/conftest.py                      |  5 ++
 .../large/functions/test_remote_function.py   |  4 ++
 .../small/functions/test_remote_function.py   | 59 ++++++++++++++-----
 tests/unit/functions/test_remote_function.py  |  6 ++
 10 files changed, 102 insertions(+), 37 deletions(-)

diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py
index af2b7908ad..5ab66c9ecf 100644
--- a/bigframes/core/compile/ibis_types.py
+++ b/bigframes/core/compile/ibis_types.py
@@ -474,10 +474,19 @@ def ibis_array_output_type_from_python_type(t: type) -> ibis_dtypes.DataType:
     return python_type_to_ibis_type(t)
 
 
-def ibis_type_from_type_kind(tk: bigquery.StandardSqlTypeNames) -> ibis_dtypes.DataType:
+def ibis_type_from_bigquery_type(
+    type_: bigquery.StandardSqlDataType,
+) -> ibis_dtypes.DataType:
     """Convert bq type to ibis. Only to be used for remote functions, does not handle all types."""
-    if tk not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
+    if type_.type_kind not in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS:
         raise UnsupportedTypeError(
-            tk, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
+            type_.type_kind, bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
+        )
+    elif type_.type_kind == "ARRAY":
+        return ibis_dtypes.Array(
+            value_type=ibis_type_from_bigquery_type(
+                typing.cast(bigquery.StandardSqlDataType, type_.array_element_type)
+            )
         )
-    return third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
+    else:
+        return third_party_ibis_bqtypes.BigQueryType.to_ibis(type_.type_kind)
diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index e4db904210..54b621a0f8 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -874,4 +874,5 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
     "INT64",
     "INTEGER",
     "STRING",
+    "ARRAY",
 }
diff --git a/bigframes/functions/_function_session.py b/bigframes/functions/_function_session.py
index a0518978a3..93b5c4c596 100644
--- a/bigframes/functions/_function_session.py
+++ b/bigframes/functions/_function_session.py
@@ -501,6 +501,7 @@ def try_delattr(attr):
         try_delattr("bigframes_remote_function")
         try_delattr("input_dtypes")
         try_delattr("output_dtype")
+        try_delattr("bigframes_bigquery_function_output_dtype")
         try_delattr("is_row_processor")
         try_delattr("ibis_node")
 
@@ -589,6 +590,11 @@ def try_delattr(attr):
                 ibis_signature.output_type
             )
         )
+        func.bigframes_bigquery_function_output_dtype = (
+            bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(
+                ibis_output_type_for_bqrf
+            )
+        )
         func.is_row_processor = is_row_processor
         func.ibis_node = node
 
diff --git a/bigframes/functions/function.py b/bigframes/functions/function.py
index ef2c81a953..c2809b96eb 100644
--- a/bigframes/functions/function.py
+++ b/bigframes/functions/function.py
@@ -56,8 +56,10 @@ class ReturnTypeMissingError(ValueError):
 # TODO: Move this to compile folder
 def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignature:
     if routine.return_type:
-        ibis_output_type = bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
-            routine.return_type.type_kind
+        ibis_output_type = (
+            bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
+                routine.return_type
+            )
         )
     else:
         raise ReturnTypeMissingError
@@ -82,8 +84,8 @@ def ibis_signature_from_routine(routine: bigquery.Routine) -> _utils.IbisSignatu
     return _utils.IbisSignature(
         parameter_names=[arg.name for arg in routine.arguments],
         input_types=[
-            bigframes.core.compile.ibis_types.ibis_type_from_type_kind(
-                arg.data_type.type_kind
+            bigframes.core.compile.ibis_types.ibis_type_from_bigquery_type(
+                arg.data_type
             )
             if arg.data_type
             else None
@@ -233,6 +235,8 @@ def func(*bigframes_args, **bigframes_kwargs):
         else ibis_signature.output_type
     )
 
+    func.bigframes_bigquery_function_output_dtype = bigframes.core.compile.ibis_types.ibis_dtype_to_bigframes_dtype(ibis_signature.output_type)  # type: ignore
+
     func.is_row_processor = is_row_processor  # type: ignore
     func.ibis_node = node  # type: ignore
     return func
diff --git a/bigframes/operations/remote_function_ops.py b/bigframes/operations/remote_function_ops.py
index 5b738c0bb5..9de8875de9 100644
--- a/bigframes/operations/remote_function_ops.py
+++ b/bigframes/operations/remote_function_ops.py
@@ -31,17 +31,17 @@ def expensive(self) -> bool:
 
     def output_type(self, *input_types):
         # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
-        if hasattr(self.func, "output_dtype"):
-            if dtypes.is_array_like(self.func.output_dtype):
-                # TODO(b/284515241): remove this special handling to support
-                # array output types once BQ remote functions support ARRAY.
-                # Until then, use json serialized strings at the remote function
-                # level, and parse that to the intended output type at the
-                # bigframes level.
-                return dtypes.STRING_DTYPE
-            return self.func.output_dtype
+        if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
+            # if dtypes.is_array_like(self.func.output_dtype):
+            #     # TODO(b/284515241): remove this special handling to support
+            #     # array output types once BQ remote functions support ARRAY.
+            #     # Until then, use json serialized strings at the remote function
+            #     # level, and parse that to the intended output type at the
+            #     # bigframes level.
+            #     return dtypes.STRING_DTYPE
+            return self.func.bigframes_bigquery_function_output_dtype
         else:
-            raise AttributeError("output_dtype not defined")
+            raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
 
 
 @dataclasses.dataclass(frozen=True)
diff --git a/bigframes/series.py b/bigframes/series.py
index af9fce6e20..462f8f82e6 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1539,9 +1539,12 @@ def apply(
             ops.RemoteFunctionOp(func=func, apply_on_null=True)
         )
 
-        # if the output is an array, reconstruct it from the json serialized
-        # string form
-        if bigframes.dtypes.is_array_like(func.output_dtype):
+        # If the result type is string but the function output is intended to
+        # be an array, reconstruct the array from the string assuming it is a
+        # json serialized form of the array.
+        if bigframes.dtypes.is_string_like(
+            result_series.dtype
+        ) and bigframes.dtypes.is_array_like(func.output_dtype):
             import bigframes.bigquery as bbq
 
             result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 29234bc4ef..d40d0e0eef 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -251,6 +251,11 @@ def table_id_unique(dataset_id: str):
     return f"{dataset_id}.{prefixer.create_prefix()}"
 
 
+@pytest.fixture(scope="function")
+def routine_id_unique(dataset_id: str):
+    return f"{dataset_id}.{prefixer.create_prefix()}"
+
+
 @pytest.fixture(scope="session")
 def scalars_schema(bigquery_client: bigquery.Client):
     # TODO(swast): Add missing scalar data types such as BIGNUMERIC.
diff --git a/tests/system/large/functions/test_remote_function.py b/tests/system/large/functions/test_remote_function.py
index 54ba0549a0..7363e370bb 100644
--- a/tests/system/large/functions/test_remote_function.py
+++ b/tests/system/large/functions/test_remote_function.py
@@ -2193,6 +2193,10 @@ def foo(x, y, z):
             )
         )
     )
+    assert (
+        getattr(foo, "bigframes_bigquery_function_output_dtype")
+        == bigframes.dtypes.STRING_DTYPE
+    )
 
     # Fails to apply on dataframe with incompatible number of columns
     with pytest.raises(
diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py
index 0dc8960f62..34a810e587 100644
--- a/tests/system/small/functions/test_remote_function.py
+++ b/tests/system/small/functions/test_remote_function.py
@@ -14,6 +14,7 @@
 
 import inspect
 import re
+import textwrap
 
 import google.api_core.exceptions
 from google.cloud import bigquery
@@ -27,6 +28,7 @@
 import bigframes.exceptions
 from bigframes.functions import _utils as bff_utils
 from bigframes.functions import function as bff
+import bigframes.session._io.bigquery
 from tests.system.utils import assert_pandas_df_equal
 
 _prefixer = test_utils.prefixer.Prefixer("bigframes", "")
@@ -632,7 +634,6 @@ def add_one(x):
     )(add_one)
 
 
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_detects_invalid_function(session, dataset_id):
     dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
     with pytest.raises(ValueError) as e:
@@ -705,21 +706,49 @@ def square1(x):
     assert_pandas_df_equal(s1_result.to_pandas(), s2_result.to_pandas())
 
 
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_runs_existing_udf(session):
     func = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only")
     got = func("AURÉLIE")
     assert got == "aurÉlie"
 
 
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_runs_existing_udf_4_params(session):
     func = session.read_gbq_function("bqutil.fn.cw_instr4")
     got = func("TestStr123456Str", "Str", 1, 2)
     assert got == 14
 
 
-@pytest.mark.flaky(retries=2, delay=120)
+def test_read_gbq_function_runs_existing_udf_array_output(session, routine_id_unique):
+    bigframes.session._io.bigquery.start_query_with_client(
+        session.bqclient,
+        textwrap.dedent(
+            f"""
+            CREATE OR REPLACE FUNCTION `{routine_id_unique}`(x STRING)
+            RETURNS ARRAY<STRING>
+            AS (
+                [x, x]
+            )
+            """
+        ),
+        job_config=bigquery.QueryJobConfig(),
+    )
+    func = session.read_gbq_function(routine_id_unique)
+
+    # Test on scalar value
+    got = func("hello")
+    assert got == ["hello", "hello"]
+
+    # Test on a series, assert pandas parity
+    pd_s = pd.Series(["alpha", "beta", "gamma"])
+    bf_s = session.read_pandas(pd_s)
+    pd_result = pd_s.apply(func)
+    bf_result = bf_s.apply(func).to_pandas()
+    assert bigframes.dtypes.is_array_string_like(bf_result.dtype)
+    pd.testing.assert_series_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
 def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
     dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
     arg = bigquery.RoutineArgument(
@@ -754,6 +783,10 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
     assert square.bigframes_remote_function == str(routine.reference)
     assert square.input_dtypes == (bigframes.dtypes.INT_DTYPE,)
     assert square.output_dtype == bigframes.dtypes.INT_DTYPE
+    assert (
+        square.bigframes_bigquery_function_output_dtype
+        == bigframes.dtypes.INT_DTYPE
+    )
 
     src = {"x": [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]}
 
@@ -772,7 +805,6 @@ def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
     )
 
 
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_requires_explicit_types(
     session, bigquery_client, dataset_id
 ):
@@ -863,7 +895,6 @@ def test_read_gbq_function_requires_explicit_types(
         ),
     ],
 )
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_respects_python_output_type(
     request, session_fixture, bigquery_client, dataset_id, array_type, expected_data
 ):
@@ -906,7 +937,6 @@ def test_read_gbq_function_respects_python_output_type(
         pytest.param(list[str], id="list-str"),
     ],
 )
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_supports_python_output_type_only_for_string_outputs(
     session, bigquery_client, dataset_id, array_type
 ):
@@ -945,7 +975,6 @@ def test_read_gbq_function_supports_python_output_type_only_for_string_outputs(
         pytest.param(list[str], id="list-str"),
     ],
 )
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_supported_python_output_type(
     session, bigquery_client, dataset_id, array_type
 ):
@@ -992,7 +1021,6 @@ def test_df_apply_scalar_func(session, scalars_dfs):
     )
 
 
-@pytest.mark.flaky(retries=2, delay=120)
 def test_read_gbq_function_multiple_inputs_not_a_row_processor(session):
     with pytest.raises(ValueError) as context:
         # The remote function has two args, which cannot be row processed. Throw
@@ -1214,20 +1242,19 @@ def should_mask(name: str) -> bool:
     repr(s.mask(should_mask, "REDACTED"))
 
 
-@pytest.mark.flaky(retries=2, delay=120)
-def test_read_gbq_function_application_repr(session, dataset_id, scalars_df_index):
-    gbq_function = f"{dataset_id}.should_mask"
-
+def test_read_gbq_function_application_repr(
+    session, routine_id_unique, scalars_df_index
+):
     # This function deliberately has a param with name "name", this is to test
     # a specific ibis' internal handling of object names
     session.bqclient.query_and_wait(
-        f"CREATE OR REPLACE FUNCTION `{gbq_function}`(name STRING) RETURNS BOOL AS (MOD(LENGTH(name), 2) = 1)"
+        f"CREATE OR REPLACE FUNCTION `{routine_id_unique}`(name STRING) RETURNS BOOL AS (MOD(LENGTH(name), 2) = 1)"
     )
-    routine = session.bqclient.get_routine(gbq_function)
+    routine = session.bqclient.get_routine(routine_id_unique)
     assert "name" in [arg.name for arg in routine.arguments]
 
     # read the function and apply to dataframe
-    should_mask = session.read_gbq_function(gbq_function)
+    should_mask = session.read_gbq_function(routine_id_unique)
 
     s = scalars_df_index["string_col"]
 
diff --git a/tests/unit/functions/test_remote_function.py b/tests/unit/functions/test_remote_function.py
index 413a694680..d377fb4d49 100644
--- a/tests/unit/functions/test_remote_function.py
+++ b/tests/unit/functions/test_remote_function.py
@@ -66,6 +66,12 @@ def test_supported_types_correspond():
     ibis_types_from_bigquery = {
         third_party_ibis_bqtypes.BigQueryType.to_ibis(tk)
         for tk in bigframes.dtypes.RF_SUPPORTED_IO_BIGQUERY_TYPEKINDS
+        # TODO(b/284515241): ARRAY is the only exception because it is supported
+        # as an output type of the BQ routine in the read_gbq_function path but
+        # not in the remote function path. Remove this handling once BQ remote
+        # functions support ARRAY output and the bigframes remote functions
+        # utilize that to support array output.
+        if tk != "ARRAY"
     }
 
     assert ibis_types_from_python == ibis_types_from_bigquery

From de500cf26c481665ad08681a11c512c45dc8057f Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Fri, 21 Feb 2025 00:00:09 +0000
Subject: [PATCH 2/3] remove commented out code, fix test to work for all python versions

---
 bigframes/operations/remote_function_ops.py          | 7 -------
 tests/system/small/functions/test_remote_function.py | 4 ++--
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/bigframes/operations/remote_function_ops.py b/bigframes/operations/remote_function_ops.py
index 9de8875de9..6d8b20f3f5 100644
--- a/bigframes/operations/remote_function_ops.py
+++ b/bigframes/operations/remote_function_ops.py
@@ -32,13 +32,6 @@ def expensive(self) -> bool:
     def output_type(self, *input_types):
         # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
         if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
-            # if dtypes.is_array_like(self.func.output_dtype):
-            #     # TODO(b/284515241): remove this special handling to support
-            #     # array output types once BQ remote functions support ARRAY.
-            #     # Until then, use json serialized strings at the remote function
-            #     # level, and parse that to the intended output type at the
-            #     # bigframes level.
-            #     return dtypes.STRING_DTYPE
             return self.func.bigframes_bigquery_function_output_dtype
         else:
             raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py
index 34a810e587..ea0082c269 100644
--- a/tests/system/small/functions/test_remote_function.py
+++ b/tests/system/small/functions/test_remote_function.py
@@ -742,10 +742,10 @@ def test_read_gbq_function_runs_existing_udf_array_output(session, routine_id_un
     pd_s = pd.Series(["alpha", "beta", "gamma"])
     bf_s = session.read_pandas(pd_s)
     pd_result = pd_s.apply(func)
-    bf_result = bf_s.apply(func).to_pandas()
+    bf_result = bf_s.apply(func)
     assert bigframes.dtypes.is_array_string_like(bf_result.dtype)
     pd.testing.assert_series_equal(
-        pd_result, bf_result, check_dtype=False, check_index_type=False
+        pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False
     )
 
 

From b6b0a92e1c83a1ff14b1f4cd790bad494fc3f9af Mon Sep 17 00:00:00 2001
From: Shobhit Singh
Date: Fri, 21 Feb 2025 21:12:33 +0000
Subject: [PATCH 3/3] support array output in binary and nary applications of read_gbq_function

---
 bigframes/dataframe.py                               |  9 +-
 bigframes/operations/remote_function_ops.py          | 27 ++----
 bigframes/series.py                                  |  9 ++-
 .../small/functions/test_remote_function.py          | 84 +++++++++++++++++++
 4 files changed, 102 insertions(+), 27 deletions(-)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index c02b182ee3..b4bcce93bc 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -4086,9 +4086,12 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs):
             )
             result_series.name = None
 
-            # if the output is an array, reconstruct it from the json serialized
-            # string form
-            if bigframes.dtypes.is_array_like(func.output_dtype):
+            # If the result type is string but the function output is intended
+            # to be an array, reconstruct the array from the string assuming it
+            # is a json serialized form of the array.
+            if bigframes.dtypes.is_string_like(
+                result_series.dtype
+            ) and bigframes.dtypes.is_array_like(func.output_dtype):
                 import bigframes.bigquery as bbq
 
                 result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
diff --git a/bigframes/operations/remote_function_ops.py b/bigframes/operations/remote_function_ops.py
index 6d8b20f3f5..8505fd1607 100644
--- a/bigframes/operations/remote_function_ops.py
+++ b/bigframes/operations/remote_function_ops.py
@@ -15,7 +15,6 @@
 import dataclasses
 import typing
 
-from bigframes import dtypes
 from bigframes.operations import base_ops
 
 
@@ -48,17 +47,10 @@ def expensive(self) -> bool:
 
     def output_type(self, *input_types):
         # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
-        if hasattr(self.func, "output_dtype"):
-            if dtypes.is_array_like(self.func.output_dtype):
-                # TODO(b/284515241): remove this special handling to support
-                # array output types once BQ remote functions support ARRAY.
-                # Until then, use json serialized strings at the remote function
-                # level, and parse that to the intended output type at the
-                # bigframes level.
-                return dtypes.STRING_DTYPE
-            return self.func.output_dtype
+        if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
+            return self.func.bigframes_bigquery_function_output_dtype
         else:
-            raise AttributeError("output_dtype not defined")
+            raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
 
 
 @dataclasses.dataclass(frozen=True)
@@ -72,14 +64,7 @@ def expensive(self) -> bool:
 
     def output_type(self, *input_types):
         # This property should be set to a valid Dtype by the @remote_function decorator or read_gbq_function method
-        if hasattr(self.func, "output_dtype"):
-            if dtypes.is_array_like(self.func.output_dtype):
-                # TODO(b/284515241): remove this special handling to support
-                # array output types once BQ remote functions support ARRAY.
-                # Until then, use json serialized strings at the remote function
-                # level, and parse that to the intended output type at the
-                # bigframes level.
-                return dtypes.STRING_DTYPE
-            return self.func.output_dtype
+        if hasattr(self.func, "bigframes_bigquery_function_output_dtype"):
+            return self.func.bigframes_bigquery_function_output_dtype
         else:
-            raise AttributeError("output_dtype not defined")
+            raise AttributeError("bigframes_bigquery_function_output_dtype not defined")
diff --git a/bigframes/series.py b/bigframes/series.py
index c7cc6e5fb3..ddf9d52ae4 100644
--- a/bigframes/series.py
+++ b/bigframes/series.py
@@ -1588,9 +1588,12 @@ def combine(
             other, ops.BinaryRemoteFunctionOp(func=func)
         )
 
-        # if the output is an array, reconstruct it from the json serialized
-        # string form
-        if bigframes.dtypes.is_array_like(func.output_dtype):
+        # If the result type is string but the function output is intended to
+        # be an array, reconstruct the array from the string assuming it is a
+        # json serialized form of the array.
+        if bigframes.dtypes.is_string_like(
+            result_series.dtype
+        ) and bigframes.dtypes.is_array_like(func.output_dtype):
             import bigframes.bigquery as bbq
 
             result_dtype = bigframes.dtypes.arrow_dtype_to_bigframes_dtype(
diff --git a/tests/system/small/functions/test_remote_function.py b/tests/system/small/functions/test_remote_function.py
index ea0082c269..99a017c917 100644
--- a/tests/system/small/functions/test_remote_function.py
+++ b/tests/system/small/functions/test_remote_function.py
@@ -749,6 +749,90 @@ def test_read_gbq_function_runs_existing_udf_array_output(session, routine_id_un
     )
 
 
+def test_read_gbq_function_runs_existing_udf_2_params_array_output(
+    session, routine_id_unique
+):
+    bigframes.session._io.bigquery.start_query_with_client(
+        session.bqclient,
+        textwrap.dedent(
+            f"""
+            CREATE OR REPLACE FUNCTION `{routine_id_unique}`(x STRING, y STRING)
+            RETURNS ARRAY<STRING>
+            AS (
+                [x, y]
+            )
+            """
+        ),
+        job_config=bigquery.QueryJobConfig(),
+    )
+    func = session.read_gbq_function(routine_id_unique)
+
+    # Test on scalar value
+    got = func("hello", "world")
+    assert got == ["hello", "world"]
+
+    # Test on series, assert pandas parity
+    pd_df = pd.DataFrame(
+        {"col0": ["alpha", "beta", "gamma"], "col1": ["delta", "theta", "phi"]}
+    )
+    bf_df = session.read_pandas(pd_df)
+    pd_result = pd_df["col0"].combine(pd_df["col1"], func)
+    bf_result = bf_df["col0"].combine(bf_df["col1"], func)
+    assert bigframes.dtypes.is_array_string_like(bf_result.dtype)
+    pd.testing.assert_series_equal(
+        pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False
+    )
+
+
+def test_read_gbq_function_runs_existing_udf_4_params_array_output(
+    session, routine_id_unique
+):
+    bigframes.session._io.bigquery.start_query_with_client(
+        session.bqclient,
+        textwrap.dedent(
+            f"""
+            CREATE OR REPLACE FUNCTION `{routine_id_unique}`(x STRING, y BOOL, z INT64, w FLOAT64)
+            RETURNS ARRAY<STRING>
+            AS (
+                [x, CAST(y AS STRING), CAST(z AS STRING), CAST(w AS STRING)]
+            )
+            """
+        ),
+        job_config=bigquery.QueryJobConfig(),
+    )
+    func = session.read_gbq_function(routine_id_unique)
+
+    # Test on scalar value
+    got = func("hello", True, 1, 2.3)
+    assert got == ["hello", "true", "1", "2.3"]
+
+    # Test on a dataframe, assert pandas parity
+    pd_df = pd.DataFrame(
+        {
+            "col0": ["alpha", "beta", "gamma"],
+            "col1": [True, False, True],
+            "col2": [1, 2, 3],
+            "col3": [4.5, 6, 7.75],
+        }
+    )
+    bf_df = session.read_pandas(pd_df)
+    # Simulate the result directly, since the function cannot be applied
+    # directly on a pandas dataframe with axis=1, as this is a special type of
+    # function with multiple params supported only on a bigframes dataframe.
+    pd_result = pd.Series(
+        [
+            ["alpha", "true", "1", "4.5"],
+            ["beta", "false", "2", "6"],
+            ["gamma", "true", "3", "7.75"],
+        ]
+    )
+    bf_result = bf_df.apply(func, axis=1)
+    assert bigframes.dtypes.is_array_string_like(bf_result.dtype)
+    pd.testing.assert_series_equal(
+        pd_result, bf_result.to_pandas(), check_dtype=False, check_index_type=False
+    )
+
+
 def test_read_gbq_function_reads_udfs(session, bigquery_client, dataset_id):
     dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
     arg = bigquery.RoutineArgument(