diff --git a/bigframes/functions/remote_function.py b/bigframes/functions/remote_function.py index 472ac07547..920dc7c039 100644 --- a/bigframes/functions/remote_function.py +++ b/bigframes/functions/remote_function.py @@ -39,6 +39,7 @@ import warnings import ibis +import numpy import pandas import pyarrow import requests @@ -280,6 +281,9 @@ def generate_cloud_function_code( if is_row_processor: # bigframes remote function will send an entire row of data as json, # which would be converted to a pandas series and processed + # Ensure numpy versions match to avoid unpickling problems. See + # internal issue b/347934471. + requirements.append(f"numpy=={numpy.__version__}") requirements.append(f"pandas=={pandas.__version__}") requirements.append(f"pyarrow=={pyarrow.__version__}") if package_requirements: diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index d84d520988..5838ad75b0 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -742,109 +742,6 @@ def test_read_gbq_function_enforces_explicit_types( ) -@pytest.mark.flaky(retries=2, delay=120) -def test_df_apply_axis_1(session, scalars_dfs): - columns = [ - "bool_col", - "int64_col", - "int64_too", - "float64_col", - "string_col", - "bytes_col", - ] - scalars_df, scalars_pandas_df = scalars_dfs - - def add_ints(row): - return row["int64_col"] + row["int64_too"] - - with pytest.warns( - bigframes.exceptions.PreviewWarning, - match="input_types=Series is in preview.", - ): - add_ints_remote = session.remote_function( - bigframes.series.Series, - int, - )(add_ints) - - with pytest.warns( - bigframes.exceptions.PreviewWarning, match="axis=1 scenario is in preview." - ): - bf_result = scalars_df[columns].apply(add_ints_remote, axis=1).to_pandas() - - pd_result = scalars_pandas_df[columns].apply(add_ints, axis=1) - - # bf_result.dtype is 'Int64' while pd_result.dtype is 'object', ignore this - # mismatch by using check_dtype=False. - # - # bf_result.to_numpy() produces an array of numpy.float64's - # (in system_prerelease tests), while pd_result.to_numpy() produces an - # array of ints, ignore this mismatch by using check_exact=False. - pd.testing.assert_series_equal( - pd_result, bf_result, check_dtype=False, check_exact=False - ) - - -@pytest.mark.flaky(retries=2, delay=120) -def test_df_apply_axis_1_ordering(session, scalars_dfs): - columns = ["bool_col", "int64_col", "int64_too", "float64_col", "string_col"] - ordering_columns = ["bool_col", "int64_col"] - scalars_df, scalars_pandas_df = scalars_dfs - - def add_ints(row): - return row["int64_col"] + row["int64_too"] - - add_ints_remote = session.remote_function(bigframes.series.Series, int)(add_ints) - - bf_result = ( - scalars_df[columns] - .sort_values(ordering_columns) - .apply(add_ints_remote, axis=1) - .to_pandas() - ) - pd_result = ( - scalars_pandas_df[columns].sort_values(ordering_columns).apply(add_ints, axis=1) - ) - - # bf_result.dtype is 'Int64' while pd_result.dtype is 'object', ignore this - # mismatch by using check_dtype=False. - # - # bf_result.to_numpy() produces an array of numpy.float64's - # (in system_prerelease tests), while pd_result.to_numpy() produces an - # array of ints, ignore this mismatch by using check_exact=False. - pd.testing.assert_series_equal( - pd_result, bf_result, check_dtype=False, check_exact=False - ) - - -@pytest.mark.flaky(retries=2, delay=120) -def test_df_apply_axis_1_multiindex(session): - pd_df = pd.DataFrame( - {"x": [1, 2, 3], "y": [1.5, 3.75, 5], "z": ["pq", "rs", "tu"]}, - index=pd.MultiIndex.from_tuples([("a", 100), ("a", 200), ("b", 300)]), - ) - bf_df = session.read_pandas(pd_df) - - def add_numbers(row): - return row["x"] + row["y"] - - add_numbers_remote = session.remote_function(bigframes.series.Series, float)( - add_numbers - ) - - bf_result = bf_df.apply(add_numbers_remote, axis=1).to_pandas() - pd_result = pd_df.apply(add_numbers, axis=1) - - # bf_result.dtype is 'Float64' while pd_result.dtype is 'float64', ignore this - # mismatch by using check_dtype=False. - # - # bf_result.index[0].dtype is 'string[pyarrow]' while - # pd_result.index[0].dtype is 'object', ignore this mismatch by using - # check_index_type=False. - pd.testing.assert_series_equal( - pd_result, bf_result, check_dtype=False, check_index_type=False - ) - - def test_df_apply_axis_1_unsupported_callable(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs columns = ["bool_col", "int64_col", "int64_too", "float64_col", "string_col"]