From 408f3de013e855c1197b7c2f6c71f611dfdc1d4a Mon Sep 17 00:00:00 2001
From: Trevor Bergeron
Date: Mon, 25 Sep 2023 21:07:07 +0000
Subject: [PATCH] feat: support casting string to integer or float

---
Review notes (text between the "---" line and the first "diff --git" line is
commentary and is not part of the commit):

* bigframes/dtypes.py: in the hunk located under cast_ibis_value, the entry
  keyed by ibis_dtypes.string changes from an empty tuple to
  (ibis_dtypes.int64, ibis_dtypes.float64). Presumably this mapping lists the
  target types each source type may be cast to; confirm against the full
  function, which is not visible in this patch.
* tests/system/small/test_series.py: adds test_string_astype_int and
  test_string_astype_float. Each builds a pandas Series of numeric-looking
  strings, wraps it in series.Series, casts both sides with astype("Int64") /
  astype("Float64"), and compares the results with
  pd.testing.assert_series_equal(..., check_index_type=False).
* NOTE(review): this copy of the patch was recovered from a version whose line
  breaks and indentation had been collapsed. The layout below is
  reconstructed; verify the indentation of context lines against the target
  files before applying. Whitespace inside string literals (for example
  " -03" and " -03.235") is reproduced as found and may originally have been
  wider; confirm against the upstream commit.

 bigframes/dtypes.py               |  2 +-
 tests/system/small/test_series.py | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py
index 271b8aa2f2..59d3007fab 100644
--- a/bigframes/dtypes.py
+++ b/bigframes/dtypes.py
@@ -313,7 +313,7 @@ def cast_ibis_value(
             ibis_dtypes.string,
         ),
         ibis_dtypes.float64: (ibis_dtypes.string, ibis_dtypes.int64),
-        ibis_dtypes.string: (),
+        ibis_dtypes.string: (ibis_dtypes.int64, ibis_dtypes.float64),
         ibis_dtypes.date: (),
         ibis_dtypes.time: (),
         ibis_dtypes.timestamp: (ibis_dtypes.Timestamp(timezone="UTC"),),
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index d702049e68..588dcc2c83 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -2389,6 +2389,29 @@ def test_astype(scalars_df_index, scalars_pandas_df_index, column, to_type):
     pd.testing.assert_series_equal(bf_result, pd_result)
 
 
+def test_string_astype_int():
+    pd_series = pd.Series(["4", "-7", "0", " -03"])
+    bf_series = series.Series(pd_series)
+
+    pd_result = pd_series.astype("Int64")
+    bf_result = bf_series.astype("Int64").to_pandas()
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
+def test_string_astype_float():
+    pd_series = pd.Series(
+        ["1", "-1", "-0", "000", " -03.235", "naN", "-inf", "INf", ".33", "7.235e-8"]
+    )
+
+    bf_series = series.Series(pd_series)
+
+    pd_result = pd_series.astype("Float64")
+    bf_result = bf_series.astype("Float64").to_pandas()
+
+    pd.testing.assert_series_equal(bf_result, pd_result, check_index_type=False)
+
+
 @pytest.mark.parametrize(
     "index",
     [0, 5, -2],