From 2c896face8a80fc71c6ee808d432fbd0bd2ef3d5 Mon Sep 17 00:00:00 2001
From: Garrett Wu
Date: Wed, 19 Feb 2025 00:36:04 +0000
Subject: [PATCH 1/2] chore: add experimental blob from_glob_path test

---
Review notes (commentary placed after the three-dash line; not part of the
commit message or of the diff):
  * test_blob_create_from_uri_str now takes bq_connection and session as
    parameters, passes bq_connection to to_blob() in place of the hard-coded
    "us.bigframes-default-connection", and compares the exploded struct
    columns (uri, version, authorizer, details) against an expected pandas
    DataFrame instead of only checking the result length.
  * Adds test_blob_create_from_glob_path, which runs the same comparison on
    the "blob_col" column returned by session.from_glob_path().
  * Both tests expect the authorizer column to equal bq_connection.casefold().

 tests/system/small/blob/test_io.py | 55 ++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 10 deletions(-)

diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py
index effadd3b22..1d843d6bc2 100644
--- a/tests/system/small/blob/test_io.py
+++ b/tests/system/small/blob/test_io.py
@@ -12,22 +12,57 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import pandas as pd
+
 import bigframes
 import bigframes.pandas as bpd
 
 
-def test_blob_create_from_uri_str():
+def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session):
     bigframes.options.experiments.blob = True
 
-    uri_series = bpd.Series(
-        [
-            "gs://bigframes_blob_test/images/img0.jpg",
-            "gs://bigframes_blob_test/images/img1.jpg",
-        ]
+    uris = [
+        "gs://bigframes_blob_test/images/img0.jpg",
+        "gs://bigframes_blob_test/images/img1.jpg",
+    ]
+
+    uri_series = bpd.Series(uris, session=session)
+    blob_series = uri_series.str.to_blob(connection=bq_connection)
+
+    pd_blob_df = blob_series.to_pandas().struct.explode()
+    expected_pd_df = pd.DataFrame(
+        {
+            "uri": uris,
+            "version": [None, None],
+            "authorizer": [bq_connection.casefold(), bq_connection.casefold()],
+            "details": [None, None],
+        }
+    )
+
+    pd.testing.assert_frame_equal(
+        pd_blob_df, expected_pd_df, check_dtype=False, check_index_type=False
     )
-    # TODO: use bq_connection fixture when MMD location capitalization fix is in prod
-    blob_series = uri_series.str.to_blob(connection="us.bigframes-default-connection")
 
-    pd_blob_series = blob_series.to_pandas()
 
-    assert len(pd_blob_series) == 2
+def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Session):
+    bigframes.options.experiments.blob = True
+
+    blob_df = session.from_glob_path(
+        "gs://bigframes_blob_test/images/*", connection=bq_connection, name="blob_col"
+    )
+    pd_blob_df = blob_df["blob_col"].to_pandas().struct.explode()
+    expected_df = pd.DataFrame(
+        {
+            "uri": [
+                "gs://bigframes_blob_test/images/img0.jpg",
+                "gs://bigframes_blob_test/images/img1.jpg",
+            ],
+            "version": [None, None],
+            "authorizer": [bq_connection.casefold(), bq_connection.casefold()],
+            "details": [None, None],
+        }
+    )
+
+    pd.testing.assert_frame_equal(
+        pd_blob_df, expected_df, check_dtype=False, check_index_type=False
+    )

From a0a809112dc6ff1c9aaf91b5765273b8500ca83c Mon Sep 17 00:00:00 2001
From: Garrett Wu
Date: Wed, 19 Feb 2025 21:57:56 +0000
Subject: [PATCH 2/2] fix pandas < 2.2

---
Review notes (commentary placed after the three-dash line; not part of the
commit message or of the diff):
  * In both tests, .struct.explode() is now called on the blob series before
    .to_pandas() rather than on the converted pandas result. The subject line
    gives pandas < 2.2 compatibility as the reason.

 tests/system/small/blob/test_io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/system/small/blob/test_io.py b/tests/system/small/blob/test_io.py
index 1d843d6bc2..8ecb36ecc9 100644
--- a/tests/system/small/blob/test_io.py
+++ b/tests/system/small/blob/test_io.py
@@ -29,7 +29,7 @@ def test_blob_create_from_uri_str(bq_connection: str, session: bigframes.Session
     uri_series = bpd.Series(uris, session=session)
     blob_series = uri_series.str.to_blob(connection=bq_connection)
 
-    pd_blob_df = blob_series.to_pandas().struct.explode()
+    pd_blob_df = blob_series.struct.explode().to_pandas()
     expected_pd_df = pd.DataFrame(
         {
             "uri": uris,
@@ -50,7 +50,7 @@ def test_blob_create_from_glob_path(bq_connection: str, session: bigframes.Sessi
     blob_df = session.from_glob_path(
         "gs://bigframes_blob_test/images/*", connection=bq_connection, name="blob_col"
     )
-    pd_blob_df = blob_df["blob_col"].to_pandas().struct.explode()
+    pd_blob_df = blob_df["blob_col"].struct.explode().to_pandas()
     expected_df = pd.DataFrame(
         {
             "uri": [