Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 494a0a1

Browse files
authored
feat: support full round-trip persistence for multimodal reference cols (#2511)
Saves multimodal metadata descriptions on .to_gbq(). Fixes #<452681068> 🦕
1 parent 3ddd7eb commit 494a0a1
Copy full SHA for 494a0a1

4 files changed

+56 -5 — Lines changed: 56 additions & 5 deletions

File tree

Expand file tree / Collapse file tree
Open diff view settings
Filter options
Expand file tree / Collapse file tree
Open diff view settings
Collapse file

‎bigframes/dtypes.py‎

Copy file name to clipboard | Expand all lines: bigframes/dtypes.py
+21 -1 — Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -772,6 +772,13 @@ def convert_schema_field(
772772
) -> typing.Tuple[str, Dtype]:
773773
is_repeated = field.mode == "REPEATED"
774774
if field.field_type == "RECORD":
775+
if field.description == OBJ_REF_DESCRIPTION_TAG:
776+
bf_dtype = OBJ_REF_DTYPE # type: ignore
777+
if is_repeated:
778+
pa_type = pa.list_(bigframes_dtype_to_arrow_dtype(bf_dtype))
779+
bf_dtype = pd.ArrowDtype(pa_type)
780+
return field.name, bf_dtype
781+
775782
mapped_fields = map(convert_schema_field, field.fields)
776783
fields = []
777784
for name, dtype in mapped_fields:
@@ -815,7 +822,11 @@ def convert_to_schema_field(
815822
)
816823
inner_field = convert_to_schema_field(name, inner_type, overrides)
817824
return google.cloud.bigquery.SchemaField(
818-
name, inner_field.field_type, mode="REPEATED", fields=inner_field.fields
825+
name,
826+
inner_field.field_type,
827+
mode="REPEATED",
828+
fields=inner_field.fields,
829+
description=inner_field.description,
819830
)
820831
if pa.types.is_struct(bigframes_dtype.pyarrow_dtype):
821832
inner_fields: list[google.cloud.bigquery.SchemaField] = []
@@ -827,6 +838,14 @@ def convert_to_schema_field(
827838
convert_to_schema_field(field.name, inner_bf_type, overrides)
828839
)
829840

841+
if bigframes_dtype == OBJ_REF_DTYPE:
842+
return google.cloud.bigquery.SchemaField(
843+
name,
844+
"RECORD",
845+
fields=inner_fields,
846+
description=OBJ_REF_DESCRIPTION_TAG,
847+
)
848+
830849
return google.cloud.bigquery.SchemaField(
831850
name, "RECORD", fields=inner_fields
832851
)
@@ -971,6 +990,7 @@ def lcd_type_or_throw(dtype1: Dtype, dtype2: Dtype) -> Dtype:
971990

972991

973992
TIMEDELTA_DESCRIPTION_TAG = "#microseconds"
993+
OBJ_REF_DESCRIPTION_TAG = "bigframes_dtype: OBJ_REF_DTYPE"
974994

975995

976996
def contains_db_dtypes_json_arrow_type(type_):
Collapse file

‎bigframes/session/bq_caching_executor.py‎

Copy file name to clipboard | Expand all lines: bigframes/session/bq_caching_executor.py
+5 -4 — Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -334,13 +334,14 @@ def _export_gbq(
334334
session=array_value.session,
335335
)
336336

337-
has_timedelta_col = any(
338-
t == bigframes.dtypes.TIMEDELTA_DTYPE for t in array_value.schema.dtypes
337+
has_special_dtype_col = any(
338+
t in (bigframes.dtypes.TIMEDELTA_DTYPE, bigframes.dtypes.OBJ_REF_DTYPE)
339+
for t in array_value.schema.dtypes
339340
)
340341

341-
if spec.if_exists != "append" and has_timedelta_col:
342+
if spec.if_exists != "append" and has_special_dtype_col:
342343
# Only update schema if this is not modifying an existing table, and the
343-
# new table contains timedelta columns.
344+
# new table contains special columns (like timedelta or obj_ref).
344345
table = self.bqclient.get_table(spec.table)
345346
table.schema = array_value.schema.to_bigquery()
346347
self.bqclient.update_table(table, ["schema"])
Collapse file

‎tests/system/small/test_dataframe_io.py‎

Copy file name to clipboard | Expand all lines: tests/system/small/test_dataframe_io.py
+22 — Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1002,6 +1002,28 @@ def test_to_gbq_timedelta_tag_ignored_when_appending(bigquery_client, dataset_id
10021002
assert table.schema[0].description is None
10031003

10041004

1005+
def test_to_gbq_obj_ref(session, dataset_id: str, bigquery_client):
1006+
destination_table = f"{dataset_id}.test_to_gbq_obj_ref"
1007+
sql = """
1008+
SELECT
1009+
'gs://cloud-samples-data/vision/ocr/sign.jpg' AS uri_col
1010+
"""
1011+
df = session.read_gbq(sql)
1012+
df["obj_ref_col"] = df["uri_col"].str.to_blob()
1013+
df = df.drop(columns=["uri_col"])
1014+
1015+
df.to_gbq(destination_table)
1016+
1017+
table = bigquery_client.get_table(destination_table)
1018+
obj_ref_field = next(f for f in table.schema if f.name == "obj_ref_col")
1019+
assert obj_ref_field.field_type == "RECORD"
1020+
assert obj_ref_field.description == "bigframes_dtype: OBJ_REF_DTYPE"
1021+
1022+
reloaded_df = session.read_gbq(destination_table)
1023+
assert reloaded_df["obj_ref_col"].dtype == dtypes.OBJ_REF_DTYPE
1024+
assert len(reloaded_df) == 1
1025+
1026+
10051027
@pytest.mark.parametrize(
10061028
("index"),
10071029
[True, False],
Collapse file

‎tests/unit/test_dtypes.py‎

Copy file name to clipboard | Expand all lines: tests/unit/test_dtypes.py
+8 — Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,11 @@ def test_infer_literal_type_arrow_scalar(scalar, expected_dtype):
7171
)
7272
def test_contains_db_dtypes_json_arrow_type(type_, expected):
7373
assert bigframes.dtypes.contains_db_dtypes_json_arrow_type(type_) == expected
74+
75+
76+
def test_convert_to_schema_field_list_description():
77+
bf_dtype = bigframes.dtypes.OBJ_REF_DTYPE
78+
list_bf_dtype = bigframes.dtypes.list_type(bf_dtype)
79+
field = bigframes.dtypes.convert_to_schema_field("my_list", list_bf_dtype)
80+
assert field.description == "bigframes_dtype: OBJ_REF_DTYPE"
81+
assert field.mode == "REPEATED"

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.