diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index ed95dacf74..517c265cba 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -240,9 +240,7 @@ def compile_read_table_unordered( return compiled.UnorderedIR( ibis_table, tuple( - bigframes.core.compile.ibis_types.ibis_value_to_canonical_type( - ibis_table[scan_item.source_id].name(scan_item.id.sql) - ) + ibis_table[scan_item.source_id].name(scan_item.id.sql) for scan_item in scan.items ), ) diff --git a/bigframes/core/compile/ibis_types.py b/bigframes/core/compile/ibis_types.py index af2b7908ad..2dcc1b3c8a 100644 --- a/bigframes/core/compile/ibis_types.py +++ b/bigframes/core/compile/ibis_types.py @@ -113,7 +113,9 @@ def cast_ibis_value( Raises: TypeError: if the type cast cannot be executed""" - if value.type() == to_type: + # normalize to nullable, which doesn't impact compatibility + value_type = value.type().copy(nullable=True) + if value_type == to_type: return value # casts that just work # TODO(bmil): add to this as more casts are verified @@ -189,52 +191,39 @@ def cast_ibis_value( ibis_dtypes.multipolygon: (IBIS_GEO_TYPE,), } - value = ibis_value_to_canonical_type(value) - if value.type() in good_casts: - if to_type in good_casts[value.type()]: + if value_type in good_casts: + if to_type in good_casts[value_type]: return value.try_cast(to_type) if safe else value.cast(to_type) else: # this should never happen raise TypeError( - f"Unexpected value type {value.type()}. {constants.FEEDBACK_LINK}" + f"Unexpected value type {value_type}. {constants.FEEDBACK_LINK}" ) # casts that need some encouragement # BigQuery casts bools to lower case strings. Capitalize the result to match Pandas # TODO(bmil): remove this workaround after fixing Ibis - if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.string: + if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.string: if safe: return cast(ibis_types.StringValue, value.try_cast(to_type)).capitalize() else: return cast(ibis_types.StringValue, value.cast(to_type)).capitalize() - if value.type() == ibis_dtypes.bool and to_type == ibis_dtypes.float64: + if value_type == ibis_dtypes.bool and to_type == ibis_dtypes.float64: if safe: return value.try_cast(ibis_dtypes.int64).try_cast(ibis_dtypes.float64) else: return value.cast(ibis_dtypes.int64).cast(ibis_dtypes.float64) - if value.type() == ibis_dtypes.float64 and to_type == ibis_dtypes.bool: + if value_type == ibis_dtypes.float64 and to_type == ibis_dtypes.bool: return value != ibis_types.literal(0) raise TypeError( - f"Unsupported cast {value.type()} to {to_type}. {constants.FEEDBACK_LINK}" + f"Unsupported cast {value_type} to {to_type}. {constants.FEEDBACK_LINK}" ) -def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: - """Converts an Ibis expression to canonical type. - - This is useful in cases where multiple types correspond to the same BigFrames dtype. - """ - ibis_type = value.type() - name = value.get_name() - # Allow REQUIRED fields to be joined with NULLABLE fields. - nullable_type = ibis_type.copy(nullable=True) - return value.cast(nullable_type).name(name) - - def bigframes_dtype_to_ibis_dtype( bigframes_dtype: bigframes.dtypes.Dtype, ) -> ibis_dtypes.DataType: diff --git a/third_party/bigframes_vendored/ibis/expr/operations/relations.py b/third_party/bigframes_vendored/ibis/expr/operations/relations.py index fccd4cbf58..ef45fdfc0d 100644 --- a/third_party/bigframes_vendored/ibis/expr/operations/relations.py +++ b/third_party/bigframes_vendored/ibis/expr/operations/relations.py @@ -10,11 +10,7 @@ from typing import Annotated, Any, Literal, Optional, TypeVar from bigframes_vendored.ibis.common.annotations import attribute -from bigframes_vendored.ibis.common.collections import ( - ConflictingValuesError, - FrozenDict, - FrozenOrderedDict, -) +from bigframes_vendored.ibis.common.collections import FrozenDict, FrozenOrderedDict from bigframes_vendored.ibis.common.exceptions import ( IbisTypeError, IntegrityError, @@ -342,20 +338,6 @@ class Set(Relation): values = FrozenOrderedDict() def __init__(self, left, right, **kwargs): - err_msg = "Table schemas must be equal for set operations." - try: - missing_from_left = right.schema - left.schema - missing_from_right = left.schema - right.schema - except ConflictingValuesError as e: - raise RelationError(err_msg + "\n" + str(e)) from e - if missing_from_left or missing_from_right: - msgs = [err_msg] - if missing_from_left: - msgs.append(f"Columns missing from the left:\n{missing_from_left}.") - if missing_from_right: - msgs.append(f"Columns missing from the right:\n{missing_from_right}.") - raise RelationError("\n".join(msgs)) - if left.schema.names != right.schema.names: # rewrite so that both sides have the columns in the same order making it # easier for the backends to implement set operations @@ -365,7 +347,15 @@ def __init__(self, left, right, **kwargs): @attribute def schema(self): - return self.left.schema + dtypes = ( + dt.higher_precedence(ltype, rtype) + for ltype, rtype in zip( + self.left.schema.values(), self.right.schema.values() + ) + ) + return Schema.from_tuples( + (name, coltype) for name, coltype in zip(self.left.schema.names, dtypes) + ) @public