Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 1d81b41

Browse filesBrowse files
authored
fix: allow IsInOp on matching dtypes regardless of nullability (#2466)
- Update Ibis isin_op_impl to compare types by name, allowing comparisons between columns and literals with different nullability.
- Update SQLGlot IsInOp implementation to use dtypes.can_compare for more robust type compatibility checking.
- Improve dtypes.can_compare to gracefully handle type coercion failures.
- Migrate TPCH verification script to tests/system/large/test_tpch.py for better integration with the test suite.

Fixes 485642936 🦕
1 parent 6306478 commit 1d81b41
Copy full SHA for 1d81b41

5 files changed

+108-137Lines changed: 108 additions & 137 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/core/compile/ibis_compiler/scalar_op_registry.py‎

Copy file name to clipboardExpand all lines: bigframes/core/compile/ibis_compiler/scalar_op_registry.py
+1-1Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -962,7 +962,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
962962
# to actually cast it, as that could be lossy (eg float -> int)
963963
item_inferred_type = ibis_types.literal(item).type()
964964
if (
965-
x.type() == item_inferred_type
965+
x.type().name == item_inferred_type.name
966966
or x.type().is_numeric()
967967
and item_inferred_type.is_numeric()
968968
):
Collapse file

‎bigframes/core/compile/sqlglot/expressions/comparison_ops.py‎

Copy file name to clipboardExpand all lines: bigframes/core/compile/sqlglot/expressions/comparison_ops.py
+1-6Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,11 @@
3333
@register_unary_op(ops.IsInOp, pass_op=True)
3434
def _(expr: TypedExpr, op: ops.IsInOp) -> sge.Expression:
3535
values = []
36-
is_numeric_expr = dtypes.is_numeric(expr.dtype, include_bool=False)
3736
for value in op.values:
3837
if _is_null(value):
3938
continue
4039
dtype = dtypes.bigframes_type(type(value))
41-
if (
42-
expr.dtype == dtype
43-
or is_numeric_expr
44-
and dtypes.is_numeric(dtype, include_bool=False)
45-
):
40+
if dtypes.can_compare(expr.dtype, dtype):
4641
values.append(sge.convert(value))
4742

4843
if op.match_nulls:
Collapse file

‎bigframes/dtypes.py‎

Copy file name to clipboardExpand all lines: bigframes/dtypes.py
+5-2Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -370,8 +370,11 @@ def is_comparable(type_: ExpressionType) -> bool:
370370

371371

372372
def can_compare(type1: ExpressionType, type2: ExpressionType) -> bool:
373-
coerced_type = coerce_to_common(type1, type2)
374-
return is_comparable(coerced_type)
373+
try:
374+
coerced_type = coerce_to_common(type1, type2)
375+
return is_comparable(coerced_type)
376+
except TypeError:
377+
return False
375378

376379

377380
def get_struct_fields(type_: ExpressionType) -> dict[str, Dtype]:
Collapse file

‎scripts/tpch_result_verify.py‎

Copy file name to clipboardExpand all lines: scripts/tpch_result_verify.py
-128Lines changed: 0 additions & 128 deletions
This file was deleted.
Collapse file

‎tests/system/large/test_tpch.py‎

Copy file name to clipboard
+101Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import re
17+
18+
from google.cloud import bigquery
19+
import pandas as pd
20+
import pytest
21+
22+
# Directory holding the vendored TPCH assets; the test reads
# "<TPCH_PATH>/sql_queries/qN.sql" and "<TPCH_PATH>/queries/qN.py" from it.
# NOTE(review): the path is relative, so it presumably resolves against the
# directory the test run is started from -- confirm.
TPCH_PATH = "third_party/bigframes_vendored/tpch"

# BigQuery project and dataset that host the TPCH tables queried by the test.
PROJECT_ID = "bigframes-dev-perf"
DATASET_ID = "tpch_0001g"

# Maps each "{placeholder}" name used in the SQL templates to a fully
# qualified "<project>.<dataset>.<TABLE>" identifier. The mapping is expanded
# into str.format() by _execute_sql_query.
DATASET = {
    placeholder: ".".join((PROJECT_ID, DATASET_ID, table_name))
    for placeholder, table_name in (
        ("line_item_ds", "LINEITEM"),
        ("region_ds", "REGION"),
        ("nation_ds", "NATION"),
        ("supplier_ds", "SUPPLIER"),
        ("part_ds", "PART"),
        ("part_supp_ds", "PARTSUPP"),
        ("customer_ds", "CUSTOMER"),
        ("orders_ds", "ORDERS"),
    )
}
35+
36+
37+
def _execute_sql_query(bigquery_client, sql_query):
    """Run a templated TPCH SQL query and return its rows as a DataFrame.

    Args:
        bigquery_client: Object exposing ``query(sql, job_config=...)``; the
            test passes ``session.bqclient``.
        sql_query: SQL text containing ``{placeholder}`` fields that are
            filled in from the module-level ``DATASET`` mapping.

    Returns:
        The query result as a DataFrame whose column labels are upper-cased.
    """
    rendered_sql = sql_query.format(**DATASET)

    # Disable the query cache for this job.
    uncached_config = bigquery.QueryJobConfig(use_query_cache=False)
    job = bigquery_client.query(rendered_sql, job_config=uncached_config)

    # NOTE(review): result() is presumably called so that the job has finished
    # (and any query error has been raised) before the rows are fetched --
    # confirm against the BigQuery client documentation.
    job.result()

    frame = job.to_dataframe()
    frame.columns = frame.columns.str.upper()
    return frame
46+
47+
48+
def _execute_bigframes_script(session, bigframes_script):
49+
bigframes_script = re.sub(
50+
r"next\((\w+)\.to_pandas_batches\((.*?)\)\)",
51+
r"return \1.to_pandas()",
52+
bigframes_script,
53+
)
54+
bigframes_script = re.sub(r"_\s*=\s*(\w+)", r"return \1", bigframes_script)
55+
56+
bigframes_script = (
57+
bigframes_script
58+
+ f"\nresult = q('{PROJECT_ID}', '{DATASET_ID}', _initialize_session)"
59+
)
60+
exec_globals = {"_initialize_session": session}
61+
exec(bigframes_script, exec_globals)
62+
bigframes_result = exec_globals.get("result")
63+
return bigframes_result
64+
65+
66+
def _verify_result(bigframes_result, sql_result):
    """Assert that the BigFrames result matches the SQL reference result.

    Args:
        bigframes_result: Either a ``pd.DataFrame`` or a single value.
        sql_result: DataFrame holding the reference result.

    Raises:
        AssertionError: If the two results differ, or if ``bigframes_result``
            is not a DataFrame and ``sql_result`` is not a 1x1 frame.
    """
    if not isinstance(bigframes_result, pd.DataFrame):
        # Scalar case: the reference must be exactly one cell holding it.
        assert sql_result.shape == (1, 1)
        reference_value = sql_result.iloc[0, 0]
        assert reference_value == bigframes_result
        return

    # Frame case: row labels are discarded and dtypes are not compared.
    expected = sql_result.reset_index(drop=True)
    actual = bigframes_result.reset_index(drop=True)
    pd.testing.assert_frame_equal(expected, actual, check_dtype=False)
77+
78+
79+
@pytest.mark.parametrize("query_num", range(1, 23))
@pytest.mark.parametrize("ordered", [True, False])
def test_tpch_correctness(session, unordered_session, query_num, ordered):
    """Check that TPCH query ``query_num`` agrees between SQL and BigFrames.

    The reference result comes from running the vendored SQL file through
    ``session.bqclient``. The candidate result comes from running the matching
    vendored BigFrames script on ``session`` when ``ordered`` is true, and on
    ``unordered_session`` otherwise.
    """
    # Reference: the SQL form of the query.
    reference_path = f"{TPCH_PATH}/sql_queries/q{query_num}.sql"
    assert os.path.exists(reference_path)
    with open(reference_path, "r") as sql_file:
        reference_sql = sql_file.read()

    expected = _execute_sql_query(session.bqclient, reference_sql)

    # Candidate: the BigFrames form of the same query.
    script_path = f"{TPCH_PATH}/queries/q{query_num}.py"
    assert os.path.exists(script_path)
    with open(script_path, "r") as script_file:
        script_source = script_file.read()

    if ordered:
        active_session = session
    else:
        active_session = unordered_session
    actual = _execute_bigframes_script(active_session, script_source)

    _verify_result(actual, expected)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.