diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 40831292de..4cb3c11859 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -318,6 +318,7 @@ def _to_query( valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = { "in": "IN", "not in": "NOT IN", + "LIKE": "LIKE", "==": "=", ">": ">", "<": "<", diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index aba4a52c43..d0cd24e2be 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -327,6 +327,18 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id): assert df3 is not None +def test_read_gbq_table_clustered_with_filter(session: bigframes.Session): + df = session.read_gbq_table( + "bigquery-public-data.cloud_storage_geo_index.landsat_index", + filters=[[("sensor_id", "LIKE", "OLI%")], [("sensor_id", "LIKE", "%TIRS")]], # type: ignore + columns=["sensor_id"], + ) + sensors = df.groupby(["sensor_id"]).agg("count").to_pandas(ordered=False) + assert "OLI" in sensors.index + assert "TIRS" in sensors.index + assert "OLI_TIRS" in sensors.index + + def test_read_gbq_wildcard(session: bigframes.Session): df = session.read_gbq("bigquery-public-data.noaa_gsod.gsod193*") assert df.shape == (348485, 32) diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 1f31c530d2..74602b5af1 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -7,7 +7,7 @@ from bigframes import constants -FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">"] +FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">", "LIKE"] FilterType = Tuple[str, FilterOps, Any] FiltersType = Union[Iterable[FilterType], Iterable[Iterable[FilterType]]] @@ -112,7 +112,7 @@ def read_gbq( query results. filters (Union[Iterable[FilterType], Iterable[Iterable[FilterType]]], default ()): To filter out data. Filter syntax: [[(column, op, val), …],…] where - op is [==, >, >=, <, <=, !=, in, not in]. The innermost tuples + op is [==, >, >=, <, <=, !=, in, not in, LIKE]. The innermost tuples are transposed into a set of filters applied through an AND operation. The outer Iterable combines these sets of filters through an OR operation. A single Iterable of tuples can also