From d49db2aea0715e8bf81d420491c2e71863d6c8b5 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 18 Mar 2024 15:46:33 +0000 Subject: [PATCH 1/3] feat: `read_gbq_table` supports `LIKE` as an operator in `filters` --- bigframes/session/__init__.py | 1 + tests/system/small/test_session.py | 12 ++++++++++++ third_party/bigframes_vendored/pandas/io/gbq.py | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 40831292de..4cb3c11859 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -318,6 +318,7 @@ def _to_query( valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = { "in": "IN", "not in": "NOT IN", + "LIKE": "LIKE", "==": "=", ">": ">", "<": "<", diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index aba4a52c43..d0cd24e2be 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -327,6 +327,18 @@ def test_read_gbq_twice_with_same_timestamp(session, penguins_table_id): assert df3 is not None +def test_read_gbq_table_clustered_with_filter(session: bigframes.Session): + df = session.read_gbq_table( + "bigquery-public-data.cloud_storage_geo_index.landsat_index", + filters=[[("sensor_id", "LIKE", "OLI%")], [("sensor_id", "LIKE", "%TIRS")]], # type: ignore + columns=["sensor_id"], + ) + sensors = df.groupby(["sensor_id"]).agg("count").to_pandas(ordered=False) + assert "OLI" in sensors.index + assert "TIRS" in sensors.index + assert "OLI_TIRS" in sensors.index + + def test_read_gbq_wildcard(session: bigframes.Session): df = session.read_gbq("bigquery-public-data.noaa_gsod.gsod193*") assert df.shape == (348485, 32) diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index 1f31c530d2..e52c71da76 100644 --- a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -112,7 +112,7 @@ 
def read_gbq( query results. filters (Union[Iterable[FilterType], Iterable[Iterable[FilterType]]], default ()): To filter out data. Filter syntax: [[(column, op, val), …],…] where - op is [==, >, >=, <, <=, !=, in, not in]. The innermost tuples + op is [==, >, >=, <, <=, !=, in, not in, LIKE]. The innermost tuples are transposed into a set of filters applied through an AND operation. The outer Iterable combines these sets of filters through an OR operation. A single Iterable of tuples can also From ba18eb18b5dda87033663af2f2251f1eb6ea65ea Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 18 Mar 2024 19:11:44 +0000 Subject: [PATCH 2/3] mypy error --- bigframes/session/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index 4cb3c11859..d908e09e5a 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -318,7 +318,7 @@ def _to_query( valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = { "in": "IN", "not in": "NOT IN", - "LIKE": "LIKE", + "LIKE": "LIKE", # type: ignore "==": "=", ">": ">", "<": "<", From 6fbbbbc21bdb7c9b295be3a73105b4aca8208942 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 18 Mar 2024 19:17:04 +0000 Subject: [PATCH 3/3] mypy again --- bigframes/session/__init__.py | 2 +- third_party/bigframes_vendored/pandas/io/gbq.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/session/__init__.py b/bigframes/session/__init__.py index d908e09e5a..4cb3c11859 100644 --- a/bigframes/session/__init__.py +++ b/bigframes/session/__init__.py @@ -318,7 +318,7 @@ def _to_query( valid_operators: Mapping[third_party_pandas_gbq.FilterOps, str] = { "in": "IN", "not in": "NOT IN", - "LIKE": "LIKE", # type: ignore + "LIKE": "LIKE", "==": "=", ">": ">", "<": "<", diff --git a/third_party/bigframes_vendored/pandas/io/gbq.py b/third_party/bigframes_vendored/pandas/io/gbq.py index e52c71da76..74602b5af1 100644 --- 
a/third_party/bigframes_vendored/pandas/io/gbq.py +++ b/third_party/bigframes_vendored/pandas/io/gbq.py @@ -7,7 +7,7 @@ from bigframes import constants -FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">"] +FilterOps = Literal["in", "not in", "<", "<=", "==", "!=", ">=", ">", "LIKE"] FilterType = Tuple[str, FilterOps, Any] FiltersType = Union[Iterable[FilterType], Iterable[Iterable[FilterType]]]