From 3b04019a9894cb7c4a15a1c405ae3d7fbd9b8ec6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 31 Mar 2021 16:10:28 -0500 Subject: [PATCH 1/3] fix: avoid 404 if dataset is deleted while listing tables or views --- pybigquery/sqlalchemy_bigquery.py | 16 ++++++++++++---- tests/unit/test_sqlalchemy_bigquery.py | 13 +++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 tests/unit/test_sqlalchemy_bigquery.py diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py index ff83f319..b889463b 100644 --- a/pybigquery/sqlalchemy_bigquery.py +++ b/pybigquery/sqlalchemy_bigquery.py @@ -25,6 +25,7 @@ import operator from google import auth +import google.api_core.exceptions from google.cloud import bigquery from google.cloud.bigquery import dbapi from google.cloud.bigquery.schema import SchemaField @@ -434,10 +435,17 @@ def _get_table_or_view_names(self, connection, table_type, schema=None): if current_schema is not None and current_schema != dataset.dataset_id: continue - tables = client.list_tables(dataset.reference) - for table in tables: - if table_type == table.table_type: - result.append(get_table_name(table)) + try: + tables = client.list_tables(dataset.reference) + for table in tables: + if table_type == table.table_type: + result.append(get_table_name(table)) + except google.api_core.exceptions.NotFound: + # It's possible that the dataset was deleted between when we + # fetched the list of datasets and when we try to list the + # tables from it. See: + # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/105 + pass return result @staticmethod diff --git a/tests/unit/test_sqlalchemy_bigquery.py b/tests/unit/test_sqlalchemy_bigquery.py new file mode 100644 index 00000000..2595164f --- /dev/null +++ b/tests/unit/test_sqlalchemy_bigquery.py @@ -0,0 +1,13 @@ +# Copyright 2021 The PyBigQuery Authors +# +# Use of this source code is governed by an MIT-style +# license that can be found in the LICENSE file or at +# https://opensource.org/licenses/MIT. + + +def test_get_table_names(): + assert False + + +def test_get_view_names(): + assert False From 2244efc78b6100e612fa53046f22093f4636bea4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 31 Mar 2021 17:33:11 -0500 Subject: [PATCH 2/3] add unit tests --- tests/unit/test_sqlalchemy_bigquery.py | 134 ++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_sqlalchemy_bigquery.py b/tests/unit/test_sqlalchemy_bigquery.py index 2595164f..dc65d513 100644 --- a/tests/unit/test_sqlalchemy_bigquery.py +++ b/tests/unit/test_sqlalchemy_bigquery.py @@ -4,10 +4,136 @@ # license that can be found in the LICENSE file or at # https://opensource.org/licenses/MIT. +from unittest import mock -def test_get_table_names(): - assert False +import google.api_core.exceptions +from google.cloud import bigquery +from google.cloud.bigquery.dataset import DatasetListItem +from google.cloud.bigquery.table import TableListItem +import pytest +import sqlalchemy -def test_get_view_names(): - assert False +@pytest.fixture +def mock_bigquery_client(): + return mock.create_autospec(bigquery.Client, instance=True) + + +@pytest.fixture +def mock_connection(monkeypatch, mock_bigquery_client): + from pybigquery import sqlalchemy_bigquery + + def mock_connect_args(*args, **kwargs): + return ([mock_bigquery_client], {}) + + monkeypatch.setattr( + sqlalchemy_bigquery.BigQueryDialect, "create_connect_args", mock_connect_args + ) + + +@pytest.fixture +def engine_under_test(mock_connection): + return sqlalchemy.create_engine("bigquery://") + + +@pytest.fixture +def inspector_under_test(engine_under_test): + from sqlalchemy.engine.reflection import Inspector + + return Inspector.from_engine(engine_under_test) + + +def dataset_item(dataset_id): + return DatasetListItem( + {"datasetReference": {"projectId": "some-project-id", "datasetId": dataset_id}} + ) + + +def table_item(dataset_id, table_id, type_="TABLE"): + return TableListItem( + { + "type": type_, + "tableReference": { + "projectId": "some-project-id", + "datasetId": dataset_id, + "tableId": table_id, + }, + } + ) + + +@pytest.mark.parametrize( + ["datasets_list", "tables_lists", "expected"], + [ + ([], [], []), + ([dataset_item("dataset_1")], [[]], []), + ( + [dataset_item("dataset_1"), dataset_item("dataset_2")], + [ + [table_item("dataset_1", "d1t1"), table_item("dataset_1", "d1t2")], + [ + table_item("dataset_2", "d2t1"), + table_item("dataset_2", "d2view", type_="VIEW"), + ], + ], + ["dataset_1.d1t1", "dataset_1.d1t2", "dataset_2.d2t1"], + ), + ( + [dataset_item("dataset_1"), dataset_item("dataset_deleted")], + [ + [table_item("dataset_1", "d1t1")], + google.api_core.exceptions.NotFound("dataset_deleted"), + ], + ["dataset_1.d1t1"], + ), + ], +) +def test_get_table_names( + engine_under_test, mock_bigquery_client, datasets_list, tables_lists, expected +): + mock_bigquery_client.list_datasets.return_value = datasets_list + mock_bigquery_client.list_tables.side_effect = tables_lists + table_names = engine_under_test.table_names() + mock_bigquery_client.list_datasets.assert_called_once() + assert mock_bigquery_client.list_tables.call_count == len(datasets_list) + assert list(sorted(table_names)) == list(sorted(expected)) + + +@pytest.mark.parametrize( + ["datasets_list", "tables_lists", "expected"], + [ + ([], [], []), + ([dataset_item("dataset_1")], [[]], []), + ( + [dataset_item("dataset_1"), dataset_item("dataset_2")], + [ + [ + table_item("dataset_1", "d1t1"), + table_item("dataset_1", "d1view", type_="VIEW"), + ], + [ + table_item("dataset_2", "d2t1"), + table_item("dataset_2", "d2view", type_="VIEW"), + ], + ], + ["dataset_1.d1view", "dataset_2.d2view"], + ), + ( + [dataset_item("dataset_1"), dataset_item("dataset_deleted")], + [ + [table_item("dataset_1", "d1view", type_="VIEW")], + google.api_core.exceptions.NotFound("dataset_deleted"), + ], + ["dataset_1.d1view"], + ), + ], +) +def test_get_view_names( + inspector_under_test, mock_bigquery_client, datasets_list, tables_lists, expected +): + mock_bigquery_client.list_datasets.return_value = datasets_list + mock_bigquery_client.list_tables.side_effect = tables_lists + view_names = inspector_under_test.get_view_names() + mock_bigquery_client.list_datasets.assert_called_once() + assert mock_bigquery_client.list_tables.call_count == len(datasets_list) + assert list(sorted(view_names)) == list(sorted(expected)) From 644406325fb5f6e2e6fa73d38af7d740713921c3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 1 Apr 2021 09:33:48 -0500 Subject: [PATCH 3/3] remove unnecessary bigquery import --- pybigquery/sqlalchemy_bigquery.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pybigquery/sqlalchemy_bigquery.py b/pybigquery/sqlalchemy_bigquery.py index 789540bc..c73adea9 100644 --- a/pybigquery/sqlalchemy_bigquery.py +++ b/pybigquery/sqlalchemy_bigquery.py @@ -26,7 +26,6 @@ from google import auth import google.api_core.exceptions -from google.cloud import bigquery from google.cloud.bigquery import dbapi from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import TableReference