From 4741cf1d17243cf7bfc6a1ca65a90ad4e3dd3447 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 7 Sep 2017 13:26:37 -0700 Subject: [PATCH] BigQuery : add sample for writing query results to a destination table. See: https://cloud.google.com/bigquery/docs/writing-results --- bigquery/cloud-client/query.py | 33 +++++++++++++++++++++++++++++ bigquery/cloud-client/query_test.py | 20 +++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/bigquery/cloud-client/query.py b/bigquery/cloud-client/query.py index f01f912cce6..93b13b84873 100755 --- a/bigquery/cloud-client/query.py +++ b/bigquery/cloud-client/query.py @@ -63,6 +63,30 @@ def query_standard_sql(query): print(row) +def query_destination_table(query, dest_dataset_id, dest_table_id): + client = bigquery.Client() + query_job = client.run_async_query(str(uuid.uuid4()), query) + + # Allow for query results larger than the maximum response size. + query_job.allow_large_results = True + + # When large results are allowed, a destination table must be set. + dest_dataset = client.dataset(dest_dataset_id) + dest_table = dest_dataset.table(dest_table_id) + query_job.destination = dest_table + + # Allow the results table to be overwritten. + query_job.write_disposition = 'WRITE_TRUNCATE' + + query_job.begin() + query_job.result() # Wait for job to complete. + + # Verify that the results were written to the destination table. + dest_table.reload() # Get the table metadata, such as the schema. + for row in dest_table.fetch_data(): + print(row) + + if __name__ == '__main__': parser = argparse.ArgumentParser( description=__doc__, @@ -72,10 +96,19 @@ def query_standard_sql(query): '--use_standard_sql', action='store_true', help='Use standard SQL syntax.') + parser.add_argument( + '--destination_table', + type=str, + help=( + 'Destination table to use for results. ' + 'Example: my_dataset.my_table')) args = parser.parse_args() if args.use_standard_sql: query_standard_sql(args.query) + elif args.destination_table: + dataset, table = args.destination_table.split('.') + query_destination_table(args.query, dataset, table) else: query(args.query) diff --git a/bigquery/cloud-client/query_test.py b/bigquery/cloud-client/query_test.py index fa698e146a8..9d6c912b4e5 100644 --- a/bigquery/cloud-client/query_test.py +++ b/bigquery/cloud-client/query_test.py @@ -15,6 +15,10 @@ import query +DATASET_ID = 'test_dataset' +TABLE_ID = 'test_destination_table' + + def test_query(capsys): # Query only outputs the first 10 rows, sort results to avoid randomness query_string = '''#standardSQL @@ -44,3 +48,19 @@ def test_query_standard_sql(capsys): out, _ = capsys.readouterr() assert 'antonyandcleopatra' in out + + +def test_query_destination_table(capsys): + # Query only outputs the first 10 rows, sort results to avoid randomness + query_string = '''#standardSQL + SELECT corpus + FROM `publicdata.samples.shakespeare` + GROUP BY corpus + ORDER BY corpus + LIMIT 10;''' + + query.query_destination_table(query_string, DATASET_ID, TABLE_ID) + + out, _ = capsys.readouterr() + + assert 'antonyandcleopatra' in out