Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit c6b3914

Browse files
authored
BigQuery: Moves BigQuery tutorial for Dataproc to python-docs-samples (GoogleCloudPlatform#1494)
1 parent 509e3f2 commit c6b3914
Copy full SHA for c6b3914

File tree

Expand file treeCollapse file tree

2 files changed

+123
-0
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+123
-0
lines changed
+81Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2018 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
18+
def run_natality_tutorial():
    # [START bigquery_query_natality_tutorial]
    """Create a Google BigQuery linear regression input table.

    Steps performed by this function:
      * Create a new dataset named "natality_regression" in the client's
        default project.
      * Query the public table bigquery-public-data.samples.natality,
        keeping only the columns of interest to the regression and only
        the rows in which every one of those columns is non-NULL.
      * Store the query output in a new "regression_input" table inside
        the new dataset, and block until the query job has finished.

    The resulting table is meant to be consumed by later tutorial steps
    (for example through the BigQuery Connector for Spark on Cloud
    Dataproc); this function itself only creates and fills the table.
    """

    from google.cloud import bigquery

    # Standard SQL is the default dialect for the BigQuery Python client
    # library. The query keeps only the fields of interest and drops any
    # row that is missing one of them.
    natality_sql = """
        SELECT
            weight_pounds, mother_age, father_age, gestation_weeks,
            weight_gain_pounds, apgar_5min
        FROM
            `bigquery-public-data.samples.natality`
        WHERE
            weight_pounds IS NOT NULL
            AND mother_age IS NOT NULL
            AND father_age IS NOT NULL
            AND gestation_weeks IS NOT NULL
            AND weight_gain_pounds IS NOT NULL
            AND apgar_5min IS NOT NULL
    """

    # Build a BigQuery client from the Google Cloud Platform project
    # defaults.
    client = bigquery.Client()

    # Create the dataset that will hold the query results.
    regression_dataset = client.create_dataset(
        bigquery.Dataset(client.dataset('natality_regression')))

    # Point the query job at a new table inside the freshly created
    # dataset so the results are persisted there.
    query_config = bigquery.QueryJobConfig()
    query_config.destination = regression_dataset.table('regression_input')

    # Start the query and wait for it to finish.
    job = client.query(natality_sql, job_config=query_config)
    job.result()
    # [END bigquery_query_natality_tutorial]
78+
79+
80+
# Run the tutorial only when this file is executed as a script, so that
# importing the module (as the test does) has no side effects.
if __name__ == '__main__':
    run_natality_tutorial()
+42Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright 2018 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from google.cloud import bigquery
16+
from google.cloud import exceptions
17+
18+
import natality_tutorial
19+
20+
21+
def dataset_exists(dataset, client):
    """Report whether ``client`` is able to fetch ``dataset``.

    Args:
        dataset: The dataset (or dataset reference) handed to
            ``client.get_dataset``.
        client: Object whose ``get_dataset`` method performs the lookup.

    Returns:
        True when ``client.get_dataset(dataset)`` succeeds, False when it
        raises ``exceptions.NotFound``. Any other exception propagates.
    """
    try:
        client.get_dataset(dataset)
    except exceptions.NotFound:
        return False
    else:
        return True
27+
28+
29+
def test_natality_tutorial():
    """Run the natality tutorial end to end and verify its output.

    The test requires that the "natality_regression" dataset does not
    exist beforehand, runs the tutorial, then checks that the dataset was
    created and that its "regression_input" table contains rows.

    The dataset is removed in a ``finally`` clause so that a failing
    assertion (or an error inside the tutorial) does not leave it behind;
    a leftover dataset would make every later run fail on the
    precondition below.
    """
    client = bigquery.Client()
    dataset_ref = client.dataset('natality_regression')

    # Precondition, checked outside the try block on purpose: a dataset
    # that already exists was not created by this test run, so it must
    # not be deleted by the cleanup below.
    assert not dataset_exists(dataset_ref, client)

    try:
        natality_tutorial.run_natality_tutorial()

        assert dataset_exists(dataset_ref, client)

        table = client.get_table(
            bigquery.Table(dataset_ref.table('regression_input')))
        assert table.num_rows > 0
    finally:
        # Only delete when the tutorial got far enough to create the
        # dataset; otherwise a NotFound raised here would mask the
        # original failure.
        if dataset_exists(dataset_ref, client):
            client.delete_dataset(dataset_ref, delete_contents=True)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.