Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit bb8c80e

Browse files
authored
Add magics tutorial with BigQuery Storage API integration. (GoogleCloudPlatform#2087)
* Add magics tutorial with BigQuery Storage API integration. This is a notebooks tutorial, modeled after the Jupyter notebook example code for BigQuery. Use some caution when running these tests, as they run some large-ish (5 GB processed) queries and download about 500 MB worth of data. This is intentional, as the BigQuery Storage API is most useful for downloading large results.
* Update deps.
* Don't run big queries on Travis.
1 parent 3d1f403 commit bb8c80e
Copy full SHA for bb8c80e

File tree

Expand file treeCollapse file tree

2 files changed

+149
-0
lines changed
Filter options
Expand file treeCollapse file tree

2 files changed

+149
-0
lines changed
+148Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
# Copyright 2019 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
import IPython
18+
from IPython.terminal import interactiveshell
19+
from IPython.testing import tools
20+
import pytest
21+
22+
# Ignore semicolon lint warning because semicolons are used in notebooks
23+
# flake8: noqa E703
24+
25+
26+
@pytest.fixture(scope="session")
def ipython():
    """Create the terminal IPython shell shared by the whole test session.

    Returns:
        The shell obtained from ``TerminalInteractiveShell.instance``, built
        from IPython's testing default config with ``simple_prompt`` enabled.
    """
    config = tools.default_config()
    # NOTE(review): simple_prompt presumably avoids needing a full interactive
    # terminal under pytest -- confirm against the IPython docs.
    config.TerminalInteractiveShell.simple_prompt = True
    shell = interactiveshell.TerminalInteractiveShell.instance(config=config)
    return shell
32+
33+
34+
@pytest.fixture()
def ipython_interactive(request, ipython):
    """Activate IPython's builtin hooks for the duration of the test scope.

    Args:
        request: pytest's fixture request object (not used in the body).
        ipython: The session-scoped shell provided by the ``ipython`` fixture.

    Yields:
        The same shell, while its ``builtin_trap`` context manager is active.
    """
    with ipython.builtin_trap:
        yield ipython
42+
43+
44+
def _strip_region_tags(sample_text):
45+
"""Remove blank lines and region tags from sample text"""
46+
magic_lines = [
47+
line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line
48+
]
49+
return "\n".join(magic_lines)
50+
51+
52+
def test_jupyter_small_query(ipython):
    """Run a small ``%%bigquery`` cell and check the result variable is set.

    Args:
        ipython: The session-scoped shell fixture. It is requested but not
            used directly; the shell is fetched via ``IPython.get_ipython()``
            (presumably the fixture guarantees one exists -- confirm).
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")

    # Include a small query to demonstrate that it falls back to the
    # tabledata.list API when the BQ Storage API cannot be used.
    sample = """
# [START bigquerystorage_jupyter_tutorial_fallback]
%%bigquery stackoverflow --use_bqstorage_api
SELECT
CONCAT(
'https://stackoverflow.com/questions/',
CAST(id as STRING)) as url,
view_count
FROM `bigquery-public-data.stackoverflow.posts_questions`
WHERE tags like '%google-bigquery%'
ORDER BY view_count DESC
LIMIT 10
# [END bigquerystorage_jupyter_tutorial_fallback]
"""

    # Region-tag lines are stripped so that the cell magic is the first line
    # of the cell handed to IPython.
    result = ip.run_cell(_strip_region_tags(sample))
    result.raise_error()  # Throws an exception if the cell failed.
    assert "stackoverflow" in ip.user_ns  # verify that variable exists
76+
77+
78+
@pytest.mark.skipif(
    "TRAVIS" in os.environ, reason="Not running long-running queries on Travis"
)
def test_jupyter_tutorial(ipython):
    """Run the large tutorial queries through the ``%%bigquery`` cell magic.

    Skipped when the ``TRAVIS`` environment variable is present. The first
    query passes ``--use_bqstorage_api`` on the magic line; the second relies
    on ``magics.context.use_bqstorage_api`` having been set to ``True``.

    Args:
        ipython: The session-scoped shell fixture. It is requested but not
            used directly; the shell is fetched via ``IPython.get_ipython()``
            (presumably the fixture guarantees one exists -- confirm).
    """
    ip = IPython.get_ipython()
    ip.extension_manager.load_extension("google.cloud.bigquery")

    # This code sample intentionally queries a lot of data to demonstrate the
    # speed-up of using the BigQuery Storage API to download the results.
    sample = """
# [START bigquerystorage_jupyter_tutorial_query]
%%bigquery nodejs_deps --use_bqstorage_api
SELECT
dependency_name,
dependency_platform,
project_name,
project_id,
version_number,
version_id,
dependency_kind,
optional_dependency,
dependency_requirements,
dependency_project_id
FROM
`bigquery-public-data.libraries_io.dependencies`
WHERE
LOWER(dependency_platform) = 'npm'
LIMIT 2500000
# [END bigquerystorage_jupyter_tutorial_query]
"""
    result = ip.run_cell(_strip_region_tags(sample))
    result.raise_error()  # Throws an exception if the cell failed.

    assert "nodejs_deps" in ip.user_ns  # verify that variable exists
    nodejs_deps = ip.user_ns["nodejs_deps"]

    # [START bigquerystorage_jupyter_tutorial_results]
    nodejs_deps.head()
    # [END bigquerystorage_jupyter_tutorial_results]

    # The import stays inside the function so that it sits within the
    # region-tagged snippet below.
    # [START bigquerystorage_jupyter_tutorial_context]
    import google.cloud.bigquery.magics

    google.cloud.bigquery.magics.context.use_bqstorage_api = True
    # [END bigquerystorage_jupyter_tutorial_context]

    # NOTE(review): the sample below reuses the same region tag name as the
    # npm query above (bigquerystorage_jupyter_tutorial_query), so the tag
    # covers two different snippets -- confirm whether it needs its own tag.
    sample = """
# [START bigquerystorage_jupyter_tutorial_query]
%%bigquery java_deps
SELECT
dependency_name,
dependency_platform,
project_name,
project_id,
version_number,
version_id,
dependency_kind,
optional_dependency,
dependency_requirements,
dependency_project_id
FROM
`bigquery-public-data.libraries_io.dependencies`
WHERE
LOWER(dependency_platform) = 'maven'
LIMIT 2500000
# [END bigquerystorage_jupyter_tutorial_query]
"""
    result = ip.run_cell(_strip_region_tags(sample))
    result.raise_error()  # Throws an exception if the cell failed.

    assert "java_deps" in ip.user_ns  # verify that variable exists

‎bigquery_storage/to_dataframe/requirements.txt

Copy file name to clipboardExpand all lines: bigquery_storage/to_dataframe/requirements.txt
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ google-auth==1.6.2
22
google-cloud-bigquery-storage==0.3.0
33
google-cloud-bigquery==1.11.1
44
fastavro==0.21.17
5+
ipython==7.2.0
56
pandas==0.24.0

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.