Commit b4f6649

Samples for pandas-gbq migration guide (GoogleCloudPlatform#1811)

1 parent 9473909, commit b4f6649

3 files changed: +227 -0 lines changed
+4 Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# pandas-gbq Migration Guide

This directory contains samples used in the `pandas-gbq` to
`google-cloud-bigquery` migration guide.
+2 Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
google-cloud-bigquery[pandas,pyarrow]==1.6.0
pandas-gbq==0.7.0
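These pins match the library versions the samples were written against. As a quick, illustrative sanity check (not part of this commit), you can confirm that both libraries import at the expected versions after installing the requirements:

# Illustrative check only; assumes the pinned requirements above are installed.
import google.cloud.bigquery
import pandas_gbq

print('google-cloud-bigquery:', google.cloud.bigquery.__version__)
print('pandas-gbq:', pandas_gbq.__version__)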
+221 Lines changed: 221 additions & 0 deletions
@@ -0,0 +1,221 @@
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import time

import pytest


@pytest.fixture
def temp_dataset():
    from google.cloud import bigquery

    client = bigquery.Client()
    dataset_id = "temp_dataset_{}".format(int(time.time() * 1000))
    dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
    dataset = client.create_dataset(bigquery.Dataset(dataset_ref))
    yield dataset
    client.delete_dataset(dataset, delete_contents=True)


def test_client_library_query():
    # [START bigquery_migration_client_library_query]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = client.query(sql).to_dataframe()

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_client_library_query]
    df = client.query(sql, project=project_id).to_dataframe()
    # [END bigquery_migration_client_library_query]
    assert len(df) > 0


def test_pandas_gbq_query():
    # [START bigquery_migration_pandas_gbq_query]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = 'TX'
        LIMIT 100
    """

    # Run a Standard SQL query using the environment's default project
    df = pandas.read_gbq(sql, dialect='standard')

    # Run a Standard SQL query with the project set explicitly
    project_id = 'your-project-id'
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_query]
    df = pandas.read_gbq(sql, project_id=project_id, dialect='standard')
    # [END bigquery_migration_pandas_gbq_query]
    assert len(df) > 0

def test_client_library_legacy_query():
    # [START bigquery_migration_client_library_query_legacy]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """
    query_config = bigquery.QueryJobConfig()
    query_config.use_legacy_sql = True

    # Run a legacy SQL query using the environment's default project
    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_legacy]
    assert len(df) > 0

def test_pandas_gbq_legacy_query():
    # [START bigquery_migration_pandas_gbq_query_legacy]
    import pandas

    sql = """
        SELECT name
        FROM [bigquery-public-data:usa_names.usa_1910_current]
        WHERE state = 'TX'
        LIMIT 100
    """

    df = pandas.read_gbq(sql, dialect='legacy')
    # [END bigquery_migration_pandas_gbq_query_legacy]
    assert len(df) > 0


def test_client_library_query_with_parameters():
    # [START bigquery_migration_client_library_query_parameters]
    from google.cloud import bigquery

    client = bigquery.Client()
    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = bigquery.QueryJobConfig()
    query_config.query_parameters = [
        bigquery.ScalarQueryParameter('state', 'STRING', 'TX'),
        bigquery.ScalarQueryParameter('limit', 'INTEGER', 100)
    ]
    df = client.query(sql, job_config=query_config).to_dataframe()
    # [END bigquery_migration_client_library_query_parameters]
    assert len(df) > 0


def test_pandas_gbq_query_with_parameters():
    # [START bigquery_migration_pandas_gbq_query_parameters]
    import pandas

    sql = """
        SELECT name
        FROM `bigquery-public-data.usa_names.usa_1910_current`
        WHERE state = @state
        LIMIT @limit
    """
    query_config = {
        'query': {
            'parameterMode': 'NAMED',
            'queryParameters': [
                {
                    'name': 'state',
                    'parameterType': {'type': 'STRING'},
                    'parameterValue': {'value': 'TX'}
                },
                {
                    'name': 'limit',
                    'parameterType': {'type': 'INTEGER'},
                    'parameterValue': {'value': 100}
                }
            ]
        }
    }
    df = pandas.read_gbq(sql, configuration=query_config)
    # [END bigquery_migration_pandas_gbq_query_parameters]
    assert len(df) > 0


def test_client_library_upload_from_dataframe(temp_dataset):
    # [START bigquery_migration_client_library_upload_from_dataframe]
    from google.cloud import bigquery
    import pandas

    df = pandas.DataFrame(
        {
            'my_string': ['a', 'b', 'c'],
            'my_int64': [1, 2, 3],
            'my_float64': [4.0, 5.0, 6.0],
        }
    )
    client = bigquery.Client()
    dataset_ref = client.dataset('my_dataset')
    # [END bigquery_migration_client_library_upload_from_dataframe]
    dataset_ref = client.dataset(temp_dataset.dataset_id)
    # [START bigquery_migration_client_library_upload_from_dataframe]
    table_ref = dataset_ref.table('new_table')
    client.load_table_from_dataframe(df, table_ref).result()
    # [END bigquery_migration_client_library_upload_from_dataframe]
    client = bigquery.Client()
    table = client.get_table(table_ref)
    assert table.num_rows == 3


def test_pandas_gbq_upload_from_dataframe(temp_dataset):
    from google.cloud import bigquery
    # [START bigquery_migration_pandas_gbq_upload_from_dataframe]
    import pandas

    df = pandas.DataFrame(
        {
            'my_string': ['a', 'b', 'c'],
            'my_int64': [1, 2, 3],
            'my_float64': [4.0, 5.0, 6.0],
        }
    )
    full_table_id = 'my_dataset.new_table'
    project_id = 'my-project-id'
    # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
    table_id = 'new_table'
    full_table_id = '{}.{}'.format(temp_dataset.dataset_id, table_id)
    project_id = os.environ['GCLOUD_PROJECT']
    # [START bigquery_migration_pandas_gbq_upload_from_dataframe]
    df.to_gbq(full_table_id, project_id=project_id)
    # [END bigquery_migration_pandas_gbq_upload_from_dataframe]
    client = bigquery.Client()
    table = client.get_table(temp_dataset.table(table_id))
    assert table.num_rows == 3
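Outside the tagged sample regions, these tests read the billing project from the GCLOUD_PROJECT environment variable and create a throwaway dataset through the temp_dataset fixture. A minimal runner sketch (not part of this commit; the module name samples_test.py is assumed, and GCLOUD_PROJECT must point at a real project with BigQuery enabled) could drive them with pytest:

# Illustrative only: invoke the sample tests programmatically.
# "samples_test.py" is an assumed file name for the module above.
import os
import pytest

os.environ.setdefault('GCLOUD_PROJECT', 'your-project-id')  # placeholder project ID
raise SystemExit(pytest.main(['-v', 'samples_test.py']))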
