Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 0f85ad0

Browse filesBrowse files
tswastJon Wayne Parrott
authored andcommitted
Add DML samples for BigQuery. (GoogleCloudPlatform#546)
Add BigQuery DML sample for inserts. This sample reads a SQL file (for example: one that was output from mysqldump) and executes each line as a query. At least in my configuration of mysqldump, each insert statement was on a single line, so I was able to write data from MySQL to BigQuery with this sample.
1 parent de3cf80 commit 0f85ad0
Copy full SHA for 0f85ad0

File tree

Expand file treeCollapse file tree

9 files changed

+356
-0
lines changed
Filter options
Expand file treeCollapse file tree

9 files changed

+356
-0
lines changed

‎bigquery/dml/.gitignore

Copy file name to clipboard
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
sample_db_export.sql

‎bigquery/dml/README.md

Copy file name to clipboard
+15Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# BigQuery DML Samples
2+
3+
<!-- auto-doc-link -->
4+
These samples are used on the following documentation page:
5+
6+
> https://cloud.google.combigquery/docs/loading-data-sql-dml
7+
8+
<!-- end-auto-doc-link -->
9+
10+
To create a test database, run the `populate_db.py` script.
11+
12+
```
13+
python populate_db.py 100 localhost root 'mysql-password' sample_db
14+
```
15+

‎bigquery/dml/insert_sql.py

Copy file name to clipboard
+78Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Sample that runs a file containing INSERT SQL statements in Big Query.
18+
19+
This could be used to run the INSERT statements in a mysqldump output such as
20+
21+
mysqldump --user=root \
22+
--password='secret-password' \
23+
--host=127.0.0.1 \
24+
--no-create-info sample_db \
25+
--skip-add-locks > sample_db_export.sql
26+
27+
To run, first create tables with the same names and columns as the sample
28+
database. Then run this script.
29+
30+
python insert_sql.py my-project my_dataset sample_db_export.sql
31+
"""
32+
33+
# [START insert_sql]
34+
import argparse
35+
36+
from gcloud import bigquery
37+
38+
39+
def insert_sql(project, default_dataset, sql_path):
40+
"""Run all the SQL statements in a SQL file."""
41+
42+
client = bigquery.Client(project=project)
43+
44+
with open(sql_path) as f:
45+
for line in f:
46+
line = line.strip()
47+
48+
if not line.startswith('INSERT'):
49+
continue
50+
51+
print('Running query: {}{}'.format(
52+
line[:60],
53+
'...' if len(line) > 60 else ''))
54+
query = client.run_sync_query(line)
55+
56+
# Set use_legacy_sql to False to enable standard SQL syntax.
57+
# This is required to use the Data Manipulation Language features.
58+
#
59+
# For more information about enabling standard SQL, see:
60+
# https://cloud.google.com/bigquery/sql-reference/enabling-standard-sql
61+
query.use_legacy_sql = False
62+
query.default_dataset = client.dataset(default_dataset)
63+
query.run()
64+
65+
66+
if __name__ == "__main__":
67+
parser = argparse.ArgumentParser(
68+
description=__doc__,
69+
formatter_class=argparse.RawDescriptionHelpFormatter)
70+
parser.add_argument('project', help='Google Cloud project name')
71+
parser.add_argument(
72+
'default_dataset', help='Default BigQuery dataset name')
73+
parser.add_argument('sql_path', help='Path to SQL file')
74+
75+
args = parser.parse_args()
76+
77+
insert_sql(args.project, args.default_dataset, args.sql_path)
78+
# [END insert_sql]

‎bigquery/dml/insert_sql_test.py

Copy file name to clipboard
+32Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright 2016 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os.path
16+
17+
from insert_sql import insert_sql
18+
19+
20+
def test_insert_sql(cloud_config, capsys):
21+
sql_path = os.path.join(
22+
os.path.dirname(__file__),
23+
'resources',
24+
'insert_sql_test.sql')
25+
26+
insert_sql(cloud_config.project, 'test_dataset', sql_path)
27+
28+
out, _ = capsys.readouterr()
29+
30+
assert (
31+
'INSERT INTO `test_table` (`Name`) VALUES (\'hello world\')'
32+
in out)

‎bigquery/dml/populate_db.py

Copy file name to clipboard
+182Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2016 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Command-line tool to simulate user actions and write to SQL database.
18+
"""
19+
20+
from __future__ import division
21+
22+
import argparse
23+
import datetime
24+
import random
25+
import uuid
26+
27+
from six.moves.urllib import parse
28+
import sqlalchemy
29+
from sqlalchemy.ext import declarative
30+
import sqlalchemy.orm
31+
32+
33+
SECONDS_IN_DAY = 24 * 60 * 60
34+
SECONDS_IN_2016 = 366 * SECONDS_IN_DAY
35+
36+
# Unix timestamp for the beginning of 2016.
37+
# http://stackoverflow.com/a/19801806/101923
38+
TIMESTAMP_2016 = (
39+
datetime.datetime(2016, 1, 1, 0, 0, 0) -
40+
datetime.datetime.fromtimestamp(0)).total_seconds()
41+
42+
43+
Base = declarative.declarative_base()
44+
45+
46+
class User(Base):
47+
__tablename__ = 'Users'
48+
49+
id = sqlalchemy.Column(sqlalchemy.Integer, primary_key=True)
50+
date_joined = sqlalchemy.Column(sqlalchemy.DateTime)
51+
52+
53+
class UserSession(Base):
54+
__tablename__ = 'UserSessions'
55+
56+
id = sqlalchemy.Column(sqlalchemy.String(length=36), primary_key=True)
57+
user_id = sqlalchemy.Column(
58+
sqlalchemy.Integer, sqlalchemy.ForeignKey('Users.id'))
59+
login_time = sqlalchemy.Column(sqlalchemy.DateTime)
60+
logout_time = sqlalchemy.Column(sqlalchemy.DateTime)
61+
ip_address = sqlalchemy.Column(sqlalchemy.String(length=40))
62+
63+
64+
def generate_users(session, num_users):
65+
users = []
66+
67+
for userid in range(1, num_users + 1):
68+
year_portion = random.random()
69+
date_joined = datetime.datetime.fromtimestamp(
70+
TIMESTAMP_2016 + SECONDS_IN_2016 * year_portion)
71+
user = User(id=userid, date_joined=date_joined)
72+
users.append(user)
73+
session.add(user)
74+
75+
session.commit()
76+
return users
77+
78+
79+
def random_ip():
80+
"""Choose a random example IP address.
81+
82+
Examples are chosen from the test networks described in
83+
https://tools.ietf.org/html/rfc5737
84+
"""
85+
network = random.choice([
86+
'192.0.2', # RFC-5737 TEST-NET-1
87+
'198.51.100', # RFC-5737 TEST-NET-2
88+
'203.0.113', # RFC-5737 TEST-NET-3
89+
])
90+
ip_address = '{}.{}'.format(network, random.randrange(256))
91+
return ip_address
92+
93+
94+
def simulate_user_session(session, user, previous_user_session=None):
95+
"""Simulates a single session (login to logout) of a user's history."""
96+
login_time = user.date_joined
97+
98+
if previous_user_session is not None:
99+
login_time = (
100+
previous_user_session.logout_time +
101+
datetime.timedelta(
102+
days=1, seconds=random.randrange(SECONDS_IN_DAY)))
103+
104+
session_id = str(uuid.uuid4())
105+
user_session = UserSession(
106+
id=session_id,
107+
user_id=user.id,
108+
login_time=login_time,
109+
ip_address=random_ip())
110+
user_session.logout_time = (
111+
login_time +
112+
datetime.timedelta(seconds=(1 + random.randrange(59))))
113+
session.commit()
114+
session.add(user_session)
115+
return user_session
116+
117+
118+
def simulate_user_history(session, user):
119+
"""Simulates the entire history of activity for a single user."""
120+
total_sessions = random.randrange(10)
121+
previous_user_session = None
122+
123+
for _ in range(total_sessions):
124+
user_session = simulate_user_session(
125+
session, user, previous_user_session)
126+
previous_user_session = user_session
127+
128+
129+
def run_simulation(session, users):
130+
"""Simulates app activity for all users."""
131+
132+
for n, user in enumerate(users):
133+
if n % 100 == 0 and n != 0:
134+
print('Simulated data for {} users'.format(n))
135+
136+
simulate_user_history(session, user)
137+
138+
print('COMPLETE: Simulated data for {} users'.format(len(users)))
139+
140+
141+
def populate_db(session, total_users=3):
142+
"""Populate database with total_users simulated users and their actions."""
143+
users = generate_users(session, total_users)
144+
run_simulation(session, users)
145+
146+
147+
def create_session(engine):
148+
Base.metadata.drop_all(engine)
149+
Base.metadata.create_all(engine)
150+
Session = sqlalchemy.orm.sessionmaker(bind=engine)
151+
return Session()
152+
153+
154+
def main(total_users, host, user, password, db_name):
155+
engine = sqlalchemy.create_engine(
156+
'mysql+pymysql://{user}:{password}@{host}/{db_name}'.format(
157+
user=user,
158+
password=parse.quote_plus(password),
159+
host=host,
160+
db_name=db_name))
161+
session = create_session(engine)
162+
163+
try:
164+
populate_db(session, total_users)
165+
finally:
166+
session.close()
167+
168+
169+
if __name__ == '__main__':
170+
parser = argparse.ArgumentParser(
171+
description=__doc__,
172+
formatter_class=argparse.RawDescriptionHelpFormatter)
173+
parser.add_argument(
174+
'total_users', help='How many simulated users to create.', type=int)
175+
parser.add_argument('host', help='Host of the database to write to.')
176+
parser.add_argument('user', help='User to connect to the database.')
177+
parser.add_argument('password', help='Password for the database user.')
178+
parser.add_argument('db', help='Name of the database to write to.')
179+
180+
args = parser.parse_args()
181+
182+
main(args.total_users, args.host, args.user, args.password, args.db)

‎bigquery/dml/populate_db_test.py

Copy file name to clipboard
+34Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2016 Google Inc. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import sqlalchemy
16+
17+
from populate_db import create_session, populate_db
18+
19+
20+
def test_populate_db_populates_users():
21+
engine = sqlalchemy.create_engine('sqlite://')
22+
session = create_session(engine)
23+
24+
try:
25+
populate_db(session, total_users=10)
26+
27+
connection = session.connection().connection
28+
cursor = connection.cursor()
29+
cursor.execute('SELECT COUNT(*) FROM Users')
30+
assert cursor.fetchone()[0] == 10
31+
cursor.execute('SELECT COUNT(*) FROM UserSessions')
32+
assert cursor.fetchone()[0] >= 10
33+
finally:
34+
session.close()

‎bigquery/dml/requirements.txt

Copy file name to clipboard
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
flake8==3.0.4
2+
gcloud==0.18.1
3+
PyMySQL==0.7.7
4+
six==1.10.0
5+
SQLAlchemy==1.0.15
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-- This file is used to test ../insert_sql.py.
2+
-- These are comments.
3+
-- Each query to be executed should be on a single line.
4+
5+
/* Another ignored line. */
6+
INSERT INTO `test_table` (`Name`) VALUES ('hello world')

‎scripts/resources/docs-links.json

Copy file name to clipboardExpand all lines: scripts/resources/docs-links.json
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,9 @@
361361
"/bigquery/docs/data": [
362362
"bigquery/api/sync_query.py"
363363
],
364+
"bigquery/docs/loading-data-sql-dml": [
365+
"bigquery/dml/insert_sql.py"
366+
],
364367
"/appengine/docs/python/memcache/examples": [
365368
"appengine/memcache/snippets/snippets.py",
366369
"appengine/memcache/guestbook/main.py"

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.