Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings
This repository was archived by the owner on May 7, 2026. It is now read-only.

Commit 4b0f13b

Browse filesBrowse files
authored
feat: add bigframe.bigquery.load_data function (#2426)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent fbd3a55 commit 4b0f13b
Copy full SHA for 4b0f13b

6 files changed

+356Lines changed: 356 additions & 0 deletions

File tree

Expand file treeCollapse file tree
Open diff view settings
Filter options
Expand file treeCollapse file tree
Open diff view settings
Collapse file

‎bigframes/bigquery/__init__.py‎

Copy file name to clipboardExpand all lines: bigframes/bigquery/__init__.py
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
st_regionstats,
4444
st_simplify,
4545
)
46+
from bigframes.bigquery._operations.io import load_data
4647
from bigframes.bigquery._operations.json import (
4748
json_extract,
4849
json_extract_array,
@@ -107,6 +108,8 @@
107108
struct,
108109
# table ops
109110
create_external_table,
111+
# io ops
112+
load_data,
110113
]
111114

112115
_module = sys.modules[__name__]
@@ -160,6 +163,8 @@
160163
"struct",
161164
# table ops
162165
"create_external_table",
166+
# io ops
167+
"load_data",
163168
# Modules / SQL namespaces
164169
"ai",
165170
"ml",
Collapse file
+94Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Mapping, Optional, Union
18+
19+
import pandas as pd
20+
21+
from bigframes.bigquery._operations.table import _get_table_metadata
22+
import bigframes.core.logging.log_adapter as log_adapter
23+
import bigframes.core.sql.io
24+
import bigframes.session
25+
26+
27+
@log_adapter.method_logger(custom_base_name="bigquery_io")
28+
def load_data(
29+
table_name: str,
30+
*,
31+
write_disposition: str = "INTO",
32+
columns: Optional[Mapping[str, str]] = None,
33+
partition_by: Optional[list[str]] = None,
34+
cluster_by: Optional[list[str]] = None,
35+
table_options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
36+
from_files_options: Mapping[str, Union[str, int, float, bool, list]],
37+
with_partition_columns: Optional[Mapping[str, str]] = None,
38+
connection_name: Optional[str] = None,
39+
session: Optional[bigframes.session.Session] = None,
40+
) -> pd.Series:
41+
"""
42+
Loads data into a BigQuery table.
43+
See the `BigQuery LOAD DATA DDL syntax
44+
<https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/load-statements>`_
45+
for additional reference.
46+
Args:
47+
table_name (str):
48+
The name of the table in BigQuery.
49+
write_disposition (str, default "INTO"):
50+
Whether to replace the table if it already exists ("OVERWRITE") or append to it ("INTO").
51+
columns (Mapping[str, str], optional):
52+
The table's schema.
53+
partition_by (list[str], optional):
54+
A list of partition expressions to partition the table by. See https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/load-statements#partition_expression.
55+
cluster_by (list[str], optional):
56+
A list of columns to cluster the table by.
57+
table_options (Mapping[str, Union[str, int, float, bool, list]], optional):
58+
The table options.
59+
from_files_options (Mapping[str, Union[str, int, float, bool, list]]):
60+
The options for loading data from files.
61+
with_partition_columns (Mapping[str, str], optional):
62+
The table's partition columns.
63+
connection_name (str, optional):
64+
The connection to use for the table.
65+
session (bigframes.session.Session, optional):
66+
The session to use. If not provided, the default session is used.
67+
Returns:
68+
pandas.Series:
69+
A Series with object dtype containing the table metadata. Reference
70+
the `BigQuery Table REST API reference
71+
<https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table>`_
72+
for available fields.
73+
"""
74+
import bigframes.pandas as bpd
75+
76+
sql = bigframes.core.sql.io.load_data_ddl(
77+
table_name=table_name,
78+
write_disposition=write_disposition,
79+
columns=columns,
80+
partition_by=partition_by,
81+
cluster_by=cluster_by,
82+
table_options=table_options,
83+
from_files_options=from_files_options,
84+
with_partition_columns=with_partition_columns,
85+
connection_name=connection_name,
86+
)
87+
88+
if session is None:
89+
bpd.read_gbq_query(sql)
90+
session = bpd.get_global_session()
91+
else:
92+
session.read_gbq_query(sql)
93+
94+
return _get_table_metadata(bqclient=session.bqclient, table_name=table_name)
Collapse file

‎bigframes/core/sql/io.py‎

Copy file name to clipboard
+87Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import Mapping, Optional, Union
18+
19+
20+
def load_data_ddl(
21+
table_name: str,
22+
*,
23+
write_disposition: str = "INTO",
24+
columns: Optional[Mapping[str, str]] = None,
25+
partition_by: Optional[list[str]] = None,
26+
cluster_by: Optional[list[str]] = None,
27+
table_options: Optional[Mapping[str, Union[str, int, float, bool, list]]] = None,
28+
from_files_options: Mapping[str, Union[str, int, float, bool, list]],
29+
with_partition_columns: Optional[Mapping[str, str]] = None,
30+
connection_name: Optional[str] = None,
31+
) -> str:
32+
"""Generates the LOAD DATA DDL statement."""
33+
statement = ["LOAD DATA"]
34+
statement.append(write_disposition)
35+
statement.append(table_name)
36+
37+
if columns:
38+
column_defs = ", ".join([f"{name} {typ}" for name, typ in columns.items()])
39+
statement.append(f"({column_defs})")
40+
41+
if partition_by:
42+
statement.append(f"PARTITION BY {', '.join(partition_by)}")
43+
44+
if cluster_by:
45+
statement.append(f"CLUSTER BY {', '.join(cluster_by)}")
46+
47+
if table_options:
48+
opts = []
49+
for key, value in table_options.items():
50+
if isinstance(value, str):
51+
value_sql = repr(value)
52+
opts.append(f"{key} = {value_sql}")
53+
elif isinstance(value, bool):
54+
opts.append(f"{key} = {str(value).upper()}")
55+
elif isinstance(value, list):
56+
list_str = ", ".join([repr(v) for v in value])
57+
opts.append(f"{key} = [{list_str}]")
58+
else:
59+
opts.append(f"{key} = {value}")
60+
options_str = ", ".join(opts)
61+
statement.append(f"OPTIONS ({options_str})")
62+
63+
opts = []
64+
for key, value in from_files_options.items():
65+
if isinstance(value, str):
66+
value_sql = repr(value)
67+
opts.append(f"{key} = {value_sql}")
68+
elif isinstance(value, bool):
69+
opts.append(f"{key} = {str(value).upper()}")
70+
elif isinstance(value, list):
71+
list_str = ", ".join([repr(v) for v in value])
72+
opts.append(f"{key} = [{list_str}]")
73+
else:
74+
opts.append(f"{key} = {value}")
75+
options_str = ", ".join(opts)
76+
statement.append(f"FROM FILES ({options_str})")
77+
78+
if with_partition_columns:
79+
part_defs = ", ".join(
80+
[f"{name} {typ}" for name, typ in with_partition_columns.items()]
81+
)
82+
statement.append(f"WITH PARTITION COLUMNS ({part_defs})")
83+
84+
if connection_name:
85+
statement.append(f"WITH CONNECTION `{connection_name}`")
86+
87+
return " ".join(statement)
Collapse file
+39Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import bigframes.bigquery as bbq
16+
17+
18+
def test_load_data(session, dataset_id):
19+
table_name = f"{dataset_id}.test_load_data"
20+
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
21+
22+
# Create the external table
23+
table = bbq.load_data(
24+
table_name,
25+
columns={
26+
"name": "STRING",
27+
"post_abbr": "STRING",
28+
},
29+
from_files_options={"format": "CSV", "uris": [uri], "skip_leading_rows": 1},
30+
session=session,
31+
)
32+
assert table is not None
33+
34+
# Read the table to verify
35+
import bigframes.pandas as bpd
36+
37+
bf_df = bpd.read_gbq(table_name)
38+
pd_df = bf_df.to_pandas()
39+
assert len(pd_df) > 0
Collapse file
+41Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from unittest import mock
16+
17+
import pytest
18+
19+
import bigframes.bigquery._operations.io
20+
import bigframes.core.sql.io
21+
import bigframes.session
22+
23+
24+
@pytest.fixture
25+
def mock_session():
26+
return mock.create_autospec(spec=bigframes.session.Session)
27+
28+
29+
@mock.patch("bigframes.bigquery._operations.io._get_table_metadata")
30+
def test_load_data(get_table_metadata_mock, mock_session):
31+
bigframes.bigquery._operations.io.load_data(
32+
"my-project.my_dataset.my_table",
33+
columns={"col1": "INT64", "col2": "STRING"},
34+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
35+
session=mock_session,
36+
)
37+
mock_session.read_gbq_query.assert_called_once()
38+
generated_sql = mock_session.read_gbq_query.call_args[0][0]
39+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
40+
assert generated_sql == expected
41+
get_table_metadata_mock.assert_called_once()
Collapse file

‎tests/unit/core/sql/test_io.py‎

Copy file name to clipboard
+90Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import bigframes.core.sql.io
15+
16+
17+
def test_load_data_ddl():
18+
sql = bigframes.core.sql.io.load_data_ddl(
19+
"my-project.my_dataset.my_table",
20+
columns={"col1": "INT64", "col2": "STRING"},
21+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
22+
)
23+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
24+
assert sql == expected
25+
26+
27+
def test_load_data_ddl_overwrite():
28+
sql = bigframes.core.sql.io.load_data_ddl(
29+
"my-project.my_dataset.my_table",
30+
write_disposition="OVERWRITE",
31+
columns={"col1": "INT64", "col2": "STRING"},
32+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
33+
)
34+
expected = "LOAD DATA OVERWRITE my-project.my_dataset.my_table (col1 INT64, col2 STRING) FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
35+
assert sql == expected
36+
37+
38+
def test_load_data_ddl_with_partition_columns():
39+
sql = bigframes.core.sql.io.load_data_ddl(
40+
"my-project.my_dataset.my_table",
41+
columns={"col1": "INT64", "col2": "STRING"},
42+
with_partition_columns={"part1": "DATE", "part2": "STRING"},
43+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
44+
)
45+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) FROM FILES (format = 'CSV', uris = ['gs://bucket/path*']) WITH PARTITION COLUMNS (part1 DATE, part2 STRING)"
46+
assert sql == expected
47+
48+
49+
def test_load_data_ddl_connection():
50+
sql = bigframes.core.sql.io.load_data_ddl(
51+
"my-project.my_dataset.my_table",
52+
columns={"col1": "INT64", "col2": "STRING"},
53+
connection_name="my-connection",
54+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
55+
)
56+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) FROM FILES (format = 'CSV', uris = ['gs://bucket/path*']) WITH CONNECTION `my-connection`"
57+
assert sql == expected
58+
59+
60+
def test_load_data_ddl_partition_by():
61+
sql = bigframes.core.sql.io.load_data_ddl(
62+
"my-project.my_dataset.my_table",
63+
columns={"col1": "INT64", "col2": "STRING"},
64+
partition_by=["date_col"],
65+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
66+
)
67+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) PARTITION BY date_col FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
68+
assert sql == expected
69+
70+
71+
def test_load_data_ddl_cluster_by():
72+
sql = bigframes.core.sql.io.load_data_ddl(
73+
"my-project.my_dataset.my_table",
74+
columns={"col1": "INT64", "col2": "STRING"},
75+
cluster_by=["cluster_col"],
76+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
77+
)
78+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) CLUSTER BY cluster_col FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
79+
assert sql == expected
80+
81+
82+
def test_load_data_ddl_table_options():
83+
sql = bigframes.core.sql.io.load_data_ddl(
84+
"my-project.my_dataset.my_table",
85+
columns={"col1": "INT64", "col2": "STRING"},
86+
table_options={"description": "my table"},
87+
from_files_options={"format": "CSV", "uris": ["gs://bucket/path*"]},
88+
)
89+
expected = "LOAD DATA INTO my-project.my_dataset.my_table (col1 INT64, col2 STRING) OPTIONS (description = 'my table') FROM FILES (format = 'CSV', uris = ['gs://bucket/path*'])"
90+
assert sql == expected

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.