Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 1917649

Browse filesBrowse files
feat: Add micro-benchmarks for reads comparing standard (regional) vs rapid (zonal) buckets. (#1697)
* Add performance microbenchmarking suite for Sequential and Random Reads * This compares Regional (Standard Storage) with Zonal (Rapid Storage) buckets. * Regional uses JSON, whereas Zonal uses gRPC bidi --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent cc4831d commit 1917649
Copy full SHA for 1917649

File tree

Expand file treeCollapse file tree

15 files changed

+1276
-0
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

15 files changed

+1276
-0
lines changed
Open diff view settings
Collapse file

‎.gitignore‎

Copy file name to clipboardExpand all lines: .gitignore
+3Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,6 @@ system_tests/local_test_setup
6262
# Make sure a generated file isn't accidentally committed.
6363
pylintrc
6464
pylintrc.test
65+
66+
# Benchmarking results and logs
67+
__benchmark_results__/**
Collapse file

‎setup.py‎

Copy file name to clipboardExpand all lines: setup.py
+6Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,12 @@
6060
"opentelemetry-api >= 1.1.0, < 2.0.0",
6161
],
6262
"testing": [
63+
"google-cloud-testutils",
64+
"numpy",
65+
"psutil",
66+
"py-cpuinfo",
67+
"pytest-benchmark",
68+
"PyYAML",
6369
"mock",
6470
"pytest",
6571
"pytest-cov",
Collapse file

‎tests/perf/__init__.py‎

Copy file name to clipboardExpand all lines: tests/perf/__init__.py
Whitespace-only changes.
Collapse file
+38Lines changed: 38 additions & 0 deletions
  • Display the source diff
  • Display the rich diff
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Performance Microbenchmarks
2+
3+
This directory contains performance microbenchmarks for the Python Storage client library.
4+
5+
## Usage
6+
7+
To run the benchmarks, use `pytest` with the `--benchmark-json` flag to specify an output file for the results.
8+
9+
Example:
10+
```bash
11+
pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py
12+
```
13+
14+
### Running a Specific Test
15+
16+
To run a single test, append `::` followed by the test name to the file path.
17+
18+
Example:
19+
```bash
20+
pytest --benchmark-json=output.json -vv -s tests/perf/microbenchmarks/reads/test_reads.py::test_downloads_single_proc_single_coro
21+
```
22+
23+
## Configuration
24+
25+
The benchmarks are configured using `config.yaml` files located in the respective subdirectories (e.g., `reads/config.yaml`).
26+
27+
## Overriding Buckets
28+
29+
You can override the buckets used in the benchmarks by setting environment variables. Please refer to the specific benchmark implementation for the environment variable names.
30+
31+
## Output
32+
33+
The benchmarks produce a JSON file with the results. This file can be converted to a CSV file for easier analysis in spreadsheets using the provided `json_to_csv.py` script.
34+
35+
Example:
36+
```bash
37+
python3 tests/perf/microbenchmarks/json_to_csv.py output.json
38+
```
Collapse file
+13Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
Collapse file
+163Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import io
import os
import statistics
from typing import Any, List, Optional
18+
19+
20+
def publish_benchmark_extra_info(
    benchmark: Any,
    params: Any,
    benchmark_group: str = "read",
    true_times: Optional[List[float]] = None,
) -> None:
    """
    Helper function to publish benchmark parameters to the extra_info property.

    Args:
        benchmark: pytest-benchmark fixture; its ``extra_info`` dict is
            populated and its ``stats`` (min/max/mean/median times) are read.
        params: Workload parameters object exposing ``num_files``,
            ``file_size_bytes``, ``chunk_size_bytes``, ``pattern``,
            ``num_coros``, ``rounds``, ``bucket_name``, ``bucket_type`` and
            ``num_processes``.
        benchmark_group: Group label assigned to the benchmark; ``"write"``
            forces the reported pattern to ``"seq"``.
        true_times: Optional wall-clock round times measured by the caller,
            used to compute throughput independently of benchmark.stats.
            (Defaults to None rather than a mutable ``[]`` to avoid the
            shared-mutable-default pitfall.)
    """
    benchmark.extra_info["num_files"] = params.num_files
    benchmark.extra_info["file_size"] = params.file_size_bytes
    benchmark.extra_info["chunk_size"] = params.chunk_size_bytes
    if benchmark_group == "write":
        # Writes are always sequential regardless of the configured pattern.
        benchmark.extra_info["pattern"] = "seq"
    else:
        benchmark.extra_info["pattern"] = params.pattern
    benchmark.extra_info["coros"] = params.num_coros
    benchmark.extra_info["rounds"] = params.rounds
    benchmark.extra_info["bucket_name"] = params.bucket_name
    benchmark.extra_info["bucket_type"] = params.bucket_type
    benchmark.extra_info["processes"] = params.num_processes
    benchmark.group = benchmark_group

    object_size = params.file_size_bytes
    num_files = params.num_files
    total_uploaded_mib = object_size / (1024 * 1024) * num_files
    # Throughput is inversely proportional to elapsed time, so the slowest
    # round (max time) yields the minimum throughput and vice versa.
    min_throughput = total_uploaded_mib / benchmark.stats["max"]
    max_throughput = total_uploaded_mib / benchmark.stats["min"]
    mean_throughput = total_uploaded_mib / benchmark.stats["mean"]
    median_throughput = total_uploaded_mib / benchmark.stats["median"]

    benchmark.extra_info["throughput_MiB_s_min"] = min_throughput
    benchmark.extra_info["throughput_MiB_s_max"] = max_throughput
    benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput
    benchmark.extra_info["throughput_MiB_s_median"] = median_throughput

    print("\nThroughput Statistics (MiB/s):")
    print(f"  Min: {min_throughput:.2f} (from max time)")
    print(f"  Max: {max_throughput:.2f} (from min time)")
    print(f"  Mean: {mean_throughput:.2f} (approx, from mean time)")
    print(f"  Median: {median_throughput:.2f} (approx, from median time)")

    if true_times:
        # Per-round throughputs from caller-measured times; these avoid any
        # fixture overhead that pytest-benchmark may fold into stats.
        throughputs = [total_uploaded_mib / t for t in true_times]
        true_min_throughput = min(throughputs)
        true_max_throughput = max(throughputs)
        true_mean_throughput = statistics.mean(throughputs)
        true_median_throughput = statistics.median(throughputs)

        benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput
        benchmark.extra_info["true_throughput_MiB_s_max"] = true_max_throughput
        benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput
        benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput

        print("\nThroughput Statistics from true_times (MiB/s):")
        print(f"  Min: {true_min_throughput:.2f}")
        print(f"  Max: {true_max_throughput:.2f}")
        print(f"  Mean: {true_mean_throughput:.2f}")
        print(f"  Median: {true_median_throughput:.2f}")

    # Get benchmark name, rounds, and iterations
    name = benchmark.name
    rounds = benchmark.stats['rounds']
    iterations = benchmark.stats['iterations']

    # Header for throughput table
    header = "\n\n" + "-" * 125 + "\n"
    header += "Throughput Benchmark (MiB/s)\n"
    header += "-" * 125 + "\n"
    header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n"
    header += "-" * 125

    # Data row for throughput table
    # The table headers (Min, Max) refer to the throughput values.
    row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}"

    print(header)
    print(row)
    print("-" * 125)
101+
class RandomBytesIO(io.RawIOBase):
    """
    Read-only file-like object that produces random bytes via ``os.urandom``.

    Behaves like a virtual file of exactly ``size`` bytes, with an upper
    safety cap to guard against accidental huge allocations.
    """

    # 10 GiB default safety cap
    DEFAULT_CAP = 10 * 1024 * 1024 * 1024

    def __init__(self, size, max_size=DEFAULT_CAP):
        """
        Args:
            size (int): The exact size of the virtual file in bytes.
            max_size (int): The maximum allowed size to prevent safety issues.
        """
        if size is None:
            raise ValueError("Size must be defined (cannot be infinite).")
        if size > max_size:
            raise ValueError(f"Requested size {size} exceeds the maximum limit of {max_size} bytes (10 GiB).")
        self._size = size
        self._pos = 0

    def read(self, n=-1):
        remaining = self._size - self._pos
        # A negative or None count means "read everything up to EOF".
        if n is None or n < 0:
            n = remaining
        # At or past EOF: nothing left to produce.
        if remaining <= 0:
            return b""
        # Never produce more than what is left in the virtual file, so the
        # stream ends at exactly `size` bytes.
        chunk = os.urandom(min(n, remaining))
        self._pos += len(chunk)
        return chunk

    def readable(self):
        return True

    def seekable(self):
        return True

    def tell(self):
        return self._pos

    def seek(self, offset, whence=io.SEEK_SET):
        # Map each whence mode to its base position.
        anchors = {
            io.SEEK_SET: 0,
            io.SEEK_CUR: self._pos,
            io.SEEK_END: self._size,
        }
        if whence not in anchors:
            raise ValueError(f"Invalid whence: {whence}")
        # Positions are clamped into [0, size] rather than raising.
        self._pos = min(max(anchors[whence] + offset, 0), self._size)
        return self._pos
Collapse file
+144Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import contextlib
15+
from typing import Any
16+
from tests.perf.microbenchmarks.resource_monitor import ResourceMonitor
17+
import pytest
18+
from tests.system._helpers import delete_blob
19+
20+
import asyncio
21+
import multiprocessing
22+
import os
23+
import uuid
24+
from google.cloud import storage
25+
from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
26+
AsyncAppendableObjectWriter,
27+
)
28+
from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
29+
30+
_OBJECT_NAME_PREFIX = "micro-benchmark"
31+
32+
33+
@pytest.fixture(scope="function")
def blobs_to_delete():
    """Yield a list for tests to register created blobs; delete them on teardown."""
    created = []
    yield created
    for blob in created:
        delete_blob(blob)
41+
42+
43+
@pytest.fixture(scope="session")
def storage_client():
    """Session-scoped GCS JSON client, closed automatically at teardown."""
    from google.cloud.storage import Client

    with contextlib.closing(Client()) as client:
        yield client
50+
51+
@pytest.fixture
def monitor():
    """
    Provides the ResourceMonitor class.

    Usage: with monitor() as m: ...

    Note: returns the class itself (not an instance), so each test controls
    the monitor's lifetime via its own context-manager block.
    """
    return ResourceMonitor
58+
59+
def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None:
    """
    Helper function to publish resource monitor results to the extra_info property.

    CPU/memory/network figures are formatted to two decimal places; vcpus is
    published as-is.
    """
    benchmark.extra_info["cpu_max_global"] = f"{monitor.max_cpu:.2f}"
    benchmark.extra_info["mem_max"] = f"{monitor.max_mem:.2f}"
    benchmark.extra_info["net_throughput_mb_s"] = f"{monitor.throughput_mb_s:.2f}"
    benchmark.extra_info["vcpus"] = monitor.vcpus
71+
72+
73+
async def upload_appendable_object(bucket_name, object_name, object_size, chunk_size):
    """
    Upload an appendable (zonal) object of ``object_size`` random bytes.

    The flush interval is set to a little over 1GiB to minimize the number of
    flushes; this helper only produces fixtures for read benchmarks, so write
    performance is not a concern here. Returns the number of bytes written.
    """
    writer = AsyncAppendableObjectWriter(
        AsyncGrpcClient().grpc_client,
        bucket_name,
        object_name,
        writer_options={"FLUSH_INTERVAL_BYTES": 1026 * 1024 ** 2},
    )
    await writer.open()
    written = 0
    while written < object_size:
        next_chunk = min(chunk_size, object_size - written)
        await writer.append(os.urandom(next_chunk))
        written += next_chunk
    metadata = await writer.close(finalize_on_close=True)
    # Sanity-check the server-reported size against what we sent.
    assert metadata.size == written
    return written
89+
90+
91+
def upload_simple_object(bucket_name, object_name, object_size, chunk_size):
    """
    Upload a regular (regional) object of ``object_size`` random bytes via the
    JSON API. Returns the number of bytes uploaded.
    """
    blob = storage.Client().bucket(bucket_name).blob(object_name)
    blob.chunk_size = chunk_size
    blob.upload_from_string(os.urandom(object_size))
    return object_size
99+
100+
101+
def _upload_worker(args):
    """
    Multiprocessing pool entry point: upload a single object.

    ``args`` is (bucket_name, object_name, object_size, chunk_size,
    bucket_type). Zonal buckets go through the async appendable writer; all
    other bucket types use the simple JSON upload path.

    Returns (object_name, uploaded_bytes).
    """
    bucket_name, object_name, object_size, chunk_size, bucket_type = args
    if bucket_type == "zonal":
        uploaded = asyncio.run(
            upload_appendable_object(bucket_name, object_name, object_size, chunk_size)
        )
        return object_name, uploaded
    return object_name, upload_simple_object(
        bucket_name, object_name, object_size, chunk_size
    )
110+
111+
112+
def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024):
    """
    Create/Upload objects for benchmarking and return a list of their names.
    """
    worker_args = []
    for _ in range(num_files):
        object_name = f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex[:5]}"
        worker_args.append((bucket_name, object_name, object_size, chunk_size, bucket_type))

    # Use "spawn" so each worker starts with clean interpreter/gRPC state.
    ctx = multiprocessing.get_context("spawn")
    with ctx.Pool() as pool:
        results = pool.map(_upload_worker, worker_args)

    # Verify every object was uploaded in full before handing names back.
    assert sum(size for _, size in results) == object_size * num_files
    return [name for name, _ in results]
133+
134+
135+
@pytest.fixture
def workload_params(request):
    """
    Parametrized fixture: upload the benchmark objects described by
    ``request.param`` and return (params, object_names).
    """
    cfg = request.param
    names = _create_files(
        cfg.num_files,
        cfg.bucket_name,
        cfg.bucket_type,
        cfg.file_size_bytes,
    )
    return cfg, names

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.