Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 139390c

Browse filesBrowse files
authored
feat: compute chunk wise checksum for bidi_writes (#1675)
feat: compute chunk wise checksum for bidi_writes and send it via BidiWriteObjectRequest. As a part of this change, also did a small refactoring: * Moved the precondition check to the _utils.py file
1 parent d6b8f55 commit 139390c
Copy full SHA for 139390c

File tree

Expand file treeCollapse file tree

5 files changed

+74
-18
lines changed
Open diff view settings
Filter options
Expand file treeCollapse file tree

5 files changed

+74
-18
lines changed
Open diff view settings
Collapse file
+34Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import google_crc32c
16+
17+
from google.api_core import exceptions
18+
19+
def raise_if_no_fast_crc32c():
    """Check that the C-accelerated version of google-crc32c is available.

    If it is not, raise an error to prevent silent performance degradation.

    :raises google.api_core.exceptions.FailedPrecondition: If
        ``google_crc32c.implementation`` is anything other than ``"c"``.

    :rtype: None
    :returns: Nothing. The function simply returns when the C extension is
        available.
    """
    if google_crc32c.implementation != "c":
        # Adjacent string literals are concatenated verbatim, so every
        # sentence except the last needs its own trailing space.
        raise exceptions.FailedPrecondition(
            "The google-crc32c package is not installed with C support. "
            "C extension is required for faster data integrity checks. "
            "For more information, see https://github.com/googleapis/python-crc32c."
        )
Collapse file

‎google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py‎

Copy file name to clipboardExpand all lines: google/cloud/storage/_experimental/asyncio/async_appendable_object_writer.py
+8-1Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
2323
"""
2424
from typing import Optional, Union
25+
26+
from google_crc32c import Checksum
27+
28+
from ._utils import raise_if_no_fast_crc32c
2529
from google.cloud import _storage_v2
2630
from google.cloud.storage._experimental.asyncio.async_grpc_client import (
2731
AsyncGrpcClient,
@@ -100,6 +104,7 @@ def __init__(
100104
:param write_handle: (Optional) An existing handle for writing the object.
101105
If provided, opening the bidi-gRPC connection will be faster.
102106
"""
107+
raise_if_no_fast_crc32c()
103108
self.client = client
104109
self.bucket_name = bucket_name
105110
self.object_name = object_name
@@ -191,11 +196,13 @@ async def append(self, data: bytes) -> None:
191196
bytes_to_flush = 0
192197
while start_idx < total_bytes:
193198
end_idx = min(start_idx + _MAX_CHUNK_SIZE_BYTES, total_bytes)
199+
data_chunk = data[start_idx:end_idx]
194200
await self.write_obj_stream.send(
195201
_storage_v2.BidiWriteObjectRequest(
196202
write_offset=self.offset,
197203
checksummed_data=_storage_v2.ChecksummedData(
198-
content=data[start_idx:end_idx]
204+
content=data_chunk,
205+
crc32c=int.from_bytes(Checksum(data_chunk).digest(), "big"),
199206
),
200207
)
201208
)
Collapse file

‎google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py‎

Copy file name to clipboardExpand all lines: google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py
+4-12Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,11 @@
1414

1515
from __future__ import annotations
1616
import asyncio
17-
import google_crc32c
18-
from google.api_core import exceptions
19-
from google_crc32c import Checksum
20-
2117
from typing import List, Optional, Tuple
2218

19+
from google_crc32c import Checksum
20+
21+
from ._utils import raise_if_no_fast_crc32c
2322
from google.cloud.storage._experimental.asyncio.async_read_object_stream import (
2423
_AsyncReadObjectStream,
2524
)
@@ -160,14 +159,7 @@ def __init__(
160159
:param read_handle: (Optional) An existing read handle.
161160
"""
162161

163-
# Verify that the fast, C-accelerated version of crc32c is available.
164-
# If not, raise an error to prevent silent performance degradation.
165-
if google_crc32c.implementation != "c":
166-
raise exceptions.NotFound(
167-
"The google-crc32c package is not installed with C support. "
168-
"Bidi reads require the C extension for data integrity checks."
169-
"For more information, see https://github.com/googleapis/python-crc32c."
170-
)
162+
raise_if_no_fast_crc32c()
171163

172164
self.client = client
173165
self.bucket_name = bucket_name
Collapse file

‎tests/unit/asyncio/test_async_appendable_object_writer.py‎

Copy file name to clipboardExpand all lines: tests/unit/asyncio/test_async_appendable_object_writer.py
+26-1Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
import pytest
1616
from unittest import mock
1717

18+
from google_crc32c import Checksum
19+
20+
from google.api_core import exceptions
1821
from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
1922
AsyncAppendableObjectWriter,
2023
)
@@ -85,6 +88,23 @@ def test_init_with_optional_args(mock_write_object_stream, mock_client):
8588
)
8689

8790

91+
@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
@mock.patch(
    "google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
)
def test_init_raises_if_crc32c_c_extension_is_missing(
    mock_grpc_client, mock_google_crc32c
):
    """Constructing the writer fails when crc32c reports a non-C implementation."""
    # Make the patched google_crc32c module report the non-C implementation.
    mock_google_crc32c.implementation = "python"
    expected_fragment = "The google-crc32c package is not installed with C support"

    with pytest.raises(exceptions.FailedPrecondition) as raised:
        AsyncAppendableObjectWriter(mock_grpc_client, "bucket", "object")

    assert expected_fragment in str(raised.value)
106+
107+
88108
@pytest.mark.asyncio
89109
@mock.patch(
90110
"google.cloud.storage._experimental.asyncio.async_appendable_object_writer._AsyncWriteObjectStream"
@@ -434,10 +454,15 @@ async def test_append_sends_data_in_chunks(mock_write_object_stream, mock_client
434454
# First chunk
435455
assert first_call[0][0].write_offset == 100
436456
assert len(first_call[0][0].checksummed_data.content) == _MAX_CHUNK_SIZE_BYTES
437-
457+
assert first_call[0][0].checksummed_data.crc32c == int.from_bytes(
458+
Checksum(data[:_MAX_CHUNK_SIZE_BYTES]).digest(), byteorder="big"
459+
)
438460
# Second chunk
439461
assert second_call[0][0].write_offset == 100 + _MAX_CHUNK_SIZE_BYTES
440462
assert len(second_call[0][0].checksummed_data.content) == 1
463+
assert second_call[0][0].checksummed_data.crc32c == int.from_bytes(
464+
Checksum(data[_MAX_CHUNK_SIZE_BYTES:]).digest(), byteorder="big"
465+
)
441466

442467
assert writer.offset == 100 + len(data)
443468
writer.simple_flush.assert_not_awaited()
Collapse file

‎tests/unit/asyncio/test_async_multi_range_downloader.py‎

Copy file name to clipboardExpand all lines: tests/unit/asyncio/test_async_multi_range_downloader.py
+2-4Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -349,9 +349,7 @@ async def test_downloading_without_opening_should_throw_error(
349349
assert str(exc.value) == "Underlying bidi-gRPC stream is not open"
350350
assert not mrd.is_stream_open
351351

352-
@mock.patch(
353-
"google.cloud.storage._experimental.asyncio.async_multi_range_downloader.google_crc32c"
354-
)
352+
@mock.patch("google.cloud.storage._experimental.asyncio._utils.google_crc32c")
355353
@mock.patch(
356354
"google.cloud.storage._experimental.asyncio.async_grpc_client.AsyncGrpcClient.grpc_client"
357355
)
@@ -360,7 +358,7 @@ def test_init_raises_if_crc32c_c_extension_is_missing(
360358
):
361359
mock_google_crc32c.implementation = "python"
362360

363-
with pytest.raises(exceptions.NotFound) as exc_info:
361+
with pytest.raises(exceptions.FailedPrecondition) as exc_info:
364362
AsyncMultiRangeDownloader(mock_grpc_client, "bucket", "object")
365363

366364
assert "The google-crc32c package is not installed with C support" in str(

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.