Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 629b4e5

Browse files
dstansby and d-v-b authored
Allow no compressor for v2 arrays (#3039)
* Allow no compressor for v2 arrays
* Use typing aliases for compressors
* Test v2 array w/ v3 codec errors
* Add changelog entry
* Update type comment
* fix test names

  Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>

---------

Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
1 parent 7584b96 commit 629b4e5
Copy full SHA for 629b4e5

File tree

Expand file tree / Collapse file tree

6 files changed

+70
-19
lines changed
Filter options
Expand file tree / Collapse file tree

6 files changed

+70
-19
lines changed

‎changes/3039.bugfix.rst

Copy file name to clipboard
+5Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
It is now possible to specify no compressor when creating a zarr format 2 array.
2+
This can be done by passing ``compressor=None`` to the various array creation routines.
3+
4+
The default behaviour of automatically choosing a suitable default compressor remains if the compressor argument is not given.
5+
To reproduce the behaviour in previous zarr-python versions when ``compressor=None`` was passed, pass ``compressor='auto'`` instead.

‎src/zarr/api/asynchronous.py

Copy file name to clipboardExpand all lines: src/zarr/api/asynchronous.py
+10-3Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,14 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
12+
from zarr.core.array import (
13+
Array,
14+
AsyncArray,
15+
CompressorLike,
16+
create_array,
17+
from_array,
18+
get_array_metadata,
19+
)
1320
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1421
from zarr.core.buffer import NDArrayLike
1522
from zarr.core.common import (
@@ -838,7 +845,7 @@ async def create(
838845
*, # Note: this is a change from v2
839846
chunks: ChunkCoords | int | None = None, # TODO: v2 allowed chunks=True
840847
dtype: npt.DTypeLike | None = None,
841-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
848+
compressor: CompressorLike = "auto",
842849
fill_value: Any | None = 0, # TODO: need type
843850
order: MemoryOrder | None = None,
844851
store: str | StoreLike | None = None,
@@ -991,7 +998,7 @@ async def create(
991998
dtype = parse_dtype(dtype, zarr_format)
992999
if not filters:
9931000
filters = _default_filters(dtype)
994-
if not compressor:
1001+
if compressor == "auto":
9951002
compressor = _default_compressor(dtype)
9961003
elif zarr_format == 3 and chunk_shape is None: # type: ignore[redundant-expr]
9971004
if chunks is not None:

‎src/zarr/api/synchronous.py

Copy file name to clipboardExpand all lines: src/zarr/api/synchronous.py
+2-2Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import zarr.api.asynchronous as async_api
88
import zarr.core.array
99
from zarr._compat import _deprecate_positional_args
10-
from zarr.core.array import Array, AsyncArray
10+
from zarr.core.array import Array, AsyncArray, CompressorLike
1111
from zarr.core.group import Group
1212
from zarr.core.sync import sync
1313
from zarr.core.sync_group import create_hierarchy
@@ -599,7 +599,7 @@ def create(
599599
*, # Note: this is a change from v2
600600
chunks: ChunkCoords | int | bool | None = None,
601601
dtype: npt.DTypeLike | None = None,
602-
compressor: dict[str, JSON] | None = None, # TODO: default and type change
602+
compressor: CompressorLike = "auto",
603603
fill_value: Any | None = 0, # TODO: need type
604604
order: MemoryOrder | None = None,
605605
store: str | StoreLike | None = None,

‎src/zarr/core/array.py

Copy file name to clipboardExpand all lines: src/zarr/core/array.py
+27-11Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
T_ArrayMetadata,
103103
)
104104
from zarr.core.metadata.v2 import (
105+
CompressorLikev2,
105106
_default_compressor,
106107
_default_filters,
107108
parse_compressor,
@@ -303,7 +304,7 @@ async def create(
303304
dimension_separator: Literal[".", "/"] | None = None,
304305
order: MemoryOrder | None = None,
305306
filters: list[dict[str, JSON]] | None = None,
306-
compressor: dict[str, JSON] | None = None,
307+
compressor: CompressorLikev2 | Literal["auto"] = "auto",
307308
# runtime
308309
overwrite: bool = False,
309310
data: npt.ArrayLike | None = None,
@@ -394,7 +395,7 @@ async def create(
394395
dimension_separator: Literal[".", "/"] | None = None,
395396
order: MemoryOrder | None = None,
396397
filters: list[dict[str, JSON]] | None = None,
397-
compressor: dict[str, JSON] | None = None,
398+
compressor: CompressorLike = "auto",
398399
# runtime
399400
overwrite: bool = False,
400401
data: npt.ArrayLike | None = None,
@@ -429,7 +430,7 @@ async def create(
429430
dimension_separator: Literal[".", "/"] | None = None,
430431
order: MemoryOrder | None = None,
431432
filters: list[dict[str, JSON]] | None = None,
432-
compressor: dict[str, JSON] | None = None,
433+
compressor: CompressorLike = "auto",
433434
# runtime
434435
overwrite: bool = False,
435436
data: npt.ArrayLike | None = None,
@@ -570,7 +571,7 @@ async def _create(
570571
dimension_separator: Literal[".", "/"] | None = None,
571572
order: MemoryOrder | None = None,
572573
filters: list[dict[str, JSON]] | None = None,
573-
compressor: dict[str, JSON] | None = None,
574+
compressor: CompressorLike = "auto",
574575
# runtime
575576
overwrite: bool = False,
576577
data: npt.ArrayLike | None = None,
@@ -604,7 +605,7 @@ async def _create(
604605
raise ValueError(
605606
"filters cannot be used for arrays with zarr_format 3. Use array-to-array codecs instead."
606607
)
607-
if compressor is not None:
608+
if compressor != "auto":
608609
raise ValueError(
609610
"compressor cannot be used for arrays with zarr_format 3. Use bytes-to-bytes codecs instead."
610611
)
@@ -768,7 +769,7 @@ def _create_metadata_v2(
768769
dimension_separator: Literal[".", "/"] | None = None,
769770
fill_value: float | None = None,
770771
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
771-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
772+
compressor: CompressorLikev2 = None,
772773
attributes: dict[str, JSON] | None = None,
773774
) -> ArrayV2Metadata:
774775
if dimension_separator is None:
@@ -809,7 +810,7 @@ async def _create_v2(
809810
dimension_separator: Literal[".", "/"] | None = None,
810811
fill_value: float | None = None,
811812
filters: Iterable[dict[str, JSON] | numcodecs.abc.Codec] | None = None,
812-
compressor: dict[str, JSON] | numcodecs.abc.Codec | None = None,
813+
compressor: CompressorLike = "auto",
813814
attributes: dict[str, JSON] | None = None,
814815
overwrite: bool = False,
815816
) -> AsyncArray[ArrayV2Metadata]:
@@ -821,6 +822,17 @@ async def _create_v2(
821822
else:
822823
await ensure_no_existing_node(store_path, zarr_format=2)
823824

825+
compressor_parsed: CompressorLikev2
826+
if compressor == "auto":
827+
compressor_parsed = _default_compressor(dtype)
828+
elif isinstance(compressor, BytesBytesCodec):
829+
raise ValueError(
830+
"Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. "
831+
"Use a numcodecs codec directly instead."
832+
)
833+
else:
834+
compressor_parsed = compressor
835+
824836
metadata = cls._create_metadata_v2(
825837
shape=shape,
826838
dtype=dtype,
@@ -829,7 +841,7 @@ async def _create_v2(
829841
dimension_separator=dimension_separator,
830842
fill_value=fill_value,
831843
filters=filters,
832-
compressor=compressor,
844+
compressor=compressor_parsed,
833845
attributes=attributes,
834846
)
835847

@@ -1751,7 +1763,7 @@ def create(
17511763
dimension_separator: Literal[".", "/"] | None = None,
17521764
order: MemoryOrder | None = None,
17531765
filters: list[dict[str, JSON]] | None = None,
1754-
compressor: dict[str, JSON] | None = None,
1766+
compressor: CompressorLike = "auto",
17551767
# runtime
17561768
overwrite: bool = False,
17571769
config: ArrayConfigLike | None = None,
@@ -1880,7 +1892,7 @@ def _create(
18801892
dimension_separator: Literal[".", "/"] | None = None,
18811893
order: MemoryOrder | None = None,
18821894
filters: list[dict[str, JSON]] | None = None,
1883-
compressor: dict[str, JSON] | None = None,
1895+
compressor: CompressorLike = "auto",
18841896
# runtime
18851897
overwrite: bool = False,
18861898
config: ArrayConfigLike | None = None,
@@ -3792,7 +3804,11 @@ def _get_default_codecs(
37923804
| Literal["auto"]
37933805
| None
37943806
)
3795-
CompressorLike: TypeAlias = dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | None
3807+
# Union of acceptable types for users to pass in for both v2 and v3 compressors
3808+
CompressorLike: TypeAlias = (
3809+
dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec | Literal["auto"] | None
3810+
)
3811+
37963812
CompressorsLike: TypeAlias = (
37973813
Iterable[dict[str, JSON] | BytesBytesCodec | numcodecs.abc.Codec]
37983814
| dict[str, JSON]

‎src/zarr/core/metadata/v2.py

Copy file name to clipboardExpand all lines: src/zarr/core/metadata/v2.py
+7-3Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Iterable, Sequence
66
from enum import Enum
77
from functools import cached_property
8-
from typing import TYPE_CHECKING, Any, TypedDict, cast
8+
from typing import TYPE_CHECKING, Any, TypeAlias, TypedDict, cast
99

1010
import numcodecs.abc
1111

@@ -43,6 +43,10 @@ class ArrayV2MetadataDict(TypedDict):
4343
attributes: dict[str, JSON]
4444

4545

46+
# Union of acceptable types for v2 compressors
47+
CompressorLikev2: TypeAlias = dict[str, JSON] | numcodecs.abc.Codec | None
48+
49+
4650
@dataclass(frozen=True, kw_only=True)
4751
class ArrayV2Metadata(Metadata):
4852
shape: ChunkCoords
@@ -52,7 +56,7 @@ class ArrayV2Metadata(Metadata):
5256
order: MemoryOrder = "C"
5357
filters: tuple[numcodecs.abc.Codec, ...] | None = None
5458
dimension_separator: Literal[".", "/"] = "."
55-
compressor: numcodecs.abc.Codec | None = None
59+
compressor: CompressorLikev2
5660
attributes: dict[str, JSON] = field(default_factory=dict)
5761
zarr_format: Literal[2] = field(init=False, default=2)
5862

@@ -65,7 +69,7 @@ def __init__(
6569
fill_value: Any,
6670
order: MemoryOrder,
6771
dimension_separator: Literal[".", "/"] = ".",
68-
compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None,
72+
compressor: CompressorLikev2 = None,
6973
filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None,
7074
attributes: dict[str, JSON] | None = None,
7175
) -> None:

‎tests/test_api.py

Copy file name to clipboardExpand all lines: tests/test_api.py
+19Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
from typing import TYPE_CHECKING
44

5+
import zarr.codecs
6+
57
if TYPE_CHECKING:
68
import pathlib
79

@@ -1190,3 +1192,20 @@ def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
11901192
# assert_array_equal doesn't check the type
11911193
assert isinstance(result, type(src))
11921194
cp.testing.assert_array_equal(result, src[:10, :10])
1195+
1196+
1197+
def test_v2_without_compressor() -> None:
1198+
# Make sure it's possible to set no compressor for v2 arrays
1199+
arr = zarr.create(store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=None)
1200+
assert arr.compressors == ()
1201+
1202+
1203+
def test_v2_with_v3_compressor() -> None:
1204+
# Check trying to create a v2 array with a v3 compressor fails
1205+
with pytest.raises(
1206+
ValueError,
1207+
match="Cannot use a BytesBytesCodec as a compressor for zarr v2 arrays. Use a numcodecs codec directly instead.",
1208+
):
1209+
zarr.create(
1210+
store={}, shape=(1), dtype="uint8", zarr_format=2, compressor=zarr.codecs.BloscCodec()
1211+
)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.