Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 018f61d

Browse filesBrowse files
brokkoli71 and normanrz authored
zarr.array from an existing zarr.Array (#2622)
* add creation from other zarr * remove duplicated tests * improve test * test_iter_grid for non-squares * concurrent streaming for equal chunk sizes * fix merge * fix mypy * fix mypy * fix test_iter_grid * extract to zarr.from_array * fix mypy * fix mypy * format * fix test_creation_from_other_zarr_format * distinguish between keep and auto for from_array arguments * partition concurrency along new_array chunks * fix mypy * improve test_creation_from_other_zarr_format * add typing in test * Update src/zarr/core/array.py Co-authored-by: Norman Rzepka <code@normanrz.com> * add from_array with npt.ArrayLike * add write_data argument * improve tests * improve docstrings and add examples * fix mypy and readthedocs * fix mypy and readthedocs * fix mypy and readthedocs * fix mypy and readthedocs * fix readthedocs ERROR: Unexpected indentation * add release notes * format docstring examples * add write_data attr to synchronous.create_array * `create_array` calls `from_array` calls `init_array` * document changes * fix serializer from_array v2 to v3 * fix mypy * improve codecov * fix mypy * from_array: copy zarr format on default * in ``from_array`` make all arguments except ``store`` keyword-only, to match ``create_array`` * in ``from_array`` default shards="keep" * redundant ``ChunkKeyEncoding | ChunkKeyEncodingLike`` * fix argument order in calls of `from_array` * fix numpydoc-validation * add docstring to store2 pytest fixture * extract `_parse_keep_array_attr` from `from_array` * extract `_parse_keep_array_attr` from `from_array` * correct _parse_keep_array_attr * fix merge * fix merge --------- Co-authored-by: Norman Rzepka <code@normanrz.com>
1 parent 06f7796 commit 018f61d
Copy full SHA for 018f61d

File tree

Expand file treeCollapse file tree

10 files changed

+772
-40
lines changed
Filter options
Expand file treeCollapse file tree

10 files changed

+772
-40
lines changed

‎changes/2622.feature.rst

Copy file name to clipboard
+1Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add ``zarr.from_array`` using concurrent streaming of source data

‎docs/release-notes.rst

Copy file name to clipboardExpand all lines: docs/release-notes.rst
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ Other
145145
3.0.1 (Jan. 17, 2025)
146146
---------------------
147147

148+
* Implement ``zarr.from_array`` using concurrent streaming (:issue:`2622`).
149+
148150
Bug fixes
149151
~~~~~~~~~
150152
* Fixes ``order`` argument for Zarr format 2 arrays (:issue:`2679`).

‎src/zarr/__init__.py

Copy file name to clipboardExpand all lines: src/zarr/__init__.py
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
create_hierarchy,
1212
empty,
1313
empty_like,
14+
from_array,
1415
full,
1516
full_like,
1617
group,
@@ -54,6 +55,7 @@
5455
"create_hierarchy",
5556
"empty",
5657
"empty_like",
58+
"from_array",
5759
"full",
5860
"full_like",
5961
"group",

‎src/zarr/api/asynchronous.py

Copy file name to clipboardExpand all lines: src/zarr/api/asynchronous.py
+7-3Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import numpy.typing as npt
1010
from typing_extensions import deprecated
1111

12-
from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
12+
from zarr.core.array import Array, AsyncArray, create_array, from_array, get_array_metadata
1313
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
1414
from zarr.core.buffer import NDArrayLike
1515
from zarr.core.common import (
@@ -57,6 +57,7 @@
5757
"create_hierarchy",
5858
"empty",
5959
"empty_like",
60+
"from_array",
6061
"full",
6162
"full_like",
6263
"group",
@@ -534,7 +535,7 @@ async def tree(grp: AsyncGroup, expand: bool | None = None, level: int | None =
534535

535536

536537
async def array(
537-
data: npt.ArrayLike, **kwargs: Any
538+
data: npt.ArrayLike | Array, **kwargs: Any
538539
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
539540
"""Create an array filled with `data`.
540541
@@ -551,13 +552,16 @@ async def array(
551552
The new array.
552553
"""
553554

555+
if isinstance(data, Array):
556+
return await from_array(data=data, **kwargs)
557+
554558
# ensure data is array-like
555559
if not hasattr(data, "shape") or not hasattr(data, "dtype"):
556560
data = np.asanyarray(data)
557561

558562
# setup dtype
559563
kw_dtype = kwargs.get("dtype")
560-
if kw_dtype is None:
564+
if kw_dtype is None and hasattr(data, "dtype"):
561565
kwargs["dtype"] = data.dtype
562566
else:
563567
kwargs["dtype"] = kw_dtype

‎src/zarr/api/synchronous.py

Copy file name to clipboardExpand all lines: src/zarr/api/synchronous.py
+224-3Lines changed: 224 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
"create_hierarchy",
5151
"empty",
5252
"empty_like",
53+
"from_array",
5354
"full",
5455
"full_like",
5556
"group",
@@ -359,7 +360,7 @@ def tree(grp: Group, expand: bool | None = None, level: int | None = None) -> An
359360

360361

361362
# TODO: add type annotations for kwargs
362-
def array(data: npt.ArrayLike, **kwargs: Any) -> Array:
363+
def array(data: npt.ArrayLike | Array, **kwargs: Any) -> Array:
363364
"""Create an array filled with `data`.
364365
365366
Parameters
@@ -759,11 +760,12 @@ def create_array(
759760
order: MemoryOrder | None = None,
760761
zarr_format: ZarrFormat | None = 3,
761762
attributes: dict[str, JSON] | None = None,
762-
chunk_key_encoding: ChunkKeyEncoding | ChunkKeyEncodingLike | None = None,
763+
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
763764
dimension_names: Iterable[str] | None = None,
764765
storage_options: dict[str, Any] | None = None,
765766
overwrite: bool = False,
766767
config: ArrayConfigLike | None = None,
768+
write_data: bool = True,
767769
) -> Array:
768770
"""Create an array.
769771
@@ -857,6 +859,11 @@ def create_array(
857859
Whether to overwrite an array with the same name in the store, if one exists.
858860
config : ArrayConfigLike, optional
859861
Runtime configuration for the array.
862+
write_data : bool
863+
If a pre-existing array-like object was provided to this function via the ``data`` parameter
864+
then ``write_data`` determines whether the values in that array-like object should be
865+
written to the Zarr array created by this function. If ``write_data`` is ``False``, then the
866+
array will be left empty.
860867
861868
Returns
862869
-------
@@ -866,7 +873,7 @@ def create_array(
866873
Examples
867874
--------
868875
>>> import zarr
869-
>>> store = zarr.storage.MemoryStore(mode='w')
876+
>>> store = zarr.storage.MemoryStore()
870877
>>> arr = zarr.create_array(
871878
>>> store=store,
872879
>>> shape=(100,100),
@@ -897,6 +904,220 @@ def create_array(
897904
storage_options=storage_options,
898905
overwrite=overwrite,
899906
config=config,
907+
write_data=write_data,
908+
)
909+
)
910+
)
911+
912+
913+
def from_array(
914+
store: str | StoreLike,
915+
*,
916+
data: Array | npt.ArrayLike,
917+
write_data: bool = True,
918+
name: str | None = None,
919+
chunks: Literal["auto", "keep"] | ChunkCoords = "keep",
920+
shards: ShardsLike | None | Literal["keep"] = "keep",
921+
filters: FiltersLike | Literal["keep"] = "keep",
922+
compressors: CompressorsLike | Literal["keep"] = "keep",
923+
serializer: SerializerLike | Literal["keep"] = "keep",
924+
fill_value: Any | None = None,
925+
order: MemoryOrder | None = None,
926+
zarr_format: ZarrFormat | None = None,
927+
attributes: dict[str, JSON] | None = None,
928+
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
929+
dimension_names: Iterable[str] | None = None,
930+
storage_options: dict[str, Any] | None = None,
931+
overwrite: bool = False,
932+
config: ArrayConfigLike | None = None,
933+
) -> Array:
934+
"""Create an array from an existing array or array-like.
935+
936+
Parameters
937+
----------
938+
store : str or Store
939+
Store or path to directory in file system or name of zip file for the new array.
940+
data : Array | array-like
941+
The array to copy.
942+
write_data : bool, default True
943+
Whether to copy the data from the input array to the new array.
944+
If ``write_data`` is ``False``, the new array will be created with the same metadata as the
945+
input array, but without any data.
946+
name : str or None, optional
947+
The name of the array within the store. If ``name`` is ``None``, the array will be located
948+
at the root of the store.
949+
chunks : ChunkCoords or "auto" or "keep", optional
950+
Chunk shape of the array.
951+
Following values are supported:
952+
953+
- "auto": Automatically determine the chunk shape based on the array's shape and dtype.
954+
- "keep": Retain the chunk shape of the data array if it is a zarr Array.
955+
- ChunkCoords: A tuple of integers representing the chunk shape.
956+
957+
If not specified, defaults to "keep" if data is a zarr Array, otherwise "auto".
958+
shards : ChunkCoords or "auto" or "keep" or None, optional
959+
Shard shape of the array.
960+
Following values are supported:
961+
962+
- "auto": Automatically determine the shard shape based on the array's shape and chunk shape.
963+
- "keep": Retain the shard shape of the data array if it is a zarr Array.
964+
- ChunkCoords: A tuple of integers representing the shard shape.
965+
- None: No sharding.
966+
967+
If not specified, defaults to "keep" if data is a zarr Array, otherwise None.
968+
filters : Iterable[Codec] or "auto" or "keep", optional
969+
Iterable of filters to apply to each chunk of the array, in order, before serializing that
970+
chunk to bytes.
971+
972+
For Zarr format 3, a "filter" is a codec that takes an array and returns an array,
973+
and these values must be instances of ``ArrayArrayCodec``, or dict representations
974+
of ``ArrayArrayCodec``.
975+
976+
For Zarr format 2, a "filter" can be any numcodecs codec; you should ensure that the
977+
order of your filters is consistent with the behavior of each filter.
978+
979+
Following values are supported:
980+
981+
- Iterable[Codec]: List of filters to apply to the array.
982+
- "auto": Automatically determine the filters based on the array's dtype.
983+
- "keep": Retain the filters of the data array if it is a zarr Array.
984+
985+
If no ``filters`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto".
986+
compressors : Iterable[Codec] or "auto" or "keep", optional
987+
List of compressors to apply to the array. Compressors are applied in order, and after any
988+
filters are applied (if any are specified) and the data is serialized into bytes.
989+
990+
For Zarr format 3, a "compressor" is a codec that takes a bytestream, and
991+
returns another bytestream. Multiple compressors may be provided for Zarr format 3.
992+
993+
For Zarr format 2, a "compressor" can be any numcodecs codec. Only a single compressor may
994+
be provided for Zarr format 2.
995+
996+
Following values are supported:
997+
998+
- Iterable[Codec]: List of compressors to apply to the array.
999+
- "auto": Automatically determine the compressors based on the array's dtype.
1000+
- "keep": Retain the compressors of the input array if it is a zarr Array.
1001+
1002+
If no ``compressors`` are provided, defaults to "keep" if data is a zarr Array, otherwise "auto".
1003+
serializer : dict[str, JSON] | ArrayBytesCodec or "auto" or "keep", optional
1004+
Array-to-bytes codec to use for encoding the array data.
1005+
Zarr format 3 only. Zarr format 2 arrays use implicit array-to-bytes conversion.
1006+
1007+
Following values are supported:
1008+
1009+
- dict[str, JSON]: A dict representation of an ``ArrayBytesCodec``.
1010+
- ArrayBytesCodec: An instance of ``ArrayBytesCodec``.
1011+
- "auto": a default serializer will be used. These defaults can be changed by modifying the value of
1012+
``array.v3_default_serializer`` in :mod:`zarr.core.config`.
1013+
- "keep": Retain the serializer of the input array if it is a zarr Array.
1014+
1015+
fill_value : Any, optional
1016+
Fill value for the array.
1017+
If not specified, defaults to the fill value of the data array.
1018+
order : {"C", "F"}, optional
1019+
The memory order of the array (default is "C").
1020+
For Zarr format 2, this parameter sets the memory order of the array.
1021+
For Zarr format 3, this parameter is deprecated, because memory order
1022+
is a runtime parameter for Zarr format 3 arrays. The recommended way to specify the memory
1023+
order for Zarr format 3 arrays is via the ``config`` parameter, e.g. ``{'order': 'C'}``.
1024+
If not specified, defaults to the memory order of the data array.
1025+
zarr_format : {2, 3}, optional
1026+
The zarr format to use when saving.
1027+
If not specified, defaults to the zarr format of the data array.
1028+
attributes : dict, optional
1029+
Attributes for the array.
1030+
If not specified, defaults to the attributes of the data array.
1031+
chunk_key_encoding : ChunkKeyEncoding, optional
1032+
A specification of how the chunk keys are represented in storage.
1033+
For Zarr format 3, the default is ``{"name": "default", "separator": "/"}``.
1034+
For Zarr format 2, the default is ``{"name": "v2", "separator": "."}``.
1035+
If not specified and the data array has the same zarr format as the target array,
1036+
the chunk key encoding of the data array is used.
1037+
dimension_names : Iterable[str], optional
1038+
The names of the dimensions (default is None).
1039+
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
1040+
If not specified, defaults to the dimension names of the data array.
1041+
storage_options : dict, optional
1042+
If using an fsspec URL to create the store, these will be passed to the backend implementation.
1043+
Ignored otherwise.
1044+
overwrite : bool, default False
1045+
Whether to overwrite an array with the same name in the store, if one exists.
1046+
config : ArrayConfig or ArrayConfigLike, optional
1047+
Runtime configuration for the array.
1048+
1049+
Returns
1050+
-------
1051+
Array
1052+
The array.
1053+
1054+
Examples
1055+
--------
1056+
Create an array from an existing Array::
1057+
1058+
>>> import zarr
1059+
>>> store = zarr.storage.MemoryStore()
1060+
>>> store2 = zarr.storage.LocalStore('example.zarr')
1061+
>>> arr = zarr.create_array(
1062+
>>> store=store,
1063+
>>> shape=(100,100),
1064+
>>> chunks=(10,10),
1065+
>>> dtype='int32',
1066+
>>> fill_value=0)
1067+
>>> arr2 = zarr.from_array(store2, data=arr)
1068+
<Array file://example.zarr shape=(100, 100) dtype=int32>
1069+
1070+
Create an array from an existing NumPy array::
1071+
1072+
>>> import numpy as np
1073+
>>> arr3 = zarr.from_array(
1074+
>>> zarr.storage.MemoryStore(),
1075+
>>> data=np.arange(10000, dtype='i4').reshape(100, 100),
1076+
>>> )
1077+
<Array memory://125477403529984 shape=(100, 100) dtype=int32>
1078+
1079+
Create an array from any array-like object::
1080+
1081+
>>> arr4 = zarr.from_array(
1082+
>>> zarr.storage.MemoryStore(),
1083+
>>> data=[[1, 2], [3, 4]],
1084+
>>> )
1085+
<Array memory://125477392154368 shape=(2, 2) dtype=int64>
1086+
>>> arr4[...]
1087+
array([[1, 2],[3, 4]])
1088+
1089+
Create an array from an existing Array without copying the data::
1090+
1091+
>>> arr5 = zarr.from_array(
1092+
>>> zarr.storage.MemoryStore(),
1093+
>>> data=arr4,
1094+
>>> write_data=False,
1095+
>>> )
1096+
<Array memory://140678602965568 shape=(2, 2) dtype=int64>
1097+
>>> arr5[...]
1098+
array([[0, 0],[0, 0]])
1099+
"""
1100+
return Array(
1101+
sync(
1102+
zarr.core.array.from_array(
1103+
store,
1104+
data=data,
1105+
write_data=write_data,
1106+
name=name,
1107+
chunks=chunks,
1108+
shards=shards,
1109+
filters=filters,
1110+
compressors=compressors,
1111+
serializer=serializer,
1112+
fill_value=fill_value,
1113+
order=order,
1114+
zarr_format=zarr_format,
1115+
attributes=attributes,
1116+
chunk_key_encoding=chunk_key_encoding,
1117+
dimension_names=dimension_names,
1118+
storage_options=storage_options,
1119+
overwrite=overwrite,
1120+
config=config,
9001121
)
9011122
)
9021123
)

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.