kd.data.py.HuggingFace

kd.data.py.HuggingFace

class kauldron.data.py.HuggingFace(
path: str,
config: str | None = None,
*,
_fake_refs: type[_FakeRefsUnset] | dict[str, _FakeRootCfg] = <class 'kauldron.utils.config_util._FakeRefsUnset'>,
batch_size: int | None = None,
seed: int | typing.Sequence[int] | numpy.ndarray | jaxtyping.UInt32[Array, '2'] | jaxtyping.UInt32[ndarray, '2'] | jax.Array | None = _FakeRootCfg('cfg.seed'),
transforms: tr_normalize.Transformations = <factory>,
num_epochs: Optional[int] = None,
batch_drop_remainder: bool = True,
num_workers: int = 16,
read_options: grain.ReadOptions | None = None,
enable_profiling: bool = False,
per_worker_buffer_size: int = 1,
shuffle: bool,
split: str,
data_dir: epath.PathLike | None = None,
cache_dir: epath.PathLike | None = None,
)[source]

Bases: kauldron.data.py.base.DataSourceBase

HuggingFace loader.

path: str
config: str | None = None
split: str
data_dir: epath.PathLike | None = None
cache_dir: epath.PathLike | None = None
property data_source: grain._src.python.data_sources.RandomAccessDataSource
Morty Proxy This is a proxified and sanitized view of the page, visit original site.