From 9a0aa157ce436ed544922591342d8f6773f75c53 Mon Sep 17 00:00:00 2001 From: Josef Prochazka Date: Wed, 18 Jun 2025 10:34:27 +0200 Subject: [PATCH] Working tests on explicit black magic implementation of Actor --- src/apify/_actor.py | 54 +++++++++++++++++-- tests/unit/actor/test_actor_dataset.py | 2 +- tests/unit/actor/test_actor_key_value_store.py | 2 +- tests/unit/actor/test_actor_lifecycle.py | 19 +++---- tests/unit/actor/test_actor_request_queue.py | 2 +- tests/unit/conftest.py | 5 +- .../pipelines/test_actor_dataset_push.py | 21 ++++---- uv.lock | 2 +- 8 files changed, 75 insertions(+), 32 deletions(-) diff --git a/src/apify/_actor.py b/src/apify/_actor.py index b8fcdc05..bc8c9d02 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -141,7 +141,7 @@ async def __aexit__( await self.exit() def __repr__(self) -> str: - if self is cast('Proxy', Actor).__wrapped__: + if self is cast('Proxy', Actor)._singleton_actor: return '' return super().__repr__() @@ -221,8 +221,6 @@ async def init(self) -> None: if _ActorType._is_any_instance_initialized: self.log.warning('Repeated Actor initialization detected - this is non-standard usage, proceed with care') - # Make sure that the currently initialized instance is also available through the global `Actor` proxy - cast('Proxy', Actor).__wrapped__ = self self._is_exiting = False self._was_final_persist_state_emitted = False @@ -1190,5 +1188,53 @@ def _get_default_exit_process(self) -> bool: return True -Actor = cast('_ActorType', Proxy(_ActorType)) +class _Actor: + """This is not a change proposal, just an exposed explicit form of Actor black magic""" + + def __init__(self): + self._singleton_actor : _ActorType | None = None + + def __call__(self, + configuration: Configuration | None = None, + *, + configure_logging: bool = True, + exit_process: bool | None = None, + ) -> None: + """Fake call, but actually an init due to Actor being a class and somehow its own instance as well...""" + self._singleton_actor = _ActorType( 
+ configuration=configuration, + configure_logging=configure_logging, + exit_process=exit_process, + ) + return self._singleton_actor + + + async def __aenter__(self) -> _ActorType: + """If called before init, take default Actor""" + if self._singleton_actor is _ActorType: + # Default init in context manager if init not called yet + self._singleton_actor = self._singleton_actor() + return await self._singleton_actor.__aenter__() + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + await self._singleton_actor.__aexit__(exc_type, exc_val, exc_tb) + + def __getattribute__ (self, item) -> Any: + # These are the only attributes that are not being automatically called from the self._singleton_actor + wrapper_attributes = {'__init__', '__call__', '__aenter__', '__aexit__', '_singleton_actor'} + + if item not in wrapper_attributes: + if item in {'init', 'log'}: + # Another form of init, for some methods that are allowed to be called even on uninitialized Actor + if self._singleton_actor is _ActorType: + self._singleton_actor = self._singleton_actor() + return getattr(self._singleton_actor, item) + if item == '_singleton_actor' and not object.__getattribute__(self, item): + return _ActorType + + return object.__getattribute__(self, item) + + + +Actor = cast('_ActorType', _Actor()) """The entry point of the SDK, through which all the Actor operations should be done.""" diff --git a/tests/unit/actor/test_actor_dataset.py b/tests/unit/actor/test_actor_dataset.py index ef6282bb..c6fad91d 100644 --- a/tests/unit/actor/test_actor_dataset.py +++ b/tests/unit/actor/test_actor_dataset.py @@ -16,7 +16,7 @@ async def test_throws_error_without_actor_init() -> None: - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.open_dataset() diff --git a/tests/unit/actor/test_actor_key_value_store.py b/tests/unit/actor/test_actor_key_value_store.py index 821065e1..a3516a81 100644 --- a/tests/unit/actor/test_actor_key_value_store.py +++ 
b/tests/unit/actor/test_actor_key_value_store.py @@ -36,7 +36,7 @@ async def test_open_returns_same_references() -> None: async def test_open_throws_without_init() -> None: - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.open_key_value_store() diff --git a/tests/unit/actor/test_actor_lifecycle.py b/tests/unit/actor/test_actor_lifecycle.py index 75e696a9..04884ec6 100644 --- a/tests/unit/actor/test_actor_lifecycle.py +++ b/tests/unit/actor/test_actor_lifecycle.py @@ -5,7 +5,7 @@ import json import sys from datetime import datetime, timezone -from typing import TYPE_CHECKING, Any, Callable, cast +from typing import Any, Callable from unittest.mock import AsyncMock, Mock import pytest @@ -14,19 +14,16 @@ from apify_shared.consts import ActorEnvVars, ApifyEnvVars from crawlee.events._types import Event, EventPersistStateData -import apify._actor from apify import Actor from apify._actor import _ActorType -if TYPE_CHECKING: - from lazy_object_proxy import Proxy - async def test_actor_properly_init_with_async() -> None: + assert Actor._singleton_actor is _ActorType async with Actor: - assert cast('Proxy', apify._actor.Actor).__wrapped__ is not None - assert cast('Proxy', apify._actor.Actor).__wrapped__._is_initialized - assert not cast('Proxy', apify._actor.Actor).__wrapped__._is_initialized + assert isinstance(Actor._singleton_actor,_ActorType) + assert Actor._is_initialized + assert not Actor._is_initialized async def test_actor_init() -> None: @@ -94,7 +91,7 @@ def on_event(event_type: Event) -> Callable: async def test_exit_without_init_raises_error() -> None: - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.exit() @@ -118,12 +115,12 @@ async def test_actor_handles_failure_gracefully() -> None: async def test_fail_without_init_raises_error() -> None: - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.fail() async def test_actor_reboot_fails_locally() -> None: - with 
pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.reboot() diff --git a/tests/unit/actor/test_actor_request_queue.py b/tests/unit/actor/test_actor_request_queue.py index 5504715f..edf9c35f 100644 --- a/tests/unit/actor/test_actor_request_queue.py +++ b/tests/unit/actor/test_actor_request_queue.py @@ -8,7 +8,7 @@ async def test_open_throws_without_init() -> None: - with pytest.raises(RuntimeError): + with pytest.raises(TypeError): await Actor.open_request_queue() diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 6f336cd6..18d37c87 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -16,7 +16,7 @@ from crawlee.storage_clients import MemoryStorageClient from crawlee.storages import _creation_management -import apify._actor +import apify if TYPE_CHECKING: from pathlib import Path @@ -38,8 +38,7 @@ def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callabl """ def _prepare_test_env() -> None: - delattr(apify._actor.Actor, '__wrapped__') - apify._actor._ActorType._is_any_instance_initialized = False + apify.Actor._singleton_actor = None # Set the environment variable for the local storage directory to the temporary path. 
monkeypatch.setenv(ApifyEnvVars.LOCAL_STORAGE_DIR, str(tmp_path)) diff --git a/tests/unit/scrapy/pipelines/test_actor_dataset_push.py b/tests/unit/scrapy/pipelines/test_actor_dataset_push.py index 832ea910..647eca1f 100644 --- a/tests/unit/scrapy/pipelines/test_actor_dataset_push.py +++ b/tests/unit/scrapy/pipelines/test_actor_dataset_push.py @@ -72,16 +72,17 @@ async def test_process_item( ) -> None: dataset = [] - async def mock_push_data(item: dict) -> None: - dataset.append(item) + async with Actor: + async def mock_push_data(item: dict) -> None: + dataset.append(item) - monkeypatch.setattr(Actor, 'push_data', mock_push_data) + monkeypatch.setattr(Actor._singleton_actor, 'push_data', mock_push_data) - if tc.expected_exception: - with pytest.raises(tc.expected_exception): - await pipeline.process_item(tc.item, spider) + if tc.expected_exception: + with pytest.raises(tc.expected_exception): + await pipeline.process_item(tc.item, spider) - else: - output = await pipeline.process_item(tc.item, spider) - assert output == tc.item - assert dataset == [tc.item_dict] + else: + output = await pipeline.process_item(tc.item, spider) + assert output == tc.item + assert dataset == [tc.item_dict] diff --git a/uv.lock b/uv.lock index 7f8a01cd..52114db9 100644 --- a/uv.lock +++ b/uv.lock @@ -32,7 +32,7 @@ wheels = [ [[package]] name = "apify" -version = "2.6.0" +version = "2.6.1" source = { editable = "." } dependencies = [ { name = "apify-client" },