Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 581e5d7

Browse files
committed
feat: Upgrade to Crawlee v0.5
1 parent ccba8d1 commit 581e5d7
Copy full SHA for 581e5d7
Expand file tree / Collapse file tree

22 files changed

+427 -197 lines changed

‎poetry.lock

Copy file name to clipboard · Expand all lines: poetry.lock
+150 -38 · Lines changed: 150 additions & 38 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎pyproject.toml

Copy file name to clipboard · Expand all lines: pyproject.toml
+1 -1 · Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@ keywords = [
4545
python = "^3.9"
4646
apify-client = ">=1.8.1"
4747
apify-shared = ">=1.2.1"
48-
crawlee = "~0.4.0"
48+
crawlee = "~0.5.0"
4949
cryptography = ">=42.0.0"
5050
httpx = ">=0.27.0"
5151
lazy-object-proxy = ">=1.10.0"

‎src/apify/_actor.py

Copy file name to clipboard · Expand all lines: src/apify/_actor.py
+36 -26 · Lines changed: 36 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,9 @@
1313
from apify_client import ApifyClientAsync
1414
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
1515
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
16-
from crawlee import service_container
16+
from crawlee import service_locator
1717
from crawlee.events._types import Event, EventMigratingData, EventPersistStateData
18+
from crawlee.storage_clients import MemoryStorageClient
1819

1920
from apify._configuration import Configuration
2021
from apify._consts import EVENT_LISTENERS_TIMEOUT
@@ -71,17 +72,22 @@ def __init__(
7172
self._configure_logging = configure_logging
7273
self._apify_client = self.new_client()
7374

74-
self._event_manager: EventManager
75-
if self._configuration.is_at_home:
76-
self._event_manager = PlatformEventManager(
77-
config=self._configuration,
78-
persist_state_interval=self._configuration.persist_state_interval,
75+
# We need to keep both local & cloud storage clients because of the `force_cloud` option.
76+
self._local_storage_client = MemoryStorageClient.from_config(config=self.config)
77+
self._cloud_storage_client = ApifyStorageClient.from_config(config=self.config)
78+
79+
# Set the event manager based on whether the Actor is running on the platform or locally.
80+
self._event_manager = (
81+
PlatformEventManager(
82+
config=self.config,
83+
persist_state_interval=self.config.persist_state_interval,
7984
)
80-
else:
81-
self._event_manager = LocalEventManager(
82-
system_info_interval=self._configuration.system_info_interval,
83-
persist_state_interval=self._configuration.persist_state_interval,
85+
if self.is_at_home()
86+
else LocalEventManager(
87+
system_info_interval=self.config.system_info_interval,
88+
persist_state_interval=self.config.persist_state_interval,
8489
)
90+
)
8591

8692
self._is_initialized = False
8793

@@ -94,9 +100,6 @@ async def __aenter__(self) -> Self:
94100
When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
95101
executing the block code, the `Actor.fail` method is called.
96102
"""
97-
if self._configure_logging:
98-
_configure_logging(self._configuration)
99-
100103
await self.init()
101104
return self
102105

@@ -184,18 +187,21 @@ async def init(self) -> None:
184187
if self._is_initialized:
185188
raise RuntimeError('The Actor was already initialized!')
186189

187-
if self._configuration.token:
188-
service_container.set_cloud_storage_client(ApifyStorageClient(configuration=self._configuration))
190+
self._is_exiting = False
191+
self._was_final_persist_state_emitted = False
189192

190-
if self._configuration.is_at_home:
191-
service_container.set_default_storage_client_type('cloud')
193+
# Register services in the service locator.
194+
if self.is_at_home():
195+
service_locator.set_storage_client(self._cloud_storage_client)
192196
else:
193-
service_container.set_default_storage_client_type('local')
197+
service_locator.set_storage_client(self._local_storage_client)
194198

195-
service_container.set_event_manager(self._event_manager)
199+
service_locator.set_event_manager(self.event_manager)
200+
service_locator.set_configuration(self.configuration)
196201

197-
self._is_exiting = False
198-
self._was_final_persist_state_emitted = False
202+
# The logging configuration has to be called after all service_locator set methods.
203+
if self._configure_logging:
204+
_configure_logging()
199205

200206
self.log.info('Initializing Actor...')
201207
self.log.info('System info', extra=get_system_info())
@@ -245,7 +251,6 @@ async def finalize() -> None:
245251
await self._event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
246252

247253
await self._event_manager.__aexit__(None, None, None)
248-
cast(dict, service_container._services).clear() # noqa: SLF001
249254

250255
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
251256
self._is_initialized = False
@@ -349,11 +354,13 @@ async def open_dataset(
349354
self._raise_if_not_initialized()
350355
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
351356

357+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
358+
352359
return await Dataset.open(
353360
id=id,
354361
name=name,
355362
configuration=self._configuration,
356-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
363+
storage_client=storage_client,
357364
)
358365

359366
async def open_key_value_store(
@@ -381,12 +388,13 @@ async def open_key_value_store(
381388
"""
382389
self._raise_if_not_initialized()
383390
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
391+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
384392

385393
return await KeyValueStore.open(
386394
id=id,
387395
name=name,
388396
configuration=self._configuration,
389-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
397+
storage_client=storage_client,
390398
)
391399

392400
async def open_request_queue(
@@ -417,11 +425,13 @@ async def open_request_queue(
417425
self._raise_if_not_initialized()
418426
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
419427

428+
storage_client = self._cloud_storage_client if force_cloud else service_locator.get_storage_client()
429+
420430
return await RequestQueue.open(
421431
id=id,
422432
name=name,
423433
configuration=self._configuration,
424-
storage_client=service_container.get_storage_client(client_type='cloud' if force_cloud else None),
434+
storage_client=storage_client,
425435
)
426436

427437
async def push_data(self, data: dict | list[dict]) -> None:
@@ -963,7 +973,7 @@ async def create_proxy_configuration(
963973
password: str | None = None,
964974
groups: list[str] | None = None,
965975
country_code: str | None = None,
966-
proxy_urls: list[str] | None = None,
976+
proxy_urls: list[str | None] | None = None,
967977
new_url_function: _NewUrlFunction | None = None,
968978
) -> ProxyConfiguration | None:
969979
"""Create a ProxyConfiguration object with the passed proxy configuration.

‎src/apify/_configuration.py

Copy file name to clipboard · Expand all lines: src/apify/_configuration.py
+12 -0 · Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from datetime import datetime, timedelta
4+
from logging import getLogger
45
from typing import Annotated, Any
56

67
from pydantic import AliasChoices, BeforeValidator, Field
@@ -12,6 +13,8 @@
1213

1314
from apify._utils import docs_group
1415

16+
logger = getLogger(__name__)
17+
1518

1619
def _transform_to_list(value: Any) -> list[str] | None:
1720
if value is None:
@@ -353,6 +356,15 @@ class Configuration(CrawleeConfiguration):
353356
),
354357
] = None
355358

359+
@classmethod
360+
def get_global_configuration(cls) -> Configuration:
361+
"""Retrieve the global instance of the configuration.
362+
363+
Mostly for the backwards compatibility. It is recommended to use the `service_locator.get_configuration()`
364+
instead.
365+
"""
366+
return cls()
367+
356368

357369
# Monkey-patch the base class so that it works with the extended configuration
358370
CrawleeConfiguration.get_global_configuration = Configuration.get_global_configuration # type: ignore[method-assign]

‎src/apify/_proxy_configuration.py

Copy file name to clipboard · Expand all lines: src/apify/_proxy_configuration.py
+3 -3 · Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -111,9 +111,9 @@ def __init__(
111111
password: str | None = None,
112112
groups: list[str] | None = None,
113113
country_code: str | None = None,
114-
proxy_urls: list[str] | None = None,
114+
proxy_urls: list[str | None] | None = None,
115115
new_url_function: _NewUrlFunction | None = None,
116-
tiered_proxy_urls: list[list[str]] | None = None,
116+
tiered_proxy_urls: list[list[str | None]] | None = None,
117117
_actor_config: Configuration | None = None,
118118
_apify_client: ApifyClientAsync | None = None,
119119
) -> None:
@@ -148,7 +148,7 @@ def __init__(
148148
' "groups" or "country_code".'
149149
)
150150

151-
if proxy_urls and any('apify.com' in url for url in proxy_urls):
151+
if proxy_urls and any('apify.com' in (url or '') for url in proxy_urls):
152152
logger.warning(
153153
'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties '
154154
'instead of `proxy_urls`.\n'

‎src/apify/apify_storage_client/_apify_storage_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_apify_storage_client.py
+12 -2 · Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,13 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
15
from typing_extensions import override
26

37
from apify_client import ApifyClientAsync
48
from crawlee._utils.crypto import crypto_random_object_id
5-
from crawlee.base_storage_client import BaseStorageClient
9+
from crawlee.storage_clients import BaseStorageClient
610

7-
from apify._configuration import Configuration
811
from apify._utils import docs_group
912
from apify.apify_storage_client._dataset_client import DatasetClient
1013
from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
@@ -13,6 +16,9 @@
1316
from apify.apify_storage_client._request_queue_client import RequestQueueClient
1417
from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
1518

19+
if TYPE_CHECKING:
20+
from apify._configuration import Configuration
21+
1622

1723
@docs_group('Classes')
1824
class ApifyStorageClient(BaseStorageClient):
@@ -29,6 +35,10 @@ def __init__(self, *, configuration: Configuration) -> None:
2935
)
3036
self._configuration = configuration
3137

38+
@classmethod
39+
def from_config(cls, config: Configuration) -> ApifyStorageClient:
40+
return cls(configuration=config)
41+
3242
@override
3343
def dataset(self, id: str) -> DatasetClient:
3444
return DatasetClient(self._apify_client.dataset(id))

‎src/apify/apify_storage_client/_dataset_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_dataset_client.py
+2 -1 · Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44

55
from typing_extensions import override
66

7-
from crawlee.base_storage_client import BaseDatasetClient, DatasetItemsListPage, DatasetMetadata
7+
from crawlee.storage_clients._base import BaseDatasetClient
8+
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
89

910
if TYPE_CHECKING:
1011
from collections.abc import AsyncIterator

‎src/apify/apify_storage_client/_dataset_collection_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_dataset_collection_client.py
+2 -1 · Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44

55
from typing_extensions import override
66

7-
from crawlee.base_storage_client import BaseDatasetCollectionClient, DatasetListPage, DatasetMetadata
7+
from crawlee.storage_clients._base import BaseDatasetCollectionClient
8+
from crawlee.storage_clients.models import DatasetListPage, DatasetMetadata
89

910
if TYPE_CHECKING:
1011
from apify_client.clients import DatasetCollectionClientAsync

‎src/apify/apify_storage_client/_key_value_store_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_key_value_store_client.py
+2 -6 · Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,12 +5,8 @@
55

66
from typing_extensions import override
77

8-
from crawlee.base_storage_client import (
9-
BaseKeyValueStoreClient,
10-
KeyValueStoreListKeysPage,
11-
KeyValueStoreMetadata,
12-
KeyValueStoreRecord,
13-
)
8+
from crawlee.storage_clients._base import BaseKeyValueStoreClient
9+
from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord
1410

1511
if TYPE_CHECKING:
1612
from collections.abc import AsyncIterator

‎src/apify/apify_storage_client/_key_value_store_collection_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_key_value_store_collection_client.py
+2 -1 · Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44

55
from typing_extensions import override
66

7-
from crawlee.base_storage_client import BaseKeyValueStoreCollectionClient, KeyValueStoreListPage, KeyValueStoreMetadata
7+
from crawlee.storage_clients._base import BaseKeyValueStoreCollectionClient
8+
from crawlee.storage_clients.models import KeyValueStoreListPage, KeyValueStoreMetadata
89

910
if TYPE_CHECKING:
1011
from apify_client.clients import KeyValueStoreCollectionClientAsync

‎src/apify/apify_storage_client/_request_queue_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_request_queue_client.py
+2 -2 · Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,8 @@
55
from typing_extensions import override
66

77
from crawlee import Request
8-
from crawlee.base_storage_client import (
9-
BaseRequestQueueClient,
8+
from crawlee.storage_clients._base import BaseRequestQueueClient
9+
from crawlee.storage_clients.models import (
1010
BatchRequestsOperationResponse,
1111
ProcessedRequest,
1212
ProlongRequestLockResponse,

‎src/apify/apify_storage_client/_request_queue_collection_client.py

Copy file name to clipboard · Expand all lines: src/apify/apify_storage_client/_request_queue_collection_client.py
+2 -1 · Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,8 @@
44

55
from typing_extensions import override
66

7-
from crawlee.base_storage_client import BaseRequestQueueCollectionClient, RequestQueueListPage, RequestQueueMetadata
7+
from crawlee.storage_clients._base import BaseRequestQueueCollectionClient
8+
from crawlee.storage_clients.models import RequestQueueListPage, RequestQueueMetadata
89

910
if TYPE_CHECKING:
1011
from apify_client.clients import RequestQueueCollectionClientAsync

‎src/apify/log.py

Copy file name to clipboard · Expand all lines: src/apify/log.py
+4 -8 · Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,10 @@
11
from __future__ import annotations
22

33
import logging
4-
from typing import TYPE_CHECKING
54

65
from apify_shared.utils import ignore_docs
76
from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level
87

9-
if TYPE_CHECKING:
10-
from apify import Configuration
11-
128
# Name of the logger used throughout the library (resolves to 'apify')
139
logger_name = __name__.split('.')[0]
1410

@@ -21,11 +17,11 @@ class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 (Inherited from pare
2117
pass
2218

2319

24-
def _configure_logging(configuration: Configuration) -> None:
20+
def _configure_logging() -> None:
2521
apify_client_logger = logging.getLogger('apify_client')
26-
configure_logger(apify_client_logger, configuration, remove_old_handlers=True)
22+
configure_logger(apify_client_logger, remove_old_handlers=True)
2723

28-
level = get_configured_log_level(configuration)
24+
level = get_configured_log_level()
2925

3026
# Keep apify_client logger quiet unless debug logging is requested
3127
if level > logging.DEBUG:
@@ -42,4 +38,4 @@ def _configure_logging(configuration: Configuration) -> None:
4238

4339
# Use configured log level for apify logger
4440
apify_logger = logging.getLogger('apify')
45-
configure_logger(apify_logger, configuration, remove_old_handlers=True)
41+
configure_logger(apify_logger, remove_old_handlers=True)

‎src/apify/storages/_request_list.py

Copy file name to clipboard · Expand all lines: src/apify/storages/_request_list.py
+1 -1 · Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
from crawlee import Request
1212
from crawlee._types import HttpMethod
1313
from crawlee.http_clients import BaseHttpClient, HttpxHttpClient
14-
from crawlee.storages import RequestList as CrawleeRequestList
14+
from crawlee.request_loaders import RequestList as CrawleeRequestList
1515

1616
from apify._utils import docs_group
1717

0 commit comments

Comments
0 (0)
Morty Proxy: This is a proxified and sanitized view of the page; visit the original site.