1
1
import asyncio
2
- import functools
3
2
import inspect
4
3
from datetime import datetime
5
4
from types import TracebackType
6
5
from typing import Any , Awaitable , Callable , Coroutine , Dict , List , Optional , Type , TypeVar , Union , cast
7
6
8
7
from apify_client import ApifyClientAsync
9
- from apify_client .clients import DatasetClientAsync , KeyValueStoreClientAsync , RequestQueueClientAsync
10
8
from apify_client .consts import WebhookEventType
11
9
12
10
from ._utils import (
15
13
_get_memory_usage_bytes ,
16
14
_log_system_info ,
17
15
_run_func_at_interval_async ,
16
+ _wrap_internal ,
18
17
dualproperty ,
19
18
)
20
19
from .config import Configuration
21
20
from .consts import ActorEventType , ApifyEnvVars
22
21
from .event_manager import EventManager
22
+ from .memory_storage import MemoryStorage
23
23
from .proxy_configuration import ProxyConfiguration
24
+ from .storage_client_manager import StorageClientManager
25
+ from .storages import Dataset , KeyValueStore , RequestQueue , StorageManager
24
26
25
27
MainReturnType = TypeVar ('MainReturnType' )
26
28
27
- T = TypeVar ('T' , bound = Callable )
28
- U = TypeVar ('U' , bound = Callable )
29
-
30
-
31
- def _wrap_internal (implementation : T , metadata_source : U ) -> U :
32
- @functools .wraps (metadata_source )
33
- def wrapper (* args : Any , ** kwargs : Any ) -> Any :
34
- return implementation (* args , ** kwargs )
35
-
36
- return cast (U , wrapper )
37
-
38
-
39
29
# This metaclass is needed so you can do `with Actor: ...` instead of `with Actor() as a: ...`
40
30
# and have automatic `Actor.init()` and `Actor.exit()`
41
31
# TODO: decide if this mumbo jumbo is worth it or not, or if it maybe breaks something
32
+
33
+
42
34
class _ActorContextManager (type ):
43
35
@staticmethod
44
36
async def __aenter__ () -> Type ['Actor' ]:
@@ -67,6 +59,7 @@ class Actor(metaclass=_ActorContextManager):
67
59
68
60
_default_instance : Optional ['Actor' ] = None
69
61
_apify_client : ApifyClientAsync
62
+ _memory_storage : MemoryStorage
70
63
_config : Configuration
71
64
_event_manager : EventManager
72
65
_send_system_info_interval_task : Optional [asyncio .Task ] = None
@@ -195,7 +188,9 @@ async def _init_internal(self) -> None:
195
188
),
196
189
)
197
190
198
- if not self .is_at_home ():
191
+ if self .is_at_home ():
192
+ StorageClientManager .set_storage_client (self ._apify_client )
193
+ else :
199
194
self ._send_system_info_interval_task = asyncio .create_task (
200
195
_run_func_at_interval_async (
201
196
lambda : self ._event_manager .emit (ActorEventType .SYSTEM_INFO , self ._get_system_info ()),
@@ -340,89 +335,60 @@ def _new_client_internal(
340
335
timeout_secs = timeout_secs ,
341
336
)
342
337
343
- # TODO: create proper Dataset, KeyValueStore and RequestQueue class
338
+ def _get_storage_client (self , force_cloud : bool ) -> Optional [ApifyClientAsync ]:
339
+ return self ._apify_client if force_cloud else None
340
+
344
341
@classmethod
345
- async def open_dataset (cls , dataset_id_or_name : Optional [str ] = None ) -> DatasetClientAsync :
342
+ async def open_dataset (cls , dataset_id_or_name : Optional [str ] = None , * , force_cloud : bool = False ) -> Dataset :
346
343
"""TODO: docs."""
347
- return await cls ._get_default_instance ().open_dataset (dataset_id_or_name = dataset_id_or_name )
348
-
349
- async def _open_dataset_internal (self , dataset_id_or_name : Optional [str ] = None ) -> DatasetClientAsync :
350
- # TODO: this should return a Dataset class rather than the raw client
344
+ return await cls ._get_default_instance ().open_dataset (dataset_id_or_name = dataset_id_or_name , force_cloud = force_cloud )
351
345
346
+ async def _open_dataset_internal (self , dataset_id_or_name : Optional [str ] = None , * , force_cloud : bool = False ) -> Dataset :
352
347
self ._raise_if_not_initialized ()
353
348
354
- if not dataset_id_or_name :
355
- dataset_id_or_name = self ._config .default_dataset_id
356
-
357
- dataset_client = self ._apify_client .dataset (dataset_id_or_name )
358
-
359
- if await dataset_client .get ():
360
- return dataset_client
361
-
362
- else :
363
- dataset = await self ._apify_client .datasets ().get_or_create (name = dataset_id_or_name )
364
- return self ._apify_client .dataset (dataset ['id' ])
349
+ return await StorageManager .open_storage (Dataset , dataset_id_or_name , self ._get_storage_client (force_cloud ), self ._config )
365
350
366
351
@classmethod
367
- async def open_key_value_store (cls , key_value_store_id_or_name : Optional [str ] = None ) -> KeyValueStoreClientAsync :
352
+ async def open_key_value_store (cls , key_value_store_id_or_name : Optional [str ] = None , * , force_cloud : bool = False ) -> KeyValueStore :
368
353
"""TODO: docs."""
369
- return await cls ._get_default_instance ().open_key_value_store (key_value_store_id_or_name = key_value_store_id_or_name )
370
-
371
- async def _open_key_value_store_internal (self , key_value_store_id_or_name : Optional [str ] = None ) -> KeyValueStoreClientAsync :
372
- # TODO: this should return a KeyValueStore class rather than the raw client
354
+ return await cls ._get_default_instance ().open_key_value_store (key_value_store_id_or_name = key_value_store_id_or_name , force_cloud = force_cloud )
373
355
356
+ async def _open_key_value_store_internal (self , key_value_store_id_or_name : Optional [str ] = None , * , force_cloud : bool = False ) -> KeyValueStore :
374
357
self ._raise_if_not_initialized ()
375
358
376
- if not key_value_store_id_or_name :
377
- key_value_store_id_or_name = self ._config .default_key_value_store_id
378
-
379
- store_client = self ._apify_client .key_value_store (key_value_store_id_or_name )
380
-
381
- if await store_client .get ():
382
- return store_client
383
-
384
- else :
385
- key_value_store = await self ._apify_client .key_value_stores ().get_or_create (name = key_value_store_id_or_name )
386
- return self ._apify_client .key_value_store (key_value_store ['id' ])
359
+ return await StorageManager .open_storage (KeyValueStore , key_value_store_id_or_name , self ._get_storage_client (force_cloud ), self ._config )
387
360
388
361
@classmethod
389
- async def open_request_queue (cls , request_queue_id_or_name : Optional [str ] = None ) -> RequestQueueClientAsync :
362
+ async def open_request_queue (cls , request_queue_id_or_name : Optional [str ] = None , * , force_cloud : bool = False ) -> RequestQueue :
390
363
"""TODO: docs."""
391
- return await cls ._get_default_instance ().open_request_queue (request_queue_id_or_name = request_queue_id_or_name )
392
-
393
- async def _open_request_queue_internal (self , request_queue_id_or_name : Optional [str ] = None ) -> RequestQueueClientAsync :
394
- # TODO: this should return a RequestQueue class rather than the raw client
364
+ return await cls ._get_default_instance ().open_request_queue (request_queue_id_or_name = request_queue_id_or_name , force_cloud = force_cloud )
395
365
366
+ async def _open_request_queue_internal (
367
+ self ,
368
+ request_queue_id_or_name : Optional [str ] = None ,
369
+ * ,
370
+ force_cloud : bool = False ,
371
+ ) -> RequestQueue :
396
372
self ._raise_if_not_initialized ()
397
373
398
- if not request_queue_id_or_name :
399
- request_queue_id_or_name = self ._config .default_request_queue_id
400
-
401
- queue_client = self ._apify_client .request_queue (request_queue_id_or_name )
402
-
403
- if await queue_client .get ():
404
- return queue_client
405
-
406
- else :
407
- request_queue = await self ._apify_client .request_queues ().get_or_create (name = request_queue_id_or_name )
408
- return self ._apify_client .request_queue (request_queue ['id' ])
374
+ return await StorageManager .open_storage (RequestQueue , request_queue_id_or_name , self ._get_storage_client (force_cloud ), self ._config )
409
375
410
376
@classmethod
411
377
async def push_data (cls , data : Any ) -> None :
412
378
"""TODO: docs."""
413
- return await cls ._get_default_instance ().push_data (data = data )
379
+ await cls ._get_default_instance ().push_data (data = data )
414
380
415
381
async def _push_data_internal (self , data : Any ) -> None :
416
382
self ._raise_if_not_initialized ()
417
383
418
384
if not data :
419
385
return
420
386
421
- if not isinstance (data , list ):
387
+ if not isinstance (data , list ): # TODO: Memory storage does this on its own...
422
388
data = [data ]
423
389
424
- dataset_client = await self .open_dataset ()
425
- return await dataset_client . push_items (data )
390
+ dataset = await self .open_dataset ()
391
+ await dataset . push_data (data )
426
392
427
393
@classmethod
428
394
async def get_input (cls ) -> Any :
@@ -444,11 +410,9 @@ async def get_value(cls, key: str) -> Any:
444
410
async def _get_value_internal (self , key : str ) -> Any :
445
411
self ._raise_if_not_initialized ()
446
412
447
- key_value_store_client = await self .open_key_value_store ()
448
- record = await key_value_store_client .get_record (key )
449
- if record :
450
- return record ['value' ]
451
- return None
413
+ key_value_store = await self .open_key_value_store ()
414
+ value = await key_value_store .get_value (key )
415
+ return value
452
416
453
417
@classmethod
454
418
async def set_value (cls , key : str , value : Any , options : Optional [Dict ] = None ) -> None :
@@ -464,8 +428,8 @@ async def _set_value_internal(self, key: str, value: Any, options: Optional[Dict
464
428
465
429
content_type = options ['content_type' ] if options else None
466
430
467
- key_value_store_client = await self .open_key_value_store ()
468
- return await key_value_store_client . set_record (key , value , content_type = content_type )
431
+ key_value_store = await self .open_key_value_store ()
432
+ return await key_value_store . set_value (key , value , content_type = content_type )
469
433
470
434
@classmethod
471
435
def on (cls , event : ActorEventType , listener : Callable ) -> Callable :
0 commit comments