@@ -40,6 +40,7 @@ def __init__(self, settings: BaseSettings) -> None:
40
40
self ._async_thread : AsyncThread | None = None
41
41
42
42
def open_spider (self , spider : Spider ) -> None :
43
+ """Open the cache storage for a spider."""
43
44
logger .debug ('Using Apify key value cache storage' , extra = {'spider' : spider })
44
45
self .spider = spider
45
46
self ._fingerprinter = spider .crawler .request_fingerprinter
@@ -57,16 +58,19 @@ async def open_kv() -> KeyValueStore:
57
58
logger .debug (f"Opening cache storage's { kv_name !r} key value store" )
58
59
self ._kv = self ._async_thread .run_coro (open_kv ())
59
60
60
- def close_spider (self , spider : Spider , current_time : int | None = None ) -> None :
61
- assert self ._async_thread is not None , 'Async thread not initialized'
61
+ def close_spider (self , _ : Spider , current_time : int | None = None ) -> None :
62
+ """Close the cache storage for a spider."""
63
+ if self ._async_thread is None :
64
+ raise ValueError ('Async thread not initialized' )
62
65
63
66
logger .info (f'Cleaning up cache items (max { self .expiration_max_items } )' )
64
- if 0 < self .expiration_secs :
67
+ if self .expiration_secs > 0 :
65
68
if current_time is None :
66
69
current_time = int (time ())
67
70
68
71
async def expire_kv () -> None :
69
- assert self ._kv is not None , 'Key value store not initialized'
72
+ if self ._kv is None :
73
+ raise ValueError ('Key value store not initialized' )
70
74
i = 0
71
75
async for item in self ._kv .iterate_keys ():
72
76
value = await self ._kv .get_value (item .key )
@@ -97,10 +101,14 @@ async def expire_kv() -> None:
97
101
finally :
98
102
logger .debug ('Cache storage closed' )
99
103
100
- def retrieve_response (self , spider : Spider , request : Request , current_time : int | None = None ) -> Response | None :
101
- assert self ._async_thread is not None , 'Async thread not initialized'
102
- assert self ._kv is not None , 'Key value store not initialized'
103
- assert self ._fingerprinter is not None , 'Request fingerprinter not initialized'
104
+ def retrieve_response (self , _ : Spider , request : Request , current_time : int | None = None ) -> Response | None :
105
+ """Retrieve a response from the cache storage."""
106
+ if self ._async_thread is None :
107
+ raise ValueError ('Async thread not initialized' )
108
+ if self ._kv is None :
109
+ raise ValueError ('Key value store not initialized' )
110
+ if self ._fingerprinter is None :
111
+ raise ValueError ('Request fingerprinter not initialized' )
104
112
105
113
key = self ._fingerprinter .fingerprint (request ).hex ()
106
114
value = self ._async_thread .run_coro (self ._kv .get_value (key ))
@@ -125,10 +133,14 @@ def retrieve_response(self, spider: Spider, request: Request, current_time: int
125
133
logger .debug ('Cache hit' , extra = {'request' : request })
126
134
return respcls (url = url , headers = headers , status = status , body = body )
127
135
128
- def store_response (self , spider : Spider , request : Request , response : Response ) -> None :
129
- assert self ._async_thread is not None , 'Async thread not initialized'
130
- assert self ._kv is not None , 'Key value store not initialized'
131
- assert self ._fingerprinter is not None , 'Request fingerprinter not initialized'
136
+ def store_response (self , _ : Spider , request : Request , response : Response ) -> None :
137
+ """Store a response in the cache storage."""
138
+ if self ._async_thread is None :
139
+ raise ValueError ('Async thread not initialized' )
140
+ if self ._kv is None :
141
+ raise ValueError ('Key value store not initialized' )
142
+ if self ._fingerprinter is None :
143
+ raise ValueError ('Request fingerprinter not initialized' )
132
144
133
145
key = self ._fingerprinter .fingerprint (request ).hex ()
134
146
data = {
@@ -143,20 +155,21 @@ def store_response(self, spider: Spider, request: Request, response: Response) -
143
155
144
156
def to_gzip (data : dict , mtime : int | None = None ) -> bytes :
145
157
"""Dump a dictionary to a gzip-compressed byte stream."""
146
- with io .BytesIO () as byte_stream :
147
- with gzip .GzipFile (fileobj = byte_stream , mode = 'wb' , mtime = mtime ) as gzip_file :
148
- pickle .dump (data , gzip_file , protocol = 4 )
149
- return byte_stream .getvalue ()
158
+ with io .BytesIO () as byte_stream , gzip .GzipFile (fileobj = byte_stream , mode = 'wb' , mtime = mtime ) as gzip_file :
159
+ pickle .dump (data , gzip_file , protocol = 4 )
160
+ return byte_stream .getvalue ()
150
161
151
162
152
163
def from_gzip (gzip_bytes : bytes ) -> dict :
153
164
"""Load a dictionary from a gzip-compressed byte stream."""
154
165
with io .BytesIO (gzip_bytes ) as byte_stream , gzip .GzipFile (fileobj = byte_stream , mode = 'rb' ) as gzip_file :
155
- return pickle .load (gzip_file )
166
+ data : dict = pickle .load (gzip_file )
167
+ return data
156
168
157
169
158
170
def read_gzip_time (gzip_bytes : bytes ) -> int :
159
171
"""Read the modification time from a gzip-compressed byte stream without decompressing the data."""
160
172
header = gzip_bytes [:10 ]
161
173
header_components = struct .unpack ('<HBBI2B' , header )
162
- return header_components [3 ]
174
+ mtime : int = header_components [3 ]
175
+ return mtime
0 commit comments