From 1e2eb11ff4d9c5312d042bab88082cc3073f96d3 Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:03:57 +0100 Subject: [PATCH 1/7] feat: added create_hmac_signature function --- src/apify/_crypto.py | 36 ++++++++++++++++++++++++++++++++++++ tests/unit/test_crypto.py | 26 +++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index d3257582..c437f222 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -1,6 +1,8 @@ from __future__ import annotations import base64 +import hashlib +import hmac from typing import Any from cryptography.exceptions import InvalidTag as InvalidTagException @@ -153,3 +155,37 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An ) return input_data + + +CHARSET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + + +def encode_base62(num: int) -> str: + """Encode the given number to base62.""" + if num == 0: + return CHARSET[0] + + res = '' + while num > 0: + num, remainder = divmod(num, 62) + res = CHARSET[remainder] + res + return res + + +# createHmacSignature +@ignore_docs +def create_hmac_signature(secret_key: str, message: str) -> str: + """Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length. 
+ + Args: + secret_key (str): Secret key used for signing signatures + message (str): Message to be signed + + Returns: + str: Base62 encoded signature + """ + signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30] + + decimal_signature = int(signature, 16) + + return encode_base62(decimal_signature) diff --git a/tests/unit/test_crypto.py b/tests/unit/test_crypto.py index 24da3b6b..fbdc7d67 100644 --- a/tests/unit/test_crypto.py +++ b/tests/unit/test_crypto.py @@ -4,7 +4,14 @@ import pytest -from apify._crypto import _load_public_key, crypto_random_object_id, load_private_key, private_decrypt, public_encrypt +from apify._crypto import ( + _load_public_key, + create_hmac_signature, + crypto_random_object_id, + load_private_key, + private_decrypt, + public_encrypt, +) # NOTE: Uses the same keys as in: # https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts @@ -105,3 +112,20 @@ def test_crypto_random_object_id_length_and_charset() -> None: long_random_object_id = crypto_random_object_id(1000) for char in long_random_object_id: assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789' + + +# Check if the method is compatible with js version of the same method in: +# https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts +def test_create_valid_hmac_signature() -> None: + # This test uses the same secret key and message as in JS tests. + secret_key = 'hmac-secret-key' + message = 'hmac-message-to-be-authenticated' + assert create_hmac_signature(secret_key, message) == 'pcVagAsudj8dFqdlg7mG' + + +def test_create_same_hmac() -> None: + # This test uses the same secret key and message as in JS tests. 
+ secret_key = 'hmac-same-secret-key' + message = 'hmac-same-message-to-be-authenticated' + for _ in range(5): + assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5' From fb622d262300450946fbc5a2dc8dd386f5cfbb83 Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Wed, 19 Feb 2025 09:06:59 +0100 Subject: [PATCH 2/7] feat: update get_public_url to sign urls --- .../apify_storage_client/_key_value_store_client.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py index 601f0b89..d2b06477 100644 --- a/src/apify/apify_storage_client/_key_value_store_client.py +++ b/src/apify/apify_storage_client/_key_value_store_client.py @@ -5,8 +5,10 @@ from typing_extensions import override -from crawlee.storage_clients._base import BaseKeyValueStoreClient +from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord +from apify._crypto import (create_hmac_signature) + if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -90,5 +92,9 @@ async def get_public_url(self, key: str) -> str: key: The key for which the URL should be generated. 
""" public_api_url = self._api_public_base_url + public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}' + + if getattr(self.storage_object, 'url_signing_secret_key', None): + public_url += f'?signature={create_hmac_signature(self.storage_object.url_signing_secret_key, key)}' - return f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}' + return public_url \ No newline at end of file From 208a253c378fb3c500eebb678f265f1ffe2fd07b Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Wed, 19 Feb 2025 10:08:12 +0100 Subject: [PATCH 3/7] chore: updated test_generate_public_url_for_kvs_record test --- .../_key_value_store_client.py | 9 +++++---- tests/integration/test_actor_key_value_store.py | 15 +++++++++++---- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py index d2b06477..140f273e 100644 --- a/src/apify/apify_storage_client/_key_value_store_client.py +++ b/src/apify/apify_storage_client/_key_value_store_client.py @@ -7,8 +7,8 @@ from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord -from apify._crypto import (create_hmac_signature) +from apify._crypto import create_hmac_signature if TYPE_CHECKING: from collections.abc import AsyncIterator @@ -94,7 +94,8 @@ async def get_public_url(self, key: str) -> str: public_api_url = self._api_public_base_url public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}' - if getattr(self.storage_object, 'url_signing_secret_key', None): - public_url += f'?signature={create_hmac_signature(self.storage_object.url_signing_secret_key, key)}' + url_signing_secret_key = getattr(self.storage_object, 
'url_signing_secret_key', None) # type: ignore[attr-defined] + if url_signing_secret_key: + public_url += f'?signature={create_hmac_signature(url_signing_secret_key, key)}' - return public_url \ No newline at end of file + return public_url diff --git a/tests/integration/test_actor_key_value_store.py b/tests/integration/test_actor_key_value_store.py index 6ed64123..e4708caa 100644 --- a/tests/integration/test_actor_key_value_store.py +++ b/tests/integration/test_actor_key_value_store.py @@ -6,6 +6,7 @@ from ._utils import generate_unique_resource_name from apify import Actor +from apify._crypto import create_hmac_signature if TYPE_CHECKING: import pytest @@ -210,10 +211,16 @@ async def main() -> None: default_store_id = Actor.config.default_key_value_store_id store = await Actor.open_key_value_store() - record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url('dummy') - print(record_url) - - assert record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/dummy' + record_key = 'dummy' + record_url = await cast(KeyValueStoreClient, store._resource_client).get_public_url(record_key) + url_signing_secret_key = cast(str, getattr(store.storage_object, 'url_signing_secret_key', None)) + signature = create_hmac_signature(url_signing_secret_key, record_key) + + assert url_signing_secret_key is not None + assert ( + record_url + == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}' + ) actor = await make_actor(label='kvs-get-public-url', main_func=main) run_result = await run_actor(actor) From 7f5d3735f1d565c8fa0bfce29c3bef6c064766aa Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Tue, 25 Feb 2025 15:57:48 +0100 Subject: [PATCH 4/7] fix: import --- src/apify/apify_storage_client/_key_value_store_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py index 140f273e..92ec0325 100644 --- a/src/apify/apify_storage_client/_key_value_store_client.py +++ b/src/apify/apify_storage_client/_key_value_store_client.py @@ -5,7 +5,7 @@ from typing_extensions import override -from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient +from crawlee.storage_clients._base import BaseKeyValueStoreClient from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord from apify._crypto import create_hmac_signature From 2451a25987631c042260ccc4dcda0a949e710357 Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:45:14 +0100 Subject: [PATCH 5/7] refactor: clean up --- src/apify/_crypto.py | 6 ++++-- .../apify_storage_client/_key_value_store_client.py | 2 +- tests/unit/test_crypto.py | 12 +++++++++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index c437f222..2e51f18b 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -3,6 +3,7 @@ import base64 import hashlib import hmac +import string from typing import Any from cryptography.exceptions import InvalidTag as InvalidTagException @@ -157,7 +158,7 @@ def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> An return input_data -CHARSET = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +CHARSET = string.digits + string.ascii_letters def encode_base62(num: int) -> str: @@ -172,11 +173,12 @@ def encode_base62(num: int) -> str: return res -# createHmacSignature @ignore_docs def create_hmac_signature(secret_key: str, message: str) -> str: """Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length. + HMAC signature is truncated to 30 characters to make it shorter. 
+ Args: secret_key (str): Secret key used for signing signatures message (str): Message to be signed diff --git a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py index 92ec0325..fb41b156 100644 --- a/src/apify/apify_storage_client/_key_value_store_client.py +++ b/src/apify/apify_storage_client/_key_value_store_client.py @@ -94,7 +94,7 @@ async def get_public_url(self, key: str) -> str: public_api_url = self._api_public_base_url public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}' - url_signing_secret_key = getattr(self.storage_object, 'url_signing_secret_key', None) # type: ignore[attr-defined] + url_signing_secret_key = getattr(self.storage_object, 'url_signing_secret_key', None) if url_signing_secret_key: public_url += f'?signature={create_hmac_signature(url_signing_secret_key, key)}' diff --git a/tests/unit/test_crypto.py b/tests/unit/test_crypto.py index fbdc7d67..1dead9c0 100644 --- a/tests/unit/test_crypto.py +++ b/tests/unit/test_crypto.py @@ -8,6 +8,7 @@ _load_public_key, create_hmac_signature, crypto_random_object_id, + encode_base62, load_private_key, private_decrypt, public_encrypt, @@ -114,7 +115,12 @@ def test_crypto_random_object_id_length_and_charset() -> None: assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789' -# Check if the method is compatible with js version of the same method in: +@pytest.mark.parametrize(('test_input', 'expected'), [(0, '0'), (10, 'a'), (999999999, '15FTGf')]) +def test_encode_base62(test_input: int, expected: str) -> None: + assert encode_base62(test_input) == expected + + +# This test ensures compatibility with the JavaScript version of the same method. # https://github.com/apify/apify-shared-js/blob/master/packages/utilities/src/hmac.ts def test_create_valid_hmac_signature() -> None: # This test uses the same secret key and message as in JS tests. 
@@ -127,5 +133,5 @@ def test_create_same_hmac() -> None: # This test uses the same secret key and message as in JS tests. secret_key = 'hmac-same-secret-key' message = 'hmac-same-message-to-be-authenticated' - for _ in range(5): - assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5' + assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5' + assert create_hmac_signature(secret_key, message) == 'FYMcmTIm3idXqleF1Sw5' From b058ca35de1701c199ad53658ebe5c1aa86eae8a Mon Sep 17 00:00:00 2001 From: Daniil Poletaev <44584010+danpoletaev@users.noreply.github.com> Date: Fri, 7 Mar 2025 15:35:42 +0100 Subject: [PATCH 6/7] refactor: clean up --- .../_key_value_store_client.py | 20 +++++++++++++------ .../integration/test_actor_key_value_store.py | 18 +++++++++-------- uv.lock | 6 +++--- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/apify/apify_storage_client/_key_value_store_client.py b/src/apify/apify_storage_client/_key_value_store_client.py index b39ac462..49883b3f 100644 --- a/src/apify/apify_storage_client/_key_value_store_client.py +++ b/src/apify/apify_storage_client/_key_value_store_client.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any from typing_extensions import override +from yarl import URL from crawlee.storage_clients._base import KeyValueStoreClient as BaseKeyValueStoreClient from crawlee.storage_clients.models import KeyValueStoreListKeysPage, KeyValueStoreMetadata, KeyValueStoreRecord @@ -91,11 +92,18 @@ async def get_public_url(self, key: str) -> str: Args: key: The key for which the URL should be generated. 
""" - public_api_url = self._api_public_base_url - public_url = f'{public_api_url}/v2/key-value-stores/{self._client.resource_id}/records/{key}' + if self._client.resource_id is None: + raise ValueError('resource_id cannot be None when generating a public URL') - url_signing_secret_key = getattr(self.storage_object, 'url_signing_secret_key', None) - if url_signing_secret_key: - public_url += f'?signature={create_hmac_signature(url_signing_secret_key, key)}' + public_url = ( + URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._client.resource_id / 'records' / key + ) + + key_value_store = await self.get() + + if key_value_store is not None and isinstance(key_value_store.model_extra, dict): + url_signing_secret_key = key_value_store.model_extra.get('urlSigningSecretKey') + if url_signing_secret_key: + public_url = public_url.with_query(signature=create_hmac_signature(url_signing_secret_key, key)) - return public_url + return str(public_url) diff --git a/tests/integration/test_actor_key_value_store.py b/tests/integration/test_actor_key_value_store.py index e4708caa..6b6dd767 100644 --- a/tests/integration/test_actor_key_value_store.py +++ b/tests/integration/test_actor_key_value_store.py @@ -6,7 +6,6 @@ from ._utils import generate_unique_resource_name from apify import Actor -from apify._crypto import create_hmac_signature if TYPE_CHECKING: import pytest @@ -202,21 +201,24 @@ async def test_generate_public_url_for_kvs_record( run_actor: RunActorFunction, ) -> None: async def main() -> None: - from typing import cast - - from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient + from apify._crypto import create_hmac_signature async with Actor: public_api_url = Actor.config.api_public_base_url default_store_id = Actor.config.default_key_value_store_id + record_key = 'public-record-key' store = await Actor.open_key_value_store() - record_key = 'dummy' - record_url = await cast(KeyValueStoreClient, 
store._resource_client).get_public_url(record_key) - url_signing_secret_key = cast(str, getattr(store.storage_object, 'url_signing_secret_key', None)) - signature = create_hmac_signature(url_signing_secret_key, record_key) + assert isinstance(store.storage_object.model_extra, dict) + url_signing_secret_key = store.storage_object.model_extra.get('urlSigningSecretKey') assert url_signing_secret_key is not None + + await store.set_value(record_key, {'exposedData': 'test'}, 'application/json') + + record_url = await store.get_public_url(record_key) + + signature = create_hmac_signature(url_signing_secret_key, record_key) assert ( record_url == f'{public_api_url}/v2/key-value-stores/{default_store_id}/records/{record_key}?signature={signature}' diff --git a/uv.lock b/uv.lock index 436deb0a..a7843e97 100644 --- a/uv.lock +++ b/uv.lock @@ -597,7 +597,7 @@ toml = [ [[package]] name = "crawlee" -version = "0.6.1" +version = "0.6.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "browserforge" }, @@ -617,9 +617,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b6/1c/102480fb8460eb317e39b11961fe63422af0df2c70e61c6c2c9f702624c9/crawlee-0.6.1.tar.gz", hash = "sha256:5af9d5ef3eaafdba4cfa73224f09b503302a3b00522288ba4f40a9ad90fdedef", size = 23642433 } +sdist = { url = "https://files.pythonhosted.org/packages/dd/55/466dca83ccc8eb2769b93d35b1077af62fc8928989efb7f9f08b2a11d139/crawlee-0.6.3.tar.gz", hash = "sha256:d1dfcbbeebaa20ef6a762ad2407f7969ce57fd59f74298b8e2d7e40a1593a199", size = 23646865 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/60/ad20c1b9cd68b52e70ce00008f76660417db2b68e64883d053086a8b3a92/crawlee-0.6.1-py3-none-any.whl", hash = "sha256:49780603b7ac76508a30e1473d5d6cf161e08c63c31855a93e22c8a807e1d9c3", size = 240393 }, + { url = 
"https://files.pythonhosted.org/packages/f2/19/4c479175d9c7830b911f81495139f08a75f33a692a490189ab61f6fb73c2/crawlee-0.6.3-py3-none-any.whl", hash = "sha256:11a745c3a858c7098af4046165b6246fd2187f7bdd1180d45ed7b6e8ebc20216", size = 243799 }, ] [[package]] From de1d87494f0dc0655a0d68aaa6161aa566f5ee37 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Fri, 7 Mar 2025 16:27:21 +0100 Subject: [PATCH 7/7] Update src/apify/_crypto.py --- src/apify/_crypto.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index 2e51f18b..3071efaa 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -175,16 +175,16 @@ def encode_base62(num: int) -> str: @ignore_docs def create_hmac_signature(secret_key: str, message: str) -> str: - """Generates an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length. + """Generate an HMAC signature and encode it using Base62. Base62 encoding reduces the signature length. HMAC signature is truncated to 30 characters to make it shorter. Args: - secret_key (str): Secret key used for signing signatures - message (str): Message to be signed + secret_key: Secret key used for signing signatures. + message: Message to be signed. Returns: - str: Base62 encoded signature + Base62 encoded signature. """ signature = hmac.new(secret_key.encode('utf-8'), message.encode('utf-8'), hashlib.sha256).hexdigest()[:30]