diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 77340833da..c8a0340752 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -40,6 +40,7 @@ jobs: npm run build env: APIFY_SIGNING_TOKEN: ${{ secrets.APIFY_SIGNING_TOKEN }} + SMARTLOOK_PROJECT_KEY: ${{ secrets.SMARTLOOK_DOCS_PROJECT_KEY }} - name: Commit the updated package(-lock).json uses: stefanzweifel/git-auto-commit-action@v5 diff --git a/CHANGELOG.md b/CHANGELOG.md index b52ccbad03..d4afcefc4c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +## [3.1.16](https://github.com/apify/apify-sdk-js/compare/apify@3.1.15...apify@3.1.16) (2024-02-23) + +**Note:** Version bump only for package apify + + + + + ## [3.1.15](https://github.com/apify/apify-sdk-js/compare/apify@3.1.14...apify@3.1.15) (2024-01-08) diff --git a/package-lock.json b/package-lock.json index 3d1b500fa4..f8a178951c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -28,7 +28,7 @@ "@typescript-eslint/eslint-plugin": "^7.0.0", "@typescript-eslint/parser": "^7.0.0", "commitlint": "^18.0.0", - "crawlee": "^3.8.0", + "crawlee": "^3.9.0", "eslint": "^8.54.0", "fs-extra": "^11.1.1", "gen-esm-wrapper": "^1.1.3", @@ -648,16 +648,16 @@ } }, "node_modules/@crawlee/basic": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/basic/-/basic-3.8.0.tgz", - "integrity": "sha512-Yc00vNVGMGsyFK0DqeNRyjb8+yk9nBbHs6Qh+KARN1ljYo0CmUBYjaTdOAM5LCIe7Num1pakLSAFJpKP26+7EQ==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/basic/-/basic-3.9.0.tgz", + "integrity": "sha512-KPpmkOWIDHtUUqE0Vcl53hrzpffoP4Sf9UJG3A61zWg5PmUJ9F422O9UxkAoYcq6UIKzsrhs9LVOsAJ3OEupUw==", "dependencies": { "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/core": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/core": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "csv-stringify": "^6.2.0", "fs-extra": "^11.0.0", "got-scraping": "^4.0.0", @@ -671,15 +671,15 @@ } }, "node_modules/@crawlee/browser": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/browser/-/browser-3.8.0.tgz", - "integrity": "sha512-EgEbOhpRnrlOKDxEV4rppyVUJVDVhS0XAehF4LArzBI4zXUboPlHrKi0CxcFSgXim205RbiQAKSJRxdXaADoZQ==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/browser/-/browser-3.9.0.tgz", + "integrity": "sha512-wBjBFtVzTH3hf8XJpA7687DT9gk8Z6qi7WYRq05BnJZYs8OEndf6yu/fAJkH25WBG4Jolom6RT9ZbGbXWSQavA==", "dependencies": { "@apify/timeout": "^0.3.0", - "@crawlee/basic": "3.8.0", - "@crawlee/browser-pool": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/basic": "3.9.0", + "@crawlee/browser-pool": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "ow": "^0.28.1", "tslib": "^2.4.0", "type-fest": "^4.0.0" @@ -689,14 +689,14 @@ } }, "node_modules/@crawlee/browser-pool": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/browser-pool/-/browser-pool-3.8.0.tgz", - "integrity": "sha512-oqTYHOcUKxsiOCKUSqiLkSRD8pRkmOilfHLCymMt+eH49M0P4Zpi9+wo61gYsbch+YixeJ6ACOZzT5YLNgfbhQ==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/browser-pool/-/browser-pool-3.9.0.tgz", + "integrity": "sha512-a6sFkz/s8h8s0OR2VcoLpy5MUCdg0vxrJM3MN+AzBRXRFUMO4KFMyqnV99XcwRm1eoHnQzE3IECBFJ/QF1aBNQ==", "dependencies": { "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.0", - "@crawlee/core": "3.8.0", - "@crawlee/types": "3.8.0", + "@crawlee/core": "3.9.0", + "@crawlee/types": "3.9.0", "fingerprint-generator": "^2.0.6", "fingerprint-injector": "^2.0.5", "lodash.merge": "^4.6.2", @@ -725,13 +725,13 @@ } }, "node_modules/@crawlee/cheerio": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/cheerio/-/cheerio-3.8.0.tgz", - "integrity": "sha512-BeIR2Z70UXuBSzEO78f+MVJcgmV4He5lEhRHOmGE2p5/NQ2GNaK6HI1m+OlDbAoj73BYz+89lskjLNk02Kho6Q==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/cheerio/-/cheerio-3.9.0.tgz", + "integrity": "sha512-YXbnVCp5p0Pm32kr5IqTWkYmTGkHy5KRRu/Nn79iKJAfZMU9DIUaRduf4z5+4aeHmcPuHZqxinSHOgwUIt17Gg==", "dependencies": { - "@crawlee/http": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/http": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "cheerio": "^1.0.0-rc.12", "htmlparser2": "^9.0.0", "tslib": "^2.4.0" @@ -741,11 +741,11 @@ } }, "node_modules/@crawlee/cli": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/cli/-/cli-3.8.0.tgz", - "integrity": "sha512-J2Tpx8067+ViuUl+7BpUz0f477mXI3LABTIPQIZQVWEK0YqV0RRv+0mNZFkFhTjHL2A7HR99kwGBrOm0X2MgXA==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/cli/-/cli-3.9.0.tgz", + "integrity": "sha512-D5O/HCcp5x7wvt5I4BbQ0gXORPbnQaGCnFvg0Sw3R0La/o+BfrohKusBEEGKOfbq42ph0MV2ApPg7lScdRZElA==", "dependencies": { - "@crawlee/templates": "3.8.0", + "@crawlee/templates": "3.9.0", "ansi-colors": "^4.1.3", "fs-extra": "^11.0.0", "inquirer": "^8.2.4", @@ -761,9 +761,9 @@ } }, "node_modules/@crawlee/core": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/core/-/core-3.8.0.tgz", - "integrity": "sha512-10kx1Mrvqv87cE4J2LjPClxHwa5RcJOiOxge4e9CweQjuY5sIc3Z7h+cJ86u27LFqWpbcCMWtNbA5DR8dn8QgQ==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/core/-/core-3.9.0.tgz", + "integrity": "sha512-joabXmEBIos6NNqRnU5AVMU4BoKpXihb6bIYXIQ322Vk1m1Kbr8+GVdHMNMNBytfVyvO1eNBpowKfXScKS7lfQ==", "dependencies": { "@apify/consts": "^2.20.0", "@apify/datastructures": "^2.0.0", @@ -771,9 +771,9 @@ "@apify/pseudo_url": "^2.0.30", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/memory-storage": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/memory-storage": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "@sapphire/async-queue": "^1.5.1", "@types/tough-cookie": "^4.0.2", "@vladfrangu/async_event_emitter": "^2.2.2", @@ -795,15 +795,15 @@ } }, "node_modules/@crawlee/http": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/http/-/http-3.8.0.tgz", - "integrity": "sha512-wpzvk1PGHduj2m0+eqN1F9+mRu4ZX1A1g7SVLdvywwo1nSCb0aa2i8NRD1uxrCJ+NFDpa7tx/W4+ZTzuLhgYeA==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/http/-/http-3.9.0.tgz", + "integrity": "sha512-4rPNn6Kq3ah0fZhQplKcxB+JRNwKgHUfCN/CFhF1teQwnxhf21vwr8d7uCx7JlQEQwUwBqcKUggHSONh7iVsQQ==", "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/basic": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/basic": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "@types/content-type": "^1.1.5", "cheerio": "^1.0.0-rc.12", "content-type": "^1.0.4", @@ -819,14 +819,14 @@ } }, "node_modules/@crawlee/jsdom": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/jsdom/-/jsdom-3.8.0.tgz", - "integrity": "sha512-jN1U9skIBIDXwqcOdrCf63EL18y/a4lyk6vMV3IPXWwSWxdnlrGmfCl6iCGpYlO48QEqLxtwMxNtyklbHlis6A==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/jsdom/-/jsdom-3.9.0.tgz", + "integrity": "sha512-Ixii3Jugq4MrgnPMXyDBnkOa3M3iTvyMwN7vuSP+Ztj7gRUpzXsenN0hMiCxpgrgfdZrkp64Z0C0neXLmpSk6A==", "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/http": "3.8.0", - "@crawlee/types": "3.8.0", + "@crawlee/http": "3.9.0", + "@crawlee/types": "3.9.0", "@types/jsdom": "^21.0.0", "cheerio": "^1.0.0-rc.12", "jsdom": "^24.0.0", @@ -838,14 +838,14 @@ } }, "node_modules/@crawlee/linkedom": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/linkedom/-/linkedom-3.8.0.tgz", - "integrity": "sha512-8S24JDKSCy6S+UTei9IQdoRdsQiVhXJ/a3zmjTtU+niMqstHijGcsPC5CLNjaPREt9xOcz5XecyhKTRQIwkoYw==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/linkedom/-/linkedom-3.9.0.tgz", + "integrity": "sha512-tfERHoCdP3mjEqN2p946jqiXe8Rk9feIlXujN7EsUVl16SPjAEN2FDMndnE8XT54VuEIeN+c9finSqTctKC6Nw==", "dependencies": { "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.7.10", - "@crawlee/http": "3.8.0", - "@crawlee/types": "3.8.0", + "@crawlee/http": "3.9.0", + "@crawlee/types": "3.9.0", "linkedom": "^0.16.0", "ow": "^0.28.2", "tslib": "^2.4.0" @@ -855,12 +855,12 @@ } }, "node_modules/@crawlee/memory-storage": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/memory-storage/-/memory-storage-3.8.0.tgz", - "integrity": "sha512-dJY5Y0Zxn+cR6m7+CqHF4IwarsSESvVWoTEba5vfnxQPXTVYLren0gnqVCLI4O5Alhid/BwwQJPe7gjGWI/K2w==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/memory-storage/-/memory-storage-3.9.0.tgz", + "integrity": "sha512-q07SN8y4eUwHZsYTQWRd1hULPj5RH6r9fBzbbVePHposoLyb/fGr9H2bhjwrcRWM/41ZevlijQr6p5lFa7WW5A==", "dependencies": { "@apify/log": "^2.4.0", - "@crawlee/types": "3.8.0", + "@crawlee/types": "3.9.0", "@sapphire/async-queue": "^1.5.0", "@sapphire/shapeshift": "^3.0.0", "content-type": "^1.0.4", @@ -875,18 +875,18 @@ } }, "node_modules/@crawlee/playwright": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/playwright/-/playwright-3.8.0.tgz", - "integrity": "sha512-61VM4MbVyq5vFFwhJDmoMKlfIOA8uJmhaTfJztZb9MupM8H+Cm2KawtTlz7lF8oAZ9Qr1+/vY1YwnDufEV6Rvg==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/playwright/-/playwright-3.9.0.tgz", + "integrity": "sha512-ABvLb1sUIvmq2u27rGwkEHxI6Vflpkb+xVKA2hx/sy+lcQY6xjL9zzv4FVDR38kgiVDcVgMiP5kThdZG6pb+Ng==", "dependencies": { "@apify/datastructures": "^2.0.0", "@apify/log": "^2.4.0", "@apify/timeout": "^0.3.1", - "@crawlee/browser": "3.8.0", - "@crawlee/browser-pool": "3.8.0", - "@crawlee/core": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/browser": "3.9.0", + "@crawlee/browser-pool": "3.9.0", + "@crawlee/core": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "cheerio": "^1.0.0-rc.12", "idcac-playwright": "^0.1.2", "jquery": "^3.6.0", @@ -909,16 +909,16 @@ } }, "node_modules/@crawlee/puppeteer": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/puppeteer/-/puppeteer-3.8.0.tgz", - "integrity": "sha512-gRq7//CZ+K6H4Gzon3ioU5UuOw53GMTPvgGUqyp3B8NWhuRFidvDYHv8GnJjSZbVJGjpeN+CRaiVv59QiORvtg==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/puppeteer/-/puppeteer-3.9.0.tgz", + "integrity": "sha512-8/4GUcjJgLBWeXXTdfStmUewPA0E8o9/51KAloIudM3uaCaqGL9sWSTtE6AqLA0a+L2yEC8iFsBF1aE44ZdQ8A==", "dependencies": { "@apify/datastructures": "^2.0.0", "@apify/log": "^2.4.0", - "@crawlee/browser": "3.8.0", - "@crawlee/browser-pool": "3.8.0", - "@crawlee/types": "3.8.0", - "@crawlee/utils": "3.8.0", + "@crawlee/browser": "3.9.0", + "@crawlee/browser-pool": "3.9.0", + "@crawlee/types": "3.9.0", + "@crawlee/utils": "3.9.0", "cheerio": "^1.0.0-rc.12", "devtools-protocol": "*", "idcac-playwright": "^0.1.2", @@ -939,9 +939,9 @@ } }, "node_modules/@crawlee/templates": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/templates/-/templates-3.8.0.tgz", - "integrity": "sha512-FCxhJqMrzJM4fR4h5PZmEd88CmoiHix6xFQKS+LzQZMHM9zCoPm9XAPj9a6h3b8uO6Dy5D8uYdqIi7s/b9n2JA==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/templates/-/templates-3.9.0.tgz", + "integrity": "sha512-xoPIC5O+zeiStjzFurS732xw8WqQvJWE8L8TNgu9PkMR5HWwQRutaRiwAwj3/aovJpnn6snMwZUWlfwqRwVZdg==", "dependencies": { "ansi-colors": "^4.1.3", "inquirer": "^9.0.0", @@ -973,11 +973,11 @@ } }, "node_modules/@crawlee/templates/node_modules/inquirer": { - "version": "9.2.15", - "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-9.2.15.tgz", - "integrity": "sha512-vI2w4zl/mDluHt9YEQ/543VTCwPKWiHzKtm9dM2V0NdFcqEexDAjUHzO1oA60HRNaVifGXXM1tRRNluLVHa0Kg==", + "version": "9.2.17", + "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-9.2.17.tgz", + "integrity": "sha512-Vr3Ia2ud5sGnioURkE69endl4SkeJcMzTF6SosKcX5GALJfId7C+JvO5ZZb6y1LOXnEofCPbwzoQ1q0e8Gaduw==", "dependencies": { - "@ljharb/through": "^2.3.12", + "@ljharb/through": "^2.3.13", "ansi-escapes": "^4.3.2", "chalk": "^5.3.0", "cli-cursor": "^3.1.0", @@ -1014,9 +1014,9 @@ } }, "node_modules/@crawlee/types": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/types/-/types-3.8.0.tgz", - "integrity": "sha512-NVD5Ay2Gq3E0lZkowea2KgM0HO7Fl8VC28OYstzHH/CQvxvgIQq9ft3txnoy/FfJGP1qcu3dz2G1a/V995Ml4w==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/types/-/types-3.9.0.tgz", + "integrity": "sha512-YxTUNWIGxA2LrAK9WdUdCF6yFsPFOs+VGmCpKeCIlj0CUd/YmGz6LnewCikoq+f02v+yDWChlKRGYuIVOrzfVQ==", "dependencies": { "tslib": "^2.4.0" }, @@ -1025,13 +1025,13 @@ } }, "node_modules/@crawlee/utils": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/@crawlee/utils/-/utils-3.8.0.tgz", - "integrity": "sha512-wFuLmJYTpO0baI1/ZpH0k20nEEdl7F8vaa4hcYuNEt6KIJJoXKj3rJbOa4LWEek6ub+jtdHds6lgVJ7h7e9Gaw==", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/@crawlee/utils/-/utils-3.9.0.tgz", + "integrity": "sha512-x7ANY5gKe4Y2ssZ13xJYT+woqyfJL0fqTQPa77I4FJBdGgTYcgAGFW8ytTglNc5otLM7woQdNxuNZ+77QBFc7A==", "dependencies": { "@apify/log": "^2.4.0", "@apify/ps-tree": "^1.2.0", - "@crawlee/types": "3.8.0", + "@crawlee/types": "3.9.0", "@types/sax": "^1.2.7", "cheerio": "^1.0.0-rc.12", "got-scraping": "^4.0.3", @@ -1923,11 +1923,11 @@ } }, "node_modules/@ljharb/through": { - "version": "2.3.12", - "resolved": "https://registry.npmjs.org/@ljharb/through/-/through-2.3.12.tgz", - "integrity": "sha512-ajo/heTlG3QgC8EGP6APIejksVAYt4ayz4tqoP3MolFELzcH1x1fzwEYRJTPO0IELutZ5HQ0c26/GqAYy79u3g==", + "version": "2.3.13", + "resolved": "https://registry.npmjs.org/@ljharb/through/-/through-2.3.13.tgz", + "integrity": "sha512-/gKJun8NNiWGZJkGzI/Ragc53cOdcLNdzjLaIa+GEjguQs0ulsurx8WN0jijdK9yPqDvziX995sMRLyLt1uZMQ==", "dependencies": { - "call-bind": "^1.0.5" + "call-bind": "^1.0.7" }, "engines": { "node": ">= 0.4" @@ -5799,22 +5799,22 @@ } }, "node_modules/crawlee": { - "version": "3.8.0", - "resolved": "https://registry.npmjs.org/crawlee/-/crawlee-3.8.0.tgz", - "integrity": "sha512-1jV2MPO1Ji2BHbFo/tNrrqlORfiiSAJ5HzG+ONFynsM+FePCyEs97cVX74vnwLFUGCMH6SxnD8IfFC9jcSS2Gg==", - "dependencies": { - "@crawlee/basic": "3.8.0", - "@crawlee/browser": "3.8.0", - "@crawlee/browser-pool": "3.8.0", - "@crawlee/cheerio": "3.8.0", - "@crawlee/cli": "3.8.0", - "@crawlee/core": "3.8.0", - "@crawlee/http": "3.8.0", - "@crawlee/jsdom": "3.8.0", - "@crawlee/linkedom": "3.8.0", - "@crawlee/playwright": "3.8.0", - "@crawlee/puppeteer": "3.8.0", - "@crawlee/utils": "3.8.0", + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/crawlee/-/crawlee-3.9.0.tgz", + "integrity": "sha512-F1obyHVccZYV3D7y2W1zEZToJXCQCY1OgkB51qtlKMbs3dgA/V+SelsjmTNP2kFe1Xv40S/F0rpraYrxbPCyFQ==", + "dependencies": { + "@crawlee/basic": "3.9.0", + "@crawlee/browser": "3.9.0", + "@crawlee/browser-pool": "3.9.0", + "@crawlee/cheerio": "3.9.0", + "@crawlee/cli": "3.9.0", + "@crawlee/core": "3.9.0", + "@crawlee/http": "3.9.0", + "@crawlee/jsdom": "3.9.0", + "@crawlee/linkedom": "3.9.0", + "@crawlee/playwright": "3.9.0", + "@crawlee/puppeteer": "3.9.0", + "@crawlee/utils": "3.9.0", "import-local": "^3.1.0", "tslib": "^2.4.0" }, @@ -10557,14 +10557,14 @@ } }, "node_modules/linkedom": { - "version": "0.16.8", - "resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.16.8.tgz", - "integrity": "sha512-+HtHVHBb3yZKlP9pgcJdi1AIG9tsAuo+Qtlz+79cCTsxgQwDzajsZjYvpp+DEckCK/zoGVhzkADniYZQ57KcFQ==", + "version": "0.16.11", + "resolved": "https://registry.npmjs.org/linkedom/-/linkedom-0.16.11.tgz", + "integrity": "sha512-WgaTVbj7itjyXTsCvgerpneERXShcnNJF5VIV+/4SLtyRLN+HppPre/WDHRofAr2IpEuujSNgJbCBd5lMl6lRw==", "dependencies": { "css-select": "^5.1.0", "cssom": "^0.5.0", "html-escaper": "^3.0.3", - "htmlparser2": "^9.0.0", + "htmlparser2": "^9.1.0", "uhyphen": "^0.2.0" } }, @@ -17170,11 +17170,11 @@ }, "packages/actor-scraper/cheerio-scraper": { "name": "actor-cheerio-scraper", - "version": "3.0.1", + "version": "3.1.0", "license": "Apache-2.0", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/cheerio": "^3.5.2", + "@crawlee/cheerio": "^3.8.2", "apify": "^3.1.8" }, "devDependencies": { @@ -17189,11 +17189,11 @@ }, "packages/actor-scraper/jsdom-scraper": { "name": "actor-jsdom-scraper", - "version": "3.0.1", + "version": "3.1.0", "license": "Apache-2.0", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/jsdom": "^3.5.2", + "@crawlee/jsdom": "^3.8.2", "apify": "^3.1.8" }, "devDependencies": { @@ -17208,13 +17208,13 @@ }, "packages/actor-scraper/playwright-scraper": { "name": "actor-playwright-scraper", - "version": "3.0.1", + "version": "3.1.0", "license": "Apache-2.0", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/core": "^3.5.2", - "@crawlee/playwright": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/core": "^3.8.2", + "@crawlee/playwright": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8", "idcac-playwright": "^0.1.2", "playwright": "*" @@ -17228,11 +17228,11 @@ }, "packages/actor-scraper/puppeteer-scraper": { "name": "actor-puppeteer-scraper", - "version": "3.0.1", + "version": "3.1.0", "license": "Apache-2.0", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/puppeteer": "^3.5.2", + "@crawlee/puppeteer": "^3.8.2", "apify": "^3.1.8", "idcac-playwright": "^0.1.2", "puppeteer": "*" @@ -17246,14 +17246,14 @@ }, "packages/actor-scraper/web-scraper": { "name": "actor-web-scraper", - "version": "3.0.1", + "version": "3.1.0", "license": "Apache-2.0", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/puppeteer": "^3.5.2", + "@crawlee/puppeteer": "^3.8.2", "apify": "^3.1.8", "content-type": "^1.0.5", - "crawlee": "^3.5.2", + "crawlee": "^3.8.2", "devtools-server": "^0.0.2", "idcac-playwright": "^0.1.2", "puppeteer": "*" @@ -17267,7 +17267,7 @@ } }, "packages/apify": { - "version": "3.1.16", + "version": "3.2.0", "license": "Apache-2.0", "dependencies": { "@apify/consts": "^2.23.0", @@ -17275,9 +17275,9 @@ "@apify/log": "^2.4.3", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.9.3", - "@crawlee/core": "^3.6.1", - "@crawlee/types": "^3.6.1", - "@crawlee/utils": "^3.6.1", + "@crawlee/core": "^3.9.0", + "@crawlee/types": "^3.9.0", + "@crawlee/utils": "^3.9.0", "apify-client": "^2.9.0", "ow": "^0.28.2", "semver": "^7.5.4", @@ -17290,7 +17290,7 @@ }, "packages/scraper-tools": { "name": "@apify/scraper-tools", - "version": "1.1.2", + "version": "1.1.4", "license": "Apache-2.0", "dependencies": { "@apify/log": "^2.4.0", @@ -17299,17 +17299,17 @@ "tslib": "^2.6.1" }, "devDependencies": { - "@crawlee/browser-pool": "^3.5.2", - "@crawlee/core": "^3.5.2", - "@crawlee/types": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/browser-pool": "^3.8.2", + "@crawlee/core": "^3.8.2", + "@crawlee/types": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8" }, "peerDependencies": { - "@crawlee/browser-pool": "^3.5.2", - "@crawlee/core": "^3.5.2", - "@crawlee/types": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/browser-pool": "^3.8.2", + "@crawlee/core": "^3.8.2", + "@crawlee/types": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8" }, "peerDependenciesMeta": { diff --git a/package.json b/package.json index dbc864441a..abcaa7c84c 100644 --- a/package.json +++ b/package.json @@ -65,7 +65,7 @@ "@typescript-eslint/eslint-plugin": "^7.0.0", "@typescript-eslint/parser": "^7.0.0", "commitlint": "^18.0.0", - "crawlee": "^3.8.0", + "crawlee": "^3.9.0", "eslint": "^8.54.0", "fs-extra": "^11.1.1", "gen-esm-wrapper": "^1.1.3", diff --git a/packages/actor-scraper/cheerio-scraper/CHANGELOG.md b/packages/actor-scraper/cheerio-scraper/CHANGELOG.md index 863f092202..6008afd223 100644 --- a/packages/actor-scraper/cheerio-scraper/CHANGELOG.md +++ b/packages/actor-scraper/cheerio-scraper/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 3.0.15 (2024-04-09) + +- Updated Crawlee version to v3.8.0. +- Updated to use new request queue in scraper + ## 3.0.11 (2023-08-22) - Updated Crawlee version to v3.5.2. diff --git a/packages/actor-scraper/cheerio-scraper/package.json b/packages/actor-scraper/cheerio-scraper/package.json index 26fe2a74c5..3ce550aca9 100644 --- a/packages/actor-scraper/cheerio-scraper/package.json +++ b/packages/actor-scraper/cheerio-scraper/package.json @@ -1,12 +1,12 @@ { "name": "actor-cheerio-scraper", - "version": "3.0.1", + "version": "3.1.0", "private": true, "description": "Crawl web pages using HTTP requests and Cheerio", "type": "module", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/cheerio": "^3.5.2", + "@crawlee/cheerio": "^3.8.2", "apify": "^3.1.8" }, "devDependencies": { diff --git a/packages/actor-scraper/cheerio-scraper/src/internals/crawler_setup.ts b/packages/actor-scraper/cheerio-scraper/src/internals/crawler_setup.ts index cdc0806a48..53c0a74609 100644 --- a/packages/actor-scraper/cheerio-scraper/src/internals/crawler_setup.ts +++ b/packages/actor-scraper/cheerio-scraper/src/internals/crawler_setup.ts @@ -20,7 +20,7 @@ import { ProxyConfiguration, Request, RequestList, - RequestQueue, + RequestQueueV2, log, Dictionary, Awaitable, @@ -48,7 +48,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { * Used to store data that persist navigations */ globalStore = new Map(); - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; input: Input; @@ -137,7 +137,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { this.requestList = await RequestList.open('CHEERIO_SCRAPER', startUrls); // RequestQueue - this.requestQueue = await RequestQueue.open(this.requestQueueName); + this.requestQueue = await RequestQueueV2.open(this.requestQueueName); // Dataset this.dataset = await Dataset.open(this.datasetName); @@ -188,6 +188,9 @@ export class CrawlerSetup implements CrawlerSetupOptions { maxUsageCount: this.maxSessionUsageCount, }, }, + experiments: { + requestLocking: true, + }, }; this._createNavigationHooks(options); diff --git a/packages/actor-scraper/jsdom-scraper/CHANGELOG.md b/packages/actor-scraper/jsdom-scraper/CHANGELOG.md index c59618f98c..bec7d09261 100644 --- a/packages/actor-scraper/jsdom-scraper/CHANGELOG.md +++ b/packages/actor-scraper/jsdom-scraper/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 0.1.6 (2024-04-09) + +- Updated Crawlee version to v3.8.0. +- Updated to use new request queue in scraper + ## 0.1 - Initial version built on Crawlee. diff --git a/packages/actor-scraper/jsdom-scraper/package.json b/packages/actor-scraper/jsdom-scraper/package.json index 93802155d7..b8ed064cb3 100644 --- a/packages/actor-scraper/jsdom-scraper/package.json +++ b/packages/actor-scraper/jsdom-scraper/package.json @@ -1,12 +1,12 @@ { "name": "actor-jsdom-scraper", - "version": "3.0.1", + "version": "3.1.0", "private": true, "description": "Crawl web pages using HTTP requests and JSDOM parser", "type": "module", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/jsdom": "^3.5.2", + "@crawlee/jsdom": "^3.8.2", "apify": "^3.1.8" }, "devDependencies": { diff --git a/packages/actor-scraper/jsdom-scraper/src/internals/crawler_setup.ts b/packages/actor-scraper/jsdom-scraper/src/internals/crawler_setup.ts index ed13fed07b..480429e001 100644 --- a/packages/actor-scraper/jsdom-scraper/src/internals/crawler_setup.ts +++ b/packages/actor-scraper/jsdom-scraper/src/internals/crawler_setup.ts @@ -20,7 +20,7 @@ import { ProxyConfiguration, Request, RequestList, - RequestQueue, + RequestQueueV2, log, Dictionary, Awaitable, @@ -47,7 +47,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { * Used to store data that persist navigations */ globalStore = new Map(); - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; input: Input; @@ -136,7 +136,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { this.requestList = await RequestList.open('JSDOM_SCRAPER', startUrls); // RequestQueue - this.requestQueue = await RequestQueue.open(this.requestQueueName); + this.requestQueue = await RequestQueueV2.open(this.requestQueueName); // Dataset this.dataset = await Dataset.open(this.datasetName); @@ -189,6 +189,9 @@ export class CrawlerSetup implements CrawlerSetupOptions { maxUsageCount: this.maxSessionUsageCount, }, }, + experiments: { + requestLocking: true, + }, }; this._createNavigationHooks(options); diff --git a/packages/actor-scraper/playwright-scraper/CHANGELOG.md b/packages/actor-scraper/playwright-scraper/CHANGELOG.md index 87e5e7557a..88e50c01e2 100644 --- a/packages/actor-scraper/playwright-scraper/CHANGELOG.md +++ b/packages/actor-scraper/playwright-scraper/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 1.0.14 (2024-04-09) + +- Updated Crawlee version to v3.8.0. +- Updated to use new request queue in scraper + ## 1.0.11 (2023-08-22) - Updated Crawlee version to v3.5.2. diff --git a/packages/actor-scraper/playwright-scraper/package.json b/packages/actor-scraper/playwright-scraper/package.json index ec1d67b0a6..8768719e51 100644 --- a/packages/actor-scraper/playwright-scraper/package.json +++ b/packages/actor-scraper/playwright-scraper/package.json @@ -1,14 +1,14 @@ { "name": "actor-playwright-scraper", - "version": "3.0.1", + "version": "3.1.0", "private": true, "description": "Crawl web pages using Apify, headless browser and Playwright", "type": "module", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/core": "^3.5.2", - "@crawlee/playwright": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/core": "^3.8.2", + "@crawlee/playwright": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8", "idcac-playwright": "^0.1.2", "playwright": "*" diff --git a/packages/actor-scraper/playwright-scraper/src/internals/crawler_setup.ts b/packages/actor-scraper/playwright-scraper/src/internals/crawler_setup.ts index 54b102e31a..09536948d2 100644 --- a/packages/actor-scraper/playwright-scraper/src/internals/crawler_setup.ts +++ b/packages/actor-scraper/playwright-scraper/src/internals/crawler_setup.ts @@ -9,7 +9,7 @@ import { KeyValueStore, Request, RequestList, - RequestQueue, + RequestQueueV2, PlaywrightCrawlingContext, PlaywrightCrawler, PlaywrightCrawlerOptions, @@ -42,7 +42,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { * Used to store data that persist navigations */ globalStore = new Map(); - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; input: Input; @@ -152,7 +152,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { this.requestList = await RequestList.open('PLAYWRIGHT_SCRAPER', startUrls); // RequestQueue - this.requestQueue = await RequestQueue.open(this.requestQueueName); + this.requestQueue = await RequestQueueV2.open(this.requestQueueName); // Dataset this.dataset = await Dataset.open(this.datasetName); @@ -205,6 +205,9 @@ export class CrawlerSetup implements CrawlerSetupOptions { maxUsageCount: this.maxSessionUsageCount, }, }, + experiments: { + requestLocking: true, + }, }; this._createNavigationHooks(options); diff --git a/packages/actor-scraper/puppeteer-scraper/CHANGELOG.md b/packages/actor-scraper/puppeteer-scraper/CHANGELOG.md index 03f3006bf4..48e728c631 100644 --- a/packages/actor-scraper/puppeteer-scraper/CHANGELOG.md +++ b/packages/actor-scraper/puppeteer-scraper/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 3.0.12 (2024-04-09) + +- Updated Crawlee version to v3.8.0. +- Updated to use new request queue in scraper + ## 3.0.8 (2023-08-22) - Updated Crawlee version to v3.5.2. diff --git a/packages/actor-scraper/puppeteer-scraper/package.json b/packages/actor-scraper/puppeteer-scraper/package.json index 7b4ecd3a10..ca21b1b349 100644 --- a/packages/actor-scraper/puppeteer-scraper/package.json +++ b/packages/actor-scraper/puppeteer-scraper/package.json @@ -1,12 +1,12 @@ { "name": "actor-puppeteer-scraper", - "version": "3.0.1", + "version": "3.1.0", "private": true, "description": "Crawl web pages using Apify, headless Chrome and Puppeteer", "type": "module", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/puppeteer": "^3.5.2", + "@crawlee/puppeteer": "^3.8.2", "apify": "^3.1.8", "idcac-playwright": "^0.1.2", "puppeteer": "*" diff --git a/packages/actor-scraper/puppeteer-scraper/src/internals/crawler_setup.ts b/packages/actor-scraper/puppeteer-scraper/src/internals/crawler_setup.ts index ecfcd72ba0..757eb7fd90 100644 --- a/packages/actor-scraper/puppeteer-scraper/src/internals/crawler_setup.ts +++ b/packages/actor-scraper/puppeteer-scraper/src/internals/crawler_setup.ts @@ -9,7 +9,7 @@ import { KeyValueStore, Request, RequestList, - RequestQueue, + RequestQueueV2, EnqueueLinksByClickingElementsOptions, PuppeteerCrawlingContext, PuppeteerCrawler, @@ -42,7 +42,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { * Used to store data that persist navigations */ globalStore = new Map(); - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; input: Input; @@ -150,7 +150,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { this.requestList = await RequestList.open('PUPPETEER_SCRAPER', startUrls); // RequestQueue - this.requestQueue = await RequestQueue.open(this.requestQueueName); + this.requestQueue = await RequestQueueV2.open(this.requestQueueName); // Dataset this.dataset = await Dataset.open(this.datasetName); @@ -201,6 +201,9 @@ export class CrawlerSetup implements CrawlerSetupOptions { maxUsageCount: this.maxSessionUsageCount, }, }, + experiments: { + requestLocking: true, + }, }; this._createNavigationHooks(options); diff --git a/packages/actor-scraper/web-scraper/CHANGELOG.md b/packages/actor-scraper/web-scraper/CHANGELOG.md index 45587dc08f..9041c4b3b9 100644 --- a/packages/actor-scraper/web-scraper/CHANGELOG.md +++ b/packages/actor-scraper/web-scraper/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 3.0.18 (2024-04-09) + +- Updated Crawlee version to v3.8.0. +- Updated to use new request queue in scraper + ## 3.0.14 (2023-08-22) - Updated Crawlee version to v3.5.2. diff --git a/packages/actor-scraper/web-scraper/package.json b/packages/actor-scraper/web-scraper/package.json index 53613c82fa..9ce546728c 100644 --- a/packages/actor-scraper/web-scraper/package.json +++ b/packages/actor-scraper/web-scraper/package.json @@ -1,16 +1,16 @@ { "name": "actor-web-scraper", - "version": "3.0.1", + "version": "3.1.0", "private": true, "description": "Crawl web pages using Apify, headless Chrome and Puppeteer", "main": "dist/main.js", "type": "module", "dependencies": { "@apify/scraper-tools": "^1.1.1", - "@crawlee/puppeteer": "^3.5.2", + "@crawlee/puppeteer": "^3.8.2", "apify": "^3.1.8", "content-type": "^1.0.5", - "crawlee": "^3.5.2", + "crawlee": "^3.8.2", "devtools-server": "^0.0.2", "idcac-playwright": "^0.1.2", "puppeteer": "*" diff --git a/packages/actor-scraper/web-scraper/src/internals/crawler_setup.ts b/packages/actor-scraper/web-scraper/src/internals/crawler_setup.ts index 6c485191b2..b1e44362c4 100644 --- a/packages/actor-scraper/web-scraper/src/internals/crawler_setup.ts +++ b/packages/actor-scraper/web-scraper/src/internals/crawler_setup.ts @@ -14,7 +14,7 @@ import { puppeteerUtils, Request, RequestList, - RequestQueue, + RequestQueueV2, log, Awaitable, Dictionary, @@ -65,7 +65,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { * Used to store data that persist navigations */ globalStore = new GlobalStore(); - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; input: Input; @@ -175,7 +175,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { this.requestList = await RequestList.open('WEB_SCRAPER', startUrls); // RequestQueue - this.requestQueue = await RequestQueue.open(this.requestQueueName); + this.requestQueue = await RequestQueueV2.open(this.requestQueueName); // Dataset this.dataset = await Dataset.open(this.datasetName); @@ -193,7 +193,7 @@ export class CrawlerSetup implements CrawlerSetupOptions { async createCrawler() { await this.initPromise; - const args = []; + const args = ['--disable-dev-shm-usage']; if (this.input.ignoreCorsAndCsp) args.push('--disable-web-security'); if (this.isDevRun) args.push(`--remote-debugging-port=${CHROME_DEBUGGER_PORT}`); @@ -243,6 +243,9 @@ export class CrawlerSetup implements CrawlerSetupOptions { maxUsageCount: this.maxSessionUsageCount, }, }, + experiments: { + requestLocking: true, + }, }; this._createNavigationHooks(options); diff --git a/packages/apify/package.json b/packages/apify/package.json index ded97b9b30..0612c22b1e 100644 --- a/packages/apify/package.json +++ b/packages/apify/package.json @@ -1,6 +1,6 @@ { "name": "apify", - "version": "3.1.16", + "version": "3.2.0", "description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.", "engines": { "node": ">=16.0.0" @@ -59,9 +59,9 @@ "@apify/log": "^2.4.3", "@apify/timeout": "^0.3.0", "@apify/utilities": "^2.9.3", - "@crawlee/core": "^3.6.1", - "@crawlee/types": "^3.6.1", - "@crawlee/utils": "^3.6.1", + "@crawlee/core": "^3.9.0", + "@crawlee/types": "^3.9.0", + "@crawlee/utils": "^3.9.0", "apify-client": "^2.9.0", "ow": "^0.28.2", "semver": "^7.5.4", diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index 0fdf7fa814..aea77ceca1 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -1,4 +1,5 @@ import { APIFY_PROXY_VALUE_REGEX, APIFY_ENV_VARS } from '@apify/consts'; +import { cryptoRandomObjectId } from '@apify/utilities'; import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions, ProxyInfo as CoreProxyInfo, @@ -56,6 +57,12 @@ export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions * configurate the proxy by UI input schema. You should use the `countryCode` option in your crawler code. */ apifyProxyCountry?: string; + + /** + * Multiple different ProxyConfigurationOptions stratified into tiers. Crawlee crawlers will switch between those tiers + * based on the blocked request statistics. + */ + tieredProxyConfig?: Omit[]; } /** @@ -170,6 +177,8 @@ export class ProxyConfiguration extends CoreProxyConfiguration { countryCode: ow.optional.string.matches(COUNTRY_CODE_REGEX), apifyProxyCountry: ow.optional.string.matches(COUNTRY_CODE_REGEX), password: ow.optional.string, + tieredProxyUrls: ow.optional.array.ofType(ow.array.ofType(ow.string)), + tieredProxyConfig: ow.optional.array.ofType(ow.object), })); const { @@ -178,8 +187,16 @@ export class ProxyConfiguration extends CoreProxyConfiguration { countryCode, apifyProxyCountry, password = config.get('proxyPassword'), + tieredProxyConfig, + tieredProxyUrls, } = options; + this.tieredProxyUrls ??= tieredProxyUrls; + + if (tieredProxyConfig) { + this.tieredProxyUrls = this._generateTieredProxyUrls(tieredProxyConfig, options); + } + const groupsToUse = groups.length ? groups : apifyProxyGroups; const countryCodeToUse = countryCode || apifyProxyCountry; const hostname = config.get('proxyHostname'); @@ -240,16 +257,18 @@ export class ProxyConfiguration extends CoreProxyConfiguration { * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`. * @return Represents information about used proxy and its configuration. */ - override async newProxyInfo(sessionId?: string | number): Promise { + override async newProxyInfo(sessionId?: string | number, options?: Parameters[1]): Promise { if (typeof sessionId === 'number') sessionId = `${sessionId}`; ow(sessionId, ow.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(APIFY_PROXY_VALUE_REGEX)); - const url = await this.newUrl(sessionId); - const { groups, countryCode, password, port, hostname } = (this.usesApifyProxy ? this : new URL(url)) as ProxyConfiguration; + const proxyInfo = await super.newProxyInfo(sessionId, options); + if (!proxyInfo) return proxyInfo; + + const { groups, countryCode, password, port, hostname } = (this.usesApifyProxy ? this : new URL(proxyInfo.url)) as ProxyConfiguration; return { + ...proxyInfo, sessionId, - url, groups, countryCode, password: password ?? '', @@ -271,19 +290,40 @@ export class ProxyConfiguration extends CoreProxyConfiguration { * @return A string with a proxy URL, including authentication credentials and port number. * For example, `http://bob:password123@proxy.example.com:8000` */ - override async newUrl(sessionId?: string | number): Promise { + override async newUrl(sessionId?: string | number, options?: Parameters[1]): Promise { if (typeof sessionId === 'number') sessionId = `${sessionId}`; ow(sessionId, ow.optional.string.maxLength(MAX_SESSION_ID_LENGTH).matches(APIFY_PROXY_VALUE_REGEX)); if (this.newUrlFunction) { - return this._callNewUrlFunction(sessionId)!; + return (await this._callNewUrlFunction(sessionId, { request: options?.request }) ?? undefined); } if (this.proxyUrls) { return this._handleCustomUrl(sessionId); } - const username = this._getUsername(sessionId); - const { password, hostname, port } = this; - return `http://${username}:${password}@${hostname}:${port}`; + if (this.tieredProxyUrls) { + return this._handleTieredUrl( + sessionId ?? cryptoRandomObjectId(6), + options, + ).proxyUrl; + } + + return this.composeDefaultUrl(sessionId); + } + + protected _generateTieredProxyUrls( + tieredProxyConfig: NonNullable, + globalOptions: ProxyConfigurationOptions, + ) { + return tieredProxyConfig + .map( + (config) => [ + new ProxyConfiguration({ + ...globalOptions, + ...config, + tieredProxyConfig: undefined, + }).composeDefaultUrl(), + ], + ); } /** @@ -311,6 +351,13 @@ export class ProxyConfiguration extends CoreProxyConfiguration { return username; } + protected composeDefaultUrl(sessionId?: string): string { + const username = this._getUsername(sessionId); + const { password, hostname, port } = this; + + return `http://${username}:${password}@${hostname}:${port}`; + } + /** * Checks if Apify Token is provided in env and gets the password via API and sets it to env */ diff --git a/packages/scraper-tools/CHANGELOG.md b/packages/scraper-tools/CHANGELOG.md index a49e34bf24..424439715c 100644 --- a/packages/scraper-tools/CHANGELOG.md +++ b/packages/scraper-tools/CHANGELOG.md @@ -3,6 +3,14 @@ All notable changes to this project will be documented in this file. See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. +## [1.1.4](https://github.com/apify/apify-sdk-js/compare/@apify/scraper-tools@1.1.2...@apify/scraper-tools@1.1.4) (2024-03-25) + +**Note:** Version bump only for package @apify/scraper-tools + + + + + ## [1.1.2](https://github.com/apify/apify-sdk-js/compare/@apify/scraper-tools@1.1.1...@apify/scraper-tools@1.1.2) (2023-07-28) diff --git a/packages/scraper-tools/package.json b/packages/scraper-tools/package.json index 5d6f3cccb7..e832e5ea57 100644 --- a/packages/scraper-tools/package.json +++ b/packages/scraper-tools/package.json @@ -1,6 +1,6 @@ { "name": "@apify/scraper-tools", - "version": "1.1.2", + "version": "1.1.4", "description": "Tools shared by Apify actor-scrapers.", "types": "dist/index.d.ts", "exports": { @@ -40,17 +40,17 @@ "tslib": "^2.6.1" }, "devDependencies": { - "@crawlee/browser-pool": "^3.5.2", - "@crawlee/core": "^3.5.2", - "@crawlee/types": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/browser-pool": "^3.8.2", + "@crawlee/core": "^3.8.2", + "@crawlee/types": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8" }, "peerDependencies": { - "@crawlee/browser-pool": "^3.5.2", - "@crawlee/core": "^3.5.2", - "@crawlee/types": "^3.5.2", - "@crawlee/utils": "^3.5.2", + "@crawlee/browser-pool": "^3.8.2", + "@crawlee/core": "^3.8.2", + "@crawlee/types": "^3.8.2", + "@crawlee/utils": "^3.8.2", "apify": "^3.1.8" }, "peerDependenciesMeta": { diff --git a/packages/scraper-tools/src/context.ts b/packages/scraper-tools/src/context.ts index bde697d2fe..629e411b18 100644 --- a/packages/scraper-tools/src/context.ts +++ b/packages/scraper-tools/src/context.ts @@ -5,7 +5,7 @@ import type { RecordOptions, Request, RequestOptions, - RequestQueue, + RequestQueueV2, RequestQueueOperationOptions, } from '@crawlee/core'; import type { Dictionary } from '@crawlee/utils'; @@ -23,7 +23,7 @@ export interface CrawlerSetupOptions { rawInput: string; env: ApifyEnv; globalStore: Map | MapLike; - requestQueue: RequestQueue; + requestQueue: RequestQueueV2; keyValueStore: KeyValueStore; customData: unknown; } @@ -124,7 +124,7 @@ class Context { + ) : ReturnType { const defaultRequestOpts = { useExtendedUniqueKey: true, keepUrlFragment: this.input.keepUrlFragments, diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index f3d3fb948a..a2fd55ccb6 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -1,6 +1,7 @@ import { APIFY_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts'; import { Actor, ProxyConfiguration } from 'apify'; import { UserClient } from 'apify-client'; +import { Request } from 'crawlee'; const groups = ['GROUP1', 'GROUP2']; const hostname = LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]; @@ -72,8 +73,9 @@ describe('ProxyConfiguration', () => { password, hostname, port, + username: 'groups-GROUP1+GROUP2,session-538909250932,country-CZ', }; - expect(await proxyConfiguration.newProxyInfo(sessionId)).toStrictEqual(proxyInfo); + expect(await proxyConfiguration.newProxyInfo(sessionId)).toEqual(proxyInfo); }); test('actor UI input schema should work', () => { @@ -331,6 +333,63 @@ describe('ProxyConfiguration', () => { } }); }); + + describe('With tieredProxyUrls', () => { + test('proxy configuration accepts the tiered urls (Crawlee style)', async () => { + const proxyConfiguration = new ProxyConfiguration({ + tieredProxyUrls: [ + ['http://proxy.com:1111'], + ['http://proxy.com:2222'], + ['http://proxy.com:3333'], + ['http://proxy.com:4444'], + ], + }); + + // through newUrl() + expect(await proxyConfiguration.newUrl( + 'abc', + { request: new Request({ url: 'http://example.com' }) as any }, + )).toEqual('http://proxy.com:1111'); + + // through newProxyInfo() + expect((await proxyConfiguration.newProxyInfo( + 'abc', + { request: new Request({ url: 'http://example.com' }) as any }, + )).url).toEqual('http://proxy.com:1111'); + }); + + test('shorthand tieredProxyConfig gets correctly expanded', async () => { + const proxyConfiguration = new ProxyConfiguration({ + password: 'password', + countryCode: 'DE', + tieredProxyConfig: [ + { + groups: ['GROUP1'], + countryCode: 'CZ', + }, + { + groups: ['GROUP2'], + countryCode: 'US', + }, + { + groups: ['GROUP3', 'GROUP4'], + }, + { + groups: ['GROUP3', 'GROUP4'], + countryCode: undefined, + }, + ], + }); + + // eslint-disable-next-line dot-notation + expect(proxyConfiguration['tieredProxyUrls']).toEqual([ + ['http://groups-GROUP1,country-CZ:password@proxy.apify.com:8000'], + ['http://groups-GROUP2,country-US:password@proxy.apify.com:8000'], + ['http://groups-GROUP3+GROUP4,country-DE:password@proxy.apify.com:8000'], + ['http://groups-GROUP3+GROUP4:password@proxy.apify.com:8000'], + ]); + }); + }); }); describe('Actor.createProxyConfiguration()', () => { @@ -483,4 +542,61 @@ describe('Actor.createProxyConfiguration()', () => { gotScrapingSpy.mockRestore(); }); + + describe('With tieredProxyUrls', () => { + test('proxy configuration accepts the tiered urls (Crawlee style)', async () => { + const proxyConfiguration = await Actor.createProxyConfiguration({ + tieredProxyUrls: [ + ['http://proxy.com:1111'], + ['http://proxy.com:2222'], + ['http://proxy.com:3333'], + ['http://proxy.com:4444'], + ], + }); + + // through newUrl() + expect(await proxyConfiguration.newUrl( + 'abc', + { request: new Request({ url: 'http://example.com' }) as any }, + )).toEqual('http://proxy.com:1111'); + + // through newProxyInfo() + expect((await proxyConfiguration.newProxyInfo( + 'abc', + { request: new Request({ url: 'http://example.com' }) as any }, + )).url).toEqual('http://proxy.com:1111'); + }); + + test('shorthand tieredProxyConfig gets correctly expanded', async () => { + const proxyConfiguration = await Actor.createProxyConfiguration({ + password: 'password', + countryCode: 'DE', + tieredProxyConfig: [ + { + groups: ['GROUP1'], + countryCode: 'CZ', + }, + { + groups: ['GROUP2'], + countryCode: 'US', + }, + { + groups: ['GROUP3', 'GROUP4'], + }, + { + groups: ['GROUP3', 'GROUP4'], + countryCode: undefined, + }, + ], + }); + + // eslint-disable-next-line dot-notation + expect(proxyConfiguration['tieredProxyUrls']).toEqual([ + ['http://groups-GROUP1,country-CZ:password@proxy.apify.com:8000'], + ['http://groups-GROUP2,country-US:password@proxy.apify.com:8000'], + ['http://groups-GROUP3+GROUP4,country-DE:password@proxy.apify.com:8000'], + ['http://groups-GROUP3+GROUP4:password@proxy.apify.com:8000'], + ]); + }); + }); }); diff --git a/website/package-lock.json b/website/package-lock.json index 8ecd186028..3261c5181f 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -255,9 +255,9 @@ } }, "node_modules/@apify/docs-theme": { - "version": "1.0.105", - "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.105.tgz", - "integrity": "sha512-rTdC1eXHqgZf+99qWnBNjyrFDPNYktha5dRCLO0Mh7JLB6b9MWrD7Bg8hRUJJA9GyXwSX82YPAPHBz2SRLGX/A==", + "version": "1.0.114", + "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.114.tgz", + "integrity": "sha512-FHZadzINpNV2Qpu/z2oHpA+eGK11S33J+RLQju01sAQFrXvjJ3fydaABd+NbJHaUDzNXcXByNkaI6BfBvnKq6g==", "dependencies": { "@apify/docs-search-modal": "^1.0.25", "@docusaurus/theme-common": "^2.4.1", @@ -265,6 +265,7 @@ "axios": "^1.4.0", "babel-loader": "^9.1.3", "docusaurus-gtm-plugin": "^0.0.2", + "docusaurus-plugin-smartlook": "^1.0.2", "postcss-preset-env": "^9.3.0", "prism-react-renderer": "^2.0.6" }, @@ -7959,6 +7960,12 @@ "resolved": "https://registry.npmjs.org/docusaurus-gtm-plugin/-/docusaurus-gtm-plugin-0.0.2.tgz", "integrity": "sha512-Xx/df0Ppd5SultlzUj9qlQk2lX9mNVfTb41juyBUPZ1Nc/5dNx+uN0VuLyF4JEObkDRrUY1EFo9fEUDo8I6QOQ==" }, + "node_modules/docusaurus-plugin-smartlook": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/docusaurus-plugin-smartlook/-/docusaurus-plugin-smartlook-1.0.2.tgz", + "integrity": "sha512-HKOavP16LMWsdZ6xpEqGecIweButfZ3hteCy6FZb1+s8c5b3hFsn9n1ohChAC1B1KETQZG5OhmQ270C0ak06Rg==", + "deprecated": "docusaurus-plugin-smartlook is now available at @stackql/docusaurus-plugin-smartlook" + }, "node_modules/docusaurus-plugin-typedoc-api": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/docusaurus-plugin-typedoc-api/-/docusaurus-plugin-typedoc-api-3.0.0.tgz", @@ -18434,9 +18441,9 @@ } }, "@apify/docs-theme": { - "version": "1.0.105", - "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.105.tgz", - "integrity": "sha512-rTdC1eXHqgZf+99qWnBNjyrFDPNYktha5dRCLO0Mh7JLB6b9MWrD7Bg8hRUJJA9GyXwSX82YPAPHBz2SRLGX/A==", + "version": "1.0.114", + "resolved": "https://registry.npmjs.org/@apify/docs-theme/-/docs-theme-1.0.114.tgz", + "integrity": "sha512-FHZadzINpNV2Qpu/z2oHpA+eGK11S33J+RLQju01sAQFrXvjJ3fydaABd+NbJHaUDzNXcXByNkaI6BfBvnKq6g==", "requires": { "@apify/docs-search-modal": "^1.0.25", "@docusaurus/theme-common": "^2.4.1", @@ -18444,6 +18451,7 @@ "axios": "^1.4.0", "babel-loader": "^9.1.3", "docusaurus-gtm-plugin": "^0.0.2", + "docusaurus-plugin-smartlook": "^1.0.2", "postcss-preset-env": "^9.3.0", "prism-react-renderer": "^2.0.6" }, @@ -23613,6 +23621,11 @@ "resolved": "https://registry.npmjs.org/docusaurus-gtm-plugin/-/docusaurus-gtm-plugin-0.0.2.tgz", "integrity": "sha512-Xx/df0Ppd5SultlzUj9qlQk2lX9mNVfTb41juyBUPZ1Nc/5dNx+uN0VuLyF4JEObkDRrUY1EFo9fEUDo8I6QOQ==" }, + "docusaurus-plugin-smartlook": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/docusaurus-plugin-smartlook/-/docusaurus-plugin-smartlook-1.0.2.tgz", + "integrity": "sha512-HKOavP16LMWsdZ6xpEqGecIweButfZ3hteCy6FZb1+s8c5b3hFsn9n1ohChAC1B1KETQZG5OhmQ270C0ak06Rg==" + }, "docusaurus-plugin-typedoc-api": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/docusaurus-plugin-typedoc-api/-/docusaurus-plugin-typedoc-api-3.0.0.tgz", diff --git a/website/src/pages/index.module.css b/website/src/pages/index.module.css index 2155995584..8fac7ea35e 100644 --- a/website/src/pages/index.module.css +++ b/website/src/pages/index.module.css @@ -52,6 +52,21 @@ width: calc(100% - 2rem); } +.heroBanner h1::selection, +.heroBanner h1 span::selection { + color: rgb(36, 39, 54) !important; + -webkit-text-fill-color: rgb(36, 39, 54); + background: #B4D7FE !important; + -webkit-background-clip: unset; + background-clip: unset; +} + +html[data-theme='dark'] .heroBanner ::selection { + color: #fff !important; + -webkit-text-fill-color: #fff; + background: #385477 !important; +} + html .heroBanner h2 { font-style: normal; font-weight: 400;