diff --git a/apps/api/src/lib/strings.ts b/apps/api/src/lib/strings.ts index bf6156d284..db53857554 100644 --- a/apps/api/src/lib/strings.ts +++ b/apps/api/src/lib/strings.ts @@ -1,5 +1,5 @@ import { isSelfHosted } from "./deployment"; export const BLOCKLISTED_URL_MESSAGE = isSelfHosted() - ? "This website is no longer supported. Please check your server configuration and logs for more details." - : "This website is no longer supported, please reach out to help@firecrawl.com for more info on how to activate it on your account."; + ? "This website is not currently supported. Please check your server configuration and logs for more details." + : "This website is not currently supported. If you are part of an enterprise, please reach out to help@firecrawl.com to discuss the possibility of getting it activated on your account."; diff --git a/apps/js-sdk/firecrawl/src/__tests__/e2e/v1/index.test.ts b/apps/js-sdk/firecrawl/src/__tests__/e2e/v1/index.test.ts index a0a2d345a1..95d6852de1 100644 --- a/apps/js-sdk/firecrawl/src/__tests__/e2e/v1/index.test.ts +++ b/apps/js-sdk/firecrawl/src/__tests__/e2e/v1/index.test.ts @@ -36,7 +36,7 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should throw error for blocklisted URL on scrape', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const blocklistedUrl = "https://facebook.com/fake-test"; - await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("This website is no longer supported"); + await expect(app.scrapeUrl(blocklistedUrl)).rejects.toThrow("This website is not currently supported"); }); test.concurrent('should return successful response for valid scrape', async () => { @@ -52,13 +52,13 @@ describe('FirecrawlApp E2E Tests', () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl( 'https://roastmywebsite.ai', { - formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], - headers: { "x-key": "test" }, - includeTags: ['h1'], - excludeTags: ['h2'], - onlyMainContent: true, - timeout: 30000, - waitFor: 1000 + formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'], + headers: { "x-key": "test" }, + includeTags: ['h1'], + excludeTags: ['h2'], + onlyMainContent: true, + timeout: 30000, + waitFor: 1000 }); if (!response.success) { @@ -70,7 +70,7 @@ describe('FirecrawlApp E2E Tests', () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.scrapeUrl( 'https://roastmywebsite.ai', { - formats: ['screenshot@fullPage'], + formats: ['screenshot@fullPage'], }); if (!response.success) { throw new Error(response.error); @@ -149,7 +149,7 @@ describe('FirecrawlApp E2E Tests', () => { } }, 60000); // 60 seconds timeout - test.concurrent('should return successful response for crawl with options and wait for completion', async () => { + test.concurrent('should return successful response for crawl with options and wait for completion', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.crawlUrl('https://roastmywebsite.ai', { excludePaths: ['blog/*'], @@ -192,7 +192,7 @@ describe('FirecrawlApp E2E Tests', () => { test.concurrent('should check crawl status', async () => { const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); - const response = await app.asyncCrawlUrl('https://firecrawl.dev', { limit: 20, scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links']}} as CrawlParams) as CrawlResponse; + const response = await app.asyncCrawlUrl('https://firecrawl.dev', { limit: 20, scrapeOptions: { formats: ['markdown', 'html', 'rawHtml', 'screenshot', 'links'] } } as CrawlParams) as CrawlResponse; expect(response).not.toBeNull(); expect(response.id).toBeDefined(); @@ -250,16 +250,16 @@ describe('FirecrawlApp E2E Tests', () => { }); test.concurrent('should return successful response for valid map', async () => { - const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse; + const app = new FirecrawlApp({ apiKey: TEST_API_KEY, apiUrl: API_URL }); const response = await app.mapUrl('https://roastmywebsite.ai') as MapResponse; expect(response).not.toBeNull(); - + expect(response.links?.length).toBeGreaterThan(0); expect(response.links?.[0]).toContain("https://"); const filteredLinks = response.links?.filter((link: string) => link.includes("roastmywebsite.ai")); expect(filteredLinks?.length).toBeGreaterThan(0); }, 30000); // 30 seconds timeout - + test('should search with string query', async () => { const app = new FirecrawlApp({ apiUrl: API_URL, apiKey: TEST_API_KEY }); diff --git a/apps/test-suite/tests/scrape.test.ts b/apps/test-suite/tests/scrape.test.ts index 8b2e15d189..776c59fe5a 100644 --- a/apps/test-suite/tests/scrape.test.ts +++ b/apps/test-suite/tests/scrape.test.ts @@ -40,13 +40,13 @@ describe("Scraping Checkup (E2E)", () => { const startTime = new Date().getTime(); const date = new Date(); const logsDir = `logs/${date.getMonth() + 1}-${date.getDate()}-${date.getFullYear()}`; - + let errorLogFileName = `${logsDir}/run.log_${new Date().toTimeString().split(' ')[0]}`; const errorLog: WebsiteScrapeError[] = []; - + for (let i = 0; i < websitesData.length; i += batchSize) { // Introducing delay to respect the rate limit of 15 requests per minute - await new Promise(resolve => setTimeout(resolve, 10000)); + await new Promise(resolve => setTimeout(resolve, 10000)); const batch = websitesData.slice(i, i + batchSize); const batchPromise = Promise.all( @@ -80,7 +80,7 @@ describe("Scraping Checkup (E2E)", () => { }); const prompt = `Based on this markdown extracted from a website html page, ${websiteData.prompt} Just say 'yes' or 'no' to the question.\nWebsite markdown: ${scrapedContent.body.data.markdown}\n`; - + let msg = null; const maxRetries = 3; let attempts = 0; @@ -122,7 +122,7 @@ describe("Scraping Checkup (E2E)", () => { const actualOutput = (msg.choices[0].message.content ?? "").toLowerCase() const expectedOutput = websiteData.expected_output.toLowerCase(); - const numTokens = numTokensFromString(prompt,"gpt-4") + numTokensFromString(actualOutput,"gpt-4"); + const numTokens = numTokensFromString(prompt, "gpt-4") + numTokensFromString(actualOutput, "gpt-4"); totalTokens += numTokens; if (actualOutput.includes(expectedOutput)) { @@ -174,14 +174,14 @@ describe("Scraping Checkup (E2E)", () => { console.log(`Total time taken: ${totalTimeTaken} miliseconds`); await logErrors(errorLog, timeTaken, totalTokens, score, websitesData.length); - + if (process.env.ENV === "local" && errorLog.length > 0) { - if (!fs.existsSync(logsDir)){ + if (!fs.existsSync(logsDir)) { fs.mkdirSync(logsDir, { recursive: true }); } fs.writeFileSync(errorLogFileName, JSON.stringify(errorLog, null, 2)); } - + expect(score).toBeGreaterThanOrEqual(70); }, 350000); // 150 seconds timeout