diff --git a/src/index.ts b/src/index.ts index 63af336..7902701 100644 --- a/src/index.ts +++ b/src/index.ts @@ -52,12 +52,11 @@ function removeEmptyTopLevel>( } class ConsoleLogger implements Logger { - private shouldLog = ( + private shouldLog = process.env.CLOUD_SERVICE === 'true' || process.env.SSE_LOCAL === 'true' || - process.env.HTTP_STREAMABLE_SERVER === 'true' - ); - + process.env.HTTP_STREAMABLE_SERVER === 'true'; + debug(...args: unknown[]): void { if (this.shouldLog) { console.debug('[DEBUG]', new Date().toISOString(), ...args); @@ -90,7 +89,9 @@ const server = new FastMCP({ version: '3.0.0', logger: new ConsoleLogger(), roots: { enabled: false }, - authenticate: async (request: { headers: IncomingHttpHeaders }): Promise => { + authenticate: async (request: { + headers: IncomingHttpHeaders; + }): Promise => { if (process.env.CLOUD_SERVICE === 'true') { const apiKey = extractApiKey(request.headers); @@ -101,7 +102,9 @@ const server = new FastMCP({ } else { // For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided if (!process.env.FIRECRAWL_API_KEY && !process.env.FIRECRAWL_API_URL) { - console.error('Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided'); + console.error( + 'Either FIRECRAWL_API_KEY or FIRECRAWL_API_URL must be provided' + ); process.exit(1); } return { firecrawlApiKey: process.env.FIRECRAWL_API_KEY }; @@ -122,12 +125,12 @@ function createClient(apiKey?: string): FirecrawlApp { apiUrl: process.env.FIRECRAWL_API_URL, }), }; - + // Only add apiKey if it's provided (required for cloud, optional for self-hosted) if (apiKey) { config.apiKey = apiKey; } - + return new FirecrawlApp(config); } @@ -144,12 +147,17 @@ function getClient(session?: SessionData): FirecrawlApp { } return createClient(session.firecrawlApiKey); } - + // For self-hosted instances, API key is optional if FIRECRAWL_API_URL is provided - if (!process.env.FIRECRAWL_API_URL && (!session || !session.firecrawlApiKey)) { - throw new Error('Unauthorized: API key is required when not using a self-hosted instance'); + if ( + !process.env.FIRECRAWL_API_URL && + (!session || !session.firecrawlApiKey) + ) { + throw new Error( + 'Unauthorized: API key is required when not using a self-hosted instance' + ); } - + return createClient(session?.firecrawlApiKey); } @@ -162,7 +170,13 @@ function asText(data: unknown): string { // Define safe action types const safeActionTypes = ['wait', 'screenshot', 'scroll', 'scrape'] as const; -const otherActions = ['click', 'write', 'press', 'executeJavascript', 'generatePDF'] as const; +const otherActions = [ + 'click', + 'write', + 'press', + 'executeJavascript', + 'generatePDF', +] as const; const allActionTypes = [...safeActionTypes, ...otherActions] as const; // Use appropriate action types based on safe mode @@ -198,26 +212,39 @@ const scrapeParamsSchema = z.object({ ]) ) .optional(), + parsers: z + .array( + z.union([ + z.enum(['pdf']), + z.object({ + type: z.enum(['pdf']), + maxPages: z.number().int().min(1).max(10000).optional(), + }), + ]) + ) + .optional(), onlyMainContent: z.boolean().optional(), includeTags: z.array(z.string()).optional(), excludeTags: z.array(z.string()).optional(), waitFor: z.number().optional(), - ...(SAFE_MODE ? {} : { - actions: z - .array( - z.object({ - type: z.enum(allowedActionTypes), - selector: z.string().optional(), - milliseconds: z.number().optional(), - text: z.string().optional(), - key: z.string().optional(), - direction: z.enum(['up', 'down']).optional(), - script: z.string().optional(), - fullPage: z.boolean().optional(), - }) - ) - .optional(), - }), + ...(SAFE_MODE + ? {} + : { + actions: z + .array( + z.object({ + type: z.enum(allowedActionTypes), + selector: z.string().optional(), + milliseconds: z.number().optional(), + text: z.string().optional(), + key: z.string().optional(), + direction: z.enum(['up', 'down']).optional(), + script: z.string().optional(), + fullPage: z.boolean().optional(), + }) + ) + .optional(), + }), mobile: z.boolean().optional(), skipTlsVerification: z.boolean().optional(), removeBase64Images: z.boolean().optional(), @@ -254,18 +281,28 @@ This is the most powerful, fastest and most reliable scraper tool, if available \`\`\` **Performance:** Add maxAge parameter for 500% faster scrapes using cached data. **Returns:** Markdown, HTML, or other formats as specified. -${SAFE_MODE ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' : ''} +${ + SAFE_MODE + ? '**Safe Mode:** Read-only content extraction. Interactive actions (click, write, executeJavascript) are disabled for security.' + : '' +} `, parameters: scrapeParamsSchema, execute: async ( args: unknown, { session, log }: { session?: SessionData; log: Logger } ): Promise => { - const { url, ...options } = args as { url: string } & Record; + const { url, ...options } = args as { url: string } & Record< + string, + unknown + >; const client = getClient(session); const cleaned = removeEmptyTopLevel(options as Record); log.info('Scraping URL', { url: String(url) }); - const res = await client.scrape(String(url), { ...cleaned, origin: ORIGIN } as any); + const res = await client.scrape(String(url), { + ...cleaned, + origin: ORIGIN, + } as any); return asText(res); }, }); @@ -302,11 +339,17 @@ Map a website to discover all indexed URLs on the site. args: unknown, { session, log }: { session?: SessionData; log: Logger } ): Promise => { - const { url, ...options } = args as { url: string } & Record; + const { url, ...options } = args as { url: string } & Record< + string, + unknown + >; const client = getClient(session); const cleaned = removeEmptyTopLevel(options as Record); log.info('Mapping URL', { url: String(url) }); - const res = await client.map(String(url), { ...cleaned, origin: ORIGIN } as any); + const res = await client.map(String(url), { + ...cleaned, + origin: ORIGIN, + } as any); return asText(res); }, }); @@ -424,7 +467,11 @@ server.addTool({ } \`\`\` **Returns:** Operation ID for status checking; use firecrawl_check_crawl_status to check progress. - ${SAFE_MODE ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' : ''} + ${ + SAFE_MODE + ? '**Safe Mode:** Read-only crawling. Webhooks and interactive actions are disabled for security.' + : '' + } `, parameters: z.object({ url: z.string(), @@ -439,17 +486,19 @@ server.addTool({ crawlEntireDomain: z.boolean().optional(), delay: z.number().optional(), maxConcurrency: z.number().optional(), - ...(SAFE_MODE ? {} : { - webhook: z - .union([ - z.string(), - z.object({ - url: z.string(), - headers: z.record(z.string(), z.string()).optional(), - }), - ]) - .optional(), - }), + ...(SAFE_MODE + ? {} + : { + webhook: z + .union([ + z.string(), + z.object({ + url: z.string(), + headers: z.record(z.string(), z.string()).optional(), + }), + ]) + .optional(), + }), deduplicateSimilarURLs: z.boolean().optional(), ignoreQueryParameters: z.boolean().optional(), scrapeOptions: scrapeParamsSchema.omit({ url: true }).partial().optional(),